# Source code for torchdr.affinity.knn_normalized

# -*- coding: utf-8 -*-
"""Affinity matrices with normalizations using nearest neighbor distances."""

# Author: Hugues Van Assel <vanasselhugues@gmail.com>
#         Cédric Vincent-Cuaz <cedric.vincent-cuaz@inria.fr>
#
# License: BSD 3-Clause License

import torch
from typing import Tuple

from torchdr.affinity.base import Affinity, LogAffinity
from torchdr.utils import (
    kmin,
    wrap_vectors,
    batch_transpose,
    logsumexp_red,
    sum_red,
)


@wrap_vectors
def _log_SelfTuning(C, sigma):
    r"""Return the log of the self-tuning affinity with sample-wise bandwidth.

    Each entry equals :math:`- C_{ij} / (\sigma_i \sigma_j)`, where the
    bandwidth :math:`\sigma_i` is the distance from point :math:`i` to its
    K-th nearest neighbor (computed by the caller).

    Parameters
    ----------
    C : torch.Tensor or pykeops.torch.LazyTensor of shape (n, n)
        Pairwise distance matrix.
    sigma : torch.Tensor of shape (n,)
        Sample-wise bandwidth parameter.

    Returns
    -------
    log_P : torch.Tensor or pykeops.torch.LazyTensor
    """
    # Outer product of bandwidths: entry (i, j) is sigma_i * sigma_j.
    bandwidth_outer = sigma * batch_transpose(sigma)
    return -C / bandwidth_outer


@wrap_vectors
def _log_MAGIC(C, sigma):
    r"""Return the log of the MAGIC affinity with sample-wise bandwidth.

    Each entry equals :math:`- C_{ij} / \sigma_i`, where the bandwidth
    :math:`\sigma_i` is the distance from point :math:`i` to its K-th
    nearest neighbor (computed by the caller).

    Parameters
    ----------
    C : torch.Tensor or pykeops.torch.LazyTensor of shape (n, n)
        Pairwise distance matrix.
    sigma : torch.Tensor of shape (n,)
        Sample-wise bandwidth parameter.

    Returns
    -------
    log_P : torch.Tensor or pykeops.torch.LazyTensor
    """
    # Row-wise bandwidth only: unlike the self-tuning affinity, the
    # MAGIC kernel is not symmetrized here.
    scaled = C / sigma
    return -scaled


class SelfTuningAffinity(LogAffinity):
    r"""Compute the self-tuning affinity introduced in [22]_.

    The affinity has a sample-wise bandwidth :math:`\mathbf{\sigma} \in \mathbb{R}^n`.

    .. math::
        \exp \left( - \frac{C_{ij}}{\sigma_i \sigma_j} \right)

    In the above, :math:`\mathbf{C}` is the pairwise distance matrix and
    :math:`\sigma_i` is the distance from the K'th nearest neighbor of data point
    :math:`\mathbf{x}_i`.

    Parameters
    ----------
    K : int, optional
        K-th nearest neighbor used to set the sample-wise bandwidth.
    normalization_dim : int or Tuple[int], optional
        Dimension along which to normalize the affinity matrix.
    metric : str, optional
        Metric to use for pairwise distances computation.
    zero_diag : bool, optional
        Whether to set the diagonal of the affinity matrix to zero.
    device : str, optional
        Device to use for computations.
    keops : bool, optional
        Whether to use KeOps for computations.
    verbose : bool, optional
        Verbosity. Default is False.

    References
    ----------
    .. [22] Zelnik-Manor, L., & Perona, P. (2004).
        Self-tuning spectral clustering.
        Advances in neural information processing systems (NeurIPS).
    """

    def __init__(
        self,
        K: int = 7,
        normalization_dim: int | Tuple[int] = (0, 1),
        metric: str = "sqeuclidean",
        zero_diag: bool = True,
        device: str = None,
        keops: bool = True,
        verbose: bool = False,
    ):
        super().__init__(
            metric=metric,
            zero_diag=zero_diag,
            device=device,
            keops=keops,
            verbose=verbose,
        )
        self.K = K
        self.normalization_dim = normalization_dim

    def _compute_log_affinity(self, X: torch.Tensor):
        r"""Fit the self-tuning affinity model to the provided data.

        Parameters
        ----------
        X : torch.Tensor
            Input data.

        Returns
        -------
        log_affinity_matrix : torch.Tensor or pykeops.torch.LazyTensor
            The computed affinity matrix in log domain.
        """
        C = self._distance_matrix(X)

        # The K-th smallest distance in each row defines the per-sample
        # bandwidth sigma_i (indices from kmin are not needed).
        minK_values, _ = kmin(C, k=self.K, dim=1)
        self.sigma_ = minK_values[:, -1]
        log_affinity_matrix = _log_SelfTuning(C, self.sigma_)

        # Optional normalization in log domain (log-sum-exp over the
        # requested dimension(s)).
        if self.normalization_dim is not None:
            self.log_normalization_ = logsumexp_red(
                log_affinity_matrix, self.normalization_dim
            )
            log_affinity_matrix = log_affinity_matrix - self.log_normalization_

        return log_affinity_matrix
class MAGICAffinity(Affinity):
    r"""Compute the MAGIC affinity introduced in [23]_.

    The construction is as follows. First, it computes a Gaussian kernel
    with sample-wise bandwidth :math:`\mathbf{\sigma} \in \mathbb{R}^n`.

    .. math::
        P_{ij} \leftarrow \exp \left( - \frac{C_{ij}}{\sigma_i} \right)

    In the above, :math:`\mathbf{C}` is the pairwise distance matrix and
    :math:`\sigma_i` is the distance from the K'th nearest neighbor of data point
    :math:`\mathbf{x}_i`.

    Then it averages the affinity matrix with its transpose:

    .. math::
        P_{ij} \leftarrow \frac{P_{ij} + P_{ji}}{2} \:.

    Finally, it normalizes the affinity matrix along each row:

    .. math::
        P_{ij} \leftarrow \frac{P_{ij}}{\sum_{t} P_{it}} \:.

    Parameters
    ----------
    K : int, optional
        K-th nearest neighbor used to set the sample-wise bandwidth.
    metric : str, optional
        Metric to use for pairwise distances computation.
    zero_diag : bool, optional
        Whether to set the diagonal of the affinity matrix to zero.
    device : str, optional
        Device to use for computations.
    keops : bool, optional
        Whether to use KeOps for computations.
    verbose : bool, optional
        Verbosity. Default is False.

    References
    ----------
    .. [23] Van Dijk, D., Sharma, R., Nainys, J., Yim, K., Kathail, P.,
        Carr, A. J., ... & Pe'er, D. (2018).
        Recovering Gene Interactions from Single-Cell Data Using Data Diffusion.
        Cell, 174(3).
    """

    def __init__(
        self,
        K: int = 7,
        metric: str = "sqeuclidean",
        zero_diag: bool = True,
        device: str = None,
        keops: bool = True,
        verbose: bool = False,
    ):
        super().__init__(
            metric=metric,
            zero_diag=zero_diag,
            device=device,
            keops=keops,
            verbose=verbose,
        )
        self.K = K

    def _compute_affinity(self, X: torch.Tensor):
        r"""Fit the MAGIC affinity model to the provided data.

        Parameters
        ----------
        X : torch.Tensor
            Input data.

        Returns
        -------
        affinity_matrix : torch.Tensor or pykeops.torch.LazyTensor
            The computed affinity matrix.
        """
        C = self._distance_matrix(X)

        # The K-th smallest distance in each row defines the per-sample
        # bandwidth sigma_i (indices from kmin are not needed).
        minK_values, _ = kmin(C, k=self.K, dim=1)
        self.sigma_ = minK_values[:, -1]
        affinity_matrix = _log_MAGIC(C, self.sigma_).exp()

        # Symmetrize by averaging with the transpose, then normalize
        # each row to sum to one.
        affinity_matrix = (affinity_matrix + batch_transpose(affinity_matrix)) / 2
        self.normalization_ = sum_red(affinity_matrix, 1)
        affinity_matrix = affinity_matrix / self.normalization_

        return affinity_matrix