"""Common simple affinities."""
# Author: Hugues Van Assel <vanasselhugues@gmail.com>
# Nicolas Courty <ncourty@irisa.fr>
#
# License: BSD 3-Clause License
import torch
from typing import Union, Optional
from scipy.optimize import curve_fit
import numpy as np
from torchdr.affinity.base import UnnormalizedAffinity, UnnormalizedLogAffinity
from torchdr.utils import LazyTensorType
# from umap/umap/umap_.py
def find_ab_params(spread, min_dist):
    """Fit a, b params as in UMAP.

    Fit (a, b) for the differentiable curve used in lower
    dimensional fuzzy simplicial complex construction. We want the
    smooth curve (from a pre-defined family with simple gradient) that
    best matches an offset exponential decay.
    """

    def low_dim_curve(x, a, b):
        # UMAP's family of embedding-space curves: 1 / (1 + a * x^(2b)).
        return 1.0 / (1.0 + a * x ** (2 * b))

    # Target profile: flat at 1 below min_dist, exponential decay beyond it.
    x_grid = np.linspace(0, spread * 3, 300)
    target = np.where(
        x_grid < min_dist, 1.0, np.exp(-(x_grid - min_dist) / spread)
    )
    (a_fit, b_fit), _covariance = curve_fit(low_dim_curve, x_grid, target)
    return a_fit, b_fit
class GaussianAffinity(UnnormalizedLogAffinity):
    r"""Compute the Gaussian affinity matrix.

    Its expression is as follows : :math:`\exp( - \mathbf{C} / \sigma)`
    where :math:`\mathbf{C}` is the pairwise distance matrix and
    :math:`\sigma` is the bandwidth parameter.

    Parameters
    ----------
    sigma : float, optional
        Bandwidth parameter. Default is 1.0.
    metric : str, optional
        Metric to use for pairwise distances computation.
        Default is "sqeuclidean".
    zero_diag : bool, optional
        Whether to set the diagonal of the affinity matrix to zero.
        Default is True.
    device : str, optional
        Device to use for computations. Default is "auto".
    backend : {"keops", "faiss", None}, optional
        Which backend to use for handling sparsity and memory efficiency.
        Default is None.
    verbose : bool, optional
        Verbosity. Default is True.
    """

    def __init__(
        self,
        sigma: float = 1.0,
        metric: str = "sqeuclidean",
        zero_diag: bool = True,
        device: str = "auto",
        backend: Optional[str] = None,
        verbose: bool = True,
    ):
        super().__init__(
            metric=metric,
            zero_diag=zero_diag,
            device=device,
            backend=backend,
            verbose=verbose,
        )
        self.sigma = sigma

    def _log_affinity_formula(self, C: Union[torch.Tensor, LazyTensorType]):
        # Log of exp(-C / sigma): simply scale the cost matrix by the bandwidth.
        return -C / self.sigma
class StudentAffinity(UnnormalizedLogAffinity):
    r"""Compute the Student affinity matrix based on the Student-t distribution.

    Its expression is given by:

    .. math::
        \left(1 + \frac{\mathbf{C}}{\nu}\right)^{-\frac{\nu + 1}{2}}

    where :math:`\nu > 0` is the degrees of freedom parameter.

    Parameters
    ----------
    degrees_of_freedom : int, optional
        Degrees of freedom for the Student-t distribution. Default is 1.
    metric : str, optional
        Metric to use for pairwise distances computation.
        Default is "sqeuclidean".
    zero_diag : bool, optional
        Whether to set the diagonal of the affinity matrix to zero.
        Default is True.
    device : str, optional
        Device to use for computations. Default is "auto".
    backend : {"keops", "faiss", None}, optional
        Which backend to use for handling sparsity and memory efficiency.
        Default is None.
    verbose : bool, optional
        Verbosity. Default is False.
    """

    def __init__(
        self,
        degrees_of_freedom: int = 1,
        metric: str = "sqeuclidean",
        zero_diag: bool = True,
        device: str = "auto",
        backend: Optional[str] = None,
        verbose: bool = False,
    ):
        super().__init__(
            metric=metric,
            zero_diag=zero_diag,
            device=device,
            backend=backend,
            verbose=verbose,
        )
        self.degrees_of_freedom = degrees_of_freedom

    def _log_affinity_formula(self, C: Union[torch.Tensor, LazyTensorType]):
        # Log of (1 + C / nu)^(-(nu + 1) / 2).
        return (
            -0.5
            * (self.degrees_of_freedom + 1)
            * (C / self.degrees_of_freedom + 1).log()
        )
class CauchyAffinity(UnnormalizedLogAffinity):
    r"""Compute the Cauchy affinity matrix based on the Cauchy distribution.

    Its expression is given by:

    .. math::
        \frac{\gamma}{\mathbf{C}+\gamma^2}

    where :math:`\gamma > 0` is a scale parameter. This is proportional to
    the Cauchy density
    :math:`\frac{1}{\pi \gamma} \left[\frac{\gamma^2}{\mathbf{C}+\gamma^2}\right]`;
    the constant factor :math:`1/\pi` is dropped since the affinity is
    unnormalized.

    Parameters
    ----------
    gamma : float, optional
        Scale parameter for the Cauchy distribution. Default is 1.
    metric : str, optional
        Metric to use for pairwise distances computation.
        Default is "sqhyperbolic".
    zero_diag : bool, optional
        Whether to set the diagonal of the affinity matrix to zero.
        Default is True.
    device : str, optional
        Device to use for computations. Default is "auto".
    backend : {"keops", "faiss", None}, optional
        Which backend to use for handling sparsity and memory efficiency.
        Default is None.
    verbose : bool, optional
        Verbosity. Default is True.
    """

    def __init__(
        self,
        gamma: float = 1,
        metric: str = "sqhyperbolic",
        zero_diag: bool = True,
        device: str = "auto",
        backend: Optional[str] = None,
        verbose: bool = True,
    ):
        super().__init__(
            metric=metric,
            zero_diag=zero_diag,
            device=device,
            backend=backend,
            verbose=verbose,
        )
        self.gamma = gamma

    def _log_affinity_formula(self, C: Union[torch.Tensor, LazyTensorType]):
        # Log of gamma / (C + gamma^2), the unnormalized Cauchy kernel.
        return (self.gamma / (C + self.gamma**2)).log()
class NegativeCostAffinity(UnnormalizedAffinity):
    r"""Compute the negative cost affinity matrix.

    Its expression is given by :math:`-\mathbf{C}`
    where :math:`\mathbf{C}` is the pairwise distance matrix.

    Parameters
    ----------
    metric : str, optional
        Metric to use for pairwise distances computation. Default is "sqeuclidean".
    zero_diag : bool, optional
        Whether to set the diagonal of the affinity matrix to zero. Default is True.
    device : str, optional
        Device to use for computations. Default is "auto".
    backend : {"keops", "faiss", None}, optional
        Which backend to use for handling sparsity and memory efficiency.
        Default is None.
    verbose : bool, optional
        Verbosity. Default is False.
    """

    def __init__(
        self,
        metric: str = "sqeuclidean",
        zero_diag: bool = True,
        device: str = "auto",
        backend: Optional[str] = None,
        verbose: bool = False,
    ):
        super().__init__(
            metric=metric,
            zero_diag=zero_diag,
            device=device,
            backend=backend,
            verbose=verbose,
        )

    def _affinity_formula(self, C: Union[torch.Tensor, LazyTensorType]):
        # The affinity is simply the negated cost matrix.
        return -C
class ScalarProductAffinity(NegativeCostAffinity):
    r"""Compute the scalar product affinity matrix.

    Its expression is given by :math:`\mathbf{X} \mathbf{X}^\top`
    where :math:`\mathbf{X} = (\mathbf{x}_1, \ldots, \mathbf{x}_n)^\top`
    with each row vector :math:`\mathbf{x}_i` corresponding to the i-th data sample.

    Parameters
    ----------
    device : str, optional
        Device to use for computations. Default is "auto".
    backend : {"keops", "faiss", None}, optional
        Which backend to use for handling sparsity and memory efficiency.
        Default is None.
    verbose : bool, optional
        Verbosity. Default is False.
    """

    def __init__(
        self,
        device: str = "auto",
        backend: Optional[str] = None,
        verbose: bool = False,
    ):
        # The "angular" metric yields -X X^T as the cost, so negating it in
        # NegativeCostAffinity gives the scalar product. The diagonal holds
        # meaningful self-similarities, hence zero_diag=False.
        super().__init__(
            metric="angular",
            device=device,
            backend=backend,
            verbose=verbose,
            zero_diag=False,
        )
class UMAPAffinityOut(UnnormalizedLogAffinity):
    r"""Compute the affinity used in embedding space in UMAP :cite:`mcinnes2018umap`.

    Its :math:`(i,j)` coefficient is as follows:

    .. math::
        1 / \left(1 + a C_{ij}^{b} \right)

    where parameters a and b are fitted to the spread and min_dist parameters.

    Parameters
    ----------
    min_dist : float, optional
        min_dist parameter from UMAP. Provides the minimum distance apart that
        points are allowed to be. Default is 0.1.
    spread : float, optional
        spread parameter from UMAP. Default is 1.
    a : float, optional
        factor of the cost matrix. If None, fitted from spread and min_dist.
    b : float, optional
        exponent of the cost matrix. If None, fitted from spread and min_dist.
    metric : str, optional
        Metric to use for pairwise distances computation.
        Default is "sqeuclidean".
    zero_diag : bool, optional
        Whether to set the diagonal of the affinity matrix to zero.
        Default is True.
    device : str, optional
        Device to use for computations. Default is "auto".
    backend : {"keops", "faiss", None}, optional
        Which backend to use for handling sparsity and memory efficiency.
        Default is None.
    verbose : bool, optional
        Verbosity. Default is False.
    """

    def __init__(
        self,
        min_dist: float = 0.1,
        spread: float = 1,
        a: Optional[float] = None,
        b: Optional[float] = None,
        metric: str = "sqeuclidean",
        zero_diag: bool = True,
        device: str = "auto",
        backend: Optional[str] = None,
        verbose: bool = False,
    ):
        super().__init__(
            metric=metric,
            zero_diag=zero_diag,
            device=device,
            backend=backend,
            verbose=verbose,
        )
        self.min_dist = min_dist
        self.spread = spread

        # If either curve parameter is missing, fit both from (spread, min_dist)
        # as UMAP does; .item() converts the fitted numpy scalars to floats.
        if a is None or b is None:
            fitted_a, fitted_b = find_ab_params(self.spread, self.min_dist)
            self._a, self._b = fitted_a.item(), fitted_b.item()
        else:
            self._a = a
            self._b = b

    def _log_affinity_formula(self, C: Union[torch.Tensor, LazyTensorType]):
        # Log of 1 / (1 + a * C^b).
        return -(1 + self._a * C**self._b).log()