Source code for torchdr.diffusion_embedding.phate

"""PHATE algorithm."""

# Author: Guillaume Huguet @guillaumehu
#         Danqi Liao @Danqi7
#         Hugues Van Assel @huguesva
#
# License: BSD 3-Clause License

from typing import Optional

from torchdr.affinity import PHATEAffinity
from torchdr.affinity_matcher import AffinityMatcher
from torchdr.utils import square_loss, pairwise_distances


class PHATE(AffinityMatcher):
    r"""Implementation of PHATE introduced in :cite:`moon2019visualizing`.

    PHATE is a diffusion map-based method that uses a potential affinity matrix
    :math:`\mathbf{P}` implemented in :class:`~torchdr.PHATEAffinity` as input.

    The loss function is defined as:

    .. math::

        \sqrt{\sum_{i,j} (P_{ij} - \|\mathbf{z}_i - \mathbf{z}_j\|)^2 / \sum_{i,j} P_{ij}^2} \:.

    Parameters
    ----------
    k : int, optional
        Number of nearest neighbors. Default is 5.
    n_components : int, optional
        Dimension of the embedding space. Default is 2.
    t : int, optional
        Diffusion time parameter. Default is 100.
    alpha : float, optional
        Exponent for the alpha-decay kernel. Default is 10.0.
    optimizer : str or torch.optim.Optimizer, optional
        Name of an optimizer from torch.optim or an optimizer class.
        Default is "Adam".
    optimizer_kwargs : dict, optional
        Additional keyword arguments for the optimizer.
    lr : float or 'auto', optional
        Learning rate for the optimizer. Default is 1e0.
    scheduler : str or torch.optim.lr_scheduler.LRScheduler, optional
        Name of a scheduler from torch.optim.lr_scheduler or a scheduler class.
        Default is None (no scheduler).
    scheduler_kwargs : dict, optional
        Additional keyword arguments for the scheduler.
    min_grad_norm : float, optional
        Tolerance for stopping criterion. Default is 1e-15.
    max_iter : int, optional
        Maximum number of iterations. Default is 1000.
    init : str, torch.Tensor, or np.ndarray, optional
        Initialization method for the embedding. Default is "pca".
    init_scaling : float, optional
        Scaling factor for the initial embedding. Default is 1e-4.
    device : str, optional
        Device to use for computations. Default is "auto".
    backend : {None}, optional
        Which backend to use for handling sparsity and memory efficiency.
        Default is None.
    verbose : bool, optional
        Verbosity of the optimization process. Default is False.
    random_state : float, optional
        Random seed for reproducibility. Default is None.
    check_interval : int, optional
        Number of iterations between two checks for convergence. Default is 50.
    metric_in : str, optional
        Metric used to compute the input affinity. Default is "euclidean".
    """  # noqa: E501

    def __init__(
        self,
        k: int = 5,
        n_components: int = 2,
        t: int = 100,
        alpha: float = 10.0,
        optimizer: str = "Adam",
        optimizer_kwargs: dict = {},
        lr: float = 1e0,
        scheduler: Optional[str] = None,
        scheduler_kwargs: dict = {},
        min_grad_norm: float = 1e-15,
        max_iter: int = 1000,
        init: str = "pca",
        init_scaling: float = 1e-4,
        device: str = "auto",
        backend: Optional[str] = None,
        verbose: bool = False,
        random_state: Optional[float] = None,
        check_interval: int = 50,
        metric_in: str = "euclidean",
    ):
        if backend in ("faiss", "keops"):
            raise ValueError(
                f"[TorchDR] ERROR : {self.__class__.__name__} class does not "
                f"support backend {backend}."
            )

        self.metric_in = metric_in
        self.k = k
        self.t = t
        self.alpha = alpha

        affinity_in = PHATEAffinity(
            k=k,
            t=t,
            alpha=alpha,
            metric=metric_in,
            backend=backend,
            device=device,
        )

        super().__init__(
            affinity_in=affinity_in,
            affinity_out=None,
            n_components=n_components,
            optimizer=optimizer,
            optimizer_kwargs=optimizer_kwargs,
            lr=lr,
            scheduler=scheduler,
            scheduler_kwargs=scheduler_kwargs,
            min_grad_norm=min_grad_norm,
            max_iter=max_iter,
            init=init,
            init_scaling=init_scaling,
            device=device,
            backend=backend,
            verbose=verbose,
            random_state=random_state,
            check_interval=check_interval,
        )

    def _loss(self):
        # Negative pairwise distances in the embedding space; clamping before
        # the square root is for numerical stability.
        Q = (
            -pairwise_distances(
                self.embedding_, metric="sqeuclidean", backend=self.backend
            )[0]
            .clamp(min=1e-12)
            .sqrt()
        )
        # Normalized stress between the input potential affinity and the
        # embedding distances, as given in the class docstring.
        loss = square_loss(self.affinity_in_, Q) / (self.affinity_in_**2).sum()
        return loss.sqrt()
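
Below is a minimal usage sketch, not part of the module above. It assumes the class is exported at the top level as torchdr.PHATE and relies on the fit_transform interface inherited from AffinityMatcher; the data and hyperparameters are purely illustrative.

    import torch
    from torchdr import PHATE  # assumed top-level export

    # Toy data: 200 points in 50 dimensions.
    X = torch.randn(200, 50)

    # Embed into 2D using the PHATE potential affinity (illustrative settings).
    phate = PHATE(k=5, t=100, alpha=10.0, n_components=2, max_iter=500)
    Z = phate.fit_transform(X)  # embedding of shape (200, 2)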