# Source code for torchdr.spectral_embedding.kernel_pca

"""Kernel Principal Component Analysis module."""

# Authors: Hugues Van Assel <vanasselhugues@gmail.com>
#          Mathurin Massias
#
# License: BSD 3-Clause License

from typing import Union, Any, Optional

import torch

from torchdr.base import DRModule
from torchdr.utils import svd_flip

from torchdr.utils import center_kernel, check_nonnegativity_eigenvalues
from torchdr.affinity import (
    Affinity,
    NormalizedGaussianAffinity,
)
from torchdr.distance import FaissConfig


class KernelPCA(DRModule):
    r"""Kernel Principal Component Analysis module.

    The embedding is obtained from the eigendecomposition of the centered
    kernel (affinity) matrix: each retained eigenvector is scaled by the
    square root of its eigenvalue.

    Parameters
    ----------
    affinity : Affinity, default=NormalizedGaussianAffinity(normalization_dim=None)
        Affinity object to compute the kernel matrix. When left to ``None``,
        a new ``NormalizedGaussianAffinity(normalization_dim=None)`` is
        created for this instance. Note that the ``backend`` and ``device``
        attributes of the affinity are overwritten with the values given
        to this module.
    n_components : int, default=2
        Number of components to project the input data onto.
    device : str, default="auto"
        Device on which the computations are performed.
    backend : {"keops", "faiss", None} or FaissConfig, optional
        Which backend to use for handling sparsity and memory efficiency.
        Can be:

        - "keops": Use KeOps for memory-efficient symbolic computations
        - "faiss": Use FAISS for fast k-NN computations with default settings
        - None: Use standard PyTorch operations
        - FaissConfig object: Use FAISS with custom configuration

        Default is None. ``"keops"`` is currently rejected by this module
        (see Raises).
    verbose : bool, default=False
        Whether to print information during the computations.
    random_state : float, default=None
        Random seed for reproducibility.
    nodiag : bool, default=False
        Whether to remove eigenvectors with a zero eigenvalue. Components
        whose eigenvalue is not strictly positive are also removed when
        ``n_components`` is ``None``.
    **kwargs
        Additional keyword arguments forwarded to ``DRModule.__init__``.

    Raises
    ------
    NotImplementedError
        If ``backend == "keops"``.
    """

    def __init__(
        self,
        affinity: Optional[Affinity] = None,
        n_components: int = 2,
        device: str = "auto",
        backend: Union[str, FaissConfig, None] = None,
        verbose: bool = False,
        random_state: Optional[float] = None,
        nodiag: bool = False,
        **kwargs,
    ):
        super().__init__(
            n_components=n_components,
            device=device,
            backend=backend,
            verbose=verbose,
            random_state=random_state,
            **kwargs,
        )

        # Reject the unsupported backend *before* touching the affinity so a
        # caller-provided affinity object is not left half-reconfigured.
        if backend == "keops":
            raise NotImplementedError(
                "[TorchDR] ERROR : KeOps is not (yet) supported for KernelPCA."
            )

        # Build the default affinity per instance. A default evaluated in the
        # signature would be a single object shared by every KernelPCA, and
        # the attribute assignments below would then leak the backend/device
        # of the most recently constructed model into all the others.
        if affinity is None:
            affinity = NormalizedGaussianAffinity(normalization_dim=None)

        self.affinity = affinity
        self.affinity.backend = backend
        self.affinity.device = device
        self.nodiag = nodiag

    def _fit_transform(self, X: torch.Tensor, y: Optional[Any] = None) -> torch.Tensor:
        r"""Fit the KernelPCA model and project the input data onto the components.

        Sets the attributes ``eigenvectors_`` (retained eigenvectors, one per
        column), ``eigenvalues_`` (eigenvalues in descending order, not
        truncated to ``n_components``) and ``embedding_``.

        Parameters
        ----------
        X : torch.Tensor of shape (n_samples, n_features)
            Data on which to fit the KernelPCA model and project onto the
            components.
        y : Optional[Any], default=None
            Ignored in this method.

        Returns
        -------
        embedding_ : torch.Tensor of shape (n_samples, n_components)
            Projected data.
        """
        K = self.affinity(X)
        K = center_kernel(K, return_all=False)

        eigvals, eigvecs = torch.linalg.eigh(K)
        # NOTE(review): presumably validates/cleans slightly negative
        # eigenvalues caused by numerical error -- confirm in torchdr.utils.
        eigvals = check_nonnegativity_eigenvalues(eigvals)

        # flip eigenvectors' sign to enforce deterministic output
        eigvecs, _ = svd_flip(eigvecs, torch.zeros_like(eigvecs).T)

        # sort eigenvectors in descending order (torch eigvals are increasing)
        eigvals = torch.flip(eigvals, dims=(0,))
        eigvecs = torch.flip(eigvecs, dims=(1,))

        # remove eigenvectors with a zero eigenvalue (null space) if required
        if self.nodiag or self.n_components is None:
            positive = eigvals > 0
            eigvecs = eigvecs[:, positive]
            eigvals = eigvals[positive]

        # A ``None`` n_components makes this slice keep every column.
        eigvecs = eigvecs[:, : self.n_components]

        self.eigenvectors_ = eigvecs
        self.eigenvalues_ = eigvals
        # Scale each retained eigenvector by the square root of its eigenvalue.
        self.embedding_ = (
            self.eigenvectors_ * self.eigenvalues_[: self.n_components].sqrt()
        )
        return self.embedding_