Source code for torchdr.spectral_embedding.kernel_pca

"""Kernel Principal Component Analysis module."""

# Authors: Hugues Van Assel <vanasselhugues@gmail.com>
#          Mathurin Massias
#
# License: BSD 3-Clause License

from typing import Union, Any, Optional

import torch

from torchdr.base import DRModule
from torchdr.utils import svd_flip

from torchdr.utils import center_kernel, check_nonnegativity_eigenvalues
from torchdr.affinity import (
    Affinity,
    NormalizedGaussianAffinity,
)
from torchdr.distance import FaissConfig



[docs]
class KernelPCA(DRModule):
    r"""Kernel Principal Component Analysis module.

    Parameters
    ----------
    affinity : Affinity, default=NormalizedGaussianAffinity(normalization_dim=None)
        Affinity object to compute the kernel matrix.
    n_components : int, default=2
        Number of components to project the input data onto.
    device : str, default="auto"
        Device on which the computations are performed.
    backend : {"keops", "faiss", None} or FaissConfig, optional
        Which backend to use for handling sparsity and memory efficiency.
        Can be:
        - "keops": Use KeOps for memory-efficient symbolic computations
        - "faiss": Use FAISS for fast k-NN computations with default settings
        - None: Use standard PyTorch operations
        - FaissConfig object: Use FAISS with custom configuration
        Default is None.
    verbose : bool, default=False
        Whether to print information during the computations.
    random_state : float, default=None
        Random seed for reproducibility.
    nodiag : bool, default=False
        Whether to remove eigenvectors with a zero eigenvalue.
    """

    def __init__(
        self,
        affinity: Affinity = NormalizedGaussianAffinity(normalization_dim=None),
        n_components: int = 2,
        device: str = "auto",
        backend: Union[str, FaissConfig, None] = None,
        verbose: bool = False,
        random_state: float = None,
        nodiag: bool = False,
        **kwargs,
    ):
        super().__init__(
            n_components=n_components,
            device=device,
            backend=backend,
            verbose=verbose,
            random_state=random_state,
            **kwargs,
        )

        self.affinity = affinity
        self.affinity.backend = backend
        self.affinity.device = device
        self.nodiag = nodiag

        if backend == "keops":
            raise NotImplementedError(
                "[TorchDR] ERROR : KeOps is not (yet) supported for KernelPCA."
            )

    def _fit_transform(self, X: torch.Tensor, y: Optional[Any] = None) -> torch.Tensor:
        r"""Fit the KernelPCA model and project the input data onto the components.

        Parameters
        ----------
        X : torch.Tensor of shape (n_samples, n_features)
            Data on which to fit the KernelPCA model and project onto the components.
        y : Optional[Any], default=None
            Ignored in this method.

        Returns
        -------
        embedding_ : torch.Tensor of shape (n_samples, n_components)
            Projected data.
        """
        K = self.affinity(X)
        K = center_kernel(K, return_all=False)

        eigvals, eigvecs = torch.linalg.eigh(K)
        eigvals = check_nonnegativity_eigenvalues(eigvals)

        # flip eigenvectors' sign to enforce deterministic output
        eigvecs, _ = svd_flip(eigvecs, torch.zeros_like(eigvecs).T)

        # sort eigenvectors in descending order (torch eigvals are increasing)
        eigvals = torch.flip(eigvals, dims=(0,))
        eigvecs = torch.flip(eigvecs, dims=(1,))

        # remove eigenvectors with a zero eigenvalue (null space) if required
        if self.nodiag or self.n_components is None:
            eigvecs = eigvecs[:, eigvals > 0]
            eigvals = eigvals[eigvals > 0]

        eigvecs = eigvecs[:, : self.n_components]

        self.eigenvectors_ = eigvecs
        self.eigenvalues_ = eigvals
        self.embedding_ = (
            self.eigenvectors_ * self.eigenvalues_[: self.n_components].sqrt()
        )
        return self.embedding_