Source code for torchdr.spectral

# -*- coding: utf-8 -*-
"""Spectral methods for dimensionality reduction."""

# Authors: Hugues Van Assel <vanasselhugues@gmail.com>
#          Mathurin Massias
#
# License: BSD 3-Clause License

import torch
import numpy as np

from torchdr.base import DRModule
from torchdr.utils import (
    to_torch,
    sum_red,
    svd_flip,
    handle_backend,
    center_kernel,
    check_nonnegativity_eigenvalues,
)
from torchdr.affinity import (
    Affinity,
    GaussianAffinity,
    UnnormalizedAffinity,
    UnnormalizedLogAffinity,
)


[docs] class PCA(DRModule): r"""Principal Component Analysis module. Parameters ---------- n_components : int, default=2 Number of components to project the input data onto. device : str, default="auto" Device on which the computations are performed. verbose : bool, default=False Whether to print information during the computations. random_state : float, default=0 Random seed for reproducibility. """ def __init__( self, n_components: int = 2, device: str = "auto", verbose: bool = False, random_state: float = 0, ): super().__init__( n_components=n_components, device=device, verbose=verbose, random_state=random_state, )
[docs] def fit(self, X: torch.Tensor | np.ndarray): r"""Fit the PCA model. Parameters ---------- X : torch.Tensor or np.ndarray of shape (n_samples, n_features) Data on which to fit the PCA model. Returns ------- self : PCA The fitted PCA model. """ X = to_torch(X, device=self.device) self.mean_ = X.mean(0, keepdim=True) U, S, V = torch.linalg.svd(X - self.mean_, full_matrices=False) U, V = svd_flip(U, V) # flip eigenvectors' sign to enforce deterministic output self.components_ = V[: self.n_components] self.embedding_ = U[:, : self.n_components] @ S[: self.n_components].diag() return self
[docs] @handle_backend def transform(self, X: torch.Tensor | np.ndarray): r"""Project the input data onto the PCA components. Parameters ---------- X : torch.Tensor or np.ndarray of shape (n_samples, n_features) Data to project onto the PCA components. Returns ------- X_new : torch.Tensor or np.ndarray of shape (n_samples, n_components) Projected data. """ return (X - self.mean_) @ self.components_.T
[docs] def fit_transform(self, X: torch.Tensor | np.ndarray): r"""Fit the PCA model and project the input data onto the components. Parameters ---------- X : torch.Tensor or np.ndarray of shape (n_samples, n_features) Data on which to fit the PCA model and project onto the components. Returns ------- X_new : torch.Tensor or np.ndarray of shape (n_samples, n_components) Projected data. """ self.fit(X) return self.embedding_
[docs] class KernelPCA(DRModule): r"""Kernel Principal Component Analysis module. Parameters ---------- affinity : Affinity, default=GaussianAffinity() Affinity object to compute the kernel matrix. n_components : int, default=2 Number of components to project the input data onto. device : str, default="auto" Device on which the computations are performed. keops : bool, default=False Whether to use KeOps for computations. verbose : bool, default=False Whether to print information during the computations. random_state : float, default=0 Random seed for reproducibility. nodiag : bool, default=False Whether to remove eigenvectors with a zero eigenvalue. """ def __init__( self, affinity: Affinity = GaussianAffinity(), n_components: int = 2, device: str = "auto", keops: bool = False, verbose: bool = False, random_state: float = 0, nodiag: bool = False, ): super().__init__( n_components=n_components, device=device, keops=keops, verbose=verbose, random_state=random_state, ) self.affinity = affinity self.affinity.keops = keops self.affinity.device = device self.affinity.random_state = random_state self.nodiag = nodiag if keops: raise NotImplementedError( "[TorchDR] ERROR : KeOps is not (yet) supported for KernelPCA." )
[docs] def fit(self, X: torch.Tensor | np.ndarray): r"""Fit the KernelPCA model. Parameters ---------- X : torch.Tensor or np.ndarray of shape (n_samples, n_features) Data on which to fit the KernelPCA model. Returns ------- self : KernelPCA The fitted KernelPCA model. """ X = to_torch(X, device=self.device) self.X_fit_ = X.clone() K = self.affinity(X) K, _, col_mean, mean = center_kernel(K, return_all=True) self.K_fit_rows_ = col_mean self.K_fit_all_ = mean eigvals, eigvecs = torch.linalg.eigh(K) eigvals = check_nonnegativity_eigenvalues(eigvals) # flip eigenvectors' sign to enforce deterministic output eigvecs, _ = svd_flip(eigvecs, torch.zeros_like(eigvecs).T) # sort eigenvectors in descending order (torch eigvals are increasing) eigvals = torch.flip(eigvals, dims=(0,)) eigvecs = torch.flip(eigvecs, dims=(1,)) # remove eigenvectors with a zero eigenvalue (null space) if required if self.nodiag or self.n_components is None: eigvecs = eigvecs[:, eigvals > 0] eigvals = eigvals[eigvals > 0] eigvecs = eigvecs[:, : self.n_components] self.eigenvectors_ = eigvecs self.eigenvalues_ = eigvals return self
[docs] @handle_backend def transform(self, X: torch.Tensor | np.ndarray): r"""Project the input data onto the KernelPCA components. Parameters ---------- X : torch.Tensor or np.ndarray of shape (n_samples, n_features) Data to project onto the KernelPCA components. Returns ------- X_new : torch.Tensor or np.ndarray of shape (n_samples, n_components) Projected data. """ if not isinstance( self.affinity, (UnnormalizedAffinity, UnnormalizedLogAffinity) ): aff_name = self.affinity.__class__.__name__ raise ValueError( "KernelPCA.transform can only be used when `affinity` is " "an UnnormalizedAffinity or UnnormalizedLogAffinity. " f"{aff_name} is not. Use the fit_transform method instead." ) K = self.affinity(X, self.X_fit_) # center à la sklearn: using fit data for rows and all, new data for col pred_cols = sum_red(K, 1) / self.K_fit_rows_.shape[1] K -= self.K_fit_rows_ K -= pred_cols K += self.K_fit_all_ result = ( K
[docs] @ self.eigenvectors_ @ torch.diag(1 / self.eigenvalues_[: self.n_components]).sqrt() ) # remove np.inf arising from division by 0 eigenvalues: zero_eigvals = self.eigenvalues_[: self.n_components] == 0 if zero_eigvals.any(): result[:, zero_eigvals] = 0 return result
def fit_transform(self, X: torch.Tensor | np.ndarray): r"""Fit the KernelPCA model and project the input data onto the components. Parameters ---------- X : torch.Tensor or np.ndarray of shape (n_samples, n_features) Data on which to fit the KernelPCA model and project onto the components. Returns ------- X_new : torch.Tensor or np.ndarray of shape (n_samples, n_components) Projected data. """ self.fit(X) return self.eigenvectors_ * self.eigenvalues_[: self.n_components].sqrt()