Source code for torchdr.spectral_embedding.pca

"""Principal Component Analysis module."""

# Authors: Hugues Van Assel <vanasselhugues@gmail.com>
#
# License: BSD 3-Clause License

from typing import Optional, Union, Any

import numpy as np
import torch

from torchdr.base import DRModule
from torchdr.utils import handle_type, svd_flip


class _NotFittedError(ValueError, TypeError):
    """Raised when a PCA model is used before it has been fitted.

    It also derives from ``TypeError`` because the unguarded ``X - None``
    arithmetic previously raised that type; existing handlers keep working.
    """


class PCA(DRModule):
    r"""Principal Component Analysis module.

    Parameters
    ----------
    n_components : int, default=2
        Number of components to project the input data onto.
    device : str, default="auto"
        Device on which the computations are performed.
    verbose : bool, default=False
        Whether to print information during the computations.
    random_state : float, default=None
        Random seed for reproducibility.
    svd_driver : str, optional
        Name of the cuSOLVER method to be used for torch.linalg.svd.
        This keyword argument only works on CUDA inputs.
        Available options are: None, gesvd, gesvdj and gesvda.
        Defaults to None.

    Attributes
    ----------
    mean_ : torch.Tensor of shape (1, n_features) or None
        Per-feature mean of the data seen during fitting. None until fitted.
    components_ : torch.Tensor or None
        Leading rows of ``Vh`` from the SVD of the centered data (at most
        ``n_components`` rows, one component per row). None until fitted.
    embedding_ : torch.Tensor
        Projection of the fitting data, set by ``_fit_transform``.
    """

    def __init__(
        self,
        n_components: int = 2,
        device: str = "auto",
        verbose: bool = False,
        random_state: Optional[float] = None,
        svd_driver: Optional[str] = None,
        **kwargs,
    ):
        super().__init__(
            n_components=n_components,
            device=device,
            verbose=verbose,
            random_state=random_state,
            **kwargs,
        )
        self.svd_driver = svd_driver

        # Fitted state; stays None until _fit_transform has run.
        self.mean_ = None
        self.components_ = None

    def _fit_transform(self, X: torch.Tensor, y: Optional[Any] = None) -> torch.Tensor:
        """Fit the PCA model and apply the dimensionality reduction on X.

        Parameters
        ----------
        X : torch.Tensor of shape (n_samples, n_features)
            Data on which to fit the PCA model and project onto the components.
        y : Optional[Any], default=None
            Target values (None for unsupervised transformations).

        Returns
        -------
        embedding_ : torch.Tensor of shape (n_samples, n_components)
            Projected data.
        """
        self.mean_ = X.mean(0, keepdim=True)

        # torch.linalg.svd returns (U, S, Vh): the rows of Vh, not its columns,
        # are the right singular vectors, hence the row slicing below.
        U, S, Vh = torch.linalg.svd(
            X - self.mean_, full_matrices=False, driver=self.svd_driver
        )
        # Flip the singular vectors' signs to enforce deterministic output.
        U, Vh = svd_flip(U, Vh)

        k = self.n_components
        self.components_ = Vh[:k]
        # Scaling the left singular vectors by the singular values gives the
        # coordinates of the centered data along the retained components.
        self.embedding_ = U[:, :k] * S[:k]
        return self.embedding_

    @handle_type(
        accept_sparse=False,
        ensure_min_samples=2,
        ensure_min_features=1,
        ensure_2d=True,
    )
    def transform(
        self, X: Union[torch.Tensor, np.ndarray]
    ) -> Union[torch.Tensor, np.ndarray]:
        r"""Project the input data onto the PCA components.

        Parameters
        ----------
        X : torch.Tensor or np.ndarray of shape (n_samples, n_features)
            Data to project onto the PCA components.

        Returns
        -------
        X_new : torch.Tensor or np.ndarray of shape (n_samples, n_components)
            Projected data.

        Raises
        ------
        ValueError
            If the model has not been fitted yet (``mean_`` or ``components_``
            is still None). The raised exception is also a ``TypeError``.
        """
        # Fail early with an actionable message instead of the opaque
        # "unsupported operand" error that `X - None` would produce.
        if self.mean_ is None or self.components_ is None:
            raise _NotFittedError(
                "This PCA instance is not fitted yet. "
                "Fit the model before calling 'transform'."
            )
        return (X - self.mean_) @ self.components_.T