-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Cluster algorithms, PCA, and chemical conformer analysis -- -- Please see the README on GitLab at -- https://gitlab.com/theoretical-chemistry-jena/quantum-chemistry/ConfoCluster @package ConClusion @version 0.0.2 module ConClusion.Numeric.Data -- | Exception regarding indexing in some kind of array. newtype IndexException IndexException :: String -> IndexException -- | Converts a vector from the HMatrix package to the Massiv -- representation. vecH2M :: (Element e, Mutable r Ix1 e) => Vector e -> Vector r e -- | Converts a vector from the Massiv representation to the HMatrix -- representation. vecM2H :: (Manifest r Ix1 e, Element e) => Vector r e -> Vector e -- | Converts a matrix from the HMatrix representation to the Massiv -- representation. matH2M :: (Mutable r Ix1 e, Element e) => Matrix e -> Matrix r e -- | Converts a matrix from Massiv to HMatrix representation. matM2H :: (Manifest r Ix1 e, Element e, Resize r Ix2, Load r Ix2 e) => Matrix r e -> Matrix e -- | Magnitude of a vector (length). magnitude :: (Numeric r e, Source r Ix1 e, Floating e) => Vector r e -> e -- | Normalise a vector. normalise :: (Numeric r e, Source r Ix1 e, Floating e) => Vector r e -> Vector r e -- | Angle between two vectors. angle :: (Numeric r e, Source r Ix1 e, Floating e) => Vector r e -> Vector r e -> e -- | Find the minimal distance in a distance matrix, which is not on the main -- diagonal. minDistAt :: (Manifest r Ix2 e, MonadThrow m, Ord e) => Matrix r e -> m (e, Ix2) -- | Find the minimal element of a vector, which is at an index larger than the -- supplied index. minDistAtVec :: (Manifest r Ix1 e, MonadThrow m, Ord e) => Ix1 -> Vector r e -> m (e, Ix1) -- | Like minimumM but also returns the index of the minimal -- element. 
iMinimumM :: (Manifest r ix a, MonadThrow m, Ord a) => Array r ix a -> m (a, ix) -- | Quickly print a matrix with numerical values printMat :: (Source r Ix2 e, Real e) => Matrix r e -> Matrix D Text -- | A binary tree. data BinTree e Leaf :: e -> BinTree e Node :: e -> BinTree e -> BinTree e -> BinTree e -- | Look at the root of a binary tree. root :: BinTree e -> e -- | Steps down each branch of a tree until some criterion is satisfied or -- the end of the branch is reached. Each end of the branch is added to a -- result. takeBranchesWhile :: (a -> Bool) -> BinTree a -> Vector DL a -- | Takes the first value in each branch, that does not fulfil the -- criterion anymore and adds it to the result. Terminal leaves of the -- branches are always taken. takeLeafyBranchesWhile :: (a -> Bool) -> BinTree a -> Vector DL a instance GHC.Show.Show ConClusion.Numeric.Data.IndexException instance GHC.Generics.Generic (ConClusion.Numeric.Data.BinTree e) instance GHC.Show.Show e => GHC.Show.Show (ConClusion.Numeric.Data.BinTree e) instance GHC.Classes.Eq e => GHC.Classes.Eq (ConClusion.Numeric.Data.BinTree e) instance Data.Aeson.Types.FromJSON.FromJSON e => Data.Aeson.Types.FromJSON.FromJSON (ConClusion.Numeric.Data.BinTree e) instance Data.Aeson.Types.ToJSON.ToJSON e => Data.Aeson.Types.ToJSON.ToJSON (ConClusion.Numeric.Data.BinTree e) instance GHC.Base.Functor ConClusion.Numeric.Data.BinTree instance GHC.Exception.Type.Exception ConClusion.Numeric.Data.IndexException -- | This module implements routines to work with simple molden style XYZ -- trajectories. This includes parsers as well as functions to obtain -- structural features in internal coordinates. -- -- For an introduction into PCA see -- https://www.cs.cmu.edu/~elaw/papers/pca.pdf. -- -- Dihedrals require a special metric, see -- https://onlinelibrary.wiley.com/doi/full/10.1002/prot.20310. module ConClusion.Chemistry.Topology -- | A Molecule in cartesian coordinates. 
data Molecule type Trajectory = Seq Molecule -- | Parser for molecules in Molden XYZ format. xyz :: Parser Molecule -- | Parser for trajectories in XYZ format as produced by CREST. trajectory :: Parser Trajectory -- | Selection of a bond between two atoms. data B B :: Int -> Int -> B -- | Selection of an angle between three atoms. data A A :: Int -> Int -> Int -> A -- | Selection of a dihedral angle between four atoms. Rotation around the -- central two. data D D :: Int -> Int -> Int -> Int -> D -- | Selections data Feature Energy :: Feature Bond :: B -> Feature Angle :: A -> Feature Dihedral :: D -> Feature -- | Obtains the feature matrix <math> for a principal component -- analysis. Given <math> features to analyse in <math> -- measurements, <math> will be a <math> matrix. getFeatures :: (MonadThrow m, Traversable f) => f Feature -> Trajectory -> m (Matrix DL Double) module ConClusion.Numeric.Statistics data PCA PCA :: Matrix U Double -> Matrix U Double -> Matrix U Double -> Matrix U Double -> Double -> Double -> Vector U Double -> Vector U Double -> Matrix U Double -> Matrix U Double -> PCA -- | Original feature matrix. [$sel:x:PCA] :: PCA -> Matrix U Double -- | Feature matrix in mean deviation form. [$sel:x':PCA] :: PCA -> Matrix U Double -- | Transformed data. [$sel:y:PCA] :: PCA -> Matrix U Double -- | Transformation matrix to transform feature matrix into PCA result -- matrix. [$sel:a:PCA] :: PCA -> Matrix U Double -- | Mean squared error introduced by PCA. [$sel:mse:PCA] :: PCA -> Double -- | Percentage of the behaviour captured in the remaining dimensions. [$sel:remaining:PCA] :: PCA -> Double -- | All eigenvalues from the diagonalisation of the covariance matrix. [$sel:allEigenValues:PCA] :: PCA -> Vector U Double -- | Eigenvalues that were kept for PCA. [$sel:pcaEigenValues:PCA] :: PCA -> Vector U Double -- | All eigenvectors from the diagonalisation of the covariance matrix. 
[$sel:allEigenVecs:PCA] :: PCA -> Matrix U Double -- | Eigenvectors that were kept for PCA. [$sel:pcaEigenVecs:PCA] :: PCA -> Matrix U Double -- | Performs a PCA on the feature matrix <math> by solving the -- eigenproblem of the covariance matrix. The function takes the feature -- matrix directly and performs the conversion to mean deviation form, the -- calculation of the covariance matrix and the eigenvalue problem -- automatically. pca :: (Numeric r Double, Mutable r Ix2 Double, Manifest r Ix1 Double, Source (R r) Ix2 Double, Extract r Ix2 Double, MonadThrow m) => Int -> Matrix r Double -> m PCA -- | Normalise each value so that the maximum absolute value in each row -- becomes one. normalise :: (Ord e, Unbox e, Numeric r e, Fractional e, Source r Ix2 e, Mutable r Ix2 e) => Array r Ix2 e -> Array r Ix2 e -- | Subtract the mean value of all columns from the feature matrix. Brings -- the feature matrix to mean deviation form. meanDeviation :: (Source r Ix2 e, Fractional e, Unbox e, Numeric r e, Mutable r Ix2 e) => Matrix r e -> Matrix r e -- | Obtains the covariance matrix <math> from the feature matrix -- <math>. <math> where <math> is the number of columns -- in the matrix. -- -- The feature matrix should be in mean deviation form, see -- meanDeviation. covariance :: (Numeric r e, Mutable r Ix2 e, Fractional e) => Matrix r e -> Matrix r e -- | Distance matrix generator functions. type DistFn r e = Matrix r e -> Matrix r e -- | The <math> norm between two vectors. Generalisation of Manhattan -- and Euclidean distances. <math> lpNorm :: (Mutable r Ix2 e, Floating e) => Int -> DistFn r e -- | The Manhattan distance between two vectors. Specialisation of the -- <math> norm for <math>. <math> manhattan :: (Mutable r Ix2 e, Floating e) => DistFn r e -- | The Euclidean distance between two vectors. Specialisation of the -- <math> norm for <math>. <math> euclidean :: (Mutable r Ix2 e, Floating e) => DistFn r e -- | Mahalanobis distance between points. 
Suitable for non correlated axes. -- <math> where <math> is the covariance matrix. mahalanobis :: (Unbox e, Numeric r e, Mutable r Ix2 e, Mutable r Ix1 e, Field e) => DistFn r e -- | Representation of clusters. type Clusters = Vector B IntSet -- | Exception for invalid search distances. newtype DistanceInvalidException e DistanceInvalidException :: e -> DistanceInvalidException e -- | DBScan algorithm. dbscan :: (MonadThrow m, Ord e, Num e, Typeable e, Show e, Source r Ix2 e) => DistFn r e -> Int -> e -> Matrix r e -> m Clusters -- | A dendrogram as a binary tree. data Dendrogram e -- | A strategy/distance measure for clusters. data JoinStrat e SingleLinkage :: JoinStrat e CompleteLinkage :: JoinStrat e Median :: JoinStrat e UPGMA :: JoinStrat e WPGMA :: JoinStrat e Centroid :: JoinStrat e Ward :: JoinStrat e LWFB :: e -> JoinStrat e LW :: e -> e -> e -> e -> JoinStrat e -- | Performance improved hierarchical clustering algorithm. -- GENERIC_LINKAGE from figure 3, -- https://arxiv.org/pdf/1109.2378.pdf. hca :: (MonadThrow m, Mutable r Ix1 e, Mutable r Ix2 e, Mutable r Ix1 (e, Ix1), Manifest (R r) Ix1 e, OuterSlice r Ix2 e, Ord e, Unbox e, Fractional e) => DistFn r e -> JoinStrat e -> Matrix r e -> m (Dendrogram e) -- | Cut a Dendrogram at a given distance and obtain all clusters -- from it. 
cutDendroAt :: Ord e => Dendrogram e -> e -> Clusters instance GHC.Classes.Eq e => GHC.Classes.Eq (ConClusion.Numeric.Statistics.DistanceInvalidException e) instance GHC.Show.Show e => GHC.Show.Show (ConClusion.Numeric.Statistics.DistanceInvalidException e) instance GHC.Generics.Generic (ConClusion.Numeric.Statistics.DendroNode e) instance GHC.Show.Show e => GHC.Show.Show (ConClusion.Numeric.Statistics.DendroNode e) instance GHC.Classes.Eq e => GHC.Classes.Eq (ConClusion.Numeric.Statistics.DendroNode e) instance GHC.Generics.Generic (ConClusion.Numeric.Statistics.Dendrogram e) instance GHC.Classes.Eq e => GHC.Classes.Eq (ConClusion.Numeric.Statistics.Dendrogram e) instance GHC.Show.Show e => GHC.Show.Show (ConClusion.Numeric.Statistics.Dendrogram e) instance GHC.Show.Show e => GHC.Show.Show (ConClusion.Numeric.Statistics.JoinStrat e) instance GHC.Classes.Eq e => GHC.Classes.Eq (ConClusion.Numeric.Statistics.JoinStrat e) instance Data.Aeson.Types.ToJSON.ToJSON e => Data.Aeson.Types.ToJSON.ToJSON (ConClusion.Numeric.Statistics.Dendrogram e) instance Data.Aeson.Types.FromJSON.FromJSON e => Data.Aeson.Types.FromJSON.FromJSON (ConClusion.Numeric.Statistics.Dendrogram e) instance Data.Aeson.Types.FromJSON.FromJSON e => Data.Aeson.Types.FromJSON.FromJSON (ConClusion.Numeric.Statistics.DendroNode e) instance Data.Aeson.Types.ToJSON.ToJSON e => Data.Aeson.Types.ToJSON.ToJSON (ConClusion.Numeric.Statistics.DendroNode e) instance (Data.Typeable.Internal.Typeable e, GHC.Show.Show e) => GHC.Exception.Type.Exception (ConClusion.Numeric.Statistics.DistanceInvalidException e)