-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Cluster algorithms, PCA, and chemical conformer analysis
--
-- Please see the README on GitLab at
-- https://gitlab.com/theoretical-chemistry-jena/quantum-chemistry/ConfoCluster
@package ConClusion
@version 0.0.2
module ConClusion.Numeric.Data
-- | Exception regarding indexing in some kind of array.
newtype IndexException
IndexException :: String -> IndexException
-- | Converts a vector from the HMatrix package to the Massiv
-- representation.
vecH2M :: (Element e, Mutable r Ix1 e) => Vector e -> Vector r e
-- | Converts a vector from the Massiv representation to the HMatrix
-- representation.
vecM2H :: (Manifest r Ix1 e, Element e) => Vector r e -> Vector e
-- | Converts a matrix from the HMatrix representation to the Massiv
-- representation.
matH2M :: (Mutable r Ix1 e, Element e) => Matrix e -> Matrix r e
-- | Converts a matrix from Massiv to HMatrix representation.
matM2H :: (Manifest r Ix1 e, Element e, Resize r Ix2, Load r Ix2 e) => Matrix r e -> Matrix e
-- | Magnitude of a vector (length).
magnitude :: (Numeric r e, Source r Ix1 e, Floating e) => Vector r e -> e
-- | Normalise a vector.
normalise :: (Numeric r e, Source r Ix1 e, Floating e) => Vector r e -> Vector r e
-- | Angle between two vectors.
angle :: (Numeric r e, Source r Ix1 e, Floating e) => Vector r e -> Vector r e -> e
-- | Find the minimal distance in a distance matrix that is not on the main
-- diagonal.
minDistAt :: (Manifest r Ix2 e, MonadThrow m, Ord e) => Matrix r e -> m (e, Ix2)
-- | Find the minimal element of a vector that is located at an index larger
-- than the supplied index.
minDistAtVec :: (Manifest r Ix1 e, MonadThrow m, Ord e) => Ix1 -> Vector r e -> m (e, Ix1)
-- | Like minimumM but also returns the index of the minimal
-- element.
iMinimumM :: (Manifest r ix a, MonadThrow m, Ord a) => Array r ix a -> m (a, ix)
-- | Quickly print a matrix with numerical values.
printMat :: (Source r Ix2 e, Real e) => Matrix r e -> Matrix D Text
-- | A binary tree.
data BinTree e
Leaf :: e -> BinTree e
Node :: e -> BinTree e -> BinTree e -> BinTree e
-- | Look at the root of a binary tree.
root :: BinTree e -> e
-- | Steps down each branch of a tree until some criterion is satisfied or
-- the end of the branch is reached. Each end of the branch is added to a
-- result.
takeBranchesWhile :: (a -> Bool) -> BinTree a -> Vector DL a
-- | Takes the first value in each branch that no longer fulfils the
-- criterion and adds it to the result. Terminal leaves of the
-- branches are always taken.
takeLeafyBranchesWhile :: (a -> Bool) -> BinTree a -> Vector DL a
instance GHC.Show.Show ConClusion.Numeric.Data.IndexException
instance GHC.Generics.Generic (ConClusion.Numeric.Data.BinTree e)
instance GHC.Show.Show e => GHC.Show.Show (ConClusion.Numeric.Data.BinTree e)
instance GHC.Classes.Eq e => GHC.Classes.Eq (ConClusion.Numeric.Data.BinTree e)
instance Data.Aeson.Types.FromJSON.FromJSON e => Data.Aeson.Types.FromJSON.FromJSON (ConClusion.Numeric.Data.BinTree e)
instance Data.Aeson.Types.ToJSON.ToJSON e => Data.Aeson.Types.ToJSON.ToJSON (ConClusion.Numeric.Data.BinTree e)
instance GHC.Base.Functor ConClusion.Numeric.Data.BinTree
instance GHC.Exception.Type.Exception ConClusion.Numeric.Data.IndexException
-- | This module implements routines to work with simple molden style XYZ
-- trajectories. This includes parsers as well as functions to obtain
-- structural features in internal coordinates.
--
-- For an introduction to PCA see
-- https://www.cs.cmu.edu/~elaw/papers/pca.pdf.
--
-- Dihedrals require a special metric, see
-- https://onlinelibrary.wiley.com/doi/full/10.1002/prot.20310.
module ConClusion.Chemistry.Topology
-- | A Molecule in cartesian coordinates.
data Molecule
type Trajectory = Seq Molecule
-- | Parser for molecules in Molden XYZ format.
xyz :: Parser Molecule
-- | Parser for trajectories in XYZ format as produced by CREST.
trajectory :: Parser Trajectory
-- | Selection of a bond between two atoms.
data B
B :: Int -> Int -> B
-- | Selection of an angle between three atoms.
data A
A :: Int -> Int -> Int -> A
-- | Selection of a dihedral angle between four atoms. Rotation around the
-- central two.
data D
D :: Int -> Int -> Int -> Int -> D
-- | Selections
data Feature
Energy :: Feature
Bond :: B -> Feature
Angle :: A -> Feature
Dihedral :: D -> Feature
-- | Obtains the feature matrix \(\mathbf{X}\) for a principal component
-- analysis. Given \(m\) features to analyse in \(n\)
-- measurements, \(\mathbf{X}\) will be a \(m \times n\) matrix.
getFeatures :: (MonadThrow m, Traversable f) => f Feature -> Trajectory -> m (Matrix DL Double)
module ConClusion.Numeric.Statistics
data PCA
PCA :: Matrix U Double -> Matrix U Double -> Matrix U Double -> Matrix U Double -> Double -> Double -> Vector U Double -> Vector U Double -> Matrix U Double -> Matrix U Double -> PCA
-- | Original feature matrix.
[$sel:x:PCA] :: PCA -> Matrix U Double
-- | Feature matrix in mean deviation form.
[$sel:x':PCA] :: PCA -> Matrix U Double
-- | Transformed data.
[$sel:y:PCA] :: PCA -> Matrix U Double
-- | Transformation matrix to transform feature matrix into PCA result
-- matrix.
[$sel:a:PCA] :: PCA -> Matrix U Double
-- | Mean squared error introduced by PCA.
[$sel:mse:PCA] :: PCA -> Double
-- | Percentage of the behaviour captured in the remaining dimensions.
[$sel:remaining:PCA] :: PCA -> Double
-- | All eigenvalues from the diagonalisation of the covariance matrix.
[$sel:allEigenValues:PCA] :: PCA -> Vector U Double
-- | Eigenvalues that were kept for PCA.
[$sel:pcaEigenValues:PCA] :: PCA -> Vector U Double
-- | All eigenvectors from the diagonalisation of the covariance matrix.
[$sel:allEigenVecs:PCA] :: PCA -> Matrix U Double
-- | Eigenvectors that were kept for PCA.
[$sel:pcaEigenVecs:PCA] :: PCA -> Matrix U Double
-- | Performs a PCA on the feature matrix \(\mathbf{X}\) by solving the
-- eigenproblem of the covariance matrix. The function takes the feature
-- matrix directly and performs the conversion to mean deviation form, the
-- calculation of the covariance matrix and the eigenvalue problem
-- automatically.
pca :: (Numeric r Double, Mutable r Ix2 Double, Manifest r Ix1 Double, Source (R r) Ix2 Double, Extract r Ix2 Double, MonadThrow m) => Int -> Matrix r Double -> m PCA
-- | Normalise each value so that the maximum absolute value in each row
-- becomes one.
normalise :: (Ord e, Unbox e, Numeric r e, Fractional e, Source r Ix2 e, Mutable r Ix2 e) => Array r Ix2 e -> Array r Ix2 e
-- | Subtract the mean value of all columns from the feature matrix. Brings
-- the feature matrix to mean deviation form.
meanDeviation :: (Source r Ix2 e, Fractional e, Unbox e, Numeric r e, Mutable r Ix2 e) => Matrix r e -> Matrix r e
-- | Obtains the covariance matrix \(\mathbf{C_X}\) from the feature matrix
-- \(\mathbf{X}\). \(\mathbf{C_X} \equiv \frac{1}{n - 1} \mathbf{X} \mathbf{X}^T\)
-- where \(n\) is the number of columns in the matrix.
--
-- The feature matrix should be in mean deviation form, see
-- meanDeviation.
covariance :: (Numeric r e, Mutable r Ix2 e, Fractional e) => Matrix r e -> Matrix r e
-- | Distance matrix generator functions.
type DistFn r e = Matrix r e -> Matrix r e
-- | The \(L_p\) norm between two vectors. Generalisation of Manhattan
-- and Euclidean distances.
-- \( d(\mathbf{a}, \mathbf{b}) = \left( \sum_{i=1}^n \lvert a_i - b_i \rvert^p \right)^{\frac{1}{p}} \)
lpNorm :: (Mutable r Ix2 e, Floating e) => Int -> DistFn r e
-- | The Manhattan distance between two vectors. Specialisation of the
-- \(L_p\) norm for \(p = 1\).
-- \( d(\mathbf{a}, \mathbf{b}) = \sum_{i=1}^n \lvert a_i - b_i \rvert \)
manhattan :: (Mutable r Ix2 e, Floating e) => DistFn r e
-- | The Euclidean distance between two vectors. Specialisation of the
-- \(L_p\) norm for \(p = 2\).
-- \( d(\mathbf{a}, \mathbf{b}) = \sqrt{\sum_{i=1}^n (a_i - b_i)^2} \)
euclidean :: (Mutable r Ix2 e, Floating e) => DistFn r e
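-- | Mahalanobis distance between points. Suitable for non correlated axes.
-- \( d(\mathbf{a}, \mathbf{b}) = \sqrt{(\mathbf{a} - \mathbf{b})^T \mathbf{S}^{-1} (\mathbf{a} - \mathbf{b})} \)
-- where \(\mathbf{S}\) is the covariance matrix.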
mahalanobis :: (Unbox e, Numeric r e, Mutable r Ix2 e, Mutable r Ix1 e, Field e) => DistFn r e
-- | Representation of clusters.
type Clusters = Vector B IntSet
-- | Exception for invalid search distances.
newtype DistanceInvalidException e
DistanceInvalidException :: e -> DistanceInvalidException e
-- | DBScan algorithm.
dbscan :: (MonadThrow m, Ord e, Num e, Typeable e, Show e, Source r Ix2 e) => DistFn r e -> Int -> e -> Matrix r e -> m Clusters
-- | A dendrogram as a binary tree.
data Dendrogram e
-- | A strategy/distance measure for clusters.
data JoinStrat e
SingleLinkage :: JoinStrat e
CompleteLinkage :: JoinStrat e
Median :: JoinStrat e
UPGMA :: JoinStrat e
WPGMA :: JoinStrat e
Centroid :: JoinStrat e
Ward :: JoinStrat e
LWFB :: e -> JoinStrat e
LW :: e -> e -> e -> e -> JoinStrat e
-- | Performance improved hierarchical clustering algorithm.
-- GENERIC_LINKAGE from figure 3,
-- https://arxiv.org/pdf/1109.2378.pdf.
hca :: (MonadThrow m, Mutable r Ix1 e, Mutable r Ix2 e, Mutable r Ix1 (e, Ix1), Manifest (R r) Ix1 e, OuterSlice r Ix2 e, Ord e, Unbox e, Fractional e) => DistFn r e -> JoinStrat e -> Matrix r e -> m (Dendrogram e)
-- | Cut a Dendrogram at a given distance and obtain all clusters
-- from it.
cutDendroAt :: Ord e => Dendrogram e -> e -> Clusters
instance GHC.Classes.Eq e => GHC.Classes.Eq (ConClusion.Numeric.Statistics.DistanceInvalidException e)
instance GHC.Show.Show e => GHC.Show.Show (ConClusion.Numeric.Statistics.DistanceInvalidException e)
instance GHC.Generics.Generic (ConClusion.Numeric.Statistics.DendroNode e)
instance GHC.Show.Show e => GHC.Show.Show (ConClusion.Numeric.Statistics.DendroNode e)
instance GHC.Classes.Eq e => GHC.Classes.Eq (ConClusion.Numeric.Statistics.DendroNode e)
instance GHC.Generics.Generic (ConClusion.Numeric.Statistics.Dendrogram e)
instance GHC.Classes.Eq e => GHC.Classes.Eq (ConClusion.Numeric.Statistics.Dendrogram e)
instance GHC.Show.Show e => GHC.Show.Show (ConClusion.Numeric.Statistics.Dendrogram e)
instance GHC.Show.Show e => GHC.Show.Show (ConClusion.Numeric.Statistics.JoinStrat e)
instance GHC.Classes.Eq e => GHC.Classes.Eq (ConClusion.Numeric.Statistics.JoinStrat e)
instance Data.Aeson.Types.ToJSON.ToJSON e => Data.Aeson.Types.ToJSON.ToJSON (ConClusion.Numeric.Statistics.Dendrogram e)
instance Data.Aeson.Types.FromJSON.FromJSON e => Data.Aeson.Types.FromJSON.FromJSON (ConClusion.Numeric.Statistics.Dendrogram e)
instance Data.Aeson.Types.FromJSON.FromJSON e => Data.Aeson.Types.FromJSON.FromJSON (ConClusion.Numeric.Statistics.DendroNode e)
instance Data.Aeson.Types.ToJSON.ToJSON e => Data.Aeson.Types.ToJSON.ToJSON (ConClusion.Numeric.Statistics.DendroNode e)
instance (Data.Typeable.Internal.Typeable e, GHC.Show.Show e) => GHC.Exception.Type.Exception (ConClusion.Numeric.Statistics.DistanceInvalidException e)