-- |
-- Module      : ConClusion.Chemistry.Topology
-- Description : Principal Component Analysis
-- Copyright   : Phillip Seeber, 2021
-- License     : AGPL-3
-- Maintainer  : phillip.seeber@googlemail.com
-- Stability   : experimental
-- Portability : POSIX, Windows
--
-- This module implements routines to work with simple molden style XYZ trajectories. This includes
-- parsers as well as functions to obtain structural features in internal coordinates.
--
-- For an introduction into PCA see <https://www.cs.cmu.edu/~elaw/papers/pca.pdf>.
--
-- Diherdrals require a special metric, see <https://onlinelibrary.wiley.com/doi/full/10.1002/prot.20310)>.
module ConClusion.Chemistry.Topology
  ( Molecule,
    Trajectory,
    xyz,
    trajectory,
    B (..),
    A (..),
    D (..),
    Feature (..),
    getFeatures,
  )
where

import ConClusion.Array.Conversion
import ConClusion.Array.Util hiding (angle)
import qualified ConClusion.Array.Util as ArrayUtil
import Data.Attoparsec.Text hiding (D)
import Data.Foldable
import Data.Massiv.Array as Massiv hiding (B, D, forM)
import qualified Numeric.LinearAlgebra as LA
import RIO
import qualified RIO.Seq as Seq
import qualified RIO.Text as Text
import qualified RIO.Vector.Boxed as VectorB

-- | A Molecule in cartesian coordinates.
data Molecule = Molecule
  { -- | The energy of the molecule.
    Molecule -> Double
energy :: !Double,
    -- | Chemical symbols or names of the atoms. \(N\) vector.
    Molecule -> Vector Text
atoms :: !(VectorB.Vector Text),
    -- | Cartesian coordinates. Atoms as rows, xyz as columns. \(N \times 3\) matrix.
    Molecule -> Matrix S Double
coordinates :: !(Massiv.Matrix S Double)
  }

type Trajectory = Seq Molecule

-- | Parser for molecules in Molden XYZ format.
xyz :: Parser Molecule
xyz :: Parser Molecule
xyz = do
  -- Starting line with number of atoms.
  Int
nAtoms <- do
    ()
_ <- Parser Text Char -> Parser Text ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
skipMany (Parser Text Char -> Parser Text ())
-> (Char -> Parser Text Char) -> Char -> Parser Text ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Parser Text Char
char (Char -> Parser Text ()) -> Char -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ Char
' '
    Int
n <- Parser Text Int
forall a. Integral a => Parser a
decimal
    ()
_ <- Parser Text Char -> Parser Text ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
skipMany (Parser Text Char -> Parser Text ())
-> (Char -> Parser Text Char) -> Char -> Parser Text ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Parser Text Char
char (Char -> Parser Text ()) -> Char -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ Char
' '
    Parser Text ()
endOfLine
    Int -> Parser Text Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
n

  -- Comment line. Must contain the energy.
  Double
energy <- do
    ()
_ <- Parser Text Char -> Parser Text ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
skipMany (Parser Text Char -> Parser Text ())
-> (Char -> Parser Text Char) -> Char -> Parser Text ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Parser Text Char
char (Char -> Parser Text ()) -> Char -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ Char
' '
    Double
e <- Parser Text Double
double
    ()
_ <- Parser Text Char -> Parser Text ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
skipMany (Parser Text Char -> Parser Text ())
-> (Char -> Parser Text Char) -> Char -> Parser Text ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Parser Text Char
char (Char -> Parser Text ()) -> Char -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ Char
' '
    Parser Text ()
endOfLine
    Double -> Parser Text Double
forall (m :: * -> *) a. Monad m => a -> m a
return Double
e

  -- Multiple lines of atoms.
  [(Text, Array D Ix2 Double)]
atoms <- Int
-> Parser Text (Text, Array D Ix2 Double)
-> Parser Text [(Text, Array D Ix2 Double)]
forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
nAtoms (Parser Text (Text, Array D Ix2 Double)
 -> Parser Text [(Text, Array D Ix2 Double)])
-> Parser Text (Text, Array D Ix2 Double)
-> Parser Text [(Text, Array D Ix2 Double)]
forall a b. (a -> b) -> a -> b
$ do
    ()
_ <- Parser Text Char -> Parser Text ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
skipMany (Parser Text Char -> Parser Text ())
-> (Char -> Parser Text Char) -> Char -> Parser Text ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Parser Text Char
char (Char -> Parser Text ()) -> Char -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ Char
' '
    [Char]
name <- Parser Text Char -> Parser Text [Char]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 (Parser Text Char -> Parser Text [Char])
-> Parser Text Char -> Parser Text [Char]
forall a b. (a -> b) -> a -> b
$ Parser Text Char
letter Parser Text Char -> Parser Text Char -> Parser Text Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text Char
digit
    ()
_ <- Parser Text Char -> Parser Text ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
skipMany1 (Parser Text Char -> Parser Text ())
-> (Char -> Parser Text Char) -> Char -> Parser Text ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Parser Text Char
char (Char -> Parser Text ()) -> Char -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ Char
' '
    Double
x <- Parser Text Double
double
    ()
_ <- Parser Text Char -> Parser Text ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
skipMany1 (Parser Text Char -> Parser Text ())
-> (Char -> Parser Text Char) -> Char -> Parser Text ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Parser Text Char
char (Char -> Parser Text ()) -> Char -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ Char
' '
    Double
y <- Parser Text Double
double
    ()
_ <- Parser Text Char -> Parser Text ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
skipMany1 (Parser Text Char -> Parser Text ())
-> (Char -> Parser Text Char) -> Char -> Parser Text ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Parser Text Char
char (Char -> Parser Text ()) -> Char -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ Char
' '
    Double
z <- Parser Text Double
double
    ()
_ <- Parser Text Char -> Parser Text ()
forall (f :: * -> *) a. Alternative f => f a -> f ()
skipMany (Parser Text Char -> Parser Text ())
-> (Char -> Parser Text Char) -> Char -> Parser Text ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Parser Text Char
char (Char -> Parser Text ()) -> Char -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ Char
' '
    Parser Text ()
endOfLine
    let row :: Array D Ix2 Double
row = Sz1
-> (Double -> Int -> Double)
-> Array S (Lower Ix2) Double
-> Array D Ix2 Double
forall r ix a b.
(Index ix, Index (Lower ix), Manifest r a) =>
Sz1 -> (a -> Int -> b) -> Array r (Lower ix) a -> Array D ix b
Massiv.expandOuter (Int -> Sz1
forall ix. Index ix => ix -> Sz ix
Sz Int
1) Double -> Int -> Double
forall a b. a -> b -> a
const (Array S Int Double -> Array D Ix2 Double)
-> ([Double] -> Array S Int Double)
-> [Double]
-> Array D Ix2 Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Comp -> [Double] -> Array S Int Double
forall r e. Manifest r e => Comp -> [e] -> Vector r e
Massiv.fromList @S Comp
Seq ([Double] -> Array D Ix2 Double) -> [Double] -> Array D Ix2 Double
forall a b. (a -> b) -> a -> b
$ [Double
x, Double
y, Double
z]
    (Text, Array D Ix2 Double)
-> Parser Text (Text, Array D Ix2 Double)
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char] -> Text
Text.pack [Char]
name, Array D Ix2 Double
row)

  Molecule -> Parser Molecule
forall (m :: * -> *) a. Monad m => a -> m a
return
    Molecule :: Double -> Vector Text -> Matrix S Double -> Molecule
Molecule
      { $sel:energy:Molecule :: Double
energy = Double
energy,
        $sel:atoms:Molecule :: Vector Text
atoms = Int -> [Text] -> Vector Text
forall a. Int -> [a] -> Vector a
VectorB.fromListN Int
nAtoms ([Text] -> Vector Text)
-> ([(Text, Array D Ix2 Double)] -> [Text])
-> [(Text, Array D Ix2 Double)]
-> Vector Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Text, Array D Ix2 Double) -> Text)
-> [(Text, Array D Ix2 Double)] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text, Array D Ix2 Double) -> Text
forall a b. (a, b) -> a
fst ([(Text, Array D Ix2 Double)] -> Vector Text)
-> [(Text, Array D Ix2 Double)] -> Vector Text
forall a b. (a -> b) -> a -> b
$ [(Text, Array D Ix2 Double)]
atoms,
        $sel:coordinates:Molecule :: Matrix S Double
coordinates = Array DL Ix2 Double -> Matrix S Double
forall r ix e r'.
(Manifest r e, Load r' ix e) =>
Array r' ix e -> Array r ix e
Massiv.compute (Array DL Ix2 Double -> Matrix S Double)
-> ([(Text, Array D Ix2 Double)] -> Array DL Ix2 Double)
-> [(Text, Array D Ix2 Double)]
-> Matrix S Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Dim -> [Array D Ix2 Double] -> Array DL Ix2 Double
forall (f :: * -> *) r ix e.
(HasCallStack, Foldable f, Index ix, Source r e) =>
Dim -> f (Array r ix e) -> Array DL ix e
Massiv.concat' (Int -> Dim
Dim Int
2) ([Array D Ix2 Double] -> Array DL Ix2 Double)
-> ([(Text, Array D Ix2 Double)] -> [Array D Ix2 Double])
-> [(Text, Array D Ix2 Double)]
-> Array DL Ix2 Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Text, Array D Ix2 Double) -> Array D Ix2 Double)
-> [(Text, Array D Ix2 Double)] -> [Array D Ix2 Double]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text, Array D Ix2 Double) -> Array D Ix2 Double
forall a b. (a, b) -> b
snd ([(Text, Array D Ix2 Double)] -> Matrix S Double)
-> [(Text, Array D Ix2 Double)] -> Matrix S Double
forall a b. (a -> b) -> a -> b
$ [(Text, Array D Ix2 Double)]
atoms
      }

-- | Parser for trajectories in XYZ format as produced by CREST.
{-# SCC trajectory #-}
trajectory :: Parser Trajectory
trajectory :: Parser Trajectory
trajectory = do
  Trajectory
mols <- [Molecule] -> Trajectory
forall a. [a] -> Seq a
Seq.fromList ([Molecule] -> Trajectory)
-> Parser Text [Molecule] -> Parser Trajectory
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Molecule -> Parser Text [Molecule]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Molecule
xyz
  Parser Text ()
skipSpace
  Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput
  Trajectory -> Parser Trajectory
forall (m :: * -> *) a. Monad m => a -> m a
return Trajectory
mols

-- | Selection of a bond between two atoms.
data B = B Int Int

-- | Selection of an angle between three atoms.
data A = A Int Int Int

-- | Selection of a dihedral angle between four atoms. Rotation around the central two.
data D = D Int Int Int Int

-- | Selections
data Feature
  = Energy
  | Bond B
  | Angle A
  | Dihedral D

-- | Calculate a bond distance.
bond :: MonadThrow m => B -> Molecule -> m Double
bond :: B -> Molecule -> m Double
bond (B Int
a Int
b) Molecule {Matrix S Double
coordinates :: Matrix S Double
$sel:coordinates:Molecule :: Molecule -> Matrix S Double
coordinates}
  | Int
a Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
b = IndexException -> m Double
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (IndexException -> m Double)
-> ([Char] -> IndexException) -> [Char] -> m Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> IndexException
IndexException ([Char] -> m Double) -> [Char] -> m Double
forall a b. (a -> b) -> a -> b
$ [Char]
"selected atoms are identical"
  | Bool
otherwise = do
    Array U Int Double
coordA <- forall ix e r'.
(Manifest U e, Load r' ix e) =>
Array r' ix e -> Array U ix e
forall r ix e r'.
(Manifest r e, Load r' ix e) =>
Array r' ix e -> Array r ix e
compute @U (Array S Int Double -> Array U Int Double)
-> m (Array S Int Double) -> m (Array U Int Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Matrix S Double
coordinates Matrix S Double -> Int -> m (Array S (Lower Ix2) Double)
forall r ix e (m :: * -> *).
(MonadThrow m, Index ix, Index (Lower ix), Source r e) =>
Array r ix e -> Int -> m (Array r (Lower ix) e)
!?> Int
a)
    Array U Int Double
coordB <- forall ix e r'.
(Manifest U e, Load r' ix e) =>
Array r' ix e -> Array U ix e
forall r ix e r'.
(Manifest r e, Load r' ix e) =>
Array r' ix e -> Array r ix e
compute @U (Array S Int Double -> Array U Int Double)
-> m (Array S Int Double) -> m (Array U Int Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Matrix S Double
coordinates Matrix S Double -> Int -> m (Array S (Lower Ix2) Double)
forall r ix e (m :: * -> *).
(MonadThrow m, Index ix, Index (Lower ix), Source r e) =>
Array r ix e -> Int -> m (Array r (Lower ix) e)
!?> Int
b)
    Array U Int Double
vecAB <- Array U Int Double
coordA Array U Int Double -> Array U Int Double -> m (Array U Int Double)
forall ix r e (m :: * -> *).
(Index ix, Numeric r e, MonadThrow m) =>
Array r ix e -> Array r ix e -> m (Array r ix e)
.-. Array U Int Double
coordB
    Double -> m Double
forall (m :: * -> *) a. Monad m => a -> m a
return (Double -> m Double)
-> (Array U Int Double -> Double) -> Array U Int Double -> m Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> Double
forall a. Floating a => a -> a
sqrt (Double -> Double)
-> (Array U Int Double -> Double) -> Array U Int Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Array D Int Double -> Double
forall ix r e. (Index ix, Source r e, Num e) => Array r ix e -> e
Massiv.sum (Array D Int Double -> Double)
-> (Array U Int Double -> Array D Int Double)
-> Array U Int Double
-> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double -> Double) -> Array U Int Double -> Array D Int Double
forall ix r e' e.
(Index ix, Source r e') =>
(e' -> e) -> Array r ix e' -> Array D ix e
Massiv.map (Double -> Int -> Double
forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
2 :: Int)) (Array U Int Double -> m Double) -> Array U Int Double -> m Double
forall a b. (a -> b) -> a -> b
$ Array U Int Double
vecAB

-- | Calculates the sinus of an angle defined by three atoms.
angle :: MonadThrow m => A -> Molecule -> m Double
angle :: A -> Molecule -> m Double
angle (A Int
a Int
b Int
c) Molecule {Matrix S Double
coordinates :: Matrix S Double
$sel:coordinates:Molecule :: Molecule -> Matrix S Double
coordinates}
  | Int
a Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
b Bool -> Bool -> Bool
|| Int
b Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
c Bool -> Bool -> Bool
|| Int
a Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
c = IndexException -> m Double
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (IndexException -> m Double)
-> ([Char] -> IndexException) -> [Char] -> m Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> IndexException
IndexException ([Char] -> m Double) -> [Char] -> m Double
forall a b. (a -> b) -> a -> b
$ [Char]
"selected atoms are identical"
  | Bool
otherwise = do
    Array U Int Double
coordA <- forall ix e r'.
(Manifest U e, Load r' ix e) =>
Array r' ix e -> Array U ix e
forall r ix e r'.
(Manifest r e, Load r' ix e) =>
Array r' ix e -> Array r ix e
compute @U (Array S Int Double -> Array U Int Double)
-> m (Array S Int Double) -> m (Array U Int Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Matrix S Double
coordinates Matrix S Double -> Int -> m (Array S (Lower Ix2) Double)
forall r ix e (m :: * -> *).
(MonadThrow m, Index ix, Index (Lower ix), Source r e) =>
Array r ix e -> Int -> m (Array r (Lower ix) e)
!?> Int
a)
    Array U Int Double
coordB <- forall ix e r'.
(Manifest U e, Load r' ix e) =>
Array r' ix e -> Array U ix e
forall r ix e r'.
(Manifest r e, Load r' ix e) =>
Array r' ix e -> Array r ix e
compute @U (Array S Int Double -> Array U Int Double)
-> m (Array S Int Double) -> m (Array U Int Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Matrix S Double
coordinates Matrix S Double -> Int -> m (Array S (Lower Ix2) Double)
forall r ix e (m :: * -> *).
(MonadThrow m, Index ix, Index (Lower ix), Source r e) =>
Array r ix e -> Int -> m (Array r (Lower ix) e)
!?> Int
b)
    Array U Int Double
coordC <- forall ix e r'.
(Manifest U e, Load r' ix e) =>
Array r' ix e -> Array U ix e
forall r ix e r'.
(Manifest r e, Load r' ix e) =>
Array r' ix e -> Array r ix e
compute @U (Array S Int Double -> Array U Int Double)
-> m (Array S Int Double) -> m (Array U Int Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Matrix S Double
coordinates Matrix S Double -> Int -> m (Array S (Lower Ix2) Double)
forall r ix e (m :: * -> *).
(MonadThrow m, Index ix, Index (Lower ix), Source r e) =>
Array r ix e -> Int -> m (Array r (Lower ix) e)
!?> Int
c)
    Array U Int Double
vecAB <- Array U Int Double
coordA Array U Int Double -> Array U Int Double -> m (Array U Int Double)
forall ix r e (m :: * -> *).
(Index ix, Numeric r e, MonadThrow m) =>
Array r ix e -> Array r ix e -> m (Array r ix e)
.-. Array U Int Double
coordB
    Array U Int Double
vecCB <- Array U Int Double
coordC Array U Int Double -> Array U Int Double -> m (Array U Int Double)
forall ix r e (m :: * -> *).
(Index ix, Numeric r e, MonadThrow m) =>
Array r ix e -> Array r ix e -> m (Array r ix e)
.-. Array U Int Double
coordB
    Double -> m Double
forall (m :: * -> *) a. Monad m => a -> m a
return (Double -> m Double) -> Double -> m Double
forall a b. (a -> b) -> a -> b
$ Array U Int Double -> Array U Int Double -> Double
forall r e.
(Numeric r e, Source r e, Floating e) =>
Vector r e -> Vector r e -> e
ArrayUtil.angle Array U Int Double
vecAB Array U Int Double
vecCB

-- | Calculates the dihedral angle defined by four atoms. Respects rotation direction. Obtains the
-- result in radian.
dihedral' :: MonadThrow m => D -> Molecule -> m Double
dihedral' :: D -> Molecule -> m Double
dihedral' (D Int
a Int
b Int
c Int
d) Molecule {Matrix S Double
coordinates :: Matrix S Double
$sel:coordinates:Molecule :: Molecule -> Matrix S Double
coordinates}
  | Int
a Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
b Bool -> Bool -> Bool
|| Int
a Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
c Bool -> Bool -> Bool
|| Int
a Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
d Bool -> Bool -> Bool
|| Int
b Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
c Bool -> Bool -> Bool
|| Int
b Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
d Bool -> Bool -> Bool
|| Int
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
d = IndexException -> m Double
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (IndexException -> m Double)
-> ([Char] -> IndexException) -> [Char] -> m Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> IndexException
IndexException ([Char] -> m Double) -> [Char] -> m Double
forall a b. (a -> b) -> a -> b
$ [Char]
"selected atoms are indentical"
  | Bool
otherwise = do
    Array U Int Double
coordA <- forall ix e r'.
(Manifest U e, Load r' ix e) =>
Array r' ix e -> Array U ix e
forall r ix e r'.
(Manifest r e, Load r' ix e) =>
Array r' ix e -> Array r ix e
compute @U (Array S Int Double -> Array U Int Double)
-> m (Array S Int Double) -> m (Array U Int Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Matrix S Double
coordinates Matrix S Double -> Int -> m (Array S (Lower Ix2) Double)
forall r ix e (m :: * -> *).
(MonadThrow m, Index ix, Index (Lower ix), Source r e) =>
Array r ix e -> Int -> m (Array r (Lower ix) e)
!?> Int
a)
    Array U Int Double
coordB <- forall ix e r'.
(Manifest U e, Load r' ix e) =>
Array r' ix e -> Array U ix e
forall r ix e r'.
(Manifest r e, Load r' ix e) =>
Array r' ix e -> Array r ix e
compute @U (Array S Int Double -> Array U Int Double)
-> m (Array S Int Double) -> m (Array U Int Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Matrix S Double
coordinates Matrix S Double -> Int -> m (Array S (Lower Ix2) Double)
forall r ix e (m :: * -> *).
(MonadThrow m, Index ix, Index (Lower ix), Source r e) =>
Array r ix e -> Int -> m (Array r (Lower ix) e)
!?> Int
b)
    Array U Int Double
coordC <- forall ix e r'.
(Manifest U e, Load r' ix e) =>
Array r' ix e -> Array U ix e
forall r ix e r'.
(Manifest r e, Load r' ix e) =>
Array r' ix e -> Array r ix e
compute @U (Array S Int Double -> Array U Int Double)
-> m (Array S Int Double) -> m (Array U Int Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Matrix S Double
coordinates Matrix S Double -> Int -> m (Array S (Lower Ix2) Double)
forall r ix e (m :: * -> *).
(MonadThrow m, Index ix, Index (Lower ix), Source r e) =>
Array r ix e -> Int -> m (Array r (Lower ix) e)
!?> Int
c)
    Array U Int Double
coordD <- forall ix e r'.
(Manifest U e, Load r' ix e) =>
Array r' ix e -> Array U ix e
forall r ix e r'.
(Manifest r e, Load r' ix e) =>
Array r' ix e -> Array r ix e
compute @U (Array S Int Double -> Array U Int Double)
-> m (Array S Int Double) -> m (Array U Int Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Matrix S Double
coordinates Matrix S Double -> Int -> m (Array S (Lower Ix2) Double)
forall r ix e (m :: * -> *).
(MonadThrow m, Index ix, Index (Lower ix), Source r e) =>
Array r ix e -> Int -> m (Array r (Lower ix) e)
!?> Int
d)
    Array U Int Double
vecAB <- Array U Int Double
coordA Array U Int Double -> Array U Int Double -> m (Array U Int Double)
forall ix r e (m :: * -> *).
(Index ix, Numeric r e, MonadThrow m) =>
Array r ix e -> Array r ix e -> m (Array r ix e)
.-. Array U Int Double
coordB
    Array U Int Double
vecBC <- Array U Int Double
coordB Array U Int Double -> Array U Int Double -> m (Array U Int Double)
forall ix r e (m :: * -> *).
(Index ix, Numeric r e, MonadThrow m) =>
Array r ix e -> Array r ix e -> m (Array r ix e)
.-. Array U Int Double
coordC
    Array U Int Double
vecCD <- Array U Int Double
coordC Array U Int Double -> Array U Int Double -> m (Array U Int Double)
forall ix r e (m :: * -> *).
(Index ix, Numeric r e, MonadThrow m) =>
Array r ix e -> Array r ix e -> m (Array r ix e)
.-. Array U Int Double
coordD
    let planeABC :: Array U Int Double
planeABC = Vector Double -> Array U Int Double
forall e r.
(Element e, Mutable r e, Load r Int e) =>
Vector e -> Vector r e
vecH2M (Vector Double -> Array U Int Double)
-> Vector Double -> Array U Int Double
forall a b. (a -> b) -> a -> b
$ Vector Double -> Vector Double -> Vector Double
forall t. Product t => Vector t -> Vector t -> Vector t
LA.cross (Array U Int Double -> Vector Double
forall r e.
(Manifest r e, Load r Int e, Element e) =>
Vector r e -> Vector e
vecM2H Array U Int Double
vecAB) (Array U Int Double -> Vector Double
forall r e.
(Manifest r e, Load r Int e, Element e) =>
Vector r e -> Vector e
vecM2H Array U Int Double
vecBC) :: Massiv.Vector U Double
        planeBCD :: Array U Int Double
planeBCD = Vector Double -> Array U Int Double
forall e r.
(Element e, Mutable r e, Load r Int e) =>
Vector e -> Vector r e
vecH2M (Vector Double -> Array U Int Double)
-> Vector Double -> Array U Int Double
forall a b. (a -> b) -> a -> b
$ Vector Double -> Vector Double -> Vector Double
forall t. Product t => Vector t -> Vector t -> Vector t
LA.cross (Array U Int Double -> Vector Double
forall r e.
(Manifest r e, Load r Int e, Element e) =>
Vector r e -> Vector e
vecM2H Array U Int Double
vecBC) (Array U Int Double -> Vector Double
forall r e.
(Manifest r e, Load r Int e, Element e) =>
Vector r e -> Vector e
vecM2H Array U Int Double
vecCD) :: Massiv.Vector U Double
        normVecRot :: Vector Double
normVecRot = Vector Double -> Vector Double -> Vector Double
forall t. Product t => Vector t -> Vector t -> Vector t
LA.cross (Array U Int Double -> Vector Double
forall r e.
(Manifest r e, Load r Int e, Element e) =>
Vector r e -> Vector e
vecM2H Array U Int Double
vecCD) (Array U Int Double -> Vector Double
forall r e.
(Manifest r e, Load r Int e, Element e) =>
Vector r e -> Vector e
vecM2H Array U Int Double
vecBC) :: LA.Vector Double
        rotDir :: Double
rotDir =
          if Vector Double -> Array U Int Double
forall e r.
(Element e, Mutable r e, Load r Int e) =>
Vector e -> Vector r e
vecH2M Vector Double
normVecRot Array U Int Double -> Array U Int Double -> Double
forall r e.
(Numeric r e, Source r e) =>
Vector r e -> Vector r e -> e
!.! Array U Int Double
vecAB Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
0
            then -Double
1
            else Double
1
    Double -> m Double
forall (m :: * -> *) a. Monad m => a -> m a
return (Double -> m Double) -> Double -> m Double
forall a b. (a -> b) -> a -> b
$ Double
rotDir Double -> Double -> Double
forall a. Num a => a -> a -> a
* Array U Int Double -> Array U Int Double -> Double
forall r e.
(Numeric r e, Source r e, Floating e) =>
Vector r e -> Vector r e -> e
ArrayUtil.angle Array U Int Double
planeABC Array U Int Double
planeBCD

-- | Calculates a metric value of the dihedral angle defined by four atoms. This must create 2
-- values in the feature matrix, instead of one.
-- See <https://onlinelibrary.wiley.com/doi/full/10.1002/prot.20310)>
dihedral :: MonadThrow m => D -> Molecule -> m (Double, Double)
dihedral :: D -> Molecule -> m (Double, Double)
dihedral D
d Molecule
mol = do
  Double
dihedRad <- D -> Molecule -> m Double
forall (m :: * -> *). MonadThrow m => D -> Molecule -> m Double
dihedral' D
d Molecule
mol
  (Double, Double) -> m (Double, Double)
forall (m :: * -> *) a. Monad m => a -> m a
return (Double -> Double
forall a. Floating a => a -> a
sin Double
dihedRad, Double -> Double
forall a. Floating a => a -> a
cos Double
dihedRad)

-- | Get all selected features from a molecule.
getFeature ::
  ( Traversable t,
    MonadThrow m
  ) =>
  -- | Selection of multiple features.
  t Feature ->
  -- | A Molecule where to apply to.
  Molecule ->
  m (Massiv.Vector Massiv.DL Double)
getFeature :: t Feature -> Molecule -> m (Vector DL Double)
getFeature t Feature
sel Molecule
mol = do
  t (Either Double (Double, Double))
features <- t Feature
-> (Feature -> m (Either Double (Double, Double)))
-> m (t (Either Double (Double, Double)))
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
t a -> (a -> f b) -> f (t b)
for t Feature
sel ((Feature -> m (Either Double (Double, Double)))
 -> m (t (Either Double (Double, Double))))
-> (Feature -> m (Either Double (Double, Double)))
-> m (t (Either Double (Double, Double)))
forall a b. (a -> b) -> a -> b
$ \Feature
s -> case Feature
s of
    Feature
Energy -> Either Double (Double, Double)
-> m (Either Double (Double, Double))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either Double (Double, Double)
 -> m (Either Double (Double, Double)))
-> (Molecule -> Either Double (Double, Double))
-> Molecule
-> m (Either Double (Double, Double))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> Either Double (Double, Double)
forall a b. a -> Either a b
Left (Double -> Either Double (Double, Double))
-> (Molecule -> Double)
-> Molecule
-> Either Double (Double, Double)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Molecule -> Double
energy (Molecule -> m (Either Double (Double, Double)))
-> Molecule -> m (Either Double (Double, Double))
forall a b. (a -> b) -> a -> b
$ Molecule
mol
    Bond B
b -> Double -> Either Double (Double, Double)
forall a b. a -> Either a b
Left (Double -> Either Double (Double, Double))
-> m Double -> m (Either Double (Double, Double))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> B -> Molecule -> m Double
forall (m :: * -> *). MonadThrow m => B -> Molecule -> m Double
bond B
b Molecule
mol
    Angle A
a -> Double -> Either Double (Double, Double)
forall a b. a -> Either a b
Left (Double -> Either Double (Double, Double))
-> m Double -> m (Either Double (Double, Double))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> A -> Molecule -> m Double
forall (m :: * -> *). MonadThrow m => A -> Molecule -> m Double
angle A
a Molecule
mol
    Dihedral D
d -> (Double, Double) -> Either Double (Double, Double)
forall a b. b -> Either a b
Right ((Double, Double) -> Either Double (Double, Double))
-> m (Double, Double) -> m (Either Double (Double, Double))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> D -> Molecule -> m (Double, Double)
forall (m :: * -> *).
MonadThrow m =>
D -> Molecule -> m (Double, Double)
dihedral D
d Molecule
mol

  let featureVec :: Vector DL Double
featureVec =
        (Vector DL Double
 -> Either Double (Double, Double) -> Vector DL Double)
-> Vector DL Double
-> t (Either Double (Double, Double))
-> Vector DL Double
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl
          ( \Vector DL Double
acc Either Double (Double, Double)
val -> case Either Double (Double, Double)
val of
              Left Double
s -> Vector DL Double
acc Vector DL Double -> Double -> Vector DL Double
forall r e.
(Size r, Load r Int e) =>
Vector r e -> e -> Vector DL e
`Massiv.snoc` Double
s
              Right (Double
a, Double
b) -> Vector DL Double
acc Vector DL Double -> Double -> Vector DL Double
forall r e.
(Size r, Load r Int e) =>
Vector r e -> e -> Vector DL e
`Massiv.snoc` Double
a Vector DL Double -> Double -> Vector DL Double
forall r e.
(Size r, Load r Int e) =>
Vector r e -> e -> Vector DL e
`Massiv.snoc` Double
b
          )
          Vector DL Double
forall a. Monoid a => a
mempty
          t (Either Double (Double, Double))
features

  Vector DL Double -> m (Vector DL Double)
forall (m :: * -> *) a. Monad m => a -> m a
return Vector DL Double
featureVec

-- | Obtains the feature matrix \(\mathbf{X}\) for a principal component analysis. Given \(m\)
-- features to analyse in \(n\) measurements, \(\mathbf{X}\) will be a \(m \times n\) matrix.
{-# SCC getFeatures #-}
getFeatures ::
  ( MonadThrow m,
    Traversable f
  ) =>
  f Feature ->
  Trajectory ->
  m (Massiv.Matrix DL Double)
getFeatures :: f Feature -> Trajectory -> m (Array DL Ix2 Double)
getFeatures f Feature
sel Trajectory
trj = (Molecule -> m (Array D Ix2 Double))
-> Trajectory -> m (Seq (Array D Ix2 Double))
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse Molecule -> m (Array D Ix2 Double)
toCols Trajectory
trj m (Seq (Array D Ix2 Double))
-> (Seq (Array D Ix2 Double) -> m (Array DL Ix2 Double))
-> m (Array DL Ix2 Double)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Dim -> Seq (Array D Ix2 Double) -> m (Array DL Ix2 Double)
forall r ix e (f :: * -> *) (m :: * -> *).
(MonadThrow m, Foldable f, Index ix, Source r e) =>
Dim -> f (Array r ix e) -> m (Array DL ix e)
concatM (Int -> Dim
Dim Int
1)
  where
    toCols :: Molecule -> m (Array D Ix2 Double)
toCols Molecule
v = Sz1
-> (Double -> Int -> Double)
-> Array U (Lower Ix2) Double
-> Array D Ix2 Double
forall r ix a b.
(Index ix, Index (Lower ix), Manifest r a) =>
Sz1 -> (a -> Int -> b) -> Array r (Lower ix) a -> Array D ix b
expandInner @U @Ix2 (Int -> Sz1
forall ix. Index ix => ix -> Sz ix
Sz Int
1) Double -> Int -> Double
forall a b. a -> b -> a
const (Array U Int Double -> Array D Ix2 Double)
-> (Vector DL Double -> Array U Int Double)
-> Vector DL Double
-> Array D Ix2 Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall ix e r'.
(Manifest U e, Load r' ix e) =>
Array r' ix e -> Array U ix e
forall r ix e r'.
(Manifest r e, Load r' ix e) =>
Array r' ix e -> Array r ix e
compute @U (Vector DL Double -> Array D Ix2 Double)
-> m (Vector DL Double) -> m (Array D Ix2 Double)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> f Feature -> Molecule -> m (Vector DL Double)
forall (t :: * -> *) (m :: * -> *).
(Traversable t, MonadThrow m) =>
t Feature -> Molecule -> m (Vector DL Double)
getFeature f Feature
sel Molecule
v