{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Bio.PDB.Parser
  ( pdbP )
where


import           Bio.PDB.Type         (Atom (..), Chain, FieldType, Model,
                                       PDB (..), RemarkCode)
import           Control.Applicative  (many, some, (<|>))
import           Control.DeepSeq      ()
import           Data.Attoparsec.Text (Parser, choice, count, endOfInput,
                                       endOfLine, isEndOfLine, satisfy,
                                       skipWhile, space, string, takeWhile,
                                       anyChar)
import qualified Data.List            as L (groupBy)
import           Data.Map.Strict      (Map, fromListWithKey)
import           Data.Maybe           (catMaybes)
import           Data.Function        (on)
import           Data.Text            as T (Text, concat, pack, stripEnd)
import qualified Data.Vector          as V (Vector, concat, fromList, singleton)
import           GHC.Generics         ()
import           Text.Read            (readMaybe)

pdbP :: Parser PDB
pdbP :: Parser PDB
pdbP = do
    [PdbData]
pdbData <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text PdbData
titleP, Parser Text PdbData
remarkStringP, Parser Text PdbData
manyModelsP, Parser Text PdbData
otherFieldP]) -- parser order is important
    let
        models :: Vector Model
models         = forall a. [Vector a] -> Vector a
V.concat forall a b. (a -> b) -> a -> b
$ forall a. [Maybe a] -> [a]
catMaybes (PdbData -> Maybe (Vector Model)
getModels forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [PdbData]
pdbData)
        otherFieldsMap :: Map FieldType (Vector Text)
otherFieldsMap = forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
fromRevListWith forall a. Semigroup a => a -> a -> a
(<>) forall a b. (a -> b) -> a -> b
$ forall a. [Maybe a] -> [a]
catMaybes (PdbData -> Maybe (FieldType, Vector Text)
getOtherField forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [PdbData]
pdbData)
        title :: Text
title          = [Text] -> Text
T.concat forall a b. (a -> b) -> a -> b
$ forall a. [Maybe a] -> [a]
catMaybes (PdbData -> Maybe Text
getTitle forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [PdbData]
pdbData)
        remarks :: Map RemarkCode (Vector Text)
remarks        = forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
fromRevListWith forall a. Semigroup a => a -> a -> a
(<>) forall a b. (a -> b) -> a -> b
$ forall a. [Maybe a] -> [a]
catMaybes (PdbData -> Maybe (RemarkCode, Vector Text)
getRemarks forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [PdbData]
pdbData)

    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text
-> Vector Model
-> Map RemarkCode (Vector Text)
-> Map FieldType (Vector Text)
-> PDB
PDB Text
title Vector Model
models Map RemarkCode (Vector Text)
remarks Map FieldType (Vector Text)
otherFieldsMap
  where
    getModels :: PdbData -> Maybe (V.Vector Model)
    getModels :: PdbData -> Maybe (Vector Model)
getModels PdbData
item = case PdbData
item of
      ModelData Vector Model
x -> forall a. a -> Maybe a
Just Vector Model
x
      PdbData
_           -> forall a. Maybe a
Nothing
    getOtherField :: PdbData -> Maybe (FieldType, V.Vector Text)
    getOtherField :: PdbData -> Maybe (FieldType, Vector Text)
getOtherField PdbData
item = case PdbData
item of
      OtherFieldData (Just FieldType
x, Text
y) -> forall a. a -> Maybe a
Just (FieldType
x, forall a. a -> Vector a
V.singleton Text
y)
      PdbData
_                          -> forall a. Maybe a
Nothing
    getTitle :: PdbData -> Maybe Text
    getTitle :: PdbData -> Maybe Text
getTitle PdbData
item = case PdbData
item of
      TitleData Text
x -> forall a. a -> Maybe a
Just Text
x
      PdbData
_           -> forall a. Maybe a
Nothing
    getRemarks :: PdbData -> Maybe (RemarkCode, V.Vector Text)
    getRemarks :: PdbData -> Maybe (RemarkCode, Vector Text)
getRemarks PdbData
item = case PdbData
item of
      RemarkData (RemarkCode
x, Text
y) -> forall a. a -> Maybe a
Just (RemarkCode
x, forall a. a -> Vector a
V.singleton Text
y)
      PdbData
_                 -> forall a. Maybe a
Nothing

data PdbData = ModelData (V.Vector Model)
             | OtherFieldData (Maybe FieldType, Text)
             | RemarkData (RemarkCode, Text)
             | TitleData Text
  deriving (Int -> PdbData -> ShowS
[PdbData] -> ShowS
PdbData -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PdbData] -> ShowS
$cshowList :: [PdbData] -> ShowS
show :: PdbData -> String
$cshow :: PdbData -> String
showsPrec :: Int -> PdbData -> ShowS
$cshowsPrec :: Int -> PdbData -> ShowS
Show)

notEndLineChar :: Parser Char
notEndLineChar :: Parser Char
notEndLineChar = (Char -> Bool) -> Parser Char
satisfy forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isEndOfLine

takeText :: Parser Text
takeText :: Parser Text
takeText = (Char -> Bool) -> Parser Text
Data.Attoparsec.Text.takeWhile forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isEndOfLine

atomP :: Parser CoordLike
atomP :: Parser CoordLike
atomP = let atom :: Parser Text Atom
atom = Int
-> Text
-> Char
-> Text
-> Char
-> Int
-> Char
-> Float
-> Float
-> Float
-> Float
-> Float
-> Text
-> Text
-> Atom
Atom forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
                    (
                      (Text -> Parser Text
string Text
"ATOM " forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>                                      -- (1 -  5)  ATOM -- we extended atomSerial length to the left for one symbol
                      (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
6 Parser Char
notEndLineChar))                      -- (6 - 11)  atomSerial
                        forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>                                                   -- or
                      (Text -> Parser Text
string Text
"HETATM" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>                                     -- (1 -  6)  HETATM
                      (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
5 Parser Char
notEndLineChar))                      -- (7 - 11)  atomSerial
                    )
                    forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
space
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
4 Parser Char
notEndLineChar)                   -- (13 - 16) atomName
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Char
notEndLineChar                                        -- (17)      atomAltLoc
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
3 Parser Char
notEndLineChar) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
space          -- (18 - 20) atomResName
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Char
notEndLineChar                                        -- (22)      atomChainID
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
4 Parser Char
notEndLineChar)                     -- (23 - 26) atomResSeq
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Char
notEndLineChar forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
3 Parser Char
space                       -- (27)      atomICode
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
8 Parser Char
notEndLineChar)                     -- (31 - 38) atomX
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
8 Parser Char
notEndLineChar)                     -- (39 - 46) atomY
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
8 Parser Char
notEndLineChar)                     -- (47 - 54) atomZ
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
6 Parser Char
notEndLineChar)                     -- (55 - 60) atomOccupancy
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
6 Parser Char
notEndLineChar) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
10 Parser Char
anyChar -- (61 - 66) atomTempFactor
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
2 Parser Char
notEndLineChar)                   -- (77 - 78) atomElement
                    forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
2 Parser Char
notEndLineChar)                   -- (79 - 80) atomCharge
                    forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Parser Text ()
endOfLine forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall t. Chunk t => Parser t ()
endOfInput)
        in Atom -> CoordLike
AtomLine forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Atom
atom

coordNotAtomP :: Parser CoordLike
coordNotAtomP :: Parser CoordLike
coordNotAtomP = do
    Text
_ <- Text -> Parser Text
string Text
"TER" forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string Text
"ANISOU" forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string Text
"CONECT"
    (Char -> Bool) -> Parser Text ()
skipWhile forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isEndOfLine
    Parser Text ()
endOfLine
    forall (m :: * -> *) a. Monad m => a -> m a
return CoordLike
CoordNotAtomLine

data CoordLike = AtomLine Atom | CoordNotAtomLine
  deriving (Int -> CoordLike -> ShowS
[CoordLike] -> ShowS
CoordLike -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CoordLike] -> ShowS
$cshowList :: [CoordLike] -> ShowS
show :: CoordLike -> String
$cshow :: CoordLike -> String
showsPrec :: Int -> CoordLike -> ShowS
$cshowsPrec :: Int -> CoordLike -> ShowS
Show)

coordLikeP :: Parser [CoordLike]
coordLikeP :: Parser [CoordLike]
coordLikeP = forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (Parser CoordLike
coordNotAtomP forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser CoordLike
atomP)

chainsP :: Parser (V.Vector Chain)
chainsP :: Parser Model
chainsP = do
    [CoordLike]
coordLikeLines <- Parser [CoordLike]
coordLikeP
    let atoms :: [Atom]
atoms  = forall a. [Maybe a] -> [a]
catMaybes (CoordLike -> Maybe Atom
getAtom forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [CoordLike]
coordLikeLines)
        chains :: Model
chains = forall a. [a] -> Vector a
V.fromList (forall a b. (a -> b) -> [a] -> [b]
map forall a. [a] -> Vector a
V.fromList forall a b. (a -> b) -> a -> b
$ [Atom] -> [[Atom]]
groupByChains [Atom]
atoms)
    forall (f :: * -> *) a. Applicative f => a -> f a
pure Model
chains
  where
    getAtom :: CoordLike -> Maybe Atom
    getAtom :: CoordLike -> Maybe Atom
getAtom CoordLike
line = case CoordLike
line of
      AtomLine Atom
x -> forall a. a -> Maybe a
Just Atom
x
      CoordLike
_          -> forall a. Maybe a
Nothing
    groupByChains :: [Atom]-> [[Atom]]
    groupByChains :: [Atom] -> [[Atom]]
groupByChains = forall a. (a -> a -> Bool) -> [a] -> [[a]]
L.groupBy (forall a. Eq a => a -> a -> Bool
(==) forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` Atom -> Char
atomChainID)

modelP :: Parser Model
modelP :: Parser Model
modelP = do
    Text
_ <- Text -> Parser Text
string Text
"MODEL"
    (Char -> Bool) -> Parser Text ()
skipWhile forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isEndOfLine
    Parser Text ()
endOfLine
    Model
chains <- Parser Model
chainsP
    Text -> Parser Text
string Text
"ENDMDL" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser Text ()
skipWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isEndOfLine)
    Parser Text ()
endOfLine forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall t. Chunk t => Parser t ()
endOfInput
    forall (f :: * -> *) a. Applicative f => a -> f a
pure Model
chains

manyModelsP :: Parser PdbData
manyModelsP :: Parser Text PdbData
manyModelsP = do
    [Model]
models <- (forall a. a -> [a] -> [a]
:[]) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Model
chainsP forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Alternative f => f a -> f [a]
some Parser Model
modelP
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Vector Model -> PdbData
ModelData (forall a. [a] -> Vector a
V.fromList [Model]
models)

titleStringP :: Parser Text
titleStringP :: Parser Text
titleStringP = do
    Text
_ <- Text -> Parser Text
string Text
"TITLE "
    Text
titleText <- Parser Text
takeText
    Parser Text ()
endOfLine
    forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ Text -> Text
T.stripEnd Text
titleText

titleP :: Parser PdbData
titleP :: Parser Text PdbData
titleP =  do
    Text
titleText <- [Text] -> Text
T.concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f [a]
some Parser Text
titleStringP
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> PdbData
TitleData Text
titleText

remarkStringP :: Parser PdbData
remarkStringP :: Parser Text PdbData
remarkStringP = do
    Text
_ <- Text -> Parser Text
string Text
"REMARK"
    Char
_ <- Parser Char
space
    (RemarkCode
remarkCode :: RemarkCode) <- forall a. Read a => String -> Maybe a
readMaybe forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
3 Parser Char
notEndLineChar
    Char
_ <- Parser Char
notEndLineChar
    Text
remarkText <- Parser Text
takeText
    Parser Text ()
endOfLine
    forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ (RemarkCode, Text) -> PdbData
RemarkData (RemarkCode
remarkCode, Text -> Text
T.stripEnd Text
remarkText)

fromRevListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a
fromRevListWith :: forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
fromRevListWith a -> a -> a
f [(k, a)]
xs = forall k a. Ord k => (k -> a -> a -> a) -> [(k, a)] -> Map k a
fromListWithKey (\k
_ a
x a
y -> a -> a -> a
f a
y a
x) [(k, a)]
xs

otherFieldP :: Parser PdbData
otherFieldP :: Parser Text PdbData
otherFieldP = do
    (Maybe FieldType
fieldType :: Maybe FieldType) <- forall a. Read a => String -> Maybe a
readMaybe forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
6 Parser Char
notEndLineChar
    Text
fieldTypeText <- Parser Text
takeText
    Parser Text ()
endOfLine forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall t. Chunk t => Parser t ()
endOfInput
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Maybe FieldType, Text) -> PdbData
OtherFieldData (Maybe FieldType
fieldType, Text -> Text
T.stripEnd Text
fieldTypeText)