{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Bio.PDB.Parser
( pdbP )
where
import Bio.PDB.Type (Atom (..), Chain, FieldType, Model,
PDB (..), RemarkCode)
import Control.Applicative (many, some, (<|>))
import Control.DeepSeq ()
import Data.Attoparsec.Text (Parser, choice, count, endOfInput,
endOfLine, isEndOfLine, satisfy,
skipWhile, space, string, takeWhile,
anyChar)
import qualified Data.List as L (groupBy)
import Data.Map.Strict (Map, fromListWithKey)
import Data.Maybe (catMaybes)
import Data.Function (on)
import Data.Text as T (Text, concat, pack, stripEnd)
import qualified Data.Vector as V (Vector, concat, fromList, singleton)
import GHC.Generics ()
import Text.Read (readMaybe)
pdbP :: Parser PDB
pdbP :: Parser PDB
pdbP = do
[PdbData]
pdbData <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text PdbData
titleP, Parser Text PdbData
remarkStringP, Parser Text PdbData
manyModelsP, Parser Text PdbData
otherFieldP])
let
models :: Vector Model
models = forall a. [Vector a] -> Vector a
V.concat forall a b. (a -> b) -> a -> b
$ forall a. [Maybe a] -> [a]
catMaybes (PdbData -> Maybe (Vector Model)
getModels forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [PdbData]
pdbData)
otherFieldsMap :: Map FieldType (Vector Text)
otherFieldsMap = forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
fromRevListWith forall a. Semigroup a => a -> a -> a
(<>) forall a b. (a -> b) -> a -> b
$ forall a. [Maybe a] -> [a]
catMaybes (PdbData -> Maybe (FieldType, Vector Text)
getOtherField forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [PdbData]
pdbData)
title :: Text
title = [Text] -> Text
T.concat forall a b. (a -> b) -> a -> b
$ forall a. [Maybe a] -> [a]
catMaybes (PdbData -> Maybe Text
getTitle forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [PdbData]
pdbData)
remarks :: Map RemarkCode (Vector Text)
remarks = forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
fromRevListWith forall a. Semigroup a => a -> a -> a
(<>) forall a b. (a -> b) -> a -> b
$ forall a. [Maybe a] -> [a]
catMaybes (PdbData -> Maybe (RemarkCode, Vector Text)
getRemarks forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [PdbData]
pdbData)
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text
-> Vector Model
-> Map RemarkCode (Vector Text)
-> Map FieldType (Vector Text)
-> PDB
PDB Text
title Vector Model
models Map RemarkCode (Vector Text)
remarks Map FieldType (Vector Text)
otherFieldsMap
where
getModels :: PdbData -> Maybe (V.Vector Model)
getModels :: PdbData -> Maybe (Vector Model)
getModels PdbData
item = case PdbData
item of
ModelData Vector Model
x -> forall a. a -> Maybe a
Just Vector Model
x
PdbData
_ -> forall a. Maybe a
Nothing
getOtherField :: PdbData -> Maybe (FieldType, V.Vector Text)
getOtherField :: PdbData -> Maybe (FieldType, Vector Text)
getOtherField PdbData
item = case PdbData
item of
OtherFieldData (Just FieldType
x, Text
y) -> forall a. a -> Maybe a
Just (FieldType
x, forall a. a -> Vector a
V.singleton Text
y)
PdbData
_ -> forall a. Maybe a
Nothing
getTitle :: PdbData -> Maybe Text
getTitle :: PdbData -> Maybe Text
getTitle PdbData
item = case PdbData
item of
TitleData Text
x -> forall a. a -> Maybe a
Just Text
x
PdbData
_ -> forall a. Maybe a
Nothing
getRemarks :: PdbData -> Maybe (RemarkCode, V.Vector Text)
getRemarks :: PdbData -> Maybe (RemarkCode, Vector Text)
getRemarks PdbData
item = case PdbData
item of
RemarkData (RemarkCode
x, Text
y) -> forall a. a -> Maybe a
Just (RemarkCode
x, forall a. a -> Vector a
V.singleton Text
y)
PdbData
_ -> forall a. Maybe a
Nothing
data PdbData = ModelData (V.Vector Model)
| OtherFieldData (Maybe FieldType, Text)
| (RemarkCode, Text)
| TitleData Text
deriving (Int -> PdbData -> ShowS
[PdbData] -> ShowS
PdbData -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PdbData] -> ShowS
$cshowList :: [PdbData] -> ShowS
show :: PdbData -> String
$cshow :: PdbData -> String
showsPrec :: Int -> PdbData -> ShowS
$cshowsPrec :: Int -> PdbData -> ShowS
Show)
notEndLineChar :: Parser Char
notEndLineChar :: Parser Char
notEndLineChar = (Char -> Bool) -> Parser Char
satisfy forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isEndOfLine
takeText :: Parser Text
takeText :: Parser Text
takeText = (Char -> Bool) -> Parser Text
Data.Attoparsec.Text.takeWhile forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isEndOfLine
atomP :: Parser CoordLike
atomP :: Parser CoordLike
atomP = let atom :: Parser Text Atom
atom = Int
-> Text
-> Char
-> Text
-> Char
-> Int
-> Char
-> Float
-> Float
-> Float
-> Float
-> Float
-> Text
-> Text
-> Atom
Atom forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
(
(Text -> Parser Text
string Text
"ATOM " forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>
(forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
6 Parser Char
notEndLineChar))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(Text -> Parser Text
string Text
"HETATM" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>
(forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
5 Parser Char
notEndLineChar))
)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
space
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
4 Parser Char
notEndLineChar)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Char
notEndLineChar
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
3 Parser Char
notEndLineChar) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
space
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Char
notEndLineChar
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
4 Parser Char
notEndLineChar)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Char
notEndLineChar forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
3 Parser Char
space
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
8 Parser Char
notEndLineChar)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
8 Parser Char
notEndLineChar)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
8 Parser Char
notEndLineChar)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
6 Parser Char
notEndLineChar)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall a. Read a => String -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
6 Parser Char
notEndLineChar) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
10 Parser Char
anyChar
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
2 Parser Char
notEndLineChar)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
2 Parser Char
notEndLineChar)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Parser Text ()
endOfLine forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall t. Chunk t => Parser t ()
endOfInput)
in Atom -> CoordLike
AtomLine forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Atom
atom
coordNotAtomP :: Parser CoordLike
coordNotAtomP :: Parser CoordLike
coordNotAtomP = do
Text
_ <- Text -> Parser Text
string Text
"TER" forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string Text
"ANISOU" forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Text -> Parser Text
string Text
"CONECT"
(Char -> Bool) -> Parser Text ()
skipWhile forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isEndOfLine
Parser Text ()
endOfLine
forall (m :: * -> *) a. Monad m => a -> m a
return CoordLike
CoordNotAtomLine
data CoordLike = AtomLine Atom | CoordNotAtomLine
deriving (Int -> CoordLike -> ShowS
[CoordLike] -> ShowS
CoordLike -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CoordLike] -> ShowS
$cshowList :: [CoordLike] -> ShowS
show :: CoordLike -> String
$cshow :: CoordLike -> String
showsPrec :: Int -> CoordLike -> ShowS
$cshowsPrec :: Int -> CoordLike -> ShowS
Show)
coordLikeP :: Parser [CoordLike]
coordLikeP :: Parser [CoordLike]
coordLikeP = forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (Parser CoordLike
coordNotAtomP forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser CoordLike
atomP)
chainsP :: Parser (V.Vector Chain)
chainsP :: Parser Model
chainsP = do
[CoordLike]
coordLikeLines <- Parser [CoordLike]
coordLikeP
let atoms :: [Atom]
atoms = forall a. [Maybe a] -> [a]
catMaybes (CoordLike -> Maybe Atom
getAtom forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [CoordLike]
coordLikeLines)
chains :: Model
chains = forall a. [a] -> Vector a
V.fromList (forall a b. (a -> b) -> [a] -> [b]
map forall a. [a] -> Vector a
V.fromList forall a b. (a -> b) -> a -> b
$ [Atom] -> [[Atom]]
groupByChains [Atom]
atoms)
forall (f :: * -> *) a. Applicative f => a -> f a
pure Model
chains
where
getAtom :: CoordLike -> Maybe Atom
getAtom :: CoordLike -> Maybe Atom
getAtom CoordLike
line = case CoordLike
line of
AtomLine Atom
x -> forall a. a -> Maybe a
Just Atom
x
CoordLike
_ -> forall a. Maybe a
Nothing
groupByChains :: [Atom]-> [[Atom]]
groupByChains :: [Atom] -> [[Atom]]
groupByChains = forall a. (a -> a -> Bool) -> [a] -> [[a]]
L.groupBy (forall a. Eq a => a -> a -> Bool
(==) forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` Atom -> Char
atomChainID)
modelP :: Parser Model
modelP :: Parser Model
modelP = do
Text
_ <- Text -> Parser Text
string Text
"MODEL"
(Char -> Bool) -> Parser Text ()
skipWhile forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isEndOfLine
Parser Text ()
endOfLine
Model
chains <- Parser Model
chainsP
Text -> Parser Text
string Text
"ENDMDL" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser Text ()
skipWhile (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isEndOfLine)
Parser Text ()
endOfLine forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall t. Chunk t => Parser t ()
endOfInput
forall (f :: * -> *) a. Applicative f => a -> f a
pure Model
chains
manyModelsP :: Parser PdbData
manyModelsP :: Parser Text PdbData
manyModelsP = do
[Model]
models <- (forall a. a -> [a] -> [a]
:[]) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Model
chainsP forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Alternative f => f a -> f [a]
some Parser Model
modelP
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Vector Model -> PdbData
ModelData (forall a. [a] -> Vector a
V.fromList [Model]
models)
titleStringP :: Parser Text
titleStringP :: Parser Text
titleStringP = do
Text
_ <- Text -> Parser Text
string Text
"TITLE "
Text
titleText <- Parser Text
takeText
Parser Text ()
endOfLine
forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ Text -> Text
T.stripEnd Text
titleText
titleP :: Parser PdbData
titleP :: Parser Text PdbData
titleP = do
Text
titleText <- [Text] -> Text
T.concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f [a]
some Parser Text
titleStringP
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> PdbData
TitleData Text
titleText
remarkStringP :: Parser PdbData
= do
Text
_ <- Text -> Parser Text
string Text
"REMARK"
Char
_ <- Parser Char
space
(RemarkCode
remarkCode :: RemarkCode) <- forall a. Read a => String -> Maybe a
readMaybe forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
3 Parser Char
notEndLineChar
Char
_ <- Parser Char
notEndLineChar
Text
remarkText <- Parser Text
takeText
Parser Text ()
endOfLine
forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ (RemarkCode, Text) -> PdbData
RemarkData (RemarkCode
remarkCode, Text -> Text
T.stripEnd Text
remarkText)
fromRevListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a
fromRevListWith :: forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
fromRevListWith a -> a -> a
f [(k, a)]
xs = forall k a. Ord k => (k -> a -> a -> a) -> [(k, a)] -> Map k a
fromListWithKey (\k
_ a
x a
y -> a -> a -> a
f a
y a
x) [(k, a)]
xs
otherFieldP :: Parser PdbData
otherFieldP :: Parser Text PdbData
otherFieldP = do
(Maybe FieldType
fieldType :: Maybe FieldType) <- forall a. Read a => String -> Maybe a
readMaybe forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
6 Parser Char
notEndLineChar
Text
fieldTypeText <- Parser Text
takeText
Parser Text ()
endOfLine forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall t. Chunk t => Parser t ()
endOfInput
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ (Maybe FieldType, Text) -> PdbData
OtherFieldData (Maybe FieldType
fieldType, Text -> Text
T.stripEnd Text
fieldTypeText)