{-# LANGUAGE TupleSections #-}
{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# OPTIONS_GHC -fno-warn-unused-matches #-}
{-# OPTIONS_GHC -fno-warn-unused-local-binds #-}

module Bio.Uniprot.Parser where

import           Prelude              hiding (null)
import qualified Prelude              as P (concat, init, last, null, tail)

import           Bio.Uniprot.Type
import           Control.Applicative  (liftA2, (<|>))
import           Control.Monad        (unless)
import           Data.Attoparsec.Text
import           Data.Bifunctor       (second)
import           Data.Char            (isSpace)
import           Data.Functor         (($>))
import           Data.Text            (Text, append, isPrefixOf, null, pack,
                                       splitOn, unpack)

-- | Kind of keyword that may introduce a line of the DE section.
--
-- The derived 'Show' text of a constructor is matched literally, followed by
-- @": "@, as the prefix of a DE line, so the constructor names must stay in
-- sync with the keywords used in the flat file. 'None' stands for lines that
-- carry no keyword and are matched by a run of spaces instead.
data NameType
  = RecName
  | AltName
  | SubName
  | Flags
  | None
  deriving (Show)

-- | Parses the ID line of a UniProt-KB text file.
--
-- Expected layout:
-- @ID   \<entry name\> \<Reviewed|Unreviewed\>; \<length\> AA.@
-- where the entry name consists of upper-case letters, digits and
-- underscores, and the gaps may be any positive amount of whitespace.
parseID :: Parser ID
parseID = do
    string "ID   "
    name <- takeWhile1 (inClass "A-Z0-9_")
    skipMany1 space
    reviewStatus <- (Reviewed <$ string "Reviewed")
                <|> (Unreviewed <$ string "Unreviewed")
    char ';'
    skipMany1 space
    len <- decimal
    -- Exactly one whitespace character separates the length from its unit.
    space *> string "AA."
    pure ID { entryName = name, status = reviewStatus, seqLength = len }

-- | Parses the AC lines of a UniProt-KB text file.
--
-- Accession numbers may be spread over several consecutive AC lines; they
-- are collected into a single list in order of appearance.
parseAC :: Parser AC
parseAC = do
    acPrefix
    -- Every line but the last must be followed by a line break and by the
    -- prefix of the next AC line; otherwise the attempt is rolled back and
    -- the line is consumed as the final one below.
    leading <- many' (acLine <* endOfLine <* acPrefix)
    final <- acLine
    pure AC { accessionNumbers = P.concat leading ++ final }
  where
    -- Line code followed by exactly three whitespace characters.
    acPrefix :: Parser ()
    acPrefix = string "AC" *> count 3 space $> ()

    -- One or more accession numbers, each terminated by ';' and optionally
    -- followed by a single horizontal whitespace character.
    acLine :: Parser [Text]
    acLine = many1 $
        takeWhile1 (inClass "A-Z0-9_")
            <* char ';'
            <* option ' ' (satisfy isHorizontalSpace)

-- | Parses the three DT lines of a UniProt-KB text file.
--
-- In order, the lines provide: the integration date together with the name
-- of the database, the date and number of the sequence version, and the date
-- and number of the entry version.
--
-- A version that is not a valid integer makes the parser fail; it never
-- yields a value that throws when it is evaluated later.
parseDT :: Parser DT
parseDT = do
    (integrated, db)      <- parseOneDT "integrated into UniProtKB/" <* endOfLine
    (seqDate, seqRaw)     <- parseOneDT "sequence version " <* endOfLine
    seqNo                 <- parseVersion seqRaw
    (entryDate, entryRaw) <- parseOneDT "entry version "
    entryNo               <- parseVersion entryRaw
    pure DT
        { dbIntegrationDate = integrated
        , dbName            = db
        , seqVersionDate    = seqDate
        , seqVersion        = seqNo
        , entryVersionDate  = entryDate
        , entryVersion      = entryNo
        }
  where
    -- Parses one line of the form @DT   \<date\>, \<txt\>\<value\>.@ and
    -- returns the date and the raw value (without the closing dot).
    parseOneDT :: Text -> Parser (Text, Text)
    parseOneDT txt = do
        string "DT   "
        day <- pack <$> many1 (satisfy $ inClass "A-Z0-9-")
        char ','
        many1 space
        string txt
        x <- pack <$> many1 (satisfy $ inClass "A-Za-z0-9_-")
        char '.'
        pure (day, x)

    -- Converts a raw version token to an 'Int'. Accepts exactly the tokens
    -- that 'read' would accept, but reports anything else as a parse
    -- failure instead of a runtime exception.
    parseVersion :: Text -> Parser Int
    parseVersion raw = case reads (unpack raw) of
        [(n, [])] -> pure n
        _         -> fail ("DT line: malformed version number " ++ show raw)

-- | Parses the DE lines of a UniProt-KB text file.
--
-- The parts are read in a fixed order: an optional @RecName@ block, any
-- number of @AltName@ entries, any number of @SubName@ blocks, the
-- @Includes:@ and @Contains:@ sub-sections and finally an optional @Flags@
-- line. Every part after the first is expected to start on a new line.
parseDE :: Parser DE
parseDE = do
    recName  <- optional $ parseNameDE 0 RecName
    altNames <- many' (endOfLine *> parseAltDE 0)
    subNames <- many' (endOfLine *> parseNameDE 0 SubName)
    includes <- many' (endOfLine *> parseInternal "Includes")
    contains <- many' (endOfLine *> parseInternal "Contains")
    flags    <- option [] (endOfLine *> parseFlagsDE)
    pure DE{..}
  where
    -- Parses a name block (RecName, AltName or SubName): a mandatory @Full=@
    -- line followed by any number of @Short=@ and then @EC=@ lines.
    parseNameDE :: Int -> NameType -> Parser Name
    parseNameDE indent nameType = do
        fullName  <- parseDELine indent nameType "Full"
        shortName <- many' $ endOfLine *> parseDELine indent None "Short"
        ecNumber  <- many' $ endOfLine *> parseDELine indent None "EC"
        pure Name{..}

    -- Parses the flag line of the DE section. The line content is split on
    -- "; " and every piece must be readable as a 'Flag'; an unknown flag is
    -- a parse failure rather than a runtime exception.
    parseFlagsDE :: Parser [Flag]
    parseFlagsDE = parseDELine 0 Flags "" >>= traverse toFlag . splitOn "; "
      where
        toFlag :: Text -> Parser Flag
        toFlag raw =
            -- Same acceptance rule as 'read': exactly one complete parse,
            -- with at most whitespace left over.
            case [flag | (flag, rest) <- reads (unpack raw), all isSpace rest] of
                [flag] -> pure flag
                _      -> fail ("DE line: unknown flag " ++ show raw)

    -- Parses an AltName entry of the DE section: either a full name block or
    -- one of the single-line variants.
    parseAltDE :: Int -> Parser AltName
    parseAltDE indent =
      (Simple    <$> parseNameDE indent AltName) <|>
      (Allergen  <$> parseDELine indent AltName "Allergen") <|>
      (Biotech   <$> parseDELine indent AltName "Biotech") <|>
      (CDAntigen <$> parseDELine indent AltName "CD_antigen") <|>
      (INN       <$> parseDELine indent AltName "INN")

    -- Parses any single DE line and returns its value.
    --
    -- @indent@ is the number of extra leading spaces, @nameType@ selects the
    -- keyword prefix ('None' means a blank prefix of the same width) and
    -- @tpe@, when non-empty, is the key expected in front of '='.
    -- The last character of the line is dropped (presumably the terminating
    -- ';' -- TODO confirm), as is everything from the first " {ECO" onwards.
    parseDELine :: Int -> NameType -> Text -> Parser Text
    parseDELine indent nameType tpe = do
        string "DE   "
        count indent (char ' ')
        string $ case nameType of
            None -> "         "
            a    -> pack (show a) `append` ": "
        unless (null tpe) $ string tpe *> string "=" $> ()
        -- many1 guarantees a non-empty list, so P.init cannot fail here.
        result <- pack . P.init <$> many1 (satisfy (not . isEndOfLine))
        pure $ case splitOn " {ECO" result of
            (value : _) -> value
            []          -> result

    -- Parses an "Includes:" or "Contains:" sub-section: the header line
    -- followed by an optional RecName block and AltName entries, both
    -- indented by two extra spaces.
    parseInternal :: Text -> Parser DE
    parseInternal name = do
        string "DE   " >> string name >> char ':'
        endOfLine
        recName  <- optional $ parseNameDE 2 RecName
        altNames <- many' (endOfLine *> parseAltDE 2)
        pure $ DE recName altNames [] [] [] []

-- | Parses the GN lines of a UniProt-KB text file.
--
-- One gene record consists of an optional @Name@ item followed by the
-- optional list items @Synonyms@, @OrderedLocusNames@ and @ORFNames@, in that
-- order. Further gene records may follow after a line that contains only
-- @and@; all records are returned in order of appearance.
parseGN :: Parser [GN]
parseGN = do
    string "GN   "
    geneName          <- optional (parseDefItem "Name")
    skipBreak
    synonyms          <- parseGNList "Synonyms"
    skipBreak
    orderedLocusNames <- parseGNList "OrderedLocusNames"
    skipBreak
    orfNames          <- parseGNList "ORFNames"
    skipBreak
    rest <- option [] (string "and" *> endOfLine *> parseGN)
    pure (GN{..} : rest)
  where
    -- Items may continue on the next GN line, so a break is tolerated (but
    -- not required) after each of them.
    skipBreak :: Parser (Maybe ())
    skipBreak = optional (parseBreak "GN")

    -- Parses a list item of a GN line (like `Synonyms` or `ORFNames`),
    -- splitting its value on ", ". A missing item yields an empty list.
    parseGNList :: Text -> Parser [Text]
    parseGNList name = option [] (splitOn ", " <$> parseDefItem name)

-- | Parses the OS lines for one record of a UniProt-KB text file.
--
-- The (possibly multi-line) species description is read after the @OS@
-- prefix and its last character is dropped before it is wrapped in 'OS'.
parseOS :: Parser OS
parseOS = do
    string "OS   "
    description <- parseMultiLineComment "OS" 3
    pure . OS . pack $ P.init description

-- | Parses the OG lines of a UniProt-KB text file.
--
-- Either a single organelle keyword (optionally followed by evidence tags
-- and terminated by '.') or a list of plasmids, which may span several
-- consecutive OG lines.
parseOG :: Parser OG
parseOG = (parseOGNonPlasmid <* many' (char ' ' >> parseEvidence) <* char '.') <|>
          (Plasmid <$> parseOGPlasmid)
  where
    -- Organelle keywords. The specific "Plastid; ..." variants must be tried
    -- before the bare "Plastid", which is a prefix of all of them.
    parseOGNonPlasmid :: Parser OG
    parseOGNonPlasmid = string "OG   " *>
      ((string "Hydrogenosome" $> Hydrogenosome) <|>
       (string "Mitochondrion" $> Mitochondrion) <|>
       (string "Nucleomorph" $> Nucleomorph) <|>
       (string "Plastid; Apicoplast" $> Plastid PlastidApicoplast) <|>
       (string "Plastid; Chloroplast" $> Plastid PlastidChloroplast) <|>
       (string "Plastid; Organellar chromatophore" $> Plastid PlastidOrganellarChromatophore) <|>
       (string "Plastid; Cyanelle" $> Plastid PlastidCyanelle) <|>
       (string "Plastid; Non-photosynthetic plastid" $> Plastid PlastidNonPhotosynthetic) <|>
       (string "Plastid" $> Plastid PlastidSimple))

    -- One OG line of plasmids, followed recursively by any further OG
    -- plasmid lines. Plasmids are separated by "," or ", and".
    parseOGPlasmid :: Parser [Text]
    parseOGPlasmid = do
        string "OG   "
        name <- parseAnyPlasmid
        let separator = char ',' >> optional " and"
        rest <- many' $ separator *> char ' ' *> parseAnyPlasmid
        -- A line that continues on the next one ends with a dangling separator.
        optional separator
        rest2 <- P.concat <$> many' (endOfLine *> parseOGPlasmid)
        pure $ name : rest ++ rest2

    -- A named plasmid, or a bare "Plasmid" keyword without a name, which is
    -- reported as the empty string.
    parseAnyPlasmid :: Parser Text
    parseAnyPlasmid = parseOnePlasmid <|>
                      (("Plasmid" <* optional (char ' ' >> parseEvidence)
                                  <* optional (char '.')) $> "") -- ABSAA_ALCSP hack

    parseOnePlasmid :: Parser Text
    parseOnePlasmid = do
        string "Plasmid "
        pack <$> parsePlasmidName

    -- Reads a plasmid name up to ',', '{' or the end of the line, then
    -- removes a trailing evidence tag and a trailing '.' if present.
    parsePlasmidName :: Parser String
    parsePlasmidName = do
        part <- many1 (satisfy $ liftA2 (&&) (notInClass ",{") (not . isEndOfLine))
        nextChar <- peekChar
        plasmid <- case nextChar of
          -- The character in front of '{' is dropped together with the
          -- evidence tag (presumably the separating space -- TODO confirm).
          -- 'part' is non-empty thanks to many1, so P.init is safe.
          Just '{' -> parseEvidence >> optional (char '.') $> P.init part
          _        -> pure part
        pure (dropTrailingDot plasmid)

    -- Removes one trailing '.', if any. Total: the name may be empty here
    -- when a one-character name was followed by an evidence tag.
    dropTrailingDot :: String -> String
    dropTrailingDot s
      | not (P.null s) && P.last s == '.' = P.init s
      | otherwise                         = s

-- | Parses the OC lines of a UniProt-KB text file.
--
-- Delegates to 'parseNodes' with the line code @OC@ and the characters ';'
-- and '.', and wraps the resulting list in 'OC'.
parseOC :: Parser OC
parseOC = fmap OC (parseNodes "OC" ';' '.')

-- | Parses the OX line of a UniProt-KB text file.
--
-- Expected layout: @OX   \<database qualifier\>=\<taxonomic code\>;@
-- Both parts must be non-empty.
parseOX :: Parser OX
parseOX = do
    string "OX   "
    qualifier <- takeWhile1 (/= '=') <* char '='
    code      <- takeWhile1 (/= ';') <* char ';'
    pure OX { databaseQualifier = qualifier, taxonomicCode = code }

-- | Parses the OH line of a UniProt-KB text file.
--
-- Expected layout: @OH   NCBI_TaxID=\<tax id\>;\<rest of line\>@.
-- The host name is the rest of the line without its first and last
-- characters (presumably the separating space and the closing '.' --
-- TODO confirm). A rest that is empty or one character long yields an empty
-- host name.
parseOH :: Parser OH
parseOH = do
    string "OH   NCBI_TaxID="
    taxId <- pack <$> many1 (notChar ';')
    char ';'
    remainder <- many' (satisfy $ not . isEndOfLine)
    let hostName = pack (trimHost remainder)
    pure OH{..}
  where
    -- Drops the first and the last character without ever failing.
    trimHost :: String -> String
    trimHost [] = []
    trimHost s  = drop 1 (P.init s)

-- | Parses one reference block of a UniProt-KB text file: the RN, RP, RC,
-- RX, RG, RA, RT and RL lines, in that order.
--
-- RN, RP and RL are mandatory; each of the other parts may be absent.
parseRef :: Parser Reference
parseRef = do
    number   <- parseRN <* endOfLine
    position <- parseRP <* endOfLine
    comments <- option [] (parseRCX STRAIN "RC" <* endOfLine)
    xrefs    <- option [] (parseRCX MEDLINE "RX" <* endOfLine)
    groups   <- option [] (many' (parseRG <* endOfLine))
    authors  <- option [] (parseNodes "RA" ',' ';' <* endOfLine)
    title    <- optional (parseRT <* endOfLine)
    location <- parseRL
    pure Reference
        { rn = number
        , rp = position
        , rc = comments
        , rx = xrefs
        , rg = groups
        , ra = authors
        , rt = title
        , rl = location
        }
  where
    -- Reference number in square brackets.
    parseRN :: Parser Int
    parseRN =
        string "RN   [" *> decimal <* char ']'
            -- Despite the specification, evidence may be present here; it
            -- is parsed and thrown away.
            <* many' (char ' ' *> parseEvidence)

    -- Reference position: multi-line text without its last character.
    parseRP :: Parser Text
    parseRP = string "RP   " *> (pack . P.init <$> parseMultiLineComment "RP" 3)

    -- Shared parser for RC and RX lines: one or more token/value pairs,
    -- possibly continued over several lines with the same line code. The
    -- candidate tokens are all values of the enumeration from @start@ on.
    parseRCX :: (Enum a, Show a) => a -> Text -> Parser [(a, Text)]
    parseRCX start name = do
        string name *> string "   "
        firstPair <- parseTokPair start
        morePairs <- many' (parseBreak name *> parseTokPair start)
        pure (firstPair : morePairs)
      where
        -- Tries each token in turn, using its 'Show' text as the item name.
        parseTokPair :: (Enum a, Show a) => a -> Parser (a, Text)
        parseTokPair from =
            foldl1 (<|>)
                [ (tok,) <$> parseDefItem (pack (show tok)) | tok <- [from ..] ]

    -- Reference group: the non-empty rest of the line.
    parseRG :: Parser Text
    parseRG = string "RG   " *> takeWhile1 (not . isEndOfLine)

    -- Reference title: quoted text, possibly spanning several RT lines,
    -- terminated by a closing quote and ';'.
    parseRT :: Parser Text
    parseRT = do
        string "RT   \""
        firstLine <- titleChunk
        moreLines <- many' (endOfLine *> string "RT  " *> titleChunk)
        string "\";"
        pure (pack (hyphenConcat (firstLine : moreLines)))
      where
        -- Non-empty run of characters that are neither a line break nor '"'.
        titleChunk :: Parser String
        titleChunk = many1 (satisfy (\c -> not (isEndOfLine c) && c /= '"'))

    -- Reference location: multi-line text without its last character.
    parseRL :: Parser Text
    parseRL = string "RL   " *> (pack . P.init <$> parseMultiLineComment "RL" 3)

-- | Parses one comment block (CC lines) of a UniProt-KB text file.
--
-- A block starts with @CC   -!- \<topic\>:@. The text follows either after a
-- single space or on the next CC line, indented by seven whitespace
-- characters. Everything from the first " {ECO" onwards is cut off the
-- comment text.
parseCC :: Parser CC
parseCC = do
    string "CC   -!- "
    heading <- takeWhile1 (/= ':') <* char ':'
    (() <$ char ' ') <|> (endOfLine *> string "CC" *> count 7 space $> ())
    body <- pack <$> parseMultiLineComment "CC" 7
    pure CC { topic = heading, comment = withoutEvidence body }
  where
    -- splitOn never returns an empty list, so the fallback is unreachable.
    withoutEvidence :: Text -> Text
    withoutEvidence txt = case splitOn " {ECO" txt of
        (kept : _) -> kept
        []         -> txt

-- | The four-line UniProt-KB copyright notice, exactly as it appears in a
-- record (lines joined by @\\n@, no trailing line break).
copyrightCC :: Text
copyrightCC =
    "CC   -----------------------------------------------------------------------\n\
    \CC   Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms\n\
    \CC   Distributed under the Creative Commons Attribution (CC BY 4.0) License\n\
    \CC   -----------------------------------------------------------------------"


-- | Parses one DR line of a UniProt-KB text file.
--
-- After the @DR   @ prefix the line consists of tokens separated by @"; "@:
-- the resource abbreviation, the resource identifier and at least one more
-- token of optional information.  A semicolon that is not followed by
-- whitespace is kept as part of the token.  The last character of the final
-- token on the line is dropped (presumably the closing period).
parseDR :: Parser DR
parseDR = do
    _ <- string "DR   "
    abbr <- dbToken
    _ <- char ' '
    ident <- dbToken
    extras <- many1 (char ' ' *> dbToken)
    pure DR { resourceAbbr = abbr, resourceId = ident, optionalInfo = extras }
  where
    dbToken :: Parser Text
    dbToken = pack <$> tokenChars

    -- Reads characters up to the next ';' or line break and decides what the
    -- stop position means.
    tokenChars :: Parser String
    tokenChars = do
        chunk <- many1 (satisfy (\ch -> ch /= ';' && not (isEndOfLine ch)))
        upcoming <- peekChar
        case upcoming of
          Just ';' -> char ';' *> afterSemicolon chunk
          -- End of input or end of line: this was the last token, strip its
          -- final character.  'chunk' is non-empty because of 'many1'.
          _        -> pure (P.init chunk)

    -- Called right after a ';' has been consumed.
    afterSemicolon :: String -> Parser String
    afterSemicolon chunk = do
        upcoming <- peekChar
        case upcoming of
          Nothing -> fail "You cannot be here"
          Just ch
            -- "; " ends the token.
            | isSpace ch -> pure chunk
            -- The ';' belongs to the token itself: keep it and read on.
            | otherwise  -> (\rest -> chunk <> (';' : rest)) <$> tokenChars

-- | Parses the PE line of a UniProt-KB text file.
--
-- Exactly five literal lines are recognised; each maps to one 'PE'
-- constructor.  The alternatives are tried in the order listed.
parsePE :: Parser PE
parsePE =
        level "PE   1: Evidence at protein level;"    EvidenceAtProteinLevel
    <|> level "PE   2: Evidence at transcript level;" EvidenceAtTranscriptLevel
    <|> level "PE   3: Inferred from homology;"       InferredFromHomology
    <|> level "PE   4: Predicted;"                    Predicted
    <|> level "PE   5: Uncertain;"                    Uncertain
  where
    -- Matches the literal line and yields the associated value.
    level :: Text -> PE -> Parser PE
    level line evidence = evidence <$ string line

-- | Parses KW lines of a UniProt-KB text file: a list of nodes separated by
-- @;@ and terminated by @.@, wrapped into 'KW'.
parseKW :: Parser KW
parseKW = do
    keywords <- parseNodes "KW" ';' '.'
    pure (KW keywords)

-- | Parses FT lines of UniProt-KB text file. One FT section is parsed.
--
-- A section has the shape @FT   KEY  FROM  TO  description@, where the
-- description is optional and may continue on following @FT@ lines.  The
-- description is folded into one string, split into sentences by periods
-- that are outside parentheses, and sentences starting with @{ECO@ are
-- removed.
parseFT :: Parser FT
parseFT = do
    string "FT   "
    key <- pack <$> many1 (satisfy $ inClass "A-Z_")
    many1 space
    start <- parseFTEndpoint
    many1 space
    stop <- parseFTEndpoint
    -- Either the description starts on this line (after optional spaces),
    -- or there is nothing on this line and only continuation lines (if any).
    rawDescription <-
        (many' (char ' ') *> parseMultiLineComment "FT" 32) <|>
        (hyphenConcat <$> parseMultiLine "FT" 32)
    let sentences = filter (not . ("{ECO" `isPrefixOf`)) (splitByMagic rawDescription)
    pure FT { keyName = key, fromEP = start, toEP = stop, description = sentences }
  where
    -- Parse FT endpoint: "?N", "<N", ">N", plain "N" or a lone "?".
    parseFTEndpoint :: Parser Endpoint
    parseFTEndpoint = (UncertainEP <$> (char '?' *> decimal)) <|>
                      (NTerminalEP <$> (char '<' *> decimal)) <|>
                      (CTerminalEP <$> (char '>' *> decimal)) <|>
                      (ExactEP     <$> decimal) <|>
                      (char '?' $> UnknownEP)

    -- Split string to tokens by periods outside brackets.
    splitByMagic :: String -> [Text]
    splitByMagic txt = pack <$> splitStr 0 [] txt
      where
        -- Arguments: current parenthesis depth, characters of the current
        -- token in reverse order, remaining input.
        splitStr :: Int -> String -> String -> [String]
        -- Input exhausted without a closing period at depth 0 (no trailing
        -- period, or unbalanced parentheses): keep the collected text instead
        -- of silently dropping it, unless it is only whitespace.
        splitStr _ acc []           = [reverse acc | any (not . isSpace) acc]
        splitStr 0 acc ['.']        = [reverse acc]
        -- A period at depth 0 ends the token; one following space is skipped.
        splitStr 0 acc ('.':' ':xs) = reverse acc : splitStr 0 [] xs
        splitStr 0 acc ('.':xs)     = reverse acc : splitStr 0 [] xs
        splitStr n acc ('(':xs)     = splitStr (n + 1) ('(' : acc) xs
        splitStr n acc (')':xs)     = splitStr (n - 1) (')' : acc) xs
        splitStr n acc (x:xs)       = splitStr n (x : acc) xs

-- | Parses SQ lines of a UniProt-KB text file.
--
-- The header line has the shape
-- @SQ   SEQUENCE  \<len\> AA;  \<weight\> MW;  \<crc\> CRC64;@ and is followed by
-- the sequence itself: groups of characters @A-Z@ separated by arbitrary
-- whitespace, which are concatenated into one 'Text'.
parseSQ :: Parser SQ
parseSQ = do
    _ <- string "SQ   SEQUENCE"
    seqLength <- field decimal "AA;"
    weight <- field decimal "MW;"
    checksum <- field (many1 (satisfy (inClass "A-F0-9"))) "CRC64;"
    endOfLine
    chunks <- many1 (skipSpace *> many1 (satisfy (inClass "A-Z")))
    pure SQ { len       = seqLength
            , molWeight = weight
            , crc64     = pack checksum
            , sequ      = pack (concat chunks)
            }
  where
    -- One header field: leading whitespace, the value, exactly one whitespace
    -- character and the literal unit marker.
    field :: Parser a -> Text -> Parser a
    field value unit = many1 space *> value <* (space >> string unit)

-- | Parses the @//@ marker that ends one UniProt record.
parseEnd :: Parser ()
parseEnd = () <$ string "//"

-- | Parses a whole UniProt-KB record.
--
-- Sections are expected in a fixed order, each followed by a line break:
-- ID, AC, DT, DE, optional GN, OS, any number of OG, OC, OX, any number of
-- OH, references, CC sections (optionally followed by the copyright notice,
-- which is discarded), DR lines, PE, optional KW, any number of FT sections,
-- SQ and finally the @//@ terminator.
parseRecord :: Parser Record
parseRecord = do
    idLine <- line parseID
    ac     <- line parseAC
    dt     <- line parseDT
    de     <- line parseDE
    gn     <- option [] (line parseGN)
    os     <- line parseOS
    og     <- many' (line parseOG)
    oc     <- line parseOC
    ox     <- line parseOX
    oh     <- many' (line parseOH)
    refs   <- many' (line parseRef)
    cc     <- many' (line parseCC)
    _      <- option "" (line (string copyrightCC))
    dr     <- many' (line parseDR)
    pe     <- line parsePE
    kw     <- optional (line parseKW)
    ft     <- many' (line parseFT)
    sq     <- line parseSQ
    parseEnd
    pure (Record idLine ac dt de gn os og oc ox oh refs cc dr pe kw ft sq)
  where
    -- Runs a section parser and consumes the line break after it.
    line :: Parser a -> Parser a
    line section = section <* endOfLine

-- | Parses a brace-delimited block such as @{ECO:...}@ and returns it
-- including both braces.  At least one character is required between them.
parseEvidence :: Parser Text
parseEvidence = do
    open <- string "{"
    body <- many1 (notChar '}')
    close <- string "}"
    pure (open <> pack body <> close)

-- = Helper parsers

-- | Turns any parser into one that never fails: a successful result is
-- wrapped in 'Just', a failure yields 'Nothing'.
--
-- >>> parseOnly (optional digit) "1"
-- Right (Just '1')
--
-- >>> parseOnly (optional digit) ""
-- Right Nothing
optional :: Parser a -> Parser (Maybe a)
optional = option Nothing . fmap Just

-- | Parses lines that contain nodes separated by @del@ and ended by @end@.
--
-- Every line starts with the 2-letter mark followed by three spaces.  After a
-- delimiter the list continues either after a single space or on a new line
-- that starts with the same mark.
--
-- NOTE(review): when the terminal character is followed by more text on the
-- same line it is treated as part of the node, but the terminal character
-- itself is not re-inserted into the result (the two pieces are simply
-- concatenated) -- confirm whether that is intended.
parseNodes :: Text          -- ^ Start 2-letter mark.
           -> Char          -- ^ Delimiter char that splits the nodes.
           -> Char          -- ^ Terminal char that ends the node list.
           -> Parser [Text]
parseNodes start del end = lineStart *> nodes
  where
    -- The line mark followed by exactly three spaces.
    lineStart :: Parser ()
    lineStart = string start >> count 3 (char ' ') $> ()

    nodes :: Parser [Text]
    nodes = do
        node <- pack <$> many1 (satisfy nodeChar)
        stop <- char del <|> char end
        if stop == del
          then afterDelimiter node
          else afterTerminal node

    -- More nodes follow, on this line or on the next one.
    afterDelimiter :: Text -> Parser [Text]
    afterDelimiter node = do
        (char ' ' $> ()) <|> (endOfLine *> lineStart)
        (node :) <$> nodes

    -- The terminal char ends the list only at end of line / end of input;
    -- otherwise the text that follows is glued to the current node.
    afterTerminal :: Text -> Parser [Text]
    afterTerminal node = do
        upcoming <- peekChar
        case upcoming of
          Just ch | not (isEndOfLine ch) -> do
              (x:xs) <- nodes
              pure (node <> x : xs)
          _ -> pure [node]

    -- Any character except the delimiter, the terminal char and line breaks.
    nodeChar :: Char -> Bool
    nodeChar = liftA2 (&&) (notInClass [del, end]) (not . isEndOfLine)

-- | Parses the rest of the current line: one or more characters that are not
-- line-break characters.  The line break itself is not consumed.
parseTillEnd :: Parser String
parseTillEnd = many1 (satisfy notLineBreak)
  where
    notLineBreak :: Char -> Bool
    notLineBreak ch = not (isEndOfLine ch)

-- | Parses a multi-line comment as one string: the rest of the current line
-- plus all continuation lines (see 'parseMultiLine'), joined with
-- 'hyphenConcat'.
parseMultiLineComment :: Text -> Int -> Parser String
parseMultiLineComment start skip = do
    firstLine <- parseTillEnd
    otherLines <- parseMultiLine start skip
    pure (hyphenConcat (firstLine : otherLines))

-- | Parses zero or more continuation lines of a multi-line comment, starting
-- at the line break that precedes the first of them.
--
-- Each continuation line must begin with the mark @start@ followed by at
-- least @skip - 1@ spaces.  Only @skip - 1@ spaces are consumed, so when the
-- line is indented by @skip@ spaces one space stays at the front of the
-- returned text to separate words.
parseMultiLine :: Text -> Int -> Parser [String]
parseMultiLine start skip = many' continuation
  where
    continuation :: Parser String
    continuation =
        endOfLine
            *> string start
            *> count (skip - 1) (char ' ') -- leave one space to separate words
            *> parseTillEnd

-- | Parses a break inside a multi-line section: either a line break followed
-- by the section mark @txt@ and three spaces, or a single space on the same
-- line.
parseBreak :: Text -> Parser ()
parseBreak txt = wrapped <|> inline
  where
    wrapped, inline :: Parser ()
    wrapped = endOfLine >> string txt >> string "   " $> ()
    inline  = string " " $> ()

-- | Parses one item like "Something=Something else;" and returns the value
-- part.  Anything from the first @" {"@ on is cut off the value.
parseDefItem :: Text -> Parser Text
parseDefItem name = do
    _ <- string name
    _ <- char '='
    value <- parseTillChar ';'
    -- 'splitOn' never returns an empty list, so 'head' is safe.
    pure (head (splitOn " {" (pack value)))

-- | Parses text up to a specific stop char (e.g. semicolon or dot) that is
-- followed by whitespace or end of input.
--
-- * The stop char is consumed but not included in the result.
-- * A stop char followed by any other character is part of the text: it is
--   kept in the result and parsing continues.
-- * At a line break the text continues on the next line: the line break, two
--   arbitrary characters (the line mark) and two spaces are skipped.
-- * Reaching end of input before a stop char is a parse failure.
parseTillChar :: Char -> Parser String
parseTillChar c = do
    part <- many1 $ satisfy $ liftA2 (&&) (/= c) (not . isEndOfLine)
    nextChar <- peekChar
    case nextChar of
      Nothing -> fail "You cannot be here!"
      Just d
        | d == c -> do
            char c
            afterStop <- peekChar
            case afterStop of
              Nothing -> pure part
              Just e | isSpace e -> pure part
              -- The stop char was not a terminator.  Re-insert the consumed
              -- stop char itself; the peeked character is still in the input
              -- and is picked up by the recursive call, so it must not be
              -- prepended here.
              Just _ -> (part <>) . (c :) <$> parseTillChar c
        | isEndOfLine d -> do
            endOfLine
            count 2 anyChar
            count 2 (char ' ')
            (part <>) <$> parseTillChar c
        -- 'many1' above only stops at the stop char or a line break.
        | otherwise -> fail "You cannot be here!"

-- | Concatenates the lines of a wrapped text, deleting the needless first
-- character (the separating space) of a continuation line when
--
-- * the previous chunk ends with a hyphen, or
-- * the previous chunk ends with a character in @A-Z@ and the second
--   character of the continuation is in @A-Z@ as well.
--
-- In every other case the continuation is appended unchanged.  The function
-- is total: empty chunks and one-character continuations are handled instead
-- of raising an exception.
--
-- >>> hyphenConcat ["well-", " known"]
-- "well-known"
--
-- >>> hyphenConcat ["ABC", " DEF"]
-- "ABCDEF"
--
-- >>> hyphenConcat ["foo", " bar"]
-- "foo bar"
hyphenConcat :: [String] -> String
hyphenConcat []       = []
hyphenConcat [x]      = x
hyphenConcat (x:y:ys) = x ++ hyphenConcat (sy:ys)
  where
    -- Continuation with its leading character removed when it is needless.
    sy :: String
    sy | endsWith (== '-') x                = drop 1 y
       | endsWith isAA x && secondIs isAA y = drop 1 y
       | otherwise                          = y

    -- Does the last character exist and satisfy the predicate?
    endsWith :: (Char -> Bool) -> String -> Bool
    endsWith p s = case reverse s of
        (ch:_) -> p ch
        []     -> False

    -- Does the second character exist and satisfy the predicate?
    secondIs :: (Char -> Bool) -> String -> Bool
    secondIs p s = case s of
        (_:ch:_) -> p ch
        _        -> False

    -- Same set of characters as the class "A-Z".
    isAA :: Char -> Bool
    isAA ch = ch >= 'A' && ch <= 'Z'