{-# LANGUAGE OverloadedStrings #-}

module Bio.GB.Parser
  ( genBankP
  , rangeP
  ) where

import           Bio.GB.Type                (Feature (..), Form (..), GenBankSequence (..),
                                             Locus (..), Meta (..), Parser, Reference (..),
                                             Source (..), Version (..))
import           Bio.Sequence               (Border (..), MarkedSequence, Range (..),
                                             RangeBorder (..), markedSequence, shiftRange)
import           Control.Monad.Combinators  (many, manyTill, optional, some, (<|>))
import           Data.Char                  (isAlphaNum, isSpace, isUpper)
import           Data.Functor               (($>))
import           Data.Text                  (Text, intercalate, pack, splitOn, unpack)
import qualified Data.Text                  as T
import           Text.Megaparsec            (notFollowedBy, option, satisfy, sepBy1, takeWhile1P,
                                             takeWhileP, try, (<?>))
import           Text.Megaparsec.Char       (char, digitChar, eol, letterChar, string)
import           Text.Megaparsec.Char.Lexer (decimal)

-- | Parser of .gb file.
--
genBankP :: Parser GenBankSequence
genBankP :: Parser GenBankSequence
genBankP =  Meta -> MarkedSequence Feature Char -> GenBankSequence
GenBankSequence
        forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ParsecT Void Text Identity Meta
metaP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"Meta parser")
        forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (ParsecT Void Text Identity (MarkedSequence Feature Char)
gbSeqP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"GB sequence parser")
        forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<*  forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"//" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
eolSpaceP 

--------------------------------------------------------------------------------
-- Block with meta-information.
--------------------------------------------------------------------------------

metaP :: Parser Meta
metaP :: ParsecT Void Text Identity Meta
metaP = do
  Locus
locus'      <- ParsecT Void Text Identity Locus
locusP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"Locus parser"

  Maybe Text
definitionM <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Text
definitionP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"Definition parser"
  Maybe Text
accessionM  <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Text
accessionP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"Accession parser"
  Maybe Version
versionM    <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Version
versionP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"Version parser"
  Maybe Text
keywordsM   <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Text
keywordsP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"Keywords parser"
  Maybe Source
sourceM     <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Source
sourceP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"Source parser"
  [Reference]
referencesL <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many Parser Reference
referenceP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"References parser"
  [Text]
commentsL   <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many Parser Text
commentP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"Comments parser"

  forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ Locus
-> Maybe Text
-> Maybe Text
-> Maybe Version
-> Maybe Text
-> Maybe Source
-> [Reference]
-> [Text]
-> Meta
Meta Locus
locus' Maybe Text
definitionM Maybe Text
accessionM Maybe Version
versionM Maybe Text
keywordsM Maybe Source
sourceM [Reference]
referencesL [Text]
commentsL

locusP :: Parser Locus
locusP :: ParsecT Void Text Identity Locus
locusP = forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"LOCUS" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Text -> Int -> Text -> Maybe Form -> Maybe Text -> Text -> Locus
Locus
       forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Void Text Identity (Tokens Text)
textP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
space                                      -- name
       forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Char, Num a) =>
m a
decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"bp" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
space            -- sequence length
       forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ParsecT Void Text Identity (Tokens Text)
textP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
space                                      -- molecule type
       forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Form
formP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
space                               -- form of sequence
       forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (String -> Text
pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some (forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy Char -> Bool
isUpper)) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
space   -- GenBank division
       forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ParsecT Void Text Identity (Tokens Text)
textP                                               -- modification date
       forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<*  Parser ()
eolSpaceP)
  where
    textP :: ParsecT Void Text Identity (Tokens Text)
textP = forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P forall a. Maybe a
Nothing forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace

    formP :: Parser Form
    formP :: Parser Form
formP = forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"linear" forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Form
Linear) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"circular" forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Form
Circular)

definitionP :: Parser Text
definitionP :: Parser Text
definitionP =  forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"DEFINITION" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parser Text
emptyP forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
someLinesP)

accessionP :: Parser Text
accessionP :: Parser Text
accessionP =  forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"ACCESSION" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parser Text
emptyP forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (String -> Text
pack
          forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try ParsecT Void Text Identity Char
alphaNumChar forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'_')
          forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<*  Parser ()
eolSpaceP))

versionP :: Parser Version
versionP :: Parser Version
versionP =  forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"VERSION" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space
         forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ((Text -> Maybe Text -> Version
Version forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
emptyP forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Text -> Maybe Text -> Version
Version
        forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (String -> Text
pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some ParsecT Void Text Identity Char
versionP')
        forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (String -> Text
pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"GI:" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some ParsecT Void Text Identity Char
versionP'))
        forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<*  Parser ()
eolSpaceP))
  where
    versionP' :: ParsecT Void Text Identity Char
versionP' = forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try ParsecT Void Text Identity Char
alphaNumChar forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'_') forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'.'

keywordsP :: Parser Text
keywordsP :: Parser Text
keywordsP =  forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"KEYWORDS"
          forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parser Text
emptyP
         forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
textWithSpacesP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
eolSpaceP))

sourceP :: Parser Source
sourceP :: Parser Source
sourceP =  forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"SOURCE" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space
        forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Text -> Maybe Text -> Source
Source
       forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
someLinesP
       forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Text
organismP)
  where
    organismP :: Parser Text
organismP = forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"  ORGANISM" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
someLinesP

referenceP :: Parser Reference
referenceP :: Parser Reference
referenceP = forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"REFERENCE" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space
           forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (((\Text
x -> Text
-> Maybe Text
-> Maybe Text
-> Maybe Text
-> Maybe Text
-> Reference
Reference Text
x forall a. Maybe a
Nothing forall a. Maybe a
Nothing forall a. Maybe a
Nothing forall a. Maybe a
Nothing) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
emptyP) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Text
-> Maybe Text
-> Maybe Text
-> Maybe Text
-> Maybe Text
-> Reference
Reference
          forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
someLinesP
          forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"  AUTHORS" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
someLinesP)
          forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"  TITLE" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
someLinesP)
          forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"  JOURNAL" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
someLinesP)
          forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"  PUBMED" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
someLinesP)))

commentP :: Parser Text
commentP :: Parser Text
commentP = forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"COMMENT" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parser Text
emptyP forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
' ') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
someLinesP))

--------------------------------------------------------------------------------
-- Block with FEATURES table.
--------------------------------------------------------------------------------

featuresP :: Parser [(Feature, Range)]
featuresP :: Parser [(Feature, Range)]
featuresP = -- skip unknown fields and stop on line with "FEATURES" 
          forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill (Parser Text
textWithSpacesP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
eolSpaceP) (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"FEATURES") forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space
          forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
textWithSpacesP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
eolSpaceP
          forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some (ParsecT Void Text Identity (Feature, Range)
featureP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"Single feature parser")

featureP :: Parser (Feature, Range)
featureP :: ParsecT Void Text Identity (Feature, Range)
featureP = do
    Tokens Text
_ <- forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Text
featureIndent1

    Text
featureName'      <- forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP forall a. Maybe a
Nothing (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
space
    Range
range <- ParsecT Void Text Identity Range
rangeP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
eolSpaceP

    [(Text, Text)]
props <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some Parser (Text, Text)
propsP

    -- Ranges are 1-based, but the underlying Vector in the Feature is 0-based.
    -- We shift the range left so the numberings match.
    --
    forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> [(Text, Text)] -> Feature
Feature Text
featureName' [(Text, Text)]
props, Int -> Range -> Range
shiftRange (-Int
1) Range
range)

rangeP :: Parser Range
rangeP :: ParsecT Void Text Identity Range
rangeP =  forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try ParsecT Void Text Identity Range
spanP 
      forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try ParsecT Void Text Identity Range
betweenP 
      forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try ParsecT Void Text Identity Range
pointP
      forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try ParsecT Void Text Identity Range
joinP
      forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Void Text Identity Range
complementP
  where
    spanP :: Parser Range
    spanP :: ParsecT Void Text Identity Range
spanP = do
        Border
lowerBorderType <- forall (m :: * -> *) a. Alternative m => a -> m a -> m a
option Border
Precise (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'<' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Border
Exceeded)
        Int
lowerBorderLocation <- forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Char, Num a) =>
m a
decimal
        Tokens Text
_ <- forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
".."
        Border
upperBorderType <- forall (m :: * -> *) a. Alternative m => a -> m a -> m a
option Border
Precise (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'>' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Border
Exceeded)
        Int
upperBorderLocation <- forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Char, Num a) =>
m a
decimal
        forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ RangeBorder -> RangeBorder -> Range
Span (Border -> Int -> RangeBorder
RangeBorder Border
lowerBorderType Int
lowerBorderLocation) (Border -> Int -> RangeBorder
RangeBorder Border
upperBorderType Int
upperBorderLocation) 
                
    betweenP :: Parser Range
    betweenP :: ParsecT Void Text Identity Range
betweenP = do
        Int
before <- forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Char, Num a) =>
m a
decimal
        Token Text
_ <- forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'^'
        Int
after <- forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Char, Num a) =>
m a
decimal
        forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ Int -> Int -> Range
Between Int
before Int
after

    pointP :: Parser Range
    pointP :: ParsecT Void Text Identity Range
pointP = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int -> Range
Point forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Char, Num a) =>
m a
decimal
   
    joinP :: Parser Range
    joinP :: ParsecT Void Text Identity Range
joinP = forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"join(" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Range] -> Range
Join (ParsecT Void Text Identity Range
rangeP forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
`sepBy1` forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
',') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
')'

    complementP :: Parser Range
    complementP :: ParsecT Void Text Identity Range
complementP = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Range -> Range
Complement forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"complement(" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParsecT Void Text Identity Range
rangeP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
')'
        

propsP :: Parser (Text, Text)
propsP :: Parser (Text, Text)
propsP = do
    Tokens Text
_ <- forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Text
featureIndent2
    Token Text
_ <- forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'/'
    Text
propName <- forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P forall a. Maybe a
Nothing (forall a. Eq a => a -> a -> Bool
/= Char
'=')
    Token Text
_ <- forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'='

    Text
propText <- forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try ((forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'\"' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P forall a. Maybe a
Nothing (forall a. Eq a => a -> a -> Bool
/= Char
'\"') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'\"' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
eolSpaceP)
             forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
multiLineProp)

    let propTextCorrect :: Text
propTextCorrect = forall a. Monoid a => [a] -> a
mconcat forall a b. (a -> b) -> a -> b
$ forall a. (a -> Bool) -> [a] -> [a]
filter (forall a. Eq a => a -> a -> Bool
/= Text
featureIndent2) forall a b. (a -> b) -> a -> b
$ HasCallStack => Text -> Text -> [Text]
splitOn Text
featureIndent2 Text
propText

    forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text
propName, Text
propTextCorrect)
  where
    indLine :: Parser Text
    indLine :: Parser Text
indLine = do
        Tokens Text
_ <- forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Text
featureIndent2
        forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'/')
        Text
text <- Parser Text
textWithSpacesP 
        Parser ()
eolSpaceP
        forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
text

    multiLineProp :: Parser Text
    multiLineProp :: Parser Text
multiLineProp = do
        Text
fstText <- Parser Text
textWithSpacesP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
eolSpaceP 
        [Text]
rest <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parser Text
indLine)
        forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ [Text] -> Text
T.concat (Text
fstText forall a. a -> [a] -> [a]
: [Text]
rest) 

    

-- | First level of identation in FEATURES table file.
--
featureIndent1 :: Text
featureIndent1 :: Text
featureIndent1 = String -> Text
pack forall a b. (a -> b) -> a -> b
$ forall a. Int -> a -> [a]
replicate Int
5 Char
' '

-- | Second level of identation in FEATURES table file.
--
featureIndent2 :: Text
featureIndent2 :: Text
featureIndent2 = String -> Text
pack forall a b. (a -> b) -> a -> b
$ forall a. Int -> a -> [a]
replicate Int
21 Char
' '

--------------------------------------------------------------------------------
-- Block with ORIGIN table.
--------------------------------------------------------------------------------

originP :: Parser String
originP :: ParsecT Void Text Identity String
originP =  (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"ORIGIN" forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"String ORIGIN") forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
eolSpaceP
        forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure [[String]] -> String
toText
       forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some (Parser ()
space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
digitChar forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
space1
        forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some (forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
letterChar forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parser ()
space1 forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
eolSpaceP)))
  where
    toText :: [[String]] -> String
    toText :: [[String]] -> String
toText = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat

--------------------------------------------------------------------------------
-- Parser of 'GenBankSequence' from FEATURES and ORIGIN tables.
--------------------------------------------------------------------------------
gbSeqP :: Parser (MarkedSequence Feature Char)
gbSeqP :: ParsecT Void Text Identity (MarkedSequence Feature Char)
gbSeqP = do
    [(Feature, Range)]
features <- (Parser [(Feature, Range)]
featuresP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"Features parser")

    -- An extract from the GB specification (https://www.ncbi.nlm.nih.gov/genbank/release/current/):
    --    NOTE: The BASE COUNT linetype is obsolete and was removed
    --    from the GenBank flatfile format in October 2003.
    --  Anyway, here, in 2021, we still might get plasmids with the BASE COUNT line present.
    --
    Maybe (Tokens Text)
_ <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"BASE COUNT" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
textWithSpacesP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
eol)

    String
origin   <- (ParsecT Void Text Identity String
originP forall e s (m :: * -> *) a.
MonadParsec e s m =>
m a -> String -> m a
<?> String
"Origin parser")

    forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
unpack) forall (f :: * -> *) a. Applicative f => a -> f a
pure (forall s (m :: * -> *).
(IsMarkedSequence s, MonadError Text m) =>
[Element s] -> [(Marking s, Range)] -> m s
markedSequence String
origin [(Feature, Range)]
features)

--------------------------------------------------------------------------------
-- Utility functions.
--------------------------------------------------------------------------------

-- | First level of identation in .gb file.
--
firstIndent :: Text
firstIndent :: Text
firstIndent = String -> Text
pack forall a b. (a -> b) -> a -> b
$ forall a. Int -> a -> [a]
replicate Int
12 Char
' '

eolSpaceP :: Parser ()
eolSpaceP :: Parser ()
eolSpaceP = () forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
' ') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
eol

emptyP :: Parser Text
emptyP :: Parser Text
emptyP = forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
' ') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'.' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
eolSpaceP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
"."

textWithSpacesP :: Parser Text
textWithSpacesP :: Parser Text
textWithSpacesP = forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP forall a. Maybe a
Nothing (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [Char
'\n', Char
'\r'])

someLinesP :: Parser Text
someLinesP :: Parser Text
someLinesP = Text -> [Text] -> Text
intercalate Text
"\n" forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> ParsecT Void Text Identity [Text]
someLinesIndentP Text
firstIndent

someLinesIndentP :: Text -> Parser [Text]
someLinesIndentP :: Text -> ParsecT Void Text Identity [Text]
someLinesIndentP Text
indent =  (:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
textWithSpacesP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
eolSpaceP
                       forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Text
indent forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
textWithSpacesP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
eolSpaceP))

space :: Parser ()
space :: Parser ()
space = () forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy Char -> Bool
isSpace)

space1 :: Parser ()
space1 :: Parser ()
space1 = () forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
some forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy Char -> Bool
isSpace)

alphaNumChar :: Parser Char
alphaNumChar :: ParsecT Void Text Identity Char
alphaNumChar = forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy Char -> Bool
isAlphaNum