module Bio.GB.Writer
( genBankToText
) where
import Bio.GB.Type (Feature (..), GenBankSequence (..),
Locus (..), Meta (..), Reference (..),
Source (..), Version (..))
import Bio.Sequence (Range, markings, toList)
import Control.Lens ((^.))
import qualified Data.List.Split as S (chunksOf)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T (append, chunksOf, intercalate, length,
lines, null, pack, toLower, unwords)
genBankToText :: GenBankSequence -> Text
genBankToText GenBankSequence{..} = interNewLine parts <> "\n"
where
parts = [ metaToText meta
, featuresToText $ gbSeq ^. markings
, originToText $ T.pack $ toList gbSeq
, "//"
]
metaToText :: Meta -> Text
metaToText Meta{..} = interNewLine parts
where
parts = [ locusToText locus
, textFromMaybe $ fmap (processMany metaIndent "DEFINITION") definition
, textFromMaybe $ fmap (processMany metaIndent "ACCESSION") accession
, textFromMaybe $ fmap versionToText version
, textFromMaybe $ fmap (processMany metaIndent "KEYWORDS") keywords
, textFromMaybe $ fmap sourceToText source
]
<> fmap referenceToText references
<> fmap (processMany metaIndent "COMMENT") comments
locusToText :: Locus -> Text
locusToText Locus{..} = toIndent metaIndent "LOCUS" <> locusText
where
locusList = [ name
, showText len <> " bp"
, molType
, textFromMaybe $ fmap (T.toLower . showText) form
, textFromMaybe gbDivision
, modificationDate
]
locusText = T.intercalate (spaces 5) $ filter (not . T.null) locusList
sourceToText :: Source -> Text
sourceToText Source{..} = interNewLine $ mainPart : pure (textFromMaybe organismPart)
where
mainPart = processMany metaIndent "SOURCE" sourceT
organismPart = fmap (processMany metaIndent (prependIndent metaPreIndent "ORGANISM")) organism
versionToText :: Version -> Text
versionToText Version{..} = toIndent metaIndent "VERSION" <> version
where
version = versionT <> spaces 5 <> maybe mempty ("GI:" <>) gbId
referenceToText :: Reference -> Text
referenceToText Reference{..} = interNewLine $ mainPart : parts
where
mainPart = processMany metaIndent "REFERENCE" referenceT
sectionNames = fmap (prependIndent metaPreIndent) ["AUTHORS", "TITLE", "JOURNAL", "PUBMED"]
sections = [authors, title, journal, pubmed]
parts = zipWith (\a -> textFromMaybe . fmap (processMany metaIndent a)) sectionNames sections
metaIndent :: Int
metaIndent = 12
metaPreIndent :: Int
metaPreIndent = 2
featuresToText :: [(Feature, Range)] -> Text
featuresToText l = interNewLine $ mainPart : sections
where
mainPart = processMany featuresIndent "FEATURES" featuresText
sections = fmap featureToText l
featuresText :: Text
featuresText = "Location/Qualifiers"
featureToText :: (Feature, Range) -> Text
featureToText (Feature{..}, range) = interNewLine $ mainPart : sections
where
mainPart = processMany featuresIndent (prependIndent featuresPreIndent fName) (featureRangeToText fStrand53 range)
sections = fmap featurePropToText fProps
featurePropToText :: (Text, Text) -> Text
featurePropToText (nameF, textF) = mainPart
where
mainPart = processMany featuresIndent mempty ("/" <> nameF <> "=\"" <> textF <> "\"")
featureRangeToText :: Bool -> Range -> Text
featureRangeToText complement (l, r) | l == r - 1 = processComplement complement $ showText (l + 1)
| otherwise = processComplement complement $ showText (l + 1) <> ".." <> showText r
where
processComplement :: Bool -> Text -> Text
processComplement True text = text
processComplement False text = "complement(" <> text <> ")"
featuresIndent :: Int
featuresIndent = 21
featuresPreIndent :: Int
featuresPreIndent = 5
originToText :: Text -> Text
originToText text = interNewLine $ mainPart : parts
where
mainPart = "ORIGIN"
manyLines = S.chunksOf lengthOfLineChunk $ T.chunksOf lengthOfChunk text
parts = zipWith processLine [1, 1 + lengthOfChunk * lengthOfLineChunk..] manyLines
processLine :: Int -> [Text] -> Text
processLine startInd = T.unwords . (prependIndent (originIndent - T.length indText) indText :)
where
indText = showText startInd
lengthOfChunk :: Int
lengthOfChunk = 10
lengthOfLineChunk :: Int
lengthOfLineChunk = 6
originIndent :: Int
originIndent = 9
processMany :: Int -> Text -> Text -> Text
processMany indent name "" = toIndent indent name
processMany indent name text = interNewLine resLines
where
(x : xs) = T.lines text
resLines = toIndent indent name <> x
: fmap (prependIndent indent) xs
interNewLine :: [Text] -> Text
interNewLine = T.intercalate "\n" . filter (not . T.null)
textFromMaybe :: Maybe Text -> Text
textFromMaybe = fromMaybe mempty
toIndent :: Int -> Text -> Text
toIndent indent name = name <> (spaces $ indent - (T.length name))
prependIndent :: Int -> Text -> Text
prependIndent = T.append . spaces
showText :: Show a => a -> Text
showText = T.pack . show
spaces :: Int -> Text
spaces = T.pack . flip replicate ' '