{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}

{-|
Module      : Hyax.Abif.Fasta
Description : Read a FASTA file
Copyright   : (c) HyraxBio, 2018
License     : BSD3
Maintainer  : andre@hyraxbio.co.za, andre@andrevdm.com
Stability   : beta

Functionality for reading FASTA files
-}
module Hyrax.Abif.Fasta
    ( Fasta (..)
    , parseFasta
    ) where

import           Protolude hiding (lines)
import qualified Data.Text as Txt

-- | FASTA data
data Fasta = Fasta { Fasta -> Text
fastaName :: !Text -- ^ Name
                   , Fasta -> Text
fastaRead :: !Text -- ^ Data
                   } deriving (Int -> Fasta -> ShowS
[Fasta] -> ShowS
Fasta -> String
(Int -> Fasta -> ShowS)
-> (Fasta -> String) -> ([Fasta] -> ShowS) -> Show Fasta
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Fasta] -> ShowS
$cshowList :: [Fasta] -> ShowS
show :: Fasta -> String
$cshow :: Fasta -> String
showsPrec :: Int -> Fasta -> ShowS
$cshowsPrec :: Int -> Fasta -> ShowS
Show, Fasta -> Fasta -> Bool
(Fasta -> Fasta -> Bool) -> (Fasta -> Fasta -> Bool) -> Eq Fasta
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Fasta -> Fasta -> Bool
$c/= :: Fasta -> Fasta -> Bool
== :: Fasta -> Fasta -> Bool
$c== :: Fasta -> Fasta -> Bool
Eq)


-- | Parse the data for a single FASTA into a list of 'Fasta' values.
-- Single and multi-line FASTAs are supported.
-- Used by "Hyrax.Abif.Generate" to read weighted-FASTAs
parseFasta :: Text -> Either Text [Fasta]
parseFasta :: Text -> Either Text [Fasta]
parseFasta Text
s =
  [Fasta] -> [Fasta]
forall a. [a] -> [a]
reverse ([Fasta] -> [Fasta]) -> Either Text [Fasta] -> Either Text [Fasta]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Text] -> Maybe Text -> Text -> [Fasta] -> Either Text [Fasta]
go (Text -> [Text]
Txt.lines Text
s) Maybe Text
forall a. Maybe a
Nothing Text
"" []

  where
    go :: [Text] -> Maybe Text -> Text -> [Fasta] -> Either Text [Fasta]
    go :: [Text] -> Maybe Text -> Text -> [Fasta] -> Either Text [Fasta]
go (Text
line:[Text]
lines) (Just Text
name) Text
read [Fasta]
acc =
      if Int -> Text -> Text
Txt.take Int
1 Text
line Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
">"
      then [Text] -> Maybe Text -> Text -> [Fasta] -> Either Text [Fasta]
go [Text]
lines (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
name) (Text
read Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
line) [Fasta]
acc
      else [Text] -> Maybe Text -> Text -> [Fasta] -> Either Text [Fasta]
go [Text]
lines (Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
Txt.drop Int
1 Text
line) Text
"" (Text -> Text -> Fasta
Fasta (Text -> Text
Txt.strip Text
name) Text
read Fasta -> [Fasta] -> [Fasta]
forall a. a -> [a] -> [a]
: [Fasta]
acc)
    go (Text
line:[Text]
lines) Maybe Text
Nothing Text
_read [Fasta]
acc =
      if Int -> Text -> Text
Txt.take Int
1 Text
line Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
">"
      then [Text] -> Maybe Text -> Text -> [Fasta] -> Either Text [Fasta]
go [Text]
lines (Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
Txt.strip (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
Txt.drop Int
1 (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
line) Text
"" [Fasta]
acc
      else Text -> Either Text [Fasta]
forall a b. a -> Either a b
Left Text
"Expecting name"
    go [] Maybe Text
Nothing Text
_ [Fasta]
acc =
      [Fasta] -> Either Text [Fasta]
forall a b. b -> Either a b
Right [Fasta]
acc
    go [] (Just Text
_name) Text
"" [Fasta]
_acc =
      Text -> Either Text [Fasta]
forall a b. a -> Either a b
Left Text
"Expecting read"
    go [] (Just Text
name) Text
read [Fasta]
acc =
      [Fasta] -> Either Text [Fasta]
forall a b. b -> Either a b
Right ([Fasta] -> Either Text [Fasta]) -> [Fasta] -> Either Text [Fasta]
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Fasta
Fasta (Text -> Text
Txt.strip Text
name) Text
read Fasta -> [Fasta] -> [Fasta]
forall a. a -> [a] -> [a]
: [Fasta]
acc