-- | A convenience module for *small* @Fasta@ entries, that are completely in
-- memory and *not* to be streamed.
--
-- The @Data.ByteString.Strict.Lens@ module is very helpful for further
-- handling of 'Fasta' entries.
--
-- For convenience, the 'convertString' function from @string-conversions@ is
-- supplied.

module Biobase.Fasta.Strict
  ( module Biobase.Fasta.Strict
  , convertString
  ) where

import           Control.Lens
import           Data.Bifunctor (first)
import           Data.ByteString (ByteString)
import           Data.String.Conversions
import           Data.Void
import           GHC.Generics (Generic)
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Streaming as BSS
import qualified Streaming.Prelude as SP

import           Biobase.Fasta.Streaming as FS
import           Biobase.Types.BioSequence



-- | A *strict* @Fasta@ entry.

data Fasta which ty = Fasta
  { Fasta which ty -> SequenceIdentifier which
_header  !(SequenceIdentifier which)
  , Fasta which ty -> BioSequence ty
_fasta   !(BioSequence ty)
  }
  deriving (Fasta which ty -> Fasta which ty -> Bool
(Fasta which ty -> Fasta which ty -> Bool)
-> (Fasta which ty -> Fasta which ty -> Bool)
-> Eq (Fasta which ty)
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
/= :: Fasta which ty -> Fasta which ty -> Bool
$c/= :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
== :: Fasta which ty -> Fasta which ty -> Bool
$c== :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
Eq,Eq (Fasta which ty)
Eq (Fasta which ty)
-> (Fasta which ty -> Fasta which ty -> Ordering)
-> (Fasta which ty -> Fasta which ty -> Bool)
-> (Fasta which ty -> Fasta which ty -> Bool)
-> (Fasta which ty -> Fasta which ty -> Bool)
-> (Fasta which ty -> Fasta which ty -> Bool)
-> (Fasta which ty -> Fasta which ty -> Fasta which ty)
-> (Fasta which ty -> Fasta which ty -> Fasta which ty)
-> Ord (Fasta which ty)
Fasta which ty -> Fasta which ty -> Bool
Fasta which ty -> Fasta which ty -> Ordering
Fasta which ty -> Fasta which ty -> Fasta which ty
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall k (which :: k) k (ty :: k). Eq (Fasta which ty)
forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Ordering
forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Fasta which ty
min :: Fasta which ty -> Fasta which ty -> Fasta which ty
$cmin :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Fasta which ty
max :: Fasta which ty -> Fasta which ty -> Fasta which ty
$cmax :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Fasta which ty
>= :: Fasta which ty -> Fasta which ty -> Bool
$c>= :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
> :: Fasta which ty -> Fasta which ty -> Bool
$c> :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
<= :: Fasta which ty -> Fasta which ty -> Bool
$c<= :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
< :: Fasta which ty -> Fasta which ty -> Bool
$c< :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
compare :: Fasta which ty -> Fasta which ty -> Ordering
$ccompare :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Ordering
$cp1Ord :: forall k (which :: k) k (ty :: k). Eq (Fasta which ty)
Ord,ReadPrec [Fasta which ty]
ReadPrec (Fasta which ty)
Int -> ReadS (Fasta which ty)
ReadS [Fasta which ty]
(Int -> ReadS (Fasta which ty))
-> ReadS [Fasta which ty]
-> ReadPrec (Fasta which ty)
-> ReadPrec [Fasta which ty]
-> Read (Fasta which ty)
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
forall k (which :: k) k (ty :: k). ReadPrec [Fasta which ty]
forall k (which :: k) k (ty :: k). ReadPrec (Fasta which ty)
forall k (which :: k) k (ty :: k). Int -> ReadS (Fasta which ty)
forall k (which :: k) k (ty :: k). ReadS [Fasta which ty]
readListPrec :: ReadPrec [Fasta which ty]
$creadListPrec :: forall k (which :: k) k (ty :: k). ReadPrec [Fasta which ty]
readPrec :: ReadPrec (Fasta which ty)
$creadPrec :: forall k (which :: k) k (ty :: k). ReadPrec (Fasta which ty)
readList :: ReadS [Fasta which ty]
$creadList :: forall k (which :: k) k (ty :: k). ReadS [Fasta which ty]
readsPrec :: Int -> ReadS (Fasta which ty)
$creadsPrec :: forall k (which :: k) k (ty :: k). Int -> ReadS (Fasta which ty)
Read,Int -> Fasta which ty -> ShowS
[Fasta which ty] -> ShowS
Fasta which ty -> String
(Int -> Fasta which ty -> ShowS)
-> (Fasta which ty -> String)
-> ([Fasta which ty] -> ShowS)
-> Show (Fasta which ty)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall k (which :: k) k (ty :: k). Int -> Fasta which ty -> ShowS
forall k (which :: k) k (ty :: k). [Fasta which ty] -> ShowS
forall k (which :: k) k (ty :: k). Fasta which ty -> String
showList :: [Fasta which ty] -> ShowS
$cshowList :: forall k (which :: k) k (ty :: k). [Fasta which ty] -> ShowS
show :: Fasta which ty -> String
$cshow :: forall k (which :: k) k (ty :: k). Fasta which ty -> String
showsPrec :: Int -> Fasta which ty -> ShowS
$cshowsPrec :: forall k (which :: k) k (ty :: k). Int -> Fasta which ty -> ShowS
Show,(forall x. Fasta which ty -> Rep (Fasta which ty) x)
-> (forall x. Rep (Fasta which ty) x -> Fasta which ty)
-> Generic (Fasta which ty)
forall x. Rep (Fasta which ty) x -> Fasta which ty
forall x. Fasta which ty -> Rep (Fasta which ty) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall k (which :: k) k (ty :: k) x.
Rep (Fasta which ty) x -> Fasta which ty
forall k (which :: k) k (ty :: k) x.
Fasta which ty -> Rep (Fasta which ty) x
$cto :: forall k (which :: k) k (ty :: k) x.
Rep (Fasta which ty) x -> Fasta which ty
$cfrom :: forall k (which :: k) k (ty :: k) x.
Fasta which ty -> Rep (Fasta which ty) x
Generic)
makeLenses ''Fasta

-- | If you don't want to deal with the phantom types.

type FastaUntyped = Fasta Void Void

-- | Render a 'Fasta' entry to a 'ByteString'. Will end with a final @\n@ in
-- any case.

fastaToByteString  Int  Fasta which ty  ByteString
{-# Inlinable fastaToByteString #-}
fastaToByteString :: Int -> Fasta which ty -> ByteString
fastaToByteString Int
k' Fasta{BioSequence ty
SequenceIdentifier which
_fasta :: BioSequence ty
_header :: SequenceIdentifier which
_fasta :: forall k (which :: k) k (ty :: k). Fasta which ty -> BioSequence ty
_header :: forall k (which :: k) k (ty :: k).
Fasta which ty -> SequenceIdentifier which
..} = Char -> ByteString -> ByteString
BS.cons Char
'>' (SequenceIdentifier which
_headerSequenceIdentifier which
-> Getting ByteString (SequenceIdentifier which) ByteString
-> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString (SequenceIdentifier which) ByteString
forall s t. Rewrapping s t => Iso s t (Unwrapped s) (Unwrapped t)
_Wrapped) ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\n" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString -> ByteString
go (BioSequence ty
_fastaBioSequence ty
-> Getting ByteString (BioSequence ty) ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString (BioSequence ty) ByteString
forall s t. Rewrapping s t => Iso s t (Unwrapped s) (Unwrapped t)
_Wrapped)
  where go :: ByteString -> ByteString
go (Int -> ByteString -> (ByteString, ByteString)
BS.splitAt Int
k  (ByteString
hd,ByteString
tl))
          | ByteString -> Bool
BS.null ByteString
hd = ByteString
forall a. Monoid a => a
mempty
          | Bool
otherwise  = ByteString
hd ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\n" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString -> ByteString
go ByteString
tl
        k :: Int
k = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 Int
k'

-- | Render a 'Fasta' entry to a 'Builder'. Will end with a final @\n@ in
-- any case.

fastaToBuilder  Int  Fasta which ty  BB.Builder
{-# Inlinable fastaToBuilder #-}
fastaToBuilder :: Int -> Fasta which ty -> Builder
fastaToBuilder Int
k' Fasta{BioSequence ty
SequenceIdentifier which
_fasta :: BioSequence ty
_header :: SequenceIdentifier which
_fasta :: forall k (which :: k) k (ty :: k). Fasta which ty -> BioSequence ty
_header :: forall k (which :: k) k (ty :: k).
Fasta which ty -> SequenceIdentifier which
..} = Char -> Builder
BB.char8 Char
'>' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> (ByteString -> Builder
BB.byteString (ByteString -> Builder) -> ByteString -> Builder
forall a b. (a -> b) -> a -> b
$ SequenceIdentifier which
_headerSequenceIdentifier which
-> Getting ByteString (SequenceIdentifier which) ByteString
-> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString (SequenceIdentifier which) ByteString
forall s t. Rewrapping s t => Iso s t (Unwrapped s) (Unwrapped t)
_Wrapped) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
'\n' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
go (BioSequence ty
_fastaBioSequence ty
-> Getting ByteString (BioSequence ty) ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString (BioSequence ty) ByteString
forall s t. Rewrapping s t => Iso s t (Unwrapped s) (Unwrapped t)
_Wrapped)
  where go :: ByteString -> Builder
go (Int -> ByteString -> (ByteString, ByteString)
BS.splitAt Int
k  (ByteString
hd,ByteString
tl))
          | ByteString -> Bool
BS.null ByteString
hd = Builder
forall a. Monoid a => a
mempty
          | Bool
otherwise  = ByteString -> Builder
BB.byteString ByteString
hd Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
'\n' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
go ByteString
tl
        k :: Int
k = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 Int
k'

-- | Try to parse a 'ByteString' as a 'Fasta', failing with 'Left', succees
-- with 'Right'.

byteStringToFasta  ByteString  Either String (Fasta which ty)
{-# Inlinable byteStringToFasta #-}
byteStringToFasta :: ByteString -> Either String (Fasta which ty)
byteStringToFasta (ByteString -> [ByteString]
BS.lines  [ByteString]
ls)
  | [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
ls = String -> Either String (Fasta which ty)
forall a b. a -> Either a b
Left String
"empty bytestring"
  | Just (Char
z, ByteString
hdr)  ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
h, Char
z Char -> ByteString -> Bool
`BS.elem` ByteString
">;" = Fasta which ty -> Either String (Fasta which ty)
forall a b. b -> Either a b
Right (Fasta which ty -> Either String (Fasta which ty))
-> Fasta which ty -> Either String (Fasta which ty)
forall a b. (a -> b) -> a -> b
$ Fasta :: forall k k (which :: k) (ty :: k).
SequenceIdentifier which -> BioSequence ty -> Fasta which ty
Fasta { _header :: SequenceIdentifier which
_header = ByteString -> SequenceIdentifier which
forall k (which :: k). ByteString -> SequenceIdentifier which
SequenceIdentifier ByteString
hdr, _fasta :: BioSequence ty
_fasta = ByteString -> BioSequence ty
forall k (which :: k). ByteString -> BioSequence which
BioSequence (ByteString -> BioSequence ty) -> ByteString -> BioSequence ty
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BS.concat [ByteString]
ts }
  | Bool
otherwise = String -> Either String (Fasta which ty)
forall a b. a -> Either a b
Left String
"no '>'/';' first character"
  where ByteString
h:[ByteString]
ts = [ByteString]
ls

-- | Try to parse a 'ByteString' as multiple 'Fasta' entries. Even though this
-- is using the underlying streaming interface, this is not streaming.

{-
byteStringToMultiFasta
  ∷ BSL.ByteString → [Fasta which ty]
{-# Inlinable byteStringToMultiFasta #-}
byteStringToMultiFasta bsl = map (view windowedFasta) $ runIdentity bss
  where bss = SP.toList_ . streamingFasta (HeaderSize maxBound) (OverlapSize 0) (CurrentSize maxBound) $ BSS.fromLazy bsl
-}

-- | A lens that goes from a 'BioSequenceWindow' to a 'Fasta'.

{-
windowedFasta ∷ Lens' (BioSequenceWindow w ty k) (Fasta w ty)
{-# Inline windowedFasta #-}
windowedFasta = lens lr rl
  where lr bsw = Fasta { _header = bsw^.bswIdentifier, _fasta = bsw^.bswSequence }
        rl bsw f = set bswSequence (f^.fasta) $ set bswIdentifier (f^.header) bsw
-}

-- | A prism from a 'ByteString' to a 'Fasta'. Note that this will only be an
-- identity if the underlying fasta file is rendered with @k@ characters per
-- line.

rawFasta  Int  Prism' ByteString (Fasta which ty)
{-# Inline rawFasta #-}
rawFasta :: Int -> Prism' ByteString (Fasta which ty)
rawFasta Int
k = (Fasta which ty -> ByteString)
-> (ByteString -> Either ByteString (Fasta which ty))
-> Prism' ByteString (Fasta which ty)
forall b t s a. (b -> t) -> (s -> Either t a) -> Prism s t a b
prism (Int -> Fasta which ty -> ByteString
forall k k (which :: k) (ty :: k).
Int -> Fasta which ty -> ByteString
fastaToByteString Int
k) ((ByteString -> Either ByteString (Fasta which ty))
 -> Prism' ByteString (Fasta which ty))
-> (ByteString -> Either ByteString (Fasta which ty))
-> Prism' ByteString (Fasta which ty)
forall a b. (a -> b) -> a -> b
$ \ByteString
bs  (String -> ByteString)
-> Either String (Fasta which ty)
-> Either ByteString (Fasta which ty)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (ByteString -> String -> ByteString
forall a b. a -> b -> a
const ByteString
bs) (Either String (Fasta which ty)
 -> Either ByteString (Fasta which ty))
-> Either String (Fasta which ty)
-> Either ByteString (Fasta which ty)
forall a b. (a -> b) -> a -> b
$ ByteString -> Either String (Fasta which ty)
forall k k (which :: k) (ty :: k).
ByteString -> Either String (Fasta which ty)
byteStringToFasta ByteString
bs