module Biobase.Fasta.Strict
( module Biobase.Fasta.Strict
, convertString
) where
import Control.Lens
import Data.Bifunctor (first)
import Data.ByteString (ByteString)
import Data.String.Conversions
import Data.Void
import GHC.Generics (Generic)
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ByteString.Streaming as BSS
import qualified Streaming.Prelude as SP
import Biobase.Fasta.Streaming as FS
import Biobase.Types.BioSequence
data Fasta which ty = Fasta
{ ∷ !(SequenceIdentifier which)
, Fasta which ty -> BioSequence ty
_fasta ∷ !(BioSequence ty)
}
deriving (Fasta which ty -> Fasta which ty -> Bool
(Fasta which ty -> Fasta which ty -> Bool)
-> (Fasta which ty -> Fasta which ty -> Bool)
-> Eq (Fasta which ty)
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
/= :: Fasta which ty -> Fasta which ty -> Bool
$c/= :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
== :: Fasta which ty -> Fasta which ty -> Bool
$c== :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
Eq,Eq (Fasta which ty)
Eq (Fasta which ty)
-> (Fasta which ty -> Fasta which ty -> Ordering)
-> (Fasta which ty -> Fasta which ty -> Bool)
-> (Fasta which ty -> Fasta which ty -> Bool)
-> (Fasta which ty -> Fasta which ty -> Bool)
-> (Fasta which ty -> Fasta which ty -> Bool)
-> (Fasta which ty -> Fasta which ty -> Fasta which ty)
-> (Fasta which ty -> Fasta which ty -> Fasta which ty)
-> Ord (Fasta which ty)
Fasta which ty -> Fasta which ty -> Bool
Fasta which ty -> Fasta which ty -> Ordering
Fasta which ty -> Fasta which ty -> Fasta which ty
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall k (which :: k) k (ty :: k). Eq (Fasta which ty)
forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Ordering
forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Fasta which ty
min :: Fasta which ty -> Fasta which ty -> Fasta which ty
$cmin :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Fasta which ty
max :: Fasta which ty -> Fasta which ty -> Fasta which ty
$cmax :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Fasta which ty
>= :: Fasta which ty -> Fasta which ty -> Bool
$c>= :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
> :: Fasta which ty -> Fasta which ty -> Bool
$c> :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
<= :: Fasta which ty -> Fasta which ty -> Bool
$c<= :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
< :: Fasta which ty -> Fasta which ty -> Bool
$c< :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Bool
compare :: Fasta which ty -> Fasta which ty -> Ordering
$ccompare :: forall k (which :: k) k (ty :: k).
Fasta which ty -> Fasta which ty -> Ordering
$cp1Ord :: forall k (which :: k) k (ty :: k). Eq (Fasta which ty)
Ord,ReadPrec [Fasta which ty]
ReadPrec (Fasta which ty)
Int -> ReadS (Fasta which ty)
ReadS [Fasta which ty]
(Int -> ReadS (Fasta which ty))
-> ReadS [Fasta which ty]
-> ReadPrec (Fasta which ty)
-> ReadPrec [Fasta which ty]
-> Read (Fasta which ty)
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
forall k (which :: k) k (ty :: k). ReadPrec [Fasta which ty]
forall k (which :: k) k (ty :: k). ReadPrec (Fasta which ty)
forall k (which :: k) k (ty :: k). Int -> ReadS (Fasta which ty)
forall k (which :: k) k (ty :: k). ReadS [Fasta which ty]
readListPrec :: ReadPrec [Fasta which ty]
$creadListPrec :: forall k (which :: k) k (ty :: k). ReadPrec [Fasta which ty]
readPrec :: ReadPrec (Fasta which ty)
$creadPrec :: forall k (which :: k) k (ty :: k). ReadPrec (Fasta which ty)
readList :: ReadS [Fasta which ty]
$creadList :: forall k (which :: k) k (ty :: k). ReadS [Fasta which ty]
readsPrec :: Int -> ReadS (Fasta which ty)
$creadsPrec :: forall k (which :: k) k (ty :: k). Int -> ReadS (Fasta which ty)
Read,Int -> Fasta which ty -> ShowS
[Fasta which ty] -> ShowS
Fasta which ty -> String
(Int -> Fasta which ty -> ShowS)
-> (Fasta which ty -> String)
-> ([Fasta which ty] -> ShowS)
-> Show (Fasta which ty)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall k (which :: k) k (ty :: k). Int -> Fasta which ty -> ShowS
forall k (which :: k) k (ty :: k). [Fasta which ty] -> ShowS
forall k (which :: k) k (ty :: k). Fasta which ty -> String
showList :: [Fasta which ty] -> ShowS
$cshowList :: forall k (which :: k) k (ty :: k). [Fasta which ty] -> ShowS
show :: Fasta which ty -> String
$cshow :: forall k (which :: k) k (ty :: k). Fasta which ty -> String
showsPrec :: Int -> Fasta which ty -> ShowS
$cshowsPrec :: forall k (which :: k) k (ty :: k). Int -> Fasta which ty -> ShowS
Show,(forall x. Fasta which ty -> Rep (Fasta which ty) x)
-> (forall x. Rep (Fasta which ty) x -> Fasta which ty)
-> Generic (Fasta which ty)
forall x. Rep (Fasta which ty) x -> Fasta which ty
forall x. Fasta which ty -> Rep (Fasta which ty) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall k (which :: k) k (ty :: k) x.
Rep (Fasta which ty) x -> Fasta which ty
forall k (which :: k) k (ty :: k) x.
Fasta which ty -> Rep (Fasta which ty) x
$cto :: forall k (which :: k) k (ty :: k) x.
Rep (Fasta which ty) x -> Fasta which ty
$cfrom :: forall k (which :: k) k (ty :: k) x.
Fasta which ty -> Rep (Fasta which ty) x
Generic)
type FastaUntyped = Fasta Void Void
fastaToByteString ∷ Int → Fasta which ty → ByteString
{-# Inlinable fastaToByteString #-}
fastaToByteString :: Int -> Fasta which ty -> ByteString
fastaToByteString Int
k' Fasta{BioSequence ty
SequenceIdentifier which
_fasta :: BioSequence ty
_header :: SequenceIdentifier which
_fasta :: forall k (which :: k) k (ty :: k). Fasta which ty -> BioSequence ty
_header :: forall k (which :: k) k (ty :: k).
Fasta which ty -> SequenceIdentifier which
..} = Char -> ByteString -> ByteString
BS.cons Char
'>' (SequenceIdentifier which
_headerSequenceIdentifier which
-> Getting ByteString (SequenceIdentifier which) ByteString
-> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString (SequenceIdentifier which) ByteString
forall s t. Rewrapping s t => Iso s t (Unwrapped s) (Unwrapped t)
_Wrapped) ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\n" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString -> ByteString
go (BioSequence ty
_fastaBioSequence ty
-> Getting ByteString (BioSequence ty) ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString (BioSequence ty) ByteString
forall s t. Rewrapping s t => Iso s t (Unwrapped s) (Unwrapped t)
_Wrapped)
where go :: ByteString -> ByteString
go (Int -> ByteString -> (ByteString, ByteString)
BS.splitAt Int
k → (ByteString
hd,ByteString
tl))
| ByteString -> Bool
BS.null ByteString
hd = ByteString
forall a. Monoid a => a
mempty
| Bool
otherwise = ByteString
hd ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
"\n" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString -> ByteString
go ByteString
tl
k :: Int
k = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 Int
k'
fastaToBuilder ∷ Int → Fasta which ty → BB.Builder
{-# Inlinable fastaToBuilder #-}
fastaToBuilder :: Int -> Fasta which ty -> Builder
fastaToBuilder Int
k' Fasta{BioSequence ty
SequenceIdentifier which
_fasta :: BioSequence ty
_header :: SequenceIdentifier which
_fasta :: forall k (which :: k) k (ty :: k). Fasta which ty -> BioSequence ty
_header :: forall k (which :: k) k (ty :: k).
Fasta which ty -> SequenceIdentifier which
..} = Char -> Builder
BB.char8 Char
'>' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> (ByteString -> Builder
BB.byteString (ByteString -> Builder) -> ByteString -> Builder
forall a b. (a -> b) -> a -> b
$ SequenceIdentifier which
_headerSequenceIdentifier which
-> Getting ByteString (SequenceIdentifier which) ByteString
-> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString (SequenceIdentifier which) ByteString
forall s t. Rewrapping s t => Iso s t (Unwrapped s) (Unwrapped t)
_Wrapped) Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
'\n' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
go (BioSequence ty
_fastaBioSequence ty
-> Getting ByteString (BioSequence ty) ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString (BioSequence ty) ByteString
forall s t. Rewrapping s t => Iso s t (Unwrapped s) (Unwrapped t)
_Wrapped)
where go :: ByteString -> Builder
go (Int -> ByteString -> (ByteString, ByteString)
BS.splitAt Int
k → (ByteString
hd,ByteString
tl))
| ByteString -> Bool
BS.null ByteString
hd = Builder
forall a. Monoid a => a
mempty
| Bool
otherwise = ByteString -> Builder
BB.byteString ByteString
hd Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Char -> Builder
BB.char8 Char
'\n' Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
go ByteString
tl
k :: Int
k = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
1 Int
k'
byteStringToFasta ∷ ByteString → Either String (Fasta which ty)
{-# Inlinable byteStringToFasta #-}
byteStringToFasta :: ByteString -> Either String (Fasta which ty)
byteStringToFasta (ByteString -> [ByteString]
BS.lines → [ByteString]
ls)
| [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
ls = String -> Either String (Fasta which ty)
forall a b. a -> Either a b
Left String
"empty bytestring"
| Just (Char
z, ByteString
hdr) ← ByteString -> Maybe (Char, ByteString)
BS.uncons ByteString
h, Char
z Char -> ByteString -> Bool
`BS.elem` ByteString
">;" = Fasta which ty -> Either String (Fasta which ty)
forall a b. b -> Either a b
Right (Fasta which ty -> Either String (Fasta which ty))
-> Fasta which ty -> Either String (Fasta which ty)
forall a b. (a -> b) -> a -> b
$ Fasta :: forall k k (which :: k) (ty :: k).
SequenceIdentifier which -> BioSequence ty -> Fasta which ty
Fasta { _header :: SequenceIdentifier which
_header = ByteString -> SequenceIdentifier which
forall k (which :: k). ByteString -> SequenceIdentifier which
SequenceIdentifier ByteString
hdr, _fasta :: BioSequence ty
_fasta = ByteString -> BioSequence ty
forall k (which :: k). ByteString -> BioSequence which
BioSequence (ByteString -> BioSequence ty) -> ByteString -> BioSequence ty
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BS.concat [ByteString]
ts }
| Bool
otherwise = String -> Either String (Fasta which ty)
forall a b. a -> Either a b
Left String
"no '>'/';' first character"
where ByteString
h:[ByteString]
ts = [ByteString]
ls
rawFasta ∷ Int → Prism' ByteString (Fasta which ty)
{-# Inline rawFasta #-}
rawFasta :: Int -> Prism' ByteString (Fasta which ty)
rawFasta Int
k = (Fasta which ty -> ByteString)
-> (ByteString -> Either ByteString (Fasta which ty))
-> Prism' ByteString (Fasta which ty)
forall b t s a. (b -> t) -> (s -> Either t a) -> Prism s t a b
prism (Int -> Fasta which ty -> ByteString
forall k k (which :: k) (ty :: k).
Int -> Fasta which ty -> ByteString
fastaToByteString Int
k) ((ByteString -> Either ByteString (Fasta which ty))
-> Prism' ByteString (Fasta which ty))
-> (ByteString -> Either ByteString (Fasta which ty))
-> Prism' ByteString (Fasta which ty)
forall a b. (a -> b) -> a -> b
$ \ByteString
bs → (String -> ByteString)
-> Either String (Fasta which ty)
-> Either ByteString (Fasta which ty)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (ByteString -> String -> ByteString
forall a b. a -> b -> a
const ByteString
bs) (Either String (Fasta which ty)
-> Either ByteString (Fasta which ty))
-> Either String (Fasta which ty)
-> Either ByteString (Fasta which ty)
forall a b. (a -> b) -> a -> b
$ ByteString -> Either String (Fasta which ty)
forall k k (which :: k) (ty :: k).
ByteString -> Either String (Fasta which ty)
byteStringToFasta ByteString
bs