-- | Core sequence types ('SeqData', 'SeqLabel', 'QualData', 'Qual', 'Offset'),
-- the 'BioSeq' and 'BioSeqQual' classes built on them, and the text
-- serialisers 'toFasta', 'toFastaQual' and 'toFastQ'.
module Bio.Core.Sequence (
Qual (..), Offset (..),
SeqData (..), SeqLabel (..), QualData (..),
BioSeq (..), BioSeqQual (..),
toFasta, toFastaQual, toFastQ
) where
import qualified Data.ByteString.Lazy.Char8 as LC
import qualified Data.ByteString.Lazy as L
import Data.Int
import Data.Word
import Data.String
-- | Sequence data, held as a lazy 'LC.ByteString'.
--
-- The derived 'IsString' instance lets a string literal stand in for a
-- 'SeqData' value when @OverloadedStrings@ is enabled at the use site.
newtype SeqData = SeqData
  { unSD :: LC.ByteString -- ^ Unwrap to the underlying lazy ByteString.
  }
  deriving (Eq, Ord, IsString)
-- | Label of a sequence, held as a lazy 'LC.ByteString'.
--
-- The derived 'IsString' instance lets a string literal stand in for a
-- 'SeqLabel' value when @OverloadedStrings@ is enabled at the use site.
newtype SeqLabel = SeqLabel
  { unSL :: LC.ByteString -- ^ Unwrap to the underlying lazy ByteString.
  }
  deriving (Eq, Ord, IsString)
-- | A single quality value stored in one byte ('Word8').
--
-- 'Num' is derived, so numeric literals and arithmetic work directly on
-- 'Qual'; arithmetic therefore has the wrap-around behaviour of 'Word8'.
newtype Qual = Qual
  { unQual :: Word8 -- ^ Unwrap to the raw byte value.
  }
  deriving (Show, Eq, Ord, Num)
-- | Quality data for a sequence, held as a lazy 'L.ByteString'.
--
-- The serialisers in this module treat every byte as one numeric quality
-- value (it is printed as a number by 'toFastaQual' and offset by 33 in
-- 'toFastQ').
newtype QualData = QualData
  { unQD :: L.ByteString -- ^ Unwrap to the underlying lazy ByteString.
  }
  deriving (Eq, Ord)
-- | A position or length within a sequence, stored as an 'Int64'.
--
-- This is the result type of 'seqlength'. 'Num' is derived, so numeric
-- literals and arithmetic work directly on 'Offset'.
newtype Offset = Offset
  { unOff :: Int64 -- ^ Unwrap to the raw 'Int64'.
  }
  deriving (Show, Eq, Ord, Num)
-- | Types that can be viewed as a labelled biological sequence.
--
-- Any instance can be serialised with 'toFasta'.
class BioSeq s where
  -- | The label attached to the sequence (written on the FASTA header line).
  seqlabel :: s -> SeqLabel

  -- | The sequence data itself.
  seqdata :: s -> SeqData

  -- | The length of the sequence, expressed as an 'Offset'.
  seqlength :: s -> Offset
-- | Render a sequence as a FASTA record.
--
-- The output is a header line (@>@ followed by the label) and then the
-- sequence data broken into lines of at most 60 characters. Every line,
-- including the last, is terminated by a newline. An empty sequence yields
-- only the header line.
toFasta :: BioSeq s => s -> LC.ByteString
toFasta s = LC.concat (header ++ body (unSD (seqdata s)))
  where
    header = [LC.singleton '>', unSL (seqlabel s), newline]
    newline = LC.singleton '\n'

    -- Peel off one 60-character line at a time until nothing is left.
    body residues
      | LC.null residues = []
      | otherwise = chunk : newline : body remainder
      where
        (chunk, remainder) = LC.splitAt 60 residues
-- | Sequences that additionally carry quality data.
--
-- Instances can be serialised with 'toFastaQual' and 'toFastQ'.
class BioSeq sq => BioSeqQual sq where
  -- | The quality data associated with the sequence.
  seqqual :: sq -> QualData
-- | Render the quality data of a sequence in FASTA-style numeric form.
--
-- The output is a header line (@>@ followed by the label) and then the
-- quality bytes printed as decimal numbers separated by single spaces,
-- 20 values per line. Every line, including the last, is terminated by a
-- newline. Empty quality data yields only the header line.
toFastaQual :: BioSeqQual s => s -> LC.ByteString
toFastaQual s = LC.concat (header ++ rows (L.unpack (unQD (seqqual s))))
  where
    header = [LC.singleton '>', unSL (seqlabel s), newline]
    newline = LC.singleton '\n'

    -- Emit one line of up to 20 space-separated values per step.
    rows [] = []
    rows scores = LC.pack (unwords (map show row)) : newline : rows later
      where
        (row, later) = splitAt 20 scores
-- | Render a sequence with quality data as a four-line FASTQ record.
--
-- The lines are, in order: @\@@ followed by the label, the sequence data
-- (unwrapped, on a single line), @+@ followed by the label, and the quality
-- values encoded with the Phred+33 offset. Each line is terminated by a
-- newline.
--
-- Quality values above 93 are clamped to 93 before the offset is added, so
-- every encoded byte stays within the printable range @'!'@..@'~'@ (33..126).
-- Without the clamp the 'Word8' addition wraps around for values above 222,
-- which could emit control bytes (e.g. 233 + 33 wraps to 10, a newline) and
-- break the four-line record structure.
toFastQ :: BioSeqQual s => s -> LC.ByteString
toFastQ s =
  LC.unlines
    [ LC.cons '@' label
    , unSD (seqdata s)
    , LC.cons '+' label
    , L.map encodePhred33 (unQD (seqqual s))
    ]
  where
    -- The label is written twice (on the '@' and '+' lines); look it up once.
    label = unSL (seqlabel s)

    -- Saturate at 93 first: 93 + 33 = 126 ('~'), the largest printable
    -- character, and the addition can then no longer overflow a byte.
    encodePhred33 q = min 93 q + 33