{-# OPTIONS_GHC -fno-warn-orphans #-}
module Bio.ABI.Decode
( decodeRawSequence
, decodeRawSequence'
) where
import Control.Applicative (many)
import Data.Bifunctor (bimap)
import Data.Binary.Get (getInt16be, runGetOrFail)
import Data.ByteString as BS (ByteString)
import Data.ByteString.Lazy as BSL (ByteString, fromStrict)
import Data.ByteString.Lazy.Char8 as BSL8 (unpack)
import Data.Char (ord)
import Data.Functor ((<&>))
import Data.Int (Int16)
import Data.List (find)
import Data.Text (Text, pack)
import Data.Vector (Vector, fromList)
import Hyrax.Abif (Abif (..), Directory (..))
import Hyrax.Abif.Read (getAbif)
import Bio.Sequence (SequenceDecodable (..),
weightedSequence)
import Bio.Sequence.Basecalled (BasecalledSequence,
BasecalledSequenceWithRawData (..))
decodeRawSequence :: BSL.ByteString -> Either Text BasecalledSequenceWithRawData
decodeRawSequence :: ByteString -> Either Text BasecalledSequenceWithRawData
decodeRawSequence ByteString
bs = do
Abif
abif <- ByteString -> Either Text Abif
getAbif ByteString
bs
String
sequence' <- Abif -> Either Text String
extractSequence Abif
abif
[Double]
quality' <- Abif -> Either Text [Double]
extractQuality Abif
abif
BasecalledSequence
bsSequence <- forall s (m :: * -> *).
(IsWeightedSequence s, MonadError Text m) =>
[Element s] -> [Weight s] -> m s
weightedSequence String
sequence' [Double]
quality'
Vector Int16
bsRawG <- Text -> Int -> Abif -> Either Text Directory
findDataByDirectory Text
"DATA" Int
9 Abif
abif forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Directory -> Either Text (Vector Int16)
decodeShortArray
Vector Int16
bsRawA <- Text -> Int -> Abif -> Either Text Directory
findDataByDirectory Text
"DATA" Int
10 Abif
abif forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Directory -> Either Text (Vector Int16)
decodeShortArray
Vector Int16
bsRawT <- Text -> Int -> Abif -> Either Text Directory
findDataByDirectory Text
"DATA" Int
11 Abif
abif forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Directory -> Either Text (Vector Int16)
decodeShortArray
Vector Int16
bsRawC <- Text -> Int -> Abif -> Either Text Directory
findDataByDirectory Text
"DATA" Int
12 Abif
abif forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Directory -> Either Text (Vector Int16)
decodeShortArray
Vector Int
bsPeakLocations <- Text -> Int -> Abif -> Either Text Directory
findDataByDirectory Text
"PLOC" Int
2 Abif
abif forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Directory -> Either Text (Vector Int16)
decodeShortArray forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (Integral a, Num b) => a -> b
fromIntegral
forall (m :: * -> *) a. Monad m => a -> m a
return BasecalledSequenceWithRawData{Vector Int
Vector Int16
BasecalledSequence
bsPeakLocations :: Vector Int
bsRawC :: Vector Int16
bsRawT :: Vector Int16
bsRawA :: Vector Int16
bsRawG :: Vector Int16
bsSequence :: BasecalledSequence
bsPeakLocations :: Vector Int
bsRawC :: Vector Int16
bsRawT :: Vector Int16
bsRawA :: Vector Int16
bsRawG :: Vector Int16
bsSequence :: BasecalledSequence
..}
decodeRawSequence' :: BS.ByteString -> Either Text BasecalledSequenceWithRawData
decodeRawSequence' :: ByteString -> Either Text BasecalledSequenceWithRawData
decodeRawSequence' = ByteString -> Either Text BasecalledSequenceWithRawData
decodeRawSequence forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BSL.fromStrict
instance SequenceDecodable BasecalledSequenceWithRawData BasecalledSequence where
sequenceDecode :: BasecalledSequenceWithRawData -> Either Text BasecalledSequence
sequenceDecode = forall (f :: * -> *) a. Applicative f => a -> f a
pure forall b c a. (b -> c) -> (a -> b) -> a -> c
. BasecalledSequenceWithRawData -> BasecalledSequence
bsSequence
instance SequenceDecodable BSL.ByteString BasecalledSequence where
sequenceDecode :: BSL.ByteString -> Either Text BasecalledSequence
sequenceDecode :: ByteString -> Either Text BasecalledSequence
sequenceDecode ByteString
bs = do
Abif
abif <- ByteString -> Either Text Abif
getAbif ByteString
bs
String
sequence' <- Abif -> Either Text String
extractSequence Abif
abif
[Double]
quality' <- Abif -> Either Text [Double]
extractQuality Abif
abif
forall s (m :: * -> *).
(IsWeightedSequence s, MonadError Text m) =>
[Element s] -> [Weight s] -> m s
weightedSequence String
sequence' [Double]
quality'
instance SequenceDecodable BS.ByteString BasecalledSequence where
sequenceDecode :: BS.ByteString -> Either Text BasecalledSequence
sequenceDecode :: ByteString -> Either Text BasecalledSequence
sequenceDecode = forall a s. SequenceDecodable a s => a -> Either Text s
sequenceDecode forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BSL.fromStrict
extractSequence :: Abif -> Either Text String
Abif
abif = Text -> Int -> Abif -> Either Text Directory
findDataByDirectory Text
"PBAS" Int
1 Abif
abif forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> ByteString -> String
BSL8.unpack forall b c a. (b -> c) -> (a -> b) -> a -> c
. Directory -> ByteString
dData forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= String -> Either Text String
checkACGT
extractQuality :: Abif -> Either Text [Double]
Abif
abif = forall a b. (a -> b) -> [a] -> [b]
map (forall a b. (Integral a, Num b) => a -> b
fromIntegral forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord) forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> String
BSL8.unpack forall b c a. (b -> c) -> (a -> b) -> a -> c
. Directory -> ByteString
dData forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Int -> Abif -> Either Text Directory
findDataByDirectory Text
"PCON" Int
1 Abif
abif
checkACGT :: String -> Either Text String
checkACGT :: String -> Either Text String
checkACGT String
str | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
validChar String
str = forall a b. b -> Either a b
Right String
str
| Bool
otherwise = forall a b. a -> Either a b
Left Text
"Bio.ABI.Decode: could not parse sequence"
where
validChar :: Char -> Bool
validChar :: Char -> Bool
validChar Char
ch = Char
ch forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'A', Char
'C', Char
'G', Char
'T']
findDataByDirectory
:: Text
-> Int
-> Abif
-> Either Text Directory
findDataByDirectory :: Text -> Int -> Abif -> Either Text Directory
findDataByDirectory Text
dirName Int
dirIndex Abif
abif =
let directoryM :: Maybe Directory
directoryM = forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find (\Directory{Int
[Text]
ByteString
Text
ElemType
dTagName :: Directory -> Text
dTagNum :: Directory -> Int
dElemType :: Directory -> ElemType
dElemTypeCode :: Directory -> Int
dElemTypeDesc :: Directory -> Text
dElemSize :: Directory -> Int
dElemNum :: Directory -> Int
dDataSize :: Directory -> Int
dDataOffset :: Directory -> Int
dDataDebug :: Directory -> [Text]
dDataDebug :: [Text]
dData :: ByteString
dDataOffset :: Int
dDataSize :: Int
dElemNum :: Int
dElemSize :: Int
dElemTypeDesc :: Text
dElemTypeCode :: Int
dElemType :: ElemType
dTagNum :: Int
dTagName :: Text
dData :: Directory -> ByteString
..} -> Text
dTagName forall a. Eq a => a -> a -> Bool
== Text
dirName Bool -> Bool -> Bool
&& Int
dTagNum forall a. Eq a => a -> a -> Bool
== Int
dirIndex) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Abif -> [Directory]
aDirs forall a b. (a -> b) -> a -> b
$ Abif
abif
in forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. a -> Either a b
Left Text
errorMsg) forall a b. b -> Either a b
Right Maybe Directory
directoryM
where
errorMsg :: Text
errorMsg :: Text
errorMsg = Text
"Bio.ABI.Decode: could not find directory " forall a. Semigroup a => a -> a -> a
<> Text
dirName forall a. Semigroup a => a -> a -> a
<> Text
" with index " forall a. Semigroup a => a -> a -> a
<> String -> Text
pack (forall a. Show a => a -> String
show Int
dirIndex)
decodeShortArray :: Directory -> Either Text (Vector Int16)
decodeShortArray :: Directory -> Either Text (Vector Int16)
decodeShortArray =
forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap
(\(ByteString
_, ByteOffset
_, String
msg) -> Text
"Data.ABI.Decode: could not decode short array: " forall a. Semigroup a => a -> a -> a
<> String -> Text
pack String
msg)
(\(ByteString
_, ByteOffset
_, [Int16]
lst) -> forall a. [a] -> Vector a
fromList [Int16]
lst)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a.
Get a
-> ByteString
-> Either
(ByteString, ByteOffset, String) (ByteString, ByteOffset, a)
runGetOrFail (forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Get Int16
getInt16be)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Directory -> ByteString
dData