{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Hyrax.Abif.Generate
( generateAb1s
, generateAb1
, readWeightedFasta
, iupac
, unIupac
, complementNucleotides
) where
import Protolude
import qualified Data.Text as Txt
import qualified Data.Text.Encoding as TxtE
import qualified Data.List as Lst
import qualified Data.Binary.Put as B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified System.FilePath as FP
import System.FilePath ((</>))
import qualified System.Directory as Dir
import Hyrax.Abif
import Hyrax.Abif.Write
import Hyrax.Abif.Fasta
data TraceData = TraceData { TraceData -> [Int16]
trData09G :: ![Int16]
, TraceData -> [Int16]
trData10A :: ![Int16]
, TraceData -> [Int16]
trData11T :: ![Int16]
, TraceData -> [Int16]
trData12C :: ![Int16]
, TraceData -> Int
trValsPerBase :: !Int
, TraceData -> Text
trFasta :: !Text
} deriving (Int -> TraceData -> ShowS
[TraceData] -> ShowS
TraceData -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TraceData] -> ShowS
$cshowList :: [TraceData] -> ShowS
show :: TraceData -> String
$cshow :: TraceData -> String
showsPrec :: Int -> TraceData -> ShowS
$cshowsPrec :: Int -> TraceData -> ShowS
Show)
generateAb1s :: FilePath -> FilePath -> IO ()
generateAb1s :: String -> String -> IO ()
generateAb1s String
source String
dest = do
Bool -> String -> IO ()
Dir.createDirectoryIfMissing Bool
True String
dest
Either Text [(Text, [(Double, Text)])]
weighted <- String -> IO (Either Text [(Text, [(Double, Text)])])
readWeightedFastas String
source
case Either Text [(Text, [(Double, Text)])]
weighted of
Left Text
e -> forall (m :: * -> *). MonadIO m => Text -> m ()
putText Text
e
Right [(Text, [(Double, Text)])]
rs -> do
let ab1s :: [(Text, ByteString)]
ab1s = (\(Text
n, [(Double, Text)]
r) -> (Text
n, (Text, [(Double, Text)]) -> ByteString
generateAb1 (Text
n, [(Double, Text)]
r))) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(Text, [(Double, Text)])]
rs
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
(a -> f b) -> t a -> f ()
traverse_ (\(Text
name, ByteString
ab1) -> String -> ByteString -> IO ()
BS.writeFile (String
dest String -> ShowS
</> Text -> String
Txt.unpack Text
name forall a. Semigroup a => a -> a -> a
<> String
".ab1") forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
BSL.toStrict ByteString
ab1) [(Text, ByteString)]
ab1s
generateAb1 :: (Text, [(Double, Text)]) -> BSL.ByteString
generateAb1 :: (Text, [(Double, Text)]) -> ByteString
generateAb1 (Text
fName, [(Double, Text)]
sourceFasta) =
let
tr :: TraceData
tr = [(Double, Text)] -> TraceData
generateTraceData [(Double, Text)]
sourceFasta
valsPerBase :: Int
valsPerBase = TraceData -> Int
trValsPerBase TraceData
tr
generatedFastaLen :: Int
generatedFastaLen = (Text -> Int
Txt.length forall a b. (a -> b) -> a -> b
$ TraceData -> Text
trFasta TraceData
tr)
midPeek :: Int
midPeek = Int
valsPerBase forall a. Integral a => a -> a -> a
`div` Int
2
peakLocations :: [Int]
peakLocations = forall a. Int -> [a] -> [a]
take Int
generatedFastaLen [Int
midPeek, Int
valsPerBase forall a. Num a => a -> a -> a
+ Int
midPeek..]
sampleName :: Text
sampleName = forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> (Text, Text)
Txt.breakOn Text
"_" forall a b. (a -> b) -> a -> b
$ Text
fName
dirs :: [Directory]
dirs = [ Int -> [Int16] -> Directory
mkData Int
9 forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData09G TraceData
tr
, Int -> [Int16] -> Directory
mkData Int
10 forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData10A TraceData
tr
, Int -> [Int16] -> Directory
mkData Int
11 forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData11T TraceData
tr
, Int -> [Int16] -> Directory
mkData Int
12 forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData12C TraceData
tr
, Base -> Base -> Base -> Base -> Directory
mkBaseOrder Base
BaseG Base
BaseA Base
BaseT Base
BaseC
, Int16 -> Directory
mkLane Int16
1
, Text -> Directory
mkCalledBases forall a b. (a -> b) -> a -> b
$ TraceData -> Text
trFasta TraceData
tr
, Int -> Text -> Directory
mkMobilityFileName Int
1 Text
"KB_3500_POP7_BDTv3.mob"
, Int -> Text -> Directory
mkMobilityFileName Int
2 Text
"KB_3500_POP7_BDTv3.mob"
, [Int16] -> Directory
mkPeakLocations forall a b. (a -> b) -> a -> b
$ forall a b. (Integral a, Num b) => a -> b
fromIntegral forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Int]
peakLocations
, Int16 -> Int16 -> Int16 -> Int16 -> Directory
mkDyeSignalStrength Int16
53 Int16
75 Int16
79 Int16
48
, Text -> Directory
mkSampleName Text
sampleName
, Text -> Directory
mkComment Text
"Generated by HyraxBio AB1 generator"
]
abif :: Abif
abif = Abif { aHeader :: Header
aHeader = Header
mkHeader
, aRootDir :: Directory
aRootDir = Directory
mkRoot
, aDirs :: [Directory]
aDirs = [Directory]
dirs
}
in
Put -> ByteString
B.runPut (Abif -> Put
putAbif Abif
abif)
generateTraceData :: [(Double, Text)] -> TraceData
generateTraceData :: [(Double, Text)] -> TraceData
generateTraceData [(Double, Text)]
weighted =
let
weightedNucs' :: [[(Double, String)]]
weightedNucs' = (\(Double
w, Text
ns) -> (Double
w,) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> String
unIupac forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> String
Txt.unpack Text
ns) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(Double, Text)]
weighted
weightedNucs :: [[(Double, String)]]
weightedNucs = forall a. [[a]] -> [[a]]
Lst.transpose [[(Double, String)]]
weightedNucs'
curve :: [Int]
curve = [Int
0, Int
0, Int
128, Int
512, Int
1024, Int
1024, Int
512, Int
128, Int
0, Int
0]
valsPerBase :: Int
valsPerBase = forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
curve
data09G :: [Int16]
data09G = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'G' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs
data10A :: [Int16]
data10A = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'A' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs
data11T :: [Int16]
data11T = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'T' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs
data12C :: [Int16]
data12C = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'C' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs
fastaSeq :: [String]
fastaSeq = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall a b. (a, b) -> b
snd forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<<$>> [[(Double, String)]]
weightedNucs)
fasta :: Text
fasta = String -> Text
Txt.pack forall a b. (a -> b) -> a -> b
$ [String] -> String
iupac [String]
fastaSeq
in
TraceData { trData09G :: [Int16]
trData09G = [Int16]
data09G
, trData10A :: [Int16]
trData10A = [Int16]
data10A
, trData11T :: [Int16]
trData11T = [Int16]
data11T
, trData12C :: [Int16]
trData12C = [Int16]
data12C
, trFasta :: Text
trFasta = Text
fasta
, trValsPerBase :: Int
trValsPerBase = Int
valsPerBase
}
where
getWeightedTrace :: [Int] -> Char -> [(Double, [Char])] -> [Int16]
getWeightedTrace :: [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
nuc [(Double, String)]
ws =
let
found :: [(Double, String)]
found = forall a. (a -> Bool) -> [a] -> [a]
filter ((Char
nuc forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) [(Double, String)]
ws
score' :: Double
score' = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' forall a. Num a => a -> a -> a
(+) Double
0 forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(Double, String)]
found
score :: Double
score = forall a. Ord a => a -> a -> a
min Double
1 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => a -> a -> a
max Double
0 forall a b. (a -> b) -> a -> b
$ Double
score'
wave :: [Int16]
wave = forall a b. (RealFrac a, Integral b) => a -> b
floor forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double
score forall a. Num a => a -> a -> a
*) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (Integral a, Num b) => a -> b
fromIntegral forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Int]
curve
in
[Int16]
wave
readWeightedFasta :: ByteString -> Either Text [(Double, Text)]
readWeightedFasta :: ByteString -> Either Text [(Double, Text)]
readWeightedFasta ByteString
fastaData =
case Text -> Either Text [Fasta]
parseFasta forall a b. (a -> b) -> a -> b
$ ByteString -> Text
TxtE.decodeUtf8 ByteString
fastaData of
Left Text
e -> forall a b. a -> Either a b
Left Text
e
Right [Fasta]
fs -> [Fasta] -> Either Text [(Double, Text)]
getWeightedFasta [Fasta]
fs
where
getWeightedFasta :: [Fasta] -> Either Text [(Double, Text)]
getWeightedFasta :: [Fasta] -> Either Text [(Double, Text)]
getWeightedFasta [Fasta]
fs =
case forall (t :: * -> *) (f :: * -> *) a.
(Traversable t, Applicative f) =>
t (f a) -> f (t a)
sequenceA forall a b. (a -> b) -> a -> b
$ Fasta -> Either Text (Double, Text)
readWeighted forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Fasta]
fs of
Left Text
e -> forall a b. a -> Either a b
Left Text
e
Right [(Double, Text)]
r -> forall a b. b -> Either a b
Right [(Double, Text)]
r
readWeighted :: Fasta -> Either Text (Double, Text)
readWeighted :: Fasta -> Either Text (Double, Text)
readWeighted (Fasta Text
hdr' Text
dta) =
let (Text -> Text
processNucs, Text
hdr) =
if Text -> Text -> Bool
Txt.isSuffixOf Text
"R" Text
hdr'
then (Text -> Text
Txt.reverse forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
complementNucleotides, Text -> Text
Txt.strip forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
Txt.dropEnd Int
1 forall a b. (a -> b) -> a -> b
$ Text
hdr')
else (forall a. a -> a
identity, Text
hdr')
in
case (forall b a. (Read b, StringConv a String) => a -> Maybe b
readMaybe forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
Txt.unpack forall a b. (a -> b) -> a -> b
$ Text
hdr :: Maybe Double) of
Just Double
weight -> forall a b. b -> Either a b
Right (forall a. Ord a => a -> a -> a
min Double
1 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => a -> a -> a
max Double
0 forall a b. (a -> b) -> a -> b
$ Double
weight, Text -> Text
processNucs forall a b. (a -> b) -> a -> b
$ Text -> Text
Txt.strip Text
dta)
Maybe Double
Nothing -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ Text
"Invalid header reading, expecting numeric weight, got: " forall a. Semigroup a => a -> a -> a
<> Text
hdr
readWeightedFastas :: FilePath -> IO (Either Text [(Text, [(Double, Text)])])
readWeightedFastas :: String -> IO (Either Text [(Text, [(Double, Text)])])
readWeightedFastas String
source = do
[String]
files <- forall a. (a -> Bool) -> [a] -> [a]
filter (Text -> Text -> Bool
Txt.isSuffixOf Text
".fasta" forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
Txt.pack) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO [String]
getFiles String
source
let names :: [Text]
names = String -> Text
Txt.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
FP.takeBaseName forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [String]
files
[ByteString]
contents <- forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse String -> IO ByteString
BS.readFile [String]
files
case forall (t :: * -> *) (f :: * -> *) a.
(Traversable t, Applicative f) =>
t (f a) -> f (t a)
sequenceA forall a b. (a -> b) -> a -> b
$ ByteString -> Either Text [(Double, Text)]
readWeightedFasta forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [ByteString]
contents of
Left Text
e -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ Text
e
Right [[(Double, Text)]]
rs -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
names [[(Double, Text)]]
rs
getFiles :: FilePath -> IO [FilePath]
getFiles :: String -> IO [String]
getFiles String
p = do
[String]
entries <- (String
p String -> ShowS
</>) forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<<$>> String -> IO [String]
Dir.listDirectory String
p
forall (m :: * -> *) a.
Applicative m =>
(a -> m Bool) -> [a] -> m [a]
filterM String -> IO Bool
Dir.doesFileExist [String]
entries
unIupac :: Char -> [Char]
unIupac :: Char -> String
unIupac Char
c =
case Char
c of
Char
'T' -> String
"T"
Char
'C' -> String
"C"
Char
'A' -> String
"A"
Char
'G' -> String
"G"
Char
'U' -> String
"T"
Char
'M' -> String
"AC"
Char
'R' -> String
"AG"
Char
'W' -> String
"AT"
Char
'S' -> String
"CG"
Char
'Y' -> String
"CT"
Char
'K' -> String
"GT"
Char
'V' -> String
"ACG"
Char
'H' -> String
"ACT"
Char
'D' -> String
"AGT"
Char
'B' -> String
"CGT"
Char
'N' -> String
"GATC"
Char
'X' -> String
"GATC"
Char
_ -> String
""
iupac :: [[Char]] -> [Char]
iupac :: [String] -> String
iupac [String]
ns =
forall {t :: * -> *}. Foldable t => t Char -> Char
go forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [String]
ns
where
go :: t Char -> Char
go t Char
cs =
let
a :: Bool
a = Char
'A' forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
c :: Bool
c = Char
'C' forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
g :: Bool
g = Char
'G' forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
t :: Bool
t = Char
'T' forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
in
case (Bool
a, Bool
c, Bool
g, Bool
t) of
(Bool
True, Bool
False, Bool
False, Bool
False) -> Char
'A'
(Bool
False, Bool
True, Bool
False, Bool
False) -> Char
'C'
(Bool
False, Bool
False, Bool
True, Bool
False) -> Char
'G'
(Bool
False, Bool
False, Bool
False, Bool
True ) -> Char
'T'
(Bool
True, Bool
True, Bool
False, Bool
False) -> Char
'M'
(Bool
True, Bool
False, Bool
True, Bool
False) -> Char
'R'
(Bool
True, Bool
False, Bool
False, Bool
True ) -> Char
'W'
(Bool
False, Bool
True, Bool
True, Bool
False) -> Char
'S'
(Bool
False, Bool
True, Bool
False, Bool
True ) -> Char
'Y'
(Bool
False, Bool
False, Bool
True, Bool
True ) -> Char
'K'
(Bool
True, Bool
True, Bool
True, Bool
False) -> Char
'V'
(Bool
True, Bool
True, Bool
False, Bool
True ) -> Char
'H'
(Bool
True, Bool
False, Bool
True, Bool
True ) -> Char
'D'
(Bool
False, Bool
True, Bool
True, Bool
True ) -> Char
'B'
(Bool
True, Bool
True, Bool
True, Bool
True ) -> Char
'N'
(Bool, Bool, Bool, Bool)
_ -> Char
'_'
complementNucleotides :: Text -> Text
complementNucleotides :: Text -> Text
complementNucleotides Text
ns =
let
un :: [String]
un = Char -> String
unIupac forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> String
Txt.unpack Text
ns
comp :: [String]
comp = Char -> Char
complementNuc forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<<$>> [String]
un
iu :: String
iu = [String] -> String
iupac [String]
comp
in
String -> Text
Txt.pack String
iu
where
complementNuc :: Char -> Char
complementNuc Char
'A' = Char
'T'
complementNuc Char
'G' = Char
'C'
complementNuc Char
'T' = Char
'A'
complementNuc Char
'C' = Char
'G'
complementNuc Char
x = Char
x