{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-|
Module      : Hyax.Abif.Generate
Description : Generate AB1 from a weighted FASTA
Copyright   : (c) HyraxBio, 2018
License     : BSD3
Maintainer  : andre@hyraxbio.co.za, andre@andrevdm.com

Functionality for generating AB1 files from an input FASTA. These AB1s are supported by both PHRED and recall,
if you are using other software you may need to add additional required sections.

= Weighted reads

The input FASTA files have "weighted" reads. The name for each read is an value between 0 and 1
 which specifies the height of the peak relative to a full peak. 


== Single read

The most simple example is a single FASTA with a single read with a weight of 1

@
> 1
ACTG
@

<<docs/eg_actg.png>>

The chromatogram for this AB1 shows perfect traces for the input `ACTG` nucleotides with a full height peak.


== Mixes & multiple reads 

The source FASTA can have multiple reads, which results in a chromatogram with mixes

@
> 1
ACAG
> 0.3
ACTG
@

<<docs/eg_acag_acgt_mix.png>>

There is an `AT` mix at the third nucleotide. The first read has a weight of 1 and the second a weight of 0.3.
The chromatogram shows the mix and the `T` with a lower peak (30% of the `A` peak)

== Summing weights

 - The weigh of a read specifies the intensity of the peak from 0 to 1. 
 - Weights for each position are added to a maximum of 1 per nucleotide
 - You can use `_` as a "blank" nucleotide, in which only the nucleotides from other reads will be considered

E.g.

@
> 1
ACAG
> 0.3
_GT
> 0.2
_G
@

<<docs/eg_multi_mix.png>>


== Reverse reads

A weighted FASTA can represent a reverse read. To do this add a `R` suffix to the weight.
The data you enter should be entered as if it was a forward read. This data will be complemented
and reversed before writing to the ABIF

E.g.

@
> 1R
ACAG
@

See README.md for additional details and examples
-}
module Hyrax.Abif.Generate
    ( generateAb1s
    , generateAb1
    , readWeightedFasta
    , iupac
    , unIupac
    , complementNucleotides
    ) where

import           Protolude
import qualified Data.Text as Txt
import qualified Data.Text.Encoding as TxtE
import qualified Data.List as Lst
import qualified Data.Binary.Put as B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified System.FilePath as FP
import           System.FilePath ((</>))
import qualified System.Directory as Dir

import           Hyrax.Abif
import           Hyrax.Abif.Write
import           Hyrax.Abif.Fasta

data TraceData = TraceData { TraceData -> [Int16]
trData09G :: ![Int16]
                           , TraceData -> [Int16]
trData10A :: ![Int16]
                           , TraceData -> [Int16]
trData11T :: ![Int16]
                           , TraceData -> [Int16]
trData12C :: ![Int16]
                           , TraceData -> Int
trValsPerBase :: !Int
                           , TraceData -> Text
trFasta :: !Text
                           } deriving (Int -> TraceData -> ShowS
[TraceData] -> ShowS
TraceData -> String
(Int -> TraceData -> ShowS)
-> (TraceData -> String)
-> ([TraceData] -> ShowS)
-> Show TraceData
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TraceData] -> ShowS
$cshowList :: [TraceData] -> ShowS
show :: TraceData -> String
$cshow :: TraceData -> String
showsPrec :: Int -> TraceData -> ShowS
$cshowsPrec :: Int -> TraceData -> ShowS
Show)

-- | Generate a set of AB1s. One for every FASTA found in the source directory
generateAb1s :: FilePath -> FilePath -> IO ()
generateAb1s :: String -> String -> IO ()
generateAb1s String
source String
dest = do
  Bool -> String -> IO ()
Dir.createDirectoryIfMissing Bool
True String
dest
  Either Text [(Text, [(Double, Text)])]
weighted <- String -> IO (Either Text [(Text, [(Double, Text)])])
readWeightedFastas String
source

  case Either Text [(Text, [(Double, Text)])]
weighted of
    Left Text
e -> Text -> IO ()
forall (m :: * -> *). MonadIO m => Text -> m ()
putText Text
e
    Right [(Text, [(Double, Text)])]
rs -> do
      let ab1s :: [(Text, ByteString)]
ab1s = (\(Text
n, [(Double, Text)]
r) -> (Text
n, (Text, [(Double, Text)]) -> ByteString
generateAb1 (Text
n, [(Double, Text)]
r))) ((Text, [(Double, Text)]) -> (Text, ByteString))
-> [(Text, [(Double, Text)])] -> [(Text, ByteString)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(Text, [(Double, Text)])]
rs
      ((Text, ByteString) -> IO ()) -> [(Text, ByteString)] -> IO ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
(a -> f b) -> t a -> f ()
traverse_ (\(Text
name, ByteString
ab1) -> String -> ByteString -> IO ()
BS.writeFile (String
dest String -> ShowS
</> Text -> String
Txt.unpack Text
name String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
".ab1") (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
BSL.toStrict ByteString
ab1) [(Text, ByteString)]
ab1s


-- | Create the 'ByteString' data for an AB1 given the data from a weighted FASTA (see 'readWeightedFasta')
generateAb1 :: (Text, [(Double, Text)]) -> BSL.ByteString
generateAb1 :: (Text, [(Double, Text)]) -> ByteString
generateAb1 (Text
fName, [(Double, Text)]
sourceFasta) = 
  let
    tr :: TraceData
tr = [(Double, Text)] -> TraceData
generateTraceData [(Double, Text)]
sourceFasta
    valsPerBase :: Int
valsPerBase = TraceData -> Int
trValsPerBase TraceData
tr
    generatedFastaLen :: Int
generatedFastaLen = (Text -> Int
Txt.length (Text -> Int) -> Text -> Int
forall a b. (a -> b) -> a -> b
$ TraceData -> Text
trFasta TraceData
tr)

    -- The point that is the peak of the trace, i.e. mid point of trace for a single base
    midPeek :: Int
midPeek = Int
valsPerBase Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2
    -- Get the peak locations for all bases
    peakLocations :: [Int]
peakLocations = Int -> [Int] -> [Int]
forall a. Int -> [a] -> [a]
take Int
generatedFastaLen [Int
midPeek, Int
valsPerBase Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
midPeek..]

    -- Sample name (from the FASTA name)
    sampleName :: Text
sampleName = (Text, Text) -> Text
forall a b. (a, b) -> a
fst ((Text, Text) -> Text) -> (Text -> (Text, Text)) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> (Text, Text)
Txt.breakOn Text
"_" (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
fName

    -- Create the ABIF directories
    dirs :: [Directory]
dirs = [ Int -> [Int16] -> Directory
mkData  Int
9 ([Int16] -> Directory) -> [Int16] -> Directory
forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData09G TraceData
tr -- G
           , Int -> [Int16] -> Directory
mkData Int
10 ([Int16] -> Directory) -> [Int16] -> Directory
forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData10A TraceData
tr -- A
           , Int -> [Int16] -> Directory
mkData Int
11 ([Int16] -> Directory) -> [Int16] -> Directory
forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData11T TraceData
tr -- T
           , Int -> [Int16] -> Directory
mkData Int
12 ([Int16] -> Directory) -> [Int16] -> Directory
forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData12C TraceData
tr -- C
           , Base -> Base -> Base -> Base -> Directory
mkBaseOrder Base
BaseG Base
BaseA Base
BaseT Base
BaseC -- Base order, should be GATC for 3500
           , Int16 -> Directory
mkLane Int16
1 -- Lane or capliary number
           , Text -> Directory
mkCalledBases (Text -> Directory) -> Text -> Directory
forall a b. (a -> b) -> a -> b
$ TraceData -> Text
trFasta TraceData
tr -- Called bases
           , Int -> Text -> Directory
mkMobilityFileName Int
1 Text
"KB_3500_POP7_BDTv3.mob" -- Mobility file name
           , Int -> Text -> Directory
mkMobilityFileName Int
2 Text
"KB_3500_POP7_BDTv3.mob" -- Mobility file name
           , [Int16] -> Directory
mkPeakLocations ([Int16] -> Directory) -> [Int16] -> Directory
forall a b. (a -> b) -> a -> b
$ Int -> Int16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int16) -> [Int] -> [Int16]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Int]
peakLocations -- Peak locations
           , Int16 -> Int16 -> Int16 -> Int16 -> Directory
mkDyeSignalStrength Int16
53 Int16
75 Int16
79 Int16
48 -- Signal strength per dye
           , Text -> Directory
mkSampleName Text
sampleName  -- Sample name
           , Text -> Directory
mkComment Text
"Generated by HyraxBio AB1 generator"
           ]

    -- The ABIF
    abif :: Abif
abif = Abif :: Header -> Directory -> [Directory] -> Abif
Abif { aHeader :: Header
aHeader = Header
mkHeader
                , aRootDir :: Directory
aRootDir = Directory
mkRoot
                , aDirs :: [Directory]
aDirs = [Directory]
dirs
                }
            
  in
  -- Generate the data
  Put -> ByteString
B.runPut (Abif -> Put
putAbif Abif
abif)


-- | Generate the traces for the AB1 from the parsed weighted FASTA
generateTraceData :: [(Double, Text)] -> TraceData
generateTraceData :: [(Double, Text)] -> TraceData
generateTraceData [(Double, Text)]
weighted =
  let
    weightedNucs' :: [[(Double, String)]]
weightedNucs' = (\(Double
w, Text
ns) -> (Double
w,) (String -> (Double, String))
-> (Char -> String) -> Char -> (Double, String)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> String
unIupac (Char -> (Double, String)) -> String -> [(Double, String)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> String
Txt.unpack Text
ns) ((Double, Text) -> [(Double, String)])
-> [(Double, Text)] -> [[(Double, String)]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(Double, Text)]
weighted
    weightedNucs :: [[(Double, String)]]
weightedNucs = [[(Double, String)]] -> [[(Double, String)]]
forall a. [[a]] -> [[a]]
Lst.transpose [[(Double, String)]]
weightedNucs'
  
    -- Values for a base that was present. This defines the shape of the chromatogram curve, and defines the number of values per base
    curve :: [Int]
curve = [Int
0, Int
0, Int
128, Int
512, Int
1024, Int
1024, Int
512, Int
128, Int
0, Int
0]
    valsPerBase :: Int
valsPerBase = [Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
curve

    -- Create the G, A, T and C traces
    data09G :: [Int16]
data09G = [[Int16]] -> [Int16]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int16]] -> [Int16]) -> [[Int16]] -> [Int16]
forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'G' ([(Double, String)] -> [Int16])
-> [[(Double, String)]] -> [[Int16]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs
    data10A :: [Int16]
data10A = [[Int16]] -> [Int16]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int16]] -> [Int16]) -> [[Int16]] -> [Int16]
forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'A' ([(Double, String)] -> [Int16])
-> [[(Double, String)]] -> [[Int16]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs
    data11T :: [Int16]
data11T = [[Int16]] -> [Int16]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int16]] -> [Int16]) -> [[Int16]] -> [Int16]
forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'T' ([(Double, String)] -> [Int16])
-> [[(Double, String)]] -> [[Int16]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs
    data12C :: [Int16]
data12C = [[Int16]] -> [Int16]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int16]] -> [Int16]) -> [[Int16]] -> [Int16]
forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'C' ([(Double, String)] -> [Int16])
-> [[(Double, String)]] -> [[Int16]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs

    -- Create fasta sequence for the trace
    fastaSeq :: [String]
fastaSeq = [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([String] -> String) -> [[String]] -> [String]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((Double, String) -> String
forall a b. (a, b) -> b
snd ((Double, String) -> String) -> [[(Double, String)]] -> [[String]]
forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<<$>> [[(Double, String)]]
weightedNucs)
    fasta :: Text
fasta = String -> Text
Txt.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ [String] -> String
iupac [String]
fastaSeq
  in      
  TraceData :: [Int16]
-> [Int16] -> [Int16] -> [Int16] -> Int -> Text -> TraceData
TraceData { trData09G :: [Int16]
trData09G = [Int16]
data09G
            , trData10A :: [Int16]
trData10A = [Int16]
data10A
            , trData11T :: [Int16]
trData11T = [Int16]
data11T
            , trData12C :: [Int16]
trData12C = [Int16]
data12C
            , trFasta :: Text
trFasta = Text
fasta
            , trValsPerBase :: Int
trValsPerBase = Int
valsPerBase
            }

  where
    getWeightedTrace :: [Int] -> Char -> [(Double, [Char])] -> [Int16]
    getWeightedTrace :: [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
nuc [(Double, String)]
ws =
      let
        found :: [(Double, String)]
found = ((Double, String) -> Bool)
-> [(Double, String)] -> [(Double, String)]
forall a. (a -> Bool) -> [a] -> [a]
filter ((Char
nuc Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`) (String -> Bool)
-> ((Double, String) -> String) -> (Double, String) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double, String) -> String
forall a b. (a, b) -> b
snd) [(Double, String)]
ws
        score' :: Double
score' = (Double -> Double -> Double) -> Double -> [Double] -> Double
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Double -> Double -> Double
forall a. Num a => a -> a -> a
(+) Double
0 ([Double] -> Double) -> [Double] -> Double
forall a b. (a -> b) -> a -> b
$ (Double, String) -> Double
forall a b. (a, b) -> a
fst ((Double, String) -> Double) -> [(Double, String)] -> [Double]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(Double, String)]
found
        score :: Double
score = Double -> Double -> Double
forall a. Ord a => a -> a -> a
min Double
1 (Double -> Double) -> (Double -> Double) -> Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> Double -> Double
forall a. Ord a => a -> a -> a
max Double
0 (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
score'
        wave :: [Int16]
wave = Double -> Int16
forall a b. (RealFrac a, Integral b) => a -> b
floor (Double -> Int16) -> (Int -> Double) -> Int -> Int16
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double
score Double -> Double -> Double
forall a. Num a => a -> a -> a
*) (Double -> Double) -> (Int -> Double) -> Int -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int16) -> [Int] -> [Int16]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Int]
curve
      in
      [Int16]
wave


-- | Read a weighted FASTA file. See the module documentation for details on the format of the weighted FASTA 
-- Reads with a weight followed by an `R` are reverse reads, and the AB1 generated will contain the complemeted
-- sequence.
--
-- e.g. weighted FASTA
--
-- @
-- > 1
-- ACAG
-- > 0.3
-- _GT
-- > 0.2
-- _G
-- @
--
--
-- The result data has the type
-- 
-- @
--   [('Double', 'Text')]
--     ^        ^
--     |        |
--     |        +---- read 
--     | 
--     +---- weight
-- @
--
readWeightedFasta :: ByteString -> Either Text [(Double, Text)]
readWeightedFasta :: ByteString -> Either Text [(Double, Text)]
readWeightedFasta ByteString
fastaData = 
  case Text -> Either Text [Fasta]
parseFasta (Text -> Either Text [Fasta]) -> Text -> Either Text [Fasta]
forall a b. (a -> b) -> a -> b
$ ByteString -> Text
TxtE.decodeUtf8 ByteString
fastaData of
    Left Text
e -> Text -> Either Text [(Double, Text)]
forall a b. a -> Either a b
Left Text
e
    Right [Fasta]
fs -> [Fasta] -> Either Text [(Double, Text)]
getWeightedFasta [Fasta]
fs

  where
    getWeightedFasta :: [Fasta] -> Either Text [(Double, Text)]
    getWeightedFasta :: [Fasta] -> Either Text [(Double, Text)]
getWeightedFasta [Fasta]
fs = 
      case [Either Text (Double, Text)] -> Either Text [(Double, Text)]
forall (t :: * -> *) (f :: * -> *) a.
(Traversable t, Applicative f) =>
t (f a) -> f (t a)
sequenceA ([Either Text (Double, Text)] -> Either Text [(Double, Text)])
-> [Either Text (Double, Text)] -> Either Text [(Double, Text)]
forall a b. (a -> b) -> a -> b
$ Fasta -> Either Text (Double, Text)
readWeighted (Fasta -> Either Text (Double, Text))
-> [Fasta] -> [Either Text (Double, Text)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Fasta]
fs of
        Left Text
e -> Text -> Either Text [(Double, Text)]
forall a b. a -> Either a b
Left Text
e
        Right [(Double, Text)]
r -> [(Double, Text)] -> Either Text [(Double, Text)]
forall a b. b -> Either a b
Right [(Double, Text)]
r

    readWeighted :: Fasta -> Either Text (Double, Text)
    readWeighted :: Fasta -> Either Text (Double, Text)
readWeighted (Fasta Text
hdr' Text
dta) =
      let (Text -> Text
processNucs, Text
hdr) =
            -- If there is a 'R' suffix, then generate a reverse sequence
            --  Which means complement each nucleotide and then reverse the string
            if Text -> Text -> Bool
Txt.isSuffixOf Text
"R" Text
hdr'
            then (Text -> Text
Txt.reverse (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
complementNucleotides, Text -> Text
Txt.strip (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
Txt.dropEnd Int
1 (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
hdr')
            else (Text -> Text
forall a. a -> a
identity, Text
hdr')
      in
      
      case (String -> Maybe Double
forall a. Read a => String -> Maybe a
readMaybe (String -> Maybe Double)
-> (Text -> String) -> Text -> Maybe Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
Txt.unpack (Text -> Maybe Double) -> Text -> Maybe Double
forall a b. (a -> b) -> a -> b
$ Text
hdr :: Maybe Double) of
        Just Double
weight -> (Double, Text) -> Either Text (Double, Text)
forall a b. b -> Either a b
Right (Double -> Double -> Double
forall a. Ord a => a -> a -> a
min Double
1 (Double -> Double) -> (Double -> Double) -> Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> Double -> Double
forall a. Ord a => a -> a -> a
max Double
0 (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
weight, Text -> Text
processNucs (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
Txt.strip Text
dta)
        Maybe Double
Nothing -> Text -> Either Text (Double, Text)
forall a b. a -> Either a b
Left (Text -> Either Text (Double, Text))
-> Text -> Either Text (Double, Text)
forall a b. (a -> b) -> a -> b
$ Text
"Invalid header reading, expecting numeric weight, got: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
hdr

  

-- | Read all FASTA files in a directory
--
-- The result data has the type
-- 
-- @
--                     [ ('Text', [('Double', 'Text')]) ]
--                        ^       ^       ^
--                        |       |       |
-- file name -------------+       |       +---- read 
--                                | 
--                                +---- weight
-- @
--
readWeightedFastas :: FilePath -> IO (Either Text [(Text, [(Double, Text)])])
readWeightedFastas :: String -> IO (Either Text [(Text, [(Double, Text)])])
readWeightedFastas String
source = do
  [String]
files <- (String -> Bool) -> [String] -> [String]
forall a. (a -> Bool) -> [a] -> [a]
filter (Text -> Text -> Bool
Txt.isSuffixOf Text
".fasta" (Text -> Bool) -> (String -> Text) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
Txt.pack) ([String] -> [String]) -> IO [String] -> IO [String]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO [String]
getFiles String
source
  let names :: [Text]
names = String -> Text
Txt.pack (String -> Text) -> ShowS -> String -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
FP.takeBaseName (String -> Text) -> [String] -> [Text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [String]
files
  [ByteString]
contents <- (String -> IO ByteString) -> [String] -> IO [ByteString]
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse String -> IO ByteString
BS.readFile [String]
files
  
  case [Either Text [(Double, Text)]] -> Either Text [[(Double, Text)]]
forall (t :: * -> *) (f :: * -> *) a.
(Traversable t, Applicative f) =>
t (f a) -> f (t a)
sequenceA ([Either Text [(Double, Text)]] -> Either Text [[(Double, Text)]])
-> [Either Text [(Double, Text)]] -> Either Text [[(Double, Text)]]
forall a b. (a -> b) -> a -> b
$ ByteString -> Either Text [(Double, Text)]
readWeightedFasta (ByteString -> Either Text [(Double, Text)])
-> [ByteString] -> [Either Text [(Double, Text)]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [ByteString]
contents of
    Left Text
e -> Either Text [(Text, [(Double, Text)])]
-> IO (Either Text [(Text, [(Double, Text)])])
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either Text [(Text, [(Double, Text)])]
 -> IO (Either Text [(Text, [(Double, Text)])]))
-> (Text -> Either Text [(Text, [(Double, Text)])])
-> Text
-> IO (Either Text [(Text, [(Double, Text)])])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Either Text [(Text, [(Double, Text)])]
forall a b. a -> Either a b
Left (Text -> IO (Either Text [(Text, [(Double, Text)])]))
-> Text -> IO (Either Text [(Text, [(Double, Text)])])
forall a b. (a -> b) -> a -> b
$ Text
e
    Right [[(Double, Text)]]
rs -> Either Text [(Text, [(Double, Text)])]
-> IO (Either Text [(Text, [(Double, Text)])])
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either Text [(Text, [(Double, Text)])]
 -> IO (Either Text [(Text, [(Double, Text)])]))
-> ([(Text, [(Double, Text)])]
    -> Either Text [(Text, [(Double, Text)])])
-> [(Text, [(Double, Text)])]
-> IO (Either Text [(Text, [(Double, Text)])])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Text, [(Double, Text)])]
-> Either Text [(Text, [(Double, Text)])]
forall a b. b -> Either a b
Right ([(Text, [(Double, Text)])]
 -> IO (Either Text [(Text, [(Double, Text)])]))
-> [(Text, [(Double, Text)])]
-> IO (Either Text [(Text, [(Double, Text)])])
forall a b. (a -> b) -> a -> b
$ [Text] -> [[(Double, Text)]] -> [(Text, [(Double, Text)])]
forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
names [[(Double, Text)]]
rs

  
-- | Find all files in a directory
getFiles :: FilePath -> IO [FilePath]
getFiles :: String -> IO [String]
getFiles String
p = do
  [String]
entries <- (String
p String -> ShowS
</>) ShowS -> IO [String] -> IO [String]
forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<<$>> String -> IO [String]
Dir.listDirectory String
p
  (String -> IO Bool) -> [String] -> IO [String]
forall (m :: * -> *) a.
Applicative m =>
(a -> m Bool) -> [a] -> m [a]
filterM String -> IO Bool
Dir.doesFileExist [String]
entries


-- | Convert a IUPAC ambiguity code to the set of nucleotides it represents
unIupac :: Char -> [Char]
unIupac :: Char -> String
unIupac Char
c =
  case Char
c of
    Char
'T' -> String
"T"
    Char
'C' -> String
"C"
    Char
'A' -> String
"A"
    Char
'G' -> String
"G"
   
    Char
'U' -> String
"T"
    Char
'M' -> String
"AC"
    Char
'R' -> String
"AG"
    Char
'W' -> String
"AT"
    Char
'S' -> String
"CG"
    Char
'Y' -> String
"CT"
    Char
'K' -> String
"GT"
    Char
'V' -> String
"ACG"
    Char
'H' -> String
"ACT"
    Char
'D' -> String
"AGT"
    Char
'B' -> String
"CGT"
    Char
'N' -> String
"GATC"
  
    Char
'X' -> String
"GATC"
    Char
_   -> String
""


-- | Given a set of nucleotides get the IUPAC ambiguity code
iupac :: [[Char]] -> [Char]
iupac :: [String] -> String
iupac [String]
ns =
  String -> Char
forall (t :: * -> *). Foldable t => t Char -> Char
go (String -> Char) -> [String] -> String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [String]
ns

  where
    go :: t Char -> Char
go t Char
cs =
      let
        a :: Bool
a = Char
'A' Char -> t Char -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
        c :: Bool
c = Char
'C' Char -> t Char -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
        g :: Bool
g = Char
'G' Char -> t Char -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
        t :: Bool
t = Char
'T' Char -> t Char -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
      in
      case (Bool
a, Bool
c, Bool
g, Bool
t) of
        (Bool
True,  Bool
False, Bool
False, Bool
False) -> Char
'A'
        (Bool
False, Bool
True,  Bool
False, Bool
False) -> Char
'C'
        (Bool
False, Bool
False, Bool
True,  Bool
False) -> Char
'G'
        (Bool
False, Bool
False, Bool
False, Bool
True ) -> Char
'T'
        (Bool
True,  Bool
True,  Bool
False, Bool
False) -> Char
'M'
        (Bool
True,  Bool
False, Bool
True,  Bool
False) -> Char
'R'
        (Bool
True,  Bool
False, Bool
False, Bool
True ) -> Char
'W'
        (Bool
False, Bool
True,  Bool
True,  Bool
False) -> Char
'S'
        (Bool
False, Bool
True,  Bool
False, Bool
True ) -> Char
'Y'
        (Bool
False, Bool
False, Bool
True,  Bool
True ) -> Char
'K'
        (Bool
True,  Bool
True,  Bool
True,  Bool
False) -> Char
'V'
        (Bool
True,  Bool
True,  Bool
False, Bool
True ) -> Char
'H'
        (Bool
True,  Bool
False, Bool
True,  Bool
True ) -> Char
'D'
        (Bool
False, Bool
True,  Bool
True,  Bool
True ) -> Char
'B'
        (Bool
True,  Bool
True,  Bool
True,  Bool
True ) -> Char
'N'
        (Bool, Bool, Bool, Bool)
_ -> Char
'_'


-- | Return the complement of a nucelotide string
complementNucleotides :: Text -> Text
complementNucleotides :: Text -> Text
complementNucleotides Text
ns =
  let
    un :: [String]
un = Char -> String
unIupac (Char -> String) -> String -> [String]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> String
Txt.unpack Text
ns
    comp :: [String]
comp = Char -> Char
complementNuc (Char -> Char) -> [String] -> [String]
forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<<$>> [String]
un
    iu :: String
iu = [String] -> String
iupac [String]
comp
  in
  String -> Text
Txt.pack String
iu

  where
    complementNuc :: Char -> Char
complementNuc Char
'A' = Char
'T'
    complementNuc Char
'G' = Char
'C'
    complementNuc Char
'T' = Char
'A'
    complementNuc Char
'C' = Char
'G'
    complementNuc Char
x = Char
x