{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-|
Module      : Hyax.Abif.Generate
Description : Generate AB1 from a weighted FASTA
Copyright   : (c) HyraxBio, 2018
License     : BSD3
Maintainer  : andre@hyraxbio.co.za, andre@andrevdm.com

Functionality for generating AB1 files from an input FASTA. These AB1s are supported by both PHRED and recall,
if you are using other software you may need to add additional required sections.

= Weighted reads

The input FASTA files have "weighted" reads. The name for each read is an value between 0 and 1
 which specifies the height of the peak relative to a full peak. 


== Single read

The most simple example is a single FASTA with a single read with a weight of 1

@
> 1
ACTG
@

<<docs/eg_actg.png>>

The chromatogram for this AB1 shows perfect traces for the input `ACTG` nucleotides with a full height peak.


== Mixes & multiple reads 

The source FASTA can have multiple reads, which results in a chromatogram with mixes

@
> 1
ACAG
> 0.3
ACTG
@

<<docs/eg_acag_acgt_mix.png>>

There is an `AT` mix at the third nucleotide. The first read has a weight of 1 and the second a weight of 0.3.
The chromatogram shows the mix and the `T` with a lower peak (30% of the `A` peak)

== Summing weights

 - The weigh of a read specifies the intensity of the peak from 0 to 1. 
 - Weights for each position are added to a maximum of 1 per nucleotide
 - You can use `_` as a "blank" nucleotide, in which only the nucleotides from other reads will be considered

E.g.

@
> 1
ACAG
> 0.3
_GT
> 0.2
_G
@

<<docs/eg_multi_mix.png>>


== Reverse reads

A weighted FASTA can represent a reverse read. To do this add a `R` suffix to the weight.
The data you enter should be entered as if it was a forward read. This data will be complemented
and reversed before writing to the ABIF

E.g.

@
> 1R
ACAG
@

See README.md for additional details and examples
-}
module Hyrax.Abif.Generate
    ( generateAb1s
    , generateAb1
    , readWeightedFasta
    , iupac
    , unIupac
    , complementNucleotides
    ) where

import           Protolude
import qualified Data.Text as Txt
import qualified Data.Text.Encoding as TxtE
import qualified Data.List as Lst
import qualified Data.Binary.Put as B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified System.FilePath as FP
import           System.FilePath ((</>))
import qualified System.Directory as Dir

import           Hyrax.Abif
import           Hyrax.Abif.Write
import           Hyrax.Abif.Fasta

data TraceData = TraceData { TraceData -> [Int16]
trData09G :: ![Int16]
                           , TraceData -> [Int16]
trData10A :: ![Int16]
                           , TraceData -> [Int16]
trData11T :: ![Int16]
                           , TraceData -> [Int16]
trData12C :: ![Int16]
                           , TraceData -> Int
trValsPerBase :: !Int
                           , TraceData -> Text
trFasta :: !Text
                           } deriving (Int -> TraceData -> ShowS
[TraceData] -> ShowS
TraceData -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TraceData] -> ShowS
$cshowList :: [TraceData] -> ShowS
show :: TraceData -> String
$cshow :: TraceData -> String
showsPrec :: Int -> TraceData -> ShowS
$cshowsPrec :: Int -> TraceData -> ShowS
Show)

-- | Generate a set of AB1s. One for every FASTA found in the source directory
generateAb1s :: FilePath -> FilePath -> IO ()
generateAb1s :: String -> String -> IO ()
generateAb1s String
source String
dest = do
  Bool -> String -> IO ()
Dir.createDirectoryIfMissing Bool
True String
dest
  Either Text [(Text, [(Double, Text)])]
weighted <- String -> IO (Either Text [(Text, [(Double, Text)])])
readWeightedFastas String
source

  case Either Text [(Text, [(Double, Text)])]
weighted of
    Left Text
e -> forall (m :: * -> *). MonadIO m => Text -> m ()
putText Text
e
    Right [(Text, [(Double, Text)])]
rs -> do
      let ab1s :: [(Text, ByteString)]
ab1s = (\(Text
n, [(Double, Text)]
r) -> (Text
n, (Text, [(Double, Text)]) -> ByteString
generateAb1 (Text
n, [(Double, Text)]
r))) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(Text, [(Double, Text)])]
rs
      forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
(a -> f b) -> t a -> f ()
traverse_ (\(Text
name, ByteString
ab1) -> String -> ByteString -> IO ()
BS.writeFile (String
dest String -> ShowS
</> Text -> String
Txt.unpack Text
name forall a. Semigroup a => a -> a -> a
<> String
".ab1") forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
BSL.toStrict ByteString
ab1) [(Text, ByteString)]
ab1s


-- | Create the 'ByteString' data for an AB1 given the data from a weighted FASTA (see 'readWeightedFasta')
generateAb1 :: (Text, [(Double, Text)]) -> BSL.ByteString
generateAb1 :: (Text, [(Double, Text)]) -> ByteString
generateAb1 (Text
fName, [(Double, Text)]
sourceFasta) = 
  let
    tr :: TraceData
tr = [(Double, Text)] -> TraceData
generateTraceData [(Double, Text)]
sourceFasta
    valsPerBase :: Int
valsPerBase = TraceData -> Int
trValsPerBase TraceData
tr
    generatedFastaLen :: Int
generatedFastaLen = (Text -> Int
Txt.length forall a b. (a -> b) -> a -> b
$ TraceData -> Text
trFasta TraceData
tr)

    -- The point that is the peak of the trace, i.e. mid point of trace for a single base
    midPeek :: Int
midPeek = Int
valsPerBase forall a. Integral a => a -> a -> a
`div` Int
2
    -- Get the peak locations for all bases
    peakLocations :: [Int]
peakLocations = forall a. Int -> [a] -> [a]
take Int
generatedFastaLen [Int
midPeek, Int
valsPerBase forall a. Num a => a -> a -> a
+ Int
midPeek..]

    -- Sample name (from the FASTA name)
    sampleName :: Text
sampleName = forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> (Text, Text)
Txt.breakOn Text
"_" forall a b. (a -> b) -> a -> b
$ Text
fName

    -- Create the ABIF directories
    dirs :: [Directory]
dirs = [ Int -> [Int16] -> Directory
mkData  Int
9 forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData09G TraceData
tr -- G
           , Int -> [Int16] -> Directory
mkData Int
10 forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData10A TraceData
tr -- A
           , Int -> [Int16] -> Directory
mkData Int
11 forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData11T TraceData
tr -- T
           , Int -> [Int16] -> Directory
mkData Int
12 forall a b. (a -> b) -> a -> b
$ TraceData -> [Int16]
trData12C TraceData
tr -- C
           , Base -> Base -> Base -> Base -> Directory
mkBaseOrder Base
BaseG Base
BaseA Base
BaseT Base
BaseC -- Base order, should be GATC for 3500
           , Int16 -> Directory
mkLane Int16
1 -- Lane or capliary number
           , Text -> Directory
mkCalledBases forall a b. (a -> b) -> a -> b
$ TraceData -> Text
trFasta TraceData
tr -- Called bases
           , Int -> Text -> Directory
mkMobilityFileName Int
1 Text
"KB_3500_POP7_BDTv3.mob" -- Mobility file name
           , Int -> Text -> Directory
mkMobilityFileName Int
2 Text
"KB_3500_POP7_BDTv3.mob" -- Mobility file name
           , [Int16] -> Directory
mkPeakLocations forall a b. (a -> b) -> a -> b
$ forall a b. (Integral a, Num b) => a -> b
fromIntegral forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Int]
peakLocations -- Peak locations
           , Int16 -> Int16 -> Int16 -> Int16 -> Directory
mkDyeSignalStrength Int16
53 Int16
75 Int16
79 Int16
48 -- Signal strength per dye
           , Text -> Directory
mkSampleName Text
sampleName  -- Sample name
           , Text -> Directory
mkComment Text
"Generated by HyraxBio AB1 generator"
           ]

    -- The ABIF
    abif :: Abif
abif = Abif { aHeader :: Header
aHeader = Header
mkHeader
                , aRootDir :: Directory
aRootDir = Directory
mkRoot
                , aDirs :: [Directory]
aDirs = [Directory]
dirs
                }
            
  in
  -- Generate the data
  Put -> ByteString
B.runPut (Abif -> Put
putAbif Abif
abif)


-- | Generate the traces for the AB1 from the parsed weighted FASTA
generateTraceData :: [(Double, Text)] -> TraceData
generateTraceData :: [(Double, Text)] -> TraceData
generateTraceData [(Double, Text)]
weighted =
  let
    weightedNucs' :: [[(Double, String)]]
weightedNucs' = (\(Double
w, Text
ns) -> (Double
w,) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> String
unIupac forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> String
Txt.unpack Text
ns) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(Double, Text)]
weighted
    weightedNucs :: [[(Double, String)]]
weightedNucs = forall a. [[a]] -> [[a]]
Lst.transpose [[(Double, String)]]
weightedNucs'
  
    -- Values for a base that was present. This defines the shape of the chromatogram curve, and defines the number of values per base
    curve :: [Int]
curve = [Int
0, Int
0, Int
128, Int
512, Int
1024, Int
1024, Int
512, Int
128, Int
0, Int
0]
    valsPerBase :: Int
valsPerBase = forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
curve

    -- Create the G, A, T and C traces
    data09G :: [Int16]
data09G = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'G' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs
    data10A :: [Int16]
data10A = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'A' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs
    data11T :: [Int16]
data11T = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'T' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs
    data12C :: [Int16]
data12C = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall a b. (a -> b) -> a -> b
$ [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
'C' forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [[(Double, String)]]
weightedNucs

    -- Create fasta sequence for the trace
    fastaSeq :: [String]
fastaSeq = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall a b. (a, b) -> b
snd forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<<$>> [[(Double, String)]]
weightedNucs)
    fasta :: Text
fasta = String -> Text
Txt.pack forall a b. (a -> b) -> a -> b
$ [String] -> String
iupac [String]
fastaSeq
  in      
  TraceData { trData09G :: [Int16]
trData09G = [Int16]
data09G
            , trData10A :: [Int16]
trData10A = [Int16]
data10A
            , trData11T :: [Int16]
trData11T = [Int16]
data11T
            , trData12C :: [Int16]
trData12C = [Int16]
data12C
            , trFasta :: Text
trFasta = Text
fasta
            , trValsPerBase :: Int
trValsPerBase = Int
valsPerBase
            }

  where
    getWeightedTrace :: [Int] -> Char -> [(Double, [Char])] -> [Int16]
    getWeightedTrace :: [Int] -> Char -> [(Double, String)] -> [Int16]
getWeightedTrace [Int]
curve Char
nuc [(Double, String)]
ws =
      let
        found :: [(Double, String)]
found = forall a. (a -> Bool) -> [a] -> [a]
filter ((Char
nuc forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) [(Double, String)]
ws
        score' :: Double
score' = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' forall a. Num a => a -> a -> a
(+) Double
0 forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(Double, String)]
found
        score :: Double
score = forall a. Ord a => a -> a -> a
min Double
1 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => a -> a -> a
max Double
0 forall a b. (a -> b) -> a -> b
$ Double
score'
        wave :: [Int16]
wave = forall a b. (RealFrac a, Integral b) => a -> b
floor forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double
score forall a. Num a => a -> a -> a
*) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (Integral a, Num b) => a -> b
fromIntegral forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Int]
curve
      in
      [Int16]
wave


-- | Read a weighted FASTA file. See the module documentation for details on the format of the weighted FASTA 
-- Reads with a weight followed by an `R` are reverse reads, and the AB1 generated will contain the complemeted
-- sequence.
--
-- e.g. weighted FASTA
--
-- @
-- > 1
-- ACAG
-- > 0.3
-- _GT
-- > 0.2
-- _G
-- @
--
--
-- The result data has the type
-- 
-- @
--   [('Double', 'Text')]
--     ^        ^
--     |        |
--     |        +---- read 
--     | 
--     +---- weight
-- @
--
readWeightedFasta :: ByteString -> Either Text [(Double, Text)]
readWeightedFasta :: ByteString -> Either Text [(Double, Text)]
readWeightedFasta ByteString
fastaData = 
  case Text -> Either Text [Fasta]
parseFasta forall a b. (a -> b) -> a -> b
$ ByteString -> Text
TxtE.decodeUtf8 ByteString
fastaData of
    Left Text
e -> forall a b. a -> Either a b
Left Text
e
    Right [Fasta]
fs -> [Fasta] -> Either Text [(Double, Text)]
getWeightedFasta [Fasta]
fs

  where
    getWeightedFasta :: [Fasta] -> Either Text [(Double, Text)]
    getWeightedFasta :: [Fasta] -> Either Text [(Double, Text)]
getWeightedFasta [Fasta]
fs = 
      case forall (t :: * -> *) (f :: * -> *) a.
(Traversable t, Applicative f) =>
t (f a) -> f (t a)
sequenceA forall a b. (a -> b) -> a -> b
$ Fasta -> Either Text (Double, Text)
readWeighted forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Fasta]
fs of
        Left Text
e -> forall a b. a -> Either a b
Left Text
e
        Right [(Double, Text)]
r -> forall a b. b -> Either a b
Right [(Double, Text)]
r

    readWeighted :: Fasta -> Either Text (Double, Text)
    readWeighted :: Fasta -> Either Text (Double, Text)
readWeighted (Fasta Text
hdr' Text
dta) =
      let (Text -> Text
processNucs, Text
hdr) =
            -- If there is a 'R' suffix, then generate a reverse sequence
            --  Which means complement each nucleotide and then reverse the string
            if Text -> Text -> Bool
Txt.isSuffixOf Text
"R" Text
hdr'
            then (Text -> Text
Txt.reverse forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
complementNucleotides, Text -> Text
Txt.strip forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
Txt.dropEnd Int
1 forall a b. (a -> b) -> a -> b
$ Text
hdr')
            else (forall a. a -> a
identity, Text
hdr')
      in
      
      case (forall b a. (Read b, StringConv a String) => a -> Maybe b
readMaybe forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
Txt.unpack forall a b. (a -> b) -> a -> b
$ Text
hdr :: Maybe Double) of
        Just Double
weight -> forall a b. b -> Either a b
Right (forall a. Ord a => a -> a -> a
min Double
1 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => a -> a -> a
max Double
0 forall a b. (a -> b) -> a -> b
$ Double
weight, Text -> Text
processNucs forall a b. (a -> b) -> a -> b
$ Text -> Text
Txt.strip Text
dta)
        Maybe Double
Nothing -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ Text
"Invalid header reading, expecting numeric weight, got: " forall a. Semigroup a => a -> a -> a
<> Text
hdr

  

-- | Read all FASTA files in a directory
--
-- The result data has the type
-- 
-- @
--                     [ ('Text', [('Double', 'Text')]) ]
--                        ^       ^       ^
--                        |       |       |
-- file name -------------+       |       +---- read 
--                                | 
--                                +---- weight
-- @
--
readWeightedFastas :: FilePath -> IO (Either Text [(Text, [(Double, Text)])])
readWeightedFastas :: String -> IO (Either Text [(Text, [(Double, Text)])])
readWeightedFastas String
source = do
  [String]
files <- forall a. (a -> Bool) -> [a] -> [a]
filter (Text -> Text -> Bool
Txt.isSuffixOf Text
".fasta" forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
Txt.pack) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO [String]
getFiles String
source
  let names :: [Text]
names = String -> Text
Txt.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
FP.takeBaseName forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [String]
files
  [ByteString]
contents <- forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
traverse String -> IO ByteString
BS.readFile [String]
files
  
  case forall (t :: * -> *) (f :: * -> *) a.
(Traversable t, Applicative f) =>
t (f a) -> f (t a)
sequenceA forall a b. (a -> b) -> a -> b
$ ByteString -> Either Text [(Double, Text)]
readWeightedFasta forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [ByteString]
contents of
    Left Text
e -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ Text
e
    Right [[(Double, Text)]]
rs -> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a b. [a] -> [b] -> [(a, b)]
zip [Text]
names [[(Double, Text)]]
rs

  
-- | Find all files in a directory
getFiles :: FilePath -> IO [FilePath]
getFiles :: String -> IO [String]
getFiles String
p = do
  [String]
entries <- (String
p String -> ShowS
</>) forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<<$>> String -> IO [String]
Dir.listDirectory String
p
  forall (m :: * -> *) a.
Applicative m =>
(a -> m Bool) -> [a] -> m [a]
filterM String -> IO Bool
Dir.doesFileExist [String]
entries


-- | Convert a IUPAC ambiguity code to the set of nucleotides it represents
unIupac :: Char -> [Char]
unIupac :: Char -> String
unIupac Char
c =
  case Char
c of
    Char
'T' -> String
"T"
    Char
'C' -> String
"C"
    Char
'A' -> String
"A"
    Char
'G' -> String
"G"
   
    Char
'U' -> String
"T"
    Char
'M' -> String
"AC"
    Char
'R' -> String
"AG"
    Char
'W' -> String
"AT"
    Char
'S' -> String
"CG"
    Char
'Y' -> String
"CT"
    Char
'K' -> String
"GT"
    Char
'V' -> String
"ACG"
    Char
'H' -> String
"ACT"
    Char
'D' -> String
"AGT"
    Char
'B' -> String
"CGT"
    Char
'N' -> String
"GATC"
  
    Char
'X' -> String
"GATC"
    Char
_   -> String
""


-- | Given a set of nucleotides get the IUPAC ambiguity code
iupac :: [[Char]] -> [Char]
iupac :: [String] -> String
iupac [String]
ns =
  forall {t :: * -> *}. Foldable t => t Char -> Char
go forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [String]
ns

  where
    go :: t Char -> Char
go t Char
cs =
      let
        a :: Bool
a = Char
'A' forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
        c :: Bool
c = Char
'C' forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
        g :: Bool
g = Char
'G' forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
        t :: Bool
t = Char
'T' forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs
      in
      case (Bool
a, Bool
c, Bool
g, Bool
t) of
        (Bool
True,  Bool
False, Bool
False, Bool
False) -> Char
'A'
        (Bool
False, Bool
True,  Bool
False, Bool
False) -> Char
'C'
        (Bool
False, Bool
False, Bool
True,  Bool
False) -> Char
'G'
        (Bool
False, Bool
False, Bool
False, Bool
True ) -> Char
'T'
        (Bool
True,  Bool
True,  Bool
False, Bool
False) -> Char
'M'
        (Bool
True,  Bool
False, Bool
True,  Bool
False) -> Char
'R'
        (Bool
True,  Bool
False, Bool
False, Bool
True ) -> Char
'W'
        (Bool
False, Bool
True,  Bool
True,  Bool
False) -> Char
'S'
        (Bool
False, Bool
True,  Bool
False, Bool
True ) -> Char
'Y'
        (Bool
False, Bool
False, Bool
True,  Bool
True ) -> Char
'K'
        (Bool
True,  Bool
True,  Bool
True,  Bool
False) -> Char
'V'
        (Bool
True,  Bool
True,  Bool
False, Bool
True ) -> Char
'H'
        (Bool
True,  Bool
False, Bool
True,  Bool
True ) -> Char
'D'
        (Bool
False, Bool
True,  Bool
True,  Bool
True ) -> Char
'B'
        (Bool
True,  Bool
True,  Bool
True,  Bool
True ) -> Char
'N'
        (Bool, Bool, Bool, Bool)
_ -> Char
'_'


-- | Return the complement of a nucelotide string
complementNucleotides :: Text -> Text
complementNucleotides :: Text -> Text
complementNucleotides Text
ns =
  let
    un :: [String]
un = Char -> String
unIupac forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> String
Txt.unpack Text
ns
    comp :: [String]
comp = Char -> Char
complementNuc forall (f :: * -> *) (g :: * -> *) a b.
(Functor f, Functor g) =>
(a -> b) -> f (g a) -> f (g b)
<<$>> [String]
un
    iu :: String
iu = [String] -> String
iupac [String]
comp
  in
  String -> Text
Txt.pack String
iu

  where
    complementNuc :: Char -> Char
complementNuc Char
'A' = Char
'T'
    complementNuc Char
'G' = Char
'C'
    complementNuc Char
'T' = Char
'A'
    complementNuc Char
'C' = Char
'G'
    complementNuc Char
x = Char
x