{-# LANGUAGE OverloadedStrings #-}
module SequenceFormats.RareAlleleHistogram (RareAlleleHistogram(..), readHistogramFromHandle,
SitePattern, readHistogram, writeHistogramStdOut, writeHistogramFile, showSitePattern) where
import SequenceFormats.Utils (SeqFormatException(..))
import Control.Applicative (optional)
import Control.Error (assertErr)
import Control.Exception (throw)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Trans.State.Strict (evalStateT)
import qualified Data.Attoparsec.ByteString.Char8 as A
import Data.Char (isAlphaNum)
import Data.Int (Int64)
import Data.List (intercalate, sortBy)
import qualified Data.Map.Strict as Map
import qualified Data.ByteString.Char8 as B
import Pipes.Attoparsec (parse)
import qualified Pipes.ByteString as PB
import System.IO (Handle, IOMode(..), withFile)
data RareAlleleHistogram = RareAlleleHistogram {
RareAlleleHistogram -> [[Char]]
raNames :: [String],
RareAlleleHistogram -> [Int]
raNVec :: [Int],
RareAlleleHistogram -> Int
raMinAf :: Int,
RareAlleleHistogram -> Int
raMaxAf :: Int,
RareAlleleHistogram -> [Int]
raConditionOn :: [Int],
RareAlleleHistogram -> [[Int]]
raExcludePatterns :: [SitePattern],
RareAlleleHistogram -> Int64
raTotalNrSites :: Int64,
RareAlleleHistogram -> Map [Int] Int64
raCounts :: Map.Map SitePattern Int64,
RareAlleleHistogram -> Maybe (Map [Int] (Double, Double))
raJackknifeEstimates :: Maybe (Map.Map SitePattern (Double, Double))
} deriving (RareAlleleHistogram -> RareAlleleHistogram -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RareAlleleHistogram -> RareAlleleHistogram -> Bool
$c/= :: RareAlleleHistogram -> RareAlleleHistogram -> Bool
== :: RareAlleleHistogram -> RareAlleleHistogram -> Bool
$c== :: RareAlleleHistogram -> RareAlleleHistogram -> Bool
Eq, Int -> RareAlleleHistogram -> ShowS
[RareAlleleHistogram] -> ShowS
RareAlleleHistogram -> [Char]
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
showList :: [RareAlleleHistogram] -> ShowS
$cshowList :: [RareAlleleHistogram] -> ShowS
show :: RareAlleleHistogram -> [Char]
$cshow :: RareAlleleHistogram -> [Char]
showsPrec :: Int -> RareAlleleHistogram -> ShowS
$cshowsPrec :: Int -> RareAlleleHistogram -> ShowS
Show)
type SitePattern = [Int]
showSitePattern :: SitePattern -> String
showSitePattern :: [Int] -> [Char]
showSitePattern = forall a. [a] -> [[a]] -> [a]
intercalate [Char]
"," forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map forall a. Show a => a -> [Char]
show
showHistogram :: RareAlleleHistogram -> Either String B.ByteString
showHistogram :: RareAlleleHistogram -> Either [Char] ByteString
showHistogram RareAlleleHistogram
hist = do
forall e. e -> Bool -> Either e ()
assertErr [Char]
"can only print histogram with minAf=1 due to format-legacy" forall a b. (a -> b) -> a -> b
$ RareAlleleHistogram -> Int
raMinAf RareAlleleHistogram
hist forall a. Eq a => a -> a -> Bool
== Int
1
forall e. e -> Bool -> Either e ()
assertErr [Char]
"can only print histogram with no conditioning due to format-legacy" forall a b. (a -> b) -> a -> b
$
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (RareAlleleHistogram -> [Int]
raConditionOn RareAlleleHistogram
hist)
forall e. e -> Bool -> Either e ()
assertErr [Char]
"can only print histogram with no exclude pattern due to format-legacy" forall a b. (a -> b) -> a -> b
$
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (RareAlleleHistogram -> [[Int]]
raExcludePatterns RareAlleleHistogram
hist)
let head0 :: ByteString
head0 = ByteString
"NAMES=" forall a. Semigroup a => a -> a -> a
<> (ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
"," forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map [Char] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. RareAlleleHistogram -> [[Char]]
raNames forall a b. (a -> b) -> a -> b
$ RareAlleleHistogram
hist)
head1 :: ByteString
head1 = ByteString
"N=" forall a. Semigroup a => a -> a -> a
<> ([Char] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [[a]] -> [a]
intercalate [Char]
"," forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map forall a. Show a => a -> [Char]
show forall b c a. (b -> c) -> (a -> b) -> a -> c
. RareAlleleHistogram -> [Int]
raNVec forall a b. (a -> b) -> a -> b
$ RareAlleleHistogram
hist)
head2 :: ByteString
head2 = ByteString
"MAX_M=" forall a. Semigroup a => a -> a -> a
<> ([Char] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> [Char]
show forall b c a. (b -> c) -> (a -> b) -> a -> c
. RareAlleleHistogram -> Int
raMaxAf forall a b. (a -> b) -> a -> b
$ RareAlleleHistogram
hist)
head3 :: ByteString
head3 = ByteString
"TOTAL_SITES=" forall a. Semigroup a => a -> a -> a
<> ([Char] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> [Char]
show forall b c a. (b -> c) -> (a -> b) -> a -> c
. RareAlleleHistogram -> Int64
raTotalNrSites forall a b. (a -> b) -> a -> b
$ RareAlleleHistogram
hist)
body :: [ByteString]
body = do
([Int]
k, Int64
v) <- [([Int], Int64)]
sorted
case RareAlleleHistogram -> Maybe (Map [Int] (Double, Double))
raJackknifeEstimates RareAlleleHistogram
hist of
Maybe (Map [Int] (Double, Double))
Nothing -> [ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
" " [[Char] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> [Char]
showSitePattern forall a b. (a -> b) -> a -> b
$ [Int]
k, [Char] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> [Char]
show forall a b. (a -> b) -> a -> b
$ Int64
v]]
Just Map [Int] (Double, Double)
jkHist -> do
let Just (Double
jkMean, Double
jkSE) = [Int]
k forall k a. Ord k => k -> Map k a -> Maybe a
`Map.lookup` Map [Int] (Double, Double)
jkHist
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
" " [[Char] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> [Char]
showSitePattern forall a b. (a -> b) -> a -> b
$ [Int]
k, [Char] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> [Char]
show forall a b. (a -> b) -> a -> b
$ Int64
v,
[Char] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> [Char]
show forall a b. (a -> b) -> a -> b
$ Double
jkMean, [Char] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> [Char]
show forall a b. (a -> b) -> a -> b
$ Double
jkSE]
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
B.unlines (ByteString
head0forall a. a -> [a] -> [a]
:ByteString
head1forall a. a -> [a] -> [a]
:ByteString
head2forall a. a -> [a] -> [a]
:ByteString
head3forall a. a -> [a] -> [a]
:[ByteString]
body)
where
sorted :: [([Int], Int64)]
sorted = forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (\([Int]
_, Int64
v1) ([Int]
_, Int64
v2) -> forall a. Ord a => a -> a -> Ordering
compare Int64
v2 Int64
v1) forall a b. (a -> b) -> a -> b
$ forall k a. Map k a -> [(k, a)]
Map.toList (RareAlleleHistogram -> Map [Int] Int64
raCounts RareAlleleHistogram
hist)
writeHistogramStdOut :: (MonadIO m) => RareAlleleHistogram -> m ()
writeHistogramStdOut :: forall (m :: * -> *). MonadIO m => RareAlleleHistogram -> m ()
writeHistogramStdOut RareAlleleHistogram
hist =
case RareAlleleHistogram -> Either [Char] ByteString
showHistogram RareAlleleHistogram
hist of
Left [Char]
err -> forall a e. Exception e => e -> a
throw ([Char] -> SeqFormatException
SeqFormatException [Char]
err)
Right ByteString
outStr -> forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ ByteString -> IO ()
B.putStrLn ByteString
outStr
writeHistogramFile :: (MonadIO m) => FilePath -> RareAlleleHistogram -> m ()
writeHistogramFile :: forall (m :: * -> *).
MonadIO m =>
[Char] -> RareAlleleHistogram -> m ()
writeHistogramFile [Char]
outF RareAlleleHistogram
hist =
case RareAlleleHistogram -> Either [Char] ByteString
showHistogram RareAlleleHistogram
hist of
Left [Char]
err -> forall a e. Exception e => e -> a
throw ([Char] -> SeqFormatException
SeqFormatException [Char]
err)
Right ByteString
outStr -> forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ [Char] -> ByteString -> IO ()
B.writeFile [Char]
outF ByteString
outStr
readHistogram :: (MonadIO m) => FilePath -> m RareAlleleHistogram
readHistogram :: forall (m :: * -> *). MonadIO m => [Char] -> m RareAlleleHistogram
readHistogram [Char]
path = forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall r. [Char] -> IOMode -> (Handle -> IO r) -> IO r
withFile [Char]
path IOMode
ReadMode forall (m :: * -> *). MonadIO m => Handle -> m RareAlleleHistogram
readHistogramFromHandle
readHistogramFromHandle :: (MonadIO m) => Handle -> m RareAlleleHistogram
readHistogramFromHandle :: forall (m :: * -> *). MonadIO m => Handle -> m RareAlleleHistogram
readHistogramFromHandle Handle
handle = do
Maybe (Either ParsingError RareAlleleHistogram)
res <- forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m a
evalStateT (forall (m :: * -> *) a b.
(Monad m, ParserInput a) =>
Parser a b -> Parser a m (Maybe (Either ParsingError b))
parse Parser RareAlleleHistogram
parseHistogram) (forall (m :: * -> *).
MonadIO m =>
Handle -> Producer' ByteString m ()
PB.fromHandle Handle
handle)
case Maybe (Either ParsingError RareAlleleHistogram)
res of
Maybe (Either ParsingError RareAlleleHistogram)
Nothing -> forall a e. Exception e => e -> a
throw ([Char] -> SeqFormatException
SeqFormatException [Char]
"histogram file exhausted too early")
Just (Left ParsingError
err) -> forall a e. Exception e => e -> a
throw ([Char] -> SeqFormatException
SeqFormatException ([Char]
"Histogram parsing error: " forall a. Semigroup a => a -> a -> a
<> forall a. Show a => a -> [Char]
show ParsingError
err))
Just (Right RareAlleleHistogram
hist) -> forall (m :: * -> *) a. Monad m => a -> m a
return RareAlleleHistogram
hist
parseHistogram :: A.Parser RareAlleleHistogram
parseHistogram :: Parser RareAlleleHistogram
parseHistogram = do
[ByteString]
names <- Parser ByteString [ByteString]
parseNames
[Int]
nVec <- Parser ByteString [Int]
parseNVec
Int
maxM <- Parser ByteString Int
parseMaxM
Int64
totalNrSites <- Parser ByteString Int64
parseTotalNrSites
[([Int], Int64, Maybe (Double, Double))]
body <- Parser [([Int], Int64, Maybe (Double, Double))]
parseBody
let countHist :: Map [Int] Int64
countHist = forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList forall a b. (a -> b) -> a -> b
$ [([Int]
k, Int64
c) | ([Int]
k, Int64
c, Maybe (Double, Double)
_) <- [([Int], Int64, Maybe (Double, Double))]
body]
jkHist :: Maybe (Map [Int] (Double, Double))
jkHist = case forall a. [a] -> a
head [([Int], Int64, Maybe (Double, Double))]
body of
([Int]
_, Int64
_, Just (Double, Double)
_) -> forall a. a -> Maybe a
Just forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList forall a b. (a -> b) -> a -> b
$ [([Int]
k, (Double
jkMean, Double
jkSE)) |
([Int]
k, Int64
_, Just (Double
jkMean, Double
jkSE)) <- [([Int], Int64, Maybe (Double, Double))]
body]
([Int], Int64, Maybe (Double, Double))
_ -> forall a. Maybe a
Nothing
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [[Char]]
-> [Int]
-> Int
-> Int
-> [Int]
-> [[Int]]
-> Int64
-> Map [Int] Int64
-> Maybe (Map [Int] (Double, Double))
-> RareAlleleHistogram
RareAlleleHistogram (forall a b. (a -> b) -> [a] -> [b]
map ByteString -> [Char]
B.unpack [ByteString]
names) [Int]
nVec Int
1 Int
maxM [][] Int64
totalNrSites Map [Int] Int64
countHist Maybe (Map [Int] (Double, Double))
jkHist
where
parseNames :: Parser ByteString [ByteString]
parseNames = ByteString -> Parser ByteString
A.string ByteString
"NAMES=" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString
name forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser Char
A.char Char
',' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.endOfLine
name :: Parser ByteString
name = (Char -> Bool) -> Parser ByteString
A.takeWhile1 (\Char
c -> Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'-')
parseNVec :: Parser ByteString [Int]
parseNVec = ByteString -> Parser ByteString
A.string ByteString
"N=" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser Char
A.char Char
',' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.endOfLine
parseMaxM :: Parser ByteString Int
parseMaxM = ByteString -> Parser ByteString
A.string ByteString
"MAX_M=" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.endOfLine
parseTotalNrSites :: Parser ByteString Int64
parseTotalNrSites = ByteString -> Parser ByteString
A.string ByteString
"TOTAL_SITES=" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
A.endOfLine
parseBody :: A.Parser [(SitePattern, Int64, Maybe (Double, Double))]
parseBody :: Parser [([Int], Int64, Maybe (Double, Double))]
parseBody = forall (f :: * -> *) a. Alternative f => f a -> f [a]
A.many1 Parser ByteString ([Int], Int64, Maybe (Double, Double))
patternLine
where
patternLine :: Parser ByteString ([Int], Int64, Maybe (Double, Double))
patternLine = (,,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString [Int]
parsePattern forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
A.space forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString Int64
parseLargeInt forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ByteString (Double, Double)
parseJackknife forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<*
Parser ()
A.endOfLine
parsePattern :: Parser ByteString [Int]
parsePattern = forall a. Integral a => Parser a
A.decimal forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser Char
A.char Char
','
parseLargeInt :: Parser ByteString Int64
parseLargeInt = forall a. Read a => [Char] -> a
read forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f [a]
A.many1 Parser Char
A.digit
parseJackknife :: Parser ByteString (Double, Double)
parseJackknife = (,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser Char
A.space forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString Double
A.double) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Char
A.space forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString Double
A.double