module Bio.TwoBit (
TwoBitFile(..),
TwoBitSequence(..),
openTwoBit,
getFwdSubseqWith,
getSubseq,
getSubseqWith,
getSubseqAscii,
getSubseqMasked,
getLazySubseq,
getFragment,
getFwdSubseqV,
getSeqnames,
lookupSequence,
getSeqLength,
clampPosition,
getRandomSeq,
takeOverlap,
mergeBlocks,
Mask(..)
) where
import Bio.Prelude hiding ( left, right, chr )
import Bio.Util.MMap
import Bio.Util.Storable
import Control.Monad.Trans.State ( StateT(..), get, evalStateT )
import qualified Data.ByteString as B
import qualified Data.IntMap.Strict as I
import qualified Data.HashMap.Lazy as M
import qualified Data.Vector.Unboxed as U
import Foreign.C.Types ( CChar )
import Foreign.ForeignPtr.Unsafe ( unsafeForeignPtrToPtr )
data TwoBitFile = TBF {
TwoBitFile -> ForeignPtr CChar
tbf_raw :: ForeignPtr CChar,
TwoBitFile -> HashMap Bytes TwoBitSequence
tbf_seqs :: !(M.HashMap Bytes TwoBitSequence)
}
data TwoBitSequence = TBS { TwoBitSequence -> IntMap Int
tbs_n_blocks :: !(I.IntMap Int)
, TwoBitSequence -> IntMap Int
tbs_m_blocks :: !(I.IntMap Int)
, TwoBitSequence -> Int
tbs_dna_offset :: {-# UNPACK #-} !Int
, TwoBitSequence -> Int
tbs_dna_size :: {-# UNPACK #-} !Int }
openTwoBit :: FilePath -> IO TwoBitFile
openTwoBit :: FilePath -> IO TwoBitFile
openTwoBit fp :: FilePath
fp = do
(_sz :: Int
_sz, raw :: ForeignPtr CChar
raw) <- FilePath -> IO (Int, ForeignPtr CChar)
forall a. FilePath -> IO (Int, ForeignPtr a)
mmapFile FilePath
fp
ForeignPtr CChar -> (Ptr CChar -> IO TwoBitFile) -> IO TwoBitFile
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr CChar
raw ((Ptr CChar -> IO TwoBitFile) -> IO TwoBitFile)
-> (Ptr CChar -> IO TwoBitFile) -> IO TwoBitFile
forall a b. (a -> b) -> a -> b
$
StateT (Ptr CChar) IO TwoBitFile -> Ptr CChar -> IO TwoBitFile
forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m a
evalStateT (StateT (Ptr CChar) IO TwoBitFile -> Ptr CChar -> IO TwoBitFile)
-> StateT (Ptr CChar) IO TwoBitFile -> Ptr CChar -> IO TwoBitFile
forall a b. (a -> b) -> a -> b
$ do
Word32
sig <- Get Word32
forall a. Num a => Get a
getWord32be
Get Int
getWord32 <- case Word32
sig :: Word32 of
0x1A412743 -> Get Int -> StateT (Ptr CChar) IO (Get Int)
forall (m :: * -> *) a. Monad m => a -> m a
return Get Int
forall a. Num a => Get a
getWord32be
0x4327411A -> Get Int -> StateT (Ptr CChar) IO (Get Int)
forall (m :: * -> *) a. Monad m => a -> m a
return Get Int
forall a. Num a => Get a
getWord32le
_ -> FilePath -> StateT (Ptr CChar) IO (Get Int)
forall (m :: * -> *) a. MonadFail m => FilePath -> m a
fail (FilePath -> StateT (Ptr CChar) IO (Get Int))
-> FilePath -> StateT (Ptr CChar) IO (Get Int)
forall a b. (a -> b) -> a -> b
$ "invalid .2bit signature " FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ Word32 -> FilePath -> FilePath
forall a. (Integral a, Show a) => a -> FilePath -> FilePath
showHex Word32
sig []
Int
version <- Get Int
getWord32
Bool -> StateT (Ptr CChar) IO () -> StateT (Ptr CChar) IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Int
version Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0) (StateT (Ptr CChar) IO () -> StateT (Ptr CChar) IO ())
-> StateT (Ptr CChar) IO () -> StateT (Ptr CChar) IO ()
forall a b. (a -> b) -> a -> b
$ FilePath -> StateT (Ptr CChar) IO ()
forall (m :: * -> *) a. MonadFail m => FilePath -> m a
fail (FilePath -> StateT (Ptr CChar) IO ())
-> FilePath -> StateT (Ptr CChar) IO ()
forall a b. (a -> b) -> a -> b
$ "wrong .2bit version " FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ Int -> FilePath
forall a. Show a => a -> FilePath
show Int
version
Int
nseqs <- Get Int
getWord32
Int
_reserved <- Get Int
getWord32
ForeignPtr CChar -> HashMap Bytes TwoBitSequence -> TwoBitFile
TBF ForeignPtr CChar
raw (HashMap Bytes TwoBitSequence -> TwoBitFile)
-> StateT (Ptr CChar) IO (HashMap Bytes TwoBitSequence)
-> StateT (Ptr CChar) IO TwoBitFile
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (HashMap Bytes TwoBitSequence
-> Int -> StateT (Ptr CChar) IO (HashMap Bytes TwoBitSequence))
-> HashMap Bytes TwoBitSequence
-> [Int]
-> StateT (Ptr CChar) IO (HashMap Bytes TwoBitSequence)
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM (\ix :: HashMap Bytes TwoBitSequence
ix _ -> do !Bytes
key <- Get Int
forall a. Num a => Get a
getWord8 Get Int
-> (Int -> StateT (Ptr CChar) IO Bytes)
-> StateT (Ptr CChar) IO Bytes
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> StateT (Ptr CChar) IO Bytes
getByteString
!Int
off <- Get Int
getWord32
HashMap Bytes TwoBitSequence
-> StateT (Ptr CChar) IO (HashMap Bytes TwoBitSequence)
forall (m :: * -> *) a. Monad m => a -> m a
return (HashMap Bytes TwoBitSequence
-> StateT (Ptr CChar) IO (HashMap Bytes TwoBitSequence))
-> HashMap Bytes TwoBitSequence
-> StateT (Ptr CChar) IO (HashMap Bytes TwoBitSequence)
forall a b. (a -> b) -> a -> b
$! Bytes
-> TwoBitSequence
-> HashMap Bytes TwoBitSequence
-> HashMap Bytes TwoBitSequence
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.insert Bytes
key (ForeignPtr CChar -> Get Int -> Int -> TwoBitSequence
mkBlockIndex ForeignPtr CChar
raw Get Int
getWord32 Int
off) HashMap Bytes TwoBitSequence
ix
) HashMap Bytes TwoBitSequence
forall k v. HashMap k v
M.empty [1..Int
nseqs]
type Get = StateT (Ptr CChar) IO
getWord8, getWord32be, getWord32le :: Num a => Get a
getWord8 :: Get a
getWord8 = (Ptr CChar -> IO (a, Ptr CChar)) -> Get a
forall s (m :: * -> *) a. (s -> m (a, s)) -> StateT s m a
StateT ((Ptr CChar -> IO (a, Ptr CChar)) -> Get a)
-> (Ptr CChar -> IO (a, Ptr CChar)) -> Get a
forall a b. (a -> b) -> a -> b
$ \p :: Ptr CChar
p -> Ptr CChar -> IO Word32
forall a. Ptr a -> IO Word32
peekUnalnWord32BE Ptr CChar
p IO Word32 -> (Word32 -> IO (a, Ptr CChar)) -> IO (a, Ptr CChar)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \w :: Word32
w -> (a, Ptr CChar) -> IO (a, Ptr CChar)
forall (m :: * -> *) a. Monad m => a -> m a
return (Word32 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
w, Ptr CChar -> Int -> Ptr CChar
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
p 1)
getWord32be :: Get a
getWord32be = (Ptr CChar -> IO (a, Ptr CChar)) -> Get a
forall s (m :: * -> *) a. (s -> m (a, s)) -> StateT s m a
StateT ((Ptr CChar -> IO (a, Ptr CChar)) -> Get a)
-> (Ptr CChar -> IO (a, Ptr CChar)) -> Get a
forall a b. (a -> b) -> a -> b
$ \p :: Ptr CChar
p -> Ptr CChar -> IO Word32
forall a. Ptr a -> IO Word32
peekUnalnWord32BE Ptr CChar
p IO Word32 -> (Word32 -> IO (a, Ptr CChar)) -> IO (a, Ptr CChar)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \w :: Word32
w -> (a, Ptr CChar) -> IO (a, Ptr CChar)
forall (m :: * -> *) a. Monad m => a -> m a
return (Word32 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
w, Ptr CChar -> Int -> Ptr CChar
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
p 4)
getWord32le :: Get a
getWord32le = (Ptr CChar -> IO (a, Ptr CChar)) -> Get a
forall s (m :: * -> *) a. (s -> m (a, s)) -> StateT s m a
StateT ((Ptr CChar -> IO (a, Ptr CChar)) -> Get a)
-> (Ptr CChar -> IO (a, Ptr CChar)) -> Get a
forall a b. (a -> b) -> a -> b
$ \p :: Ptr CChar
p -> Ptr CChar -> IO Word32
forall a. Ptr a -> IO Word32
peekUnalnWord32LE Ptr CChar
p IO Word32 -> (Word32 -> IO (a, Ptr CChar)) -> IO (a, Ptr CChar)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \w :: Word32
w -> (a, Ptr CChar) -> IO (a, Ptr CChar)
forall (m :: * -> *) a. Monad m => a -> m a
return (Word32 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
w, Ptr CChar -> Int -> Ptr CChar
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
p 4)
getByteString :: Int -> Get Bytes
getByteString :: Int -> StateT (Ptr CChar) IO Bytes
getByteString l :: Int
l = (Ptr CChar -> IO (Bytes, Ptr CChar)) -> StateT (Ptr CChar) IO Bytes
forall s (m :: * -> *) a. (s -> m (a, s)) -> StateT s m a
StateT ((Ptr CChar -> IO (Bytes, Ptr CChar))
-> StateT (Ptr CChar) IO Bytes)
-> (Ptr CChar -> IO (Bytes, Ptr CChar))
-> StateT (Ptr CChar) IO Bytes
forall a b. (a -> b) -> a -> b
$ \p :: Ptr CChar
p -> CStringLen -> IO Bytes
B.packCStringLen (Ptr CChar
p,Int
l) IO Bytes
-> (Bytes -> IO (Bytes, Ptr CChar)) -> IO (Bytes, Ptr CChar)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \s :: Bytes
s -> (Bytes, Ptr CChar) -> IO (Bytes, Ptr CChar)
forall (m :: * -> *) a. Monad m => a -> m a
return (Bytes
s, Ptr CChar -> Int -> Ptr CChar
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
p Int
l)
mkBlockIndex :: ForeignPtr CChar -> Get Int -> Int -> TwoBitSequence
mkBlockIndex :: ForeignPtr CChar -> Get Int -> Int -> TwoBitSequence
mkBlockIndex raw :: ForeignPtr CChar
raw getWord32 :: Get Int
getWord32 ofs :: Int
ofs =
IO TwoBitSequence -> TwoBitSequence
forall a. IO a -> a
unsafeDupablePerformIO (IO TwoBitSequence -> TwoBitSequence)
-> IO TwoBitSequence -> TwoBitSequence
forall a b. (a -> b) -> a -> b
$
ForeignPtr CChar
-> (Ptr CChar -> IO TwoBitSequence) -> IO TwoBitSequence
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr CChar
raw ((Ptr CChar -> IO TwoBitSequence) -> IO TwoBitSequence)
-> (Ptr CChar -> IO TwoBitSequence) -> IO TwoBitSequence
forall a b. (a -> b) -> a -> b
$ \praw :: Ptr CChar
praw ->
StateT (Ptr CChar) IO TwoBitSequence
-> Ptr CChar -> IO TwoBitSequence
forall (m :: * -> *) s a. Monad m => StateT s m a -> s -> m a
evalStateT StateT (Ptr CChar) IO TwoBitSequence
getBlock (Ptr CChar -> Int -> Ptr CChar
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr CChar
praw Int
ofs)
where
getBlock :: StateT (Ptr CChar) IO TwoBitSequence
getBlock = do Ptr CChar
p0 <- StateT (Ptr CChar) IO (Ptr CChar)
forall (m :: * -> *) s. Monad m => StateT s m s
get
Int
ds <- Get Int
getWord32
[(Int, Int)]
nb <- StateT (Ptr CChar) IO [(Int, Int)]
readBlockList
[(Int, Int)]
mb <- StateT (Ptr CChar) IO [(Int, Int)]
readBlockList
Int
_ <- Get Int
getWord32
Ptr CChar
p1 <- StateT (Ptr CChar) IO (Ptr CChar)
forall (m :: * -> *) s. Monad m => StateT s m s
get
TwoBitSequence -> StateT (Ptr CChar) IO TwoBitSequence
forall (m :: * -> *) a. Monad m => a -> m a
return (TwoBitSequence -> StateT (Ptr CChar) IO TwoBitSequence)
-> TwoBitSequence -> StateT (Ptr CChar) IO TwoBitSequence
forall a b. (a -> b) -> a -> b
$! IntMap Int -> IntMap Int -> Int -> Int -> TwoBitSequence
TBS ([(Int, Int)] -> IntMap Int
forall a. [(Int, a)] -> IntMap a
I.fromList [(Int, Int)]
nb) ([(Int, Int)] -> IntMap Int
forall a. [(Int, a)] -> IntMap a
I.fromList [(Int, Int)]
mb) (Int
ofs Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Ptr CChar -> Ptr CChar -> Int
forall a b. Ptr a -> Ptr b -> Int
minusPtr Ptr CChar
p1 Ptr CChar
p0) Int
ds
readBlockList :: StateT (Ptr CChar) IO [(Int, Int)]
readBlockList = Get Int
getWord32 Get Int
-> (Int -> StateT (Ptr CChar) IO [(Int, Int)])
-> StateT (Ptr CChar) IO [(Int, Int)]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \n :: Int
n -> ([Int] -> [Int] -> [(Int, Int)])
-> StateT (Ptr CChar) IO [Int]
-> StateT (Ptr CChar) IO [Int]
-> StateT (Ptr CChar) IO [(Int, Int)]
forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 [Int] -> [Int] -> [(Int, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip (Int -> Get Int -> StateT (Ptr CChar) IO [Int]
forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
repM Int
n Get Int
getWord32) (Int -> Get Int -> StateT (Ptr CChar) IO [Int]
forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
repM Int
n Get Int
getWord32)
repM :: Monad m => Int -> m a -> m [a]
repM :: Int -> m a -> m [a]
repM n0 :: Int
n0 m :: m a
m = [a] -> Int -> m [a]
forall t. (Eq t, Num t) => [a] -> t -> m [a]
go [] Int
n0
where
go :: [a] -> t -> m [a]
go acc :: [a]
acc 0 = [a] -> m [a]
forall (m :: * -> *) a. Monad m => a -> m a
return [a]
acc
go acc :: [a]
acc n :: t
n = m a
m m a -> (a -> m [a]) -> m [a]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \x :: a
x -> a
x a -> m [a] -> m [a]
forall a b. a -> b -> b
`seq` [a] -> t -> m [a]
go (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
acc) (t
nt -> t -> t
forall a. Num a => a -> a -> a
-1)
takeOverlap :: Int -> I.IntMap Int -> [(Int,Int)]
takeOverlap :: Int -> IntMap Int -> [(Int, Int)]
takeOverlap k :: Int
k m :: IntMap Int
m = ((Int, Int) -> Bool) -> [(Int, Int)] -> [(Int, Int)]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Int, Int) -> Bool
far_left ([(Int, Int)] -> [(Int, Int)]) -> [(Int, Int)] -> [(Int, Int)]
forall a b. (a -> b) -> a -> b
$
([(Int, Int)] -> [(Int, Int)])
-> (((Int, Int), IntMap Int) -> [(Int, Int)] -> [(Int, Int)])
-> Maybe ((Int, Int), IntMap Int)
-> [(Int, Int)]
-> [(Int, Int)]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [(Int, Int)] -> [(Int, Int)]
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id (\(kv :: (Int, Int)
kv,_) -> (:) (Int, Int)
kv) (IntMap Int -> Maybe ((Int, Int), IntMap Int)
forall a. IntMap a -> Maybe ((Int, a), IntMap a)
I.maxViewWithKey IntMap Int
left) ([(Int, Int)] -> [(Int, Int)]) -> [(Int, Int)] -> [(Int, Int)]
forall a b. (a -> b) -> a -> b
$
([(Int, Int)] -> [(Int, Int)])
-> (Int -> [(Int, Int)] -> [(Int, Int)])
-> Maybe Int
-> [(Int, Int)]
-> [(Int, Int)]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [(Int, Int)] -> [(Int, Int)]
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id (\v :: Int
v -> (:) (Int
k,Int
v)) Maybe Int
middle ([(Int, Int)] -> [(Int, Int)]) -> [(Int, Int)] -> [(Int, Int)]
forall a b. (a -> b) -> a -> b
$
IntMap Int -> [(Int, Int)]
forall a. IntMap a -> [(Int, a)]
I.toAscList IntMap Int
right
where
(left :: IntMap Int
left, middle :: Maybe Int
middle, right :: IntMap Int
right) = Int -> IntMap Int -> (IntMap Int, Maybe Int, IntMap Int)
forall a. Int -> IntMap a -> (IntMap a, Maybe a, IntMap a)
I.splitLookup Int
k IntMap Int
m
far_left :: (Int, Int) -> Bool
far_left (s :: Int
s,l :: Int
l) = Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
k
data Mask = None | Soft | Hard | Both deriving (Mask -> Mask -> Bool
(Mask -> Mask -> Bool) -> (Mask -> Mask -> Bool) -> Eq Mask
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Mask -> Mask -> Bool
$c/= :: Mask -> Mask -> Bool
== :: Mask -> Mask -> Bool
$c== :: Mask -> Mask -> Bool
Eq, Eq Mask
Eq Mask =>
(Mask -> Mask -> Ordering)
-> (Mask -> Mask -> Bool)
-> (Mask -> Mask -> Bool)
-> (Mask -> Mask -> Bool)
-> (Mask -> Mask -> Bool)
-> (Mask -> Mask -> Mask)
-> (Mask -> Mask -> Mask)
-> Ord Mask
Mask -> Mask -> Bool
Mask -> Mask -> Ordering
Mask -> Mask -> Mask
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Mask -> Mask -> Mask
$cmin :: Mask -> Mask -> Mask
max :: Mask -> Mask -> Mask
$cmax :: Mask -> Mask -> Mask
>= :: Mask -> Mask -> Bool
$c>= :: Mask -> Mask -> Bool
> :: Mask -> Mask -> Bool
$c> :: Mask -> Mask -> Bool
<= :: Mask -> Mask -> Bool
$c<= :: Mask -> Mask -> Bool
< :: Mask -> Mask -> Bool
$c< :: Mask -> Mask -> Bool
compare :: Mask -> Mask -> Ordering
$ccompare :: Mask -> Mask -> Ordering
$cp1Ord :: Eq Mask
Ord, Int -> Mask
Mask -> Int
Mask -> [Mask]
Mask -> Mask
Mask -> Mask -> [Mask]
Mask -> Mask -> Mask -> [Mask]
(Mask -> Mask)
-> (Mask -> Mask)
-> (Int -> Mask)
-> (Mask -> Int)
-> (Mask -> [Mask])
-> (Mask -> Mask -> [Mask])
-> (Mask -> Mask -> [Mask])
-> (Mask -> Mask -> Mask -> [Mask])
-> Enum Mask
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: Mask -> Mask -> Mask -> [Mask]
$cenumFromThenTo :: Mask -> Mask -> Mask -> [Mask]
enumFromTo :: Mask -> Mask -> [Mask]
$cenumFromTo :: Mask -> Mask -> [Mask]
enumFromThen :: Mask -> Mask -> [Mask]
$cenumFromThen :: Mask -> Mask -> [Mask]
enumFrom :: Mask -> [Mask]
$cenumFrom :: Mask -> [Mask]
fromEnum :: Mask -> Int
$cfromEnum :: Mask -> Int
toEnum :: Int -> Mask
$ctoEnum :: Int -> Mask
pred :: Mask -> Mask
$cpred :: Mask -> Mask
succ :: Mask -> Mask
$csucc :: Mask -> Mask
Enum, Int -> Mask -> FilePath -> FilePath
[Mask] -> FilePath -> FilePath
Mask -> FilePath
(Int -> Mask -> FilePath -> FilePath)
-> (Mask -> FilePath)
-> ([Mask] -> FilePath -> FilePath)
-> Show Mask
forall a.
(Int -> a -> FilePath -> FilePath)
-> (a -> FilePath) -> ([a] -> FilePath -> FilePath) -> Show a
showList :: [Mask] -> FilePath -> FilePath
$cshowList :: [Mask] -> FilePath -> FilePath
show :: Mask -> FilePath
$cshow :: Mask -> FilePath
showsPrec :: Int -> Mask -> FilePath -> FilePath
$cshowsPrec :: Int -> Mask -> FilePath -> FilePath
Show)
getFwdSubseqWith :: TwoBitFile -> TwoBitSequence
-> (Word8 -> Mask -> a)
-> Int -> [a]
getFwdSubseqWith :: TwoBitFile -> TwoBitSequence -> (Word8 -> Mask -> a) -> Int -> [a]
getFwdSubseqWith TBF{..} TBS{..} nt :: Word8 -> Mask -> a
nt start :: Int
start =
[(Int, Int, Mask)] -> Int -> [Word8] -> [a]
do_mask (Int -> IntMap Int -> [(Int, Int)]
takeOverlap Int
start IntMap Int
tbs_n_blocks [(Int, Int)] -> [(Int, Int)] -> [(Int, Int, Mask)]
`mergeBlocks` Int -> IntMap Int -> [(Int, Int)]
takeOverlap Int
start IntMap Int
tbs_m_blocks) Int
start ([Word8] -> [a]) -> (Ptr Word8 -> [Word8]) -> Ptr Word8 -> [a]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
.
Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
drop (Int
start Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. 3) ([Word8] -> [Word8])
-> (Ptr Word8 -> [Word8]) -> Ptr Word8 -> [Word8]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
.
Ptr Word8 -> [Word8]
forall a. (Bits a, Num a, Storable a) => Ptr a -> [a]
toDNA (Ptr Word8 -> [a]) -> Ptr Word8 -> [a]
forall a b. (a -> b) -> a -> b
$ Ptr CChar -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr (ForeignPtr CChar -> Ptr CChar
forall a. ForeignPtr a -> Ptr a
unsafeForeignPtrToPtr ForeignPtr CChar
tbf_raw) (Int
tbs_dna_offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
start Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` 2))
where
toDNA :: Ptr a -> [a]
toDNA p :: Ptr a
p = let !b :: a
b = IO a -> a
forall a. IO a -> a
unsafeDupablePerformIO (IO a -> a) -> IO a -> a
forall a b. (a -> b) -> a -> b
$ do Ptr a -> IO a
forall a. Storable a => Ptr a -> IO a
peek Ptr a
p IO a -> IO () -> IO a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ForeignPtr CChar -> IO ()
forall a. ForeignPtr a -> IO ()
touchForeignPtr ForeignPtr CChar
tbf_raw
in [ 3 a -> a -> a
forall a. Bits a => a -> a -> a
.&. (a
b a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` Int
x) | Int
x <- [6,4,2,0] ] [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ Ptr a -> [a]
toDNA (Ptr a -> Int -> Ptr a
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr a
p 1)
do_mask :: [(Int, Int, Mask)] -> Int -> [Word8] -> [a]
do_mask _ _ [] = []
do_mask [ ] _ ws :: [Word8]
ws = (Word8 -> a) -> [Word8] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map (Word8 -> Mask -> a
`nt` Mask
None) [Word8]
ws
do_mask ((s :: Int
s,l :: Int
l,m :: Mask
m):is :: [(Int, Int, Mask)]
is) p :: Int
p ws :: [Word8]
ws
| Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s = (Word8 -> a) -> [Word8] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map (Word8 -> Mask -> a
`nt` Mask
None) (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
take (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
p) [Word8]
ws) [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [(Int, Int, Mask)] -> Int -> [Word8] -> [a]
do_mask ((Int
s,Int
l,Mask
m)(Int, Int, Mask) -> [(Int, Int, Mask)] -> [(Int, Int, Mask)]
forall a. a -> [a] -> [a]
:[(Int, Int, Mask)]
is) Int
s (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
drop (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
p) [Word8]
ws)
| Bool
otherwise = (Word8 -> a) -> [Word8] -> [a]
forall a b. (a -> b) -> [a] -> [b]
map (Word8 -> Mask -> a
`nt` Mask
m) (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
take (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
p) [Word8]
ws) [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [(Int, Int, Mask)] -> Int -> [Word8] -> [a]
do_mask [(Int, Int, Mask)]
is (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l) (Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
drop (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
p) [Word8]
ws)
mergeBlocks :: [(Int,Int)] -> [(Int,Int)] -> [(Int,Int,Mask)]
mergeBlocks :: [(Int, Int)] -> [(Int, Int)] -> [(Int, Int, Mask)]
mergeBlocks ((_,0):nbs :: [(Int, Int)]
nbs) mbs :: [(Int, Int)]
mbs = [(Int, Int)] -> [(Int, Int)] -> [(Int, Int, Mask)]
mergeBlocks [(Int, Int)]
nbs [(Int, Int)]
mbs
mergeBlocks nbs :: [(Int, Int)]
nbs ((_,0):mbs :: [(Int, Int)]
mbs) = [(Int, Int)] -> [(Int, Int)] -> [(Int, Int, Mask)]
mergeBlocks [(Int, Int)]
nbs [(Int, Int)]
mbs
mergeBlocks ((ns :: Int
ns,nl :: Int
nl):nbs :: [(Int, Int)]
nbs) ((ms :: Int
ms,ml :: Int
ml):mbs :: [(Int, Int)]
mbs)
| Int
ns Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
ms = let l :: Int
l = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Int
msInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
ns) Int
nl in (Int
ns,Int
l, Mask
Hard) (Int, Int, Mask) -> [(Int, Int, Mask)] -> [(Int, Int, Mask)]
forall a. a -> [a] -> [a]
: [(Int, Int)] -> [(Int, Int)] -> [(Int, Int, Mask)]
mergeBlocks ((Int
nsInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l,Int
nlInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l)(Int, Int) -> [(Int, Int)] -> [(Int, Int)]
forall a. a -> [a] -> [a]
:[(Int, Int)]
nbs) ((Int
ms,Int
ml)(Int, Int) -> [(Int, Int)] -> [(Int, Int)]
forall a. a -> [a] -> [a]
:[(Int, Int)]
mbs)
| Int
ms Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
ns = let l :: Int
l = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Int
nsInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
ms) Int
ml in (Int
ms,Int
l, Mask
Soft) (Int, Int, Mask) -> [(Int, Int, Mask)] -> [(Int, Int, Mask)]
forall a. a -> [a] -> [a]
: [(Int, Int)] -> [(Int, Int)] -> [(Int, Int, Mask)]
mergeBlocks ((Int
ns,Int
nl)(Int, Int) -> [(Int, Int)] -> [(Int, Int)]
forall a. a -> [a] -> [a]
:[(Int, Int)]
nbs) ((Int
msInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l,Int
mlInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l)(Int, Int) -> [(Int, Int)] -> [(Int, Int)]
forall a. a -> [a] -> [a]
:[(Int, Int)]
mbs)
| Bool
otherwise = let l :: Int
l = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
nl Int
ml in (Int
ns,Int
l, Mask
Both) (Int, Int, Mask) -> [(Int, Int, Mask)] -> [(Int, Int, Mask)]
forall a. a -> [a] -> [a]
: [(Int, Int)] -> [(Int, Int)] -> [(Int, Int, Mask)]
mergeBlocks ((Int
nsInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l,Int
nlInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l)(Int, Int) -> [(Int, Int)] -> [(Int, Int)]
forall a. a -> [a] -> [a]
:[(Int, Int)]
nbs) ((Int
msInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l,Int
mlInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l)(Int, Int) -> [(Int, Int)] -> [(Int, Int)]
forall a. a -> [a] -> [a]
:[(Int, Int)]
mbs)
mergeBlocks ((ns :: Int
ns,nl :: Int
nl):nbs :: [(Int, Int)]
nbs) [] = (Int
ns,Int
nl, Mask
Hard) (Int, Int, Mask) -> [(Int, Int, Mask)] -> [(Int, Int, Mask)]
forall a. a -> [a] -> [a]
: [(Int, Int)] -> [(Int, Int)] -> [(Int, Int, Mask)]
mergeBlocks [(Int, Int)]
nbs []
mergeBlocks [] ((ms :: Int
ms,ml :: Int
ml):mbs :: [(Int, Int)]
mbs) = (Int
ms,Int
ml, Mask
Soft) (Int, Int, Mask) -> [(Int, Int, Mask)] -> [(Int, Int, Mask)]
forall a. a -> [a] -> [a]
: [(Int, Int)] -> [(Int, Int)] -> [(Int, Int, Mask)]
mergeBlocks [] [(Int, Int)]
mbs
mergeBlocks [ ] [ ] = []
getSubseqWith :: (Nucleotide -> Mask -> a) -> TwoBitFile -> Range -> [a]
getSubseqWith :: (Nucleotide -> Mask -> a) -> TwoBitFile -> Range -> [a]
getSubseqWith maskf :: Nucleotide -> Mask -> a
maskf tbf :: TwoBitFile
tbf Range{ r_pos :: Range -> Position
r_pos = Pos { p_seq :: Position -> Bytes
p_seq = Bytes
chr, p_start :: Position -> Int
p_start = Int
start }, r_length :: Range -> Int
r_length = Int
len } = do
let sq1 :: TwoBitSequence
sq1 = TwoBitSequence -> Maybe TwoBitSequence -> TwoBitSequence
forall a. a -> Maybe a -> a
fromMaybe (FilePath -> TwoBitSequence
forall a. HasCallStack => FilePath -> a
error (FilePath -> TwoBitSequence) -> FilePath -> TwoBitSequence
forall a b. (a -> b) -> a -> b
$ Bytes -> FilePath
forall s. Unpack s => s -> FilePath
unpack Bytes
chr FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ " doesn't exist") (Maybe TwoBitSequence -> TwoBitSequence)
-> Maybe TwoBitSequence -> TwoBitSequence
forall a b. (a -> b) -> a -> b
$ Bytes -> HashMap Bytes TwoBitSequence -> Maybe TwoBitSequence
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup Bytes
chr (TwoBitFile -> HashMap Bytes TwoBitSequence
tbf_seqs TwoBitFile
tbf)
let go :: (Word8 -> Mask -> a) -> Int -> [a]
go = TwoBitFile -> TwoBitSequence -> (Word8 -> Mask -> a) -> Int -> [a]
forall a.
TwoBitFile -> TwoBitSequence -> (Word8 -> Mask -> a) -> Int -> [a]
getFwdSubseqWith TwoBitFile
tbf TwoBitSequence
sq1
if Int
start Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0
then [a] -> [a]
forall a. [a] -> [a]
reverse ([a] -> [a]) -> [a] -> [a]
forall a b. (a -> b) -> a -> b
$ Int -> [a] -> [a]
forall a. Int -> [a] -> [a]
take Int
len ([a] -> [a]) -> [a] -> [a]
forall a b. (a -> b) -> a -> b
$ (Word8 -> Mask -> a) -> Int -> [a]
forall a. (Word8 -> Mask -> a) -> Int -> [a]
go (Nucleotide -> Mask -> a
maskf (Nucleotide -> Mask -> a)
-> (Word8 -> Nucleotide) -> Word8 -> Mask -> a
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Nucleotide
cmp_nt) (-Int
startInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
len)
else Int -> [a] -> [a]
forall a. Int -> [a] -> [a]
take Int
len ([a] -> [a]) -> [a] -> [a]
forall a b. (a -> b) -> a -> b
$ (Word8 -> Mask -> a) -> Int -> [a]
forall a. (Word8 -> Mask -> a) -> Int -> [a]
go (Nucleotide -> Mask -> a
maskf (Nucleotide -> Mask -> a)
-> (Word8 -> Nucleotide) -> Word8 -> Mask -> a
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Nucleotide
fwd_nt) Int
start
where
fwd_nt :: Word8 -> Nucleotide
fwd_nt = [Nucleotide] -> Int -> Nucleotide
forall a. [a] -> Int -> a
(!!) [Nucleotide
nucT, Nucleotide
nucC, Nucleotide
nucA, Nucleotide
nucG] (Int -> Nucleotide) -> (Word8 -> Int) -> Word8 -> Nucleotide
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
cmp_nt :: Word8 -> Nucleotide
cmp_nt = [Nucleotide] -> Int -> Nucleotide
forall a. [a] -> Int -> a
(!!) [Nucleotide
nucA, Nucleotide
nucG, Nucleotide
nucT, Nucleotide
nucC] (Int -> Nucleotide) -> (Word8 -> Int) -> Word8 -> Nucleotide
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
{-# INLINE getSubseqWith #-}
getLazySubseq :: TwoBitFile -> Position -> [Nucleotide]
getLazySubseq :: TwoBitFile -> Position -> [Nucleotide]
getLazySubseq tbf :: TwoBitFile
tbf Pos{ p_seq :: Position -> Bytes
p_seq = Bytes
chr, p_start :: Position -> Int
p_start = Int
start } = do
let sq1 :: TwoBitSequence
sq1 = TwoBitSequence -> Maybe TwoBitSequence -> TwoBitSequence
forall a. a -> Maybe a -> a
fromMaybe (FilePath -> TwoBitSequence
forall a. HasCallStack => FilePath -> a
error (FilePath -> TwoBitSequence) -> FilePath -> TwoBitSequence
forall a b. (a -> b) -> a -> b
$ Bytes -> FilePath
forall s. Unpack s => s -> FilePath
unpack Bytes
chr FilePath -> FilePath -> FilePath
forall a. [a] -> [a] -> [a]
++ " doesn't exist") (Maybe TwoBitSequence -> TwoBitSequence)
-> Maybe TwoBitSequence -> TwoBitSequence
forall a b. (a -> b) -> a -> b
$ Bytes -> HashMap Bytes TwoBitSequence -> Maybe TwoBitSequence
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup Bytes
chr (TwoBitFile -> HashMap Bytes TwoBitSequence
tbf_seqs TwoBitFile
tbf)
let go :: (Word8 -> Mask -> a) -> Int -> [a]
go = TwoBitFile -> TwoBitSequence -> (Word8 -> Mask -> a) -> Int -> [a]
forall a.
TwoBitFile -> TwoBitSequence -> (Word8 -> Mask -> a) -> Int -> [a]
getFwdSubseqWith TwoBitFile
tbf TwoBitSequence
sq1
if Int
start Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0
then FilePath -> [Nucleotide]
forall a. HasCallStack => FilePath -> a
error "sorry, can't go backwards"
else (Word8 -> Mask -> Nucleotide) -> Int -> [Nucleotide]
forall a. (Word8 -> Mask -> a) -> Int -> [a]
go Word8 -> Mask -> Nucleotide
forall a p. Integral a => a -> p -> Nucleotide
fwd_nt Int
start
where
fwd_nt :: a -> p -> Nucleotide
fwd_nt n :: a
n _ = [Nucleotide
nucT, Nucleotide
nucC, Nucleotide
nucA, Nucleotide
nucG] [Nucleotide] -> Int -> Nucleotide
forall a. [a] -> Int -> a
!! a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
n
{-# INLINE getLazySubseq #-}
getSubseq :: TwoBitFile -> Range -> [Nucleotide]
getSubseq :: TwoBitFile -> Range -> [Nucleotide]
getSubseq = (Nucleotide -> Mask -> Nucleotide)
-> TwoBitFile -> Range -> [Nucleotide]
forall a. (Nucleotide -> Mask -> a) -> TwoBitFile -> Range -> [a]
getSubseqWith Nucleotide -> Mask -> Nucleotide
forall a b. a -> b -> a
const
{-# INLINE getSubseq #-}
getSubseqMasked :: TwoBitFile -> Range -> [Nucleotides]
getSubseqMasked :: TwoBitFile -> Range -> [Nucleotides]
getSubseqMasked = (Nucleotide -> Mask -> Nucleotides)
-> TwoBitFile -> Range -> [Nucleotides]
forall a. (Nucleotide -> Mask -> a) -> TwoBitFile -> Range -> [a]
getSubseqWith Nucleotide -> Mask -> Nucleotides
mymask
where
mymask :: Nucleotide -> Mask -> Nucleotides
mymask n :: Nucleotide
n None = Nucleotide -> Nucleotides
nucToNucs Nucleotide
n
mymask n :: Nucleotide
n Soft = Nucleotide -> Nucleotides
nucToNucs Nucleotide
n
mymask _ Hard = Nucleotides
nucsN
mymask _ Both = Nucleotides
nucsN
{-# INLINE getSubseqMasked #-}
getSubseqAscii :: TwoBitFile -> Range -> String
getSubseqAscii :: TwoBitFile -> Range -> FilePath
getSubseqAscii = (Nucleotide -> Mask -> Char) -> TwoBitFile -> Range -> FilePath
forall a. (Nucleotide -> Mask -> a) -> TwoBitFile -> Range -> [a]
getSubseqWith Nucleotide -> Mask -> Char
mymask
where
mymask :: Nucleotide -> Mask -> Char
mymask n :: Nucleotide
n None = Nucleotide -> Char
showNucleotide Nucleotide
n
mymask n :: Nucleotide
n Soft = Char -> Char
toLower (Nucleotide -> Char
showNucleotide Nucleotide
n)
mymask _ Hard = 'N'
mymask _ Both = 'N'
{-# INLINE getSubseqAscii #-}
getSeqnames :: TwoBitFile -> [Bytes]
getSeqnames :: TwoBitFile -> [Bytes]
getSeqnames = HashMap Bytes TwoBitSequence -> [Bytes]
forall k v. HashMap k v -> [k]
M.keys (HashMap Bytes TwoBitSequence -> [Bytes])
-> (TwoBitFile -> HashMap Bytes TwoBitSequence)
-> TwoBitFile
-> [Bytes]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. TwoBitFile -> HashMap Bytes TwoBitSequence
tbf_seqs
lookupSequence :: TwoBitFile -> Bytes -> Maybe TwoBitSequence
lookupSequence :: TwoBitFile -> Bytes -> Maybe TwoBitSequence
lookupSequence tbf :: TwoBitFile
tbf sq :: Bytes
sq = Bytes -> HashMap Bytes TwoBitSequence -> Maybe TwoBitSequence
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup Bytes
sq (HashMap Bytes TwoBitSequence -> Maybe TwoBitSequence)
-> (TwoBitFile -> HashMap Bytes TwoBitSequence)
-> TwoBitFile
-> Maybe TwoBitSequence
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. TwoBitFile -> HashMap Bytes TwoBitSequence
tbf_seqs (TwoBitFile -> Maybe TwoBitSequence)
-> TwoBitFile -> Maybe TwoBitSequence
forall a b. (a -> b) -> a -> b
$ TwoBitFile
tbf
getSeqLength :: TwoBitFile -> Bytes -> Int
getSeqLength :: TwoBitFile -> Bytes -> Int
getSeqLength tbf :: TwoBitFile
tbf chr :: Bytes
chr =
Int -> (TwoBitSequence -> Int) -> Maybe TwoBitSequence -> Int
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (FilePath -> Int
forall a. HasCallStack => FilePath -> a
error (FilePath -> Int) -> FilePath -> Int
forall a b. (a -> b) -> a -> b
$ Bytes -> FilePath -> FilePath
forall a. Show a => a -> FilePath -> FilePath
shows Bytes
chr " doesn't exist") TwoBitSequence -> Int
tbs_dna_size (Maybe TwoBitSequence -> Int) -> Maybe TwoBitSequence -> Int
forall a b. (a -> b) -> a -> b
$
Bytes -> HashMap Bytes TwoBitSequence -> Maybe TwoBitSequence
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup Bytes
chr (TwoBitFile -> HashMap Bytes TwoBitSequence
tbf_seqs TwoBitFile
tbf)
clampPosition :: TwoBitFile -> Range -> Range
clampPosition :: TwoBitFile -> Range -> Range
clampPosition tbf :: TwoBitFile
tbf (Range (Pos n :: Bytes
n start :: Int
start) len :: Int
len) = Position -> Int -> Range
Range (Bytes -> Int -> Position
Pos Bytes
n Int
start') (Int
end' Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start')
where
size :: Int
size = TwoBitFile -> Bytes -> Int
getSeqLength TwoBitFile
tbf Bytes
n
start' :: Int
start' = if Int
start Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0 then Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
start (-Int
size) else Int
start
end' :: Int
end' = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Int
start Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len) (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ if Int
start Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0 then 0 else Int
size
getRandomSeq :: TwoBitFile
-> Int
-> (Int -> g -> (Int, g))
-> g
-> ((Range, [Nucleotide]), g)
getRandomSeq :: TwoBitFile
-> Int -> (Int -> g -> (Int, g)) -> g -> ((Range, [Nucleotide]), g)
getRandomSeq tbf :: TwoBitFile
tbf len :: Int
len rndInt :: Int -> g -> (Int, g)
rndInt = g -> ((Range, [Nucleotide]), g)
draw
where
names :: [Bytes]
names = TwoBitFile -> [Bytes]
getSeqnames TwoBitFile
tbf
lengths :: [Int]
lengths = (Bytes -> Int) -> [Bytes] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (TwoBitFile -> Bytes -> Int
getSeqLength TwoBitFile
tbf) [Bytes]
names
total :: Int
total = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
lengths
frags :: IntMap Bytes
frags = [(Int, Bytes)] -> IntMap Bytes
forall a. [(Int, a)] -> IntMap a
I.fromList ([(Int, Bytes)] -> IntMap Bytes) -> [(Int, Bytes)] -> IntMap Bytes
forall a b. (a -> b) -> a -> b
$ [Int] -> [Bytes] -> [(Int, Bytes)]
forall a b. [a] -> [b] -> [(a, b)]
zip ((Int -> Int -> Int) -> Int -> [Int] -> [Int]
forall b a. (b -> a -> b) -> b -> [a] -> [b]
scanl Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) 0 [Int]
lengths) [Bytes]
names
draw :: g -> ((Range, [Nucleotide]), g)
draw g0 :: g
g0 | Bool
good = ((Range
r', [Nucleotide]
sq), g
gn)
| Bool
otherwise = g -> ((Range, [Nucleotide]), g)
draw g
gn
where
(p0 :: Int
p0, gn :: g
gn) = Int -> g -> (Int, g)
rndInt (2Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
total) g
g0
p :: Int
p = Int
p0 Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` 1
(o :: Int
o,s :: Bytes
s) = ((Int, Bytes), IntMap Bytes) -> (Int, Bytes)
forall a b. (a, b) -> a
fst (((Int, Bytes), IntMap Bytes) -> (Int, Bytes))
-> ((Int, Bytes), IntMap Bytes) -> (Int, Bytes)
forall a b. (a -> b) -> a -> b
$ Maybe ((Int, Bytes), IntMap Bytes) -> ((Int, Bytes), IntMap Bytes)
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe ((Int, Bytes), IntMap Bytes)
-> ((Int, Bytes), IntMap Bytes))
-> Maybe ((Int, Bytes), IntMap Bytes)
-> ((Int, Bytes), IntMap Bytes)
forall a b. (a -> b) -> a -> b
$ IntMap Bytes -> Maybe ((Int, Bytes), IntMap Bytes)
forall a. IntMap a -> Maybe ((Int, a), IntMap a)
I.maxViewWithKey (IntMap Bytes -> Maybe ((Int, Bytes), IntMap Bytes))
-> IntMap Bytes -> Maybe ((Int, Bytes), IntMap Bytes)
forall a b. (a -> b) -> a -> b
$ (IntMap Bytes, IntMap Bytes) -> IntMap Bytes
forall a b. (a, b) -> a
fst ((IntMap Bytes, IntMap Bytes) -> IntMap Bytes)
-> (IntMap Bytes, IntMap Bytes) -> IntMap Bytes
forall a b. (a -> b) -> a -> b
$ Int -> IntMap Bytes -> (IntMap Bytes, IntMap Bytes)
forall a. Int -> IntMap a -> (IntMap a, IntMap a)
I.split (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
+1) IntMap Bytes
frags
r' :: Range
r' = (if Int -> Bool
forall a. Integral a => a -> Bool
odd Int
p0 then Range -> Range
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id else Range -> Range
reverseRange) (Range -> Range) -> Range -> Range
forall a b. (a -> b) -> a -> b
$ TwoBitFile -> Range -> Range
clampPosition TwoBitFile
tbf (Range -> Range) -> Range -> Range
forall a b. (a -> b) -> a -> b
$ Position -> Int -> Range
Range (Bytes -> Int -> Position
Pos Bytes
s (Int
pInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
o)) Int
len
sq :: [Nucleotide]
sq = [Maybe Nucleotide] -> [Nucleotide]
forall a. [Maybe a] -> [a]
catMaybes ([Maybe Nucleotide] -> [Nucleotide])
-> [Maybe Nucleotide] -> [Nucleotide]
forall a b. (a -> b) -> a -> b
$ (Nucleotide -> Mask -> Maybe Nucleotide)
-> TwoBitFile -> Range -> [Maybe Nucleotide]
forall a. (Nucleotide -> Mask -> a) -> TwoBitFile -> Range -> [a]
getSubseqWith Nucleotide -> Mask -> Maybe Nucleotide
forall a. a -> Mask -> Maybe a
mask2maybe TwoBitFile
tbf Range
r'
good :: Bool
good = Range -> Int
r_length Range
r' Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len Bool -> Bool -> Bool
&& [Nucleotide] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Nucleotide]
sq Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len
mask2maybe :: a -> Mask -> Maybe a
mask2maybe n :: a
n None = a -> Maybe a
forall a. a -> Maybe a
Just a
n
mask2maybe n :: a
n Soft = a -> Maybe a
forall a. a -> Maybe a
Just a
n
mask2maybe _ Hard = Maybe a
forall a. Maybe a
Nothing
mask2maybe _ Both = Maybe a
forall a. Maybe a
Nothing
getFragment :: TwoBitFile -> Bytes -> Int -> Int -> U.Vector Word8
getFragment :: TwoBitFile -> Bytes -> Int -> Int -> Vector Word8
getFragment tbf :: TwoBitFile
tbf chr :: Bytes
chr p :: Int
p l :: Int
l =
case TwoBitFile -> Bytes -> Maybe TwoBitSequence
lookupSequence TwoBitFile
tbf Bytes
chr of
Nothing -> Int -> Word8 -> Vector Word8
forall a. Unbox a => Int -> a -> Vector a
U.replicate Int
l 4
Just tbs :: TwoBitSequence
tbs -> TwoBitFile -> TwoBitSequence -> Int -> Int -> Vector Word8
getFwdSubseqV TwoBitFile
tbf TwoBitSequence
tbs Int
p Int
l
getFwdSubseqV :: TwoBitFile -> TwoBitSequence -> Int -> Int -> U.Vector Word8
getFwdSubseqV :: TwoBitFile -> TwoBitSequence -> Int -> Int -> Vector Word8
getFwdSubseqV TBF{..} TBS{..} start :: Int
start len :: Int
len = Int
-> ((Int, [(Int, Int)]) -> Maybe (Word8, (Int, [(Int, Int)])))
-> (Int, [(Int, Int)])
-> Vector Word8
forall a b. Unbox a => Int -> (b -> Maybe (a, b)) -> b -> Vector a
U.unfoldrN Int
len (Int, [(Int, Int)]) -> Maybe (Word8, (Int, [(Int, Int)]))
forall a.
(Storable a, Bits a, Num a) =>
(Int, [(Int, Int)]) -> Maybe (a, (Int, [(Int, Int)]))
step (Int, [(Int, Int)])
ini
where
ini :: (Int, [(Int, Int)])
ini = (Int
start, Int -> IntMap Int -> [(Int, Int)]
takeOverlap Int
start IntMap Int
tbs_n_blocks)
step :: (Int, [(Int, Int)]) -> Maybe (a, (Int, [(Int, Int)]))
step (off :: Int
off, nbs :: [(Int, Int)]
nbs)
| Int
off Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< 0 = (a, (Int, [(Int, Int)])) -> Maybe (a, (Int, [(Int, Int)]))
forall a. a -> Maybe a
Just (4, (Int -> Int
forall a. Enum a => a -> a
succ Int
off, [(Int, Int)]
nbs))
| Int
off Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
tbs_dna_size = (a, (Int, [(Int, Int)])) -> Maybe (a, (Int, [(Int, Int)]))
forall a. a -> Maybe a
Just (4, (Int -> Int
forall a. Enum a => a -> a
succ Int
off, [(Int, Int)]
nbs))
| Bool
otherwise = case [(Int, Int)]
nbs of
[ ] -> (a, (Int, [(Int, Int)])) -> Maybe (a, (Int, [(Int, Int)]))
forall a. a -> Maybe a
Just (a
y, (Int -> Int
forall a. Enum a => a -> a
succ Int
off, [ ]))
(s :: Int
s,l :: Int
l):nbs' :: [(Int, Int)]
nbs' | Int
off Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s -> (a, (Int, [(Int, Int)])) -> Maybe (a, (Int, [(Int, Int)]))
forall a. a -> Maybe a
Just (a
y, (Int -> Int
forall a. Enum a => a -> a
succ Int
off, [(Int, Int)]
nbs))
| Int
off Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l -> (a, (Int, [(Int, Int)])) -> Maybe (a, (Int, [(Int, Int)]))
forall a. a -> Maybe a
Just (4, (Int -> Int
forall a. Enum a => a -> a
succ Int
off, [(Int, Int)]
nbs))
| Bool
otherwise -> (a, (Int, [(Int, Int)])) -> Maybe (a, (Int, [(Int, Int)]))
forall a. a -> Maybe a
Just (a
y, (Int -> Int
forall a. Enum a => a -> a
succ Int
off, [(Int, Int)]
nbs'))
where
x :: a
x = IO a -> a
forall a. IO a -> a
unsafeDupablePerformIO (IO a -> a) -> IO a -> a
forall a b. (a -> b) -> a -> b
$
ForeignPtr CChar -> (Ptr CChar -> IO a) -> IO a
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr CChar
tbf_raw ((Ptr CChar -> IO a) -> IO a) -> (Ptr CChar -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \p :: Ptr CChar
p ->
Ptr CChar -> Int -> IO a
forall a b. Storable a => Ptr b -> Int -> IO a
peekByteOff Ptr CChar
p (Int
tbs_dna_offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
off Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` 2)
y :: a
y = a
x a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftR` (6 Int -> Int -> Int
forall a. Num a => a -> a -> a
- 2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Int
off Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. 3)) a -> a -> a
forall a. Bits a => a -> a -> a
.&. 3