module Bio.Utils.Functions (
ihs
, ihs'
, scale
, filterFDR
, slideAverage
, hyperquick
, kld
, jsd
, binarySearch
, binarySearchBy
, binarySearchByBounds
, quantileNormalization
) where
import Data.Bits (shiftR)
import Data.List (foldl')
import Data.Ord (comparing)
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Matrix.Unboxed as MU
import Statistics.Sample (meanVarianceUnb, mean)
import Statistics.Function (sortBy)
ihs :: Double
-> Double
-> Double
ihs !θ !x | θ == 0 = x
| otherwise = log (θ * x + sqrt (θ * θ * x * x + 1)) / θ
ihs' :: Double -> Double
ihs' = ihs 1
scale :: G.Vector v Double => v Double -> v Double
scale xs = G.map (\x -> (x m) / sqrt s) xs
where
(m,s) = meanVarianceUnb xs
filterFDR :: G.Vector v (a, Double)
=> Double
-> v (a, Double)
-> v (a, Double)
filterFDR α xs = go n . sortBy (comparing snd) $ xs
where
go rank v | rank <= 0 = G.empty
| snd (v `G.unsafeIndex` (rank1)) <= fromIntegral rank * α / fromIntegral n = G.slice 0 rank v
| otherwise = go (rank1) v
n = G.length xs
slideAverage :: (Fractional a, G.Vector v a)
=> Int
-> v a
-> v a
slideAverage k xs = G.generate n $ \i -> go xs (max 0 (ik)) (min (n1) (i+k))
where
go v i j = let l = j i + 1
in G.foldl' (+) 0 (G.unsafeSlice i l v) / fromIntegral l
n = G.length xs
hyperquick :: Int -> Int -> Int -> Int -> Double
hyperquick x m _n _N = loop (m2) s s (2*e)
where
loop !k !ak !bk !epsk
| k < _N (_nx) 1 && epsk > e =
let ck = ak / bk
k' = k + 1
jjm = invJm _n x _N k'
bk' = bk * jjm + 1
ak' = ak * jjm
espk' = fromIntegral (_N (_n x) 1 k') * (ck ak' / bk')
in loop k' ak' bk' espk'
| otherwise = 1 (ak / bk epsk / 2)
s = foldl' (\s' k -> 1 + s' * invJm _n x _N k) 1.0 [x..m2]
invJm _n _x _N _m = ( 1 fromIntegral _x / fromIntegral (_m+1) ) /
( 1 fromIntegral (_n1_x) / fromIntegral (_N1_m) )
e = 1e-20
kld :: (G.Vector v Double, G.Vector v (Double, Double)) => v Double -> v Double -> Double
kld xs ys | G.length xs /= G.length ys = error "Incompitable dimensions"
| otherwise = G.foldl' f 0 . G.zip xs $ ys
where
f acc (x, y) | x == 0 = acc
| otherwise = acc + x * (logBase 2 x logBase 2 y)
jsd :: (G.Vector v Double, G.Vector v (Double, Double)) => v Double -> v Double -> Double
jsd xs ys = 0.5 * kld xs zs + 0.5 * kld ys zs
where zs = G.zipWith (\x y -> (x + y) / 2) xs ys
binarySearch :: (G.Vector v e, Ord e)
=> v e -> e -> Int
binarySearch vec e = binarySearchByBounds compare vec e 0 $ G.length vec 1
binarySearchBy :: G.Vector v e
=> (e -> a -> Ordering) -> v e -> a -> Int
binarySearchBy cmp vec e = binarySearchByBounds cmp vec e 0 $ G.length vec 1
binarySearchByBounds :: G.Vector v e
=> (e -> a -> Ordering) -> v e -> a -> Int -> Int -> Int
binarySearchByBounds cmp vec e = loop
where
loop !l !u
| u < l = l
| otherwise = case cmp (vec G.! k) e of
LT -> loop (k+1) u
EQ -> loop l (k1)
GT -> loop l (k1)
where k = u + l `shiftR` 1
quantileNormalization :: MU.Matrix Double -> MU.Matrix Double
quantileNormalization mat = fromColumns $
map (snd . (U.unzip :: U.Vector (Int, Double) -> (U.Vector Int, U.Vector Double)) . sortBy (comparing fst)) $ MU.toColumns $
fromRows $ map f $ MU.toRows $ fromColumns $
map (sortBy (comparing snd) . U.zip (U.enumFromN 0 n)) $ MU.toColumns mat
where
n = MU.rows mat
f xs = let m = mean $ snd $ U.unzip xs
in U.map (\(i,_) -> (i,m)) xs
fromRows :: U.Unbox a => [U.Vector a] -> MU.Matrix a
fromRows = MU.fromRows
fromColumns :: U.Unbox a => [U.Vector a] -> MU.Matrix a
fromColumns = MU.fromColumns