module Bio.Base(
    Nucleotide(..), Nucleotides(..),
    Qual(..), toQual, fromQual, fromQualRaised, probToQual,
    Prob(..), toProb, fromProb, qualToProb, pow,
    Word8,
    nucA, nucC, nucG, nucT,
    nucsA, nucsC, nucsG, nucsT, nucsN, gap,
    toNucleotide, toNucleotides, nucToNucs,
    showNucleotide, showNucleotides,
    isGap,
    isBase,
    isProperBase,
    properBases,
    compl, compls,
    everything,
    Seqid,
    unpackSeqid,
    packSeqid,
    Position(..),
    shiftPosition,
    p_is_reverse,
    Range(..),
    shiftRange,
    reverseRange,
    extendRange,
    insideRange,
    wrapRange,
    w2c,
    c2w,
    findAuxFile
) where
import Bio.Util                     ( log1p )
import Data.Array.Unboxed
import Data.Bits
import Data.ByteString.Internal     ( c2w, w2c )
import Data.Char                    ( isAlpha, isSpace, ord, toUpper )
import Data.Word                    ( Word8 )
import Data.Vector.Generic          ( Vector(..) )
import Data.Vector.Generic.Mutable  ( MVector(..) )
import Data.Vector.Unboxed.Deriving
import Foreign.Storable             ( Storable(..) )
import Numeric                      ( showFFloat )
import System.Directory             ( doesFileExist )
import System.FilePath              ( (</>), isAbsolute, splitSearchPath )
import System.Environment           ( getEnvironment )
import qualified Data.ByteString.Char8 as S
newtype Nucleotide = N { unN :: Word8 } deriving ( Eq, Ord, Enum, Ix, Storable )
derivingUnbox "Nucleotide" [t| Nucleotide -> Word8 |] [| unN |] [| N |]
instance Bounded Nucleotide where
    minBound = N 0
    maxBound = N 3
everything :: (Bounded a, Ix a) => [a]
everything = range (minBound, maxBound)
newtype Nucleotides = Ns { unNs :: Word8 } deriving ( Eq, Ord, Enum, Ix, Storable )
derivingUnbox "Nucleotides" [t| Nucleotides -> Word8 |] [| unNs |] [| Ns |]
instance Bounded Nucleotides where
    minBound = Ns  0
    maxBound = Ns 15
nucToNucs :: Nucleotide -> Nucleotides
nucToNucs (N x) = Ns $ 1 `shiftL` fromIntegral (x .&. 3)
newtype Qual = Q { unQ :: Word8 } deriving ( Eq, Ord, Storable, Bounded )
derivingUnbox "Qual" [t| Qual -> Word8 |] [| unQ |] [| Q |]
instance Show Qual where
    showsPrec p (Q q) = (:) 'q' . showsPrec p q
toQual :: (Floating a, RealFrac a) => a -> Qual
toQual a = Q $ round (10 * log a / log 10)
fromQual :: Qual -> Double
fromQual (Q q) = 10 ** ( fromIntegral q / 10)
fromQualRaised :: Double -> Qual -> Double
fromQualRaised k (Q q) = 10 ** ( k * fromIntegral q / 10)
newtype Prob = Pr { unPr :: Double } deriving ( Eq, Ord, Storable )
derivingUnbox "Prob" [t| Prob -> Double |] [| unPr |] [| Pr |]
instance Show Prob where
    showsPrec _ (Pr p) = (:) 'q' . showFFloat (Just 1) q
      where q =  10 * p / log 10
instance Num Prob where
    fromInteger a = Pr (log (fromInteger a))
    Pr x + Pr y = Pr $ if x >= y then x + log1p (  exp (yx)) else y + log1p (exp (xy))
    Pr x  Pr y = Pr $ if x >= y then x + log1p ( exp (yx)) else error "no negative error probabilities"
    Pr a * Pr b = Pr $ a + b
    negate    _ = Pr $ error "no negative error probabilities"
    abs       x = x
    signum    _ = Pr 0
instance Fractional Prob where
    fromRational a = Pr (log (fromRational a))
    Pr a  /  Pr b = Pr (a  b)
    recip  (Pr a) = Pr (negate a)
infixr 8 `pow`
pow :: Prob -> Double -> Prob
pow (Pr a) e = Pr (a*e)
toProb :: Double -> Prob
toProb p = Pr (log p)
fromProb :: Prob -> Double
fromProb (Pr q) = exp q
qualToProb :: Qual -> Prob
qualToProb (Q q) = Pr ( log 10 * fromIntegral q / 10)
probToQual :: Prob -> Qual
probToQual (Pr p) = Q (round ( 10 * p / log 10))
nucA, nucC, nucG, nucT :: Nucleotide
nucA = N 0
nucC = N 1
nucG = N 2
nucT = N 3
gap, nucsA, nucsC, nucsG, nucsT, nucsN :: Nucleotides
gap   = Ns 0
nucsA = Ns 1
nucsC = Ns 2
nucsG = Ns 4
nucsT = Ns 8
nucsN = Ns 15
type Seqid = S.ByteString
unpackSeqid :: Seqid -> String
unpackSeqid = S.unpack
packSeqid :: String -> Seqid
packSeqid = S.pack
data Position = Pos {
        p_seq   ::  !Seqid,   
        p_start ::  !Int      
    } deriving (Show, Eq, Ord)
p_is_reverse :: Position -> Bool
p_is_reverse = (< 0) . p_start
data Range = Range {
        r_pos    ::  !Position,
        r_length ::  !Int
    } deriving (Show, Eq, Ord)
toNucleotide :: Char -> Nucleotide
toNucleotide c = if inRange (bounds arr) (ord c) then N (arr ! ord c) else N 0
  where
    arr :: UArray Int Word8
    arr = listArray (0,127) (repeat 0) //
          ( [ (ord          x,  n) | (x, N n) <- pairs ] ++
            [ (ord (toUpper x), n) | (x, N n) <- pairs ] )
    pairs = [ ('a', nucA), ('c', nucC), ('g', nucG), ('t', nucT) ]
toNucleotides :: Char -> Nucleotides
toNucleotides c = if inRange (bounds arr) (ord c) then Ns (arr ! ord c) else nucsN
  where
    arr :: UArray Int Word8
    arr = listArray (0,127) (repeat (unNs nucsN)) //
          ( [ (ord          x,  n) | (x, Ns n) <- pairs ] ++
            [ (ord (toUpper x), n) | (x, Ns n) <- pairs ] )
    Ns a `plus` Ns b = Ns (a .|. b)
    pairs = [ ('a', nucsA), ('c', nucsC), ('g', nucsG), ('t', nucsT),
              ('u', nucsT), ('-', gap),  ('.', gap),
              ('b', nucsC `plus` nucsG `plus` nucsT),
              ('d', nucsA `plus` nucsG `plus` nucsT),
              ('h', nucsA `plus` nucsC `plus` nucsT),
              ('v', nucsA `plus` nucsC `plus` nucsG),
              ('k', nucsG `plus` nucsT),
              ('m', nucsA `plus` nucsC),
              ('s', nucsC `plus` nucsG),
              ('w', nucsA `plus` nucsT),
              ('r', nucsA `plus` nucsG),
              ('y', nucsC `plus` nucsT) ]
isBase :: Nucleotides -> Bool
isBase (Ns n) = n /= 0
isProperBase :: Nucleotides -> Bool
isProperBase x = x == nucsA || x == nucsC || x == nucsG || x == nucsT
properBases :: [ Nucleotides ]
properBases = [ nucsA, nucsC, nucsG, nucsT ]
isGap :: Nucleotides -> Bool
isGap x = x == gap
showNucleotide :: Nucleotide -> Char
showNucleotide (N x) = S.index str $ fromIntegral $ x .&. 3
  where str = S.pack "ACGT"
showNucleotides :: Nucleotides -> Char
showNucleotides (Ns x) = S.index str $ fromIntegral $ x .&. 15
  where str = S.pack "-ACMGRSVTWYHKDBN"
instance Show Nucleotide where
    show     x = [ showNucleotide x ]
    showList l = (map showNucleotide l ++)
instance Read Nucleotide where
    readsPrec _ ('a':cs) = [(nucA, cs)]
    readsPrec _ ('A':cs) = [(nucA, cs)]
    readsPrec _ ('c':cs) = [(nucC, cs)]
    readsPrec _ ('C':cs) = [(nucC, cs)]
    readsPrec _ ('g':cs) = [(nucG, cs)]
    readsPrec _ ('G':cs) = [(nucG, cs)]
    readsPrec _ ('t':cs) = [(nucT, cs)]
    readsPrec _ ('T':cs) = [(nucT, cs)]
    readsPrec _ ('u':cs) = [(nucT, cs)]
    readsPrec _ ('U':cs) = [(nucT, cs)]
    readsPrec _     _    = [          ]
    readList ('-':cs) = readList cs
    readList (c:cs) | isSpace c = readList cs
                    | otherwise = case readsPrec 0 (c:cs) of
                            [] -> [ ([],c:cs) ]
                            xs -> [ (n:ns,r2) | (n,r1) <- xs, (ns,r2) <- readList r1 ]
    readList [] = [([],[])]
instance Show Nucleotides where
    show     x = [ showNucleotides x ]
    showList l = (map showNucleotides l ++)
instance Read Nucleotides where
    readsPrec _ (c:cs) = [(toNucleotides c, cs)]
    readsPrec _ [    ] = []
    readList s = let (hd,tl) = span (\c -> isAlpha c || isSpace c || '-' == c) s
                 in [(map toNucleotides $ filter (not . isSpace) hd, tl)]
compl :: Nucleotide -> Nucleotide
compl (N n) = N $ n `xor` 3
compls :: Nucleotides -> Nucleotides
compls (Ns x) = Ns $ arr ! (x .&. 15)
  where
    arr :: UArray Word8 Word8
    !arr = listArray (0,15) [ 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 ]
shiftPosition :: Int -> Position -> Position
shiftPosition a p = p { p_start = p_start p + a }
shiftRange :: Int -> Range -> Range
shiftRange a r = r { r_pos = shiftPosition a (r_pos r) }
reverseRange :: Range -> Range
reverseRange (Range (Pos sq pos) len) = Range (Pos sq (poslen)) len
extendRange :: Int -> Range -> Range
extendRange a r = r { r_length = r_length r + a }
insideRange :: Range -> Range -> Range
insideRange r1@(Range (Pos _ start1) length1) r2@(Range (Pos sq start2) length2)
    | start2 < 0         = reverseRange (insideRange r1 (reverseRange r2))
    | start1 <= length2  = Range (Pos sq (start2 + start1)) (min length1 (length2  start1))
    | otherwise          = Range (Pos sq (start2 + length2)) 0
wrapRange :: Int -> Range -> Range
wrapRange n (Range (Pos sq s) l) = Range (Pos sq (s `mod` n)) l
findAuxFile :: FilePath -> IO FilePath
findAuxFile fn | isAbsolute fn = return fn
               | otherwise = loop . maybe ["."] splitSearchPath . lookup "BIOHAZARD" =<< getEnvironment
  where
    loop [    ] = return fn
    loop (p:ps) = do e <- doesFileExist $ p </> fn
                     if e then return $ p </> fn else loop ps