{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}

module Bio.Data.Bed.Types
    ( BEDLike(..)
    , BEDConvert(..)
    , BED(..)
    , BED3(..)
    , NarrowPeak(..)
    , npSignal
    , npPvalue
    , npQvalue
    , npPeak
    , BroadPeak(..)
    , bpSignal
    , bpPvalue
    , bpQvalue
    , BEDExt(..)
    , _bed
    , _data
    , BEDTree
    , Sorted(..)
    ) where

import Lens.Micro
import Lens.Micro.TH (makeLensesFor)
import qualified Data.ByteString.Char8             as B
import           Data.ByteString.Lex.Integral      (packDecimal)
import           Data.Default.Class                (Default (..))
import           Data.Double.Conversion.ByteString (toShortest)
import qualified Data.HashMap.Strict               as M
import qualified Data.IntervalMap.Strict           as IM
import           Data.Maybe                        (fromJust, fromMaybe)

import           Bio.Utils.Misc                    (readDouble, readInt)

readDoubleNonnegative :: B.ByteString -> Maybe Double
readDoubleNonnegative x | v < 0 = Nothing
                        | otherwise = Just v
  where
    v = readDouble x
{-# INLINE readDoubleNonnegative #-}

readIntNonnegative :: B.ByteString -> Maybe Int
readIntNonnegative x | v < 0 = Nothing
                     | otherwise = Just v
  where
    v = readInt x
{-# INLINE readIntNonnegative #-}

-- | A class representing BED-like data, e.g., BED3, BED6 and BED12. BED format
-- uses 0-based index (see documentation).
class BEDLike b where
    -- | Field lens
    chrom :: Lens' b B.ByteString
    chromStart :: Lens' b Int
    chromEnd :: Lens' b Int
    name :: Lens' b (Maybe B.ByteString)
    score :: Lens' b (Maybe Int)
    strand :: Lens' b (Maybe Bool)

    -- | Return the size of a bed region.
    size :: b -> Int
    size bed = bed^.chromEnd - bed^.chromStart
    {-# INLINE size #-}

    {-# MINIMAL chrom, chromStart, chromEnd, name, score, strand #-}

class BEDLike b => BEDConvert b where
    -- | Construct bed record from chromsomoe, start location and end location
    asBed :: B.ByteString -> Int -> Int -> b

    -- | Convert bytestring to bed format
    fromLine :: B.ByteString -> b

    -- | Convert bed to bytestring
    toLine :: b -> B.ByteString

    convert :: BEDLike b' => b' -> b
    convert bed = asBed (bed^.chrom) (bed^.chromStart) (bed^.chromEnd)
    {-# INLINE convert #-}

    {-# MINIMAL asBed, fromLine, toLine #-}

-- * BED6 format

-- | BED6 format, as described in http://genome.ucsc.edu/FAQ/FAQformat.html#format1.7
data BED = BED
    { _bed_chrom      :: !B.ByteString
    , _bed_chromStart :: !Int
    , _bed_chromEnd   :: !Int
    , _bed_name       :: !(Maybe B.ByteString)
    , _bed_score      :: !(Maybe Int)
    , _bed_strand     :: !(Maybe Bool)  -- ^ True: "+", False: "-"
    } deriving (Eq, Show, Read)

instance Ord BED where
    compare (BED x1 x2 x3 x4 x5 x6) (BED y1 y2 y3 y4 y5 y6) =
        compare (x1,x2,x3,x4,x5,x6) (y1,y2,y3,y4,y5,y6)

instance BEDLike BED where
    chrom = lens _bed_chrom (\bed x -> bed { _bed_chrom = x })
    chromStart = lens _bed_chromStart (\bed x -> bed { _bed_chromStart = x })
    chromEnd = lens _bed_chromEnd (\bed x -> bed { _bed_chromEnd = x })
    name = lens _bed_name (\bed x -> bed { _bed_name = x })
    score = lens _bed_score (\bed x -> bed { _bed_score = x })
    strand = lens _bed_strand (\bed x -> bed { _bed_strand = x })

instance BEDConvert BED where
    asBed chr s e = BED chr s e Nothing Nothing Nothing

    fromLine l = f $ take 6 $ B.split '\t' l
      where
        f [f1,f2,f3,f4,f5,f6] = BED f1 (readInt f2) (readInt f3) (getName f4)
            (getScore f5) (getStrand f6)
        f [f1,f2,f3,f4,f5] = BED f1 (readInt f2) (readInt f3) (getName f4)
            (getScore f5) Nothing
        f [f1,f2,f3,f4] = BED f1 (readInt f2) (readInt f3) (getName f4)
            Nothing Nothing
        f [f1,f2,f3] = asBed f1 (readInt f2) (readInt f3)
        f _ = error "Read BED fail: Not enough fields!"
        getName x | x == "." = Nothing
                  | otherwise = Just x
        getScore x | x == "." = Nothing
                   | s >= 0 && s <= 1000 = Just s
                   | otherwise = error "score must be in [0, 1000]."
          where
            s = readInt x
        getStrand str | str == "-" = Just False
                      | str == "+" = Just True
                      | otherwise = Nothing
    {-# INLINE fromLine #-}

    toLine (BED f1 f2 f3 f4 f5 f6) = B.intercalate "\t"
        [ f1, fromJust $ packDecimal f2, fromJust $ packDecimal f3
        , fromMaybe "." f4, score', strand' ]
      where
        strand' | f6 == Just True = "+"
                | f6 == Just False = "-"
                | otherwise = "."
        score' = case f5 of
                     Just x -> fromJust $ packDecimal x
                     _      -> "."
    {-# INLINE toLine #-}

    convert bed = BED (bed^.chrom) (bed^.chromStart) (bed^.chromEnd) (bed^.name)
                      (bed^.score) (bed^.strand)

-- * BED3 format

data BED3 = BED3
    { _bed3_chrom       :: !B.ByteString
    , _bed3_chrom_start :: !Int
    , _bed3_chrom_end   :: !Int
    } deriving (Eq, Show, Read)

instance Ord BED3 where
    compare (BED3 x1 x2 x3) (BED3 y1 y2 y3) = compare (x1,x2,x3) (y1,y2,y3)

instance BEDLike BED3 where
    chrom = lens _bed3_chrom (\bed x -> bed { _bed3_chrom = x })
    chromStart = lens _bed3_chrom_start (\bed x -> bed { _bed3_chrom_start = x })
    chromEnd = lens _bed3_chrom_end (\bed x -> bed { _bed3_chrom_end = x })
    name = lens (const Nothing) (\bed _ -> bed)
    score = lens (const Nothing) (\bed _ -> bed)
    strand = lens (const Nothing) (\bed _ -> bed)

instance BEDConvert BED3 where
    asBed = BED3

    fromLine l = case B.split '\t' l of
                    (a:b:c:_) -> BED3 a (readInt b) $ readInt c
                    _ -> error "Read BED fail: Incorrect number of fields"
    {-# INLINE fromLine #-}

    toLine (BED3 a b c) = B.intercalate "\t"
        [a, fromJust $ packDecimal b, fromJust $ packDecimal c]
    {-# INLINE toLine #-}

-- | ENCODE narrowPeak format: https://genome.ucsc.edu/FAQ/FAQformat.html#format12
data NarrowPeak = NarrowPeak
    { _npChrom  :: !B.ByteString
    , _npStart  :: !Int
    , _npEnd    :: !Int
    , _npName   :: !(Maybe B.ByteString)
    , _npScore  :: !Int
    , _npStrand :: !(Maybe Bool)
    , _npSignal  :: !Double
    , _npPvalue :: !(Maybe Double)
    , _npQvalue :: !(Maybe Double)
    , _npPeak   :: !(Maybe Int)
    } deriving (Eq, Show, Read)

makeLensesFor [ ("_npSignal", "npSignal")
              , ("_npPvalue", "npPvalue")
              , ("_npQvalue", "npQvalue")
              , ("_npPeak", "npPeak")
              ] ''NarrowPeak

instance BEDLike NarrowPeak where
    chrom = lens _npChrom (\bed x -> bed { _npChrom = x })
    chromStart = lens _npStart (\bed x -> bed { _npStart = x })
    chromEnd = lens _npEnd (\bed x -> bed { _npEnd = x })
    name = lens _npName (\bed x -> bed { _npName = x })
    score = lens (Just . _npScore) (\bed x -> bed { _npScore = fromJust x })
    strand = lens _npStrand (\bed x -> bed { _npStrand = x })

instance BEDConvert NarrowPeak where
    asBed chr s e = NarrowPeak chr s e Nothing 0 Nothing 0 Nothing Nothing Nothing

    fromLine = go . B.split '\t'
      where
        go [a,b,c] = convert $ BED3 a (readInt b) $ readInt c
        go (a:b:c:d:e:f:g:h:i:j:_) = NarrowPeak a (readInt b) (readInt c)
            (if d == "." then Nothing else Just d)
            (readInt e)
            (if f == "." then Nothing else if f == "+" then Just True else Just False)
            (readDouble g)
            (readDoubleNonnegative h)
            (readDoubleNonnegative i)
            (readIntNonnegative j)
    {-# INLINE fromLine #-}

    toLine (NarrowPeak a b c d e f g h i j) = B.intercalate "\t"
        [ a, fromJust $ packDecimal b, fromJust $ packDecimal c, fromMaybe "." d
        , fromJust $ packDecimal e
        , case f of
            Nothing   -> "."
            Just True -> "+"
            _         -> "-"
        , toShortest g, fromMaybe "-1" $ fmap toShortest h
        , fromMaybe "-1" $ fmap toShortest i
        , fromMaybe "-1" $ fmap (fromJust . packDecimal) j
        ]
    {-# INLINE toLine #-}

    convert bed = NarrowPeak (bed^.chrom) (bed^.chromStart) (bed^.chromEnd) (bed^.name)
        (fromMaybe 0 $ bed^.score) (bed^.strand) 0 Nothing Nothing Nothing

-- | ENCODE broadPeak format: https://genome.ucsc.edu/FAQ/FAQformat.html#format13
data BroadPeak = BroadPeak
    { _bpChrom  :: !B.ByteString
    , _bpStart  :: !Int
    , _bpEnd    :: !Int
    , _bpName   :: !(Maybe B.ByteString)
    , _bpScore  :: !Int
    , _bpStrand :: !(Maybe Bool)
    , _bpSignal  :: !Double
    , _bpPvalue :: !(Maybe Double)
    , _bpQvalue :: !(Maybe Double)
    } deriving (Eq, Show, Read)

makeLensesFor [ ("_bpSignal", "bpSignal")
              , ("_bpPvalue", "bpPvalue")
              , ("_bpQvalue", "bpQvalue")
              ] ''BroadPeak

instance BEDLike BroadPeak where
    chrom = lens _bpChrom (\bed x -> bed { _bpChrom = x })
    chromStart = lens _bpStart (\bed x -> bed { _bpStart = x })
    chromEnd = lens _bpEnd (\bed x -> bed { _bpEnd = x })
    name = lens _bpName (\bed x -> bed { _bpName = x })
    score = lens (Just . _bpScore) (\bed x -> bed { _bpScore = fromJust x })
    strand = lens _bpStrand (\bed x -> bed { _bpStrand = x })

instance BEDConvert BroadPeak where
    asBed chr s e = BroadPeak chr s e Nothing 0 Nothing 0 Nothing Nothing

    fromLine l = BroadPeak a (readInt b) (readInt c)
        (if d == "." then Nothing else Just d)
        (readInt e)
        (if f == "." then Nothing else if f == "+" then Just True else Just False)
        (readDouble g)
        (readDoubleNonnegative h)
        (readDoubleNonnegative i)
      where
        (a:b:c:d:e:f:g:h:i:_) = B.split '\t' l
    {-# INLINE fromLine #-}

    toLine (BroadPeak a b c d e f g h i) = B.intercalate "\t"
        [ a, fromJust $ packDecimal b, fromJust $ packDecimal c, fromMaybe "." d
        , fromJust $ packDecimal e
        , case f of
            Nothing   -> "."
            Just True -> "+"
            _         -> "-"
        , toShortest g, fromMaybe "-1" $ fmap toShortest h
        , fromMaybe "-1" $ fmap toShortest i
        ]
    {-# INLINE toLine #-}

    convert bed = BroadPeak (bed^.chrom) (bed^.chromStart) (bed^.chromEnd) (bed^.name)
        (fromMaybe 0 $ bed^.score) (bed^.strand) 0 Nothing Nothing


data BEDExt bed a = BEDExt
    { _ext_bed :: bed
    , _ext_data :: a
    } deriving (Eq, Show, Read)

makeLensesFor [("_ext_bed", "_bed"), ("_ext_data", "_data")] ''BEDExt

instance BEDLike bed => BEDLike (BEDExt bed a) where
    chrom = _bed . chrom
    chromStart = _bed . chromStart
    chromEnd = _bed . chromEnd
    name = _bed . name
    score = _bed . score
    strand = _bed . strand

instance (Default a, Read a, Show a, BEDConvert bed) => BEDConvert (BEDExt bed a) where
    asBed chr s e = BEDExt (asBed chr s e) def

    fromLine l = let (a, b) = B.breakEnd (=='\t') l
                 in BEDExt (fromLine $ B.init a) $ read $ B.unpack b
    {-# INLINE fromLine #-}

    toLine (BEDExt bed a) = toLine bed <> "\t" <> B.pack (show a)
    {-# INLINE toLine #-}

type BEDTree a = M.HashMap B.ByteString (IM.IntervalMap Int a)

-- | a type to imply that underlying data structure is sorted
newtype Sorted b = Sorted {fromSorted :: b} deriving (Show, Read, Eq)