{-# LANGUAGE FlexibleInstances #-}

module Bio.SeqLoc.SeqLike
       ( SeqLike(..)
       )
       where 

import Prelude hiding (length, concat)

import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as LBS
import qualified Data.List as L
import Data.Maybe

class SeqLike s where
  -- | Length of sequence data
  length :: (Integral n) => s -> n
  
  -- | 'Just' the nucleotide at a specified sequence data offset,
  -- given in 0-based coordinates, or 'Nothing' if the offset is
  -- beyond the bounds of the data
  ntAt :: (Integral n) => s -> n -> Maybe Char
  
  -- | 'Just' the nucleotides in subsequence of the sequence data, or
  -- 'Nothing' if the region extends beyond the bounds of the
  -- sequence.
  subseq :: (Integral n, Integral m) 
            => n -- ^ Starting position in 0-based coordinates
            -> m -- ^ Length
            -> s -- ^ Sequence data
            -> Maybe s
            
  -- | Nucleotides in a subsequence of the sequence data, padded with
  -- @N@ when the region extends beyond the bounds of the sequence.
  subseqPad :: (Integral n, Integral m)
               => n -- ^ Starting position in 0-based coordinates
               -> m -- ^ Length
               -> s -- ^ Sequence data
               -> s
  
  concat :: [s] -> s

instance SeqLike [Char] where
  length = L.genericLength
  ntAt l p | p < 0 = Nothing
           | otherwise = listToMaybe . drop (fromIntegral p) $ l
  subseq = listSubseq
  subseqPad = listSubseqPad
  concat = L.concat
  
listSubseq :: (Integral n, Integral m) => n -> m -> [Char] -> Maybe [Char]
listSubseq start len l | start < 0 = Nothing
                       | otherwise = case take (fromIntegral len) . drop (fromIntegral start) $ l of 
                         ss | L.genericLength ss == len -> Just ss
                            | otherwise -> Nothing

listSubseqPad :: (Integral n, Integral m) => n -> m -> [Char] -> [Char]
listSubseqPad istart ilen sequ
  | start + len <= 0 = replicate len 'N'
  | start < 0 = replicate (negate start) 'N' ++ takePadded (len + start) sequ
  | otherwise = takePadded len . drop start $ sequ
    where takePadded sublen subsequ = case take sublen subsequ of
            subsubsequ | length subsubsequ == sublen -> subsubsequ
                       | otherwise -> subsubsequ ++ replicate (sublen - length subsubsequ) 'N'
          start = fromIntegral istart
          len = fromIntegral ilen

instance SeqLike BS.ByteString where      
  length = fromIntegral . BS.length
  ntAt sequ ipos | pos >= 0 && pos < BS.length sequ = Just $ BS.index sequ pos
                 | otherwise = Nothing
                  where pos = fromIntegral ipos
  subseq = bsSubseq
  subseqPad = bsSubseqPad
  concat = BS.concat
  
bsSubseq :: (Integral n, Integral m) => n -> m -> BS.ByteString -> Maybe BS.ByteString
bsSubseq istart ilen sequ
  | start < 0 || start + len > BS.length sequ = Nothing
  | otherwise = Just . BS.take len . BS.drop start $ sequ
    where start = fromIntegral istart
          len = fromIntegral ilen

bsSubseqPad :: (Integral n, Integral m) => n -> m -> BS.ByteString -> BS.ByteString                
bsSubseqPad istart ilen sequ
  | start + len <= 0        = BS.replicate len 'N'
  | start >= BS.length sequ = BS.replicate len 'N'
  | start < 0  = BS.replicate (negate start) 'N' `BS.append` takePadded (len + start) sequ
  | otherwise = takePadded len $ BS.drop start sequ
    where takePadded sublen subsequ
            | sublen <= BS.length subsequ = BS.take sublen subsequ
            | otherwise = subsequ `BS.append` BS.replicate (sublen - BS.length subsequ) 'N'
          start = fromIntegral istart
          len = fromIntegral ilen

instance SeqLike LBS.ByteString where
    length = fromIntegral . LBS.length
    ntAt = lbsNtAt
    subseq = lbsSubseq
    subseqPad = lbsSubseqPad
    concat = LBS.concat
    
lbsNtAt :: (Integral n) => LBS.ByteString -> n -> Maybe Char
lbsNtAt sequ pos | pos < 0 = Nothing
                 | otherwise = case LBS.drop (fromIntegral pos) sequ of
                   trimmed | LBS.null trimmed -> Nothing
                           | otherwise -> Just . LBS.head $ trimmed

lbsSubseq :: (Integral n, Integral m) => n -> m -> LBS.ByteString -> Maybe LBS.ByteString
lbsSubseq istart ilen sequ
  | start < 0 = Nothing
  | otherwise = case LBS.take len . LBS.drop start $ sequ of
    subsequ | LBS.length subsequ < len -> Nothing
            | otherwise -> Just subsequ
    where start = fromIntegral istart
          len = fromIntegral ilen
    
lbsSubseqPad :: (Integral n, Integral m) => n -> m -> LBS.ByteString -> LBS.ByteString
lbsSubseqPad istart ilen sequ
  | start + len <= 0 = LBS.replicate len 'N'
  | start < 0  = LBS.replicate (negate start) 'N' `LBS.append` takePadded (len + start) sequ
  | otherwise = takePadded len $ LBS.drop start sequ
    where takePadded sublen subsequ = case LBS.take sublen subsequ of
            subsubsequ | LBS.length subsubsequ == sublen -> subsubsequ
                       | otherwise -> subsubsequ `LBS.append` LBS.replicate (sublen - LBS.length subsubsequ) 'N'
          start = fromIntegral istart
          len = fromIntegral ilen