{-# LANGUAGE NamedFieldPuns, RecordWildCards, DeriveDataTypeable #-}
{-# OPTIONS_HADDOCK prune #-}
module Language.Pads.Source where
import qualified Data.ByteString as B   
import qualified Text.Regex.Posix as TRP
import Language.Pads.RegExp                        
import Text.PrettyPrint.Mainland as PP
import Text.PrettyPrint.Mainland.Class
import Data.Int
import Data.Data
import Data.Word
import Data.Char
import Data.Bits (shiftR, shiftL, (.&.))
import qualified Data.ByteString.Char8 as Char8
type RawStream = B.ByteString   
data Source = Source
  { current  :: B.ByteString      
  , rest     :: B.ByteString      
  , loc      :: Loc               
  , bit      :: Int               
  , disc     :: RecordDiscipline  
  , eorAtEOF :: Bool  
                      
  }
data RecordDiscipline =
    Single Word8          
  | Multi B.ByteString    
  | Bytes Int             
  | NoPartition           
  | NoDiscipline          
newline = Single (chrToWord8 '\n')
windows = Multi  (B.pack (strToWord8s "\r\n"))
bytes n = Bytes n
none    = NoPartition
data Loc = Loc
  { recordNumber  :: Int64 
  , byteOffset    :: Int64 
  } deriving (Typeable, Data,Eq, Ord, Show)
data Span = Span
  { begin      :: Loc       
  , end        :: Maybe Loc 
  } deriving (Typeable, Data, Eq, Ord, Show)
zeroLoc   = Loc {recordNumber = 0,  byteOffset = 0}
zeroSpan = locToSpan zeroLoc
zeroBit = 7
incRecordNumber :: Loc -> Loc
incRecordNumber Loc{recordNumber, ..} = Loc{ recordNumber = recordNumber+1
                                       , byteOffset = 0}
decLineNumber :: Loc -> Loc
decLineNumber Loc{recordNumber, ..} = Loc{recordNumber=recordNumber-1, byteOffset=0}
incOffset :: Loc -> Loc
incOffset l@Loc{byteOffset} = l { byteOffset = byteOffset + 1 }
incOffsetBy :: Loc -> Int -> Loc
incOffsetBy l@Loc{byteOffset} n = l { byteOffset = byteOffset + fromIntegral n }
decOffset :: Loc -> Loc
decOffset l@Loc{byteOffset} = l { byteOffset = byteOffset - 1 }
getSrcLoc :: Source -> Loc
getSrcLoc = loc
getRecordDiscipline :: Source -> RecordDiscipline
getRecordDiscipline = disc
emptySource = Source
  { current   = B.empty
  , rest      = B.empty
  , loc       = zeroLoc
  , bit       = zeroBit
  , eorAtEOF  = False
  , disc      = newline
  }
padsSourceFromString :: String -> Source
padsSourceFromString str = padsSourceFromByteString (strToByteString str)
padsSourceFromStringWithDisc :: RecordDiscipline -> String -> Source
padsSourceFromStringWithDisc d str = padsSourceFromByteStringWithDisc d (strToByteString str)
padsSourceFromFile :: FilePath -> IO Source
padsSourceFromFile file = do
  bs <- B.readFile file
  return (padsSourceFromByteString bs)
padsSourceFromFileWithDisc :: RecordDiscipline -> FilePath -> IO Source
padsSourceFromFileWithDisc d file = do
  bs <- B.readFile file
  return (padsSourceFromByteStringWithDisc d bs)
padsSourceFromByteString :: B.ByteString -> Source
padsSourceFromByteString bs =
    let rawSource = Source{ current  = B.empty
                          , rest     = bs
                          , loc      = zeroLoc
                          , bit      = zeroBit
                          , disc     = newline
                          , eorAtEOF = False
                          }
    in getNextRecord rawSource
padsSourceFromByteStringWithDisc :: RecordDiscipline -> B.ByteString -> Source
padsSourceFromByteStringWithDisc d bs =
    let rawSource = Source{ current  = B.empty
                          , rest     = bs
                          , loc      = zeroLoc
                          , bit      = zeroBit
                          , disc     = d
                          , eorAtEOF = False
                          }
    in getNextRecord rawSource
isEOF :: Source -> Bool
isEOF (s @ Source{current, rest, eorAtEOF, ..}) = B.null current && B.null rest && not eorAtEOF
isEOR :: Source -> Bool
isEOR = B.null . current
getNextRecord :: Source -> Source
getNextRecord (s @ Source {current, rest, loc, bit, disc, eorAtEOF}) =
      if isEOF s then s
      else if eorAtEOF || B.null rest then
            (Source {current = B.empty, rest = B.empty, loc = incRecordNumber loc, bit = zeroBit, disc, eorAtEOF = False})
      else  (Source {current = nextLine, rest=residual, loc = incRecordNumber loc, bit = zeroBit, disc, eorAtEOF = eorAtEOF'}) 
        where (nextLine, residual, eorAtEOF') = breakUsingDisc rest disc
srcLineBegin :: Source -> (Maybe String, Source)
srcLineBegin s = (Nothing, s)
srcLineEnd :: Source -> (Maybe String, Source)
srcLineEnd s = if isEOF s
     then (Just "Found EOF when looking for EOR", s)
     else (Nothing, getNextRecord s)
setRecordDiscipline :: RecordDiscipline -> Source -> ((),Source)
setRecordDiscipline r s =
  let s'   = unputCurrentLine s
      s'' = s'{disc = r}
  in ((),getNextRecord s'')
unputCurrentLine :: Source -> Source
unputCurrentLine (s @ Source {current, rest, loc, disc, eorAtEOF}) =
      if isEOF s then s
      else case disc of
        Single n -> let rest' = if B.null rest
                                then if eorAtEOF
                                     then B.concat [current, B.singleton n]
                                     else current
                                else B.concat [current, B.singleton n, rest]
                        loc'  = if B.null current then loc else decLineNumber loc
                    in Source {current = B.empty, rest = rest', loc = loc', bit = zeroBit, disc = NoDiscipline, eorAtEOF = False}
        Multi  br -> let rest' = if B.null rest
                                then if eorAtEOF
                                     then B.concat [current, br]
                                     else current
                                else B.concat [current, rest]
                         loc'  = if B.null current then loc else decLineNumber loc
                    in Source {current = B.empty, rest = rest', loc = loc', bit = zeroBit, disc = NoDiscipline, eorAtEOF = False}
        Bytes n -> Source {current = B.empty, rest = B.append current rest, loc = decLineNumber loc, bit = zeroBit, disc = NoDiscipline, eorAtEOF = False}
        NoPartition -> Source {current = B.empty, rest = current, loc = decLineNumber loc, bit = zeroBit, disc = NoDiscipline, eorAtEOF = False}
        NoDiscipline -> s
breakUsingDisc :: B.ByteString -> RecordDiscipline -> (B.ByteString, B.ByteString, Bool)
breakUsingDisc bs rd = case rd of
  Single n -> let (nextLine, raw_residual) = B.break (\c->c == n) bs
                  residual = B.drop 1 raw_residual
                  eorAtEOF = (B.null residual) && (not $ B.null raw_residual)
              in  (nextLine, residual, eorAtEOF)
  Multi s  -> let (nextLine, raw_residual) = B.breakSubstring s bs
                  residual = B.drop (B.length s) raw_residual
                  eorAtEOF =  (B.null residual) && (not $ B.null raw_residual)
              in  (nextLine, residual, eorAtEOF)
  Bytes n ->  let (nextLine, residual) = B.splitAt n bs
              in  (nextLine, residual, False)
  NoPartition -> (bs, B.empty, False)
  NoDiscipline -> error "Pads Source: Attempt to partition source using internal discipline 'NoDiscipline'"
padsSourceToString :: Source -> String
padsSourceToString = (map word8ToChr) . B.unpack . padsSourceToByteString
padsSourceToByteString :: Source -> B.ByteString
padsSourceToByteString = rest . unputCurrentLine
drainSource :: Source -> (String, Source)
drainSource s = (padsSourceToString s, emptySource)
drainSourceNB :: Source -> (String, Source)
drainSourceNB (s @ Source{current,loc, ..}) =
    let len = (B.length current) - (if bit == zeroBit then 0 else 1)
        (bs, s') = takeBits (len * 8) s
    in (map word8ToChr (numToWord8s bs []), emptySource)
rawSource :: Source -> (B.ByteString, Source)
rawSource s = (padsSourceToByteString s, emptySource)
restRec :: Source -> String
restRec = byteStringToStr . current
head :: Source -> Char
head = word8ToChr . headOrZero . current
headOrZero s = if B.null s then chrToWord8 '\0' else B.head s
peekHeadM :: Source -> (Maybe Char, Source)
peekHeadM (s @ Source{current,loc, ..}) =
  if B.null current then (Nothing, s) else (Just (Language.Pads.Source.head s), s)
takeHead :: Source -> (Char, Source)
takeHead (s @ Source{current,loc, ..}) =
    (word8ToChr $ B.head current, s{current = B.tail current, loc = incOffset loc})
partitionBS :: Integral a => B.ByteString -> a -> a -> (B.ByteString, B.ByteString, Bool)
partitionBS bS bitIndex bits =
    let part b bs = if bs > b + 1 then 1 + part zeroBit (bs - (b + 1)) else 1
        byteAlign = (bits - (bitIndex + 1)) `mod` 8 == 0
        withinByte = bits <= bitIndex + 1
        hd = B.take (part (fromIntegral bitIndex) (fromIntegral bits)) bS
        tl = B.drop (B.length hd - if not byteAlign then 1 else 0) bS
    in  (hd, tl, withinByte || not byteAlign)
accumulate :: Integral a => a -> (a, Int) -> (a, Int)
accumulate byte (num, pow) = ((byte * (256 ^ pow)) + num, pow + 1)
takeBits8 :: Integral a => a -> Source -> (Word8, Source)
takeBits8 b (s @ Source{current,loc,bit, ..}) =
    let (hd, tl, partial) = partitionBS current bit (fromIntegral b)
        bS    = map (\x -> fromIntegral x :: Word16) (B.unpack $ B.take 2 hd)
        bytes = fst $ foldr accumulate (0,0) bS
        mask  = (2 ^ b) - 1
        bits  = mask .&. shiftR bytes ((B.length hd * 8) - (fromIntegral b) - (zeroBit - bit))
    in  (fromIntegral bits, s{current = tl,
                 loc = incOffsetBy loc (B.length hd - if partial then 1 else 0),
                 bit = (zeroBit - (((zeroBit - bit) + (fromIntegral b)) `mod` 8))})
takeBits16 :: Integral a => a -> Source -> (Word16, Source)
takeBits16 b (s @ Source{current,loc,bit, ..}) =
    let (hd, tl, partial) = partitionBS current bit (fromIntegral b)
        bS    = map (\x -> fromIntegral x :: Word32) (B.unpack $ B.take 3 hd)
        bytes = fst $ foldr accumulate (0,0) bS
        mask  = (2 ^ b) - 1
        bits  = mask .&. shiftR bytes ((B.length hd * 8) - (fromIntegral b) - (zeroBit - bit))
    in  (fromIntegral bits, s{current = tl,
                 loc = incOffsetBy loc (B.length hd - if partial then 1 else 0),
                 bit = (zeroBit - (((zeroBit - bit) + (fromIntegral b)) `mod` 8))})
takeBits32 :: Integral a => a -> Source -> (Word32, Source)
takeBits32 b (s @ Source{current,loc,bit, ..}) =
    let (hd, tl, partial) = partitionBS current bit (fromIntegral b)
        bS    = map (\x -> fromIntegral x :: Word64) (B.unpack $ B.take 5 hd)
        bytes = fst $ foldr accumulate (0,0) bS
        mask  = (2 ^ b) - 1
        bits  = mask .&. shiftR bytes ((B.length hd * 8) - (fromIntegral b) - (zeroBit - bit))
    in  (fromIntegral bits, s{current = tl,
                 loc = incOffsetBy loc (B.length hd - if partial then 1 else 0),
                 bit = zeroBit - (((zeroBit - bit) + (fromIntegral b)) `mod` 8)})
takeBits64 :: Integral a => a -> Source -> (Word64, Source)
takeBits64 b s = let (bits, s') = takeBits b s
                 in (fromIntegral bits, s')
tobinary :: Integer -> Integer
tobinary x
    | (div x 2) == 0 = x
    | otherwise = (mod x 2) + (10 * (tobinary $ div x 2))
takeBits :: Integral a => a -> Source -> (Integer, Source)
takeBits b (s @ Source{current,loc,bit, ..}) =
    let (hd, tl, partial) = partitionBS current bit (fromIntegral b)
        bS    = map fromIntegral (B.unpack hd)
        bytes = fst $ foldr accumulate (0,0) bS
        mask  = (2 ^ b) - 1
        shiftAmt = max 0 ((B.length hd * 8) - (fromIntegral b) - (zeroBit - bit))
        bits  = mask .&. shiftR bytes shiftAmt
    in  (bits, s{current = tl,
                 loc = incOffsetBy loc (B.length hd - if partial then 1 else 0),
                 bit = zeroBit - (((zeroBit - bit) + (fromIntegral b)) `mod` 8)})
takeHeadM :: Source -> (Maybe Char, Source)
takeHeadM (s @ Source{current,loc, ..}) =
  if B.null current then (Nothing, s)
  else (Just $ word8ToChr $ B.head current, s{current = B.tail current, loc = incOffset loc})
takeHeadStr :: String -> Source -> (Bool, Source)
takeHeadStr str s =
   let pstr = strToByteString str
   in if B.isPrefixOf pstr (current s)
      then let (res,source) = Language.Pads.Source.take (B.length pstr) s
            in (True, source)
      else (False, s)
matchString :: String -> Source -> Maybe(String, Source)
matchString str s =
   let pstr = strToByteString str
   in if B.isPrefixOf pstr (current s)
      then let (res,source) = Language.Pads.Source.take (B.length pstr) s
            in Just(str, source)
      else Nothing
breakSubstring :: B.ByteString 
               -> B.ByteString 
               -> (B.ByteString,B.ByteString) 
breakSubstring pat src = search 0 src
  where
    
    search :: Int -> B.ByteString -> (B.ByteString, B.ByteString)
    search a b | a `seq` b `seq` False = undefined
    search n s
        | B.null s             = (src,B.empty)      
        | pat `B.isPrefixOf` s = (B.take n src,s)
        | otherwise            = search (n+1) (B.tail s)
scanStr :: String -> Source -> (Maybe String, Source)
scanStr str (s @ Source{current,loc, ..}) =
  let pat = strToByteString str
      (before,after) = breakSubstring pat current
  in if B.null after then (Nothing, s)
     else let len = B.length pat
          in (Just (byteStringToStr before),
              s{current = B.drop len after, loc = incOffsetBy loc len})
scanString :: String -> Source -> Maybe (String, Source)
scanString str (s @ Source{current,loc, ..}) =
  let pat = strToByteString str
      (before,after) = breakSubstring pat current
  in if B.null after then Nothing
     else let len = B.length pat
          in Just (byteStringToStr before, s{current= B.drop len after, loc = incOffsetBy loc len})
satisfyNB :: (Char -> Bool) -> Source -> (String, Source)
satisfyNB p s =
    let (c, s') = takeBits8 8 s
        c' = word8ToChr c
    in  if   p c'
        then (c' : (fst $ satisfyNB p s'), snd $ satisfyNB p s')
        else ([], s)
takeBytes :: Int -> Source -> (B.ByteString, Source)
takeBytes n (s @ Source{current,loc, ..}) =
     let (head, tail) = B.splitAt n current
         incOffset    = B.length head
     in (head, s{current= tail, loc = incOffsetBy loc incOffset})
takeBytesNB :: Int -> Source -> (B.ByteString, Source)
takeBytesNB n s =
    let (bits, s') = takeBits (n * 8) s
        numToBS x = B.pack $ numToWord8s x []
    in (numToBS bits, s')
numToWord8s :: Integral a => a -> [Word8] -> [Word8]
numToWord8s x accum
    | x < 256   = fromIntegral x : accum
    | otherwise = numToWord8s (x `div` 256) (fromIntegral (x `mod` 256) : accum)
take :: Int -> Source -> (String, Source)
take n s = let (bs, s') = takeBytes n s
           in (byteStringToStr bs, s')
regexMatch :: RE -> Source -> (Maybe String, Source)
regexMatch (RE re_str_raw) (s @ Source{current,loc,..}) =
     let (before, match, after) = current TRP.=~ (strToByteString('^' : re_str_raw))
     in if not (B.null before) then (Nothing, s)   
        else  (Just (byteStringToStr match), s{current=after, loc=incOffsetBy loc (fromIntegral (B.length match))})
regexMatch (REd re_str_raw def ) s = regexMatch (RE re_str_raw) s
regexStop :: RE -> Source -> (Maybe String, Source)
regexStop (RE re_str_raw) (s @ Source{current,loc,..}) =
     let packed = strToByteString re_str_raw
         (before, match, after) = current TRP.=~ packed      
         isMatch = current TRP.=~ packed
     in if not isMatch
         then (Nothing, s)        
         else (Just (byteStringToStr before),
                s{current= B.append match after,loc=incOffsetBy loc (fromIntegral (B.length before))})
regexStop (REd re_str_raw def) s = regexStop (RE re_str_raw) s
span p (s @ Source{current,loc,..}) =
     let (head, tail) = B.span p current
         incOffset    = B.length head
     in (B.unpack head, s{current=tail, loc = incOffsetBy loc incOffset})
whileS :: (Char -> Bool) -> Source -> Maybe (String,Source)
whileS p (s @ Source{current,loc,..}) =
     let (head, tail) = B.span (p . word8ToChr) current
         incOffset    = B.length head
     in Just (byteStringToStr head, s{current=tail, loc=incOffsetBy loc incOffset})
tail  (s @ Source{current,loc,..}) =
       (s{current=B.tail current,loc=incOffset loc})
scanTo :: Char -> Source -> (Bool, Source, Span)
scanTo chr (src @ Source{current,loc, ..}) =
     let begin = getSrcLoc src
         (skipped, residual) = B.break (\c->c== (chrToWord8 chr)) current
         (found,remaining,incAmount) =
            if B.null residual then   
                 (False, residual,         B.length skipped)
            else (True,  B.tail residual, (B.length skipped) + 1)
         newLoc = incOffsetBy loc incAmount
         endErrLoc = incOffsetBy loc (B.length skipped)
      in (found,
          src {current = remaining, loc=newLoc},
          Span   {begin, end=Just endErrLoc})
lift :: (String -> [(a, String)]) -> (Source -> (Maybe a, Source))
lift f s = case f (byteStringToStr $ current s) of
  [] -> (Nothing, s)
  (x,residual):rest -> (Just x, s{current= (strToByteString residual)})
eqCurrent :: Source -> Source -> Bool
eqCurrent s s'= current s == current s'
chrToWord8 :: Char -> Word8
chrToWord8 c = toEnum $ fromEnum c
strToWord8s :: String -> [Word8]
strToWord8s = map chrToWord8
word8ToChr :: Word8 -> Char
word8ToChr = toEnum . fromEnum
word8sToStr :: [Word8] -> String
word8sToStr = map word8ToChr
byteStringToStr :: B.ByteString -> String
byteStringToStr = word8sToStr . B.unpack
strToByteString :: String -> B.ByteString
strToByteString = Char8.pack
locToSpan :: Loc -> Span
locToSpan loc = Span { begin = loc, end = Nothing }
locsToSpan :: Loc -> Loc -> Span
locsToSpan b e = Span {begin = b, end = Just e}
instance Pretty Source where
    ppr (Source{current, rest, ..}) = text "Current:" <+> text (show current)
instance Pretty Loc where
 ppr (Loc{recordNumber,byteOffset}) = text "Line:" <+> ppr recordNumber <> text ", Offset:" <+> ppr byteOffset
instance Pretty Span where
  ppr (Span{begin,end}) = case end of
                                Nothing -> ppr begin
                                Just end_loc ->  text "from:" <+> ppr begin <+> text "to:" <+> ppr end_loc