module Data.IterIO.Search (inumStopString
                          , mapI, mapLI
                          ) where

import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy.Char8 as L8
import qualified Data.ByteString.Lazy.Search as Search
import qualified Data.ListLike as LL
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Monoid

import Data.IterIO.Iter
import Data.IterIO.Inum

-- | Feeds input to an Iteratee until some boundary string is found.
-- The boundary string is neither consumed nor passed through to the
-- target 'Iter'.  (Thus, if the input is at end-of-file after
-- inumStopString returns, it means the boundary string was never
-- encountered.)
inumStopString :: (Monad m) =>
                  S8.ByteString
               -> Inum L8.ByteString L8.ByteString m a
inumStopString spat = mkInumM $ nextChunk L8.empty
    where
      lpat = L8.fromChunks [spat]
      plen = toEnum $ S8.length spat
      search = Search.breakOn spat
      nextChunk old = do
        (Chunk t eof) <- chunkI
        case search $ L8.append old t of
          (a, b) | not (L8.null b) -> ungetI b >> ifeed a
          (a, _) | eof             -> ifeed a
          (a, _)                   -> checkEnd a
      checkEnd t = let tlen = L8.length t
                       hlen = max 0 (tlen - plen - 1)
                       ttail = L8.drop hlen t
                       fpm = firstPossibleMatch 0 ttail
                       rlen = hlen + fpm
                   in if rlen == tlen
                      then ifeed t >> nextChunk L8.empty
                      else case L8.splitAt rlen t of
                             (r, o) -> ifeed r >> nextChunk o
      firstPossibleMatch n t =
          if t `L8.isPrefixOf` lpat
          then n
          else firstPossibleMatch (n + 1) (L8.tail t)

longestCommonPrefix :: (LL.ListLike t e, Eq e) => t -> t -> t
longestCommonPrefix a0 = cmp 0 a0
    where
      cmp n a b | LL.null a || LL.null b = LL.take n a0
      cmp n a b | LL.head a == LL.head b = cmp (n + 1) (LL.tail a) (LL.tail b)
      cmp n _ _                          = LL.take n a0

findLongestPrefix :: (LL.ListLike t e, Ord t, Eq e) =>
                     Map t a -> t -> Maybe (t, a)
findLongestPrefix mp t = maybe ckprefix (\v1 -> Just (t, v1)) ma
    where
      (ltmap, ma, _) = Map.splitLookup t mp
      (k, v) = Map.findMax ltmap
      p = longestCommonPrefix k t
      ckprefix | Map.null mp || LL.null t = Nothing
               | k `LL.isPrefixOf` t      = Just (k, v)
               | otherwise                = findLongestPrefix ltmap p

-- | Reads input until it can uniquely determine the longest key in a
-- 'Map.Map' that is a prefix of the input.  Consumes the input that
-- matches the key, and returns the corresponding value in the
-- 'Map.Map', along with the residual input that follows the key.
mapI :: (ChunkData t, LL.ListLike t e, Ord t, Eq e, Monad m) =>
        Map t a -> Iter t m a
mapI mp | Map.null mp = fail $ "mapI: null map"
        | otherwise = do
  c@(Chunk t eof) <- chunkI
  if not (eof) && more t
    then iterF (runIter (mapI mp) . mappend c)
    else case findLongestPrefix mp t of
           Nothing -> Iter $ \c' ->
             Fail (IterExpected $
                   (show c
                   , show (Map.size mp) ++ " keys including the following:")
                   : map (\k -> ("", chunkShow k)) (take 5 $ Map.keys mp))
             Nothing (Just $ mappend c c')
           Just (k, v) -> ungetI (LL.drop (LL.length k) t) >> return v
    where
      gtmap t = snd $ Map.split t mp
      more t | Map.null $ gtmap t = False
             | otherwise = t `LL.isPrefixOf` (fst $ Map.findMin $ gtmap t)

-- | @mapLI@ is a variant of 'mapI' that takes a list of
-- @(key, value)@ pairs instead of a 'Map.Map'.
-- @mapLI = 'mapI' . 'Map.fromList'@.
mapLI :: (ChunkData t, LL.ListLike t e, Ord t, Eq e, Monad m) =>
         [(t, a)] -> Iter t m a
mapLI = mapI . Map.fromList




{-
main :: IO ()
main = enumStdin |$ do
         inumStopString end .| stdoutI
         match end
         liftIO $ putStrLn "\n\n*** We have reached THE END #1 ***\n\n"
         inumStopString end .| stdoutI
         match end
         liftIO $ putStrLn "\n\n*** We have reached THE END #2 ***\n\n"
         stdoutI
    where
      end = L8.pack "TheEnd"
-}