{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances, FlexibleContexts #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE UndecidableInstances #-}     -- Why?

-----------------------------------------------------------------------------
-- |
-- Module      :  Parsimony.Char
-- Copyright   :  (c) Iavor S. Diatchki 2009
-- License     :  BSD3
--
-- Maintainer  :  iavor.diatchki@gmail.com
-- Stability   :  provisional
--
-- A generic way to extract tokens from a stream.
--
-----------------------------------------------------------------------------



module Parsimony.Stream
  ( Token(..), Stream(..)
  , ASCII, UTF8, ascii, utf8
  ) where

import Parsimony.Prim
import Parsimony.Pos
import Parsimony.Error

import qualified Data.ByteString as Strict (ByteString,uncons)
import qualified Data.ByteString.Lazy as Lazy (ByteString,uncons)
import Data.String.UTF8 (UTF8,UTF8Bytes,fromRep,replacement_char)
import qualified Data.String.UTF8 as UTF8 (uncons)
import Data.Word (Word8)
import Numeric (showHex)

-- | A class describing useful token operations.
class Token token where
  -- | How tokens affect file positions.
  updatePos :: token -> SourcePos -> SourcePos

  -- | How to display tokens.
  showToken :: token -> String

instance Token Char where
  updatePos c p = updatePosChar p c
  showToken     = show

instance Token Word8 where
  updatePos _ p = incSourceColumn p 1
  showToken b   = "0x" ++ showHex b ""




-- We have the fun. dep. here because otherwise multiple
-- reads from a stream could give potentially different types of
-- tokens which leads to ambiguities.

-- | Streams of tokens.
class Token token => Stream stream token | stream -> token where
  getToken :: PrimParser stream token

eof_err :: SourcePos -> Reply s a
eof_err p = Error $ newErrorMessage (UnExpect "end of input") p

{-# INLINE genToken #-}
genToken :: Token t => (i -> Maybe (t,i)) -> PrimParser i t
genToken unc (State i p) =
    case unc i of
      Nothing     -> eof_err p
      Just (t,ts) -> Ok t State { stateInput = ts
                                , statePos   = updatePos t p
                                }

instance Token a => Stream [a] a where
  getToken = genToken (\xs -> case xs of
                                [] -> Nothing
                                c : cs -> Just (c,cs))

instance Stream Strict.ByteString Word8 where
  getToken = genToken Strict.uncons

instance Stream Lazy.ByteString Word8 where
  getToken = genToken Lazy.uncons

-- Character encodings ---------------------------------------------------------


-- | The type of ASCII encoded content.
newtype ASCII content = ASCII content

-- | Specify ASCII encoding for some content.
ascii :: content -> ASCII content
ascii = ASCII

-- | Specify UTF8 encoding for some content.
utf8 :: content -> UTF8 content
utf8 = fromRep

instance Stream a Word8 => Stream (ASCII a) Char where
  getToken (State (ASCII buf) p) =
    case getToken (State buf p) of
      Error err           -> Error err
      Ok w (State b1 p1)  -> Ok (toEnum (fromEnum w)) (State (ASCII b1) p1)


instance Stream (UTF8 [Word8]) Char where
  getToken = genTokenChar

instance Stream (UTF8 Strict.ByteString) Char where
  getToken = genTokenChar

instance Stream (UTF8 Lazy.ByteString) Char where
  getToken = genTokenChar


{-# INLINE genTokenChar #-}
genTokenChar :: UTF8Bytes stream ix => PrimParser (UTF8 stream) Char
genTokenChar (State i p) =
    case UTF8.uncons i of
      Just (a,i1)
        | a /= replacement_char -> Ok a (State i1 (updatePos a p))
        | otherwise -> Error $ newErrorMessage
                              (Message "invalid UTF8 character") p
      Nothing -> eof_err p