{-# OPTIONS_GHC -Wall -fno-warn-unused-do-bind -fno-warn-name-shadowing #-}
{-# LANGUAGE BangPatterns, MagicHash, Rank2Types, UnboxedTuples #-}
{-# LANGUAGE PackageImports #-}

module Parser
  ( fromString
  , Parser(..), State(..), Pos, End, Row, Col
  , oneOf, oneOfWithFallback
  , inContext, specialize
  , getPosition, getCol, addLocation, addEnd
  , word1, word2
  , symbol, k4, k5
  , unsafeIndex, isWord, getCharWidth
  , chompInnerChars
  , getUpperWidth
  , getInnerWidth
  , getInnerWidthHelp
  )
  where


import qualified Data.Char as Char
import qualified "text-utf8" Data.Text.Internal as T
import qualified "text-utf8" Data.Text.Array as T
import GHC.Exts (Char(C#), Int#, (+#), (-#), chr#, uncheckedIShiftL#, word2Int#)
import GHC.Prim (ByteArray#, indexWord8Array#)
import GHC.Types (Int(I#))
import GHC.Word (Word8(W8#), Word16)
import qualified Parser.Reporting as R
import Prelude hiding (length)

import qualified Result as R
import qualified String


-- PARSER


newtype Parser x a =
  Parser (
    forall b.
      State
      -> (a -> State -> b)                       -- consumed ok
      -> (a -> State -> b)                       -- empty ok
      -> (Row -> Col -> (Row -> Col -> x) -> b)  -- consumed err
      -> (Row -> Col -> (Row -> Col -> x) -> b)  -- empty err
      -> b
  )


data State = -- TODO try taking some out to avoid allocation?
  State
    { _src :: ByteArray#
    , _pos :: {-# UNPACK #-} !Pos
    , _end :: {-# UNPACK #-} !End
    , _row :: {-# UNPACK #-} !Row
    , _col :: {-# UNPACK #-} !Col
    }


type Pos = Int
type End = Int

type Row = Word16
type Col = Word16


-- FUNCTOR


instance Functor (Parser x) where
  {-# INLINE fmap #-}
  fmap f (Parser parser) =
    Parser $ \state cok eok cerr eerr ->
      let
        cok' a s = cok (f a) s
        eok' a s = eok (f a) s
      in
      parser state cok' eok' cerr eerr


-- APPLICATIVE


instance Applicative (Parser x) where
  {-# INLINE pure #-}
  pure = return

  {-# INLINE (<*>) #-}
  (<*>) (Parser parserFunc) (Parser parserArg) =
    Parser $ \state cok eok cerr eerr ->
      let
        cokF func s1 =
          let
            cokA arg s2 = cok (func arg) s2
          in
          parserArg s1 cokA cokA cerr cerr

        eokF func s1 =
          let
            cokA arg s2 = cok (func arg) s2
            eokA arg s2 = eok (func arg) s2
          in
          parserArg s1 cokA eokA cerr eerr
      in
      parserFunc state cokF eokF cerr eerr


-- ONE OF


{-# INLINE oneOf #-}
oneOf :: (Row -> Col -> x) -> [Parser x a] -> Parser x a
oneOf toError parsers =
  Parser $ \state cok eok cerr eerr ->
    oneOfHelp state cok eok cerr eerr toError parsers


oneOfHelp
  :: State
  -> (a -> State -> b)
  -> (a -> State -> b)
  -> (Row -> Col -> (Row -> Col -> x) -> b)
  -> (Row -> Col -> (Row -> Col -> x) -> b)
  -> (Row -> Col -> x)
  -> [Parser x a]
  -> b
oneOfHelp state cok eok cerr eerr toError parsers =
  case parsers of
    Parser parser : parsers ->
      let
        eerr' _ _ _ =
          oneOfHelp state cok eok cerr eerr toError parsers
      in
      parser state cok eok cerr eerr'

    [] ->
      let
        (State _ _ _ row col) = state
      in
      eerr row col toError


-- ONE OF WITH FALLBACK


{-# INLINE oneOfWithFallback #-}
oneOfWithFallback :: [Parser x a] -> a -> Parser x a -- TODO is this function okay? Worried about allocation/laziness with fallback values.
oneOfWithFallback parsers fallback =
  Parser $ \state cok eok cerr _ ->
    oowfHelp state cok eok cerr parsers fallback


oowfHelp
  :: State
  -> (a -> State -> b)
  -> (a -> State -> b)
  -> (Row -> Col -> (Row -> Col -> x) -> b)
  -> [Parser x a]
  -> a
  -> b
oowfHelp state cok eok cerr parsers fallback =
  case parsers of
    [] ->
      eok fallback state

    Parser parser : parsers ->
      let
        eerr' _ _ _ =
          oowfHelp state cok eok cerr parsers fallback
      in
      parser state cok eok cerr eerr'


-- MONAD


instance Monad (Parser x) where
  {-# INLINE return #-}
  return value =
    Parser $ \state _ eok _ _ ->
      eok value state

  {-# INLINE (>>=) #-}
  (Parser parserA) >>= callback =
    Parser $ \state cok eok cerr eerr ->
      let
        cok' a s =
          case callback a of
            Parser parserB -> parserB s cok cok cerr cerr

        eok' a s =
          case callback a of
            Parser parserB -> parserB s cok eok cerr eerr
      in
      parserA state cok' eok' cerr eerr


-- FROM STRING


fromString :: Parser x a -> (Row -> Col -> x) -> String.String -> R.Result x a
fromString (Parser parser) toBadEnd string =
  let
    !(T.Text (T.Array src) pos length) = String.toTextUtf8 string
    toOk' = toOk toBadEnd
  in
  parser (State src pos (pos + length) 1 1) toOk' toOk' toErr toErr


toOk :: (Row -> Col -> x) -> a -> State -> R.Result x a
toOk toBadEnd !a (State _ pos end row col) =
  if pos == end
  then R.Ok a
  else R.Err (toBadEnd row col)


toErr :: Row -> Col -> (Row -> Col -> x) -> R.Result x a
toErr row col toError =
  R.Err (toError row col)


-- POSITION


getCol :: Parser x Word16
getCol =
  Parser $ \state@(State _ _ _ _ col) _ eok _ _ ->
    eok col state


{-# INLINE getPosition #-}
getPosition :: Parser x R.Position
getPosition =
  Parser $ \state@(State _ _ _ row col) _ eok _ _ ->
    eok (R.Position row col) state


addLocation :: Parser x a -> Parser x (R.Located a)
addLocation (Parser parser) =
  Parser $ \state@(State _ _ _ sr sc) cok eok cerr eerr ->
    let
      cok' a s@(State _ _ _ er ec) = cok (R.At (R.Region (R.Position sr sc) (R.Position er ec)) a) s
      eok' a s@(State _ _ _ er ec) = eok (R.At (R.Region (R.Position sr sc) (R.Position er ec)) a) s
    in
    parser state cok' eok' cerr eerr


addEnd :: R.Position -> a -> Parser x (R.Located a)
addEnd start value =
  Parser $ \state@(State _ _ _ row col) _ eok _ _ ->
    eok (R.at start (R.Position row col) value) state


-- CONTEXT


inContext :: (x -> Row -> Col -> y) -> Parser y start -> Parser x a -> Parser y a
inContext addContext (Parser parserStart) (Parser parserA) =
  Parser $ \state@(State _ _ _ row col) cok eok cerr eerr ->
    let
      cerrA r c tx = cerr row col (addContext (tx r c))
      eerrA r c tx = eerr row col (addContext (tx r c))

      cokS _ s = parserA s cok cok cerrA cerrA
      eokS _ s = parserA s cok eok cerrA eerrA
    in
    parserStart state cokS eokS cerr eerr


specialize :: (x -> Row -> Col -> y) -> Parser x a -> Parser y a
specialize addContext (Parser parser) =
  Parser $ \state@(State _ _ _ row col) cok eok cerr eerr ->
    let
      cerr' r c tx = cerr row col (addContext (tx r c))
      eerr' r c tx = eerr row col (addContext (tx r c))
    in
    parser state cok eok cerr' eerr'


-- SYMBOLS


word1 :: Word8 -> (Row -> Col -> x) -> Parser x ()
word1 word toError =
  Parser $ \(State src pos end row col) cok _ _ eerr ->
    if pos < end && unsafeIndex src pos == word then
      let !newState = State src (pos + 1) end row (col + 1) in
      cok () newState
    else
      eerr row col toError


word2 :: Word8 -> Word8 -> (Row -> Col -> x) -> Parser x ()
word2 w1 w2 toError =
  Parser $ \(State src pos end row col) cok _ _ eerr ->
    let
      !pos1 = pos + 1
    in
    if pos1 < end && unsafeIndex src pos == w1 && unsafeIndex src pos1 == w2 then
      let !newState = State src (pos + 2) end row (col + 2) in
      cok () newState
    else
      eerr row col toError


symbol :: Word8 -> (Row -> Col -> x) -> Parser x ()
symbol w1 toError =
  Parser $ \(State src pos end row col) cok _ _ eerr ->
    let !pos1 = pos + 1 in
    if pos1 <= end && unsafeIndex src pos == w1
    then
      let !s = State src pos1 end row (col + 1) in cok () s
    else
      eerr row col toError


k4 :: Word8 -> Word8 -> Word8 -> Word8 -> (Row -> Col -> x) -> Parser x ()
k4 w1 w2 w3 w4 toError =
  Parser $ \(State src pos end row col) cok _ _ eerr ->
    let !pos4 = pos + 4 in
    if pos4 <= end
      && unsafeIndex src (pos    ) == w1
      && unsafeIndex src (pos + 1) == w2
      && unsafeIndex src (pos + 2) == w3
      && unsafeIndex src (pos + 3) == w4
      && getInnerWidth src pos4 end == 0
    then
      let !s = State src pos4 end row (col + 4) in cok () s
    else
      eerr row col toError


k5 :: Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> (Row -> Col -> x) -> Parser x ()
k5 w1 w2 w3 w4 w5 toError =
  Parser $ \(State src pos end row col) cok _ _ eerr ->
    let !pos5 = pos + 5 in
    if pos5 <= end
      && unsafeIndex src (pos    ) == w1
      && unsafeIndex src (pos + 1) == w2
      && unsafeIndex src (pos + 2) == w3
      && unsafeIndex src (pos + 3) == w4
      && unsafeIndex src (pos + 4) == w5
      && getInnerWidth src pos5 end == 0
    then
      let !s = State src pos5 end row (col + 5) in cok () s
    else
      eerr row col toError


-- LOW-LEVEL CHECKS


{-# INLINE unsafeIndex #-}
unsafeIndex :: ByteArray# -> Pos -> Word8
unsafeIndex src (I# pos) =
  W8# (indexWord8Array# src pos)


{-# INLINE isWord #-}
isWord :: ByteArray# -> Pos -> End -> Word8 -> Bool
isWord src pos end word =
  pos < end && unsafeIndex src pos == word


getCharWidth :: Word8 -> Int
getCharWidth word
  | word < 0x80 = 1
  | word < 0xc0 = error "Need UTF-8 encoded input. Ran into unrecognized bits."
  | word < 0xe0 = 2
  | word < 0xf0 = 3
  | word < 0xf8 = 4
  | True        = error "Need UTF-8 encoded input. Ran into unrecognized bits."


-- UPPER CHARS


{-# INLINE getUpperWidth #-}
getUpperWidth :: ByteArray# -> Pos -> End -> Int
getUpperWidth src pos end =
  if pos < end then
    getUpperWidthHelp src pos end (unsafeIndex src pos)
  else
    0


{-# INLINE getUpperWidthHelp #-}
getUpperWidthHelp :: ByteArray# -> Pos -> End -> Word8 -> Int
getUpperWidthHelp src pos _ word
  | 0x41 {- A -} <= word && word <= 0x5A {- Z -} = 1
  | word < 0xc0 = 0
  | word < 0xe0 = if Char.isUpper (chr2 src pos word) then 2 else 0
  | word < 0xf0 = if Char.isUpper (chr3 src pos word) then 3 else 0
  | word < 0xf8 = if Char.isUpper (chr4 src pos word) then 4 else 0
  | True        = 0


-- INNER CHARS


chompInnerChars :: ByteArray# -> Pos -> End -> Col -> (# Pos, Col #)
chompInnerChars src !pos end !col =
  let !width = getInnerWidth src pos end in
  if width == 0 then
    (# pos, col #)
  else
    chompInnerChars src (pos + width) end (col + 1)


getInnerWidth :: ByteArray# -> Pos -> End -> Int
getInnerWidth src pos end =
  if pos < end then
    getInnerWidthHelp src pos end (unsafeIndex src pos)
  else
    0


{-# INLINE getInnerWidthHelp #-}
getInnerWidthHelp :: ByteArray# -> Pos -> End -> Word8 -> Int
getInnerWidthHelp src pos _ word
  | 0x61 {- a -} <= word && word <= 0x7A {- z -} = 1
  | 0x41 {- A -} <= word && word <= 0x5A {- Z -} = 1
  | 0x30 {- 0 -} <= word && word <= 0x39 {- 9 -} = 1
  | word == 0x5F {- _ -} = 1
  | word < 0xc0 = 0
  | word < 0xe0 = if Char.isAlpha (chr2 src pos word) then 2 else 0
  | word < 0xf0 = if Char.isAlpha (chr3 src pos word) then 3 else 0
  | word < 0xf8 = if Char.isAlpha (chr4 src pos word) then 4 else 0
  | True        = 0


-- EXTRACT CHARACTERS


{-# INLINE chr2 #-}
chr2 :: ByteArray# -> Pos -> Word8 -> Char
chr2 src pos firstWord =
  let
    !i1# = unpack firstWord
    !i2# = unpack (unsafeIndex src (pos + 1))
    !c1# = uncheckedIShiftL# (i1# -# 0xC0#) 6#
    !c2# = i2# -# 0x80#
  in
  C# (chr# (c1# +# c2#))


{-# INLINE chr3 #-}
chr3 :: ByteArray# -> Pos -> Word8 -> Char
chr3 src pos firstWord =
  let
    !i1# = unpack firstWord
    !i2# = unpack (unsafeIndex src (pos + 1))
    !i3# = unpack (unsafeIndex src (pos + 2))
    !c1# = uncheckedIShiftL# (i1# -# 0xE0#) 12#
    !c2# = uncheckedIShiftL# (i2# -# 0x80#) 6#
    !c3# = i3# -# 0x80#
  in
  C# (chr# (c1# +# c2# +# c3#))


{-# INLINE chr4 #-}
chr4 :: ByteArray# -> Pos -> Word8 -> Char
chr4 src pos firstWord =
  let
    !i1# = unpack firstWord
    !i2# = unpack (unsafeIndex src (pos + 1))
    !i3# = unpack (unsafeIndex src (pos + 2))
    !i4# = unpack (unsafeIndex src (pos + 3))
    !c1# = uncheckedIShiftL# (i1# -# 0xF0#) 18#
    !c2# = uncheckedIShiftL# (i2# -# 0x80#) 12#
    !c3# = uncheckedIShiftL# (i3# -# 0x80#) 6#
    !c4# = i4# -# 0x80#
  in
  C# (chr# (c1# +# c2# +# c3# +# c4#))


unpack :: Word8 -> Int#
unpack (W8# word#) =
  word2Int# word#