-------------------------------------------------------------------------------

  {-  LANGUAGE CPP #-}

#define DO_TRACE 0

#if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 && ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9
#error Please set at most one of the flags ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 and ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 to True.
#endif

#define ALLOW_ESCAPED_TYPE_LIST_SEPARATOR 1

-------------------------------------------------------------------------------

#if USE_ATTOPARSEC
  {-# LANGUAGE OverloadedStrings #-}
  {-  LANGUAGE ScopedTypeVariables #-}  -- debugging only
  {-# LANGUAGE BangPatterns #-}  -- for forcing tracelines in do blocks
#endif

-------------------------------------------------------------------------------

-- |
-- Module      :  Control.DeepSeq.Bounded.Compile_new_grammar
-- Copyright   :  Andrew G. Seniuk 2014-2015
-- License     :  BSD-style (see the file LICENSE)
--
-- Maintainer  :  Andrew Seniuk <rasfar@gmail.com>
-- Stability   :  provisional
-- Portability :  attoparsec
--

-------------------------------------------------------------------------------

  module Control.DeepSeq.Bounded.Compile_new_grammar

-- XXX If commented out, it's for debugging only!
#if 1

--- {-# DEPRECATED "Use Wobble instead" #-}
  (

#if USE_ATTOPARSEC
#if HASKELL98_FRAGMENT
#error Sorry, HASKELL98_FRAGMENT incompatible with NEW_IMPROVED_PATTERN_GRAMMAR, because only USE_ATTOPARSEC parser is working. With some artful CPP you could cut out the Pattern parser/compiler, and then resort to the bare PatNode constructors...
#endif
      compileUsingAttoparsec  ,
--    parsePat  ,
#else
      compilePat'  ,
#endif

  )

#endif

  where

-------------------------------------------------------------------------------

  import Control.DeepSeq.Bounded.Pattern
  import Control.DeepSeq.Bounded.Compile_shared_utils

  import Data.Maybe ( isNothing, fromJust )

  import Data.List ( intercalate )

  import Debug.Trace ( trace )

#if USE_ATTOPARSEC

  import qualified Data.Attoparsec.Text as AT
  import qualified Data.Text as T
  import Control.Applicative

  import Data.Char ( isLetter )
  import Data.Char ( isDigit )
  import Data.Char ( ord )

  import Control.Monad ( liftM )
--import Control.Monad ( foldM )
--import Data.Foldable ( fold )
  import Control.Monad ( mzero )
  import Data.Monoid ( mempty )

#endif

-------------------------------------------------------------------------------

#if DO_TRACE
  mytrace = trace
#else
  mytrace _ = id
#endif

#if USE_ATTOPARSEC

-------------------------------------------------------------------------------

  -- The caller (or someone up there) uses liftPat if
  -- multiple patterns are parsed (adds new common root).
  parsePatsTop :: AT.Parser [Pattern]
  parsePatsTop = do
    !_ <- mytrace ("parsePatsTop.") $ return ()
    let as = emptyPatNodeAttrs
    AT.many' (AT.skipSpace *> parsePat as <* AT.skipSpace)

-------------------------------------------------------------------------------

  -- This differs from parsePatsTop in that it assumes an
  -- opening grouping token has been consumed (so will be
  -- expecting a corresponding closing token).
  parsePats :: AT.Parser [Pattern]
#if 1
  parsePats = do
        !_ <- mytrace "parsePats." $ return ()
        (AT.endOfInput *> pure [])
    <|> (do
            c <- AT.peekChar'
#if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS
            if c == '}'
#else
            if c == ')'
#endif
              then pure []
---           then AT.char '}' >> pure []
              else let as = emptyPatNodeAttrs in
                   AT.many' (AT.skipSpace *> parsePat as <* AT.skipSpace))
#else
  -- XXX Why does this work so badly?
  parsePats = AT.manyTill'
                   (AT.skipSpace *> parsePat emptyPatNodeAttrs)
                   (AT.endOfInput <|> (AT.char ')' *> return ()))
#endif

-------------------------------------------------------------------------------

  -- Test if next character is non-attribute, up front,
  -- and skip all this attribute stuff in that case!...
  parsePat :: PatNodeAttrs -> AT.Parser Pattern
  parsePat as = do
    !_ <- mytrace "parsePat." $ return ()
    let modchars = ":@=>+^/%"
    mc <- AT.peekChar
    let c = fromJust mc
    if isNothing mc
      then fail "parse_type_constraints: unexpected end-of-input"
      else if c `elem` modchars
             then parsePatAttributes as
             else parsePat3 as

  -- Note: Previously, type constraint was handled in a more
  -- ad hoc manner. The existence of separate T* nodes is
  -- evidence of this, but those will likely be removed in 0.7,
  -- making type constraint just another attribute of W* nodes.
  parsePatAttributes :: PatNodeAttrs -> AT.Parser Pattern
  parsePatAttributes as = do
    !_ <- mytrace "parsePatAttributes." $ return ()
    foldr (<|>) mempty $
--  foldM (<|>) mempty $
--  fold (<|>) mempty $
--  fold (<|>) mzero $
      ( map
          ( \ (c,s,p,b,a)
                -> do 
                      let q = (c,s,b,a)
                      if c == '\0'
                      then fail ""
                      else p c b a s <* AT.skipSpace
          )
          -- doConstrainType, doDelay, and doPseq handled specially,
          -- due to their taking arguments.
          [
            ( '\0' , ""       , dud_parser    , const False     , as )
          , ( ':'  , "types"  , types_parser  , doConstrainType , as_types  )
          , ( '@'  , "delay"  , delay_parser  , doDelay         , as_delay  )
#if USE_PAR_PATNODE
          , ( '='  , "spark"  , no_arg_parser , doSpark         , as_spark  )
#endif
#if USE_PSEQ_PATNODE
          , ( '>'  , "pseq"   , pseq_parser   , doPseq          , as_pseq   )
#endif
#if USE_TRACE_PATNODE
          , ( '+'  , "trace"  , no_arg_parser , doTrace         , as_trace  )
#endif
#if USE_PING_PATNODE
          , ( '^'  , "ping"   , no_arg_parser , doPing          , as_ping   )
#endif
#if USE_DIE_PATNODE
          , ( '/'  , "die"    , no_arg_parser , doDie           , as_die    )
#endif
#if USE_TIMING_PATNODE
          , ( '%'  , "timing" , no_arg_parser , doTiming        , as_timing )
#endif
          ]
      ) ++ [ parsePat3 as ]
   where
    dud_parser _ _ _ _ = fail "dud_parser"  -- (is never run; should use Proxy)
    no_arg_parser c b a s = do
         ( (AT.char c) <* AT.skipSpace )
      >> (if b as
            then fail $    "compilePat: duplicate "
                        ++ show c ++ " (" ++ s ++ ") "
                        ++ "node attribute"
            else parsePat a)
    types_parser _ _ a _ = parsePat1''' a
    delay_parser _ _ a _ = parsePat1' a
    pseq_parser _ _ a _ = parsePat1'' a
    as_types   = as { doConstrainType  = True }
    as_delay   = as { doDelay          = True }
#if USE_PAR_PATNODE
    as_spark   = as { doSpark          = True }
#endif
#if USE_PSEQ_PATNODE
    as_pseq    = as { doPseq           = True }
#endif
#if USE_TRACE_PATNODE
    as_trace   = as { doTrace          = True }
#endif
#if USE_PING_PATNODE
    as_ping    = as { doPing           = True }
#endif
#if USE_DIE_PATNODE
    as_die     = as { doDie            = True }
#endif
#if USE_TIMING_PATNODE
    as_timing  = as { doTiming         = True }
#endif

  -- Parse the ":Int;Maybe Float:" typeConstraints attribute, if present.
  parsePat1''' :: PatNodeAttrs -> AT.Parser Pattern
  parsePat1''' as = do
          !_ <- mytrace "parsePat1'''." $ return ()
          AT.char ':'
          >> ( ( parse_type_constraints <* AT.skipSpace )
               >>= \ tcs -> let as' = as { doConstrainType = True
                                         , typeConstraints = map T.unpack tcs }
                            in parsePat as' )

  -- Parse the "@50000" delayus attribute, if present.
  parsePat1' :: PatNodeAttrs -> AT.Parser Pattern
  parsePat1' as = do
          !_ <- mytrace "parsePat1'." $ return ()
          AT.char '@'
          >> ( ( AT.decimal <* AT.skipSpace )
               >>= \ dly -> let as' = as { doDelay = True
                                         , delayus = dly }
                            in parsePat as' )

#if USE_PSEQ_PATNODE
  -- Parse the ">cdba" pseqPerm attribute, if present.
  parsePat1'' :: PatNodeAttrs -> AT.Parser Pattern
  parsePat1'' as = do
          !_ <- mytrace "parsePat1''." $ return ()
          AT.char '>'
          >> ( ( AT.many1' AT.letter ) <* AT.skipSpace
               >>= \ perm -> let perm' = map (\c -> ord c - ord 'a') perm
                                 as' = as { doPseq = True
                                          , pseqPerm = Just perm' }
                             in parsePat as' )
#endif

-------------------------------------------------------------------------------

  -- Handle "*23"-style (WN and TN) nodes.
  parsePat3 :: PatNodeAttrs -> AT.Parser Pattern
  parsePat3 as = do
    !_ <- mytrace "parsePat3." $ return ()
    b <- AT.peekChar'
#if ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 ) && ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 )
    AT.anyChar
#endif
    !_ <- mytrace ("boo-0: "++show b) $ return ()
#if ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 ) && ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 )
    !_ <- mytrace ("boo-1: "++show b) $ return ()
    if b == '*'
#else
    !_ <- mytrace ("boo-1-_19__9: "++show b) $ return ()
    if isDigit b
#endif
      then do (     (parsePat3_aux b as)
                <|> (do
#if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 || ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9
#if 1
#if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9
                        AT.anyChar
                        (case b of
                          '0' -> parsePat4 '0' Nothing as
                          '1' -> parsePat4 '1' Nothing as
                          _   -> parsePat4 '*' Nothing as
                            ) ) )
#else
                        (parsePat4 '*' Nothing as) ) )
#endif
#else
                        !_ <- mytrace ("parsePat3: unexpected digit " ++ [b]) $ return ()
                        fail $ "parsePat3: unexpected digit " ++ [b] ) )
#endif
#else
                        (parsePat4 '*' Nothing as) ) )
#endif
      else do
#if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 || ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9
--      fail $ "parsePat3: #2 unexpected digit " ++ [b]
        AT.anyChar
#endif
        parsePat4 b Nothing as

  -- Actual handler, in case it /was/ WN or TN node.
  parsePat3_aux :: Char -> PatNodeAttrs -> AT.Parser Pattern
  parsePat3_aux b as = do
    !_ <- mytrace "parsePat3_aux." $ return ()
    !_ <- mytrace ("boo-2: "++show b) $ return ()
-- These should be safe cutoffs without having to worry about exact figures.
--- DEPTH_USES_INT64 isn't implemented yet, this is just a note
--- for future consideration. (Should be in NFDataN if anywhere...).
--- I'm not ready to make this sweeping change yet.
--- #if DEPTH_USES_INT64
---    if length n_integer_str > 19  = fail $ "compilePat: *" ++ n_integer_str ++ " is too large"
    !_ <- mytrace ("boo-3: "++show b) $ return ()
#if ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19 ) && ( ! ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9 )
    AT.skipSpace
#endif
#if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_NUMBER_ALONE__SAFE_ONLY_TO_DEPTH_19
    n_integer_cs <- if b == '1'
                      then AT.anyChar *> AT.option "1" (AT.digit >>= \ c -> return ('1':[c]))
                      else AT.anyChar *> return [b]
    !_ <- mytrace ("boo-3.2: n_integer_ns="++n_integer_cs) $ return ()
    let n_integer = read n_integer_cs :: Integer
#elif ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9
    n_integer_c <- AT.anyChar :: AT.Parser Char
    !_ <- mytrace ("n_integer_c="++show n_integer_c) $ return ()
    n_integer <- case n_integer_c of
                     '0' -> mytrace ("ANDDD...(0):") $ fail ""
                     '1' -> mytrace ("ANDDD...(1):") $ fail ""
                     _   -> parsePat4 '*' Nothing as
    !_ <- mytrace "oops!!!!" $ return ()
    let n_integer = read [n_integer_c] :: Integer
#else
    n_integer <- AT.decimal :: AT.Parser Integer
#endif
    !_ <- mytrace ("boo-3.5: "++show b) $ return ()
    let n_integer_str = show n_integer
    !_ <- mytrace ("boo-4: "++show b++" "++n_integer_str) $ return ()
    if length n_integer_str > 9
      then fail $ "compilePat: *" ++ n_integer_str ++ " is too large"
      else parsePat4 '*' (Just (read n_integer_str :: Int)) as

-------------------------------------------------------------------------------

  -- This handles whether or not it's a type-constrainted node.
  -- (The constraints themselves will have already been parsed.)
  parsePat4 :: Char -> Maybe Int -> PatNodeAttrs -> AT.Parser Pattern
  parsePat4 b mdepth as = do
    !_ <- mytrace "parsePat4." $ return ()
    !_ <- mytrace ("GOO-1: "++show b++" "++show (doConstrainType as)) $ return ()
    if doConstrainType as
      then parsePat4_t b mdepth as
      else parsePat4_w b mdepth as

  -- Actual handler, in case it was /NOT/ a type-constrained node;
  -- i.e. a WI, WR, WS (if still exists), WN or WW node.
  parsePat4_w :: Char -> Maybe Int -> PatNodeAttrs -> AT.Parser Pattern
  parsePat4_w b mdepth as = do
    !_ <- mytrace "parsePat4_w." $ return ()
    !_ <- mytrace ("GOO-3: "++show b) $ return ()
    case b of
#if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9
     '0' -> return (Node (WI as) [])
#else
     '.' -> return (Node (WI as) [])
#endif
#if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9
     '1' -> return (Node (WS as) [])
#else
     '!' -> return (Node (WS as) [])
#endif
     '*' -> if isNothing mdepth
              then return (Node (WW as) [])
              else return (Node (WN as_n) [])
#if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS
     '{' -> parsePat_WRTR_tail False b as
#else
     '(' -> parsePat_WRTR_tail False b as
#endif
#if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS
     _ -> fail $ "compilePat: expected one of \".!*{\" (got " ++ show b ++ ")"
#else
     _ -> fail $ "compilePat: expected one of \".!*(\" (got " ++ show b ++ ")"
#endif
   where
    as_n = as { depth = fromJust mdepth }

  -- This is a helper of patsePat4_aux.
  parsePat4_t :: Char -> Maybe Int -> PatNodeAttrs -> AT.Parser Pattern
  parsePat4_t b mdepth as_t = do
    !_ <- mytrace "parsePat4_t." $ return ()
    !_ <- mytrace ("parsePat4_t: b="++show b) $ return ()
    case b of
#if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS
     '{' -> do
#else
     '(' -> do
#endif
               !_ <- mytrace "parsePat4_t: entering TR_tail..." $ return ()
               parsePat_WRTR_tail True 'x' as_t
---            !_ <- mytrace "parsePat4_t: exited TR_tail!" $ return ()
#if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9
     '0' -> return (Node (TI as_t) [])
#else
     '.' -> return (Node (TI as_t) [])
#endif
     '*' -> if isNothing mdepth
              then return (Node (TW as_t) [])
              else return (Node (TN as_t_n) [])
#if ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__CAN_ONLY_EXPRESS_DOWN_TO_DEPTH_9
     _ -> fail $ "compilePat: expected '*' or digit (got " ++ show b ++ ")"
#elif ABBREV_WN_AND_TN_CONCRETE_SYNTAX_TO_SINGLE_DIGIT__SAFE_DOWN_TO_DEPTH_19
     _ -> fail $ "compilePat: expected '.' or '*' or digit (got " ++ show b ++ ")"
#else
     _ -> fail $ "compilePat: expected '.' or '*' (got " ++ show b ++ ")"
#endif
   where
    as_t_n = as_t { depth = fromJust mdepth }

-------------------------------------------------------------------------------

  parsePat_WRTR_tail :: Bool -> Char -> PatNodeAttrs -> AT.Parser Pattern
  parsePat_WRTR_tail isTR x as
   = do
        !_ <- mytrace "parsePat_WRTR_tail." $ return ()
        !_ <- mytrace ("**HWRTR1**: isTR="++show isTR++" x="++show x) $ return ()
#if 0
        roi <- AT.takeText
        error $ "DEVEXIT: " ++ show roi
#endif
        !_ <- mytrace ("**HWRTR1.5**: "++show x) $ return ()
        pats <- parsePats <|> pure []
        !_ <- mytrace ("**HWRTR2**: "++show x) $ return ()
#if USE_CURLY_BRACE_INSTEAD_OF_PAREN_FOR_SUBPATTERNS
        AT.char '}'
#else
        AT.char ')'
#endif
        !_ <- mytrace ("**HWRTR3**: "++show x) $ return ()
        if isTR
          then return (Node (TR as) pats)
          else return (Node (WR as) pats)

-------------------------------------------------------------------------------

  parse_type_constraints :: AT.Parser [T.Text]
  parse_type_constraints = do

--  AT.take 3 >>= \ test -> error $ "test="++show test

    !_ <- mytrace "parse_type_constraints." $ return ()

    let endchar = ':'
    let sepchar = ';'

    -- (1) Grab (or be ready to grab) input up to the next unescaped ':'
    --     character, which must exist.  We might as well do this up front,
    --     since we /will/ actually consume all of it.
    let loop = do
          seg <- AT.takeWhile (\c->c/=endchar&&c/='\\') :: AT.Parser T.Text
          !_ <- mytrace ("loop: seg="++T.unpack seg) $ return ()
          if T.null seg
           then do
            !_ <- mytrace "loop: T.null seg" $ return ()
            return []
           else do
                 (do
                     !_ <- mytrace "loop: <appending backslash>" $ return ()
                     AT.char '\\' *> AT.anyChar
                     >>= \ c -> ( ( seg `T.snoc` '\\' `T.snoc` c ) : ) <$> loop
                  )
             <|> (do
                     !_ <- mytrace ("loop: [seg]="++show [seg]) $ return ()
                     AT.char endchar
                     return [seg]  -- we know it was endchar
                  )
             <|> (do
                     !_ <- mytrace "\"parse_type_constraints: unexpected end of input\"" $ return ()
                     fail "parse_type_constraints: unexpected end of input"
                  )
    segs <- loop
    !_ <- mytrace ("segs="++show segs) $ return ()
    let seg = T.concat segs
    !_ <- mytrace ("seg="++T.unpack seg) $ return ()

    let eblocks = AT.parseOnly
                    ( AT.sepBy1'
                          (AT.takeWhile (/=';'))
                          (AT.char ';')
                    )
                    seg
    !_ <- mytrace ("eblocks="++show eblocks) $ return ()
    let blocks = case eblocks of
         Left msg -> error $ "parse_type_constraints: eblocks parse failure: " ++ msg
         Right blocks -> blocks
    !_ <- mytrace ("blocks="++show blocks) $ return ()
    return blocks
  {-# INLINE parse_type_constraints #-}

-------------------------------------------------------------------------------

  -- Try attoparsec.

-- XXX If you toggle this, don't forget to also toggle
-- the one in Compile_shared_utils2.hs (compilePat_).
#if 1

  compileUsingAttoparsec :: T.Text -> AT.Result [Pattern]
  compileUsingAttoparsec input
   = AT.feed (AT.parse parsePatsTop input) T.empty

#else

  compileUsingAttoparsec :: T.Text -> Either String [Pattern]
  compileUsingAttoparsec input
   = AT.parseOnly parsePatsTop input

#endif

-------------------------------------------------------------------------------

#else

  compilePat' :: String -> Pattern
#if 0
  compilePat' s = ...
#else
  err = intercalate "\n"
   [ "\n"
   , "Sorry, at this time (version 0.6.0.*) there is no non-attoparsec parser"
   , "for the new pattern grammar.  This also implies that HASKELL98_FRAGMENT"
   , "has no pattern DSL facilities (except for showPat), and it is necessary"
   , "to work with the PatNode constructors directly.  The situation should"
   , "be remedied by version 0.6.1."
   ]
  compilePat' _ = error err
#endif

#endif

-------------------------------------------------------------------------------