System/Console/MultiArg/Prim.hs

-- | Parser primitives. These are the only functions that have access
-- to the internals of the parser. Use these functions if you want to
-- build your own parser from scratch. If your needs are simpler, you
-- will want to look at "System.Console.MultiArg.SimpleParser" or
-- "System.Console.MultiArg.Combinator", which do a lot of grunt work
-- for you.
module System.Console.MultiArg.Prim (
    -- * Parser types
  Parser,
  
  -- * Running a parser
  
  -- | Each parser runner is applied to a list of Strings, which are the
  -- command line arguments to parse. 
  parse,
  
  -- * Higher-level parser combinators
  parserMap,
  good,
  apply,
  choice,
  combine,
  lookAhead,
  
  -- ** Running parsers multiple times
  several,
  manyTill,

  -- ** Failure and errors
  throw,
  throwString,
  genericThrow,
  (<??>),
  try,
  
  -- * Parsers
  -- ** Short options and arguments
  pendingShortOpt,
  nonPendingShortOpt,
  pendingShortOptArg,  
  
  -- ** Long options and arguments
  exactLongOpt,
  approxLongOpt,

  -- ** Stoppers
  stopper,
  resetStopper,
  
  -- ** Positional (non-option) arguments
  nextArg,
  nonOptionPosArg,
  
  -- ** Miscellaneous
  end,
  
  -- * Errors
  Message(Expected, StrMsg, Replaced, UnknownError),
  Error(Error),
  Location

  ) where


import System.Console.MultiArg.Option
  (ShortOpt,
    unShortOpt,
    LongOpt,
    unLongOpt, 
    makeLongOpt )
import Control.Applicative ( Applicative, Alternative )
import qualified Control.Applicative
import Control.Monad.Exception.Synchronous
  (Exceptional(Success, Exception))
import qualified Data.Set as Set
import Data.Set ( Set )
import Control.Monad ( when, MonadPlus(mzero, mplus), guard )
import Data.Monoid ( Monoid ( mempty, mappend ) )
import qualified Data.List as L
import Data.List (isPrefixOf)

type Location = String

-- | An Error contains a list of Messages and a String indicating
-- where the error happened.
data Error = Error [Message] Location deriving Show

-- | Extract a Location from a ParseSt for use in error messages.
location :: ParseSt -> Location
location st = pending ++ next ++ stop where
  pending
    | null (pendingShort st) = ""
    | otherwise = "short option or short option argument: "
                  ++ pendingShort st ++ " "
  next = case remaining st of
    [] -> "no words remaining"
    x:_ -> "next word: " ++ x
  stop = if sawStopper st then " (stopper already seen)" else ""

-- | Error messages.
data Message =
  Expected String
  -- ^ The parser expected to see one thing, but it actually saw
  -- something else. The string indicates what was expected.
  | StrMsg String
    -- ^ The 'fromString' function was applied.
    
  | Replaced String
    -- ^ A previous list of error messages was replaced with this error message.
    
  | UnknownError
    -- ^ Any other error; used by 'genericThrow'.

  deriving Show

-- | Carries the internal state of the parser. The counter is a simple
-- way to determine whether the remaining list one ParseSt has been
-- modified from another. When parsers modify remaining, they
-- increment the counter.
data ParseSt = ParseSt { pendingShort :: String
                       , remaining :: [String]
                       , sawStopper :: Bool
                       , counter :: Int
                       , errors :: [Message]
                       } deriving Show

-- | Load up the ParseSt with an initial user state and a list of
-- commmand line arguments.
defaultState :: [String] -> ParseSt
defaultState ts = ParseSt { pendingShort = ""
                          , remaining = ts
                          , sawStopper = False
                          , counter = 0
                          , errors = [] }

-- | Carries the result of each parse.
data Result a = Bad | Good a

-- | Parsers. Internally the parser tracks what input remains to be
-- parsed, whether there are any pending short options, and whether a
-- stopper has been seen. A parser can return a value of any type.
--
-- The parser also includes the notion of failure. Any parser can
-- fail; a failed parser affects the behavior of combinators such as
-- combine.
data Parser a =
  Parser { runParser :: ParseSt -> (Result a, ParseSt) }

instance Functor Parser where
  fmap = parserMap

instance Applicative Parser where
  pure = good
  (<*>) = apply

instance Monoid (Parser a) where
  mempty = genericThrow
  mappend = choice

instance Alternative Parser where
  empty = genericThrow
  (<|>) = choice
  many = several

instance Monad Parser where
  (>>=) = combine
  return = good
  fail = throwString

instance MonadPlus Parser where
  mzero = genericThrow
  mplus = choice


-- | Runs a parser. This is the only way to change a value of type
-- @Parser a@ into a value of type @a@ (that is, it is the only way to
-- \"get out of the Parser monad\" or to \"escape the Parser monad\".)
parse ::
  [String]
  -- ^ Command line arguments to parse. Presumably you got these from
  -- 'getArgs'. If there is any chance that you will be parsing
  -- Unicode strings, see the documentation in
  -- "System.Console.MultiArg.GetArgs" before you use
  -- 'System.Environment.getArgs'.
  
  -> Parser a
  -- ^ Parser to run
  
  -> Exceptional Error a
  -- ^ Success or failure. Any parser might fail; for example, the
  -- command line might not have any values left to parse. Use of the
  -- 'choice' combinator can lead to a list of failures. If multiple
  -- parsers are tried one after another using the 'choice' combinator,
  -- and each fails without consuming any input, then multiple Error
  -- will result, one for each failure.

parse ts p =
  let (result, st') = runParser p (defaultState ts)
  in case result of
    Good g -> Success g
    Bad ->
      let e = Error (errors st') (location st')
      in Exception e


-- | Combines two parsers into a single parser. The second parser can
-- optionally depend upon the result from the first parser.
--
-- This applies the first parser. If the first parser succeeds,
-- combine then takes the result from the first parser, applies the
-- function given to the result from the first parser, and then
-- applies the resulting parser.
--
-- If the first parser fails, combine will not apply the second
-- function but instead will bypass the second parser.
--
-- This provides the implementation for '>>=' in
-- 'Control.Monad.Monad'.
combine :: Parser a -> (a -> Parser b) -> Parser b
combine a k = Parser $ \s ->
  let (r, s') = runParser a s
  in case r of
    Bad -> (Bad, s')
    Good g -> runParser (k g) s'


-- | @lookAhead p@ runs parser p. If p succeeds, lookAhead p succeeds
-- without consuming any input. If p fails without consuming any
-- input, so does lookAhead. If p fails and consumes input, lookAhead
-- also fails and consumes input. If this is undesirable, combine with
-- "try".
lookAhead :: Parser a -> Parser a
lookAhead a = Parser $ \s ->
  let (r, s') = runParser a s
  in case r of
    Good g -> (Good g, s)
    Bad -> (Bad, s')


-- | @good a@ always succeeds without consuming any input and has
-- result a. This provides the implementation for
-- 'Control.Monad.Monad.return' and
-- 'Control.Applicative.Applicative.pure'.
good :: a -> Parser a
good a = Parser $ \s -> (Good a, s)


-- | @throwString s@ always fails without consuming any input. The
-- failure contains a record of the string passed in by s. This
-- provides the implementation for 'Control.Monad.Monad.fail'.
throwString :: String -> Parser a
throwString e = Parser $ \s ->
  let err = StrMsg e
      s' = s { errors = err : errors s }
  in (Bad, s')


-- | @parserMap f p@ applies function f to the result of parser
-- p. First parser p is run. If it succeeds, function f is applied to
-- the result and another parser is returned with the result. If it
-- fails, f is not applied and a failed parser is returned. This
-- provides the implementation for 'Prelude.Functor.fmap'.
parserMap :: (a -> b) -> Parser a -> Parser b
parserMap f l = Parser $ \s ->
  let (r, s') = runParser l s
  in case r of
    Good g -> (Good (f g), s')
    Bad -> (Bad, s')


-- | apply l r applies the function found in parser l to the result of
-- parser r. First the l parser is run. If it succeeds, it has a
-- resulting function. Then the r parser is run. If it succeeds, the
-- function from the l parser is applied to the result of the r
-- parser, and a new parser is returned with the result. If either
-- parser l or parser r fails, then a failed parser is returned. This
-- provides the implementation for '<*>' in
-- 'Control.Applicative.Applicative'.
apply :: Parser (a -> b) -> Parser a -> Parser b
apply fa a = Parser $ \s ->
  let (r, s') = runParser fa s
  in case r of
    Good g ->
      let (ra, sa) = runParser a s'
      in case ra of
        Good ga -> (Good (g ga), sa)
        Bad -> (Bad, sa)
    Bad -> (Bad, s')


-- | Fail with an unhelpful error message. Usually throw is more
-- useful, but this is handy to implement some typeclass instances.
genericThrow :: Parser a
genericThrow = throw UnknownError

-- | throw e always fails without consuming any input and returns a
-- failed parser with error state e.
throw :: Message -> Parser a
throw e = Parser $ \s ->
  (Bad, s { errors = e : errors s })

noConsumed :: ParseSt -> ParseSt -> Bool
noConsumed old new = counter old >= counter new

-- | Runs the first parser. If it fails without consuming any input,
-- then runs the second parser. If the first parser succeeds, then
-- returns the result of the first parser. If the first parser fails
-- and consumes input, then returns the result of the first
-- parser. This provides the implementation for
-- '<|>' in 'Control.Applicative.Alternative'.
choice :: Parser a -> Parser a -> Parser a
choice a b = Parser $ \sOld ->
  let (ra, sa) = runParser a sOld
  in case ra of
    Good g ->
      let sNew = sa { errors = [] }
      in (Good g, sNew)
    Bad ->
      if noConsumed sOld sa
      then let sNew = sOld { errors = errors sa }
               (rb, sb) = runParser b sNew
           in case rb of
             Good g' -> let sb' = sb { errors = [] }
                        in (Good g', sb')
             Bad -> (Bad, sb)
      else (Bad, sa)


-- | Runs the parser given. If it fails /without consuming any input/,
-- then applies the given function to the list of messages and replaces
-- the list of messages with the list returned by the
-- function. Otherwise, returns the result of the parser.
(<??>) :: Parser a -> ([Message] -> [Message]) -> Parser a
(<??>) l f = Parser $ \s ->
  let (r, s') = runParser l s
  in case r of
    Good g -> (Good g, s')
    Bad ->
      if noConsumed s s'
      then let s'' = s' { errors = f $ errors s' }
           in (Bad, s'')
      else (Bad, s')

infix 0 <??>

increment :: ParseSt -> ParseSt
increment old = old { counter = succ . counter $ old }

-- | Parses only pending short options. Fails without consuming any
-- input if there has already been a stopper or if there are no
-- pending short options. Fails without consuming any input if there
-- is a pending short option, but it does not match the short option
-- given. Succeeds and consumes a pending short option if it matches
-- the short option given.

pendingShortOpt :: ShortOpt -> Parser ()
pendingShortOpt so = Parser $ \s ->
  let err = Expected ("pending short option: " ++ [(unShortOpt so)])
      es = (Bad, s { errors = err : errors s })
      gd newSt = (Good (), newSt)
  in maybe es gd $ do
    when (sawStopper s) Nothing
    (first, rest) <- case pendingShort s of
      [] -> Nothing
      x:xs -> return (x, xs)
    when (unShortOpt so /= first) Nothing
    return (increment s { pendingShort = rest })

-- | Parses only non-pending short options. Fails without consuming
-- any input if, in order:
--
-- * there are pending short options
--
-- * there has already been a stopper
--
-- * there are no arguments left to parse
--
-- * the next argument is an empty string
--
-- * the next argument does not begin with a dash
--
-- * the next argument is a single dash
--
-- * the next argument is a short option but it does not match
--   the one given
--
-- * the next argument is a stopper
--
-- Otherwise, consumes the next argument, puts any remaining letters
-- from the argument into a pending short, and removes the first word
-- from remaining arguments to be parsed.
nonPendingShortOpt :: ShortOpt -> Parser ()
nonPendingShortOpt so = Parser $ \s ->
  let err = Expected (msg ++ [unShortOpt so])
      msg = "non pending short option: "
      errRet = (Bad, s { errors = err : errors s })
      gd n = (Good (), n)
  in maybe errRet gd $ do
    guard (noPendingShorts s)
    guard (noStopper s)
    (a, s') <- nextWord s
    (maybeDash, word) <- case a of
      [] -> Nothing
      x:xs -> return (x, xs)
    guard (maybeDash == '-')
    (letter, arg) <- case word of
      [] -> Nothing
      x:xs -> return (x, xs)
    guard (letter == unShortOpt so)
    let s'' = s' { pendingShort = arg }
    return s''

-- | Parses an exact long option. That is, the text of the
-- command-line option must exactly match the text of the
-- option. Returns the option, and any argument that is attached to
-- the same word of the option with an equal sign (for example,
-- @--follow=\/dev\/random@ will return @Just \"\/dev\/random\"@ for the
-- argument.) If there is no equal sign, returns Nothing for the
-- argument. If there is an equal sign but there is nothing after it,
-- returns @Just \"\"@ for the argument.
--
-- If you do not want your long option to have equal signs and
-- GNU-style option arguments, wrap this parser in something that will
-- fail if there is an option argument.
--
-- Fails without consuming any input if:
--
-- * there are pending short options
--
-- * a stopper has been parsed
--
-- * there are no arguments left on the command line
--
-- * the next argument on the command line does not begin with
--   two dashes
--
-- * the next argument on the command line is @--@ (a stopper)
--
-- * the next argument on the command line does begin with two
--   dashes but its text does not match the argument we're looking for
exactLongOpt :: LongOpt -> Parser (Maybe String)
exactLongOpt lo = Parser $ \s ->
  let ert = (Bad, err)
      err = s { errors = Expected msg : errors s } where
        msg = "long option: " ++ unLongOpt lo
      gd (g, n) = (Good g, n)
  in maybe ert gd $ do
    guard (noPendingShorts s)
    guard (noStopper s)
    (x, s') <- nextWord s
    (word, afterEq) <- getLongOption x
    guard (word == unLongOpt lo)
    return (afterEq, s')

-- | Takes a single String and returns a tuple, where the first element
-- is the first two letters, the second element is everything from the
-- third letter to the equal sign, and the third element is Nothing if
-- there is no equal sign, or Just String with everything after the
-- equal sign if there is one.
splitLongWord :: String -> (String, String, Maybe String)
splitLongWord t = (f, s, r) where
  (f, rest) = L.splitAt 2 t
  (s, withEq) = L.break (== '=') rest
  r = case withEq of
    [] -> Nothing
    _:xs -> Just xs


noPendingShorts :: ParseSt -> Bool
noPendingShorts st = case pendingShort st of
  [] -> True
  _ -> False

noStopper :: ParseSt -> Bool
noStopper = not . sawStopper

getLongOption :: String -> Maybe (String, Maybe String)
getLongOption str = do
  guard (str /= "--")
  let (pre, word, afterEq) = splitLongWord str
  guard (pre == "--")
  return (word, afterEq)
  

nextWord :: ParseSt -> Maybe (String, ParseSt)
nextWord st = case remaining st of
  [] -> Nothing
  x:xs ->
    let s' = increment st { remaining = xs }
    in return (x, s')

approxLongOptError ::
  Set LongOpt
  -> ParseSt
  -> ParseSt
approxLongOptError set st = st { errors = newE : errors st } where
  newE = Expected ex
  ex = "a long option: " ++ longs
  longs = concat . L.intersperse ", " $ opts
  opts = fmap unLongOpt . Set.toList $ set

-- | Examines the next word. If it matches a Text in the set
-- unambiguously, returns a tuple of the word actually found and the
-- matching word in the set and the accompanying text after the equal
-- sign (if any). If the Set is empty, this parser will always fail.
approxLongOpt ::
  Set LongOpt
  -> Parser (String, LongOpt, Maybe String)
approxLongOpt ts = Parser $ \s ->
  let err = (Bad, approxLongOptError ts s)
      gd (g, newSt) = (Good g, newSt)
  in maybe err gd $ do
    guard (noPendingShorts s)
    (x, s') <- nextWord s
    (word, afterEq) <- getLongOption x
    opt <- makeLongOpt word
    if Set.member opt ts
      then return ((word, opt, afterEq), s')
      else do
      let p t = word `isPrefixOf` (unLongOpt t)
          matches = Set.filter p ts
      case Set.toList matches of
        [] -> Nothing
        (m:[]) -> return ((word, m, afterEq), s')
        _ -> Nothing

-- | Parses only pending short option arguments. For example, for the
-- @tail@ command, if you enter the option @-c25@, then after parsing
-- the @-c@ option the @25@ becomes a pending short option argument
-- because it was in the same command line argument as the @-c@.
--
-- Fails without consuming any input if:
--
-- * a stopper has already been parsed
--
-- * there are no pending short option arguments
--
-- On success, returns the String of the pending short option argument
-- (this String will never be empty).
pendingShortOptArg :: Parser String
pendingShortOptArg = Parser $ \s ->
  let ert = (Bad, err)
      err = s { errors = Expected msg : errors s } where
        msg = "pending short option argument"
      gd (g, newSt) = (Good g, newSt)
  in maybe ert gd $ do
    guard (noStopper s)
    case pendingShort s of
      [] -> Nothing
      xs ->
        let newSt = increment s { pendingShort = "" }
        in return (xs, newSt)


-- | Parses a \"stopper\" - that is, a double dash. Changes the internal
-- state of the parser to reflect that a stopper has been seen.
stopper :: Parser ()
stopper = Parser $ \s ->
  let err = s { errors = Expected msg : errors s } where
        msg = "stopper"
      ert = (Bad, err)
      gd (g, newSt) = (Good g, newSt)
  in maybe ert gd $ do
    guard (noPendingShorts s)
    guard (noStopper s)
    (x, s') <- nextWord s
    guard (x == "--")
    let s'' = s' { sawStopper = True }
    return ((), s'')

-- | If a stopper has already been seen, change the internal state
-- back to indicating that no stopper has been seen.
resetStopper :: Parser ()
resetStopper = Parser $ \s ->
  let s' = s { sawStopper = False }
  in (Good (), s')

-- | try p behaves just like p, but if p fails, try p will not consume
-- any input.
try :: Parser a -> Parser a
try a = Parser $ \s ->
  let (r, s') = runParser a s
  in case r of
    Good g -> (Good g, s')
    Bad -> (Bad, s'') where
      s'' = s { errors = errors s' }
      

-- | Returns the next string on the command line as long as there are
-- no pendings. Be careful - this will return the next string even if
-- it looks like an option (that is, it starts with a dash.) Consider
-- whether you should be using nonOptionPosArg instead. However this
-- can be useful when parsing command line options after a stopper.
nextArg :: Parser String
nextArg = Parser $ \s ->
  let ert = (Bad, err)
      err = s { errors = Expected msg : errors s } where
        msg = "next argument"
      gd (g, newSt) = (Good g, newSt)
  in maybe ert gd $ do
    guard (noPendingShorts s)
    nextWord s


-- | If there are pending short options, fails without consuming any input.
--
-- Otherwise, if a stopper has NOT already been parsed, then returns
-- the next word if it is either a single dash or any other word that
-- does not begin with a dash. If the next word does not meet these
-- criteria, fails without consuming any input.
--
-- Otherwise, if a stopper has already been parsed, then returns the
-- next word, regardless of whether it begins with a dash or not.
nonOptionPosArg :: Parser String
nonOptionPosArg = Parser $ \s ->
  let ert = (Bad, err)
      err = s { errors = Expected msg : errors s } where
        msg = "non option positional argument"
      gd (g, newSt) = (Good g, newSt)
  in maybe ert gd $ do
    guard (noPendingShorts s)
    (x, s') <- nextWord s
    result <-
      if sawStopper s
      then return x
      else case x of
        [] -> return x
        '-':[] -> return "-"
        f:_ -> if f == '-'
               then Nothing
               else return x
    return (result, s')


-- | manyTill p e runs parser p repeatedly until parser e succeeds.
--
-- More precisely, first it runs parser e. If parser e succeeds, then
-- manyTill returns the result of all the preceding successful parses
-- of p. If parser e fails (it does not matter whether e consumed any
-- input or not), manyTill runs parser p again. What happens next
-- depends on whether p succeeded or failed. If p succeeded, then the
-- loop starts over by running parser e again. If p failed (it does
-- not matter whether it consumed any input or not), then manyTill
-- fails. The state of the parser is updated to reflect its state
-- after the failed run of p, and the parser is left in a failed
-- state.
--
-- Should parser e succeed (as it will on a successful application of
-- manyTill), then the parser state will reflect that parser e
-- succeeded--that is, if parser e consumes input, that input will be
-- consumed in the parser that is returned. Wrap e inside of
-- @lookAhead@ if that is undesirable.
--
-- Be particularly careful to get the order of the arguments
-- correct. Applying this function to reversed arguments will yield
-- bugs that are very difficult to diagnose.
manyTill :: Parser a -> Parser end -> Parser [a]
manyTill (Parser r) (Parser f) = Parser $ \s ->
  let Till g lS lF = parseTill s r f
  in if lF then (Bad, lS) else (Good g, lS)


data Till a =
  Till { _goods :: [a]
       , _lastSt :: ParseSt
       , _lastRunFailed :: Bool }

parseTill ::
  ParseSt
  -> (ParseSt -> (Result a, ParseSt))
  -> (ParseSt -> (Result b, ParseSt))
  -> Till a
parseTill s fr ff =
  case ff s of
    (Good _, s') -> Till [] s' False
    (Bad, _) ->
      case fr s of
        (Bad, s'') -> Till [] s'' True
        (Good g, s'') ->
          let Till gs lS lF = parseTill s'' fr ff
          in if counter s'' == counter s
             then parseTillErr
             else Till (g:gs) lS lF

parseTillErr :: a
parseTillErr =
  error "parseTill applied to parser that takes empty list"


-- | several p runs parser p zero or more times and returns all the
-- results. This proceeds like this: parser p is run and, if it
-- succeeds, the result is saved and parser p is run
-- again. Repeat. Eventually this will have to fail. If the last run
-- of parser p fails without consuming any input, then several p runs
-- successfully. The state of the parser is updated to reflect the
-- successful runs of p. If the last run of parser p fails but it
-- consumed input, then several p fails. The state of the parser is
-- updated to reflect the state up to and including the run that
-- partially consumed input. The parser is left in a failed state.
--
-- This semantic can come in handy. For example you might run a parser
-- multiple times that parses an option and arguments to the
-- option. If the arguments fail to parse, then several will fail.
--
-- This function provides the implementation for
-- 'Control.Applicative.Alternative.many'.
several :: Parser a -> Parser [a]
several (Parser l) = Parser $ \s ->
  let (result, finalGoodSt, finalBadSt) = parseRepeat s l
  in if noConsumed finalGoodSt finalBadSt
     then (Good result, finalGoodSt)
     else (Bad, finalBadSt)


parseRepeat ::
  ParseSt
  -> (ParseSt -> (Result a, ParseSt))
  -> ([a], ParseSt, ParseSt)
parseRepeat st1 f =
  case f st1 of
    (Good a, st') ->
      if noConsumed st1 st'
      then error $ "several applied to parser that succeeds without"
           ++ " consuming any input"
      else
        let (ls, finalGoodSt, finalBadSt) = parseRepeat st' f
        in (a : ls, finalGoodSt, finalBadSt)
    (Bad, st') -> ([], st1, st')
    

-- | Succeeds if there is no more input left.
end :: Parser ()
end = Parser $ \s ->
  let ert = (Bad, err)
      err = s { errors = Expected msg : errors s } where
        msg = "end of input"
      gd (g, newSt) = (Good g, newSt)
  in maybe ert gd $ do
    guard (noPendingShorts s)
    guard (null . remaining $ s)
    return ((), s)