{-
    Copyright 2012-2019 Vidar Holen

    This file is part of ShellCheck.
    https://www.shellcheck.net

    ShellCheck is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    ShellCheck is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiWayIf #-}
module ShellCheck.Parser (parseScript, runTests) where

import ShellCheck.AST
import ShellCheck.ASTLib hiding (runTests)
import ShellCheck.Data
import ShellCheck.Interface

import Control.Applicative ((<*), (*>))
import Control.Monad
import Control.Monad.Identity
import Control.Monad.Trans
import Data.Char
import Data.Functor
import Data.List (isPrefixOf, isInfixOf, isSuffixOf, partition, sortBy, intercalate, nub, find)
import Data.Maybe
import Data.Monoid
import Debug.Trace
import GHC.Exts (sortWith)
import Prelude hiding (readList)
import System.IO
import Text.Parsec hiding (runParser, (<?>))
import Text.Parsec.Error
import Text.Parsec.Pos
import qualified Control.Monad.Reader as Mr
import qualified Control.Monad.State as Ms
import qualified Data.Map as Map

import Test.QuickCheck.All (quickCheckAll)

type SCBase m = Mr.ReaderT (Environment m) (Ms.StateT SystemState m)
type SCParser m v = ParsecT String UserState (SCBase m) v

backslash :: Monad m => SCParser m Char
backslash = char '\\'
linefeed :: Monad m => SCParser m Char
linefeed = do
    optional carriageReturn
    c <- char '\n'
    readPendingHereDocs
    return c
singleQuote = char '\''
doubleQuote = char '"'
variableStart = upper <|> lower <|> oneOf "_"
variableChars = upper <|> lower <|> digit <|> oneOf "_"
-- Chars to allow in function names
functionChars = variableChars <|> oneOf ":+?-./^@"
-- Chars to allow in functions using the 'function' keyword
extendedFunctionChars = functionChars <|> oneOf "[]*=!"
specialVariable = oneOf (concat specialVariables)
paramSubSpecialChars = oneOf "/:+-=%"
quotableChars = "|&;<>()\\ '\t\n\r\xA0" ++ doubleQuotableChars
quotable = almostSpace <|> oneOf quotableChars
bracedQuotable = oneOf "}\"$`'"
doubleQuotableChars = "\\\"$`"
doubleQuotable = oneOf doubleQuotableChars
whitespace = oneOf " \t" <|> carriageReturn <|> almostSpace <|> linefeed
linewhitespace = oneOf " \t" <|> almostSpace

suspectCharAfterQuotes = variableChars <|> char '%'

extglobStartChars = "?*@!+"
extglobStart = oneOf extglobStartChars

unicodeDoubleQuotes = "\x201C\x201D\x2033\x2036"
unicodeSingleQuotes = "\x2018\x2019"

prop_spacing1 = isOk spacing "  \\\n # Comment"
prop_spacing2 = isOk spacing "# We can continue lines with \\"
prop_spacing3 = isWarning spacing "   \\\n #  --verbose=true \\"
spacing = do
    x <- many (many1 linewhitespace <|> continuation)
    optional readComment
    return $ concat x
  where
    continuation = do
        try (string "\\\n")
        -- The line was continued. Warn if this next line is a comment with a trailing \
        whitespace <- many linewhitespace
        optional $ do
            x <- readComment
            when ("\\" `isSuffixOf` x) $
                parseProblem ErrorC 1143 "This backslash is part of a comment and does not continue the line."
        return whitespace

spacing1 = do
    spacing <- spacing
    when (null spacing) $ fail "Expected whitespace"
    return spacing

prop_allspacing = isOk allspacing "#foo"
prop_allspacing2 = isOk allspacing " #foo\n # bar\n#baz\n"
prop_allspacing3 = isOk allspacing "#foo\n#bar\n#baz\n"
allspacing = do
    s <- spacing
    more <- option False (linefeed >> return True)
    if more then do
        rest <- allspacing
        return $ s ++ "\n" ++ rest
      else
        return s

allspacingOrFail = do
    s <- allspacing
    when (null s) $ fail "Expected whitespace"
    return s

readUnicodeQuote = do
    start <- startSpan
    c <- oneOf (unicodeSingleQuotes ++ unicodeDoubleQuotes)
    id <- endSpan start
    parseProblemAtId id WarningC 1110 "This is a unicode quote. Delete and retype it (or quote to make literal)."
    return $ T_Literal id [c]

carriageReturn = do
    pos <- getPosition
    char '\r'
    parseProblemAt pos ErrorC 1017 "Literal carriage return. Run script through tr -d '\\r' ."
    return '\r'

almostSpace =
    choice [
        check '\xA0' "unicode non-breaking space",
        check '\x200B' "unicode zerowidth space"
    ]
  where
    check c name = do
        parseNote ErrorC 1018 $ "This is a " ++ name ++ ". Delete and retype it."
        char c
        return ' '

--------- Message/position annotation on top of user state
data ParseNote = ParseNote SourcePos SourcePos Severity Code String deriving (Show, Eq)
data Context =
        ContextName SourcePos String
        | ContextAnnotation [Annotation]
        | ContextSource String
    deriving (Show)

data HereDocContext =
        HereDocPending Token [Context] -- on linefeed, read this T_HereDoc
    deriving (Show)

data UserState = UserState {
    lastId :: Id,
    positionMap :: Map.Map Id (SourcePos, SourcePos),
    parseNotes :: [ParseNote],
    hereDocMap :: Map.Map Id [Token],
    pendingHereDocs :: [HereDocContext]
}
initialUserState = UserState {
    lastId = Id $ -1,
    positionMap = Map.empty,
    parseNotes = [],
    hereDocMap = Map.empty,
    pendingHereDocs = []
}

codeForParseNote (ParseNote _ _ _ code _) = code

getLastId = lastId <$> getState

getNextIdBetween startPos endPos = do
    state <- getState
    let newId = incId (lastId state)
    let newMap = Map.insert newId (startPos, endPos) (positionMap state)
    putState $ state {
        lastId = newId,
        positionMap = newMap
    }
    return newId
  where incId (Id n) = Id $ n+1

getNextIdSpanningTokens startTok endTok = do
    (start, _) <- getSpanForId (getId startTok)
    (_, end)   <- getSpanForId (getId endTok)
    getNextIdBetween start end

-- Get an ID starting from the first token of the list, and ending after the last
getNextIdSpanningTokenList list =
    case list of
    [] -> do
        pos <- getPosition
        getNextIdBetween pos pos
    (h:_) ->
        getNextIdSpanningTokens h (last list)

-- Get the span covered by an id
getSpanForId :: Monad m => Id -> SCParser m (SourcePos, SourcePos)
getSpanForId id =
    Map.findWithDefault (error "Internal error: no position for id. Please report!") id <$>
        getMap

-- Create a new id with the same span as an existing one
getNewIdFor :: Monad m => Id -> SCParser m Id
getNewIdFor id = getSpanForId id >>= uncurry getNextIdBetween

data IncompleteInterval = IncompleteInterval SourcePos

startSpan = IncompleteInterval <$> getPosition

endSpan (IncompleteInterval start) = do
    endPos <- getPosition
    getNextIdBetween start endPos

getSpanPositionsFor m = do
    start <- getPosition
    m
    end <- getPosition
    return (start, end)

addToHereDocMap id list = do
    state <- getState
    let map = hereDocMap state
    putState $ state {
        hereDocMap = Map.insert id list map
    }

addPendingHereDoc t = do
    state <- getState
    context <- getCurrentContexts
    let docs = pendingHereDocs state
    putState $ state {
        pendingHereDocs = HereDocPending t context : docs
    }

popPendingHereDocs = do
    state <- getState
    let pending = pendingHereDocs state
    putState $ state {
        pendingHereDocs = []
    }
    return . reverse $ pendingHereDocs state

getMap = positionMap <$> getState
getParseNotes = parseNotes <$> getState

addParseNote n = do
    irrelevant <- shouldIgnoreCode (codeForParseNote n)
    unless irrelevant $ do
        state <- getState
        putState $ state {
            parseNotes = n : parseNotes state
        }

ignoreProblemsOf p = do
    systemState <- lift . lift $ Ms.get
    p <* (lift . lift . Ms.put $ systemState)

shouldIgnoreCode code = do
    context <- getCurrentContexts
    checkSourced <- Mr.asks checkSourced
    return $ any (contextItemDisablesCode checkSourced code) context

-- Does this item on the context stack disable warnings for 'code'?
contextItemDisablesCode :: Bool -> Integer -> Context -> Bool
contextItemDisablesCode alsoCheckSourced code = disabling alsoCheckSourced
  where
    disabling checkSourced item =
        case item of
            ContextAnnotation list -> any disabling' list
            ContextSource _ -> not $ checkSourced
            _ -> False
    disabling' (DisableComment n m) = code >= n && code < m
    disabling' _ = False


getCurrentAnnotations includeSource =
    concatMap get . takeWhile (not . isBoundary) <$> getCurrentContexts
  where
    get (ContextAnnotation list) = list
    get _ = []
    isBoundary (ContextSource _) = not includeSource
    isBoundary _ = False


shouldFollow file = do
    context <- getCurrentContexts
    if any isThisFile context
      then return False
      else
        if length (filter isSource context) >= 100
          then do
            parseProblem ErrorC 1092 "Stopping at 100 'source' frames :O"
            return False
          else
            return True
  where
    isSource (ContextSource _) = True
    isSource _ = False
    isThisFile (ContextSource name) | name == file = True
    isThisFile _= False

getSourceOverride = do
    context <- getCurrentContexts
    return . msum . map findFile $ takeWhile isSameFile context
  where
    isSameFile (ContextSource _) = False
    isSameFile _ = True

    findFile (ContextAnnotation list) = msum $ map getFile list
    findFile _ = Nothing
    getFile (SourceOverride str) = Just str
    getFile _ = Nothing

-- Store potential parse problems outside of parsec

data SystemState = SystemState {
    contextStack :: [Context],
    parseProblems :: [ParseNote]
}
initialSystemState = SystemState {
    contextStack = [],
    parseProblems = []
}

data Environment m = Environment {
    systemInterface :: SystemInterface m,
    checkSourced :: Bool,
    ignoreRC :: Bool,
    currentFilename :: String,
    shellTypeOverride :: Maybe Shell
}

parseProblem level code msg = do
    pos <- getPosition
    parseProblemAt pos level code msg

setCurrentContexts c = Ms.modify (\state -> state { contextStack = c })
getCurrentContexts = Ms.gets contextStack

popContext = do
    v <- getCurrentContexts
    case v of
        (a:r) -> do
            setCurrentContexts r
            return $ Just a
        [] ->
            return Nothing

pushContext c = do
    v <- getCurrentContexts
    setCurrentContexts (c:v)

parseProblemAtWithEnd start end level code msg = do
    irrelevant <- shouldIgnoreCode code
    unless irrelevant $
        addParseProblem note
  where
    note = ParseNote start end level code msg

addParseProblem note =
    Ms.modify (\state -> state {
        parseProblems = note:parseProblems state
    })

parseProblemAt pos = parseProblemAtWithEnd pos pos

parseProblemAtId :: Monad m => Id -> Severity -> Integer -> String -> SCParser m ()
parseProblemAtId id level code msg = do
    (start, end) <- getSpanForId id
    parseProblemAtWithEnd start end level code msg

-- Store non-parse problems inside

parseNote c l a = do
    pos <- getPosition
    parseNoteAt pos c l a

parseNoteAt pos c l a = addParseNote $ ParseNote pos pos c l a
parseNoteAtId id c l a = do
    (start, end) <- getSpanForId id
    addParseNote $ ParseNote start end c l a

parseNoteAtWithEnd start end c l a = addParseNote $ ParseNote start end c l a

--------- Convenient combinators
thenSkip main follow = main <* optional follow

unexpecting s p = try $
    (try p >> fail ("Unexpected " ++ s)) <|> return ()

notFollowedBy2 = unexpecting ""

isFollowedBy p = (lookAhead . try $ p $> True) <|> return False

reluctantlyTill p end =
    (lookAhead (void (try end) <|> eof) >> return []) <|> do
        x <- p
        more <- reluctantlyTill p end
        return $ x:more
      <|> return []

reluctantlyTill1 p end = do
    notFollowedBy2 end
    x <- p
    more <- reluctantlyTill p end
    return $ x:more

attempting rest branch =
    (try branch >> rest) <|> rest

orFail parser errorAction =
    try parser <|> (errorAction >>= fail)

-- Construct a node with a parser, e.g. T_Literal `withParser` (readGenericLiteral ",")
withParser node parser = do
    start <- startSpan
    contents <- parser
    id <- endSpan start
    return $ node id contents

wasIncluded p = option False (p >> return True)

acceptButWarn parser level code note =
    optional $ try (do
        pos <- getPosition
        parser
        parseProblemAt pos level code note
      )

parsecBracket before after op = do
    val <- before
    op val `thenSkip` after val <|> (after val *> fail "")

swapContext contexts p =
    parsecBracket (getCurrentContexts <* setCurrentContexts contexts)
                  setCurrentContexts
                  (const p)

withContext entry p = parsecBracket (pushContext entry) (const popContext) (const p)

called s p = do
    pos <- getPosition
    withContext (ContextName pos s) p

withAnnotations anns =
    withContext (ContextAnnotation anns)

readConditionContents single =
    readCondContents `attempting` lookAhead (do
                                pos <- getPosition
                                s <- readVariableName
                                spacing1
                                when (s `elem` commonCommands) $
                                    parseProblemAt pos WarningC 1014 "Use 'if cmd; then ..' to check exit code, or 'if [[ $(cmd) == .. ]]' to check output.")

  where
    spacingOrLf = condSpacing True
    condSpacing required = do
        pos <- getPosition
        space <- allspacing
        when (required && null space) $
            parseProblemAt pos ErrorC 1035 "You are missing a required space here."
        when (single && '\n' `elem` space) $
            parseProblemAt pos ErrorC 1080 "When breaking lines in [ ], you need \\ before the linefeed."
        return space

    typ = if single then SingleBracket else DoubleBracket
    readCondBinaryOp = try $ do
        optional guardArithmetic
        op <- getOp
        spacingOrLf
        return op
      where
        flaglessOps = [ "==", "!=", "<=", ">=", "=~", ">", "<", "=" ]

        getOp = do
            start <- startSpan
            op <- readRegularOrEscaped anyOp
            id <- endSpan start
            return $ TC_Binary id typ op

        anyOp = flagOp <|> flaglessOp <|> fail
                    "Expected comparison operator (don't wrap commands in []/[[]])"
        flagOp = try $ do
            s <- readOp
            when (s == "-a" || s == "-o") $ fail "Unexpected operator"
            return s
        flaglessOp =
            choice $ map (try . string) flaglessOps

        -- hacks to read quoted operators without having to read a shell word
    readEscaped p = try $ withEscape <|> withQuotes
      where
        withEscape = do
            char '\\'
            escaped <$> p
        withQuotes = do
            c <- oneOf "'\""
            s <- p
            char c
            return $ escaped s
        escaped s = if any (`elem` s) "<>()" then '\\':s else s

    readRegularOrEscaped p = readEscaped p <|> p


    guardArithmetic = do
        try . lookAhead $ void (oneOf "+*/%") <|> void (string "- ")
        parseProblem ErrorC 1076 $
            if single
            then "Trying to do math? Use e.g. [ $((i/2+7)) -ge 18 ]."
            else "Trying to do math? Use e.g. [[ $((i/2+7)) -ge 18 ]]."

    readCondUnaryExp = do
      op <- readCondUnaryOp
      pos <- getPosition
      liftM op readCondWord `orFail` do
          parseProblemAt pos ErrorC 1019 "Expected this to be an argument to the unary condition."
          return "Expected an argument for the unary operator"

    readCondUnaryOp = try $ do
        start <- startSpan
        s <- readOp
        id <- endSpan start
        spacingOrLf
        return $ TC_Unary id typ s

    readOp = try $ do
        char '-' <|> weirdDash
        s <- many1 letter <|> fail "Expected a test operator"
        return ('-':s)

    weirdDash = do
        pos <- getPosition
        oneOf "\x058A\x05BE\x2010\x2011\x2012\x2013\x2014\x2015\xFE63\xFF0D"
        parseProblemAt pos ErrorC 1100
            "This is a unicode dash. Delete and retype as ASCII minus."
        return '-'

    readCondWord = do
        notFollowedBy2 (try (spacing >> string "]"))
        x <- readNormalWord
        pos <- getPosition
        when (endedWith "]" x && notArrayIndex x) $ do
            parseProblemAt pos ErrorC 1020 $
                "You need a space before the " ++ (if single then "]" else "]]") ++ "."
            fail "Missing space before ]"
        when (single && endedWith ")" x) $ do
            parseProblemAt pos ErrorC 1021
                "You need a space before the \\)"
            fail "Missing space before )"
        void spacing
        return x
      where endedWith str (T_NormalWord id s@(_:_)) =
                case last s of T_Literal id s -> str `isSuffixOf` s
                               _ -> False
            endedWith _ _ = False
            notArrayIndex (T_NormalWord id s@(_:T_Literal _ t:_)) = t /= "["
            notArrayIndex _ = True

    readCondAndOp = readAndOrOp TC_And "&&" False <|> readAndOrOp TC_And "-a" True

    readCondOrOp = do
        optional guardArithmetic
        readAndOrOp TC_Or "||" False <|> readAndOrOp TC_Or "-o" True

    readAndOrOp node op requiresSpacing = do
        optional $ lookAhead weirdDash
        start <- startSpan
        x <- try $ string op
        id <- endSpan start
        condSpacing requiresSpacing
        return $ node id typ x

    readCondNullaryOrBinary = do
      start <- startSpan
      x <- readCondWord `attempting` (do
              pos <- getPosition
              lookAhead (char '[')
              parseProblemAt pos ErrorC 1026 $ if single
                  then "If grouping expressions inside [..], use \\( ..\\)."
                  else "If grouping expressions inside [[..]], use ( .. )."
            )
      id <- endSpan start
      (do
            pos <- getPosition
            isRegex <- regexOperatorAhead
            op <- readCondBinaryOp
            y <- if isRegex
                    then readRegex
                    else  readCondWord <|> (parseProblemAt pos ErrorC 1027 "Expected another argument for this operator." >> mzero)
            return (x `op` y)
          ) <|> ( do
            checkTrailingOp x
            return $ TC_Nullary id typ x
          )

    checkTrailingOp x = sequence_ $ do
        (T_Literal id str) <- getTrailingUnquotedLiteral x
        trailingOp <- find (`isSuffixOf` str) binaryTestOps
        return $ parseProblemAtId id ErrorC 1108 $
            "You need a space before and after the " ++ trailingOp ++ " ."

    readCondGroup = do
        start <- startSpan
        pos <- getPosition
        lparen <- try $ readRegularOrEscaped (string "(")
        when (single && lparen == "(") $
            singleWarning pos
        when (not single && lparen == "\\(") $
            doubleWarning pos
        condSpacing single
        x <- readCondContents
        cpos <- getPosition
        rparen <- readRegularOrEscaped (string ")")
        id <- endSpan start
        condSpacing single
        when (single && rparen == ")") $
            singleWarning cpos
        when (not single && rparen == "\\)") $
            doubleWarning cpos
        return $ TC_Group id typ x

      where
        singleWarning pos =
            parseProblemAt pos ErrorC 1028 "In [..] you have to escape \\( \\) or preferably combine [..] expressions."
        doubleWarning pos =
            parseProblemAt pos ErrorC 1029 "In [[..]] you shouldn't escape ( or )."


    -- Currently a bit of a hack since parsing rules are obscure
    regexOperatorAhead = lookAhead (do
        try (string "=~") <|> try (string "~=")
        return True)
          <|> return False
    readRegex = called "regex" $ do
        start <- startSpan
        parts <- many1 readPart
        id <- endSpan start
        void spacing
        return $ T_NormalWord id parts
      where
        readPart = choice [
            readGroup,
            readSingleQuoted,
            readDoubleQuoted,
            readDollarExpression,
            readLiteralForParser $ readNormalLiteral "( ",
            readLiteralString "|",
            readGlobLiteral
            ]
        readGlobLiteral = do
            start <- startSpan
            s <- extglobStart <|> oneOf "{}[]$"
            id <- endSpan start
            return $ T_Literal id [s]
        readGroup = called "regex grouping" $ do
            start <- startSpan
            p1 <- readLiteralString "("
            parts <- many (readPart <|> readRegexLiteral)
            p2 <- readLiteralString ")"
            id <- endSpan start
            return $ T_NormalWord id (p1:(parts ++ [p2]))
        readRegexLiteral = do
            start <- startSpan
            str <- readGenericLiteral1 (singleQuote <|> doubleQuotable <|> oneOf "()")
            id <- endSpan start
            return $ T_Literal id str
        readLiteralString s = do
            start <- startSpan
            str <- string s
            id <- endSpan start
            return $ T_Literal id str

    readCondTerm = do
        term <- readCondNot <|> readCondExpr
        condSpacing False
        return term

    readCondNot = do
        start <- startSpan
        char '!'
        id <- endSpan start
        spacingOrLf
        expr <- readCondExpr
        return $ TC_Unary id typ "!" expr

    readCondExpr =
      readCondGroup <|> readCondUnaryExp <|> readCondNullaryOrBinary

    readCondOr = chainl1 readCondAnd readCondAndOp
    readCondAnd = chainl1 readCondTerm readCondOrOp
    readCondContents = readCondOr


prop_a1 = isOk readArithmeticContents " n++ + ++c"
prop_a2 = isOk readArithmeticContents "$N*4-(3,2)"
prop_a3 = isOk readArithmeticContents "n|=2<<1"
prop_a4 = isOk readArithmeticContents "n &= 2 **3"
prop_a5 = isOk readArithmeticContents "1 |= 4 && n >>= 4"
prop_a6 = isOk readArithmeticContents " 1 | 2 ||3|4"
prop_a7 = isOk readArithmeticContents "3*2**10"
prop_a8 = isOk readArithmeticContents "3"
prop_a9 = isOk readArithmeticContents "a^!-b"
prop_a10= isOk readArithmeticContents "! $?"
prop_a11= isOk readArithmeticContents "10#08 * 16#f"
prop_a12= isOk readArithmeticContents "\"$((3+2))\" + '37'"
prop_a13= isOk readArithmeticContents "foo[9*y+x]++"
prop_a14= isOk readArithmeticContents "1+`echo 2`"
prop_a15= isOk readArithmeticContents "foo[`echo foo | sed s/foo/4/g` * 3] + 4"
prop_a16= isOk readArithmeticContents "$foo$bar"
prop_a17= isOk readArithmeticContents "i<(0+(1+1))"
prop_a18= isOk readArithmeticContents "a?b:c"
prop_a19= isOk readArithmeticContents "\\\n3 +\\\n  2"
prop_a20= isOk readArithmeticContents "a ? b ? c : d : e"
prop_a21= isOk readArithmeticContents "a ? b : c ? d : e"
prop_a22= isOk readArithmeticContents "!!a"
prop_a23= isOk readArithmeticContents "~0"
readArithmeticContents :: Monad m => SCParser m Token
readArithmeticContents =
    readSequence
  where
    spacing =
        let lf = try (string "\\\n") >> return '\n'
        in many (whitespace <|> lf)

    splitBy x ops = chainl1 x (readBinary ops)
    readBinary ops = readComboOp ops TA_Binary
    readComboOp op token = do
        start <- startSpan
        op <- choice (map (\x -> try $ do
                                        s <- string x
                                        failIfIncompleteOp
                                        return s
                            ) op)
        id <- endSpan start
        spacing
        return $ token id op

    failIfIncompleteOp = notFollowedBy2 $ oneOf "&|<>="

    -- Read binary minus, but also check for -lt, -gt and friends:
    readMinusOp = do
        start <- startSpan
        pos <- getPosition
        try $ do
            char '-'
            failIfIncompleteOp
        optional $ do
            (str, alt) <- lookAhead . choice $ map tryOp [
                ("lt", "<"),
                ("gt", ">"),
                ("le", "<="),
                ("ge", ">="),
                ("eq", "=="),
                ("ne", "!=")
              ]
            parseProblemAt pos ErrorC 1106 $ "In arithmetic contexts, use " ++ alt ++ " instead of -" ++ str
        id <- endSpan start
        spacing
        return $ TA_Binary id "-"
      where
        tryOp (str, alt) = try $ do
            string str
            spacing1
            return (str, alt)

    readArrayIndex = do
        start <- startSpan
        char '['
        pos <- getPosition
        middle <- readStringForParser readArithmeticContents
        char ']'
        id <- endSpan start
        return $ T_UnparsedIndex id pos middle

    literal s = do
        start <- startSpan
        string s
        id <- endSpan start
        return $ T_Literal id s

    readVariable = do
        start <- startSpan
        name <- readVariableName
        indices <- many readArrayIndex
        id <- endSpan start
        spacing
        return $ TA_Variable id name indices

    readExpansion = do
        start <- startSpan
        pieces <- many1 $ choice [
            readSingleQuoted,
            readDoubleQuoted,
            readNormalDollar,
            readBraced,
            readUnquotedBackTicked,
            literal "#",
            readNormalLiteral "+-*/=%^,]?:"
            ]
        id <- endSpan start
        spacing
        return $ TA_Expansion id pieces

    readGroup = do
        char '('
        s <- readSequence
        char ')'
        spacing
        return s

    readArithTerm = readGroup <|> readVariable <|> readExpansion

    readSequence = do
        spacing
        start <- startSpan
        l <- readAssignment `sepBy` (char ',' >> spacing)
        id <- endSpan start
        return $ TA_Sequence id l

    readAssignment = chainr1 readTrinary readAssignmentOp
    readAssignmentOp = readComboOp ["=", "*=", "/=", "%=", "+=", "-=", "<<=", ">>=", "&=", "^=", "|="] TA_Assignment

    readTrinary = do
        x <- readLogicalOr
        do
            start <- startSpan
            string "?"
            spacing
            y <- readTrinary
            string ":"
            spacing
            z <- readTrinary
            id <- endSpan start
            return $ TA_Trinary id x y z
         <|>
          return x

    readLogicalOr  = readLogicalAnd `splitBy` ["||"]
    readLogicalAnd = readBitOr `splitBy` ["&&"]
    readBitOr  = readBitXor `splitBy` ["|"]
    readBitXor = readBitAnd `splitBy` ["^"]
    readBitAnd = readEquated `splitBy` ["&"]
    readEquated = readCompared `splitBy` ["==", "!="]
    readCompared = readShift `splitBy` ["<=", ">=", "<", ">"]
    readShift = readAddition `splitBy` ["<<", ">>"]
    readAddition = chainl1 readMultiplication (readBinary ["+"] <|> readMinusOp)
    readMultiplication = readExponential `splitBy` ["*", "/", "%"]
    readExponential = readAnyNegated `splitBy` ["**"]

    readAnyNegated = readNegated <|> readAnySigned
    readNegated = do
        start <- startSpan
        op <- oneOf "!~"
        id <- endSpan start
        spacing
        x <- readAnyNegated
        return $ TA_Unary id [op] x

    readAnySigned = readSigned <|> readAnycremented
    readSigned = do
        start <- startSpan
        op <- choice (map readSignOp "+-")
        id <- endSpan start
        spacing
        x <- readAnycremented
        return $ TA_Unary id [op] x
     where
        readSignOp c = try $ do
            char c
            notFollowedBy2 $ char c
            spacing
            return c

    readAnycremented = readNormalOrPostfixIncremented <|> readPrefixIncremented
    readPrefixIncremented = do
        start <- startSpan
        op <- try $ string "++" <|> string "--"
        id <- endSpan start
        spacing
        x <- readArithTerm
        return $ TA_Unary id (op ++ "|") x

    readNormalOrPostfixIncremented = do
        x <- readArithTerm
        spacing
        do
            start <- startSpan
            op <- try $ string "++" <|> string "--"
            id <- endSpan start
            spacing
            return $ TA_Unary id ('|':op) x
         <|>
            return x


prop_readCondition   = isOk readCondition "[ \\( a = b \\) -a \\( c = d \\) ]"
prop_readCondition2  = isOk readCondition "[[ (a = b) || (c = d) ]]"
prop_readCondition3  = isOk readCondition "[[ $c = [[:alpha:].~-] ]]"
prop_readCondition4  = isOk readCondition "[[ $c =~ *foo* ]]"
prop_readCondition5  = isOk readCondition "[[ $c =~ f( ]] )* ]]"
prop_readCondition5a = isOk readCondition "[[ $c =~ a(b) ]]"
prop_readCondition5b = isOk readCondition "[[ $c =~ f( ($var ]]) )* ]]"
prop_readCondition6  = isOk readCondition "[[ $c =~ ^[yY]$ ]]"
prop_readCondition7  = isOk readCondition "[[ ${line} =~ ^[[:space:]]*# ]]"
prop_readCondition8  = isOk readCondition "[[ $l =~ ogg|flac ]]"
prop_readCondition9  = isOk readCondition "[ foo -a -f bar ]"
prop_readCondition10 = isOk readCondition "[[\na == b\n||\nc == d ]]"
prop_readCondition10a= isOk readCondition "[[\na == b  ||\nc == d ]]"
prop_readCondition10b= isOk readCondition "[[ a == b\n||\nc == d ]]"
prop_readCondition11 = isOk readCondition "[[ a == b ||\n c == d ]]"
prop_readCondition12 = isWarning readCondition "[ a == b \n -o c == d ]"
prop_readCondition13 = isOk readCondition "[[ foo =~ ^fo{1,3}$ ]]"
prop_readCondition14 = isOk readCondition "[ foo '>' bar ]"
prop_readCondition15 = isOk readCondition "[ foo \">=\" bar ]"
prop_readCondition16 = isOk readCondition "[ foo \\< bar ]"
prop_readCondition17 = isOk readCondition "[[ ${file::1} = [-.\\|/\\\\] ]]"
prop_readCondition18 = isOk readCondition "[ ]"
prop_readCondition19 = isOk readCondition "[ '(' x \")\" ]"
prop_readCondition20 = isOk readCondition "[[ echo_rc -eq 0 ]]"
prop_readCondition21 = isOk readCondition "[[ $1 =~ ^(a\\ b)$ ]]"
prop_readCondition22 = isOk readCondition "[[ $1 =~ \\.a\\.(\\.b\\.)\\.c\\. ]]"
prop_readCondition23 = isOk readCondition "[[ -v arr[$var] ]]"
prop_readCondition25 = isOk readCondition "[[ lex.yy.c -ot program.l ]]"
prop_readCondition26 = isOk readScript "[[ foo ]]\\\n && bar"
prop_readCondition27 = not $ isOk readConditionCommand "[[ x ]] foo"
readCondition = called "test expression" $ do
    opos <- getPosition
    start <- startSpan
    open <- try (string "[[") <|> string "["
    let single = open == "["
    let typ = if single then SingleBracket else DoubleBracket

    pos <- getPosition
    space <- allspacing
    when (null space) $
        parseProblemAtWithEnd opos pos ErrorC 1035 $ "You need a space after the " ++
            if single
                then "[ and before the ]."
                else "[[ and before the ]]."
    when (single && '\n' `elem` space) $
        parseProblemAt pos ErrorC 1080 "You need \\ before line feeds to break lines in [ ]."

    condition <- readConditionContents single <|> do
        guard . not . null $ space
        lookAhead $ string "]"
        id <- endSpan start
        return $ TC_Empty id typ

    cpos <- getPosition
    close <- try (string "]]") <|> string "]" <|> fail "Expected test to end here (don't wrap commands in []/[[]])"
    id <- endSpan start
    when (open == "[[" && close /= "]]") $ parseProblemAt cpos ErrorC 1033 "Test expression was opened with double [[ but closed with single ]. Make sure they match."
    when (open == "[" && close /= "]" ) $ parseProblemAt opos ErrorC 1034 "Test expression was opened with single [ but closed with double ]]. Make sure they match."
    spacing
    return $ T_Condition id typ condition

readAnnotationPrefix = do
    char '#'
    many linewhitespace
    string "shellcheck"

prop_readAnnotation1 = isOk readAnnotation "# shellcheck disable=1234,5678\n"
prop_readAnnotation2 = isOk readAnnotation "# shellcheck disable=SC1234 disable=SC5678\n"
prop_readAnnotation3 = isOk readAnnotation "# shellcheck disable=SC1234 source=/dev/null disable=SC5678\n"
prop_readAnnotation4 = isWarning readAnnotation "# shellcheck cats=dogs disable=SC1234\n"
prop_readAnnotation5 = isOk readAnnotation "# shellcheck disable=SC2002 # All cats are precious\n"
prop_readAnnotation6 = isOk readAnnotation "# shellcheck disable=SC1234 # shellcheck foo=bar\n"
prop_readAnnotation7 = isOk readAnnotation "# shellcheck disable=SC1000,SC2000-SC3000,SC1001\n"
readAnnotation = called "shellcheck directive" $ do
    try readAnnotationPrefix
    many1 linewhitespace
    readAnnotationWithoutPrefix

readAnnotationWithoutPrefix = do
    values <- many1 readKey
    optional readAnyComment
    void linefeed <|> eof <|> do
        parseNote ErrorC 1125 "Invalid key=value pair? Ignoring the rest of this directive starting here."
        many (noneOf "\n")
        void linefeed <|> eof
    many linewhitespace
    return $ concat values
  where
    readKey = do
        keyPos <- getPosition
        key <- many1 (letter <|> char '-')
        char '=' <|> fail "Expected '=' after directive key"
        annotations <- case key of
            "disable" -> readRange `sepBy` char ','
              where
                readRange = do
                    from <- readCode
                    to <- choice [ char '-' *> readCode, return $ from+1 ]
                    return $ DisableComment from to
                readCode = do
                    optional $ string "SC"
                    int <- many1 digit
                    return $ read int

            "enable" -> readName `sepBy` char ','
              where
                readName = EnableComment <$> many1 (letter <|> char '-')

            "source" -> do
                filename <- many1 $ noneOf " \n"
                return [SourceOverride filename]

            "source-path" -> do
                dirname <- many1 $ noneOf " \n"
                return [SourcePath dirname]

            "shell" -> do
                pos <- getPosition
                shell <- many1 $ noneOf " \n"
                when (isNothing $ shellForExecutable shell) $
                    parseNoteAt pos ErrorC 1103
                        "This shell type is unknown. Use e.g. sh or bash."
                return [ShellOverride shell]

            _ -> do
                parseNoteAt keyPos WarningC 1107 "This directive is unknown. It will be ignored."
                anyChar `reluctantlyTill` whitespace
                return []

        many linewhitespace
        return annotations

readAnnotations = do
    annotations <- many (readAnnotation `thenSkip` allspacing)
    return $ concat annotations

readComment = do
    unexpecting "shellcheck annotation" readAnnotationPrefix
    readAnyComment

prop_readAnyComment = isOk readAnyComment "# Comment"
readAnyComment = do
    char '#'
    many $ noneOf "\r\n"

prop_readNormalWord = isOk readNormalWord "'foo'\"bar\"{1..3}baz$(lol)"
prop_readNormalWord2 = isOk readNormalWord "foo**(foo)!!!(@@(bar))"
prop_readNormalWord3 = isOk readNormalWord "foo#"
prop_readNormalWord4 = isOk readNormalWord "$\"foo\"$'foo\nbar'"
prop_readNormalWord5 = isWarning readNormalWord "${foo}}"
prop_readNormalWord6 = isOk readNormalWord "foo/{}"
prop_readNormalWord7 = isOk readNormalWord "foo\\\nbar"
prop_readNormalWord8 = isWarning readSubshell "(foo\\ \nbar)"
prop_readNormalWord9 = isOk readSubshell "(foo\\ ;\nbar)"
prop_readNormalWord10 = isWarning readNormalWord "\x201Chello\x201D"
prop_readNormalWord11 = isWarning readNormalWord "\x2018hello\x2019"
prop_readNormalWord12 = isWarning readNormalWord "hello\x2018"
readNormalWord = readNormalishWord "" ["do", "done", "then", "fi", "esac"]

readPatternWord = readNormalishWord "" ["esac"]

readNormalishWord end terms = do
    start <- startSpan
    pos <- getPosition
    x <- many1 (readNormalWordPart end)
    id <- endSpan start
    checkPossibleTermination pos x terms
    return $ T_NormalWord id x

readIndexSpan = do
    start <- startSpan
    x <- many (readNormalWordPart "]" <|> someSpace <|> otherLiteral)
    id <- endSpan start
    return $ T_NormalWord id x
  where
    someSpace = do
        start <- startSpan
        str <- spacing1
        id <- endSpan start
        return $ T_Literal id str
    otherLiteral = do
        start <- startSpan
        str <- many1 $ oneOf quotableChars
        id <- endSpan start
        return $ T_Literal id str

checkPossibleTermination pos [T_Literal _ x] terminators =
    when (x `elem` terminators) $
        parseProblemAt pos WarningC 1010 $ "Use semicolon or linefeed before '" ++ x ++ "' (or quote to make it literal)."
checkPossibleTermination _ _ _ = return ()

readNormalWordPart end = do
    notFollowedBy2 $ oneOf end
    checkForParenthesis
    choice [
        readSingleQuoted,
        readDoubleQuoted,
        readGlob,
        readNormalDollar,
        readBraced,
        readUnquotedBackTicked,
        readProcSub,
        readUnicodeQuote,
        readNormalLiteral end,
        readLiteralCurlyBraces
      ]
  where
    checkForParenthesis =
        return () `attempting` do
            pos <- getPosition
            lookAhead $ char '('
            parseProblemAt pos ErrorC 1036 "'(' is invalid here. Did you forget to escape it?"

    readLiteralCurlyBraces = do
        start <- startSpan
        str <- findParam <|> literalBraces
        id <- endSpan start
        return $ T_Literal id str

    findParam = try $ string "{}"
    literalBraces = do
        pos <- getPosition
        c <- oneOf "{}"
        parseProblemAt pos WarningC 1083 $
            "This " ++ [c] ++ " is literal. Check expression (missing ;/\\n?) or quote it."
        return [c]


readSpacePart = do
    start <- startSpan
    x <- many1 whitespace
    id <- endSpan start
    return $ T_Literal id x

readDollarBracedWord = do
    start <- startSpan
    list <- many readDollarBracedPart
    id <- endSpan start
    return $ T_NormalWord id list

readDollarBracedPart = readSingleQuoted <|> readDoubleQuoted <|>
                       readParamSubSpecialChar <|> readExtglob <|> readNormalDollar <|>
                       readUnquotedBackTicked <|> readDollarBracedLiteral

readDollarBracedLiteral = do
    start <- startSpan
    vars <- (readBraceEscaped <|> (anyChar >>= \x -> return [x])) `reluctantlyTill1` bracedQuotable
    id <- endSpan start
    return $ T_Literal id $ concat vars

readParamSubSpecialChar = do
    start <- startSpan
    x <- many1 paramSubSpecialChars
    id <- endSpan start
    return $ T_ParamSubSpecialChar id x

prop_readProcSub1 = isOk readProcSub "<(echo test | wc -l)"
prop_readProcSub2 = isOk readProcSub "<(  if true; then true; fi )"
prop_readProcSub3 = isOk readProcSub "<( # nothing here \n)"
readProcSub = called "process substitution" $ do
    start <- startSpan
    dir <- try $ do
                    x <- oneOf "<>"
                    char '('
                    return [x]
    list <- readCompoundListOrEmpty
    allspacing
    char ')'
    id <- endSpan start
    return $ T_ProcSub id dir list

prop_readSingleQuoted = isOk readSingleQuoted "'foo bar'"
prop_readSingleQuoted2 = isWarning readSingleQuoted "'foo bar\\'"
prop_readSingleQuoted4 = isWarning readNormalWord "'it's"
prop_readSingleQuoted5 = isWarning readSimpleCommand "foo='bar\ncow 'arg"
prop_readSingleQuoted6 = isOk readSimpleCommand "foo='bar cow 'arg"
prop_readSingleQuoted7 = isOk readSingleQuoted "'foo\x201C\&bar'"
prop_readSingleQuoted8 = isWarning readSingleQuoted "'foo\x2018\&bar'"
readSingleQuoted = called "single quoted string" $ do
    start <- startSpan
    startPos <- getPosition
    singleQuote
    s <- many readSingleQuotedPart
    let string = concat s
    endPos <- getPosition
    singleQuote <|> fail "Expected end of single quoted string"

    optional $ do
        c <- try . lookAhead $ suspectCharAfterQuotes <|> oneOf "'"
        if not (null string) && isAlpha c && isAlpha (last string)
          then
            parseProblemAt endPos WarningC 1011
                "This apostrophe terminated the single quoted string!"
          else
            when ('\n' `elem` string && not ("\n" `isPrefixOf` string)) $
                suggestForgotClosingQuote startPos endPos "single quoted string"

    id <- endSpan start
    return (T_SingleQuoted id string)

readSingleQuotedLiteral = do
    singleQuote
    strs <- many1 readSingleQuotedPart
    singleQuote
    return $ concat strs

readSingleQuotedPart =
    readSingleEscaped
    <|> many1 (noneOf $ "'\\" ++ unicodeSingleQuotes)
    <|> readUnicodeQuote
   where
    readUnicodeQuote = do
        pos <- getPosition
        x <- oneOf unicodeSingleQuotes
        parseProblemAt pos WarningC 1112
            "This is a unicode quote. Delete and retype it (or ignore/doublequote for literal)."
        return [x]


prop_readBackTicked = isOk (readBackTicked False) "`ls *.mp3`"
prop_readBackTicked2 = isOk (readBackTicked False) "`grep \"\\\"\"`"
prop_readBackTicked3 = isWarning (readBackTicked False) "´grep \"\\\"\"´"
prop_readBackTicked4 = isOk readSimpleCommand "`echo foo\necho bar`"
prop_readBackTicked5 = isOk readSimpleCommand "echo `foo`bar"
prop_readBackTicked6 = isWarning readSimpleCommand "echo `foo\necho `bar"
prop_readBackTicked7 = isOk readSimpleCommand "`#inline comment`"
prop_readBackTicked8 = isOk readSimpleCommand "echo `#comment` \\\nbar baz"
readQuotedBackTicked = readBackTicked True
readUnquotedBackTicked = readBackTicked False
readBackTicked quoted = called "backtick expansion" $ do
    start <- startSpan
    startPos <- getPosition
    backtick
    subStart <- getPosition
    subString <- readGenericLiteral "`´"
    endPos <- getPosition
    backtick
    id <- endSpan start

    optional $ do
        c <- try . lookAhead $ suspectCharAfterQuotes
        when ('\n' `elem` subString && not ("\n" `isPrefixOf` subString)) $
            suggestForgotClosingQuote startPos endPos "backtick expansion"

    -- Result positions may be off due to escapes
    result <- subParse subStart (tryWithErrors subParser <|> return []) (unEscape subString)
    return $ T_Backticked id result
  where
    unEscape [] = []
    unEscape ('\\':'"':rest) | quoted = '"' : unEscape rest
    unEscape ('\\':x:rest) | x `elem` "$`\\" = x : unEscape rest
    unEscape ('\\':'\n':rest) = unEscape rest
    unEscape (c:rest) = c : unEscape rest
    subParser = do
        cmds <- readCompoundListOrEmpty
        verifyEof
        return cmds
    backtick =
      void (char '`') <|> do
         pos <- getPosition
         char '´'
         parseProblemAt pos ErrorC 1077
            "For command expansion, the tick should slant left (` vs ´). Use $(..) instead."

-- Run a parser on a new input, such as for `..` or here documents.
subParse pos parser input = do
    lastPosition <- getPosition
    lastInput <- getInput
    setPosition pos
    setInput input
    result <- parser
    setInput lastInput
    setPosition lastPosition
    return result

-- Parse something, but forget all parseProblems
inSeparateContext = parseForgettingContext True
-- Parse something, but forget all parseProblems on failure
forgetOnFailure = parseForgettingContext False

parseForgettingContext alsoOnSuccess parser = do
    context <- Ms.get
    success context <|> failure context
  where
    success c = do
        res <- try parser
        when alsoOnSuccess $ Ms.put c
        return res
    failure c = do
        Ms.put c
        fail ""

prop_readDoubleQuoted = isOk readDoubleQuoted "\"Hello $FOO\""
prop_readDoubleQuoted2 = isOk readDoubleQuoted "\"$'\""
prop_readDoubleQuoted3 = isOk readDoubleQuoted "\"\x2018hello\x2019\""
prop_readDoubleQuoted4 = isWarning readSimpleCommand "\"foo\nbar\"foo"
prop_readDoubleQuoted5 = isOk readSimpleCommand "lol \"foo\nbar\" etc"
prop_readDoubleQuoted6 = isOk readSimpleCommand "echo \"${ ls; }\""
prop_readDoubleQuoted7 = isOk readSimpleCommand "echo \"${ ls;}bar\""
prop_readDoubleQuoted8 = isWarning readDoubleQuoted "\"\x201Chello\x201D\""
prop_readDoubleQuoted10 = isOk readDoubleQuoted "\"foo\\\\n\""
readDoubleQuoted = called "double quoted string" $ do
    start <- startSpan
    startPos <- getPosition
    doubleQuote
    x <- many doubleQuotedPart
    endPos <- getPosition
    doubleQuote <|> fail "Expected end of double quoted string"
    id <- endSpan start
    optional $ do
        try . lookAhead $ suspectCharAfterQuotes <|> oneOf "$\""
        when (any hasLineFeed x && not (startsWithLineFeed x)) $
            suggestForgotClosingQuote startPos endPos "double quoted string"
    return $ T_DoubleQuoted id x
  where
    startsWithLineFeed (T_Literal _ ('\n':_):_) = True
    startsWithLineFeed _ = False
    hasLineFeed (T_Literal _ str) | '\n' `elem` str = True
    hasLineFeed _ = False

suggestForgotClosingQuote startPos endPos name = do
    parseProblemAt startPos WarningC 1078 $
        "Did you forget to close this " ++ name ++ "?"
    parseProblemAt endPos InfoC 1079
        "This is actually an end quote, but due to next char it looks suspect."

doubleQuotedPart = readDoubleLiteral <|> readDoubleQuotedDollar <|> readQuotedBackTicked <|> readUnicodeQuote
  where
    readUnicodeQuote = do
        pos <- getPosition
        start <- startSpan
        c <- oneOf unicodeDoubleQuotes
        id <- endSpan start
        parseProblemAt pos WarningC 1111
            "This is a unicode quote. Delete and retype it (or ignore/singlequote for literal)."
        return $ T_Literal id [c]

readDoubleLiteral = do
    start <- startSpan
    s <- many1 readDoubleLiteralPart
    id <- endSpan start
    return $ T_Literal id (concat s)

readDoubleLiteralPart = do
    x <- many1 (readDoubleEscaped <|> many1 (noneOf (doubleQuotableChars ++ unicodeDoubleQuotes)))
    return $ concat x

readNormalLiteral end = do
    start <- startSpan
    s <- many1 (readNormalLiteralPart end)
    id <- endSpan start
    return $ T_Literal id (concat s)

prop_readGlob1 = isOk readGlob "*"
prop_readGlob2 = isOk readGlob "[^0-9]"
prop_readGlob3 = isOk readGlob "[a[:alpha:]]"
prop_readGlob4 = isOk readGlob "[[:alnum:]]"
prop_readGlob5 = isOk readGlob "[^[:alpha:]1-9]"
prop_readGlob6 = isOk readGlob "[\\|]"
prop_readGlob7 = isOk readGlob "[^[]"
prop_readGlob8 = isOk readGlob "[*?]"
readGlob = readExtglob <|> readSimple <|> readClass <|> readGlobbyLiteral
    where
        readSimple = do
            start <- startSpan
            c <- oneOf "*?"
            id <- endSpan start
            return $ T_Glob id [c]
        -- Doesn't handle weird things like [^]a] and [$foo]. fixme?
        readClass = try $ do
            start <- startSpan
            char '['
            s <- many1 (predefined <|> readNormalLiteralPart "]" <|> globchars)
            char ']'
            id <- endSpan start
            return $ T_Glob id $ "[" ++ concat s ++ "]"
          where
           globchars = fmap return . oneOf $ "!$[" ++ extglobStartChars
           predefined = do
              try $ string "[:"
              s <- many1 letter
              string ":]"
              return $ "[:" ++ s ++ ":]"

        readGlobbyLiteral = do
            start <- startSpan
            c <- extglobStart <|> char '['
            id <- endSpan start
            return $ T_Literal id [c]

readNormalLiteralPart customEnd =
    readNormalEscaped <|>
        many1 (noneOf (customEnd ++ standardEnd))
  where
    standardEnd = "[{}"
        ++ quotableChars
        ++ extglobStartChars
        ++ unicodeDoubleQuotes
        ++ unicodeSingleQuotes

readNormalEscaped = called "escaped char" $ do
    pos <- getPosition
    backslash
    do
        next <- quotable <|> oneOf "?*@!+[]{}.,~#"
        when (next == ' ') $ checkTrailingSpaces pos <|> return ()
        -- Check if this line is followed by a commented line with a trailing backslash
        when (next == '\n') $ try . lookAhead $ void spacing
        return $ if next == '\n' then "" else [next]
      <|>
        do
            next <- anyChar
            case escapedChar next of
                Just name -> parseNoteAt pos WarningC 1012 $ "\\" ++ [next] ++ " is just literal '" ++ [next] ++ "' here. For " ++ name ++ ", use " ++ alternative next ++ " instead."
                Nothing -> parseNoteAt pos InfoC 1001 $ "This \\" ++ [next] ++ " will be a regular '" ++ [next] ++ "' in this context."
            return [next]
  where
    alternative 'n' = "a quoted, literal line feed"
    alternative t = "\"$(printf '\\" ++ [t] ++ "')\""
    escapedChar 'n' = Just "line feed"
    escapedChar 't' = Just "tab"
    escapedChar 'r' = Just "carriage return"
    escapedChar _ = Nothing

    checkTrailingSpaces pos = lookAhead . try $ do
        many linewhitespace
        void linefeed <|> eof
        parseProblemAt pos ErrorC 1101 "Delete trailing spaces after \\ to break line (or use quotes for literal space)."


prop_readExtglob1 = isOk readExtglob "!(*.mp3)"
prop_readExtglob2 = isOk readExtglob "!(*.mp3|*.wmv)"
prop_readExtglob4 = isOk readExtglob "+(foo \\) bar)"
prop_readExtglob5 = isOk readExtglob "+(!(foo *(bar)))"
prop_readExtglob6 = isOk readExtglob "*(((||))|())"
prop_readExtglob7 = isOk readExtglob "*(<>)"
prop_readExtglob8 = isOk readExtglob "@(|*())"
readExtglob = called "extglob" $ do
    start <- startSpan
    c <- try $ do
            f <- extglobStart
            char '('
            return f
    contents <- readExtglobPart `sepBy` char '|'
    char ')'
    id <- endSpan start
    return $ T_Extglob id [c] contents

readExtglobPart = do
    start <- startSpan
    x <- many (readExtglobGroup <|> readNormalWordPart "" <|> readSpacePart <|> readExtglobLiteral)
    id <- endSpan start
    return $ T_NormalWord id x
  where
    readExtglobGroup = do
        char '('
        start <- startSpan
        contents <- readExtglobPart `sepBy` char '|'
        id <- endSpan start
        char ')'
        return $ T_Extglob id "" contents
    readExtglobLiteral = do
        start <- startSpan
        str <- many1 (oneOf "<>#;&")
        id <- endSpan start
        return $ T_Literal id str


readSingleEscaped = do
    pos <- getPosition
    s <- backslash
    x <- lookAhead anyChar

    case x of
        '\'' -> parseProblemAt pos InfoC 1003 "Want to escape a single quote? echo 'This is how it'\\''s done'.";
        '\n' -> parseProblemAt pos InfoC 1004 "This backslash+linefeed is literal. Break outside single quotes if you just want to break the line."
        _ -> return ()

    return [s]

readDoubleEscaped = do
    pos <- getPosition
    bs <- backslash
    (linefeed >> return "")
        <|> fmap return doubleQuotable
        <|> do
            c <- anyChar
            -- This is an invalid escape sequence where the \ is literal.
            -- Previously this caused a SC1117, which may be re-enabled as
            -- as a pedantic warning.
            return [bs, c]

readBraceEscaped = do
    bs <- backslash
    (linefeed >> return "")
        <|> fmap return bracedQuotable
        <|> fmap (\ x -> [bs, x]) anyChar


readGenericLiteral endChars = do
    strings <- many (readGenericEscaped <|> many1 (noneOf ('\\':endChars)))
    return $ concat strings

readGenericLiteral1 endExp = do
    strings <- (readGenericEscaped <|> (anyChar >>= \x -> return [x])) `reluctantlyTill1` endExp
    return $ concat strings

readGenericEscaped = do
    backslash
    x <- anyChar
    return $ if x == '\n' then [] else ['\\', x]

prop_readBraced = isOk readBraced "{1..4}"
prop_readBraced2 = isOk readBraced "{foo,bar,\"baz lol\"}"
prop_readBraced3 = isOk readBraced "{1,\\},2}"
prop_readBraced4 = isOk readBraced "{1,{2,3}}"
prop_readBraced5 = isOk readBraced "{JP{,E}G,jp{,e}g}"
prop_readBraced6 = isOk readBraced "{foo,bar,$((${var}))}"
prop_readBraced7 = isNotOk readBraced "{}"
prop_readBraced8 = isNotOk readBraced "{foo}"
readBraced = try braceExpansion
  where
    braceExpansion =
        T_BraceExpansion `withParser` do
            char '{'
            elements <- bracedElement `sepBy1` char ','
            guard $
                case elements of
                    (_:_:_) -> True
                    [t] -> ".." `isInfixOf` onlyLiteralString t
                    [] -> False
            char '}'
            return elements
    bracedElement =
        T_NormalWord `withParser` do
            many $ choice [
                braceExpansion,
                readDollarExpression,
                readSingleQuoted,
                readDoubleQuoted,
                braceLiteral
                ]
    braceLiteral =
        T_Literal `withParser` readGenericLiteral1 (oneOf "{}\"$'," <|> whitespace)

ensureDollar =
    -- The grammar should have been designed along the lines of readDollarExpr = char '$' >> stuff, but
    -- instead, each subunit parses its own $. This results in ~7 1-3 char lookaheads instead of one 1-char.
    -- Instead of optimizing the grammar, here's a green cut that decreases shellcheck runtime by 10%:
    lookAhead $ char '$'

readNormalDollar = do
    ensureDollar
    readDollarExp <|> readDollarDoubleQuote <|> readDollarSingleQuote <|> readDollarLonely False
readDoubleQuotedDollar = do
    ensureDollar
    readDollarExp <|> readDollarLonely True


prop_readDollarExpression1 = isOk readDollarExpression "$(((1) && 3))"
prop_readDollarExpression2 = isWarning readDollarExpression "$(((1)) && 3)"
prop_readDollarExpression3 = isWarning readDollarExpression "$((\"$@\" &); foo;)"
readDollarExpression :: Monad m => SCParser m Token
readDollarExpression = do
    ensureDollar
    readDollarExp

readDollarExp = arithmetic <|> readDollarExpansion <|> readDollarBracket <|> readDollarBraceCommandExpansion <|> readDollarBraced <|> readDollarVariable
  where
    arithmetic = readAmbiguous "$((" readDollarArithmetic readDollarExpansion (\pos ->
        parseNoteAt pos ErrorC 1102 "Shells disambiguate $(( differently or not at all. For $(command substitution), add space after $( . For $((arithmetics)), fix parsing errors.")

prop_readDollarSingleQuote = isOk readDollarSingleQuote "$'foo\\\'lol'"
readDollarSingleQuote = called "$'..' expression" $ do
    start <- startSpan
    try $ string "$'"
    str <- readGenericLiteral "'"
    char '\''
    id <- endSpan start
    return $ T_DollarSingleQuoted id str

prop_readDollarDoubleQuote = isOk readDollarDoubleQuote "$\"hello\""
readDollarDoubleQuote = do
    lookAhead . try $ string "$\""
    start <- startSpan
    char '$'
    doubleQuote
    x <- many doubleQuotedPart
    doubleQuote <|> fail "Expected end of translated double quoted string"
    id <- endSpan start
    return $ T_DollarDoubleQuoted id x

prop_readDollarArithmetic = isOk readDollarArithmetic "$(( 3 * 4 +5))"
prop_readDollarArithmetic2 = isOk readDollarArithmetic "$(((3*4)+(1*2+(3-1))))"
readDollarArithmetic = called "$((..)) expression" $ do
    start <- startSpan
    try (string "$((")
    c <- readArithmeticContents
    pos <- getPosition
    char ')'
    char ')' <|> fail "Expected a double )) to end the $((..))"
    id <- endSpan start
    return (T_DollarArithmetic id c)

readDollarBracket = called "$[..] expression" $ do
    start <- startSpan
    try (string "$[")
    c <- readArithmeticContents
    string "]"
    id <- endSpan start
    return (T_DollarBracket id c)

prop_readArithmeticExpression = isOk readArithmeticExpression "((a?b:c))"
readArithmeticExpression = called "((..)) command" $ do
    start <- startSpan
    try (string "((")
    c <- readArithmeticContents
    string "))"
    id <- endSpan start
    spacing
    return (T_Arithmetic id c)

-- If the next characters match prefix, try two different parsers and warn if the alternate parser had to be used
readAmbiguous :: Monad m => String -> SCParser m p -> SCParser m p -> (SourcePos -> SCParser m ()) -> SCParser m p
readAmbiguous prefix expected alternative warner = do
    pos <- getPosition
    try . lookAhead $ string prefix
    -- If the expected parser fails, try the alt.
    -- If the alt fails, run the expected one again for the errors.
    try expected <|> try (withAlt pos) <|> expected
  where
    withAlt pos = do
        t <- forgetOnFailure alternative
        warner pos
        return t

prop_readDollarBraceCommandExpansion1 = isOk readDollarBraceCommandExpansion "${ ls; }"
prop_readDollarBraceCommandExpansion2 = isOk readDollarBraceCommandExpansion "${\nls\n}"
readDollarBraceCommandExpansion = called "ksh ${ ..; } command expansion" $ do
    start <- startSpan
    try $ do
        string "${"
        whitespace
    allspacing
    term <- readTerm
    char '}' <|> fail "Expected } to end the ksh ${ ..; } command expansion"
    id <- endSpan start
    return $ T_DollarBraceCommandExpansion id term

prop_readDollarBraced1 = isOk readDollarBraced "${foo//bar/baz}"
prop_readDollarBraced2 = isOk readDollarBraced "${foo/'{cow}'}"
prop_readDollarBraced3 = isOk readDollarBraced "${foo%%$(echo cow\\})}"
prop_readDollarBraced4 = isOk readDollarBraced "${foo#\\}}"
readDollarBraced = called "parameter expansion" $ do
    start <- startSpan
    try (string "${")
    word <- readDollarBracedWord
    char '}'
    id <- endSpan start
    return $ T_DollarBraced id True word

prop_readDollarExpansion1= isOk readDollarExpansion "$(echo foo; ls\n)"
prop_readDollarExpansion2= isOk readDollarExpansion "$(  )"
prop_readDollarExpansion3= isOk readDollarExpansion "$( command \n#comment \n)"
readDollarExpansion = called "command expansion" $ do
    start <- startSpan
    try (string "$(")
    cmds <- readCompoundListOrEmpty
    char ')' <|> fail "Expected end of $(..) expression"
    id <- endSpan start
    return $ T_DollarExpansion id cmds

prop_readDollarVariable = isOk readDollarVariable "$@"
prop_readDollarVariable2 = isOk (readDollarVariable >> anyChar) "$?!"
prop_readDollarVariable3 = isWarning (readDollarVariable >> anyChar) "$10"
prop_readDollarVariable4 = isWarning (readDollarVariable >> string "[@]") "$arr[@]"
prop_readDollarVariable5 = isWarning (readDollarVariable >> string "[f") "$arr[f"

readDollarVariable :: Monad m => SCParser m Token
readDollarVariable = do
    start <- startSpan
    pos <- getPosition

    let singleCharred p = do
        value <- wrapString ((:[]) <$> p)
        id <- endSpan start
        return $ (T_DollarBraced id False value)

    let positional = do
        value <- singleCharred digit
        return value `attempting` do
            lookAhead digit
            parseNoteAt pos ErrorC 1037 "Braces are required for positionals over 9, e.g. ${10}."

    let special = singleCharred specialVariable

    let regular = do
        value <- wrapString readVariableName
        id <- endSpan start
        return (T_DollarBraced id False value) `attempting` do
            lookAhead $ char '['
            parseNoteAt pos ErrorC 1087 "Use braces when expanding arrays, e.g. ${array[idx]} (or ${var}[.. to quiet)."

    try $ char '$' >> (positional <|> special <|> regular)

  where
    wrapString p = do
        start <- getPosition
        s <- p
        end <- getPosition
        id1 <- getNextIdBetween start end
        id2 <- getNextIdBetween start end
        return $ T_NormalWord id1 [T_Literal id2 s]

readVariableName = do
    f <- variableStart
    rest <- many variableChars
    return (f:rest)


prop_readDollarLonely1 = isWarning readNormalWord "\"$\"var"
prop_readDollarLonely2 = isWarning readNormalWord "\"$\"\"var\""
prop_readDollarLonely3 = isOk readNormalWord "\"$\"$var"
prop_readDollarLonely4 = isOk readNormalWord "\"$\"*"
prop_readDollarLonely5 = isOk readNormalWord "$\"str\""
readDollarLonely quoted = do
    start <- startSpan
    char '$'
    id <- endSpan start
    when quoted $ do
        isHack <- quoteForEscape
        when isHack $
            parseProblemAtId id StyleC 1135
                "Prefer escape over ending quote to make $ literal. Instead of \"It costs $\"5, use \"It costs \\$5\"."
    return $ T_Literal id "$"
  where
    quoteForEscape = option False $ try . lookAhead $ do
        char '"'
        -- Check for "foo $""bar"
        optional $ char '"'
        c <- anyVar
        -- Don't trigger on [[ x == "$"* ]] or "$"$pattern
        return $ c `notElem` "*$"
    anyVar = variableStart <|> digit <|> specialVariable


prop_readHereDoc = isOk readScript "cat << foo\nlol\ncow\nfoo"
prop_readHereDoc2 = isNotOk readScript "cat <<- EOF\n  cow\n  EOF"
prop_readHereDoc3 = isOk readScript "cat << foo\n$\"\nfoo"
prop_readHereDoc4 = isNotOk readScript "cat << foo\n`\nfoo"
prop_readHereDoc5 = isOk readScript "cat <<- !foo\nbar\n!foo"
prop_readHereDoc6 = isOk readScript "cat << foo\\ bar\ncow\nfoo bar"
prop_readHereDoc7 = isOk readScript "cat << foo\n\\$(f ())\nfoo"
prop_readHereDoc8 = isOk readScript "cat <<foo>>bar\netc\nfoo"
prop_readHereDoc9 = isOk readScript "if true; then cat << foo; fi\nbar\nfoo\n"
prop_readHereDoc10= isOk readScript "if true; then cat << foo << bar; fi\nfoo\nbar\n"
prop_readHereDoc11= isOk readScript "cat << foo $(\nfoo\n)lol\nfoo\n"
prop_readHereDoc12= isOk readScript "cat << foo|cat\nbar\nfoo"
prop_readHereDoc13= isOk readScript "cat <<'#!'\nHello World\n#!\necho Done"
prop_readHereDoc14= isWarning readScript "cat << foo\nbar\nfoo \n"
prop_readHereDoc15= isWarning readScript "cat <<foo\nbar\nfoo bar\nfoo"
prop_readHereDoc16= isOk readScript "cat <<- ' foo'\nbar\n foo\n"
prop_readHereDoc17= isWarning readScript "cat <<- ' foo'\nbar\n  foo\n foo\n"
prop_readHereDoc18= isOk readScript "cat <<'\"foo'\nbar\n\"foo\n"
prop_readHereDoc20= isWarning readScript "cat << foo\n  foo\n()\nfoo\n"
prop_readHereDoc21= isOk readScript "# shellcheck disable=SC1039\ncat << foo\n  foo\n()\nfoo\n"
prop_readHereDoc22 = isWarning readScript "cat << foo\r\ncow\r\nfoo\r\n"
prop_readHereDoc23 = isNotOk readScript "cat << foo \r\ncow\r\nfoo\r\n"
readHereDoc = called "here document" $ do
    pos <- getPosition
    try $ string "<<"
    dashed <- (char '-' >> return Dashed) <|> return Undashed
    sp <- spacing
    optional $ do
        try . lookAhead $ char '('
        let message = "Shells are space sensitive. Use '< <(cmd)', not '<<" ++ sp ++ "(cmd)'."
        parseProblemAt pos ErrorC 1038 message
    start <- startSpan
    (quoted, endToken) <- readToken
    hid <- endSpan start

    -- add empty tokens for now, read the rest in readPendingHereDocs
    let doc = T_HereDoc hid dashed quoted endToken []
    addPendingHereDoc doc
    return doc
  where
    unquote :: String -> (Quoted, String)
    unquote "" = (Unquoted, "")
    unquote [c] = (Unquoted, [c])
    unquote s@(cl:tl) =
      case reverse tl of
        (cr:tr) | cr == cl && cl `elem` "\"'" -> (Quoted, reverse tr)
        _ -> (if '\\' `elem` s then (Quoted, filter ((/=) '\\') s) else (Unquoted, s))
    -- Fun fact: bash considers << foo"" quoted, but not << <("foo").
    readToken = do
        str <- readStringForParser readNormalWord
        -- A here doc actually works with \r\n because the \r becomes part of the token
        crstr <- (carriageReturn >> (return $ str ++ "\r")) <|> return str
        return $ unquote crstr

readPendingHereDocs = do
    docs <- popPendingHereDocs
    mapM_ readDoc docs
  where
    readDoc (HereDocPending (T_HereDoc id dashed quoted endToken _) ctx) =
      swapContext ctx $
      do
        docStartPos <- getPosition
        (terminated, wasWarned, lines) <- readDocLines dashed endToken
        docEndPos <- getPosition
        let hereData = unlines lines
        unless terminated $ do
            unless wasWarned $
                debugHereDoc id endToken hereData
            fail "Here document was not correctly terminated"
        list <- parseHereData quoted (docStartPos, docEndPos) hereData
        addToHereDocMap id list

    -- Read the lines making up the here doc. Returns (IsTerminated, Lines)
    readDocLines :: Monad m => Dashed -> String -> SCParser m (Bool, Bool, [String])
    readDocLines dashed endToken = do
        pos <- getPosition
        str <- rawLine
        isEof <- option False (eof >> return True)
        (isEnd, wasWarned) <- subParse pos checkEnd str
        if
            | isEnd -> return (True, wasWarned, [])
            | isEof -> return (False, wasWarned, [str])
            | True -> do
                (ok, previousWarning, rest) <- readDocLines dashed endToken
                return (ok, wasWarned || previousWarning, str:rest)
      where
        -- Check if this is the actual end, or a plausible false end
        checkEnd = option (False, False) $ try $ do
            -- Match what's basically '^( *)token( *)(.*)$'
            leadingSpacePos <- getPosition
            leadingSpace <- linewhitespace `reluctantlyTill` string endToken
            string endToken
            trailingSpacePos <- getPosition
            trailingSpace <- many linewhitespace
            trailerPos <- getPosition
            trailer <- many anyChar

            let leadingSpacesAreTabs = all (== '\t') leadingSpace
            let thereIsNoTrailer = null trailingSpace && null trailer
            let leaderIsOk = null leadingSpace
                    || dashed == Dashed && leadingSpacesAreTabs
            let trailerStart = case trailer of [] -> '\0'; (h:_) -> h
            let hasTrailingSpace = not $ null trailingSpace
            let hasTrailer = not $ null trailer
            let ppt = parseProblemAt trailerPos ErrorC

            if leaderIsOk && thereIsNoTrailer
              then return (True, False)
              else do
                let foundCause = return (False, True)
                let skipLine = return (False, False)
                -- This may be intended as an end token. Debug why it isn't.
                if
                    | trailerStart == ')' -> do
                        ppt 1119 $ "Add a linefeed between end token and terminating ')'."
                        foundCause
                    | trailerStart == '#' -> do
                        ppt 1120 "No comments allowed after here-doc token. Comment the next line instead."
                        foundCause
                    | trailerStart `elem` ";>|&" -> do
                        ppt 1121 "Add ;/& terminators (and other syntax) on the line with the <<, not here."
                        foundCause
                    | hasTrailingSpace && hasTrailer -> do
                        ppt 1122 "Nothing allowed after end token. To continue a command, put it on the line with the <<."
                        foundCause
                    | leaderIsOk && hasTrailingSpace && not hasTrailer -> do
                        parseProblemAt trailingSpacePos ErrorC 1118 "Delete whitespace after the here-doc end token."
                        -- Parse as if it's the actual end token. Will koala_man regret this once again?
                        return (True, True)
                    | not hasTrailingSpace && hasTrailer ->
                        -- The end token is just a prefix
                        skipLine
                    | hasTrailer ->
                        error "ShellCheck bug, please report (here doc trailer)."

                    -- The following cases assume no trailing text:
                    | dashed == Undashed && (not $ null leadingSpace) -> do
                        parseProblemAt leadingSpacePos ErrorC 1039 "Remove indentation before end token (or use <<- and indent with tabs)."
                        foundCause
                    | dashed == Dashed && not leadingSpacesAreTabs -> do
                        parseProblemAt leadingSpacePos ErrorC 1040 "When using <<-, you can only indent with tabs."
                        foundCause
                    | True -> skipLine

    rawLine = do
        c <- many $ noneOf "\n"
        void (char '\n') <|> eof
        return c

    parseHereData Quoted (start,end) hereData = do
        id <- getNextIdBetween start end
        return [T_Literal id hereData]

    parseHereData Unquoted (startPos, _) hereData =
        subParse startPos readHereData hereData

    readHereData = many $ doubleQuotedPart <|> readHereLiteral

    readHereLiteral = do
        start <- startSpan
        chars <- many1 $ noneOf "`$\\"
        id <- endSpan start
        return $ T_Literal id chars

    debugHereDoc tokenId endToken doc
        | endToken `isInfixOf` doc =
            let lookAt line = when (endToken `isInfixOf` line) $
                      parseProblemAtId tokenId ErrorC 1042 ("Close matches include '" ++ (e4m line) ++ "' (!= '" ++ (e4m endToken) ++ "').")
            in do
                  parseProblemAtId tokenId ErrorC 1041 ("Found '" ++ (e4m endToken) ++ "' further down, but not on a separate line.")
                  mapM_ lookAt (lines doc)
        | map toLower endToken `isInfixOf` map toLower doc =
            parseProblemAtId tokenId ErrorC 1043 ("Found " ++ (e4m endToken) ++ " further down, but with wrong casing.")
        | otherwise =
            parseProblemAtId tokenId ErrorC 1044 ("Couldn't find end token `" ++ (e4m endToken) ++ "' in the here document.")


readFilename = readNormalWord
readIoFileOp = choice [g_DGREAT, g_LESSGREAT, g_GREATAND, g_LESSAND, g_CLOBBER, redirToken '<' T_Less, redirToken '>' T_Greater ]

readIoDuplicate = try $ do
    start <- startSpan
    op <- g_GREATAND <|> g_LESSAND
    target <- readIoVariable <|> digitsAndOrDash
    id <- endSpan start
    return $ T_IoDuplicate id op target
  where
    -- either digits with optional dash, or a required dash
    digitsAndOrDash = do
        str <- many digit
        dash <- (if null str then id else option "") $ string "-"
        return $ str ++ dash


prop_readIoFile = isOk readIoFile ">> \"$(date +%YYmmDD)\""
readIoFile = called "redirection" $ do
    start <- startSpan
    op <- readIoFileOp
    spacing
    file <- readFilename
    id <- endSpan start
    return $ T_IoFile id op file

readIoVariable = try $ do
    char '{'
    x <- readVariableName
    char '}'
    return $ "{" ++ x ++ "}"

readIoSource = try $ do
    x <- string "&" <|> readIoVariable <|> many digit
    lookAhead $ void readIoFileOp <|> void (string "<<")
    return x

prop_readIoRedirect = isOk readIoRedirect "3>&2"
prop_readIoRedirect2 = isOk readIoRedirect "2> lol"
prop_readIoRedirect3 = isOk readIoRedirect "4>&-"
prop_readIoRedirect4 = isOk readIoRedirect "&> lol"
prop_readIoRedirect5 = isOk readIoRedirect "{foo}>&2"
prop_readIoRedirect6 = isOk readIoRedirect "{foo}<&-"
prop_readIoRedirect7 = isOk readIoRedirect "{foo}>&1-"
readIoRedirect = do
    start <- startSpan
    n <- readIoSource
    redir <- readHereString <|> readHereDoc <|> readIoDuplicate <|> readIoFile
    id <- endSpan start
    skipAnnotationAndWarn
    spacing
    return $ T_FdRedirect id n redir

prop_readHereString = isOk readHereString "<<< \"Hello $world\""
readHereString = called "here string" $ do
    start <- startSpan
    try $ string "<<<"
    id <- endSpan start
    spacing
    word <- readNormalWord
    return $ T_HereString id word

readNewlineList =
    many1 ((linefeed <|> carriageReturn) `thenSkip` spacing) <* checkBadBreak
  where
    checkBadBreak = optional $ do
                pos <- getPosition
                try $ lookAhead (oneOf "|&") --  See if the next thing could be |, || or &&
                parseProblemAt pos ErrorC 1133
                    "Unexpected start of line. If breaking lines, |/||/&& should be at the end of the previous one."
readLineBreak = optional readNewlineList

prop_readSeparator1 = isWarning readScript "a &; b"
prop_readSeparator2 = isOk readScript "a & b"
prop_readSeparator3 = isWarning readScript "a &amp; b"
prop_readSeparator4 = isWarning readScript "a &gt; file; b"
prop_readSeparator5 = isWarning readScript "curl https://example.com/?foo=moo&bar=cow"
readSeparatorOp = do
    notFollowedBy2 (void g_AND_IF <|> void readCaseSeparator)
    notFollowedBy2 (string "&>")
    start <- getPosition
    f <- try (do
                    pos <- getPosition
                    char '&'
                    optional $ choice [
                        do
                            s <- lookAhead . choice . map (try . string) $
                                ["amp;", "gt;", "lt;"]
                            parseProblemAt pos ErrorC 1109 "This is an unquoted HTML entity. Replace with corresponding character.",

                        do
                            try . lookAhead $ variableStart
                            parseProblemAt pos WarningC 1132 "This & terminates the command. Escape it or add space after & to silence."
                      ]

                    spacing
                    pos <- getPosition
                    char ';'
                    -- In case statements we might have foo & ;;
                    notFollowedBy2 $ char ';'
                    parseProblemAt pos ErrorC 1045 "It's not 'foo &; bar', just 'foo & bar'."
                    return '&'
            ) <|> char ';' <|> char '&'
    end <- getPosition
    spacing
    return (f, (start, end))

readSequentialSep = void (g_Semi >> readLineBreak) <|> void readNewlineList
readSeparator =
    do
        separator <- readSeparatorOp
        readLineBreak
        return separator
     <|>
        do
            start <- getPosition
            readNewlineList
            end <- getPosition
            return ('\n', (start, end))

prop_readSimpleCommand = isOk readSimpleCommand "echo test > file"
prop_readSimpleCommand2 = isOk readSimpleCommand "cmd &> file"
prop_readSimpleCommand3 = isOk readSimpleCommand "export foo=(bar baz)"
prop_readSimpleCommand4 = isOk readSimpleCommand "typeset -a foo=(lol)"
prop_readSimpleCommand5 = isOk readSimpleCommand "time if true; then echo foo; fi"
prop_readSimpleCommand6 = isOk readSimpleCommand "time -p ( ls -l; )"
prop_readSimpleCommand7 = isOk readSimpleCommand "\\ls"
prop_readSimpleCommand8 = isWarning readSimpleCommand "// Lol"
prop_readSimpleCommand9 = isWarning readSimpleCommand "/* Lolbert */"
prop_readSimpleCommand10 = isWarning readSimpleCommand "/**** Lolbert */"
prop_readSimpleCommand11 = isOk readSimpleCommand "/\\* foo"
prop_readSimpleCommand12 = isWarning readSimpleCommand "elsif foo"
prop_readSimpleCommand13 = isWarning readSimpleCommand "ElseIf foo"
prop_readSimpleCommand14 = isWarning readSimpleCommand "elseif[$i==2]"
readSimpleCommand = called "simple command" $ do
    prefix <- option [] readCmdPrefix
    skipAnnotationAndWarn
    cmd <- option Nothing $ Just <$> readCmdName
    when (null prefix && isNothing cmd) $ fail "Expected a command"

    case cmd of
      Nothing -> do
        id1 <- getNextIdSpanningTokenList prefix
        id2 <- getNewIdFor id1
        return $ makeSimpleCommand id1 id2 prefix [] []

      Just cmd -> do
            validateCommand cmd
            -- We have to ignore possible parsing problems from the lookAhead parser
            firstArgument <- ignoreProblemsOf . optionMaybe . try . lookAhead $ readCmdWord
            suffix <- option [] $ getParser readCmdSuffix
                    -- If `export` or other modifier commands are called with `builtin` we have to look at the first argument
                    (if isCommand ["builtin"] cmd then fromMaybe cmd firstArgument else cmd) [
                        (["declare", "export", "local", "readonly", "typeset"], readModifierSuffix),
                        (["time"], readTimeSuffix),
                        (["let"], readLetSuffix),
                        (["eval"], readEvalSuffix)
                    ]

            id1 <- getNextIdSpanningTokenList (prefix ++ (cmd:suffix))
            id2 <- getNewIdFor id1

            let result = makeSimpleCommand id1 id2 prefix [cmd] suffix
            if isCommand ["source", "."] cmd
                then readSource result
                else return result
  where
    isCommand strings (T_NormalWord _ [T_Literal _ s]) = s `elem` strings
    isCommand _ _ = False
    getParser def cmd [] = def
    getParser def cmd ((list, action):rest) =
        if isCommand list cmd
        then action
        else getParser def cmd rest

    validateCommand cmd =
        case cmd of
            (T_NormalWord _ [T_Literal _ "//"]) -> commentWarning (getId cmd)
            (T_NormalWord _ (T_Literal _ "/" : T_Glob _ "*" :_)) -> commentWarning (getId cmd)
            (T_NormalWord _ (T_Literal _ str:_)) -> do
                let cmdString = map toLower $ takeWhile isAlpha str
                when (cmdString `elem` ["elsif", "elseif"]) $
                    parseProblemAtId (getId cmd) ErrorC 1131 "Use 'elif' to start another branch."
            _ -> return ()

    commentWarning id =
        parseProblemAtId id ErrorC 1127 "Was this intended as a comment? Use # in sh."

    makeSimpleCommand id1 id2 prefix cmd suffix =
        let
            (preAssigned, preRest) = partition assignment prefix
            (preRedirected, preRest2) = partition redirection preRest
            (postRedirected, postRest) = partition redirection suffix

            redirs = preRedirected ++ postRedirected
            assigns = preAssigned
            args = cmd ++ preRest2 ++ postRest
        in
            T_Redirecting id1 redirs $ T_SimpleCommand id2 assigns args
      where
        assignment (T_Assignment {}) = True
        assignment _ = False
        redirection (T_FdRedirect {}) = True
        redirection _ = False


readSource :: Monad m => Token -> SCParser m Token
readSource t@(T_Redirecting _ _ (T_SimpleCommand cmdId _ (cmd:file':rest'))) = do
    let file = getFile file' rest'
    override <- getSourceOverride
    let literalFile = do
        name <- override `mplus` getLiteralString file `mplus` stripDynamicPrefix file
        -- Hack to avoid 'source ~/foo' trying to read from literal tilde
        guard . not $ "~/" `isPrefixOf` name
        return name
    case literalFile of
        Nothing -> do
            parseNoteAtId (getId file) WarningC 1090
                "ShellCheck can't follow non-constant source. Use a directive to specify location."
            return t
        Just filename -> do
            proceed <- shouldFollow filename
            if not proceed
              then do
                -- FIXME: This actually gets squashed without -a
                parseNoteAtId (getId file) InfoC 1093
                    "This file appears to be recursively sourced. Ignoring."
                return t
              else do
                sys <- Mr.asks systemInterface
                (input, resolvedFile) <-
                    if filename == "/dev/null" -- always allow /dev/null
                    then return (Right "", filename)
                    else do
                        currentScript <- Mr.asks currentFilename
                        paths <- mapMaybe getSourcePath <$> getCurrentAnnotations True
                        resolved <- system $ siFindSource sys currentScript paths filename
                        contents <- system $ siReadFile sys resolved
                        return (contents, resolved)
                case input of
                    Left err -> do
                        parseNoteAtId (getId file) InfoC 1091 $
                            "Not following: " ++ err
                        return t
                    Right script -> do
                        id1 <- getNewIdFor cmdId
                        id2 <- getNewIdFor cmdId

                        let included = do
                            src <- subRead resolvedFile script
                            return $ T_SourceCommand id1 t (T_Include id2 src)

                        let failed = do
                            parseNoteAtId (getId file) WarningC 1094
                                "Parsing of sourced file failed. Ignoring it."
                            return t

                        included <|> failed
  where
    getFile :: Token -> [Token] -> Token
    getFile file (next:rest) =
        case getLiteralString file of
            Just "--" -> next
            x -> file
    getFile file _ = file

    getSourcePath t =
        case t of
            SourcePath x -> Just x
            _ -> Nothing

    -- If the word has a single expansion as the directory, try stripping it
    -- This affects `$foo/bar` but not `${foo}-dir/bar` or `/foo/$file`
    stripDynamicPrefix word =
        case getWordParts word of
            exp : rest | isStringExpansion exp -> do
                str <- getLiteralString (T_NormalWord (Id 0) rest)
                guard $ "/" `isPrefixOf` str
                return $ "." ++ str
            _ -> Nothing

    subRead name script =
        withContext (ContextSource name) $
            inSeparateContext $
                subParse (initialPos name) (readScriptFile True) script
readSource t = return t


prop_readPipeline = isOk readPipeline "! cat /etc/issue | grep -i ubuntu"
prop_readPipeline2 = isWarning readPipeline "!cat /etc/issue | grep -i ubuntu"
prop_readPipeline3 = isOk readPipeline "for f; do :; done|cat"
readPipeline = do
    unexpecting "keyword/token" readKeyword
    do
        (T_Bang id) <- g_Bang
        pipe <- readPipeSequence
        return $ T_Banged id pipe
      <|>
        readPipeSequence

prop_readAndOr = isOk readAndOr "grep -i lol foo || exit 1"
prop_readAndOr1 = isOk readAndOr "# shellcheck disable=1\nfoo"
prop_readAndOr2 = isOk readAndOr "# shellcheck disable=1\n# lol\n# shellcheck disable=3\nfoo"
readAndOr = do
    start <- startSpan
    apos <- getPosition
    annotations <- readAnnotations
    aid <- endSpan start

    unless (null annotations) $ optional $ do
        try . lookAhead $ readKeyword
        parseProblemAt apos ErrorC 1123 "ShellCheck directives are only valid in front of complete compound commands, like 'if', not e.g. individual 'elif' branches."

    andOr <- withAnnotations annotations $
        chainr1 readPipeline $ do
            op <- g_AND_IF <|> g_OR_IF
            readLineBreak
            return $ case op of T_AND_IF id -> T_AndIf id
                                T_OR_IF  id -> T_OrIf id

    return $ if null annotations
                then andOr
                else T_Annotation aid annotations andOr

readTermOrNone = do
    allspacing
    readTerm <|> do
        eof
        return []

prop_readTerm = isOk readTerm "time ( foo; bar; )"
readTerm = do
    allspacing
    m <- readAndOr
    readTerm' m
  where
    readTerm' current =
        do
            (sep, (start, end)) <- readSeparator
            id <- getNextIdBetween start end
            more <- option (T_EOF id) readAndOr
            case more of (T_EOF _) -> return [transformWithSeparator id sep current]
                         _         -> do
                                    list <- readTerm' more
                                    return (transformWithSeparator id sep current : list)
          <|>
            return [current]
      where
        transformWithSeparator i '&' = T_Backgrounded i
        transformWithSeparator i _  = id


readPipeSequence = do
    start <- startSpan
    (cmds, pipes) <- sepBy1WithSeparators readCommand
                        (readPipe `thenSkip` (spacing >> readLineBreak))
    id <- endSpan start
    spacing
    return $ T_Pipeline id pipes cmds
  where
    sepBy1WithSeparators p s = do
        let elems = p >>= \x -> return ([x], [])
        let seps = do
            separator <- s
            return $ \(a,b) (c,d) -> (a++c, b ++ d ++ [separator])
        elems `chainl1` seps

readPipe = do
    notFollowedBy2 g_OR_IF
    start <- startSpan
    char '|'
    qualifier <- string "&" <|> return ""
    id <- endSpan start
    spacing
    return $ T_Pipe id ('|':qualifier)

readCommand = choice [
    readCompoundCommand,
    readConditionCommand,
    readCoProc,
    readSimpleCommand
    ]

readCmdName = do
    -- Ignore alias suppression
    optional . try $ do
        char '\\'
        lookAhead $ variableChars
    readCmdWord

readCmdWord = do
    skipAnnotationAndWarn
    readNormalWord <* spacing

-- Due to poor planning, annotations after commands isn't handled well.
-- At the time this function is used, it's usually too late to skip
-- comments, so you end up with a parse failure instead.
skipAnnotationAndWarn = optional $ do
        try . lookAhead $ readAnnotationPrefix
        parseProblem ErrorC 1126 "Place shellcheck directives before commands, not after."
        readAnyComment

prop_readIfClause = isOk readIfClause "if false; then foo; elif true; then stuff; more stuff; else cows; fi"
prop_readIfClause2 = isWarning readIfClause "if false; then; echo oo; fi"
prop_readIfClause3 = isWarning readIfClause "if false; then true; else; echo lol; fi"
prop_readIfClause4 = isWarning readIfClause "if false; then true; else if true; then echo lol; fi; fi"
prop_readIfClause5 = isOk readIfClause "if false; then true; else\nif true; then echo lol; fi; fi"
prop_readIfClause6 = isWarning readIfClause "if true\nthen\nDo the thing\nfi"
readIfClause = called "if expression" $ do
    start <- startSpan
    pos <- getPosition
    (condition, action) <- readIfPart
    elifs <- many readElifPart
    elses <- option [] readElsePart

    g_Fi `orFail` do
        parseProblemAt pos ErrorC 1046 "Couldn't find 'fi' for this 'if'."
        parseProblem ErrorC 1047 "Expected 'fi' matching previously mentioned 'if'."
        return "Expected 'fi'"
    id <- endSpan start

    return $ T_IfExpression id ((condition, action):elifs) elses


verifyNotEmptyIf s =
    optional (do
                emptyPos <- getPosition
                try . lookAhead $ (g_Fi <|> g_Elif <|> g_Else)
                parseProblemAt emptyPos ErrorC 1048 $ "Can't have empty " ++ s ++ " clauses (use 'true' as a no-op).")
readIfPart = do
    pos <- getPosition
    g_If
    allspacing
    condition <- readTerm

    ifNextToken (g_Fi <|> g_Elif <|> g_Else) $
        parseProblemAt pos ErrorC 1049 "Did you forget the 'then' for this 'if'?"

    called "then clause" $ do
        g_Then `orFail` do
            parseProblem ErrorC 1050 "Expected 'then'."
            return "Expected 'then'"

        acceptButWarn g_Semi ErrorC 1051 "Semicolons directly after 'then' are not allowed. Just remove it."
        allspacing
        verifyNotEmptyIf "then"

        action <- readTerm
        return (condition, action)

readElifPart = called "elif clause" $ do
    pos <- getPosition
    g_Elif
    allspacing
    condition <- readTerm
    ifNextToken (g_Fi <|> g_Elif <|> g_Else) $
        parseProblemAt pos ErrorC 1049 "Did you forget the 'then' for this 'elif'?"

    g_Then
    acceptButWarn g_Semi ErrorC 1052 "Semicolons directly after 'then' are not allowed. Just remove it."
    allspacing
    verifyNotEmptyIf "then"
    action <- readTerm
    return (condition, action)

readElsePart = called "else clause" $ do
    pos <- getPosition
    g_Else
    optional $ do
        try . lookAhead $ g_If
        parseProblemAt pos ErrorC 1075 "Use 'elif' instead of 'else if' (or put 'if' on new line if nesting)."

    acceptButWarn g_Semi ErrorC 1053 "Semicolons directly after 'else' are not allowed. Just remove it."
    allspacing
    verifyNotEmptyIf "else"
    readTerm

ifNextToken parser action =
    optional $ do
        try . lookAhead $ parser
        action

prop_readSubshell = isOk readSubshell "( cd /foo; tar cf stuff.tar * )"
readSubshell = called "explicit subshell" $ do
    start <- startSpan
    char '('
    allspacing
    list <- readCompoundList
    allspacing
    char ')' <|> fail "Expected ) closing the subshell"
    id <- endSpan start
    spacing
    return $ T_Subshell id list

prop_readBraceGroup = isOk readBraceGroup "{ a; b | c | d; e; }"
prop_readBraceGroup2 = isWarning readBraceGroup "{foo;}"
prop_readBraceGroup3 = isOk readBraceGroup "{(foo)}"
readBraceGroup = called "brace group" $ do
    start <- startSpan
    char '{'
    void allspacingOrFail <|> optional (do
        lookAhead $ noneOf "(" -- {( is legal
        parseProblem ErrorC 1054 "You need a space after the '{'.")
    optional $ do
        pos <- getPosition
        lookAhead $ char '}'
        parseProblemAt pos ErrorC 1055 "You need at least one command here. Use 'true;' as a no-op."
    list <- readTerm
    char '}' <|> do
        parseProblem ErrorC 1056 "Expected a '}'. If you have one, try a ; or \\n in front of it."
        fail "Missing '}'"
    id <- endSpan start
    spacing
    return $ T_BraceGroup id list

prop_readBatsTest = isOk readBatsTest "@test 'can parse' {\n  true\n}"
readBatsTest = called "bats @test" $ do
    start <- startSpan
    try $ string "@test"
    spacing
    name <- readNormalWord
    spacing
    test <- readBraceGroup
    id <- endSpan start
    return $ T_BatsTest id name test

prop_readWhileClause = isOk readWhileClause "while [[ -e foo ]]; do sleep 1; done"
readWhileClause = called "while loop" $ do
    start <- startSpan
    kwId <- getId <$> g_While
    condition <- readTerm
    statements <- readDoGroup kwId
    id <- endSpan start
    return $ T_WhileExpression id condition statements

prop_readUntilClause = isOk readUntilClause "until kill -0 $PID; do sleep 1; done"
readUntilClause = called "until loop" $ do
    start <- startSpan
    kwId <- getId <$> g_Until
    condition <- readTerm
    statements <- readDoGroup kwId
    id <- endSpan start
    return $ T_UntilExpression id condition statements

readDoGroup kwId = do
    optional (do
                try . lookAhead $ g_Done
                parseProblemAtId kwId ErrorC 1057 "Did you forget the 'do' for this loop?")

    doKw <- g_Do `orFail` do
        parseProblem ErrorC 1058 "Expected 'do'."
        return "Expected 'do'"

    acceptButWarn g_Semi ErrorC 1059 "No semicolons directly after 'do'."
    allspacing

    optional (do
                try . lookAhead $ g_Done
                parseProblemAtId (getId doKw) ErrorC 1060 "Can't have empty do clauses (use 'true' as a no-op).")

    commands <- readCompoundList
    g_Done `orFail` do
            parseProblemAtId (getId doKw) ErrorC 1061 "Couldn't find 'done' for this 'do'."
            parseProblem ErrorC 1062 "Expected 'done' matching previously mentioned 'do'."
            return "Expected 'done'"

    optional . lookAhead $ do
        pos <- getPosition
        try $ string "<("
        parseProblemAt pos ErrorC 1142 "Use 'done < <(cmd)' to redirect from process substitution (currently missing one '<')."
    return commands


prop_readForClause = isOk readForClause "for f in *; do rm \"$f\"; done"
prop_readForClause1 = isOk readForClause "for f in *; { rm \"$f\"; }"
prop_readForClause3 = isOk readForClause "for f; do foo; done"
prop_readForClause4 = isOk readForClause "for((i=0; i<10; i++)); do echo $i; done"
prop_readForClause5 = isOk readForClause "for ((i=0;i<10 && n>x;i++,--n))\ndo \necho $i\ndone"
prop_readForClause6 = isOk readForClause "for ((;;))\ndo echo $i\ndone"
prop_readForClause7 = isOk readForClause "for ((;;)) do echo $i\ndone"
prop_readForClause8 = isOk readForClause "for ((;;)) ; do echo $i\ndone"
prop_readForClause9 = isOk readForClause "for i do true; done"
prop_readForClause10= isOk readForClause "for ((;;)) { true; }"
prop_readForClause12= isWarning readForClause "for $a in *; do echo \"$a\"; done"
prop_readForClause13= isOk readForClause "for foo\nin\\\n  bar\\\n  baz\ndo true; done"
readForClause = called "for loop" $ do
    pos <- getPosition
    (T_For id) <- g_For
    spacing
    readArithmetic id <|> readRegular id
  where
    readArithmetic id = called "arithmetic for condition" $ do
        readArithmeticDelimiter '(' "Missing second '(' to start arithmetic for ((;;)) loop"
        x <- readArithmeticContents
        char ';' >> spacing
        y <- readArithmeticContents
        char ';' >> spacing
        z <- readArithmeticContents
        spacing
        readArithmeticDelimiter ')' "Missing second ')' to terminate 'for ((;;))' loop condition"
        spacing
        optional $ readSequentialSep >> spacing
        group <- readBraced <|> readDoGroup id
        return $ T_ForArithmetic id x y z group

    -- For c='(' read "((" and be lenient about spaces
    readArithmeticDelimiter c msg = do
        char c
        startPos <- getPosition
        sp <- spacing
        endPos <- getPosition
        char c <|> do
            parseProblemAt startPos ErrorC 1137 msg
            fail ""
        unless (null sp) $
            parseProblemAtWithEnd startPos endPos ErrorC 1138 $ "Remove spaces between " ++ [c,c] ++ " in arithmetic for loop."

    readBraced = do
        (T_BraceGroup _ list) <- readBraceGroup
        return list

    readRegular id = do
        acceptButWarn (char '$') ErrorC 1086
            "Don't use $ on the iterator name in for loops."
        name <- readVariableName `thenSkip` allspacing
        values <- readInClause <|> (optional readSequentialSep >> return [])
        group <- readBraced <|> readDoGroup id
        return $ T_ForIn id name values group

prop_readSelectClause1 = isOk readSelectClause "select foo in *; do echo $foo; done"
prop_readSelectClause2 = isOk readSelectClause "select foo; do echo $foo; done"
readSelectClause = called "select loop" $ do
    (T_Select id) <- g_Select
    spacing
    typ <- readRegular
    group <- readDoGroup id
    typ id group
  where
    readRegular = do
        name <- readVariableName
        spacing
        values <- readInClause <|> (readSequentialSep >> return [])
        return $ \id group -> (return $ T_SelectIn id name values group)

readInClause = do
    g_In
    things <- readCmdWord `reluctantlyTill`
                (void g_Semi <|> void linefeed <|> void g_Do)

    do {
        lookAhead g_Do;
        parseNote ErrorC 1063 "You need a line feed or semicolon before the 'do'.";
    } <|> do {
        optional g_Semi;
        void allspacing;
    }

    return things

prop_readCaseClause = isOk readCaseClause "case foo in a ) lol; cow;; b|d) fooo; esac"
prop_readCaseClause2 = isOk readCaseClause "case foo\n in * ) echo bar;; esac"
prop_readCaseClause3 = isOk readCaseClause "case foo\n in * ) echo bar & ;; esac"
prop_readCaseClause4 = isOk readCaseClause "case foo\n in *) echo bar ;& bar) foo; esac"
prop_readCaseClause5 = isOk readCaseClause "case foo\n in *) echo bar;;& foo) baz;; esac"
prop_readCaseClause6 = isOk readCaseClause "case foo\n in if) :;; done) :;; esac"
readCaseClause = called "case expression" $ do
    start <- startSpan
    g_Case
    word <- readNormalWord
    allspacing
    g_In <|> fail "Expected 'in'"
    readLineBreak
    list <- readCaseList
    g_Esac <|> fail "Expected 'esac' to close the case statement"
    id <- endSpan start
    return $ T_CaseExpression id word list

readCaseList = many readCaseItem

readCaseItem = called "case item" $ do
    notFollowedBy2 g_Esac
    optional $ do
        try . lookAhead $ readAnnotationPrefix
        parseProblem ErrorC 1124 "ShellCheck directives are only valid in front of complete commands like 'case' statements, not individual case branches."
    optional g_Lparen
    spacing
    pattern' <- readPattern
    void g_Rparen <|> do
        parseProblem ErrorC 1085
            "Did you forget to move the ;; after extending this case item?"
        fail "Expected ) to open a new case item"
    readLineBreak
    list <- (lookAhead readCaseSeparator >> return []) <|> readCompoundList
    separator <- readCaseSeparator `attempting` do
        pos <- getPosition
        lookAhead g_Rparen
        parseProblemAt pos ErrorC 1074
            "Did you forget the ;; after the previous case item?"
    readLineBreak
    return (separator, pattern', list)

readCaseSeparator = choice [
    tryToken ";;&" (const ()) >> return CaseContinue,
    tryToken ";&" (const ()) >> return CaseFallThrough,
    g_DSEMI >> return CaseBreak,
    lookAhead (readLineBreak >> g_Esac) >> return CaseBreak
    ]

prop_readFunctionDefinition = isOk readFunctionDefinition "foo() { command foo --lol \"$@\"; }"
prop_readFunctionDefinition1 = isOk readFunctionDefinition "foo   (){ command foo --lol \"$@\"; }"
prop_readFunctionDefinition4 = isWarning readFunctionDefinition "foo(a, b) { true; }"
prop_readFunctionDefinition5 = isOk readFunctionDefinition ":(){ :|:;}"
prop_readFunctionDefinition6 = isOk readFunctionDefinition "?(){ foo; }"
prop_readFunctionDefinition7 = isOk readFunctionDefinition "..(){ cd ..; }"
prop_readFunctionDefinition8 = isOk readFunctionDefinition "foo() (ls)"
prop_readFunctionDefinition9 = isOk readFunctionDefinition "function foo { true; }"
prop_readFunctionDefinition10= isOk readFunctionDefinition "function foo () { true; }"
prop_readFunctionDefinition11= isWarning readFunctionDefinition "function foo{\ntrue\n}"
prop_readFunctionDefinition12= isOk readFunctionDefinition "function []!() { true; }"
prop_readFunctionDefinition13= isOk readFunctionDefinition "@require(){ true; }"
readFunctionDefinition = called "function" $ do
    start <- startSpan
    functionSignature <- try readFunctionSignature
    allspacing
    void (lookAhead $ oneOf "{(") <|> parseProblem ErrorC 1064 "Expected a { to open the function definition."
    group <- readBraceGroup <|> readSubshell
    id <- endSpan start
    return $ functionSignature id group
  where
    readFunctionSignature =
        readWithFunction <|> readWithoutFunction
      where
        readWithFunction = do
            try $ do
                string "function"
                whitespace
            spacing
            name <- many1 extendedFunctionChars
            spaces <- spacing
            hasParens <- wasIncluded readParens
            when (not hasParens && null spaces) $
                acceptButWarn (lookAhead (oneOf "{("))
                    ErrorC 1095 "You need a space or linefeed between the function name and body."
            return $ \id -> T_Function id (FunctionKeyword True) (FunctionParentheses hasParens) name

        readWithoutFunction = try $ do
            name <- many1 functionChars
            guard $ name /= "time"  -- Interferes with time ( foo )
            spacing
            readParens
            return $ \id -> T_Function id (FunctionKeyword False) (FunctionParentheses True) name

        readParens = do
            g_Lparen
            spacing
            g_Rparen <|> do
                parseProblem ErrorC 1065 "Trying to declare parameters? Don't. Use () and refer to params as $1, $2.."
                many $ noneOf "\n){"
                g_Rparen
            return ()

prop_readCoProc1 = isOk readCoProc "coproc foo { echo bar; }"
prop_readCoProc2 = isOk readCoProc "coproc { echo bar; }"
prop_readCoProc3 = isOk readCoProc "coproc echo bar"
readCoProc = called "coproc" $ do
    start <- startSpan
    try $ do
        string "coproc"
        whitespace
    choice [ try $ readCompoundCoProc start, readSimpleCoProc start ]
  where
    readCompoundCoProc start = do
        var <- optionMaybe $
            readVariableName `thenSkip` whitespace
        body <- readBody readCompoundCommand
        id <- endSpan start
        return $ T_CoProc id var body
    readSimpleCoProc start = do
        body <- readBody readSimpleCommand
        id <- endSpan start
        return $ T_CoProc id Nothing body
    readBody parser = do
        start <- startSpan
        body <- parser
        id <- endSpan start
        return $ T_CoProcBody id body

readPattern = (readPatternWord `thenSkip` spacing) `sepBy1` (char '|' `thenSkip` spacing)

prop_readConditionCommand = isOk readConditionCommand "[[ x ]] > foo 2>&1"
readConditionCommand = do
    cmd <- readCondition
    redirs <- many readIoRedirect
    id <- getNextIdSpanningTokenList (cmd:redirs)

    pos <- getPosition
    hasDashAo <- isFollowedBy $ do
        c <- choice $ try . string <$> ["-o", "-a", "or", "and"]
        posEnd <- getPosition
        parseProblemAtWithEnd pos posEnd ErrorC 1139 $
            "Use " ++ alt c ++ " instead of '" ++ c ++ "' between test commands."

    -- If the next word is a keyword, readNormalWord will trigger a warning
    hasKeyword <- isFollowedBy readKeyword
    hasWord <- isFollowedBy readNormalWord

    when (hasWord && not (hasKeyword || hasDashAo)) $ do
        -- We have other words following, and no error has been emitted.
        posEnd <- getPosition
        parseProblemAtWithEnd pos posEnd ErrorC 1140 "Unexpected parameters after condition. Missing &&/||, or bad expression?"

    return $ T_Redirecting id redirs cmd
  where
    alt "or" = "||"
    alt "-o" = "||"
    alt "and" = "&&"
    alt "-a" = "&&"
    alt _ = "|| or &&"

prop_readCompoundCommand = isOk readCompoundCommand "{ echo foo; }>/dev/null"
readCompoundCommand = do
    cmd <- choice [
        readBraceGroup,
        readAmbiguous "((" readArithmeticExpression readSubshell (\pos ->
            parseNoteAt pos ErrorC 1105 "Shells disambiguate (( differently or not at all. For subshell, add spaces around ( . For ((, fix parsing errors."),
        readSubshell,
        readWhileClause,
        readUntilClause,
        readIfClause,
        readForClause,
        readSelectClause,
        readCaseClause,
        readBatsTest,
        readFunctionDefinition
        ]
    redirs <- many readIoRedirect
    id <- getNextIdSpanningTokenList (cmd:redirs)
    optional . lookAhead $ do
        notFollowedBy2 $ choice [readKeyword, g_Lbrace]
        pos <- getPosition
        many1 readNormalWord
        posEnd <- getPosition
        parseProblemAtWithEnd pos posEnd ErrorC 1141 "Unexpected tokens after compound command. Bad redirection or missing ;/&&/||/|?"
    return $ T_Redirecting id redirs cmd


readCompoundList = readTerm
readCompoundListOrEmpty = do
    allspacing
    readTerm <|> return []

readCmdPrefix = many1 (readIoRedirect <|> readAssignmentWord)
readCmdSuffix = many1 (readIoRedirect <|> readCmdWord)
readModifierSuffix = many1 (readIoRedirect <|> readWellFormedAssignment <|> readCmdWord)
readTimeSuffix = do
    flags <- many readFlag
    pipeline <- readPipeline
    return $ flags ++ [pipeline]
  where
    -- This fails for quoted variables and such. Fixme?
    readFlag = do
        lookAhead $ char '-'
        readCmdWord

-- Fixme: this is a hack that doesn't handle let c='4'"5" or let a\>b
readLetSuffix :: Monad m => SCParser m [Token]
readLetSuffix = many1 (readIoRedirect <|> try readLetExpression <|> readCmdWord)
  where
    readLetExpression :: Monad m => SCParser m Token
    readLetExpression = do
        startPos <- getPosition
        expression <- readStringForParser readCmdWord
        let (unQuoted, newPos) = kludgeAwayQuotes expression startPos
        subParse newPos readArithmeticContents unQuoted

    kludgeAwayQuotes :: String -> SourcePos -> (String, SourcePos)
    kludgeAwayQuotes s p =
        case s of
            first:rest@(_:_) ->
                let (last:backwards) = reverse rest
                    middle = reverse backwards
                in
                    if first `elem` "'\"" && first == last
                    then (middle, updatePosChar p first)
                    else (s, p)
            x -> (s, p)


-- bash allows a=(b), ksh allows $a=(b). dash allows neither. Let's warn.
readEvalSuffix = many1 (readIoRedirect <|> readCmdWord <|> evalFallback)
  where
    evalFallback = do
        pos <- getPosition
        lookAhead $ char '('
        parseProblemAt pos WarningC 1098 "Quote/escape special characters when using eval, e.g. eval \"a=(b)\"."
        fail "Unexpected parentheses. Make sure to quote when eval'ing as shell parsers differ."

-- Get whatever a parser would parse as a string
readStringForParser parser = do
    pos <- inSeparateContext $ lookAhead (parser >> getPosition)
    readUntil pos
  where
    readUntil endPos = anyChar `reluctantlyTill` (getPosition >>= guard . (== endPos))

-- Like readStringForParser, returning the span as a T_Literal
readLiteralForParser parser = do
    start <- startSpan
    str <- readStringForParser parser
    id <- endSpan start
    return $ T_Literal id str

prop_readAssignmentWord = isOk readAssignmentWord "a=42"
prop_readAssignmentWord2 = isOk readAssignmentWord "b=(1 2 3)"
prop_readAssignmentWord5 = isOk readAssignmentWord "b+=lol"
prop_readAssignmentWord7 = isOk readAssignmentWord "a[3$n'']=42"
prop_readAssignmentWord8 = isOk readAssignmentWord "a[4''$(cat foo)]=42"
prop_readAssignmentWord9 = isOk readAssignmentWord "IFS= "
prop_readAssignmentWord9a= isOk readAssignmentWord "foo="
prop_readAssignmentWord9b= isOk readAssignmentWord "foo=  "
prop_readAssignmentWord9c= isOk readAssignmentWord "foo=  #bar"
prop_readAssignmentWord11= isOk readAssignmentWord "foo=([a]=b [c] [d]= [e f )"
prop_readAssignmentWord12= isOk readAssignmentWord "a[b <<= 3 + c]='thing'"
prop_readAssignmentWord13= isOk readAssignmentWord "var=( (1 2) (3 4) )"
prop_readAssignmentWord14= isOk readAssignmentWord "var=( 1 [2]=(3 4) )"
prop_readAssignmentWord15= isOk readAssignmentWord "var=(1 [2]=(3 4))"
readAssignmentWord = readAssignmentWordExt True
readWellFormedAssignment = readAssignmentWordExt False
readAssignmentWordExt lenient = called "variable assignment" $ do
    -- Parse up to and including the = in a 'try'
    (id, variable, op, indices) <- try $ do
        start <- startSpan
        pos <- getPosition
        -- Check for a leading $ at parse time, to warn for $foo=(bar) which
        -- would otherwise cause a parse failure so it can't be checked later.
        leadingDollarPos <-
            if lenient
            then optionMaybe $ getSpanPositionsFor (char '$')
            else return Nothing
        variable <- readVariableName
        indices <- many readArrayIndex
        hasLeftSpace <- fmap (not . null) spacing
        opStart <- getPosition
        id <- endSpan start
        op <- readAssignmentOp
        opEnd <- getPosition

        when (isJust leadingDollarPos || hasLeftSpace) $ do
            hasParen <- isFollowedBy (spacing >> char '(')
            when hasParen $
                sequence_ $ do
                    (l, r) <- leadingDollarPos
                    return $ parseProblemAtWithEnd l r ErrorC 1066 "Don't use $ on the left side of assignments."

            -- Fail so that this is not parsed as an assignment.
            fail ""
        -- At this point we know for sure.
        return (id, variable, op, indices)

    rightPosStart <- getPosition
    hasRightSpace <- fmap (not . null) spacing
    rightPosEnd <- getPosition
    isEndOfCommand <- fmap isJust $ optionMaybe (try . lookAhead $ (void (oneOf "\r\n;&|)") <|> eof))

    if hasRightSpace || isEndOfCommand
      then do
        when (variable /= "IFS" && hasRightSpace && not isEndOfCommand) $ do
            parseProblemAtWithEnd rightPosStart rightPosEnd WarningC 1007
                "Remove space after = if trying to assign a value (for empty string, use var='' ... )."
        value <- readEmptyLiteral
        return $ T_Assignment id op variable indices value
      else do
        optional $ do
            lookAhead $ char '='
            parseProblem ErrorC 1097 "Unexpected ==. For assignment, use =. For comparison, use [/[[. Or quote for literal string."

        value <- readArray <|> readNormalWord
        spacing
        return $ T_Assignment id op variable indices value
  where
    readAssignmentOp = do
        -- This is probably some kind of ascii art border
        unexpecting "===" (string "===")
        choice [
            string "+=" >> return Append,
            string "=" >> return Assign
            ]

readEmptyLiteral = do
    start <- startSpan
    id <- endSpan start
    return $ T_Literal id ""

readArrayIndex = do
    start <- startSpan
    char '['
    pos <- getPosition
    str <- readStringForParser readIndexSpan
    char ']'
    id <- endSpan start
    return $ T_UnparsedIndex id pos str

readArray :: Monad m => SCParser m Token
readArray = called "array assignment" $ do
    start <- startSpan
    opening <- getPosition
    char '('
    optional $ do
        lookAhead $ char '('
        parseProblemAt opening ErrorC 1116 "Missing $ on a $((..)) expression? (or use ( ( for arrays)."
    allspacing
    words <- readElement `reluctantlyTill` char ')'
    char ')' <|> fail "Expected ) to close array assignment"
    id <- endSpan start
    return $ T_Array id words
  where
    readElement = (readIndexed <|> readRegular) `thenSkip` allspacing
    readIndexed = do
        start <- startSpan
        index <- try $ do
            x <- many1 readArrayIndex
            char '='
            return x
        value <- readRegular <|> nothing
        id <- endSpan start
        return $ T_IndexedElement id index value
    readRegular = readArray <|> readNormalWord

    nothing = do
        start <- startSpan
        id <- endSpan start
        return $ T_Literal id ""

tryToken s t = try $ do
    start <- startSpan
    string s
    id <- endSpan start
    spacing
    return $ t id

redirToken c t = try $ do
    start <- startSpan
    char c
    id <- endSpan start
    notFollowedBy2 $ char '('
    return $ t id

tryWordToken s t = tryParseWordToken s t `thenSkip` spacing
tryParseWordToken keyword t = try $ do
    pos <- getPosition
    start <- startSpan
    str <- anycaseString keyword
    id <- endSpan start

    optional $ do
        c <- try . lookAhead $ anyChar
        let warning code = parseProblem ErrorC code $ "You need a space before the " ++ [c] ++ "."
        case c of
            '[' -> warning 1069
            '#' -> warning 1099
            '!' -> warning 1129
            ':' -> warning 1130
            _ -> return ()

    lookAhead keywordSeparator
    when (str /= keyword) $ do
        parseProblemAt pos ErrorC 1081 $
            "Scripts are case sensitive. Use '" ++ keyword ++ "', not '" ++ str ++ "' (or quote if literal)."
        fail ""
    return $ t id

anycaseString =
    mapM anycaseChar
  where
    anycaseChar c = char (toLower c) <|> char (toUpper c)

g_AND_IF = tryToken "&&" T_AND_IF
g_OR_IF = tryToken "||" T_OR_IF
g_DSEMI = tryToken ";;" T_DSEMI
g_DLESS = tryToken "<<" T_DLESS
g_DGREAT = tryToken ">>" T_DGREAT
g_LESSAND = tryToken "<&" T_LESSAND
g_GREATAND = tryToken ">&" T_GREATAND
g_LESSGREAT = tryToken "<>" T_LESSGREAT
g_DLESSDASH = tryToken "<<-" T_DLESSDASH
g_CLOBBER = tryToken ">|" T_CLOBBER
g_OPERATOR = g_AND_IF <|> g_OR_IF <|> g_DSEMI <|> g_DLESSDASH <|> g_DLESS <|> g_DGREAT <|> g_LESSAND <|> g_GREATAND <|> g_LESSGREAT

g_If = tryWordToken "if" T_If
g_Then = tryWordToken "then" T_Then
g_Else = tryWordToken "else" T_Else
g_Elif = tryWordToken "elif" T_Elif
g_Fi = tryWordToken "fi" T_Fi
g_Do = tryWordToken "do" T_Do
g_Done = tryWordToken "done" T_Done
g_Case = tryWordToken "case" T_Case
g_Esac = tryWordToken "esac" T_Esac
g_While = tryWordToken "while" T_While
g_Until = tryWordToken "until" T_Until
g_For = tryWordToken "for" T_For
g_Select = tryWordToken "select" T_Select
g_In = tryWordToken "in" T_In <* skipAnnotationAndWarn
g_Lbrace = tryWordToken "{" T_Lbrace
g_Rbrace = do -- handled specially due to ksh echo "${ foo; }bar"
    start <- startSpan
    char '}'
    id <- endSpan start
    return $ T_Rbrace id

g_Lparen = tryToken "(" T_Lparen
g_Rparen = tryToken ")" T_Rparen
g_Bang = do
    start <- startSpan
    char '!'
    id <- endSpan start
    void spacing1 <|> do
        pos <- getPosition
        parseProblemAt pos ErrorC 1035
            "You are missing a required space after the !."
    return $ T_Bang id

g_Semi = do
    notFollowedBy2 g_DSEMI
    tryToken ";" T_Semi

keywordSeparator =
    eof <|> void (try allspacingOrFail) <|> void (oneOf ";()[<>&|")

readKeyword = choice [ g_Then, g_Else, g_Elif, g_Fi, g_Do, g_Done, g_Esac, g_Rbrace, g_Rparen, g_DSEMI ]

ifParse p t f =
    (lookAhead (try p) >> t) <|> f

prop_readShebang1 = isOk readShebang "#!/bin/sh\n"
prop_readShebang2 = isWarning readShebang "!# /bin/sh\n"
prop_readShebang3 = isNotOk readShebang "#shellcheck shell=/bin/sh\n"
prop_readShebang4 = isWarning readShebang "! /bin/sh"
prop_readShebang5 = isWarning readShebang "\n#!/bin/sh"
prop_readShebang6 = isWarning readShebang " # Copyright \n!#/bin/bash"
prop_readShebang7 = isNotOk readShebang "# Copyright \nfoo\n#!/bin/bash"
readShebang = do
    start <- startSpan
    anyShebang <|> try readMissingBang <|> withHeader
    many linewhitespace
    str <- many $ noneOf "\r\n"
    id <- endSpan start
    optional carriageReturn
    optional linefeed
    return $ T_Literal id str
  where
    anyShebang = choice $ map try [
        readCorrect,
        readSwapped,
        readTooManySpaces,
        readMissingHash
        ]
    readCorrect = void $ string "#!"

    readSwapped = do
        start <- startSpan
        string "!#"
        id <- endSpan start
        parseProblemAtId id ErrorC 1084
            "Use #!, not !#, for the shebang."

    skipSpaces = fmap (not . null) $ many linewhitespace
    readTooManySpaces = do
        startPos <- getPosition
        startSpaces <- skipSpaces
        char '#'
        middlePos <- getPosition
        middleSpaces <- skipSpaces
        char '!'
        when startSpaces $
            parseProblemAt startPos ErrorC 1114
                "Remove leading spaces before the shebang."
        when middleSpaces $
            parseProblemAt middlePos ErrorC 1115
                "Remove spaces between # and ! in the shebang."

    readMissingHash = do
        pos <- getPosition
        char '!'
        ensurePathAhead
        parseProblemAt pos ErrorC 1104
            "Use #!, not just !, for the shebang."

    readMissingBang = do
        char '#'
        pos <- getPosition
        ensurePathAhead
        parseProblemAt pos ErrorC 1113
            "Use #!, not just #, for the shebang."

    ensurePathAhead = lookAhead $ do
        many linewhitespace
        char '/'

    withHeader = try $ do
        many1 headerLine
        pos <- getPosition
        anyShebang <*
            parseProblemAt pos ErrorC 1128 "The shebang must be on the first line. Delete blanks and move comments."

    headerLine = do
        notFollowedBy2 anyShebang
        many linewhitespace
        optional readAnyComment
        linefeed

verifyEof = eof <|> choice [
        ifParsable g_Lparen $
            parseProblem ErrorC 1088 "Parsing stopped here. Invalid use of parentheses?",

        ifParsable readKeyword $
            parseProblem ErrorC 1089 "Parsing stopped here. Is this keyword correctly matched up?",

        parseProblem ErrorC 1070 "Parsing stopped here. Mismatched keywords or invalid parentheses?"
    ]
  where
    ifParsable p action = do
        try (lookAhead p)
        action


readConfigFile :: Monad m => FilePath -> SCParser m [Annotation]
readConfigFile filename = do
    shouldIgnore <- Mr.asks ignoreRC
    if shouldIgnore then return [] else read' filename
  where
    read' filename = do
        sys <- Mr.asks systemInterface
        contents <- system $ siGetConfig sys filename
        case contents of
            Nothing -> return []
            Just (file, str) -> readConfig file str

    readConfig filename contents = do
        result <- lift $ runParserT readConfigKVs initialUserState filename contents
        case result of
            Right result ->
                return result

            Left err -> do
                parseProblem ErrorC 1134 $ errorFor filename err
                return []

    errorFor filename err =
        let line = "line " ++ (show . sourceLine $ errorPos err)
            suggestion = getStringFromParsec $ errorMessages err
        in
            "Failed to process " ++ (e4m filename) ++ ", " ++ line ++ ": "
                ++ suggestion

prop_readConfigKVs1 = isOk readConfigKVs "disable=1234"
prop_readConfigKVs2 = isOk readConfigKVs "# Comment\ndisable=1234 # Comment\n"
prop_readConfigKVs3 = isOk readConfigKVs ""
prop_readConfigKVs4 = isOk readConfigKVs "\n\n\n\n\t \n"
prop_readConfigKVs5 = isOk readConfigKVs "# shellcheck accepts annotation-like comments in rc files\ndisable=1234"
readConfigKVs = do
    anySpacingOrComment
    annotations <- many (readAnnotationWithoutPrefix <* anySpacingOrComment)
    eof
    return $ concat annotations
anySpacingOrComment =
    many (void allspacingOrFail <|> void readAnyComment)

prop_readScript1 = isOk readScript "#!/bin/bash\necho hello world\n"
prop_readScript2 = isWarning readScript "#!/bin/bash\r\necho hello world\n"
prop_readScript3 = isWarning readScript "#!/bin/bash\necho hello\xA0world"
prop_readScript4 = isWarning readScript "#!/usr/bin/perl\nfoo=("
prop_readScript5 = isOk readScript "#!/bin/bash\n#This is an empty script\n\n"
prop_readScript6 = isOk readScript "#!/usr/bin/env -S X=FOO bash\n#This is an empty script\n\n"
readScriptFile sourced = do
    start <- startSpan
    pos <- getPosition
    optional $ do
        readUtf8Bom
        parseProblem ErrorC 1082
            "This file has a UTF-8 BOM. Remove it with: LC_CTYPE=C sed '1s/^...//' < yourscript ."
    shebang <- readShebang <|> readEmptyLiteral
    let (T_Literal _ shebangString) = shebang
    allspacing
    annotationStart <- startSpan
    fileAnnotations <- readAnnotations
    rcAnnotations <- if sourced
                     then return []
                     else do
                        filename <- Mr.asks currentFilename
                        readConfigFile filename
    let annotations = fileAnnotations ++ rcAnnotations
    annotationId <- endSpan annotationStart
    let shellAnnotationSpecified =
            any (\x -> case x of ShellOverride {} -> True; _ -> False) annotations
    shellFlagSpecified <- isJust <$> Mr.asks shellTypeOverride
    let ignoreShebang = shellAnnotationSpecified || shellFlagSpecified

    unless ignoreShebang $
        verifyShebang pos (executableFromShebang shebangString)
    if ignoreShebang || isValidShell (executableFromShebang shebangString) /= Just False
      then do
            commands <- withAnnotations annotations readCompoundListOrEmpty
            id <- endSpan start
            verifyEof
            let script = T_Annotation annotationId annotations $
                            T_Script id shebang commands
            reparseIndices script
        else do
            many anyChar
            id <- endSpan start
            return $ T_Script id shebang []

  where
    verifyShebang pos s = do
        case isValidShell s of
            Just True -> return ()
            Just False -> parseProblemAt pos ErrorC 1071 "ShellCheck only supports sh/bash/dash/ksh scripts. Sorry!"
            Nothing -> parseProblemAt pos ErrorC 1008 "This shebang was unrecognized. ShellCheck only supports sh/bash/dash/ksh. Add a 'shell' directive to specify."

    isValidShell s =
        let good = null s || any (`isPrefixOf` s) goodShells
            bad = any (`isPrefixOf` s) badShells
        in
            if good
                then Just True
                else if bad
                        then Just False
                        else Nothing

    goodShells = [
        "sh",
        "ash",
        "dash",
        "bash",
        "bats",
        "ksh"
        ]
    badShells = [
        "awk",
        "csh",
        "expect",
        "perl",
        "python",
        "ruby",
        "tcsh",
        "zsh"
        ]

    readUtf8Bom = called "Byte Order Mark" $ string "\xFEFF"

readScript = readScriptFile False

-- Interactively run a specific parser in ghci:
-- debugParse readSimpleCommand "echo 'hello world'"
debugParse p string = runIdentity $ do
    (res, _) <- runParser testEnvironment p "-" string
    return res

-- Interactively run the complete parser in ghci:
-- debugParseScript "#!/bin/bash\necho 'Hello World'\n"
debugParseScript string =
    result {
        -- Remove the noisiest parts
        prTokenPositions = Map.fromList [
            (Id 0, (newPosition {
                posFile = "removed for clarity",
                posLine = -1,
                posColumn = -1
            }, newPosition {
                posFile = "removed for clarity",
                posLine = -1,
                posColumn = -1
            }))]
    }
  where
    result = runIdentity $
        parseScript (mockedSystemInterface []) $ newParseSpec {
            psFilename = "debug",
            psScript = string
        }

testEnvironment =
    Environment {
        systemInterface = (mockedSystemInterface []),
        checkSourced = False,
        currentFilename = "myscript",
        ignoreRC = False,
        shellTypeOverride = Nothing
    }


isOk p s =      parsesCleanly p s == Just True   -- The string parses with no warnings
isWarning p s = parsesCleanly p s == Just False  -- The string parses with warnings
isNotOk p s =   parsesCleanly p s == Nothing     -- The string does not parse

parsesCleanly parser string = runIdentity $ do
    (res, sys) <- runParser testEnvironment
                    (parser >> eof >> getState) "-" string
    case (res, sys) of
        (Right userState, systemState) ->
            return $ Just . null $ parseNotes userState ++ parseProblems systemState
        (Left _, _) -> return Nothing

-- For printf debugging: print the value of an expression
-- Example: return $ dump $ T_Literal id [c]
dump :: Show a => a -> a
dump x = trace (show x) x

-- Like above, but print a specific expression:
-- Example: return $ dumps ("Returning: " ++ [c])  $ T_Literal id [c]
dumps :: Show x => x -> a -> a
dumps t = trace (show t)

parseWithNotes parser = do
    item <- parser
    state <- getState
    return (item, state)

compareNotes (ParseNote pos1 pos1' level1 _ s1) (ParseNote pos2 pos2' level2 _ s2) = compare (pos1, pos1', level1) (pos2, pos2', level2)
sortNotes = sortBy compareNotes


makeErrorFor parsecError =
    ParseNote pos pos ErrorC 1072 $
        getStringFromParsec $ errorMessages parsecError
    where
      pos = errorPos parsecError

getStringFromParsec errors =
        case map f errors of
            r -> unwords (take 1 $ catMaybes $ reverse r)  ++
                " Fix any mentioned problems and try again."
    where
        f err =
            case err of
                UnExpect s    ->  Nothing -- Due to not knowing Parsec, none of these
                SysUnExpect s ->  Nothing -- are actually helpful. <?> has been hidden
                Expect s      ->  Nothing -- and we only show explicit fail statements.
                Message s     ->  if null s then Nothing else return $ s ++ "."

runParser :: Monad m =>
    Environment m ->
    SCParser m v ->
    String ->
    String ->
    m (Either ParseError v, SystemState)

runParser env p filename contents =
    Ms.runStateT
        (Mr.runReaderT
            (runParserT p initialUserState filename contents)
            env)
        initialSystemState
system = lift . lift . lift

parseShell env name contents = do
    (result, state) <- runParser env (parseWithNotes readScript) name contents
    case result of
        Right (script, userstate) ->
            return newParseResult {
                prComments = map toPositionedComment $ nub $ parseNotes userstate ++ parseProblems state,
                prTokenPositions = Map.map startEndPosToPos (positionMap userstate),
                prRoot = Just $
                    reattachHereDocs script (hereDocMap userstate)
            }
        Left err -> do
            let context = contextStack state
            return newParseResult {
                prComments =
                    map toPositionedComment $
                        (filter (not . isIgnored context) $
                            notesForContext context
                            ++ [makeErrorFor err])
                        ++ parseProblems state,
                prTokenPositions = Map.empty,
                prRoot = Nothing
            }
  where
    -- A final pass for ignoring parse errors after failed parsing
    isIgnored stack note = any (contextItemDisablesCode False (codeForParseNote note)) stack

notesForContext list = zipWith ($) [first, second] $ filter isName list
  where
    isName (ContextName _ _) = True
    isName _ = False
    first (ContextName pos str) = ParseNote pos pos ErrorC 1073 $
        "Couldn't parse this " ++ str ++ ". Fix to allow more checks."
    second (ContextName pos str) = ParseNote pos pos InfoC 1009 $
        "The mentioned syntax error was in this " ++ str ++ "."

-- Go over all T_UnparsedIndex and reparse them as either arithmetic or text
-- depending on declare -A statements.
reparseIndices root =
   analyze blank blank f root
  where
    associative = getAssociativeArrays root
    isAssociative s = s `elem` associative
    f (T_Assignment id mode name indices value) = do
        newIndices <- mapM (fixAssignmentIndex name) indices
        newValue <- case value of
            (T_Array id2 words) -> do
                newWords <- mapM (fixIndexElement name) words
                return $ T_Array id2 newWords
            x -> return x
        return $ T_Assignment id mode name newIndices newValue
    f (TA_Variable id name indices) = do
        newIndices <- mapM (fixAssignmentIndex name) indices
        return $ TA_Variable id name newIndices
    f t = return t

    fixIndexElement name word =
        case word of
            T_IndexedElement id indices value -> do
                new <- mapM (fixAssignmentIndex name) indices
                return $ T_IndexedElement id new value
            _ -> return word

    fixAssignmentIndex name word =
        case word of
            T_UnparsedIndex id pos src ->
                parsed name pos src
            _ -> return word

    parsed name pos src =
        if isAssociative name
        then subParse pos (called "associative array index" $ readIndexSpan) src
        else subParse pos (called "arithmetic array index expression" $ optional space >> readArithmeticContents) src

reattachHereDocs root map =
    doTransform f root
  where
    f t@(T_HereDoc id dash quote string []) = fromMaybe t $ do
        list <- Map.lookup id map
        return $ T_HereDoc id dash quote string list
    f t = t

toPositionedComment :: ParseNote -> PositionedComment
toPositionedComment (ParseNote start end severity code message) =
    newPositionedComment {
        pcStartPos = (posToPos start)
      , pcEndPos = (posToPos end)
      , pcComment = newComment {
          cSeverity = severity
        , cCode = code
        , cMessage = message
      }
    }

posToPos :: SourcePos -> Position
posToPos sp = newPosition {
    posFile = sourceName sp,
    posLine = fromIntegral $ sourceLine sp,
    posColumn = fromIntegral $ sourceColumn sp
}

startEndPosToPos :: (SourcePos, SourcePos) -> (Position, Position)
startEndPosToPos (s, e) = (posToPos s, posToPos e)

-- TODO: Clean up crusty old code that this is layered on top of
parseScript :: Monad m =>
        SystemInterface m -> ParseSpec -> m ParseResult
parseScript sys spec =
    parseShell env (psFilename spec) (psScript spec)
  where
    env = Environment {
        systemInterface = sys,
        checkSourced = psCheckSourced spec,
        currentFilename = psFilename spec,
        ignoreRC = psIgnoreRC spec,
        shellTypeOverride = psShellTypeOverride spec
    }

-- Same as 'try' but emit syntax errors if the parse fails.
tryWithErrors :: Monad m => SCParser m v -> SCParser m v
tryWithErrors parser = do
    userstate <- getState
    oldContext <- getCurrentContexts
    input <- getInput
    pos <- getPosition
    result <- lift $ runParserT (setPosition pos >> getResult parser) userstate (sourceName pos) input
    case result of
        Right (result, endPos, endInput, endState) -> do
            -- 'many' objects if we don't consume anything at all, so read a dummy value
            void anyChar <|> eof
            putState endState
            setPosition endPos
            setInput endInput
            return result

        Left err -> do
            newContext <- getCurrentContexts
            addParseProblem $ makeErrorFor err
            mapM_ addParseProblem $ notesForContext newContext
            setCurrentContexts oldContext
            fail ""
  where
    getResult p = do
        result <- p
        endPos <- getPosition
        endInput <- getInput
        endState <- getState
        return (result, endPos, endInput, endState)

return []
runTests = $quickCheckAll