{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeFamilies #-} -- | -- -- /Commentary/ -- -- We're optimizing for simplicity here. The language balances conventions -- from other languages with choices to not overcomplicate things. Not -- overloading operators, for example. Mostly we want to have "good error -- messages" which is tough and subjective anyway. Not having multiline -- anything, for example, might be a good choice, except that we also want to -- be whitepsace insensitive. module Technique.Parser ( -- parser for technique procedure files. pTechnique, -- everthing else is only exposed for testing purposes. pMagicLine, pSpdxLine, pIdentifier, pType, stringLiteral, numberLiteral, pQuantity, pAttribute, pExpression, pStatement, pBlock, pProcedureDeclaration, pProcedureCode, ) where import Control.Monad ( unless, void, ) import Control.Monad.Combinators ( (<|>), many, optional, sepBy, sepBy1, some, ) import Core.Text.Rope ( Rope, appendRope, emptyRope, intoRope, singletonRope, ) import Data.Foldable ( foldl', ) import Data.Int ( Int64, Int8, ) import Data.Text ( Text, ) import qualified Data.Text as T (pack) import Data.Void ( Void, ) import Technique.Language import Technique.Quantity import Text.Megaparsec ( (), Parsec, getOffset, hidden, label, lookAhead, notFollowedBy, oneOf, skipMany, takeWhile1P, takeWhileP, try, ) import Text.Megaparsec.Char ( char, digitChar, lowerChar, newline, printChar, space, spaceChar, string, upperChar, ) import Text.Read ( readMaybe, ) type Parser = Parsec Void Text __VERSION__ :: Int __VERSION__ = 0 -- | -- Skip /zero/ or more actual space characters. The __megaparsec__ function -- @space@ etc consume all whitespace, not just ' '. That includes newlines, -- which is very unhelpful. skipSpace :: Parser () skipSpace = void (hidden (many (char ' ' <|> char '\t'))) -- | -- Skip at least /one/ actual space character. skipSpace1 :: Parser () skipSpace1 = void (hidden (some (char ' ' <|> char '\t'))) digitChar0 :: Parser Char digitChar0 = label "a digit" $ digitChar pMagicLine :: Parser Int pMagicLine = do void (char '%') "first line to begin with % character" void spaceChar "a space character" void (string "technique") void spaceChar "a space character" void (char 'v') "the character 'v' and then a number" v <- numberLiteral "the language version" void newline return (fromIntegral v) pSpdxLine :: Parser (Rope, Maybe Rope) pSpdxLine = do void (char '!') "second line to begin with ! character" skipSpace -- I know we're supposed to use takeWhile1P in cases like this, but aren't -- we just duplicating the work of the parser combinators? license <- takeWhile1P (Just "software license description (ie an SPDX-Licence-Header value)") (\c -> not (c == ';' || c == '\n')) copyright <- optional $ do void (char ';') "a semicolon" skipSpace void (char '©') <|> void (string "(c)") skipSpace takeWhile1P (Just "a copyright declaration") (/= '\n') void newline return (intoRope license, fmap intoRope copyright) --------------------------------------------------------------------- pProcedureDeclaration :: Parser (Identifier, [Identifier], [Type], [Type]) pProcedureDeclaration = do name <- pIdentifier skipSpace -- zero or more separated by comma params <- pIdentifiers skipSpace void (char ':') skipSpace ins <- pTypes1 skipSpace void (string "->") skipSpace out <- pTypes1 return (name, params, ins, out) identifierChar :: Parser Char identifierChar = hidden (lowerChar <|> digitChar0 <|> char '_' <|> char '\'') -- these do NOT consume trailing space. That's for pExpression to do. pIdentifier :: Parser Identifier pIdentifier = label "a valid identifier" $ do first <- lowerChar remainder <- many identifierChar return (Identifier (singletonRope first <> intoRope remainder)) pIdentifiers :: Parser [Identifier] pIdentifiers = sepBy (pIdentifier <* skipSpace) (char ',' <* skipSpace) pIdentifiers1 :: Parser [Identifier] pIdentifiers1 = sepBy1 (pIdentifier <* skipSpace) (char ',' <* skipSpace) typeChar :: Parser Char typeChar = hidden (upperChar <|> lowerChar <|> digitChar0) pType :: Parser Type pType = label "a valid type" $ try ( do void (string "()") return (Type "()") ) <|> ( do first <- upperChar remainder <- many typeChar return (Type (singletonRope first <> intoRope remainder)) ) pTypes1 :: Parser [Type] pTypes1 = sepBy1 (pType <* skipSpace) (char ',' <* skipSpace) --------------------------------------------------------------------- stringLiteral :: Parser Text stringLiteral = label "a string literal" $ do void (char '\"') str <- many ( do try ( do void (char '\\') void (char '"') return '"' ) <|> ( do notFollowedBy (char '\"') printChar ) ) void (char '\"') return (T.pack str) unitChar :: Parser Char unitChar = hidden (upperChar <|> lowerChar <|> char '°') unitLiteral :: Parser Rope unitLiteral = label "a units symbol" $ do str <- some unitChar return (intoRope str) numberLiteral :: Parser Int64 numberLiteral = label "a number literal" $ do digits <- some digitChar0 let result = readMaybe digits case result of Just number -> return number Nothing -> fail "expected a number but couldn't parse" decimalLiteral :: Parser Decimal decimalLiteral = label "a decimal literal" $ do digits1 <- some digitChar0 fraction <- optional ( do void (char '.') some digitChar0 ) return ( case fraction of Nothing -> let number = read digits1 in Decimal number 0 Just digits2 -> let e = fromIntegral (length digits2) decimal = read digits1 * 10 ^ e + read digits2 in Decimal decimal e ) superscriptLiteral :: Parser Int8 superscriptLiteral = label "a superscript literal" $ do sign <- optional (char '⁻' <|> char '¯') -- honestly not sure what the second of those is digits <- some (oneOf ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹']) let number = read (map toNumbers digits) return ( case sign of Just _ -> negate number Nothing -> number ) toNumbers :: Char -> Char toNumbers c = case c of '⁰' -> '0' '¹' -> '1' '²' -> '2' '³' -> '3' '⁴' -> '4' '⁵' -> '5' '⁶' -> '6' '⁷' -> '7' '⁸' -> '8' '⁹' -> '9' _ -> error "Invalid, superscript expected" pQuantity :: Parser Quantity pQuantity = ( do -- look ahead far enough to commit to this branch: the pieces of a -- decimal, a space, and then one of the characters that starts an -- uncertainty, magnitude, or symbol. lookAhead ( try ( do skipMany (digitChar0 <|> char '.' <|> char '-' <|> char ' ') void (char '±' <|> char '+' <|> char '×' <|> char 'x' <|> unitChar) ) ) n <- pMantissa u <- pUncertainty <|> pure (Decimal 0 0) m <- pMagnitude <|> pure 0 s <- pSymbol return (Quantity n u m s) ) <|> ( do n <- pNumber return (Number n) ) where pNumber = do sign <- optional (char '-') number <- numberLiteral return ( case sign of Just _ -> negate number Nothing -> number ) pMantissa = do sign <- optional (char '-') decimal <- try decimalLiteral skipSpace return ( case sign of Just _ -> negateDecimal decimal Nothing -> decimal ) pUncertainty = do void (char '±') <|> void (string "+/-") skipSpace decimal <- decimalLiteral skipSpace return decimal pMagnitude = do void (char '×') <|> void (char 'x') <|> hidden (void (char '*')) skipSpace void (string "10") number <- ( do void (char '^') sign <- optional (char '-') e <- numberLiteral pure ( fromIntegral ( case sign of Just _ -> negate e Nothing -> e ) ) <|> superscriptLiteral ) skipSpace return number pSymbol = do symbol <- unitLiteral skipSpace return symbol pOperator :: Parser Operator pOperator = (char '&' *> return WaitBoth) <|> (char '|' *> return WaitEither) <|> (char '+' *> return Combine) -- | -- Parse a Tablet. This consumes trailing space around initial delimiter and -- removes blank lines within the table (they're not syntactically meaningful) -- but only cosnsumes a single newline after trailing delimeter, leaving -- further consumption to pStatement. -- -- TODO this doesn't preserve alternate syntax if employed by user pTablet :: Parser Tablet pTablet = do void (char '[' <* hidden space) bindings <- many (pBinding <* hidden space) void (char ']' <* skipSpace) return (Tablet bindings) where pBinding = do name <- stringLiteral skipSpace void (char '~') skipSpace subexpr <- pExpression -- handle alternate syntax here {- -- FIXME this is not working void (optional (char ',')) -} return (Binding (Label (intoRope name)) subexpr) pAttribute :: Parser Attribute pAttribute = ( do void (char '@') role <- pIdentifier <|> pAny return (Role role) ) <|> ( do void (char '#') place <- pIdentifier <|> pAny return (Place place) ) where pAny = do void (char '*') return (Identifier (singletonRope '*')) pExpression :: Parser Expression pExpression = do o <- getOffset expr1 <- pTerm o skipSpace rest <- (optional (try pOperation2)) skipSpace case rest of Just (oper, expr2) -> return (Operation o oper expr1 expr2) Nothing -> return expr1 where pTerm o = pNone o <|> pUndefined o <|> pRestriction o <|> pGrouping o <|> pObject o <|> pApplication o <|> pLiteral o <|> pVariable o pNone :: Offset -> Parser Expression pNone o = do void (string "()") return (None o) pUndefined o = do void (char '?') return (Undefined o) pOperation2 = do -- 2 as in 2nd half operator <- pOperator skipSpace subexpr2 <- pExpression return (operator, subexpr2) pRestriction o = do attr <- pAttribute hidden space block <- pBlock return (Restriction o attr block) pGrouping o = do void (char '(') skipSpace subexpr <- pExpression void (char ')') skipSpace return (Grouping o subexpr) pObject o = do tablet <- pTablet return (Object o tablet) pApplication o = do lookAhead ( try ( do skipMany identifierChar skipSpace1 void (identifierChar <|> digitChar0 <|> char '(' <|> char '\"') ) ) name <- pIdentifier -- ie at least one space skipSpace1 -- FIXME better do this manually, not all valid subexpr <- pExpression return (Application o name subexpr) pLiteral o = ( do str <- stringLiteral return (Text o (intoRope str)) ) <|> ( do qty <- pQuantity return (Amount o qty) ) pVariable o = do names <- pIdentifiers1 return (Variable o names) pStatement :: Parser Statement pStatement = do o <- getOffset statement <- pAssignment o <|> pDeclaration o <|> pExecute o <|> pBlank o <|> pSeries o return statement where pAssignment o = label "an assignment" $ do lookAhead ( try ( do skipMany (identifierChar <|> char ',' <|> char ' ') void (char '=') ) ) names <- pIdentifiers1 skipSpace void (char '=') hidden space expr <- pExpression return (Assignment o names expr) pDeclaration o = label "a declaration" $ do lookAhead ( try ( do skipMany (identifierChar <|> char ',' <|> char ' ') void (char ':') ) ) proc <- pProcedureCode return (Declaration o proc) pExecute o = label "a value to execute" $ do expr <- pExpression return (Execute o expr) pBlank o = hidden $ do -- label "a blank line" void newline return (Blank o) pSeries o = do void (char ';') return (Series o) --------------------------------------------------------------------- pBlock :: Parser Block pBlock = do -- open block, absorb whitespace void (char '{' <* hidden space) -- process statements, but only single newline at a time statements <- many (pStatement <* skipSpace <* optional newline <* skipSpace) -- close block, and wipe out any trailing whitespace void (char '}' <* skipSpace <* optional newline) return (Block statements) -- Frankly, this parser looks ridiculous. Someone who knows what they are -- doing *please* help refactor this. It seems unavoidlable to run the -- pProcedureDeclaration parser twice, unless we can combine the successful -- parse and the consumtion of description lines into one function. Maybe -- this would be better done scanning ahead to count characters until a -- declaration shows up, then explicitly taking that many? fourSpaces :: Parser () fourSpaces = -- label "a code block indented by four spaces" $ void (char ' ' <* char ' ' <* char ' ' <* char ' ') <|> fail "code blocks must be indented by four spaces" pMarkdown :: Parser Markdown pMarkdown = do -- gobble blank newlines before a heading void ( many ( do notFollowedBy fourSpaces notFollowedBy pProcedureDeclaration void (skipSpace *> hidden newline) ) ) -- TODO heading results <- some ( do notFollowedBy fourSpaces notFollowedBy pProcedureDeclaration line <- takeWhileP (Just "another line of description text") (/= '\n') void (hidden newline) return line ) let description = foldl' (\acc text -> appendRope text acc <> "\n") emptyRope results return (Markdown description) pProcedureCode :: Parser Procedure pProcedureCode = do o <- getOffset (name, params, ins, out) <- pProcedureDeclaration <* skipSpace <* optional newline <* skipSpace block <- pBlock <* skipSpace <* optional newline return ( Procedure { procedureOffset = o, procedureName = name, procedureParams = params, procedureInput = ins, procedureOutput = out, procedureTitle = Nothing, -- FIXME procedureDescription = Nothing, procedureBlock = block } ) pProcedure :: Parser Procedure pProcedure = do description <- optional pMarkdown fourSpaces proc <- pProcedureCode return ( proc { procedureTitle = Nothing, -- FIXME procedureDescription = description } ) --------------------------------------------------------------------- pTechnique :: Parser Technique pTechnique = do version <- pMagicLine unless (version == __VERSION__) (fail ("currently the only recognized language version is v" ++ show __VERSION__)) (license, copyright) <- pSpdxLine void (many newline) body <- many pProcedure return $ Technique { techniqueVersion = version, techniqueLicense = intoRope license, techniqueCopyright = fmap intoRope copyright, techniqueBody = body }