{-# OPTIONS_GHC -fno-warn-orphans #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE CPP #-} -- | -- Module : Data.TPTP.Parse.Combinators -- Description : Parser combinators for the TPTP language. -- Copyright : (c) Evgenii Kotelnikov, 2019-2021 -- License : GPL-3 -- Maintainer : evgeny.kotelnikov@gmail.com -- Stability : experimental -- module Data.TPTP.Parse.Combinators ( -- * Whitespace skipWhitespace, input, -- * Names atom, var, distinctObject, function, predicate, -- * Sorts and types sort, tff1Sort, type_, -- * First-order logic number, term, literal, clause, unsortedFirstOrder, sortedFirstOrder, monomorphicFirstOrder, polymorphicFirstOrder, -- * Units unit, tptp, tstp, -- * Annotations szs, intro, parent, source, info ) where #if MIN_VERSION_base(4, 8, 0) import Prelude hiding (pure, (<$>), (<*>), (*>), (<*)) #endif import Control.Applicative (pure, (<*>), (*>), (<*), (<|>), optional, empty, many) import Data.Char (isAscii, isAsciiLower, isAsciiUpper, isDigit, isPrint) import Data.Function (on) import Data.Functor ((<$>), ($>)) import Data.Maybe (fromMaybe) #if !MIN_VERSION_base(4, 8, 0) import Data.Monoid (Monoid(..)) #endif import Data.List (sortBy, genericLength) import Data.List.NonEmpty (NonEmpty) import qualified Data.List.NonEmpty as NEL (fromList, toList) #if !MIN_VERSION_base(4, 11, 0) import Data.Semigroup (Semigroup(..), sconcat) #else import Data.Semigroup (sconcat) #endif import qualified Data.Scientific as Sci (base10Exponent, coefficient) import Data.Text (Text) import qualified Data.Text as Text (pack, unpack, cons) import Data.Attoparsec.Text as Atto ( Parser, (), char, string, decimal, scientific, signed, isEndOfLine, endOfLine, satisfy, option, eitherP, choice, manyTill, takeWhile, skip, skipSpace, skipMany, skipWhile, endOfInput, sepBy, sepBy1 ) import Data.TPTP hiding (name, clause) import qualified Data.TPTP as TPTP (name) -- * Helper functions -- | Consume all character until the end of line. skipLine :: Parser () skipLine = skipWhile (not . isEndOfLine) {-# INLINE skipLine #-} -- | Consume the first character of a single line comment - @%@ or @#@. -- The grammar of the TPTP language only defines @%@, -- but some theorem provers in addition use @#@. skipBeginComment :: Parser () skipBeginComment = skip (\c -> c == '%' || c == '#') {-# INLINE skipBeginComment #-} -- | Parse the contents of a single line comment. commented :: Parser p -> Parser p commented p = skipBeginComment *> p <* skipLine <* (endOfLine <|> endOfInput) "commented" -- | Consume a single line comment - characters between @%@ or @#@ and newline. skipComment :: Parser () skipComment = commented (pure ()) "comment" {-# INLINE skipComment #-} -- | Consume a block comment - characters between /* and */. skipBlockComment :: Parser () skipBlockComment = string "/*" *> bc "block comment" where bc = skipWhile (/= '*') *> (string "*/" $> () <|> bc) -- | Consume white space and trailing comments. skipWhitespace :: Parser () skipWhitespace = skipSpace *> skipMany ((skipComment <|> skipBlockComment) *> skipSpace) "whitespace" -- | @lexeme@ makes a given parser consume trailing whitespace. This function is -- needed because off-the-shelf attoparsec parsers do not do it. lexeme :: Parser a -> Parser a lexeme p = p <* skipWhitespace {-# INLINE lexeme #-} -- | @input@ runs a given parser skipping leading whitespace. The function -- succeeds if the parser consumes the entire input. input :: Parser a -> Parser a input p = skipWhitespace *> p <* endOfInput "input" {-# INLINE input #-} -- | Parse an unsigned integer. integer :: Parser Integer integer = lexeme decimal "integer" {-# INLINE integer #-} token :: Text -> Parser Text token t = lexeme (string t) "token " ++ Text.unpack t {-# INLINE token #-} op :: Char -> Parser Char op c = lexeme (char c) "operator " ++ [c] {-# INLINE op #-} parens :: Parser a -> Parser a parens p = op '(' *> p <* op ')' "parens" {-# INLINE parens #-} optionalParens :: Parser a -> Parser a optionalParens p = p <|> parens (optionalParens p) {-# INLINE optionalParens #-} brackets :: Parser a -> Parser a brackets p = op '[' *> p <* op ']' "brackets" {-# INLINE brackets #-} bracketList :: Parser a -> Parser [a] bracketList p = brackets (p `sepBy` op ',') "bracket list" {-# INLINE bracketList #-} bracketList1 :: Parser a -> Parser (NonEmpty a) bracketList1 p = NEL.fromList <$> brackets (p `sepBy1` op ',') "bracket list 1" {-# INLINE bracketList1 #-} application :: Parser f -> Parser a -> Parser (f, [a]) application f a = (,) <$> f <*> option [] (parens (optionalParens a `sepBy1` op ',')) {-# INLINE application #-} labeled :: Text -> Parser a -> Parser a labeled l p = token l *> parens p {-# INLINE labeled #-} comma :: Parser a -> Parser a comma p = op ',' *> p {-# INLINE comma #-} maybeP :: Parser a -> Parser (Maybe a) maybeP = optional . comma {-# INLINE maybeP #-} named :: (Named a, Enum a, Bounded a) => Parser a named = choice $ fmap (\(n, c) -> string n $> c "named " ++ Text.unpack n) $ sortBy (flip compare `on` fst) $ fmap (\c -> (TPTP.name c, c)) [minBound..] enum :: (Named a, Enum a, Bounded a) => Parser a enum = lexeme named {-# INLINE enum #-} -- * Parser combinators -- ** Names isAlphaNumeric :: Char -> Bool isAlphaNumeric c = isAsciiLower c || isAsciiUpper c || isDigit c || c == '_' isAsciiPrint :: Char -> Bool isAsciiPrint c = isAscii c && isPrint c lowerWord, upperWord :: Parser Text lowerWord = Text.cons <$> satisfy isAsciiLower <*> Atto.takeWhile isAlphaNumeric upperWord = Text.cons <$> satisfy isAsciiUpper <*> Atto.takeWhile isAlphaNumeric quoted :: Char -> Parser Text quoted q = Text.pack <$> (char q *> manyTill escaped (char q)) "quoted " ++ [q] where escaped = char '\\' *> (char q $> q <|> char '\\' $> '\\') <|> satisfy isAsciiPrint -- | Parse an atomic word. Single-quoted atoms are parsed without the single -- quotes and with the characters @'@ and @\\@ unescaped. atom :: Parser Atom atom = Atom <$> lexeme (quoted '\'' <|> lowerWord) "atom" {-# INLINE atom #-} -- | Parse a variable. var :: Parser Var var = Var <$> lexeme upperWord "var" {-# INLINE var #-} -- | Parse a distinct object. Double quotes are not preserved and the characters -- @'@ and @\\@ are unescaped. distinctObject :: Parser DistinctObject distinctObject = DistinctObject <$> lexeme (quoted '"') "distinct object" {-# INLINE distinctObject #-} -- | Parse a reserved word. reserved :: (Named a, Enum a, Bounded a) => Parser (Reserved a) reserved = extended <$> lexeme lowerWord "reserved" {-# INLINE reserved #-} name :: (Named a, Enum a, Bounded a) => Parser (Name a) name = Reserved <$> (char '$' *> reserved) <|> Defined <$> atom "name" -- | Parse a function name. function :: Parser (Name Function) function = name "function" {-# INLINE function #-} -- | Parse a predicate name. predicate :: Parser (Name Predicate) predicate = name "predicate" {-# INLINE predicate #-} -- ** Sorts and typess -- | Parse a sort. sort :: Parser (Name Sort) sort = name "sort" {-# INLINE sort #-} -- | Parse a sort in sorted polymorphic logic. tff1Sort :: Parser TFF1Sort tff1Sort = SortVariable <$> var <|> uncurry TFF1Sort <$> application sort tff1Sort "tff1 sort" mapping :: Parser a -> Parser ([a], a) mapping s = (,) <$> option [] (args <* op '>') <*> s where args = fmap (:[]) s <|> parens (s `sepBy1` op '*') -- | Parse a type. type_ :: Parser Type type_ = uncurry . tff1Type <$> (maybe [] NEL.toList <$> optional prefix) <*> matrix "type" where prefix = token "!>" *> bracketList1 sortVar <* op ':' sortVar = var <* op ':' <* optionalParens (token "$tType") matrix = optionalParens (mapping (optionalParens tff1Sort)) -- ** First-order logic -- | Parse a number. number :: Parser Number number = RationalConstant <$> signed integer <* char '/' <*> integer <|> real <$> lexeme scientific "number" where real n | Sci.base10Exponent n == 0 = IntegerConstant (Sci.coefficient n) | otherwise = RealConstant n -- | Parse a term. -- -- @term@ supports parsing superfluous parenthesis around arguments -- of the function application, which are not present in the TPTP grammar. term :: Parser Term term = uncurry Function <$> application function term <|> Variable <$> var <|> Number <$> number <|> DistinctTerm <$> distinctObject "term" -- | Parse the equality and the inequality sign. eq :: Parser Sign eq = enum "eq" {-# INLINE eq #-} -- | Parse a literal. -- -- @literal@ supports parsing superfluous parenthesis around arguments -- of the predicate application, which are not present in the TPTP grammar. literal :: Parser Literal literal = Equality <$> optionalParens term <*> eq <*> optionalParens term <|> uncurry Predicate <$> application predicate term "literal" -- | Parse a signed literal. -- -- @signedLiteral@ supports parsing superfluous parenthesis around the literal -- under the negation sign, which are not present in the TPTP grammar. signedLiteral :: Parser (Sign, Literal) signedLiteral = fmap (Negative,) (op '~' *> optionalParens literal) <|> fmap (Positive,) literal "signed literal" {-# INLINE signedLiteral #-} -- | Parse a clause. -- -- @clause@ supports parsing superfluous parenthesis around any of the -- subclauses of the clause, which are not present in the TPTP grammar. clause :: Parser Clause clause = sconcat . NEL.fromList <$> subclause `sepBy1` op '|' where subclause = unitClause <$> signedLiteral <|> parens clause "subclause" -- | Parse a quantifier. quantifier :: Parser Quantifier quantifier = enum "quantifier" {-# INLINE quantifier #-} -- | Parse a logical connective. connective :: Parser Connective connective = enum "connective" {-# INLINE connective #-} -- | Given a parser for sort annotations, parse a formula in first-order logic. firstOrder :: Parser s -> Parser (FirstOrder s) firstOrder p = do f <- unitary option f (Connected f <$> connective <*> firstOrder p) where unitary = Atomic <$> literal <|> Negated <$> (op '~' *> unitary) <|> Quantified <$> quantifier <*> vs <* op ':' <*> unitary <|> parens (firstOrder p) "unitary first order" vs = bracketList1 $ (,) <$> var <*> p -- | Parse a formula in unsorted first-order logic. unsortedFirstOrder :: Parser UnsortedFirstOrder unsortedFirstOrder = firstOrder unsorted where unsorted = pure (Unsorted ()) "unsorted" sorted :: Parser s -> Parser (Sorted s) sorted s = Sorted <$> optional (op ':' *> optionalParens s) "sorted" -- | An alias for 'monomorphicFirstOrder'. sortedFirstOrder :: Parser SortedFirstOrder sortedFirstOrder = monomorphicFirstOrder -- | Parse a formula in sorted monomorphic first-order logic. monomorphicFirstOrder :: Parser MonomorphicFirstOrder monomorphicFirstOrder = firstOrder (sorted sort) "tff0" quantifiedSort :: Parser QuantifiedSort quantifiedSort = token "$tType" $> QuantifiedSort () -- | Parse a formula in sorted polymorphic first-order logic. polymorphicFirstOrder :: Parser PolymorphicFirstOrder polymorphicFirstOrder = firstOrder (sorted (eitherP quantifiedSort tff1Sort)) "tff1" -- ** Units -- | Parse a formula in a given TPTP language. formula :: Language -> Parser Formula formula = \case CNF_ -> CNF <$> clause "cnf" FOF_ -> FOF <$> unsortedFirstOrder "fof" TFF_ -> tff <$> polymorphicFirstOrder "tff" where tff f = case monomorphizeFirstOrder f of Just f' -> TFF0 f' Nothing -> TFF1 f -- | Parse a formula role. role :: Parser (Reserved Role) role = reserved "role" {-# INLINE role #-} -- | Parse the name of a TPTP language. language :: Parser Language language = enum "language" {-# INLINE language #-} -- | Parse a TPTP declaration in a given language. declaration :: Language -> Parser Declaration declaration l = token "type" *> comma (optionalParens typeDeclaration) <|> Formula <$> role <*> comma (formula l) "declaration" -- | Parse a declaration with the @type@ role - either a typing relation or -- a sort declaration. typeDeclaration :: Parser Declaration typeDeclaration = Sort <$> atom <* op ':' <*> optionalParens arity <|> Typing <$> atom <* op ':' <*> optionalParens type_ "type declaration" where arity = genericLength . fst <$> mapping (optionalParens (token "$tType")) -- | Parse a unit name. unitName :: Parser (Either Atom Integer) unitName = eitherP atom (signed integer) "unit name" {-# INLINE unitName #-} -- | Parse a list of unit names. unitNames :: Parser (NonEmpty UnitName) unitNames = bracketList1 unitName "unit names" {-# INLINE unitNames #-} -- | Parse an @include@ statement. include :: Parser Unit include = labeled "include" (Include <$> atom <*> maybeP unitNames) <* op '.' "include" -- | Parse an annotated unit. annotatedUnit :: Parser Unit annotatedUnit = do l <- language let n = unitName let d = declaration l let a = maybeP annotation parens (Unit <$> n <*> comma d <*> a) <* op '.' "annotated unit" -- | Parse a TPTP unit. unit :: Parser Unit unit = include <|> annotatedUnit "unit" -- | Parse a TPTP input. tptp :: Parser TPTP tptp = TPTP <$> manyTill unit endOfInput "tptp" -- | Parse a TSTP input. tstp :: Parser TSTP tstp = skipSpace *> (TSTP <$> szs <*> manyTill unit endOfInput) <* endOfInput "tstp" -- ** Annotations instance Semigroup SZS where SZS s d <> SZS s' d' = SZS (s <|> s') (d <|> d') instance Monoid SZS where mempty = SZS empty empty mappend = (<>) -- | Parse the SZS ontology information of a TSTP output inside a comment. szs :: Parser SZS szs = fromMaybe mempty . mconcat <$> many szsComment szsComment :: Parser (Maybe SZS) szsComment = commented (skipSpace *> optional (skipBeginComment *> skipSpace) *> optional szsAnnotation) <* skipSpace "szs comment" szsAnnotation :: Parser SZS szsAnnotation = string "SZS" *> skipSpace *> (szsStatus <|> szsDataform) "szs annotation" szsStatus :: Parser SZS szsStatus = string "status" *> skipSpace *> (fromStatus <$> status) "status" where fromStatus s = SZS (Just s) Nothing status = eitherP (unwrapSZSOntology <$> named) (unwrapSZSOntology <$> named) szsDataform :: Parser SZS szsDataform = string "output" *> skipSpace *> string "start" *> skipSpace *> (fromDataform <$> dataform) "dataform" where fromDataform d = SZS Nothing (Just d) dataform = unwrapSZSOntology <$> named -- | Parse an introduction marking. intro :: Parser Intro intro = enum "intro" {-# INLINE intro #-} -- | Parse a unit of information about a formula. info :: Parser Info info = labeled "description" (Description <$> atom) <|> labeled "iquote" (Iquote <$> atom) <|> labeled "status" (Status <$> reserved) <|> labeled "assumptions" (Assumptions <$> unitNames) <|> labeled "refutation" (Refutation <$> atom) <|> labeled "new_symbols" (NewSymbols <$> atom <*> comma symbols) <|> labeled "bind" (Bind <$> var <*> comma expr) <|> Expression <$> expr <|> uncurry Application <$> application atom info <|> InfoNumber <$> number <|> Infos <$> infos where symbols = bracketList (eitherP var atom) infos :: Parser [Info] infos = bracketList info "infos" {-# INLINE infos #-} -- | Parse an expression. expr :: Parser Expression expr = char '$' *> (labeled "fot" (Term <$> optionalParens term) <|> Logical <$> (language >>= parens . optionalParens . formula)) "expression" -- | Parse a parent. parent :: Parser Parent parent = Parent <$> source <*> option [] (op ':' *> infos) "parent" -- | Parse the source of a unit. source :: Parser Source source = token "unknown" $> UnknownSource <|> labeled "file" (File <$> atom <*> maybeP unitName) <|> labeled "theory" (Theory <$> atom <*> maybeP infos) <|> labeled "creator" (Creator <$> atom <*> maybeP infos) <|> labeled "introduced" (Introduced <$> reserved <*> maybeP infos) <|> labeled "inference" (Inference <$> atom <*> comma infos <*> comma (bracketList parent)) <|> UnitSource <$> unitName "source" -- | Parse an annotation. annotation :: Parser Annotation annotation = (,) <$> source <*> maybeP infos "annotation"