{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE CPP #-} -- | -- Module : Data.TPTP.Parse.Combinators -- Description : Parser combinators for the TPTP language. -- Copyright : (c) Evgenii Kotelnikov, 2019 -- License : GPL-3 -- Maintainer : evgeny.kotelnikov@gmail.com -- Stability : experimental -- module Data.TPTP.Parse.Combinators ( -- * Whitespace whitespace, -- * Names atom, var, distinctObject, function, predicate, -- * Sorts and types sort, tff1Sort, type_, -- * First-order logic number, term, literal, clause, unsortedFirstOrder, sortedFirstOrder, monomorphicFirstOrder, polymorphicFirstOrder, -- * Units unit, tptp, -- * Annotations intro, parent, source, info ) where #if MIN_VERSION_base(4, 8, 0) import Prelude hiding (pure, (<$>), (<*>), (*>), (<*)) #endif import Control.Applicative (pure, (<*>), (*>), (<*), (<|>), optional) import Data.Char (isAscii, isAsciiLower, isAsciiUpper, isDigit, isPrint) import Data.Function (on) import Data.Functor ((<$>), ($>)) import Data.List (sortBy, genericLength) import Data.List.NonEmpty (NonEmpty) import qualified Data.List.NonEmpty as NEL (fromList, toList) import qualified Data.Scientific as Sci (base10Exponent, coefficient) import Data.Text (Text) import qualified Data.Text as Text (pack, unpack, cons) import Data.Attoparsec.Text as Atto ( Parser, (), char, string, decimal, scientific, signed, isEndOfLine, endOfLine, satisfy, option, eitherP, choice, manyTill, takeWhile, skipSpace, skipMany, skipWhile, endOfInput, sepBy, sepBy1 ) import Data.TPTP hiding (name, clause) import qualified Data.TPTP as TPTP (name) -- * Helper functions -- | Consume a single line comment - characters between @%@ and newline. comment :: Parser () comment = char '%' *> skipWhile (not . isEndOfLine) *> (endOfLine <|> endOfInput) "comment" -- | Consume a block comments - characters between /* and */. blockComment :: Parser () blockComment = string "/*" *> bc "block comment" where bc = skipWhile (/= '*') *> (string "*/" $> () <|> bc) -- | Consume white space and trailing comments. whitespace :: Parser () whitespace = skipSpace *> skipMany ((comment <|> blockComment) *> skipSpace) "whitespace" -- | @lexeme@ makes a given parser consume trailing whitespace. This function is -- needed because off-the-shelf attoparsec parsers do not do it. lexeme :: Parser a -> Parser a lexeme p = p <* whitespace {-# INLINE lexeme #-} -- | Parse an unsigned integer. integer :: Parser Integer integer = lexeme decimal "integer" {-# INLINE integer #-} token :: Text -> Parser Text token t = lexeme (string t) "token " ++ Text.unpack t {-# INLINE token #-} op :: Char -> Parser Char op c = lexeme (char c) "operator " ++ [c] {-# INLINE op #-} parens :: Parser a -> Parser a parens p = op '(' *> p <* op ')' "parens" {-# INLINE parens #-} optionalParens :: Parser a -> Parser a optionalParens p = parens p <|> p {-# INLINE optionalParens #-} brackets :: Parser a -> Parser a brackets p = op '[' *> p <* op ']' "brackets" {-# INLINE brackets #-} bracketList :: Parser a -> Parser [a] bracketList p = brackets (p `sepBy` op ',') "bracket list" {-# INLINE bracketList #-} bracketList1 :: Parser a -> Parser (NonEmpty a) bracketList1 p = NEL.fromList <$> brackets (p `sepBy1` op ',') "bracket list 1" {-# INLINE bracketList1 #-} application :: Parser f -> Parser a -> Parser (f, [a]) application f a = (,) <$> f <*> option [] (parens (a `sepBy1` op ',')) {-# INLINE application #-} labeled :: Text -> Parser a -> Parser a labeled l p = token l *> parens p {-# INLINE labeled #-} comma :: Parser a -> Parser a comma p = op ',' *> p {-# INLINE comma #-} maybeP :: Parser a -> Parser (Maybe a) maybeP = optional . comma {-# INLINE maybeP #-} enum :: (Named a, Enum a, Bounded a) => Parser a enum = choice $ fmap (\(n, c) -> token n $> c "reserved " ++ Text.unpack n) $ sortBy (flip compare `on` fst) $ fmap (\c -> (TPTP.name c, c)) [minBound..] -- * Parser combinators -- ** Names isAlphaNumeric :: Char -> Bool isAlphaNumeric c = isAsciiLower c || isAsciiUpper c || isDigit c || c == '_' isAsciiPrint :: Char -> Bool isAsciiPrint c = isAscii c && isPrint c lowerWord, upperWord :: Parser Text lowerWord = Text.cons <$> satisfy isAsciiLower <*> Atto.takeWhile isAlphaNumeric upperWord = Text.cons <$> satisfy isAsciiUpper <*> Atto.takeWhile isAlphaNumeric quoted :: Char -> Parser Text quoted q = Text.pack <$> (char q *> manyTill escaped (char q)) "quoted " ++ [q] where escaped = char '\\' *> (char q $> q <|> char '\\' $> '\\') <|> satisfy isAsciiPrint -- | Parse an atomic word. Single-quoted atoms are parsed without the single -- quotes and with the characters @'@ and @\\@ unescaped. atom :: Parser Atom atom = Atom <$> lexeme (quoted '\'' <|> lowerWord) "atom" {-# INLINE atom #-} -- | Parse a variable. var :: Parser Var var = Var <$> lexeme upperWord "var" {-# INLINE var #-} -- | Parse a distinct object. Double quotes are not preserved and the characters -- @'@ and @\\@ are unescaped. distinctObject :: Parser DistinctObject distinctObject = DistinctObject <$> lexeme (quoted '"') "distinct object" {-# INLINE distinctObject #-} -- | Parse a reserved word. reserved :: (Named a, Enum a, Bounded a) => Parser (Reserved a) reserved = extended <$> lexeme lowerWord "reserved" {-# INLINE reserved #-} name :: (Named a, Enum a, Bounded a) => Parser (Name a) name = Reserved <$> (char '$' *> reserved) <|> Defined <$> atom "name" -- | Parser a function name. function :: Parser (Name Function) function = name "function" {-# INLINE function #-} -- | Parse a predicate name. predicate :: Parser (Name Predicate) predicate = name "predicate" {-# INLINE predicate #-} -- ** Sorts and typess -- | Parse a sort. sort :: Parser (Name Sort) sort = name "sort" {-# INLINE sort #-} -- | Parse a sort in sorted polymorphic logic. tff1Sort :: Parser TFF1Sort tff1Sort = SortVariable <$> var <|> uncurry TFF1Sort <$> application sort tff1Sort "tff1 sort" mapping :: Parser a -> Parser ([a], a) mapping s = (,) <$> option [] (args <* op '>') <*> s where args = fmap (:[]) s <|> parens (s `sepBy1` op '*') -- | Parse a type. type_ :: Parser Type type_ = uncurry . tff1Type <$> (maybe [] NEL.toList <$> optional prefix) <*> matrix "type" where prefix = token "!>" *> bracketList1 sortVar <* op ':' sortVar = var <* op ':' <* token "$tType" matrix = optionalParens (mapping tff1Sort) -- ** First-order logic -- | Parse a number. number :: Parser Number number = RationalConstant <$> signed integer <* char '/' <*> integer <|> real <$> lexeme scientific "number" where real n | Sci.base10Exponent n == 0 = IntegerConstant (Sci.coefficient n) | otherwise = RealConstant n -- | Parse a term. term :: Parser Term term = parens term <|> uncurry Function <$> application function term <|> Variable <$> var <|> Number <$> number <|> DistinctTerm <$> distinctObject "term" -- | Parse the equality and the unequality sign. eq :: Parser Sign eq = enum "eq" {-# INLINE eq #-} -- | Parse a literal. literal :: Parser Literal literal = parens literal <|> Equality <$> term <*> eq <*> term <|> uncurry Predicate <$> application predicate term "literal" -- | Parse the negation sign. sign :: Parser Sign sign = option Positive (op '~' $> Negative) {-# INLINE sign #-} -- | Parse a signed literal. signedLiteral :: Parser (Sign, Literal) signedLiteral = (,) <$> sign <*> literal "signed literal" {-# INLINE signedLiteral #-} -- | Parse a clause. clause :: Parser Clause clause = parens clause <|> Clause . NEL.fromList <$> signedLiteral `sepBy1` op '|' "clause" -- | Parse a quantifier. quantifier :: Parser Quantifier quantifier = enum "quantifier" {-# INLINE quantifier #-} -- | Parse a logical connective. connective :: Parser Connective connective = enum "connective" {-# INLINE connective #-} -- | Given a parser for sort annotations, parse a formula in first-order logic. firstOrder :: Parser s -> Parser (FirstOrder s) firstOrder p = do f <- unitary option f (Connected f <$> connective <*> firstOrder p) where unitary = parens (firstOrder p) <|> Atomic <$> literal <|> Quantified <$> quantifier <*> vs <* op ':' <*> unitary <|> Negated <$> (op '~' *> unitary) "unitary first order" vs = bracketList1 $ (,) <$> var <*> p -- | Parse a formula in unsorted first-order logic. unsortedFirstOrder :: Parser UnsortedFirstOrder unsortedFirstOrder = firstOrder unsorted where unsorted = pure (Unsorted ()) "unsorted" sorted :: Parser s -> Parser (Sorted s) sorted s = Sorted <$> optional (op ':' *> s) "sorted" -- | An alias for 'monomorphicFirstOrder'. sortedFirstOrder :: Parser SortedFirstOrder sortedFirstOrder = monomorphicFirstOrder -- | Parse a formula in sorted monomorphic first-order logic. monomorphicFirstOrder :: Parser MonomorphicFirstOrder monomorphicFirstOrder = firstOrder (sorted sort) "tff0" quantifiedSort :: Parser QuantifiedSort quantifiedSort = token "$tType" $> QuantifiedSort () -- | Parse a formula in sorted polymorphic first-order logic. polymorphicFirstOrder :: Parser PolymorphicFirstOrder polymorphicFirstOrder = firstOrder (sorted (eitherP quantifiedSort tff1Sort)) "tff1" -- ** Units -- | Parse a formula in a given TPTP language. formula :: Language -> Parser Formula formula = \case CNF_ -> CNF <$> clause "cnf" FOF_ -> FOF <$> unsortedFirstOrder "fof" TFF_ -> tff <$> polymorphicFirstOrder "tff" where tff f = case monomorphizeFirstOrder f of Just f' -> TFF0 f' Nothing -> TFF1 f -- | Parse a formula role. role :: Parser (Reserved Role) role = reserved "role" {-# INLINE role #-} -- | Parse the name of a TPTP language. language :: Parser Language language = enum "language" {-# INLINE language #-} -- | Parse a TPTP declaration in a given language. declaration :: Language -> Parser Declaration declaration l = token "type" *> comma (optionalParens typeDeclaration) <|> Formula <$> role <*> comma (formula l) "declaration" -- | Parse a declaration with the @type@ role - either a typing relation or -- a sort declaration. typeDeclaration :: Parser Declaration typeDeclaration = Sort <$> atom <* op ':' <*> arity <|> Typing <$> atom <* op ':' <*> type_ "type declaration" where arity = genericLength . fst <$> mapping (token "$tType") -- | Parse a unit name. unitName :: Parser (Either Atom Integer) unitName = eitherP atom (signed integer) "unit name" {-# INLINE unitName #-} -- | Parse a list of unit names. unitNames :: Parser (NonEmpty UnitName) unitNames = bracketList1 unitName "unit names" {-# INLINE unitNames #-} -- | Parse an @include@ statement. include :: Parser Unit include = labeled "include" (Include <$> atom <*> maybeP unitNames) <* op '.' "include" -- | Parse an annotated unit. annotatedUnit :: Parser Unit annotatedUnit = do l <- language let n = unitName let d = declaration l let a = maybeP annotation parens (Unit <$> n <*> comma d <*> a) <* op '.' "annotated unit" -- | Parse a TPTP unit. unit :: Parser Unit unit = include <|> annotatedUnit "unit" -- | Parse a TPTP input. tptp :: Parser TPTP tptp = TPTP <$> manyTill unit endOfInput "derivation" -- ** Annotations -- | Parse an introduction marking. intro :: Parser Intro intro = enum "intro" {-# INLINE intro #-} -- | Parse a unit of information about a formula. info :: Parser Info info = labeled "description" (Description <$> atom) <|> labeled "iquote" (Iquote <$> atom) <|> labeled "status" (Status <$> reserved) <|> labeled "assumptions" (Assumptions <$> unitNames) <|> labeled "refutation" (Refutation <$> atom) <|> labeled "new_symbols" (NewSymbols <$> atom <*> comma symbols) <|> labeled "bind" (Bind <$> var <*> comma expr) <|> Expression <$> expr <|> uncurry Application <$> application atom info <|> InfoNumber <$> number <|> Infos <$> infos where symbols = bracketList (eitherP var atom) infos :: Parser [Info] infos = bracketList info "infos" {-# INLINE infos #-} -- | Parse and expression expr :: Parser Expression expr = char '$' *> (labeled "fot" (Term <$> term) <|> Logical <$> (language >>= parens . formula)) "expression" -- | Parse a parent. parent :: Parser Parent parent = Parent <$> source <*> option [] (op ':' *> infos) "parent" -- | Parse the source of a unit. source :: Parser Source source = token "unknown" $> UnknownSource <|> labeled "file" (File <$> atom <*> maybeP unitName) <|> labeled "theory" (Theory <$> atom <*> maybeP infos) <|> labeled "creator" (Creator <$> atom <*> maybeP infos) <|> labeled "introduced" (Introduced <$> reserved <*> maybeP infos) <|> labeled "inference" (Inference <$> atom <*> comma infos <*> comma (bracketList parent)) <|> UnitSource <$> unitName "source" -- | Parse an annotation. annotation :: Parser Annotation annotation = (,) <$> source <*> maybeP infos "annotation"