Maintainer | Krasimir Angelov |
---|---|
Stability | stable |
Portability | portable |
Safe Haskell | None |
Language | Haskell2010 |
This module is an Application Programming Interface to load and interpret grammars compiled in Portable Grammar Format (PGF). The PGF format is produced as a final output from the GF compiler. The API is meant to be used for embedding GF grammars in Haskell programs.
Synopsis
- data PGF
- readPGF :: FilePath -> IO PGF
- parsePGF :: ByteString -> PGF
- data CId
- mkCId :: String -> CId
- wildCId :: CId
- showCId :: CId -> String
- readCId :: String -> Maybe CId
- ppCId :: CId -> Doc
- pIdent :: ReadP String
- utf8CId :: ByteString -> CId
- type Language = CId
- showLanguage :: Language -> String
- readLanguage :: String -> Maybe Language
- languages :: PGF -> [Language]
- abstractName :: PGF -> Language
- languageCode :: PGF -> Language -> Maybe String
- data Type
- type Hypo = (BindType, CId, Type)
- showType :: [CId] -> Type -> String
- readType :: String -> Maybe Type
- mkType :: [Hypo] -> CId -> [Expr] -> Type
- mkHypo :: Type -> Hypo
- mkDepHypo :: CId -> Type -> Hypo
- mkImplHypo :: CId -> Type -> Hypo
- unType :: Type -> ([Hypo], CId, [Expr])
- categories :: PGF -> [CId]
- categoryContext :: PGF -> CId -> Maybe [Hypo]
- startCat :: PGF -> Type
- functions :: PGF -> [CId]
- functionsByCat :: PGF -> CId -> [CId]
- functionType :: PGF -> CId -> Maybe Type
- missingLins :: PGF -> Language -> [CId]
- type Tree = Expr
- data Expr
- showExpr :: [CId] -> Expr -> String
- readExpr :: String -> Maybe Expr
- mkAbs :: BindType -> CId -> Expr -> Expr
- unAbs :: Expr -> Maybe (BindType, CId, Expr)
- mkApp :: CId -> [Expr] -> Expr
- unApp :: Expr -> Maybe (CId, [Expr])
- unapply :: Expr -> (Expr, [Expr])
- mkStr :: String -> Expr
- unStr :: Expr -> Maybe String
- mkInt :: Int -> Expr
- unInt :: Expr -> Maybe Int
- mkDouble :: Double -> Expr
- unDouble :: Expr -> Maybe Double
- mkFloat :: Double -> Expr
- unFloat :: Expr -> Maybe Double
- mkMeta :: Int -> Expr
- unMeta :: Expr -> Maybe Int
- pExpr :: ReadP Expr
- exprSize :: Expr -> Int
- exprFunctions :: Expr -> [CId]
- linearize :: PGF -> Language -> Tree -> String
- linearizeAllLang :: PGF -> Tree -> [(Language, String)]
- linearizeAll :: PGF -> Tree -> [String]
- bracketedLinearize :: PGF -> Language -> Tree -> [BracketedString]
- bracketedLinearizeAll :: PGF -> Language -> Tree -> [[BracketedString]]
- tabularLinearizes :: PGF -> Language -> Expr -> [[(String, String)]]
- groupResults :: [[(Language, String)]] -> [(Language, [String])]
- showPrintName :: PGF -> Language -> CId -> String
- data BracketedString
- type FId = Int
- type LIndex = Int
- type Token = String
- showBracketedString :: BracketedString -> String
- flattenBracketedString :: BracketedString -> [String]
- parse :: PGF -> Language -> Type -> String -> [Tree]
- parseAllLang :: PGF -> Type -> String -> [(Language, [Tree])]
- parseAll :: PGF -> Type -> String -> [[Tree]]
- parse_ :: PGF -> Language -> Type -> Maybe Int -> String -> (ParseOutput, BracketedString)
- parseWithRecovery :: PGF -> Language -> Type -> [Type] -> Maybe Int -> String -> (ParseOutput, BracketedString)
- complete :: PGF -> Language -> Type -> String -> String -> (BracketedString, String, Map Token [CId])
- compute :: PGF -> Expr -> Expr
- paraphrase :: PGF -> Expr -> [Expr]
- checkType :: PGF -> Type -> Either TcError Type
- checkExpr :: PGF -> Expr -> Type -> Either TcError Expr
- inferExpr :: PGF -> Expr -> Either TcError (Expr, Type)
- data TcError
- = UnknownCat CId
- | UnknownFun CId
- | WrongCatArgs [CId] Type CId Int Int
- | TypeMismatch [CId] Expr Type Type
- | NotFunType [CId] Expr Type
- | CannotInferType [CId] Expr
- | UnresolvedMetaVars [CId] Expr [MetaId]
- | UnexpectedImplArg [CId] Expr
- | UnsolvableGoal [CId] MetaId Type
- ppTcError :: TcError -> Doc
- data ParseState
- initState :: PGF -> Language -> Type -> ParseState
- nextState :: ParseState -> ParseInput -> Either ErrorState ParseState
- getCompletions :: ParseState -> String -> Map Token ParseState
- recoveryStates :: [Type] -> ErrorState -> (ParseState, Map Token ParseState)
- data ParseInput = ParseInput {}
- simpleParseInput :: Token -> ParseInput
- mkParseInput :: PGF -> Language -> (forall a. b -> Map Token a -> Maybe a) -> [(CId, b -> Maybe (Tree, [Token]))] -> b -> ParseInput
- data ParseOutput
- = ParseFailed Int
- | TypeError [(FId, TcError)]
- | ParseOk [Tree]
- | ParseIncomplete
- getParseOutput :: ParseState -> Type -> Maybe Int -> (ParseOutput, BracketedString)
- getContinuationInfo :: ParseState -> Map [Token] [(FunId, CId, String)]
- generateAll :: PGF -> Type -> [Expr]
- generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr]
- generateFrom :: PGF -> Expr -> [Expr]
- generateFromDepth :: PGF -> Expr -> Maybe Int -> [Expr]
- generateRandom :: RandomGen g => g -> PGF -> Type -> [Expr]
- generateRandomDepth :: RandomGen g => g -> PGF -> Type -> Maybe Int -> [Expr]
- generateRandomFrom :: RandomGen g => g -> PGF -> Expr -> [Expr]
- generateRandomFromDepth :: RandomGen g => g -> PGF -> Expr -> Maybe Int -> [Expr]
- type Lemma = CId
- type Analysis = String
- data Morpho
- lookupMorpho :: Morpho -> String -> [(Lemma, Analysis)]
- buildMorpho :: PGF -> Language -> Morpho
- fullFormLexicon :: Morpho -> [(String, [(Lemma, Analysis)])]
- morphoMissing :: Morpho -> [String] -> [String]
- morphoKnown :: Morpho -> [String] -> [String]
- isInMorpho :: Morpho -> String -> Bool
- graphvizAbstractTree :: PGF -> (Bool, Bool) -> Tree -> String
- graphvizParseTree :: PGF -> Language -> GraphvizOptions -> Tree -> String
- graphvizParseTreeDep :: Maybe Labels -> PGF -> Language -> GraphvizOptions -> Tree -> String
- graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe CncLabels -> PGF -> CId -> Tree -> String
- graphvizBracketedString :: GraphvizOptions -> Maybe Labels -> Tree -> [BracketedString] -> String
- graphvizAlignment :: PGF -> [Language] -> Expr -> String
- gizaAlignment :: PGF -> (Language, Language) -> Expr -> (String, String, String)
- data GraphvizOptions = GraphvizOptions {}
- graphvizDefaults :: GraphvizOptions
- conlls2latexDoc :: [String] -> String
- type Labels = Map CId [String]
- getDepLabels :: String -> Labels
- type CncLabels = [CncLabel]
- getCncDepLabels :: String -> CncLabels
- data Probabilities
- mkProbabilities :: PGF -> Map CId Double -> Probabilities
- defaultProbabilities :: PGF -> Probabilities
- showProbabilities :: Probabilities -> String
- readProbabilitiesFromFile :: FilePath -> PGF -> IO Probabilities
- probTree :: PGF -> Expr -> Double
- setProbabilities :: Probabilities -> PGF -> PGF
- rankTreesByProbs :: PGF -> [Expr] -> [(Expr, Double)]
- browse :: PGF -> CId -> Maybe (String, [CId], [CId])
- data ATree t
- data Trie
- toATree :: Tree -> ATree Tree
- toTrie :: [ATree Tree] -> [[Trie]]
PGF
An abstract data type representing multilingual grammar in Portable Grammar Format.
readPGF :: FilePath -> IO PGF Source #
Reads file in Portable Grammar Format and produces
PGF
structure. The file is usually produced with:
$ gf -make <grammar file name>
parsePGF :: ByteString -> PGF Source #
Like readPGF
but you have to manage file-handling.
Since: 3.9.1
Identifiers
An abstract data type that represents identifiers for functions and categories in PGF.
utf8CId :: ByteString -> CId Source #
Creates an identifier from a UTF-8-encoded ByteString
Languages
This is just a CId
with the language name.
A language name is the identifier that you write in the
top concrete or abstract module in GF after the
concrete/abstract keyword. Example:
abstract Lang = ... concrete LangEng of Lang = ...
showLanguage :: Language -> String Source #
abstractName :: PGF -> Language Source #
The abstract language name is the name of the top-level abstract module
languageCode :: PGF -> Language -> Maybe String Source #
Gets the RFC 4646 language tag
of the language which the given concrete syntax implements,
if this is listed in the source grammar.
Example language tags include "en"
for English,
and "en-GB"
for British English.
Types
type Hypo = (BindType, CId, Type) Source #
Hypo
represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis
showType :: [CId] -> Type -> String Source #
renders type as String
. The list
of identifiers is the list of all free variables
in the expression in order reverse to the order
of binding.
mkType :: [Hypo] -> CId -> [Expr] -> Type Source #
creates a type from a list of hypotheses, a category and
list of arguments for the category. The operation
mkType [h_1,...,h_n] C [e_1,...,e_m]
will create
h_1 -> ... -> h_n -> C e_1 ... e_m
mkImplHypo :: CId -> Type -> Hypo Source #
creates hypothesis for dependent type with implicit argument i.e. ({x} : A)
categories :: PGF -> [CId] Source #
List of all categories defined in the given grammar. The categories are defined in the abstract syntax with the 'cat' keyword.
startCat :: PGF -> Type Source #
The start category is defined in the grammar with the 'startcat' flag. This is usually the sentence category, but it does not have to be. Even though a start category is defined, you can parse with any category. The start category definition is just for convenience.
Functions
missingLins :: PGF -> Language -> [CId] Source #
List of functions that lack linearizations in the given language.
Expressions & Trees
Tree
Expr
An expression in the abstract syntax of the grammar. It could be either a parameter of a dependent type or an abstract syntax tree for some sentence.
showExpr :: [CId] -> Expr -> String Source #
renders expression as String
. The list
of identifiers is the list of all free variables
in the expression in order reverse to the order
of binding.
mkApp :: CId -> [Expr] -> Expr Source #
Constructs an expression by applying a function to a list of expressions
unapply :: Expr -> (Expr, [Expr]) Source #
Decomposes an expression into an application of a constructor such as a constant or a metavariable
exprFunctions :: Expr -> [CId] Source #
Operations
Linearization
linearize :: PGF -> Language -> Tree -> String Source #
Linearizes given expression as string in the language
linearizeAllLang :: PGF -> Tree -> [(Language, String)] Source #
Linearizes given expression as string in all languages available in the grammar.
linearizeAll :: PGF -> Tree -> [String] Source #
The same as linearizeAllLang
but does not return
the language.
bracketedLinearize :: PGF -> Language -> Tree -> [BracketedString] Source #
Linearizes given expression as a bracketed string in the language
bracketedLinearizeAll :: PGF -> Language -> Tree -> [[BracketedString]] Source #
Linearizes given expression as a bracketed string in the language
tabularLinearizes :: PGF -> Language -> Expr -> [[(String, String)]] Source #
Creates a table from feature name to linearization. The outer list encodes the variations
showPrintName :: PGF -> Language -> CId -> String Source #
Show the printname of function or category
data BracketedString Source #
BracketedString represents a sentence that is linearized
as usual but we also want to retain the 'brackets'
that
mark the beginning and the end of each constituent.
Leaf Token | this is the leaf i.e. a single token |
Bracket CId !FId !FId !LIndex CId [Expr] [BracketedString] | this is a bracket. The |
Instances
JSON BracketedString Source # | |
Defined in PGFService readJSON :: JSValue -> Result BracketedString # showJSON :: BracketedString -> JSValue # readJSONs :: JSValue -> Result [BracketedString] # showJSONs :: [BracketedString] -> JSValue # |
showBracketedString :: BracketedString -> String Source #
Renders the bracketed string as string where
the brackets are shown as (S ...)
where
S
is the category.
Parsing
parse :: PGF -> Language -> Type -> String -> [Tree] Source #
Tries to parse the given string in the specified language and to produce abstract syntax expression.
parseAllLang :: PGF -> Type -> String -> [(Language, [Tree])] Source #
Tries to parse the given string with all available languages. The returned list contains pairs of language and list of abstract syntax expressions (this is a list, since grammars can be ambiguous). Only those languages for which at least one parsing is possible are listed.
parseAll :: PGF -> Type -> String -> [[Tree]] Source #
The same as parseAllLang
but does not return
the language.
parse_ :: PGF -> Language -> Type -> Maybe Int -> String -> (ParseOutput, BracketedString) Source #
The same as parse
but returns more detailed information
parseWithRecovery :: PGF -> Language -> Type -> [Type] -> Maybe Int -> String -> (ParseOutput, BracketedString) Source #
This is an experimental function. Use it at your own risk
complete :: PGF -> Language -> Type -> String -> String -> (BracketedString, String, Map Token [CId]) Source #
Evaluation
Type Checking
The type checker in PGF does both type checking and renaming
i.e. it verifies that all identifiers are declared and it
distinguishes between global function or type identifiers and
variable names. The type checker should always be applied on
expressions entered by the user i.e. those produced via functions
like readType
and readExpr
because otherwise unexpected results
could appear. All typechecking functions return updated versions
of the input types or expressions because the typechecking could
also lead to metavariables instantiations.
checkType :: PGF -> Type -> Either TcError Type Source #
Check whether a given type is consistent with the abstract syntax of the grammar.
checkExpr :: PGF -> Expr -> Type -> Either TcError Expr Source #
Checks an expression against a specified type.
inferExpr :: PGF -> Expr -> Either TcError (Expr, Type) Source #
Tries to infer the type of a given expression. Note that
even if the expression is type correct it is not always
possible to infer its type in the GF type system.
In this case the function returns the CannotInferType
error.
If an error occurs in the typechecking phase
the type checker returns not a plain text error message
but a TcError
structure which describes the error.
UnknownCat CId | Unknown category name was found. |
UnknownFun CId | Unknown function name was found. |
WrongCatArgs [CId] Type CId Int Int | A category was applied to wrong number of arguments.
The first integer is the number of expected arguments and
the second the number of given arguments.
The |
TypeMismatch [CId] Expr Type Type | The expression is not of the expected type.
The first type is the expected type, while
the second is the inferred. The |
NotFunType [CId] Expr Type | Something that is not of function type was applied to an argument. |
CannotInferType [CId] Expr | It is not possible to infer the type of an expression. |
UnresolvedMetaVars [CId] Expr [MetaId] | Some metavariables have to be instantiated in order to complete the typechecking. |
UnexpectedImplArg [CId] Expr | Implicit argument was passed where the type doesn't allow it |
UnsolvableGoal [CId] MetaId Type | There is a goal that cannot be solved |
Low level parsing API
data ParseState Source #
An abstract data type whose values represent the current state in an incremental parser.
initState :: PGF -> Language -> Type -> ParseState Source #
Creates an initial parsing state for a given language and startup category.
nextState :: ParseState -> ParseInput -> Either ErrorState ParseState Source #
From the current state and the next token
nextState
computes a new state, where the token
is consumed and the current position is shifted by one.
If the new token cannot be accepted then an error state
is returned.
getCompletions :: ParseState -> String -> Map Token ParseState Source #
If the next token is not known but only its prefix (possibly empty prefix)
then the getCompletions
function can be used to calculate the possible
next words and the consequent states. This is used for word completions in
the GF interpreter.
recoveryStates :: [Type] -> ErrorState -> (ParseState, Map Token ParseState) Source #
data ParseInput Source #
The input to the parser is a pair of predicates. The first one
piToken
selects, from a list of suggestions from the grammar, the token that
actually appears at the current position in the input string.
The second one piLiteral
recognizes whether a literal with forest id FId
could be matched at the current position.
simpleParseInput :: Token -> ParseInput Source #
This function constructs the simplest possible parser input.
It checks the tokens for exact matching and recognizes only String
, Int
and Float
literals.
The Int
and Float
literals match only if the token passed is some number.
The String
literal always matches but the length of the literal could be only one token.
mkParseInput :: PGF -> Language -> (forall a. b -> Map Token a -> Maybe a) -> [(CId, b -> Maybe (Tree, [Token]))] -> b -> ParseInput Source #
data ParseOutput Source #
This data type encodes the different outcomes which you could get from the parser.
ParseFailed Int | The integer is the position in number of tokens where the parser failed. |
TypeError [(FId, TcError)] | The parsing was successful but none of the trees is type correct.
The forest id ( |
ParseOk [Tree] | If the parsing and the type checking are successful we get a list of abstract syntax trees. The list should be non-empty. |
ParseIncomplete | The sentence is not complete. Only partial output is produced |
getParseOutput :: ParseState -> Type -> Maybe Int -> (ParseOutput, BracketedString) Source #
This function extracts the list of all completed parse trees that span the whole input consumed so far. The trees are also limited by the category specified, which is usually the same as the startup category.
getContinuationInfo :: ParseState -> Map [Token] [(FunId, CId, String)] Source #
Returns the continuation of a ParseState with exportable types. Used by PGFService.
Generation
The PGF interpreter allows automatic generation of abstract syntax expressions of a given type. Since the type system of GF allows dependent types, the generation is in general undecidable. In fact, the set of all type signatures in the grammar is equivalent to a Turing-complete language (Prolog).
There are several generation methods which mainly differ in:
- whether the expressions are sequentially or randomly generated?
- are they generated from a template? The template is an expression containing meta variables which the generator will fill in.
- is there a limit of the depth of the expression? The depth can be used to limit the search space, which in some cases is the only way to make the search decidable.
generateAll :: PGF -> Type -> [Expr] Source #
Generates an exhaustive possibly infinite list of abstract syntax expressions.
generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr] Source #
A variant of generateAll
which also takes as argument
the upper limit of the depth of the generated expression.
generateFrom :: PGF -> Expr -> [Expr] Source #
Generates a list of abstract syntax expressions
in a way similar to generateAll
but instead of
generating all instances of a given type, this
function uses a template.
generateFromDepth :: PGF -> Expr -> Maybe Int -> [Expr] Source #
A variant of generateFrom
which also takes as argument
the upper limit of the depth of the generated subexpressions.
generateRandom :: RandomGen g => g -> PGF -> Type -> [Expr] Source #
Generates an infinite list of random abstract syntax expressions. This is useful for tree bank generation which after that can be used for grammar testing.
generateRandomDepth :: RandomGen g => g -> PGF -> Type -> Maybe Int -> [Expr] Source #
A variant of generateRandom
which also takes as argument
the upper limit of the depth of the generated expression.
generateRandomFrom :: RandomGen g => g -> PGF -> Expr -> [Expr] Source #
Random generation based on template
generateRandomFromDepth :: RandomGen g => g -> PGF -> Expr -> Maybe Int -> [Expr] Source #
Random generation based on template with a limitation in the depth.
Morphological Analysis
Visualizations
graphvizAbstractTree :: PGF -> (Bool, Bool) -> Tree -> String Source #
Renders abstract syntax tree in Graphviz format.
The pair of Bool
(funs,cats)
lets you control whether function names and
category names are included in the rendered tree.
graphvizParseTree :: PGF -> Language -> GraphvizOptions -> Tree -> String Source #
graphvizParseTreeDep :: Maybe Labels -> PGF -> Language -> GraphvizOptions -> Tree -> String Source #
graphvizDependencyTree Source #
:: String | Output format: |
-> Bool | Include extra information (debug) |
-> Maybe Labels | abstract label information obtained with |
-> Maybe CncLabels | concrete label information obtained with ' ' (was: unused (was: |
-> PGF | |
-> CId | The language of analysis |
-> Tree | |
-> String | Rendered output in the specified format |
Visualize word dependency tree.
graphvizBracketedString :: GraphvizOptions -> Maybe Labels -> Tree -> [BracketedString] -> String Source #
data GraphvizOptions Source #
conlls2latexDoc :: [String] -> String Source #
getDepLabels :: String -> Labels Source #
Prepare lines obtained from a configuration file for labels for
use with graphvizDependencyTree
. Format per line fun label*
.
getCncDepLabels :: String -> CncLabels Source #
Probabilities
data Probabilities Source #
An abstract data structure which represents the probabilities for the different functions in a grammar.
mkProbabilities :: PGF -> Map CId Double -> Probabilities Source #
Builds probability tables. The second argument is a map which contains the known probabilities. If some function is not in the map then it gets assigned some probability based on the even distribution of the unallocated probability mass for the result category.
defaultProbabilities :: PGF -> Probabilities Source #
Returns the default even distribution.
showProbabilities :: Probabilities -> String Source #
Renders the probability structure as string
readProbabilitiesFromFile :: FilePath -> PGF -> IO Probabilities Source #
Reads the probabilities from a file. This should be a text file where on every line there is a function name followed by a real number. The number represents the probability mass allocated for that function. The function name and the probability should be separated by a whitespace.
setProbabilities :: Probabilities -> PGF -> PGF Source #
rankTreesByProbs :: PGF -> [Expr] -> [(Expr, Double)] Source #
rank from highest to lowest probability
Browsing
Tries
A type for plain applicative trees