Maintainer | Krasimir Angelov |
---|---|

Stability | stable |

Portability | portable |

Safe Haskell | None |

Language | Haskell2010 |

This module is an Application Programming Interface to load and interpret grammars compiled in the Portable Grammar Format (PGF). The PGF format is produced as the final output from the GF compiler. The API is meant to be used for embedding GF grammars in Haskell programs.

## Synopsis

- data PGF
- readPGF :: FilePath -> IO PGF
- showPGF :: PGF -> String
- type CId = String
- type AbsName = CId
- abstractName :: PGF -> AbsName
- type Cat = CId
- categories :: PGF -> [Cat]
- categoryContext :: PGF -> Cat -> [Hypo]
- type Fun = CId
- functions :: PGF -> [Fun]
- functionsByCat :: PGF -> Cat -> [Fun]
- functionType :: PGF -> Fun -> Maybe Type
- functionIsConstructor :: PGF -> Fun -> Bool
- hasLinearization :: Concr -> Fun -> Bool
- data Expr
- showExpr :: [CId] -> Expr -> String
- readExpr :: String -> Maybe Expr
- pExpr :: ReadS Expr
- mkAbs :: BindType -> CId -> Expr -> Expr
- unAbs :: Expr -> Maybe (BindType, CId, Expr)
- mkApp :: Fun -> [Expr] -> Expr
- unApp :: Expr -> Maybe (Fun, [Expr])
- mkStr :: String -> Expr
- unStr :: Expr -> Maybe String
- mkInt :: Int -> Expr
- unInt :: Expr -> Maybe Int
- mkFloat :: Double -> Expr
- unFloat :: Expr -> Maybe Double
- mkMeta :: Int -> Expr
- unMeta :: Expr -> Maybe Int
- mkCId :: p -> p
- exprHash :: Int32 -> Expr -> Int32
- exprSize :: Expr -> Int
- exprFunctions :: Expr -> [Fun]
- exprSubstitute :: Expr -> [Expr] -> Expr
- treeProbability :: PGF -> Expr -> Float
- data Type
- type Hypo = (BindType, CId, Type)
- data BindType
- startCat :: PGF -> Type
- readType :: String -> Maybe Type
- showType :: [CId] -> Type -> String
- showContext :: [CId] -> [Hypo] -> String
- mkType :: [Hypo] -> CId -> [Expr] -> Type
- unType :: Type -> ([Hypo], CId, [Expr])
- checkExpr :: PGF -> Expr -> Type -> Either String Expr
- inferExpr :: PGF -> Expr -> Either String (Expr, Type)
- checkType :: PGF -> Type -> Either String Type
- compute :: PGF -> Expr -> Expr
- type ConcName = CId
- data Concr
- languages :: PGF -> Map ConcName Concr
- concreteName :: Concr -> ConcName
- languageCode :: Concr -> String
- linearize :: Concr -> Expr -> String
- linearizeAll :: Concr -> Expr -> [String]
- tabularLinearize :: Concr -> Expr -> [(String, String)]
- tabularLinearizeAll :: Concr -> Expr -> [[(String, String)]]
- bracketedLinearize :: Concr -> Expr -> [BracketedString]
- bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
- type FId = Int
- data BracketedString
- showBracketedString :: BracketedString -> String
- flattenBracketedString :: BracketedString -> [String]
- printName :: Concr -> Fun -> Maybe String
- categoryFields :: Concr -> Cat -> Maybe [String]
- alignWords :: Concr -> Expr -> [(String, [Int])]
- data ParseOutput a
- parse :: Concr -> Type -> String -> ParseOutput [(Expr, Float)]
- parseWithHeuristics :: Concr -> Type -> String -> Double -> [(Cat, String -> Int -> Maybe (Expr, Float, Int))] -> ParseOutput [(Expr, Float)]
- parseToChart :: Concr -> Type -> String -> Double -> [(Cat, String -> Int -> Maybe (Expr, Float, Int))] -> Int -> ParseOutput ([FId], Map FId ([(Int, Int, String)], [(Expr, [PArg], Float)], Cat))
- data PArg = PArg [FId] !FId
- complete :: Concr -> Type -> String -> String -> ParseOutput [(String, CId, CId, Float)]
- lookupSentence :: Concr -> Type -> String -> [(Expr, Float)]
- generateAll :: PGF -> Type -> [(Expr, Float)]
- type MorphoAnalysis = (Fun, String, Float)
- lookupMorpho :: Concr -> String -> [MorphoAnalysis]
- lookupCohorts :: Concr -> String -> [(Int, String, [MorphoAnalysis], Int)]
- fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
- filterBest :: [(Int, String, [MorphoAnalysis], Int)] -> [(Int, String, [MorphoAnalysis], Int)]
- filterLongest :: [(Int, String, [MorphoAnalysis], Int)] -> [(Int, String, [MorphoAnalysis], Int)]
- data GraphvizOptions = GraphvizOptions {}
- graphvizDefaults :: GraphvizOptions
- graphvizAbstractTree :: PGF -> GraphvizOptions -> Expr -> String
- graphvizParseTree :: Concr -> GraphvizOptions -> Expr -> String
- graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String
- newtype PGFError = PGFError String
- type LiteralCallback = PGF -> (ConcName, Concr) -> String -> String -> Int -> Maybe (Expr, Float, Int)
- literalCallbacks :: [(AbsName, [(Cat, LiteralCallback)])]

# PGF

An abstract data type representing multilingual grammar in Portable Grammar Format.

readPGF :: FilePath -> IO PGF Source #

Reads a file in Portable Grammar Format and produces a `PGF` structure. The file is usually produced with:

$ gf -make <grammar file name>

# Identifiers

A data type that represents identifiers for functions and categories in PGF.

# Abstract syntax

abstractName :: PGF -> AbsName Source #

The abstract language name is the name of the top-level abstract module

## Categories

categories :: PGF -> [Cat] Source #

List of all categories defined in the grammar. The categories are defined in the abstract syntax with the 'cat' keyword.

## Functions

hasLinearization :: Concr -> Fun -> Bool Source #

Returns True if there is a linearization defined for that function in that language

## Expressions

## Instances

Eq Expr Source # | |

Data Expr Source # | |

Defined in PGF2.Expr gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Expr -> c Expr # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c Expr # dataTypeOf :: Expr -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c Expr) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Expr) # gmapT :: (forall b. Data b => b -> b) -> Expr -> Expr # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Expr -> r # gmapQr :: (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Expr -> r # gmapQ :: (forall d. Data d => d -> u) -> Expr -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> Expr -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> Expr -> m Expr # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Expr -> m Expr # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Expr -> m Expr # | |

Show Expr Source # | |

showExpr :: [CId] -> Expr -> String Source #

Renders an expression as a `String`. The list of identifiers is the list of all free variables in the expression, in reverse order of binding.

unAbs :: Expr -> Maybe (BindType, CId, Expr) Source #

Decomposes an expression into an abstraction and a body

mkApp :: Fun -> [Expr] -> Expr Source #

Constructs an expression by applying a function to a list of expressions

unApp :: Expr -> Maybe (Fun, [Expr]) Source #

Decomposes an expression into an application of a function

This function is only for backward compatibility with the old Haskell runtime.

exprFunctions :: Expr -> [Fun] Source #

## Types

type Hypo = (BindType, CId, Type) Source #

`Hypo` represents a hypothesis in a type, i.e. in the type A -> B, A is the hypothesis.

startCat :: PGF -> Type Source #

The start category is defined in the grammar with the 'startcat' flag. This is usually the sentence category, but it does not have to be. Even though a start category is defined, you can parse with any category; the start category definition is just for convenience.

showType :: [CId] -> Type -> String Source #

Renders a type as a `String`. The list of identifiers is the list of all free variables in the type, in reverse order of binding.

showContext :: [CId] -> [Hypo] -> String Source #

Renders a context (a list of hypotheses) as a `String`. The list of identifiers is the list of all free variables in the context, in reverse order of binding.

mkType :: [Hypo] -> CId -> [Expr] -> Type Source #

Creates a type from a list of hypotheses, a category and a list of arguments for the category. The operation `mkType [h_1,...,h_n] C [e_1,...,e_m]` will create `h_1 -> ... -> h_n -> C e_1 ... e_m`.

unType :: Type -> ([Hypo], CId, [Expr]) Source #

Decomposes a type into a list of hypotheses, a category and a list of arguments for the category.

## Type checking

Dynamically-built expressions should always be type-checked before being used in other functions, as the exceptions thrown by using invalid expressions may not be catchable.

checkExpr :: PGF -> Expr -> Type -> Either String Expr Source #

Checks an expression against a specified type.

inferExpr :: PGF -> Expr -> Either String (Expr, Type) Source #

Tries to infer the type of an expression. Note that even if the expression is type correct it is not always possible to infer its type in the GF type system. In this case the function returns an error.

checkType :: PGF -> Type -> Either String Type Source #

Check whether a type is consistent with the abstract syntax of the grammar.

## Computing

# Concrete syntax

concreteName :: Concr -> ConcName Source #

languageCode :: Concr -> String Source #

## Linearization

linearizeAll :: Concr -> Expr -> [String] Source #

Generates all possible linearizations of an expression

tabularLinearize :: Concr -> Expr -> [(String, String)] Source #

Generates a table of linearizations for an expression

tabularLinearizeAll :: Concr -> Expr -> [[(String, String)]] Source #

Generates a table of linearizations for an expression

bracketedLinearize :: Concr -> Expr -> [BracketedString] Source #

bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]] Source #

data BracketedString Source #

`BracketedString` represents a sentence that is linearized as usual, but where we also want to retain the "brackets" that mark the beginning and the end of each constituent.

Leaf String | this is the leaf i.e. a single token |

BIND | the surrounding tokens must be bound together |

Bracket CId !FId String CId [BracketedString] | this is a bracket. The |

showBracketedString :: BracketedString -> String Source #

Renders the bracketed string as a string where the brackets are shown as `(S ...)`, where `S` is the category.

flattenBracketedString :: BracketedString -> [String] Source #

Extracts the sequence of tokens from the bracketed string

## Parsing

data ParseOutput a Source #

This data type encodes the different outcomes which you could get from the parser.

ParseFailed Int String | The integer is the position, in number of Unicode characters, where the parser failed. The string is the token where the parser has failed. |

ParseOk a | If the parsing and the type checking are successful we get the abstract syntax trees as either a list or a chart. |

ParseIncomplete | The sentence is not complete. |

:: Concr | the language with which we parse |

-> Type | the start category |

-> String | the input sentence |

-> Double | the heuristic factor. A negative value tells the parser to look up the default from the grammar flags |

-> [(Cat, String -> Int -> Maybe (Expr, Float, Int))] | a list of callbacks for literal categories. The arguments of the callback are: the index of the constituent for the literal category; the input sentence; the current offset in the sentence. If a literal has been recognized then the output should be Just (expr,probability,end_offset) |

-> ParseOutput [(Expr, Float)] |

:: Concr | the language with which we parse |

-> Type | the start category |

-> String | the input sentence |

-> Double | the heuristic factor. A negative value tells the parser to look up the default from the grammar flags |

-> [(Cat, String -> Int -> Maybe (Expr, Float, Int))] | a list of callbacks for literal categories. The arguments of the callback are: the index of the constituent for the literal category; the input sentence; the current offset in the sentence. If a literal has been recognized then the output should be Just (expr,probability,end_offset) |

-> Int | the maximal number of roots |

-> ParseOutput ([FId], Map FId ([(Int, Int, String)], [(Expr, [PArg], Float)], Cat)) |

:: Concr | the language with which we parse |

-> Type | the start category |

-> String | the input sentence (excluding token being completed) |

-> String | prefix (partial token being completed) |

-> ParseOutput [(String, CId, CId, Float)] | (token, category, function, probability) |

Returns possible completions of the current partial input.

## Sentence Lookup

## Generation

generateAll :: PGF -> Type -> [(Expr, Float)] Source #

Generates an exhaustive possibly infinite list of all abstract syntax expressions of the given type. The expressions are ordered by their probability.

## Morphological Analysis

type MorphoAnalysis = (Fun, String, Float) Source #

This triple is returned by all functions that deal with the grammar's lexicon. Its first element is the name of an abstract lexical function which can produce a given word or a multiword expression (i.e. this is the lemma). After that follows a string which describes the particular inflection form.

The last element is the logarithm of the probability of the function. The probability is not conditionalized on the category of the function. This makes it possible to compare the likelihood of two functions even if they have different types.

lookupMorpho :: Concr -> String -> [MorphoAnalysis] Source #

`lookupMorpho` takes a string which must be a single word or a multiword expression. It then computes the list of all possible morphological analyses.

lookupCohorts :: Concr -> String -> [(Int, String, [MorphoAnalysis], Int)] Source #

`lookupCohorts` takes an arbitrary string and produces a list of all places where lexical items from the grammar have been identified (i.e. cohorts). The list consists of triples of the format `(start,ans,end)`, where `start-end` identifies the span in the text and `ans` is the list of possible morphological analyses similar to `lookupMorpho`.

The list is sorted first by the `start` position and after that by the `end` position. This can be used for instance if you want to filter only the longest matches.

fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])] Source #

filterBest :: [(Int, String, [MorphoAnalysis], Int)] -> [(Int, String, [MorphoAnalysis], Int)] Source #

filterLongest :: [(Int, String, [MorphoAnalysis], Int)] -> [(Int, String, [MorphoAnalysis], Int)] Source #

## Visualizations

data GraphvizOptions Source #

graphvizAbstractTree :: PGF -> GraphvizOptions -> Expr -> String Source #

Renders an abstract syntax tree in a Graphviz format.

graphvizParseTree :: Concr -> GraphvizOptions -> Expr -> String Source #

graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String Source #

# Exceptions

## Instances

Show PGFError Source # | |

Exception PGFError Source # | |

Defined in PGF2 toException :: PGFError -> SomeException # fromException :: SomeException -> Maybe PGFError # displayException :: PGFError -> String # |

# Grammar specific callbacks

type LiteralCallback = PGF -> (ConcName, Concr) -> String -> String -> Int -> Maybe (Expr, Float, Int) Source #

literalCallbacks :: [(AbsName, [(Cat, LiteralCallback)])] Source #

Callbacks for the App grammar