Maintainer	Krasimir Angelov
Stability	stable
Portability	portable
Safe Haskell	None
Language	Haskell2010

PGF2

Contents

PGF
Identifiers
Abstract syntax
Concrete syntax
Exceptions
Grammar specific callbacks

Description

This module is an Application Programming Interface to load and interpret grammars compiled in the Portable Grammar Format (PGF). The PGF format is produced as the final output from the GF compiler. The API is meant to be used for embedding GF grammars in Haskell programs

Synopsis

data PGF
readPGF :: FilePath -> IO PGF
showPGF :: PGF -> String
type CId = String
type AbsName = CId
abstractName :: PGF -> AbsName
type Cat = CId
categories :: PGF -> [Cat]
categoryContext :: PGF -> Cat -> [Hypo]
type Fun = CId
functions :: PGF -> [Fun]
functionsByCat :: PGF -> Cat -> [Fun]
functionType :: PGF -> Fun -> Maybe Type
functionIsConstructor :: PGF -> Fun -> Bool
hasLinearization :: Concr -> Fun -> Bool
data Expr
showExpr :: [CId] -> Expr -> String
readExpr :: String -> Maybe Expr
pExpr :: ReadS Expr
mkAbs :: BindType -> CId -> Expr -> Expr
unAbs :: Expr -> Maybe (BindType, CId, Expr)
mkApp :: Fun -> [Expr] -> Expr
unApp :: Expr -> Maybe (Fun, [Expr])
mkStr :: String -> Expr
unStr :: Expr -> Maybe String
mkInt :: Int -> Expr
unInt :: Expr -> Maybe Int
mkFloat :: Double -> Expr
unFloat :: Expr -> Maybe Double
mkMeta :: Int -> Expr
unMeta :: Expr -> Maybe Int
mkCId :: p -> p
exprHash :: Int32 -> Expr -> Int32
exprSize :: Expr -> Int
exprFunctions :: Expr -> [Fun]
exprSubstitute :: Expr -> [Expr] -> Expr
treeProbability :: PGF -> Expr -> Float
data Type
type Hypo = (BindType, CId, Type)
data BindType
- = Explicit
- | Implicit
startCat :: PGF -> Type
readType :: String -> Maybe Type
showType :: [CId] -> Type -> String
showContext :: [CId] -> [Hypo] -> String
mkType :: [Hypo] -> CId -> [Expr] -> Type
unType :: Type -> ([Hypo], CId, [Expr])
checkExpr :: PGF -> Expr -> Type -> Either String Expr
inferExpr :: PGF -> Expr -> Either String (Expr, Type)
checkType :: PGF -> Type -> Either String Type
compute :: PGF -> Expr -> Expr
type ConcName = CId
data Concr
languages :: PGF -> Map ConcName Concr
concreteName :: Concr -> ConcName
languageCode :: Concr -> String
linearize :: Concr -> Expr -> String
linearizeAll :: Concr -> Expr -> [String]
tabularLinearize :: Concr -> Expr -> [(String, String)]
tabularLinearizeAll :: Concr -> Expr -> [[(String, String)]]
bracketedLinearize :: Concr -> Expr -> [BracketedString]
bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]]
type FId = Int
data BracketedString
- = Leaf String
- | BIND
- | Bracket CId !FId String CId [BracketedString]
showBracketedString :: BracketedString -> String
flattenBracketedString :: BracketedString -> [String]
printName :: Concr -> Fun -> Maybe String
categoryFields :: Concr -> Cat -> Maybe [String]
alignWords :: Concr -> Expr -> [(String, [Int])]
data ParseOutput a
- = ParseFailed Int String
- | ParseOk a
- | ParseIncomplete
parse :: Concr -> Type -> String -> ParseOutput [(Expr, Float)]
parseWithHeuristics :: Concr -> Type -> String -> Double -> [(Cat, String -> Int -> Maybe (Expr, Float, Int))] -> ParseOutput [(Expr, Float)]
parseToChart :: Concr -> Type -> String -> Double -> [(Cat, String -> Int -> Maybe (Expr, Float, Int))] -> Int -> ParseOutput ([FId], Map FId ([(Int, Int, String)], [(Expr, [PArg], Float)], Cat))
data PArg = PArg [FId] !FId
lookupSentence :: Concr -> Type -> String -> [(Expr, Float)]
generateAll :: PGF -> Type -> [(Expr, Float)]
type MorphoAnalysis = (Fun, String, Float)
lookupMorpho :: Concr -> String -> [MorphoAnalysis]
lookupCohorts :: Concr -> String -> [(Int, String, [MorphoAnalysis], Int)]
fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])]
filterBest :: [(Int, String, [MorphoAnalysis], Int)] -> [(Int, String, [MorphoAnalysis], Int)]
filterLongest :: [(Int, String, [MorphoAnalysis], Int)] -> [(Int, String, [MorphoAnalysis], Int)]
data GraphvizOptions = GraphvizOptions {
- noLeaves :: Bool
- noFun :: Bool
- noCat :: Bool
- noDep :: Bool
- nodeFont :: String
- leafFont :: String
- nodeColor :: String
- leafColor :: String
- nodeEdgeStyle :: String
- leafEdgeStyle :: String
}
graphvizDefaults :: GraphvizOptions
graphvizAbstractTree :: PGF -> GraphvizOptions -> Expr -> String
graphvizParseTree :: Concr -> GraphvizOptions -> Expr -> String
graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String
newtype PGFError = PGFError String
type LiteralCallback = PGF -> (ConcName, Concr) -> String -> String -> Int -> Maybe (Expr, Float, Int)
literalCallbacks :: [(AbsName, [(Cat, LiteralCallback)])]

PGF

data PGF Source #

An abstract data type representing multilingual grammar in Portable Grammar Format.

readPGF :: FilePath -> IO PGF Source #

Reads file in Portable Grammar Format and produces PGF structure. The file is usually produced with:

$ gf -make <grammar file name>

showPGF :: PGF -> String Source #

Identifiers

type CId = String Source #

An data type that represents identifiers for functions and categories in PGF.

Abstract syntax

type AbsName Source #

Arguments

= CId

Name of abstract syntax

abstractName :: PGF -> AbsName Source #

The abstract language name is the name of the top-level abstract module

Functions

type Fun Source #

Arguments

= CId

Name of function

functions :: PGF -> [Fun] Source #

List of all functions defined in the abstract syntax

functionsByCat :: PGF -> Cat -> [Fun] Source #

List of all functions defined for a category

functionType :: PGF -> Fun -> Maybe Type Source #

The type of a function

functionIsConstructor :: PGF -> Fun -> Bool Source #

The type of a function

hasLinearization :: Concr -> Fun -> Bool Source #

Returns True if there is a linearization defined for that function in that language

Expressions

data Expr Source #

Instances

Eq Expr Source #
Instance details Defined in PGF2.Expr Methods (==) :: Expr -> Expr -> Bool # (/=) :: Expr -> Expr -> Bool #
Data Expr Source #
Instance details Defined in PGF2.Expr Methods gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Expr -> c Expr # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c Expr # toConstr :: Expr -> Constr # dataTypeOf :: Expr -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c Expr) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Expr) # gmapT :: (forall b. Data b => b -> b) -> Expr -> Expr # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Expr -> r # gmapQr :: (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Expr -> r # gmapQ :: (forall d. Data d => d -> u) -> Expr -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> Expr -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> Expr -> m Expr # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Expr -> m Expr # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Expr -> m Expr #
Show Expr Source #
Instance details Defined in PGF2.Expr Methods showsPrec :: Int -> Expr -> ShowS # show :: Expr -> String # showList :: [Expr] -> ShowS #

showExpr :: [CId] -> Expr -> String Source #

renders an expression as a String. The list of identifiers is the list of all free variables in the expression in order reverse to the order of binding.

readExpr :: String -> Maybe Expr Source #

parses a String as an expression

pExpr :: ReadS Expr Source #

mkAbs :: BindType -> CId -> Expr -> Expr Source #

Constructs an expression by lambda abstraction

unAbs :: Expr -> Maybe (BindType, CId, Expr) Source #

Decomposes an expression into an abstraction and a body

mkApp :: Fun -> [Expr] -> Expr Source #

Constructs an expression by applying a function to a list of expressions

unApp :: Expr -> Maybe (Fun, [Expr]) Source #

Decomposes an expression into an application of a function

mkStr :: String -> Expr Source #

Constructs an expression from a string literal

unStr :: Expr -> Maybe String Source #

Decomposes an expression into a string literal

mkInt :: Int -> Expr Source #

Constructs an expression from an integer literal

unInt :: Expr -> Maybe Int Source #

Decomposes an expression into an integer literal

mkFloat :: Double -> Expr Source #

Constructs an expression from a real number

unFloat :: Expr -> Maybe Double Source #

Decomposes an expression into a real number literal

mkMeta :: Int -> Expr Source #

Constructs a meta variable as an expression

unMeta :: Expr -> Maybe Int Source #

Decomposes an expression into a meta variable

mkCId :: p -> p Source #

this functions is only for backward compatibility with the old Haskell runtime

exprHash :: Int32 -> Expr -> Int32 Source #

exprSize :: Expr -> Int Source #

exprFunctions :: Expr -> [Fun] Source #

exprSubstitute :: Expr -> [Expr] -> Expr Source #

treeProbability :: PGF -> Expr -> Float Source #

Types

data Type Source #

Instances

Show Type Source #
Instance details Defined in PGF2.Type Methods showsPrec :: Int -> Type -> ShowS # show :: Type -> String # showList :: [Type] -> ShowS #

type Hypo = (BindType, CId, Type) Source #

Hypo represents a hypothesis in a type i.e. in the type A -> B, A is the hypothesis

data BindType Source #

Constructors

Explicit
Implicit

Instances

Show BindType Source #
Instance details Defined in PGF2.Expr Methods showsPrec :: Int -> BindType -> ShowS # show :: BindType -> String # showList :: [BindType] -> ShowS #

startCat :: PGF -> Type Source #

The start category is defined in the grammar with the 'startcat' flag. This is usually the sentence category but it is not necessary. Despite that there is a start category defined you can parse with any category. The start category definition is just for convenience.

readType :: String -> Maybe Type Source #

parses a String as a type

showType :: [CId] -> Type -> String Source #

renders a type as a String. The list of identifiers is the list of all free variables in the type in order reverse to the order of binding.

showContext :: [CId] -> [Hypo] -> String Source #

renders a type as a String. The list of identifiers is the list of all free variables in the type in order reverse to the order of binding.

mkType :: [Hypo] -> CId -> [Expr] -> Type Source #

creates a type from a list of hypothesises, a category and a list of arguments for the category. The operation mkType [h_1,...,h_n] C [e_1,...,e_m] will create h_1 -> ... -> h_n -> C e_1 ... e_m

unType :: Type -> ([Hypo], CId, [Expr]) Source #

Decomposes a type into a list of hypothesises, a category and a list of arguments for the category.

Type checking

checkExpr :: PGF -> Expr -> Type -> Either String Expr Source #

Checks an expression against a specified type.

inferExpr :: PGF -> Expr -> Either String (Expr, Type) Source #

Tries to infer the type of an expression. Note that even if the expression is type correct it is not always possible to infer its type in the GF type system. In this case the function returns an error.

checkType :: PGF -> Type -> Either String Type Source #

Check whether a type is consistent with the abstract syntax of the grammar.

Computing

compute :: PGF -> Expr -> Expr Source #

Concrete syntax

type ConcName Source #

Arguments

= CId

Name of concrete syntax

data Concr Source #

languages :: PGF -> Map ConcName Concr Source #

List of all languages available in the grammar.

concreteName :: Concr -> ConcName Source #

languageCode :: Concr -> String Source #

Linearization

linearize :: Concr -> Expr -> String Source #

Linearizes an expression as a string in the language

linearizeAll :: Concr -> Expr -> [String] Source #

Generates all possible linearizations of an expression

tabularLinearize :: Concr -> Expr -> [(String, String)] Source #

Generates a table of linearizations for an expression

tabularLinearizeAll :: Concr -> Expr -> [[(String, String)]] Source #

Generates a table of linearizations for an expression

bracketedLinearize :: Concr -> Expr -> [BracketedString] Source #

bracketedLinearizeAll :: Concr -> Expr -> [[BracketedString]] Source #

type FId = Int Source #

data BracketedString Source #

BracketedString represents a sentence that is linearized as usual but we also want to retain the 'brackets' that mark the beginning and the end of each constituent.

Constructors

Leaf String	this is the leaf i.e. a single token
BIND	the surrounding tokens must be bound together
Bracket CId !FId String CId [BracketedString]	this is a bracket. The `CId` is the category of the phrase. The `FId` is an unique identifier for every phrase in the sentence. For context-free grammars i.e. without discontinuous constituents this identifier is also unique for every bracket. When there are discontinuous phrases then the identifiers are unique for every phrase but not for every bracket since the bracket represents a constituent. The different constituents could still be distinguished by using the analysis string. If the grammar is reduplicating then the constituent indices will be the same for all brackets that represents the same constituent. The second `CId` is the name of the abstract function that generated this phrase.

showBracketedString :: BracketedString -> String Source #

Renders the bracketed string as a string where the brackets are shown as (S ...) where S is the category.

flattenBracketedString :: BracketedString -> [String] Source #

Extracts the sequence of tokens from the bracketed string

printName :: Concr -> Fun -> Maybe String Source #

categoryFields :: Concr -> Cat -> Maybe [String] Source #

alignWords :: Concr -> Expr -> [(String, [Int])] Source #

Parsing

data ParseOutput a Source #

This data type encodes the different outcomes which you could get from the parser.

Constructors

ParseFailed Int String	The integer is the position in number of unicode characters where the parser failed. The string is the token where the parser have failed.
ParseOk a	If the parsing and the type checking are successful we get the abstract syntax trees as either a list or a chart.
ParseIncomplete	The sentence is not complete.

parse :: Concr -> Type -> String -> ParseOutput [(Expr, Float)] Source #

parseWithHeuristics Source #

Arguments

:: Concr	the language with which we parse
-> Type	the start category
-> String	the input sentence
-> Double	the heuristic factor. A negative value tells the parser to lookup up the default from the grammar flags
-> [(Cat, String -> Int -> Maybe (Expr, Float, Int))]	a list of callbacks for literal categories. The arguments of the callback are: the index of the constituent for the literal category; the input sentence; the current offset in the sentence. If a literal has been recognized then the output should be Just (expr,probability,end_offset)
-> ParseOutput [(Expr, Float)]

parseToChart Source #

Arguments

:: Concr	the language with which we parse
-> Type	the start category
-> String	the input sentence
-> Double	the heuristic factor. A negative value tells the parser to lookup up the default from the grammar flags
-> [(Cat, String -> Int -> Maybe (Expr, Float, Int))]	a list of callbacks for literal categories. The arguments of the callback are: the index of the constituent for the literal category; the input sentence; the current offset in the sentence. If a literal has been recognized then the output should be Just (expr,probability,end_offset)
-> Int	the maximal number of roots
-> ParseOutput ([FId], Map FId ([(Int, Int, String)], [(Expr, [PArg], Float)], Cat))

data PArg Source #

Constructors

PArg [FId] !FId

Instances

Eq PArg Source #
Instance details Defined in PGF2.FFI Methods (==) :: PArg -> PArg -> Bool # (/=) :: PArg -> PArg -> Bool #
Ord PArg Source #
Instance details Defined in PGF2.FFI Methods compare :: PArg -> PArg -> Ordering # (<) :: PArg -> PArg -> Bool # (<=) :: PArg -> PArg -> Bool # (>) :: PArg -> PArg -> Bool # (>=) :: PArg -> PArg -> Bool # max :: PArg -> PArg -> PArg # min :: PArg -> PArg -> PArg #
Show PArg Source #
Instance details Defined in PGF2.FFI Methods showsPrec :: Int -> PArg -> ShowS # show :: PArg -> String # showList :: [PArg] -> ShowS #

Sentence Lookup

lookupSentence Source #

Arguments

:: Concr	the language with which we parse
-> Type	the start category
-> String	the input sentence
-> [(Expr, Float)]

Generation

generateAll :: PGF -> Type -> [(Expr, Float)] Source #

Generates an exhaustive possibly infinite list of all abstract syntax expressions of the given type. The expressions are ordered by their probability.

Morphological Analysis

type MorphoAnalysis = (Fun, String, Float) Source #

This triple is returned by all functions that deal with the grammar's lexicon. Its first element is the name of an abstract lexical function which can produce a given word or a multiword expression (i.e. this is the lemma). After that follows a string which describes the particular inflection form.

The last element is a logarithm from the the probability of the function. The probability is not conditionalized on the category of the function. This makes it possible to compare the likelihood of two functions even if they have different types.

lookupMorpho :: Concr -> String -> [MorphoAnalysis] Source #

lookupMorpho takes a string which must be a single word or a multiword expression. It then computes the list of all possible morphological analyses.

lookupCohorts :: Concr -> String -> [(Int, String, [MorphoAnalysis], Int)] Source #

lookupCohorts takes an arbitrary string an produces a list of all places where lexical items from the grammar have been identified (i.e. cohorts). The list consists of triples of the format (start,ans,end), where start-end identifies the span in the text and ans is the list of possible morphological analyses similar to lookupMorpho.

The list is sorted first by the start position and after than by the end position. This can be used for instance if you want to filter only the longest matches.

fullFormLexicon :: Concr -> [(String, [MorphoAnalysis])] Source #

filterBest :: [(Int, String, [MorphoAnalysis], Int)] -> [(Int, String, [MorphoAnalysis], Int)] Source #

filterLongest :: [(Int, String, [MorphoAnalysis], Int)] -> [(Int, String, [MorphoAnalysis], Int)] Source #

Visualizations

data GraphvizOptions Source #

Constructors

GraphvizOptions
Fields noLeaves :: Bool noFun :: Bool noCat :: Bool noDep :: Bool nodeFont :: String leafFont :: String nodeColor :: String leafColor :: String nodeEdgeStyle :: String leafEdgeStyle :: String

graphvizDefaults :: GraphvizOptions Source #

graphvizAbstractTree :: PGF -> GraphvizOptions -> Expr -> String Source #

Renders an abstract syntax tree in a Graphviz format.

graphvizParseTree :: Concr -> GraphvizOptions -> Expr -> String Source #

graphvizWordAlignment :: [Concr] -> GraphvizOptions -> Expr -> String Source #

Exceptions

newtype PGFError Source #

Constructors

PGFError String

Instances

Show PGFError Source #
Instance details Defined in PGF2 Methods showsPrec :: Int -> PGFError -> ShowS # show :: PGFError -> String # showList :: [PGFError] -> ShowS #
Exception PGFError Source #
Instance details Defined in PGF2 Methods toException :: PGFError -> SomeException # fromException :: SomeException -> Maybe PGFError # displayException :: PGFError -> String #

Grammar specific callbacks

type LiteralCallback = PGF -> (ConcName, Concr) -> String -> String -> Int -> Maybe (Expr, Float, Int) Source #

literalCallbacks :: [(AbsName, [(Cat, LiteralCallback)])] Source #

Callbacks for the App grammar

PGF

Identifiers

Abstract syntax

Categories

Functions

Expressions

Types

Type checking

Computing

Concrete syntax

Linearization

Parsing

Sentence Lookup

Generation

Morphological Analysis

Visualizations

Exceptions

Grammar specific callbacks