-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Grammatical Framework -- -- GF, Grammatical Framework, is a programming language for multilingual -- grammar applications @package gf @version 3.6 module PGF.Lexing -- | Text lexing with standard word capitalization of the first word of -- every sentence lexText :: String -> [String] -- | Text lexing with custom treatment of the first word of every sentence. lexText' :: (String -> String) -> String -> [String] unlexText :: [String] -> String -- | Bind tokens separated by Prelude.BIND, i.e. &+ bindTok :: [String] -> [String] -- | Haskell lexer, usable for much code lexCode :: String -> [String] unlexCode :: [String] -> String -- | LaTeX style lexer, with math environment using Code between -- $...$ lexMixed :: String -> [String] unlexMixed :: [String] -> String -- | Capitalize first letter capitInit :: [Char] -> [Char] -- | Uncapitalize first letter uncapitInit :: [Char] -> [Char] -- | Unquote each string wrapped in double quotes unquote :: [[Char]] -> [[Char]] isPunct :: Char -> Bool isMajorPunct :: Char -> Bool isMinorPunct :: Char -> Bool isParen :: Char -> Bool isClosing :: Char -> Bool -- | Basic utilities module PGF.Utilities -- | Like nub, but O(n log n) instead of O(n^2), since it uses a set -- to look up previous things. The result list is stable (the elements are -- returned in the order they occur), and lazy. Requires that the list -- elements can be compared by Ord. Code ruthlessly taken from -- http://hpaste.org/54411 nub' :: Ord a => [a] -> [a] -- | Replace all occurrences of an element by another element. replace :: Eq a => a -> a -> [a] -> [a] -- | This module is an Application Programming Interface to load and -- interpret grammars compiled in Portable Grammar Format (PGF). The PGF -- format is produced as a final output from the GF compiler. 
The API is -- meant to be used for embedding GF grammars in Haskell programs module PGF -- | An abstract data type representing multilingual grammar in Portable -- Grammar Format. data PGF -- | Reads file in Portable Grammar Format and produces PGF -- structure. The file is usually produced with: -- --
--   $ gf -make <grammar file name>
--   
readPGF :: FilePath -> IO PGF -- | An abstract data type that represents identifiers for functions and -- categories in PGF. data CId -- | Creates a new identifier from String mkCId :: String -> CId wildCId :: CId -- | Renders the identifier as String showCId :: CId -> String -- | Reads an identifier from String. The function returns -- Nothing if the string is not valid identifier. readCId :: String -> Maybe CId ppCId :: CId -> Doc pIdent :: ReadP String -- | Creates an identifier from a UTF-8-encoded ByteString utf8CId :: ByteString -> CId -- | This is just a CId with the language name. A language name is -- the identifier that you write in the top concrete or abstract module -- in GF after the concrete/abstract keyword. Example: -- --
--   abstract Lang = ...
--   concrete LangEng of Lang = ...
--   
type Language = CId showLanguage :: Language -> String readLanguage :: String -> Maybe Language -- | List of all languages available in the given grammar. languages :: PGF -> [Language] -- | The abstract language name is the name of the top-level abstract -- module abstractName :: PGF -> Language -- | Gets the RFC 4646 language tag of the language which the given -- concrete syntax implements, if this is listed in the source grammar. -- Example language tags include "en" for English, and -- "en-UK" for British English. languageCode :: PGF -> Language -> Maybe String -- | To read a type from a String, use readType. data Type -- | Hypo represents a hypothesis in a type i.e. in the type A -> -- B, A is the hypothesis type Hypo = (BindType, CId, Type) -- | renders type as String. The list of identifiers is the list of -- all free variables in the expression in order reverse to the order of -- binding. showType :: [CId] -> Type -> String -- | Reads a Type from a String. readType :: String -> Maybe Type -- | creates a type from list of hypotheses, category and list of -- arguments for the category. The operation mkType [h_1,...,h_n] C -- [e_1,...,e_m] will create h_1 -> ... -> h_n -> C e_1 -- ... e_m mkType :: [Hypo] -> CId -> [Expr] -> Type -- | creates hypothesis for non-dependent type i.e. A mkHypo :: Type -> Hypo -- | creates hypothesis for dependent type i.e. (x : A) mkDepHypo :: CId -> Type -> Hypo -- | creates hypothesis for dependent type with implicit argument i.e. ({x} -- : A) mkImplHypo :: CId -> Type -> Hypo unType :: Type -> ([Hypo], CId, [Expr]) -- | List of all categories defined in the given grammar. The categories -- are defined in the abstract syntax with the 'cat' keyword. categories :: PGF -> [CId] categoryContext :: PGF -> CId -> Maybe [Hypo] -- | The start category is defined in the grammar with the 'startcat' flag. -- This is usually the sentence category but it is not necessary. 
Despite -- that there is a start category defined you can parse with any -- category. The start category definition is just for convenience. startCat :: PGF -> Type -- | List of all functions defined in the abstract syntax functions :: PGF -> [CId] -- | List of all functions defined for a given category functionsByCat :: PGF -> CId -> [CId] -- | The type of a given function functionType :: PGF -> CId -> Maybe Type -- | List of functions that lack linearizations in the given language. missingLins :: PGF -> Language -> [CId] -- | Tree is the abstract syntax representation of a given sentence in some -- concrete syntax. Technically Tree is a type synonym of -- Expr. type Tree = Expr -- | An expression in the abstract syntax of the grammar. It could be either -- a parameter of a dependent type or an abstract syntax tree for some -- sentence. data Expr -- | renders expression as String. The list of identifiers is the -- list of all free variables in the expression in order reverse to the -- order of binding. 
showExpr :: [CId] -> Expr -> String -- | parses String as an expression readExpr :: String -> Maybe Expr mkAbs :: BindType -> CId -> Expr -> Expr unAbs :: Expr -> Maybe (BindType, CId, Expr) -- | Constructs an expression by applying a function to a list of -- expressions mkApp :: CId -> [Expr] -> Expr -- | Decomposes an expression into application of function unApp :: Expr -> Maybe (CId, [Expr]) -- | Constructs an expression from string literal mkStr :: String -> Expr -- | Decomposes an expression into string literal unStr :: Expr -> Maybe String -- | Constructs an expression from integer literal mkInt :: Int -> Expr -- | Decomposes an expression into integer literal unInt :: Expr -> Maybe Int -- | Constructs an expression from real number literal mkDouble :: Double -> Expr -- | Decomposes an expression into real number literal unDouble :: Expr -> Maybe Double -- | Constructs an expression which is meta variable mkMeta :: Int -> Expr -- | Checks whether an expression is a meta variable unMeta :: Expr -> Maybe Int pExpr :: ReadP Expr -- | Linearizes given expression as string in the language linearize :: PGF -> Language -> Tree -> String -- | Linearizes given expression as string in all languages available in -- the grammar. linearizeAllLang :: PGF -> Tree -> [(Language, String)] -- | The same as linearizeAllLang but does not return the language. linearizeAll :: PGF -> Tree -> [String] -- | Linearizes given expression as a bracketed string in the language bracketedLinearize :: PGF -> Language -> Tree -> [BracketedString] -- | Creates a table from feature name to linearization. 
The outer list -- encodes the variations tabularLinearizes :: PGF -> Language -> Expr -> [[(String, String)]] groupResults :: [[(Language, String)]] -> [(Language, [String])] -- | Show the printname of function or category showPrintName :: PGF -> Language -> CId -> String -- | BracketedString represents a sentence that is linearized as usual but -- we also want to retain the ''brackets'' that mark the beginning and -- the end of each constituent. data BracketedString -- | this is the leaf i.e. a single token Leaf :: Token -> BracketedString -- | this is a bracket. The CId is the category of the phrase. The -- FId is a unique identifier for every phrase in the sentence. -- For context-free grammars i.e. without discontinuous constituents this -- identifier is also unique for every bracket. When there are -- discontinuous phrases then the identifiers are unique for every phrase -- but not for every bracket since the bracket represents a constituent. -- The different constituents could still be distinguished by using the -- constituent index i.e. LIndex. If the grammar is reduplicating -- then the constituent indices will be the same for all brackets that -- represent the same constituent. Bracket :: CId -> {-# UNPACK #-} !FId -> {-# UNPACK #-} !LIndex -> CId -> [Expr] -> [BracketedString] -> BracketedString type FId = Int type LIndex = Int type Token = String -- | Renders the bracketed string as string where the brackets are shown as -- (S ...) where S is the category. showBracketedString :: BracketedString -> String flattenBracketedString :: BracketedString -> [String] -- | Tries to parse the given string in the specified language and to -- produce abstract syntax expression. parse :: PGF -> Language -> Type -> String -> [Tree] -- | Tries to parse the given string with all available languages. The -- returned list contains pairs of language and list of abstract syntax -- expressions (this is a list, since grammars can be ambiguous). 
Only -- those languages for which at least one parsing is possible are listed. parseAllLang :: PGF -> Type -> String -> [(Language, [Tree])] -- | The same as parseAllLang but does not return the language. parseAll :: PGF -> Type -> String -> [[Tree]] -- | The same as parse but returns more detailed information parse_ :: PGF -> Language -> Type -> Maybe Int -> String -> (ParseOutput, BracketedString) -- | This is an experimental function. Use it at your own risk parseWithRecovery :: PGF -> Language -> Type -> [Type] -> Maybe Int -> String -> (ParseOutput, BracketedString) -- | Converts an expression to normal form compute :: PGF -> Expr -> Expr paraphrase :: PGF -> Expr -> [Expr] -- | Check whether a given type is consistent with the abstract syntax of -- the grammar. checkType :: PGF -> Type -> Either TcError Type -- | Checks an expression against a specified type. checkExpr :: PGF -> Expr -> Type -> Either TcError Expr -- | Tries to infer the type of a given expression. Note that even if the -- expression is type correct it is not always possible to infer its type -- in the GF type system. In this case the function returns the -- CannotInferType error. inferExpr :: PGF -> Expr -> Either TcError (Expr, Type) -- | If an error occurs in the typechecking phase the type checker returns -- not a plain text error message but a TcError structure which -- describes the error. data TcError -- | Unknown category name was found. UnknownCat :: CId -> TcError -- | Unknown function name was found. UnknownFun :: CId -> TcError -- | A category was applied to wrong number of arguments. The first integer -- is the number of expected arguments and the second the number of given -- arguments. The [CId] argument is the list of free variables -- in the type. It should be used for the showType function. WrongCatArgs :: [CId] -> Type -> CId -> Int -> Int -> TcError -- | The expression is not of the expected type. The first type is the -- expected type, while the second is the inferred. 
The [CId] -- argument is the list of free variables in both the expression and the -- type. It should be used for the showType and showExpr -- functions. TypeMismatch :: [CId] -> Expr -> Type -> Type -> TcError -- | Something that is not of function type was applied to an argument. NotFunType :: [CId] -> Expr -> Type -> TcError -- | It is not possible to infer the type of an expression. CannotInferType :: [CId] -> Expr -> TcError -- | Some metavariables have to be instantiated in order to complete the -- typechecking. UnresolvedMetaVars :: [CId] -> Expr -> [MetaId] -> TcError -- | Implicit argument was passed where the type doesn't allow it UnexpectedImplArg :: [CId] -> Expr -> TcError -- | There is a goal that cannot be solved UnsolvableGoal :: [CId] -> MetaId -> Type -> TcError -- | Renders the type checking error to a document. See PrettyPrint. ppTcError :: TcError -> Doc -- | An abstract data type whose values represent the current state in an -- incremental parser. data ParseState -- | Creates an initial parsing state for a given language and startup -- category. initState :: PGF -> Language -> Type -> ParseState -- | From the current state and the next token nextState computes a -- new state, where the token is consumed and the current position is -- shifted by one. If the new token cannot be accepted then an error -- state is returned. nextState :: ParseState -> ParseInput -> Either ErrorState ParseState -- | If the next token is not known but only its prefix (possibly empty -- prefix) then the getCompletions function can be used to -- calculate the possible next words and the consequent states. This is -- used for word completions in the GF interpreter. getCompletions :: ParseState -> String -> Map Token ParseState recoveryStates :: [Type] -> ErrorState -> (ParseState, Map Token ParseState) -- | The input to the parser is a pair of predicates. 
The first one -- piToken selects, from a list of suggestions from the -- grammar, the token that actually appears at the current position in the input string. -- The second one piLiteral recognizes whether a literal with -- forest id FId could be matched at the current position. data ParseInput ParseInput :: (forall a. Map Token a -> Maybe a) -> (FId -> Maybe (CId, Tree, [Token])) -> ParseInput piToken :: ParseInput -> forall a. Map Token a -> Maybe a piLiteral :: ParseInput -> FId -> Maybe (CId, Tree, [Token]) -- | This function constructs the simplest possible parser input. It checks -- the tokens for exact matching and recognizes only String, -- Int and Float literals. The Int and -- Float literals match only if the token passed is some number. -- The String literal always matches but the length of the literal -- could be only one token. simpleParseInput :: Token -> ParseInput mkParseInput :: PGF -> Language -> (forall a. b -> Map Token a -> Maybe a) -> [(CId, b -> Maybe (Tree, [Token]))] -> (b -> ParseInput) -- | This data type encodes the different outcomes which you could get from -- the parser. data ParseOutput -- | The integer is the position in number of tokens where the parser -- failed. ParseFailed :: Int -> ParseOutput -- | The parsing was successful but none of the trees is type correct. The -- forest id (FId) points to the bracketed string from the parser -- where the type checking failed. More than one error is returned if -- there are many analyses for some phrase but they all are not type -- correct. TypeError :: [(FId, TcError)] -> ParseOutput -- | If the parsing and the type checking are successful we get a list of -- abstract syntax trees. The list should be non-empty. ParseOk :: [Tree] -> ParseOutput -- | The sentence is not complete. Only partial output is produced ParseIncomplete :: ParseOutput -- | This function extracts the list of all completed parse trees that -- span the whole input consumed so far. 
The trees are also limited by -- the category specified, which is usually the same as the startup -- category. getParseOutput :: ParseState -> Type -> Maybe Int -> (ParseOutput, BracketedString) -- | Generates an exhaustive possibly infinite list of abstract syntax -- expressions. generateAll :: PGF -> Type -> [Expr] -- | A variant of generateAll which also takes as argument the upper -- limit of the depth of the generated expression. generateAllDepth :: PGF -> Type -> Maybe Int -> [Expr] -- | Generates a list of abstract syntax expressions in a way similar to -- generateAll but instead of generating all instances of a given -- type, this function uses a template. generateFrom :: PGF -> Expr -> [Expr] -- | A variant of generateFrom which also takes as argument the -- upper limit of the depth of the generated subexpressions. generateFromDepth :: PGF -> Expr -> Maybe Int -> [Expr] -- | Generates an infinite list of random abstract syntax expressions. This -- is useful for tree bank generation which after that can be used for -- grammar testing. generateRandom :: RandomGen g => g -> PGF -> Type -> [Expr] -- | A variant of generateRandom which also takes as argument the -- upper limit of the depth of the generated expression. generateRandomDepth :: RandomGen g => g -> PGF -> Type -> Maybe Int -> [Expr] -- | Random generation based on template generateRandomFrom :: RandomGen g => g -> PGF -> Expr -> [Expr] -- | Random generation based on template with a limitation in the depth. generateRandomFromDepth :: RandomGen g => g -> PGF -> Expr -> Maybe Int -> [Expr] type Lemma = CId type Analysis = String data Morpho lookupMorpho :: Morpho -> String -> [(Lemma, Analysis)] buildMorpho :: PGF -> Language -> Morpho fullFormLexicon :: Morpho -> [(String, [(Lemma, Analysis)])] morphoMissing :: Morpho -> [String] -> [String] morphoKnown :: Morpho -> [String] -> [String] isInMorpho :: Morpho -> String -> Bool -- | This is the construction function. 
Given a PGF and a Language, it -- extracts the lexicon for this language and builds a tokenization fst -- from it. mkTokenizer :: PGF -> Language -> (String -> Maybe [String]) -- | Renders abstract syntax tree in Graphviz format graphvizAbstractTree :: PGF -> (Bool, Bool) -> Tree -> String graphvizParseTree :: PGF -> Language -> GraphvizOptions -> Tree -> String graphvizDependencyTree :: String -> Bool -> Maybe Labels -> Maybe String -> PGF -> CId -> Tree -> String graphvizBracketedString :: GraphvizOptions -> [BracketedString] -> String graphvizAlignment :: PGF -> [Language] -> Expr -> String gizaAlignment :: PGF -> (Language, Language) -> Expr -> (String, String, String) data GraphvizOptions GraphvizOptions :: Bool -> Bool -> Bool -> String -> String -> String -> String -> String -> String -> GraphvizOptions noLeaves :: GraphvizOptions -> Bool noFun :: GraphvizOptions -> Bool noCat :: GraphvizOptions -> Bool nodeFont :: GraphvizOptions -> String leafFont :: GraphvizOptions -> String nodeColor :: GraphvizOptions -> String leafColor :: GraphvizOptions -> String nodeEdgeStyle :: GraphvizOptions -> String leafEdgeStyle :: GraphvizOptions -> String graphvizDefaults :: GraphvizOptions getDepLabels :: [String] -> Labels -- | An abstract data structure which represents the probabilities for the -- different functions in a grammar. data Probabilities -- | Builds probability tables. The second argument is a map which contains -- the known probabilities. If some function is not in the map then it -- gets assigned some probability based on the even distribution of the -- unallocated probability mass for the result category. mkProbabilities :: PGF -> Map CId Double -> Probabilities -- | Returns the default even distribution. defaultProbabilities :: PGF -> Probabilities -- | Renders the probability structure as string showProbabilities :: Probabilities -> String -- | Reads the probabilities from a file. 
This should be a text file where -- on every line there is a function name followed by a real number. The -- number represents the probability mass allocated for that function. -- The function name and the probability should be separated by a -- whitespace. readProbabilitiesFromFile :: FilePath -> PGF -> IO Probabilities -- | compute the probability of a given tree probTree :: PGF -> Expr -> Double setProbabilities :: Probabilities -> PGF -> PGF -- | rank from highest to lowest probability rankTreesByProbs :: PGF -> [Expr] -> [(Expr, Double)] browse :: PGF -> CId -> Maybe (String, [CId], [CId]) -- | A type for plain applicative trees data ATree Other :: Tree -> ATree App :: CId -> [ATree] -> ATree -- | A type for tries of plain applicative trees data Trie Oth :: Tree -> Trie Ap :: CId -> [[Trie]] -> Trie -- | Convert a Tree to an ATree toATree :: Tree -> ATree -- | Combine a list of trees into a trie toTrie :: [ATree] -> [[Trie]] instance Show ATree instance Show Trie