% GenI surface realiser
% Copyright (C) 2005 Carlos Areces and Eric Kow
%
% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 2
% of the License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 59 Temple Place Suite 330, Boston, MA 021111307, USA.
\chapter{File formats}
\label{cha:formats}
\label{cha:GeniParsers}
This chapter is a description of the file formats used by \geni. We'll be
using EBNFs to describe the format below. Here are some rules and types of
rules we leave out, and prefer to describe informally:
\begin{verbatim}
<alphanumeric>
<stringliteral> (stuff between quotes)
<optwhatever> (systematically... "" | <whatever>)
<keywordwhatever> (systematically.. "whatever" ":")
\end{verbatim}
\ignore{
\begin{code}
module NLP.GenI.GeniParsers (
geniTestSuite, geniSemanticInput, geniTestSuiteString,
geniDerivations,
toSemInputString,
geniMacros, geniTagElems,
geniLexicon, geniMorphInfo,
geniFeats, geniPolarities, geniSemantics, geniValue, geniWords,
geniWord, geniLanguageDef, tillEof,
parseFromFile,
module Text.ParserCombinators.Parsec
) where
import NLP.GenI.General ((!+!), Interval, ival)
import NLP.GenI.Btypes
import NLP.GenI.Tags (TagElem(..), emptyTE, setTidnums)
import NLP.GenI.GeniShow (GeniShow(geniShow))
import NLP.GenI.PolarityTypes
import Control.Monad (liftM, when)
import Data.List (sort)
import qualified Data.Map as Map
import qualified Data.Tree as T
import Text.ParserCombinators.Parsec hiding (parseFromFile)
import Text.ParserCombinators.Parsec.Language (emptyDef)
import Text.ParserCombinators.Parsec.Token (TokenParser,
LanguageDef(..), makeTokenParser)
import qualified Text.ParserCombinators.Parsec.Token as P
import qualified System.IO.UTF8 as UTF8
\end{code}
}
\section{General notes}
\subsection{Comments}
Any \geni format file can include comments. Comments start \verb!%!.
There is also the option of using \verb'/* */' for embedded comments.
\subsection{Reserved words}
The following are reserved words. You should not use them as variable names.
NB: the reserved words are indicated below between quotes; eg. ``semantics''.
You can ignore C preprocessor noise such as \verb!#define SEMANTICS!
\begin{includecodeinmanual}
\begin{code}
#define SEMANTICS "semantics"
#define SENTENCE "sentence"
#define OUTPUT "output"
#define TRACE "trace"
#define ANCHOR "anchor"
#define SUBST "subst"
#define FOOT "foot"
#define LEX "lex"
#define TYPE "type"
#define ACONSTR_NOADJ "aconstr:noadj"
#define INITIAL "initial"
#define AUXILIARY "auxiliary"
#define IDXCONSTRAINTS "idxconstraints"
#define BEGIN "begin"
#define END "end"
\end{code}
\end{includecodeinmanual}
\subsection{Lexer}
For reference, we include the Parsec LanguageDef that we use to implement
the \geni format.
\begin{includecodeinmanual}
\begin{code}
geniLanguageDef :: LanguageDef ()
geniLanguageDef = emptyDef
{ commentLine = "%"
, commentStart = "/*"
, commentEnd = "*/"
, opLetter = oneOf ""
, reservedOpNames = [""]
, reservedNames =
[ SEMANTICS , SENTENCE, OUTPUT, IDXCONSTRAINTS, TRACE
, ANCHOR , SUBST , FOOT , LEX , TYPE , ACONSTR_NOADJ
, INITIAL , AUXILIARY
, BEGIN , END ]
, identLetter = identStuff
, identStart = identStuff
}
where identStuff = alphaNum <|> oneOf "_'+-."
\end{code}
\end{includecodeinmanual}
\section{The basics}
\subsection{Variables and constants}
Below are some examples of \geni variables and constants. Note that we support
atomic disjunction of constants, as in \verb!Foo|bar|baz!, but not variables.
\begin{center}
\begin{tabular}{ll}
anonymous variable & \verb!?_! or \verb!_! \\
variables & \verb!Foo!, \verb!?X! or \verb!?x! \\
constants & \verb!Foo!, \verb!foo!, \verb!X!, \verb!x! or \verb!Foo|bar! \\
\end{tabular}
\end{center}
Here is an EBNF for GenI variables and constants
\begin{SaveVerbatim}{KoweyTmp}
<value> ::= <variable> | <anonymousvariable> | <constantdisj>
<variable> ::= "?" <identifier>
<anonymous> ::= "?_" | "_"
<constantdisj> ::= <constant> (| <constant>)*
<constant> ::= <identifier>
<identifier> ::= <alphanumeric> | "+" | "-" | "_"
\end{SaveVerbatim}
\begin{center}
\fbox{\BUseVerbatim{KoweyTmp}}
\end{center}
\begin{code}
geniValue :: Parser GeniVal
geniValue = ((try $ anonymous) <?> "_ or ?_")
<|> (constants <?> "a constant or atomic disjunction")
<|> (variable <?> "a variable")
where
question = "?"
constants :: Parser GeniVal
constants =
do c <- sepBy1 (looseIdentifier <|> stringLiteral) (symbol "|")
return (GConst c)
variable :: Parser GeniVal
variable =
do symbol question
v <- identifier
return (GVar v)
anonymous :: Parser GeniVal
anonymous =
do optional $ symbol question
symbol "_"
return GAnon
\end{code}
\subsection{Feature structures}
In addition to variables and constants, \geni also makes heavy use of flat
feature structures. They take the form \verb![foo:bar ping:?Pong]!, or more
formally,
\begin{SaveVerbatim}{KoweyTmp}
<featurestructure> ::= "[" <atttributevaluepair>* "]"
<attributevaluepair> ::= <identifierorreserved> ":" <value>
<identifierorreserved> ::= <identifier> | <reserved>
\end{SaveVerbatim}
\begin{center}
\fbox{\BUseVerbatim{KoweyTmp}}
\end{center}
\begin{code}
geniFeats :: Parser Flist
geniFeats = option [] $ squares $ many geniAttVal
geniAttVal :: Parser AvPair
geniAttVal = do
att <- identifierR <?> "an attribute"; colon
val <- geniValue <?> "a GenI value"
return $ AvPair att val
\end{code}
\subsection{Semantics}
\label{sec:genisemantics}
A \jargon{semantics} is basically a set of literals. Semantics are used in
to provide \geni input (section \ref{sec:geniinputsemantics}) and in the
definition of lexical entries (section \ref{sec:genilexicon}).
Notice that this is a flat semantic representation! No literals within
literals, please. A literal can take one of two forms:
\begin{verbatim}
handle:predicate(arguments)
predicate(arguments)
\end{verbatim}
The arguments are spacedelimited. Not providing a handle is
equivalent to providing an anonymous one.
\begin{SaveVerbatim}{KoweyTmp}
<semantics> ::= <keywordsemantics> "[" <literal>* "]"
<literal> ::= <identifier> "(" <value>* ")"
\end{SaveVerbatim}
\begin{center}
\fbox{\BUseVerbatim{KoweyTmp}}
\end{center}
\begin{code}
geniSemantics :: Parser Sem
geniSemantics =
do sem <- many (geniLiteral <?> "a literal")
return (sortSem sem)
geniLiteral :: Parser Pred
geniLiteral =
do handle <- option GAnon handleParser <?> "a handle"
predicate <- geniValue <?> "a predicate"
pars <- parens (many geniValue) <?> "some parameters"
return (handle, predicate, pars)
where handleParser =
try $ do { h <- geniValue ; char ':' ; return h }
\end{code}
\section{Semantic inputs and test suites}
\label{sec:geniinputsemantics}
\subsection{Semantic input}
The semantic input can either be provided directly in the graphical interface
or as part of a test suite.
The format for semantic inputs is actually a bit richer than the core
definition in section \ref{sec:genisemantics}, but I have not yet written the
documentation for it.
\textbf{TODO}: The semantics may contain literal based constraints as described
in section \ref{sec:fixme}. These constraints are just a spacedelimited list
of String. When returning the results, we separate them out from the semantics
proper so that they can be treated separately. Index constraints are
represented as feature structures.
\begin{code}
geniSemanticInput :: Parser (Sem,Flist,[LitConstr])
geniSemanticInput =
do keywordSemantics
(sem,litC) <- liftM unzip $ squares $ many literalAndConstraint
idxC <- option [] geniIdxConstraints
let sem2 = createHandles sem
semlitC2 = [ (s,c) | (s,c) <- zip sem2 litC, (not.null) c ]
return (createHandles sem, idxC, semlitC2)
where
createHandles :: Sem -> Sem
createHandles = zipWith setHandle ([1..] :: [Int])
setHandle i (h, pred_, par) =
let h2 = if h /= GAnon then h
else GConst ["genihandle" ++ (show i)]
in (h2, pred_, par)
literalAndConstraint :: Parser (Pred, [String])
literalAndConstraint =
do l <- geniLiteral
t <- option [] $ squares $ many identifier
return (l,t)
geniSemanticInputString :: Parser String
geniSemanticInputString =
do keywordSemantics
s <- squaresString
whiteSpace
optional geniIdxConstraints
return s
geniIdxConstraints :: Parser Flist
geniIdxConstraints = keyword IDXCONSTRAINTS >> geniFeats
squaresString :: Parser String
squaresString =
do char '['
s <- liftM concat $ many $ (many1 $ noneOf "[]") <|> squaresString
char ']'
return $ "[" ++ s ++ "]"
data SemInputString = SemInputString String Flist
instance GeniShow SemInputString where
geniShow (SemInputString semStr idxC) =
SEMANTICS ++ ":" ++ semStr ++ (if null idxC then "" else r)
where r = "\n" ++ IDXCONSTRAINTS ++ ": " ++ showFlist idxC
toSemInputString :: SemInput -> String -> SemInputString
toSemInputString (_,lc,_) s = SemInputString s lc
\end{code}
\subsection{Test suite}
\geni accepts an entire test suite of semantic inputs that you can choose from.
The test suite entries can be named. In fact, it is probably a good idea to do
so, because the names are often shorter than the expected output, and easier to
read than the semantics. Note the expected output isn't used by \geni itself,
but external tools that ``test'' \geni.
\begin{SaveVerbatim}{KoweyTmp}
<testsuite> ::= <testsuiteentry>*
<testsuiteentry> ::= <optidentifier> <semantics> <expectedoutput>*
<expectedoutput> ::= <optkeywordsentence> "[" <identifier>* "]"
\end{SaveVerbatim}
\begin{center}
\fbox{\BUseVerbatim{KoweyTmp}}
\end{center}
\begin{code}
geniTestSuite :: Parser [TestCase]
geniTestSuite =
tillEof (many geniTestCase)
geniTestSuiteString :: Parser [String]
geniTestSuiteString =
tillEof (many geniTestCaseString)
geniDerivations :: Parser [TestCaseOutput]
geniDerivations = tillEof $ many geniOutput
geniTestCase :: Parser TestCase
geniTestCase =
do name <- option "" (identifier <?> "a test case name")
seminput <- geniSemanticInput
sentences <- many geniSentence
outputs <- many geniOutput
return $ TestCase name "" seminput sentences outputs
type TestCaseOutput = (String, Map.Map (String,String) [String])
geniOutput :: Parser TestCaseOutput
geniOutput =
do ws <- keyword OUTPUT >> (squares geniWords)
ds <- Map.fromList `fmap` many geniTraces
return (ws, ds)
geniTraces :: Parser ((String,String), [String])
geniTraces =
do keyword TRACE
squares $ do
k1 <- withWhite geniWord
k2 <- withWhite geniWord
whiteSpace >> char '!' >> whiteSpace
traces <- sepEndBy1 geniWord whiteSpace
return ((k1,k2), traces)
withWhite :: Parser a -> Parser a
withWhite p = p >>= (\a -> whiteSpace >> return a)
geniSentence :: Parser String
geniSentence = optional (keyword SENTENCE) >> squares geniWords
geniWords :: Parser String
geniWords =
unwords `fmap` (sepEndBy1 geniWord whiteSpace <?> "a sentence")
geniWord :: Parser String
geniWord = many1 (noneOf "[]\v\f\t\r\n ")
geniTestCaseString :: Parser String
geniTestCaseString =
do option "" (identifier <?> "a test case name")
s <- geniSemanticInputString
many geniSentence
many geniOutput
return s
\end{code}
\section{Lexicon}
\label{sec:genilexicon}
The lexicon associates semantic entries with lemmas and trees.
\subsection{Lexicon examples}
There are two ways to write the lexicon. We show the old (deprecated)
way first because most of the examples are still written in this style.
\paragraph{Example 1 (deprecated)}
\begin{verbatim}
le clitic (?I)
semantics:[]
le Det (?I)
semantics:[def(?I)]
livre nC (?I)
semantics:[book(?I)]
persuader vArity3 (?E ?X ?Y ?Z)
semantics:[?E:convince(?X ?Y ?Z)]
persuader v vArity3controlObj
semantics:[?E:convince(?X ?Y ?Z)]
\end{verbatim}
\paragraph{Example 2 (preferred)}
\begin{verbatim}
detester n0Vn1
equations:[theta1:agent theta2:patient arg1:?X arg2:?Y evt:?L]
filters:[family:n0Vn1]
semantics:[?E:hate(?L) ?E:agent(?L ?X) ?E:patient(?L ?Y)]
\end{verbatim}
\subsection{Notes about lexicons}
\begin{itemize}
\item The semantics associated with a lexicali item may have more than one literal
\begin{verbatim}
cher adj (?E ?X ?Y)
semantics:[?E:cost(?X ?Y) ?E:high(?Y)]
\end{verbatim}
\item A lemma may have more than one distinct semantics
\begin{verbatim}
bank n (?X)
semantics:[bank(?X)]
bank v (?E ?X ?D)
semantics:[?E:lean(?X,?D)]
\end{verbatim}
\item A semantics may be realised by more than one lexical entry (e.g. synonynms)
\begin{verbatim}
livre nC (?I)
semantics:[book(?I)]
bouquin nC (?I)
semantics:[book(?I)]
\end{verbatim}
\end{itemize}
\subsection{Lexicon EBNF}
\begin{SaveVerbatim}{KoweyTmp}
<lexicon> ::= <lexiconentry>*
<lexiconentry> ::= <lexiconheader> <optfilters> <semantics>
<lexiconheader> ::= <lemma> <family> <parameters>
| <lemma> <family> <keywordequations> <featurestructure>
<parameters> ::= "(" <value>* <optinterface> ")"
<interface> ::= "!" <attributevaluepairs>*
<filters> ::= <keywordfilter> <featurestructure>
\end{SaveVerbatim}
\begin{center}
\fbox{\BUseVerbatim{KoweyTmp}}
\end{center}
\begin{code}
geniLexicon :: Parser [ILexEntry]
geniLexicon = tillEof $ many1 geniLexicalEntry
geniLexicalEntry :: Parser ILexEntry
geniLexicalEntry =
do lemma <- (looseIdentifier <|> stringLiteral) <?> "a lemma"
family <- identifier <?> "a tree family"
(pars, interface) <- option ([],[]) $ parens paramsParser
equations <- option [] $ do keyword "equations"
geniFeats <?> "path equations"
filters <- option [] $ do keyword "filters"
geniFeats
keywordSemantics
(sem,pols) <- squares geniLexSemantics
return emptyLE { iword = [lemma]
, ifamname = family
, iparams = pars
, iinterface = sortFlist interface
, iequations = equations
, ifilters = filters
, isemantics = sem
, isempols = pols }
where
paramsParser :: Parser ([GeniVal], Flist)
paramsParser = do
pars <- many geniValue <?> "some parameters"
interface <- option [] $ do symbol "!"
many geniAttVal
return (pars, interface)
geniLexSemantics :: Parser (Sem, [[Int]])
geniLexSemantics =
do litpols <- many (geniLexLiteral <?> "a literal")
return $ unzip litpols
geniLexLiteral :: Parser (Pred, [Int])
geniLexLiteral =
do (handle, hpol) <- option (GAnon,0) (handleParser <?> "a handle")
predicate <- geniValue <?> "a predicate"
paramsPols <- parens (many geniPolValue) <?> "some parameters"
let (pars, pols) = unzip paramsPols
literal = (handle, predicate, pars)
return (literal, hpol:pols)
where handleParser =
try $ do { h <- geniPolValue; colon; return h }
geniPolValue :: Parser (GeniVal, Int)
geniPolValue =
do p <- geniPolarity
v <- geniValue
return (v,p)
\end{code}
\section{Tree schemata}
The tree schemata file (for historical reasons, this is also called the macros
file) contains a set of unlexicalised trees organised into families. Such
``macros'' consist of a
\begin{enumerate}
\item a family name and (optionally) a macro name
\item a list of parameters
\item ''initial'' or ''auxiliary''
\item a tree.
\end{enumerate}
\subsection{Trees}
\jargon{Trees} are recursively defined structure of form \verb!node{tree*}!
For example, in the table below, the structure on the left should produce the
tree on the right:
\begin{SaveVerbatim}{KoweyTmp}
n1{
n2
n3{
n4
n5
}
n6
}
\end{SaveVerbatim}
\begin{tabular}{ll}
\BUseVerbatim{KoweyTmp} & \includegraphics[scale=0.50]{images/treeformatexample.png} \\
\end{tabular}
\subsection{Nodes}
\jargon{Nodes} consist of
\begin{enumerate}
\item a name
\item a type (optional)
\item either a lexeme, or top and bottom feature structures. Here are examples of the five possible kinds of nodes:
\end{enumerate}
\noindent
Here are some examples of nodes
\begin{verbatim}
n1 [cat:n idx:?I]![cat:n idx:?I] % basic
n3 type:subst [cat:n idx:?Y]![cat:n idx:?Y] % subst
n4 type:foot [cat:n idx:?Y]![cat:n idx:?Y] % foot
n5 type:lex "de" % coanchor
n2 anchor % anchor
n5 aconstr:noadj % node with a nulladjunction constraint (other than subst or foot)
\end{verbatim}
\subsection{Example}
\begin{verbatim}
adj:post(?I) auxiliary
n0[cat:n idx:?I det:_]![cat:n idx:?I det:minus ]
{
n1 type:foot [cat:n idx:?I det:minus]![cat:n idx:?I det:minus]
n2[cat:a]![]
{
n3 anchor
}
}
adj:pre(?I) auxiliary
n0[cat:n idx:?I det:_ qu:_]![cat:n idx:?I det:minus ]
{
n1[cat:a]![]
{
n2 anchor
}
n3 type:foot [cat:n idx:?I det:minus]![cat:n idx:?I det:minus]
}
vArity2:n0vn1(?E ?X ?Y) initial
n1[cat:p]![]
{
n2 type:subst [cat:n idx:?X det:plus]![cat:n idx:?X]
n3[cat:v idx:?E]![]
{
n4 anchor
}
n5 type:subst [cat:n idx:?Y det:plus]![cat:n idx:?Y]
}
\end{verbatim}
\subsection{EBNF}
\begin{SaveVerbatim}{KoweyTmp}
<macros> ::= <macro>*
<macro> ::= <familyname> <optmacroname> <parameters> <treetype> <tree>
<optsemantics> <opttrace>
<parameters> ::= "(" <value>* <optinterface> ")"
<interface> ::= ! <attributevaluepair>*
<macroname> ::= <identifier>
<treetype> ::= "initial" | "auxiliary"
<trace> ::= <keywordtrace> "[" <identifier>* "]"
<tree> ::= <node> | <node> "{" <tree>* "}"
<node> ::= <nodename> <optnodetype> <nodepayload>
<nodename> ::= <identifier>
<nodetype> ::= <keywordtype> <corenodetype> | "anchor"
<corenodetype> ::= "foot" | "subst" | "lex"
<nodepayload> ::= <stringliteral> | <featurestructure> "!" <featurestructure>
\end{SaveVerbatim}
\begin{center}
\fbox{\BUseVerbatim{KoweyTmp}}
\end{center}
\begin{code}
geniMacros :: Parser [MTtree]
geniMacros = tillEof $ many geniTreeDef
initType, auxType :: Parser Ptype
initType = do { reserved INITIAL ; return Initial }
auxType = do { reserved AUXILIARY ; return Auxiliar }
geniTreeDef :: Parser MTtree
geniTreeDef =
do sourcePos <- getPosition
family <- identifier
tname <- option "" $ do { colon; identifier }
(pars,iface) <- geniParams
theTtype <- (initType <|> auxType)
theTree <- geniTree
let treeFail x =
do setPosition sourcePos
fail $ "In tree " ++ family ++ ":" ++ tname ++ " " ++ show sourcePos ++ ": " ++ x
let theNodes = T.flatten theTree
numFeet = length [ x | x <- theNodes, gtype x == Foot ]
numAnchors = length [ x | x <- theNodes, ganchor x ]
when (not $ any ganchor theNodes) $
treeFail "At least one node in an LTAG tree must be an anchor"
when (numAnchors > 1) $
treeFail "There can be no more than 1 anchor node in a tree"
when (numFeet > 1) $
treeFail "There can be no more than 1 foot node in a tree"
when (theTtype == Initial && numFeet > 0) $
treeFail "Initial trees may not have foot nodes"
psem <- option Nothing $ do { keywordSemantics; liftM Just (squares geniSemantics) }
ptrc <- option [] $ do { keyword TRACE; squares (many identifier) }
return TT{ params = pars
, pfamily = family
, pidname = tname
, pinterface = sortFlist iface
, ptype = theTtype
, tree = theTree
, ptrace = ptrc
, psemantics = psem
}
geniTree :: Parser (T.Tree GNode)
geniTree =
do node <- geniNode
kids <- option [] (braces $ many geniTree)
<?> "child nodes"
let noKidsAllowed t c = when (c node && (not.null $ kids)) $
fail $ t ++ " nodes may *not* have any children"
noKidsAllowed "Anchor" $ ganchor
noKidsAllowed "Substitution" $ (== Subs) . gtype
noKidsAllowed "Foot" $ (== Foot) . gtype
return (T.Node node kids)
geniNode :: Parser GNode
geniNode =
do name <- identifier
nodeType <- option "" ( (keyword TYPE >> typeParser)
<|>
reserved ANCHOR)
lex_ <- if nodeType == LEX
then (sepBy (stringLiteral<|>identifier) (symbol "|") <?> "some lexemes")
else return []
constr <- case nodeType of
"" -> adjConstraintParser
ANCHOR -> adjConstraintParser
_ -> return True
(top_,bot_) <-
if nodeType == LEX
then option ([],[]) $ try topbotParser
else topbotParser
let top = sort top_
bot = sort bot_
nodeType2 = case nodeType of
ANCHOR -> Lex
LEX -> Lex
FOOT -> Foot
SUBST -> Subs
"" -> Other
other -> error ("unknown node type: " ++ other)
return $ GN { gnname = name, gtype = nodeType2
, gup = top, gdown = bot
, glexeme = lex_
, ganchor = (nodeType == ANCHOR)
, gaconstr = constr
, gorigin = "" }
where
typeParser = choice $ map (try.symbol) [ ANCHOR, FOOT, SUBST, LEX ]
adjConstraintParser = option False $ reserved ACONSTR_NOADJ >> return True
topbotParser =
do top <- geniFeats <?> "top features"
symbol "!"
bot <- geniFeats <?> "bot features"
return (top,bot)
geniTagElems :: Parser [TagElem]
geniTagElems = tillEof $ setTidnums `fmap` many geniTagElem
geniTagElem :: Parser TagElem
geniTagElem =
do family <- identifier
tname <- option "" $ do { colon; identifier }
iface <- (snd `liftM` geniParams) <|> geniFeats
theType <- initType <|> auxType
theTree <- geniTree
sem <- do { keywordSemantics; squares geniSemantics }
return $ emptyTE { idname = tname
, ttreename = family
, tinterface = iface
, ttype = theType
, ttree = theTree
, tsemantics = sem }
geniParams :: Parser ([GeniVal], Flist)
geniParams = parens $ do
pars <- many geniValue <?> "some parameters"
interface <- option [] $ do { symbol "!"; many geniAttVal }
return (pars, interface)
\end{code}
\section{Morphology}
A morphinfo file associates predicates with morphological feature structures.
Each morphological entry consists of a predicate followed by a feature
structuer. For more information, see chapter \ref{cha:Morphology}.
(\textbf{TODO}: describe format)
\begin{code}
geniMorphInfo :: Parser [(String,Flist)]
geniMorphInfo = tillEof $ many morphEntry
morphEntry :: Parser (String,Flist)
morphEntry =
do pred_ <- identifier
feats <- geniFeats
return (pred_, feats)
\end{code}
% ======================================================================
% everything else
% ======================================================================
\begin{code}
geniPolarities :: Parser (Map.Map PolarityKey Interval)
geniPolarities = tillEof $ toMap `fmap` many pol
where
toMap = Map.fromListWith (!+!)
pol = do p <- geniPolarity
i <- identifier
return (PolarityKey i,ival p)
geniPolarity :: Parser Int
geniPolarity = option 0 (plus <|> minus)
where
plus = do { char '+'; return 1 }
minus = do { char '-'; return (1) }
keyword :: String -> Parser String
keyword k =
do let helper = try $ do { reserved k; colon; return k }
helper <?> k ++ ":"
keywordSemantics :: Parser String
keywordSemantics = keyword SEMANTICS
lexer :: TokenParser ()
lexer = makeTokenParser geniLanguageDef
whiteSpace :: CharParser () ()
whiteSpace = P.whiteSpace lexer
looseIdentifier, identifier, stringLiteral, colon :: CharParser () String
identifier = P.identifier lexer
looseIdentifier =
do { i <- ident ; whiteSpace; return i }
where
ident =
do { c <- identStart geniLanguageDef
; cs <- many (identLetter geniLanguageDef)
; return (c:cs) } <?> "identifier"
stringLiteral = P.stringLiteral lexer
colon = P.colon lexer
squares, braces, parens :: CharParser () a -> CharParser () a
squares = P.squares lexer
braces = P.braces lexer
parens = P.parens lexer
reserved, symbol :: String -> CharParser () String
reserved s = P.reserved lexer s >> return s
symbol = P.symbol lexer
identifierR :: CharParser () String
identifierR
= do { c <- P.identStart geniLanguageDef
; cs <- many (P.identLetter geniLanguageDef)
; return (c:cs)
}
<?> "identifier or reserved word"
tillEof :: Parser a -> Parser a
tillEof p =
do whiteSpace
r <- p
eof
return r
parseFromFile :: Parser a -> SourceName -> IO (Either ParseError a)
parseFromFile p fname
= do{ input <- UTF8.readFile fname
; return (parse p fname input)
}
\end{code}