% GenI surface realiser
% Copyright (C) 2009 Eric Kow
%
% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 2
% of the License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
\chapter{Ranking output}
\label{cha:ranking}
\begin{code}
module NLP.GenI.OptimalityTheory
(
OtConstraint(..), OtRanking,
GetTraces, OtResult, OtViolation, RankedOtConstraint(..),
rankResults, otWarnings,
prettyViolations,prettyRank
)
where
import Control.Applicative ( (<$>), (<*>) )
import Control.Arrow ( first )
import Data.Function (on)
import Data.Char ( isSpace )
import Data.List (nub, partition, sort, sortBy, groupBy, intersperse, (\\), unfoldr )
import Text.JSON
import NLP.GenI.Btypes ( Macros, ptrace )
import qualified NLP.GenI.Builder as B
\end{code}
If your tree schemata are annotated with traces (TODO link to traces and
metagrammars), you can re-use them as a basis for ranking the output produced
by GenI. The basic idea is to supply a list of either positive, negative or
negative conjunction constraints.
For users familiar with Haskell, the constraints are described with the
following type:
\begin{includecodeinmanual}
\begin{code}
data OtConstraint = PositiveC String
| NegativeC String
| NegativeConjC [String]
deriving (Show, Eq)
\end{code}
\end{includecodeinmanual}
Roughly speaking the more highly ranked the constraint, the greater the impact
of a violation of that constraint will be. See section
\ref{sec:ranking-procedure} for more details on the ranking procedure.
\begin{code}
data RankedOtConstraint = RankedOtConstraint Int OtConstraint
deriving (Show, Eq)
instance Ord RankedOtConstraint where
compare (RankedOtConstraint r1 _) (RankedOtConstraint r2 _) = compare r1 r2
newtype RankedOtConstraint2 = RankedOtConstraint2 RankedOtConstraint deriving Eq
instance Ord RankedOtConstraint2 where
compare (RankedOtConstraint2 x) (RankedOtConstraint2 y) = compare y x
type OtRanking = [[OtConstraint]]
data OtViolation = OtViolation { otLexName :: String
, otConstraintViolated :: RankedOtConstraint }
deriving (Show, Eq, Ord)
data LexItem = LexItem
{ lLexname :: String
, lTraces :: [String]
} deriving (Ord, Eq, Show)
type GetTraces = String -> [String]
type OtResult x = (Int,x,[OtViolation])
\end{code}
\section{Input format}
Constraints are expressed in JSON as a list of \jargon{ranking levels}. A
ranking level is a list of constraints that should be assigned the same rank.
In lieu of a formal description, we provide an example below:
\small{NB: Either the JSON format or the JSON parser used by GenI is strict
enough to refuse initial whitespace in this file.}
\begin{verbatim}
[
[{"neg-constraint": "dian0Vn1dePassive"},
{"pos-constraint": "CanonicalSubject"}],
[{"neg-conj-constraint": ["InvertedNominalSubject",
"CanonicalSententialObjectFinite"]}],
[{"neg-conj-constraint": ["InvertedNominalSubject",
"UnboundedCleft"]},
{"neg-constraint": "CleftSubject"}]
]
\end{verbatim}
This example constraints file has three ranking levels. These levels contain
following constraints:
\begin{enumerate}
\item A negative constraint saying that \verb!dian0Vn1dePassive! should
not appear, and a positive one saying that \verb!CanonicalSubject!
\emph{should} appear. These constraints appear together only because
the author of the example thinks they should have the same rank,
not because there is neccesarily any inherent relationship between
them.
\item A single negative conjunction constraint saying that
\verb!InvertedNominalSubject! and
\verb!CanonicalSententialObjectFinite!
should not appear together.
\item A negative conjunction constraint saying tat
\verb!InvertedNominalSubject! and \verb!UnboundedCleft! should not
appear together; and also a negative constraints saying that
\verb!CleftSubject! should not appear. As with the first ranking
level, there is no relationship between these two constraints. We
just put them on the same level to give them the same rank
\end{enumerate}
\begin{code}
instance JSON OtConstraint where
readJSON j =
do jv <- fromJSObject `fmap` readJSON j
case lookup "pos-constraint" jv of
Just v -> PositiveC `fmap` readJSON v
Nothing -> case lookup "neg-constraint" jv of
Just v -> NegativeC `fmap` readJSON v
Nothing -> case lookup "neg-conj-constraint" jv of
Just v -> NegativeConjC `fmap` readJSONs v
Nothing -> fail $ "Could not read OtConstraint"
showJSON (PositiveC c) =
JSObject . toJSObject $ [ ("pos-constraint", showJSON c ) ]
showJSON (NegativeC c) =
JSObject . toJSObject $ [ ("neg-constraint", showJSON c ) ]
showJSON (NegativeConjC cs) =
JSObject . toJSObject $ [ ("neg-conj-constraint", showJSONs cs ) ]
\end{code}
\begin{code}
otWarnings :: Macros -> OtRanking -> [OtViolation] -> [String]
otWarnings gram ranking blocks =
addWarning neTraces neTracesW
. addWarning nvConstraints nvConstraintsW
$ []
where
addWarning xs w = if null xs then id else (w xs :)
neTracesW xs = "these traces never appear in the grammar: " ++ unwords xs
neTraces = nonExistentTraces gram ranking
nvConstraintsW xs = "these constraints are never violated: " ++ unwords (map prettyConstraint xs)
nvConstraints = neverViolated blocks ranking
rankResults :: GetTraces -> (a -> B.Derivation) -> OtRanking -> [a] -> [OtResult a]
rankResults getTraces getDerivation r = squish . sortResults . map addViolations
where
addViolations x = (x, getViolations x)
getViolations = violations (concatRank r) . lexTraces getTraces . getDerivation
squish = concat . zipWith applyRank [1..]
applyRank i = map (\(x,vs) -> (i,x,vs))
\end{code}
\begin{code}
violations :: [RankedOtConstraint] -> [LexItem] -> [OtViolation]
violations cs ls = posVs ls ++ negVs ls
where
negVs = concatMap (\l -> negViolations cs (lLexname l) (lTraces l))
posVs = posViolations cs . concatMap lTraces
posViolations :: [RankedOtConstraint] -> [String] -> [OtViolation]
posViolations cs ss =
[ OtViolation "" c | c@(RankedOtConstraint _ (PositiveC s)) <- cs, not (s `elem` ss) ]
negViolations :: [RankedOtConstraint]
-> String
-> [String]
-> [OtViolation]
negViolations cs l ss =
[ OtViolation l c | c@(RankedOtConstraint _ (NegativeC s)) <- cs, s `elem` ss ] ++
[ OtViolation l c | c@(RankedOtConstraint _ (NegativeConjC xs)) <- cs, all (`elem` ss) xs ]
\end{code}
\section{Ranking procedure}
\label{sec:ranking-procedure}
Generation results are sorted according to their highest-ranking constraint
violation (moving on to the next-highest ranking violation and so forth in case
of a tie). The best result appears first.
\begin{code}
sortedViolations :: (a, [OtViolation]) -> [RankedOtConstraint2]
sortedViolations = map (RankedOtConstraint2 . otConstraintViolated) . sort . snd
sortResults :: [(a, [OtViolation])] -> [[(a, [OtViolation])]]
sortResults = sortAndGroupByDecoration compare sortedViolations
lexTraces :: GetTraces -> B.Derivation -> [LexItem]
lexTraces getTraces = map (toLexItem getTraces) . B.lexicalSelection
toLexItem :: GetTraces -> String -> LexItem
toLexItem getTraces t =
LexItem { lLexname = t
, lTraces = getTraces t }
\end{code}
\section{Output format}
Constraint violations can be outputted as JSON objects as the following example
shows
\begin{verbatim}
{ "lex-item": "discuter:n0Vn1pn2:Tn0Vn1pn2-5830:22",
, "rank": 6,
, "violation": {"neg-constraint": "passiveVerbMorphology"}
}
\end{verbatim}
Positive constraint violations are not associated with any lexical items
so the lex-item field is omitted for them.
\begin{code}
instance JSON RankedOtConstraint where
readJSON j =
do jo <- fromJSObject `fmap` readJSON j
let field x = maybe (fail $ "Could not find: " ++ x) readJSON
$ lookup x jo
RankedOtConstraint <$> field "rank"
<*> field "violation"
showJSON = JSObject . toJSObject . rankedOtConstraintToPairs
rankedOtConstraintToPairs :: RankedOtConstraint -> [ (String, JSValue) ]
rankedOtConstraintToPairs (RankedOtConstraint r c) =
[ ("rank", showJSON r), ("violation", showJSON c) ]
instance JSON OtViolation where
readJSON j =
do jo <- fromJSObject `fmap` readJSON j
case lookup "lex-item" jo of
Nothing -> OtViolation "" <$> readJSON j
Just l -> OtViolation <$> readJSON l
<*> readJSON j
showJSON ov = JSObject . toJSObject $ pairs
where
pairs = case otLexName ov of
"" -> basicPairs
l -> ("lex-item", showJSON l) : basicPairs
basicPairs = rankedOtConstraintToPairs (otConstraintViolated ov)
prettyViolations :: GetTraces -> Bool -> [OtViolation] -> String
prettyViolations getTraces noisy vs =
unlines $ (if null posVs then [] else [ indented 1 75 . showPosVs $ posVs ])
++ map showLexVs negBuckets
where
(posVs, negVs) = partition (null . otLexName) vs
negBuckets = buckets otLexName negVs
showPosVs = unwords . map (prettyRankedConstraint . otConstraintViolated)
showLexVs (l,lvs) =
let itmName = "(" ++ l ++ ")"
constraints = map otConstraintViolated lvs
allTraces = indented 4 75 . unwords . getTraces $ l
in (indented 2 75 . unwords $ itmName : map prettyRankedConstraint constraints)
++ (if noisy then "\n" ++ allTraces else "")
prettyRankedConstraint :: RankedOtConstraint -> String
prettyRankedConstraint (RankedOtConstraint r c) = prettyConstraint c ++ " " ++ prettyRank r
prettyConstraint :: OtConstraint -> String
prettyConstraint (PositiveC str) = '+' : str
prettyConstraint (NegativeC str) = '*' : str
prettyConstraint (NegativeConjC strs) = "*(" ++ (concat $ intersperse " & " strs) ++ ")"
prettyRank :: Int -> String
prettyRank r = "(r" ++ show r ++ ")"
neverViolated :: [OtViolation] -> [[OtConstraint]] -> [OtConstraint]
neverViolated vs ranking = concat ranking \\ cs_used
where
cs_used = nub . map (noRank . otConstraintViolated) $ vs
nonExistentTraces :: Macros -> [[OtConstraint]] -> [String]
nonExistentTraces ms vs = r_traces \\ m_traces
where
m_traces = nub $ concatMap ptrace ms
r_traces = nub $ concatMap cTraces $ concat vs
cTraces :: OtConstraint -> [String]
cTraces (PositiveC c) = [c]
cTraces (NegativeConjC cs) = cs
cTraces (NegativeC c) = [c]
concatRank :: [[OtConstraint]] -> [RankedOtConstraint]
concatRank = concat . zipWith rank [1..]
where
rank x ys = map (RankedOtConstraint x) ys
noRank :: RankedOtConstraint -> OtConstraint
noRank (RankedOtConstraint _ c) = c
buckets :: Ord b => (a -> b) -> [a] -> [ (b,[a]) ]
buckets f = map (first head . unzip)
. groupBy ((==) `on` fst)
. sortBy (compare `on` fst)
. map (\x -> (f x, x))
sortAndGroupByDecoration :: Eq b => (b -> b -> Ordering) -> (a -> b) -> [a] -> [[a]]
sortAndGroupByDecoration cmp f = map (map snd)
. groupBy ((==) `on` fst)
. sortBy (cmp `on` fst)
. map (\x -> (f x, x))
indented :: Int -> Int -> String -> String
indented x len = concat . intersperse "\n" . map (\s -> spaces x ++ s) . unfoldr f
where
f "" = Nothing
f str = Just $ splitAtBefore len str
spaces :: Int -> String
spaces n = replicate n ' '
splitAtBefore :: Int -> String -> (String, String)
splitAtBefore len xs
| length xs < len = (xs, "")
| any isSpace xs = (begin, trim $ drop (length begin) xs)
| otherwise = (xs, "")
where
begin
| length upToSpace > len = upToSpace
| otherwise = reverse . trim . dropWhile isNotSpace . reverse . take len $ xs
upToSpace = takeWhile isNotSpace xs
isNotSpace = not . isSpace
trim = drop 1
\end{code}