module NLP.GenI.Polarity(
module NLP.GenI.Polarity.Types,
PolAut, PolState(PolSt), AutDebug, PolResult(..),
buildAutomaton,
PolPathSet, detectPolPaths, hasSharedPolPaths,
polPathsToList,
singletonPolPath,
emptyPolPaths, polPathsNull,
intersectPolPaths, unionPolPaths,
makePolAut,
fixPronouns,
detectSansIdx, suggestPolFeatures, detectPols,
declareIdxConstraints, detectIdxConstraints,
prettyPolPaths,
automatonPaths, finalSt,
NFA(states, transitions),
)
where
import Data.IntSet (IntSet)
import qualified Data.IntSet as IntSet
import Data.List
import qualified Data.Map as Map
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import Control.Error
import NLP.GenI.Automaton
import NLP.GenI.FeatureStructure (AvPair (..), FeatStruct, Flist,
unifyFeat)
import NLP.GenI.General (Interval,
isEmptyIntersect, ival, thd3,
(!+!))
import NLP.GenI.GeniVal (GeniVal (gConstraints), isAnon,
mkGAnon, replace)
import NLP.GenI.Polarity.Internal
import NLP.GenI.Polarity.Types
import NLP.GenI.Pretty
import NLP.GenI.Semantics (Literal (..), Sem, SemInput,
emptyLiteral, sortSem)
import NLP.GenI.Tag (TagElem (..), TagItem (..),
setTidnums)
import NLP.GenI.TreeSchema (GNode, Ptype (Initial), gdown, gup,
root, rootUpd)
data PolResult = PolResult { prIntermediate :: [AutDebug]
, prInitial :: PolAut
, prFinal :: PolAut
, prSem :: Sem }
type AutDebug = (PolarityKey, PolAut, PolAut)
buildAutomaton :: Set.Set PolarityAttr
-> FeatStruct GeniVal
-> PolMap
-> SemInput
-> [TagElem]
-> PolResult
buildAutomaton polarityAttrs rootFeat extrapol (tsem,tres,_) candRaw =
let
rcatPol :: PolMap
rcatPol = detectRootCompensation polarityAttrs rootFeat
detect = detectIdxConstraints tres
constrain t = t { tpolarities = Map.unionWith (!+!) p r
}
where p = tpolarities t
r = detect . tinterface $ t
candRest = map constrain candRaw
inputRest = declareIdxConstraints tres
cand1 = map (detectPols polarityAttrs) candRest
extras1 = Map.unionsWith (!+!) [ extrapol, inputRest, rcatPol ]
ks1 = polarityKeys cand1 Map.empty
tconvert t = t { tpolarities = convertUnconstrainedPolarities ks1 (tpolarities t) }
cand = map tconvert cand1
extras = convertUnconstrainedPolarities ks1 extras1
ks = polarityKeys cand extras
in makePolAut cand tsem extras ks
makePolAut :: [TagElem] -> Sem -> PolMap -> [PolarityKey] -> PolResult
makePolAut candsRaw tsemRaw extraPol ks =
let
(tsem, cands') = fixPronouns (tsemRaw,candsRaw)
cands = setTidnums cands'
sortedsem = sortSemByFreq tsem cands
smap = buildColumns cands sortedsem
seed = buildSeedAut smap sortedsem
build k xs = (k,aut,prune aut):xs
where aut = buildPolAut k initK (thd3 $ head xs)
initK = Map.findWithDefault (ival 0) k extraPol
res = foldr build [(PolarityKeyStr "(seed)",seed,prune seed)] ks
in PolResult { prIntermediate = reverse res
, prInitial = seed
, prFinal = thd3 $ head res
, prSem = tsem }
buildColumns :: (TagItem t) => [t]
-> Sem
-> Map.Map (Literal GeniVal) [t]
buildColumns cands [] =
Map.singleton emptyLiteral e
where e = filter (null.tgSemantics) cands
buildColumns cands (l:ls) =
let matchfn t = l `elem` tgSemantics t
(match, cands2) = partition matchfn cands
next = buildColumns cands2 ls
in Map.insert l match next
buildSeedAut :: SemMap -> Sem -> PolAut
buildSeedAut cands tsem =
let start = polstart []
hasZero (x,y) = x <= 0 && y >= 0
isFinal (PolSt c _ pols) =
c == length tsem && all hasZero pols
initAut = NFA
{ startSt = start
, isFinalSt = Just isFinal
, finalStList = []
, states = [[start]]
, transitions = Map.empty }
in nubAut $ buildSeedAut' cands tsem 1 initAut
buildSeedAut' :: SemMap -> Sem -> Int -> PolAut -> PolAut
buildSeedAut' _ [] _ aut = aut
buildSeedAut' cands (l:ls) i aut =
let
prev = head $ states aut
tcands = Map.findWithDefault [] l cands
fn st ap = buildSeedAutHelper tcands l i st ap
(newAut,newStates) = foldr fn (aut,[]) prev
next = nub newStates : states aut
in buildSeedAut' cands ls (i+1) (newAut { states = next })
buildSeedAutHelper :: [TagElem]
-> Literal GeniVal
-> Int
-> PolState
-> (PolAut,[PolState])
-> (PolAut,[PolState])
buildSeedAutHelper cs l i st (aut,prev) =
let
(PolSt _ ex1 _) = st
tcand = [ Just t | t <- cs
, isEmptyIntersect ex1 (tsemantics t) ]
addT tr (a,n) = (addTrans a st tr st2, st2:n)
where
st2 = PolSt i (delete l $ ex1 ++ ex2) []
ex2 = case tr of
Nothing -> []
Just tr_ -> tsemantics tr_
in if (l `elem` ex1)
then addT Nothing (aut,prev)
else foldr addT (aut,prev) tcand
buildPolAut :: PolarityKey -> Interval -> PolAut -> PolAut
buildPolAut k initK skelAut =
let concatPol p (PolSt pr b pol) = PolSt pr b (p:pol)
newStart = concatPol initK $ startSt skelAut
initAut = skelAut
{ startSt = newStart
, states = [[newStart]]
, transitions = Map.empty }
in nubAut $ buildPolAut' k (transitions skelAut) initAut
buildPolAut' :: PolarityKey -> PolTransFn -> PolAut -> PolAut
buildPolAut' fk skeleton aut =
let
prev = head $ states aut
fn st ap = buildPolAutHelper fk skeleton st ap
(newAut,newStates) = foldr fn (aut,Set.empty) prev
next = Set.toList newStates : states aut
in if Set.null newStates
then aut
else buildPolAut' fk skeleton (newAut { states = next })
buildPolAutHelper :: PolarityKey -> PolTransFn -> PolState -> (PolAut,Set.Set PolState) -> (PolAut,Set.Set PolState)
buildPolAutHelper fk skeleton st (aut,prev) =
let
PolSt pr ex (po1:skelpo1) = st
skelSt = PolSt pr ex skelpo1
trans = Map.toList $ Map.findWithDefault Map.empty skelSt skeleton
result = foldr addT (aut,prev) trans
addT (oldSt2,trs) (a,n) = foldr (addTS oldSt2) (a,n) trs
addTS skel2 tr (a,n) = (addTrans a st tr st2, Set.insert st2 n)
where st2 = newSt tr skel2
newSt :: Maybe TagElem -> PolState -> PolState
newSt t skel2 = PolSt pr2 ex2 (po2:skelPo2)
where
PolSt pr2 ex2 skelPo2 = skel2
po2 = po1 !+! Map.findWithDefault (ival 0) fk pol
pol = case t of Nothing -> Map.empty
Just t2 -> tpolarities t2
in result
prune :: PolAut -> PolAut
prune aut =
let theStates = states aut
final = finalSt aut
lastStates = head theStates
nextStates = tail theStates
nonFinal = (lastStates \\ final)
firstAut = aut { states = [] }
pruned = prune' (nonFinal:nextStates) firstAut
statesPruned = states pruned
headPruned = head statesPruned
tailPruned = tail statesPruned
in if (null theStates)
then aut
else pruned { states = (headPruned ++ final) : tailPruned }
prune' :: [[PolState]] -> PolAut -> PolAut
prune' [] oldAut = oldAut { states = reverse $ states oldAut }
prune' (sts:next) oldAut =
let
oldT = transitions oldAut
oldSt = states oldAut
transFrom st = Map.lookup st oldT
blacklist = filter (isNothing.transFrom) sts
allTrans = Map.toList $ transitions oldAut
miniTrim = Map.filterWithKey (\k _ -> not (k `elem` blacklist))
trim = Map.filterWithKey (\k m -> not (k `elem` blacklist || Map.null m))
newT = trim $ Map.fromList [ (st2, miniTrim m) | (st2,m) <- allTrans ]
newSts = sts \\ blacklist
newAut = oldAut { transitions = newT,
states = newSts : oldSt }
in if null blacklist
then oldAut { states = (reverse oldSt) ++ (sts:next) }
else prune' next newAut
type PredLite = (String,[GeniVal])
type SemWeightMap = Map.Map PredLite SemPols
fixPronouns :: (Sem,[TagElem]) -> (Sem,[TagElem])
fixPronouns (tsem,cands) =
let
getpols :: TagElem -> [ (PredLite,SemPols) ]
getpols x = zip [ (prettyStr p, h:as) | Literal h p as <- tsemantics x ] (tsempols x)
sempols :: [ (PredLite,SemPols) ]
sempols = concatMap getpols cands
usagemap :: SemWeightMap
usagemap = Map.fromListWith (zipWith min) sempols
chargemap :: Map.Map GeniVal Int
chargemap = Map.fromListWith (+) $ concatMap clump $ Map.toList usagemap
where clump ((_,is),ps) = zip is ps
indices = concatMap fn (Map.toList chargemap)
where fn (i,c) = replicate (negate c) i
extraSem = map indexLiteral indices
tsem2 = sortSem (tsem ++ extraSem)
zlit = filter (null.tsemantics) cands
cands2 = (cands \\ zlit) ++ concatMap fn indices
where fn i = map (tweak i) zlit
tweak i x = assignIndex i $ x { tsemantics = [indexLiteral i] }
comparefn :: GeniVal -> Int -> Int -> [GeniVal]
comparefn i ct cm = if cm < ct then extra else []
where maxNeeded = Map.findWithDefault 0 i chargemap
extra = replicate (min (negate maxNeeded) (ct cm)) i
comparePron :: (PredLite,SemPols) -> [GeniVal]
comparePron (lit,c1) = concat $ zipWith3 comparefn idxs c1 c2
where idxs = snd lit
c2 = Map.findWithDefault [] lit usagemap
addextra :: TagElem -> TagElem
addextra c = c { tsemantics = sortSem (sem ++ extra) }
where sem = tsemantics c
extra = map indexLiteral $ concatMap comparePron (getpols c)
cands3 = map addextra cands2
in (tsem2, cands3)
indexLiteral :: GeniVal -> Literal GeniVal
indexLiteral x = Literal x mkGAnon []
isExtraCol :: Literal GeniVal -> Bool
isExtraCol (Literal _ p []) = isAnon p
isExtraCol _ = False
assignIndex :: GeniVal -> TagElem -> TagElem
assignIndex i te =
let idxfs = [ AvPair __idx__ i ]
oldt = ttree te
oldr = root oldt
tfup = gup oldr
in case hush (unifyFeat tfup idxfs) of
Nothing -> te
Just (gup2, sub) -> replace sub $ te { ttree = newt }
where newt = rootUpd oldt $ oldr { gup = gup2 }
detectIdxConstraints :: Flist GeniVal -> Flist GeniVal -> PolMap
detectIdxConstraints cs interface =
let matches = intersect cs interface
matchStr = map idxConstraintKey matches
in Map.fromList $ zip matchStr ((repeat.ival) 1)
declareIdxConstraints :: Flist GeniVal -> PolMap
declareIdxConstraints = Map.fromList . (map declare) where
declare c = (idxConstraintKey c, minusone)
minusone = ival (1)
idxConstraintKey :: AvPair GeniVal -> PolarityKey
idxConstraintKey = PolarityKeyStr . ("." <>) . pretty
suggestPolFeatures :: [TagElem] -> [Text]
suggestPolFeatures tes =
let
rfeats, sfeats :: [Flist GeniVal]
rfeats = map (gdown.root.ttree) $ filter (\t -> ttype t == Initial) tes
sfeats = [ concat s | s <- map substTops tes, (not.null) s ]
attrs :: Flist GeniVal -> [Text]
attrs avs = [ a | AvPair a v <- avs, isJust (gConstraints v) ]
theAttributes = map attrs $ rfeats ++ sfeats
in if null theAttributes then [] else foldr1 intersect theAttributes
detectSansIdx :: [TagElem] -> [TagElem]
detectSansIdx =
let rfeats t = (gdown.root.ttree) t
feats t | ttype t == Initial = concat $ rfeats t : substTops t
feats t = concat $ substTops t
attrs avs = [ a | AvPair a v <- avs, isJust (gConstraints v) ]
hasIdx t = __idx__ `elem` (attrs.feats $ t) || (ttype t /= Initial && (null $ substTops t))
in filter (not.hasIdx)
detectPols :: Set.Set PolarityAttr -> TagElem -> TagElem
detectPols attrs t =
t { tpolarities = addPols (detectPolsH attrs t) (tpolarities t) }
type PolPathSet = IntSet
type PolPathMap = Map.Map TagElem IntSet
detectPolPaths :: [[TagElem]] -> [(TagElem,PolPathSet)]
detectPolPaths paths =
Map.toList pathFM
where
(pathFM, _) = foldl' addPath (Map.empty, 1) paths
addPath :: (PolPathMap, Int) -> [TagElem] -> (PolPathMap, Int)
addPath (accFM, counter) path =
(foldl' ins accFM path, counter + 1)
where
myBit = singletonPolPath counter
ins m t = Map.insertWith (IntSet.union) t myBit m
emptyPolPaths :: PolPathSet
emptyPolPaths = IntSet.empty
polPathsNull :: PolPathSet -> Bool
polPathsNull = IntSet.null
polPathsToList :: PolPathSet -> [Int]
polPathsToList = IntSet.toAscList
unionPolPaths :: PolPathSet -> PolPathSet -> PolPathSet
unionPolPaths = IntSet.union
intersectPolPaths :: PolPathSet -> PolPathSet -> PolPathSet
intersectPolPaths = IntSet.intersection
hasSharedPolPaths :: PolPathSet -> PolPathSet -> Bool
hasSharedPolPaths x y = not . polPathsNull $
x `intersectPolPaths` y
prettyPolPaths :: PolPathSet -> Text
prettyPolPaths paths =
T.intercalate ", " $ map pretty pathlist
where
pathlist = IntSet.toAscList paths
singletonPolPath :: Int -> PolPathSet
singletonPolPath = IntSet.singleton
sortSemByFreq :: Sem -> [TagElem] -> Sem
sortSemByFreq tsem cands =
let counts = map lenfn tsem
lenfn l = length $ filter fn cands
where fn x = l `elem` (tsemantics x)
sortfn a b
| isX a && isX b = compare (snd a) (snd b)
| isX a = GT
| isX b = LT
| otherwise = compare (snd a) (snd b)
where isX = isExtraCol.fst
sorted = sortBy sortfn $ zip tsem counts
in (fst.unzip) sorted
data PolState = PolSt Int [Literal GeniVal] [(Int,Int)]
deriving (Eq)
type PolTrans = TagElem
type PolAut = NFA PolState PolTrans
type PolTransFn = Map.Map PolState (Map.Map PolState [Maybe PolTrans])
instance Show PolState
where show (PolSt pr ex po) = show pr ++ " " ++ prettyStr ex ++ show po
instance Ord PolState where
compare (PolSt pr1 ex1 po1) (PolSt pr2 ex2 po2) =
let prC = compare pr1 pr2
expoC = compare (ex1,po1) (ex2,po2)
in if (prC == EQ) then expoC else prC
fakestate :: Int -> [Interval] -> PolState
fakestate s pol = PolSt s [] pol
polstart :: [Interval] -> PolState
polstart pol = fakestate 0 pol