module NLP.GenI.Polarity(
module NLP.GenI.Polarity.Types,
PolAut, PolState(PolSt), AutDebug, PolResult(..),
buildAutomaton,
makePolAut,
fixPronouns,
detectSansIdx, suggestPolFeatures, detectPols, detectPolPaths,
declareIdxConstraints, detectIdxConstraints,
prettyPolPaths, prettyPolPaths',
automatonPaths, finalSt,
NFA(states, transitions),
)
where
import Data.Bits
import qualified Data.Set as Set
import qualified Data.Map as Map
import Data.List
import Data.Maybe (isNothing, isJust)
import Data.Text (Text)
import qualified Data.Text as T
import NLP.GenI.Automaton
import NLP.GenI.FeatureStructure ( Flist, AvPair(..), FeatStruct, unifyFeat )
import NLP.GenI.General
( BitVector, isEmptyIntersect, thd3, Interval, ival, (!+!)
)
import NLP.GenI.GeniVal ( GeniVal(gConstraints), mkGAnon, isAnon, replace )
import NLP.GenI.Polarity.Internal
import NLP.GenI.Polarity.Types
import NLP.GenI.Pretty
import NLP.GenI.Semantics ( Literal(..), SemInput, Sem, emptyLiteral
, sortSem
)
import NLP.GenI.Tag ( TagElem(..), TagItem(..), setTidnums )
import NLP.GenI.TreeSchema ( Ptype(Initial), GNode, root, gup, gdown, rootUpd)
data PolResult = PolResult { prIntermediate :: [AutDebug]
, prInitial :: PolAut
, prFinal :: PolAut
, prSem :: Sem }
type AutDebug = (PolarityKey, PolAut, PolAut)
buildAutomaton :: Set.Set PolarityAttr
-> FeatStruct GeniVal
-> PolMap
-> SemInput
-> [TagElem]
-> PolResult
buildAutomaton polarityAttrs rootFeat extrapol (tsem,tres,_) candRaw =
let
rcatPol :: PolMap
rcatPol = detectRootCompensation polarityAttrs rootFeat
detect = detectIdxConstraints tres
constrain t = t { tpolarities = Map.unionWith (!+!) p r
}
where p = tpolarities t
r = detect . tinterface $ t
candRest = map constrain candRaw
inputRest = declareIdxConstraints tres
cand1 = map (detectPols polarityAttrs) candRest
extras1 = Map.unionsWith (!+!) [ extrapol, inputRest, rcatPol ]
ks1 = polarityKeys cand1 Map.empty
tconvert t = t { tpolarities = convertUnconstrainedPolarities ks1 (tpolarities t) }
cand = map tconvert cand1
extras = convertUnconstrainedPolarities ks1 extras1
ks = polarityKeys cand extras
in makePolAut cand tsem extras ks
makePolAut :: [TagElem] -> Sem -> PolMap -> [PolarityKey] -> PolResult
makePolAut candsRaw tsemRaw extraPol ks =
let
(tsem, cands') = fixPronouns (tsemRaw,candsRaw)
cands = setTidnums cands'
sortedsem = sortSemByFreq tsem cands
smap = buildColumns cands sortedsem
seed = buildSeedAut smap sortedsem
build k xs = (k,aut,prune aut):xs
where aut = buildPolAut k initK (thd3 $ head xs)
initK = Map.findWithDefault (ival 0) k extraPol
res = foldr build [(PolarityKeyStr "(seed)",seed,prune seed)] ks
in PolResult { prIntermediate = reverse res
, prInitial = seed
, prFinal = thd3 $ head res
, prSem = tsem }
buildColumns :: (TagItem t) => [t]
-> Sem
-> Map.Map (Literal GeniVal) [t]
buildColumns cands [] =
Map.singleton emptyLiteral e
where e = filter (null.tgSemantics) cands
buildColumns cands (l:ls) =
let matchfn t = l `elem` tgSemantics t
(match, cands2) = partition matchfn cands
next = buildColumns cands2 ls
in Map.insert l match next
buildSeedAut :: SemMap -> Sem -> PolAut
buildSeedAut cands tsem =
let start = polstart []
hasZero (x,y) = x <= 0 && y >= 0
isFinal (PolSt c _ pols) =
c == length tsem && all hasZero pols
initAut = NFA
{ startSt = start
, isFinalSt = Just isFinal
, finalStList = []
, states = [[start]]
, transitions = Map.empty }
in nubAut $ buildSeedAut' cands tsem 1 initAut
buildSeedAut' :: SemMap -> Sem -> Int -> PolAut -> PolAut
buildSeedAut' _ [] _ aut = aut
buildSeedAut' cands (l:ls) i aut =
let
prev = head $ states aut
tcands = Map.findWithDefault [] l cands
fn st ap = buildSeedAutHelper tcands l i st ap
(newAut,newStates) = foldr fn (aut,[]) prev
next = nub newStates : states aut
in buildSeedAut' cands ls (i+1) (newAut { states = next })
buildSeedAutHelper :: [TagElem]
-> Literal GeniVal
-> Int
-> PolState
-> (PolAut,[PolState])
-> (PolAut,[PolState])
buildSeedAutHelper cs l i st (aut,prev) =
let
(PolSt _ ex1 _) = st
tcand = [ Just t | t <- cs
, isEmptyIntersect ex1 (tsemantics t) ]
addT tr (a,n) = (addTrans a st tr st2, st2:n)
where
st2 = PolSt i (delete l $ ex1 ++ ex2) []
ex2 = case tr of
Nothing -> []
Just tr_ -> tsemantics tr_
in if (l `elem` ex1)
then addT Nothing (aut,prev)
else foldr addT (aut,prev) tcand
buildPolAut :: PolarityKey -> Interval -> PolAut -> PolAut
buildPolAut k initK skelAut =
let concatPol p (PolSt pr b pol) = PolSt pr b (p:pol)
newStart = concatPol initK $ startSt skelAut
initAut = skelAut
{ startSt = newStart
, states = [[newStart]]
, transitions = Map.empty }
in nubAut $ buildPolAut' k (transitions skelAut) initAut
buildPolAut' :: PolarityKey -> PolTransFn -> PolAut -> PolAut
buildPolAut' fk skeleton aut =
let
prev = head $ states aut
fn st ap = buildPolAutHelper fk skeleton st ap
(newAut,newStates) = foldr fn (aut,Set.empty) prev
next = Set.toList newStates : states aut
in if Set.null newStates
then aut
else buildPolAut' fk skeleton (newAut { states = next })
buildPolAutHelper :: PolarityKey -> PolTransFn -> PolState -> (PolAut,Set.Set PolState) -> (PolAut,Set.Set PolState)
buildPolAutHelper fk skeleton st (aut,prev) =
let
PolSt pr ex (po1:skelpo1) = st
skelSt = PolSt pr ex skelpo1
trans = Map.toList $ Map.findWithDefault Map.empty skelSt skeleton
result = foldr addT (aut,prev) trans
addT (oldSt2,trs) (a,n) = foldr (addTS oldSt2) (a,n) trs
addTS skel2 tr (a,n) = (addTrans a st tr st2, Set.insert st2 n)
where st2 = newSt tr skel2
newSt :: Maybe TagElem -> PolState -> PolState
newSt t skel2 = PolSt pr2 ex2 (po2:skelPo2)
where
PolSt pr2 ex2 skelPo2 = skel2
po2 = po1 !+! Map.findWithDefault (ival 0) fk pol
pol = case t of Nothing -> Map.empty
Just t2 -> tpolarities t2
in result
prune :: PolAut -> PolAut
prune aut =
let theStates = states aut
final = finalSt aut
lastStates = head theStates
nextStates = tail theStates
nonFinal = (lastStates \\ final)
firstAut = aut { states = [] }
pruned = prune' (nonFinal:nextStates) firstAut
statesPruned = states pruned
headPruned = head statesPruned
tailPruned = tail statesPruned
in if (null theStates)
then aut
else pruned { states = (headPruned ++ final) : tailPruned }
prune' :: [[PolState]] -> PolAut -> PolAut
prune' [] oldAut = oldAut { states = reverse $ states oldAut }
prune' (sts:next) oldAut =
let
oldT = transitions oldAut
oldSt = states oldAut
transFrom st = Map.lookup st oldT
blacklist = filter (isNothing.transFrom) sts
allTrans = Map.toList $ transitions oldAut
miniTrim = Map.filterWithKey (\k _ -> not (k `elem` blacklist))
trim = Map.filterWithKey (\k m -> not (k `elem` blacklist || Map.null m))
newT = trim $ Map.fromList [ (st2, miniTrim m) | (st2,m) <- allTrans ]
newSts = sts \\ blacklist
newAut = oldAut { transitions = newT,
states = newSts : oldSt }
in if null blacklist
then oldAut { states = (reverse oldSt) ++ (sts:next) }
else prune' next newAut
type PredLite = (String,[GeniVal])
type SemWeightMap = Map.Map PredLite SemPols
fixPronouns :: (Sem,[TagElem]) -> (Sem,[TagElem])
fixPronouns (tsem,cands) =
let
getpols :: TagElem -> [ (PredLite,SemPols) ]
getpols x = zip [ (prettyStr p, h:as) | Literal h p as <- tsemantics x ] (tsempols x)
sempols :: [ (PredLite,SemPols) ]
sempols = concatMap getpols cands
usagemap :: SemWeightMap
usagemap = Map.fromListWith (zipWith min) sempols
chargemap :: Map.Map GeniVal Int
chargemap = Map.fromListWith (+) $ concatMap clump $ Map.toList usagemap
where clump ((_,is),ps) = zip is ps
indices = concatMap fn (Map.toList chargemap)
where fn (i,c) = replicate (negate c) i
extraSem = map indexLiteral indices
tsem2 = sortSem (tsem ++ extraSem)
zlit = filter (null.tsemantics) cands
cands2 = (cands \\ zlit) ++ concatMap fn indices
where fn i = map (tweak i) zlit
tweak i x = assignIndex i $ x { tsemantics = [indexLiteral i] }
comparefn :: GeniVal -> Int -> Int -> [GeniVal]
comparefn i ct cm = if cm < ct then extra else []
where maxNeeded = Map.findWithDefault 0 i chargemap
extra = replicate (min (negate maxNeeded) (ct cm)) i
comparePron :: (PredLite,SemPols) -> [GeniVal]
comparePron (lit,c1) = concat $ zipWith3 comparefn idxs c1 c2
where idxs = snd lit
c2 = Map.findWithDefault [] lit usagemap
addextra :: TagElem -> TagElem
addextra c = c { tsemantics = sortSem (sem ++ extra) }
where sem = tsemantics c
extra = map indexLiteral $ concatMap comparePron (getpols c)
cands3 = map addextra cands2
in (tsem2, cands3)
indexLiteral :: GeniVal -> Literal GeniVal
indexLiteral x = Literal x mkGAnon []
isExtraCol :: Literal GeniVal -> Bool
isExtraCol (Literal _ p []) = isAnon p
isExtraCol _ = False
assignIndex :: GeniVal -> TagElem -> TagElem
assignIndex i te =
let idxfs = [ AvPair __idx__ i ]
oldt = ttree te
oldr = root oldt
tfup = gup oldr
in case unifyFeat tfup idxfs of
Nothing -> te
Just (gup2, sub) -> replace sub $ te { ttree = newt }
where newt = rootUpd oldt $ oldr { gup = gup2 }
detectIdxConstraints :: Flist GeniVal -> Flist GeniVal -> PolMap
detectIdxConstraints cs interface =
let matches = intersect cs interface
matchStr = map idxConstraintKey matches
in Map.fromList $ zip matchStr ((repeat.ival) 1)
declareIdxConstraints :: Flist GeniVal -> PolMap
declareIdxConstraints = Map.fromList . (map declare) where
declare c = (idxConstraintKey c, minusone)
minusone = ival (1)
idxConstraintKey :: AvPair GeniVal -> PolarityKey
idxConstraintKey = PolarityKeyStr . ("." <>) . pretty
suggestPolFeatures :: [TagElem] -> [Text]
suggestPolFeatures tes =
let
rfeats, sfeats :: [Flist GeniVal]
rfeats = map (gdown.root.ttree) $ filter (\t -> ttype t == Initial) tes
sfeats = [ concat s | s <- map substTops tes, (not.null) s ]
attrs :: Flist GeniVal -> [Text]
attrs avs = [ a | AvPair a v <- avs, isJust (gConstraints v) ]
theAttributes = map attrs $ rfeats ++ sfeats
in if null theAttributes then [] else foldr1 intersect theAttributes
detectSansIdx :: [TagElem] -> [TagElem]
detectSansIdx =
let rfeats t = (gdown.root.ttree) t
feats t | ttype t == Initial = concat $ rfeats t : substTops t
feats t = concat $ substTops t
attrs avs = [ a | AvPair a v <- avs, isJust (gConstraints v) ]
hasIdx t = __idx__ `elem` (attrs.feats $ t) || (ttype t /= Initial && (null $ substTops t))
in filter (not.hasIdx)
detectPols :: Set.Set PolarityAttr -> TagElem -> TagElem
detectPols attrs t =
t { tpolarities = addPols (detectPolsH attrs t) (tpolarities t) }
detectPolPaths :: [[TagElem]] -> [(TagElem,BitVector)]
detectPolPaths paths =
let pathFM = detectPolPaths' Map.empty 0 paths
lookupTr k = Map.findWithDefault 0 k pathFM
in map (\k -> (k, lookupTr k)) $ Map.keys pathFM
type PolPathMap = Map.Map TagElem BitVector
detectPolPaths' :: PolPathMap -> Int -> [[TagElem]] -> PolPathMap
detectPolPaths' accFM _ [] = accFM
detectPolPaths' accFM counter (path:ps) =
let currentBits = shiftL 1 counter
fn f [] = f
fn f (t:ts) = fn (Map.insertWith (.|.) t currentBits f) ts
newFM = fn accFM path
in detectPolPaths' newFM (counter+1) ps
prettyPolPaths :: BitVector -> Text
prettyPolPaths paths =
T.intercalate ", " $ map pretty pathlist
where
pathlist = prettyPolPaths' paths 1
prettyPolPaths' :: BitVector -> Int -> [Int]
prettyPolPaths' 0 _ = []
prettyPolPaths' bv counter =
if b then (counter:next) else next
where b = testBit bv 0
next = prettyPolPaths' (shiftR bv 1) (counter + 1)
sortSemByFreq :: Sem -> [TagElem] -> Sem
sortSemByFreq tsem cands =
let counts = map lenfn tsem
lenfn l = length $ filter fn cands
where fn x = l `elem` (tsemantics x)
sortfn a b
| isX a && isX b = compare (snd a) (snd b)
| isX a = GT
| isX b = LT
| otherwise = compare (snd a) (snd b)
where isX = isExtraCol.fst
sorted = sortBy sortfn $ zip tsem counts
in (fst.unzip) sorted
data PolState = PolSt Int [Literal GeniVal] [(Int,Int)]
deriving (Eq)
type PolTrans = TagElem
type PolAut = NFA PolState PolTrans
type PolTransFn = Map.Map PolState (Map.Map PolState [Maybe PolTrans])
instance Show PolState
where show (PolSt pr ex po) = show pr ++ " " ++ prettyStr ex ++ show po
instance Ord PolState where
compare (PolSt pr1 ex1 po1) (PolSt pr2 ex2 po2) =
let prC = compare pr1 pr2
expoC = compare (ex1,po1) (ex2,po2)
in if (prC == EQ) then expoC else prC
fakestate :: Int -> [Interval] -> PolState
fakestate s pol = PolSt s [] pol
polstart :: [Interval] -> PolState
polstart pol = fakestate 0 pol