CYK Parsing Module Donya Quick Last modified: 22-Jan-2016 > module Kulitta.Learning.Parser where > import Data.List > type Rule a = (a, [a]) > type Partial a = [Rule a] > type Parse a = [Partial a] 1. Given a list of symbols, need a way to find all possible rule parses. The following function will find possible rules to apply to the start of a string (list) of symbols. > findMatches :: (Eq a) => [Rule a] -> [a] -> [(a, [a])] > findMatches rules xs = > let f (lhs, rhs) = take (length rhs) xs == rhs > in filter f rules We try brute-force left to right parsing, given a rule list. > recParse1 :: (Eq a) => [Rule a] -> [a] -> [Partial a] -- [[(a, [a])]] > recParse1 rules [] = [[]] > recParse1 rules xs = > let mats = findMatches rules xs -- get all possible next steps > f (lhs, rhs) = drop (length rhs) xs > xs' = map f mats -- cut xs based on mats > finals = map (recParse1 rules) xs' > finals' = zipWith (\h ts -> map (h:) ts) mats finals > in filter (okParse xs) $ concat finals' if null mats then [] else undefined -- filter (okParse xs) finalStrs > okParse :: (Eq a) => [a] -> [(a, [a])] -> Bool > okParse xs parse = length xs == length (concatMap snd parse) > nextLevel :: (Eq a) => [Rule a] -> Partial a -> [Partial a] > nextLevel rules = recParse1 rules . map fst We need to do the following: 1. First form an initial parse list. Must go from [a] to [Partial a]. 2. For each Partial, x, create a new [Partial a], xs. - for each y in xs, append x to it. Make it a Parse a. > iterParseStep :: (Eq a) => [a] -> [Rule a] -> Parse a -> [Parse a] > iterParseStep dset rules [] = map (\x -> [x]) $ recParse1 rules dset > iterParseStep dset rules parse = > let theStr = head parse > nextLevels = nextLevel rules theStr > in map (\x -> x:parse) nextLevels > isStart :: (Eq a) => a -> [Rule a] -> Bool > isStart ssym [(a,bs)] = ssym==a > isStart ssym _ = False > type StopFun a = Parse a -> Bool > noReps :: (Eq a) => [a] -> Bool > noReps [] = True > noReps (x:xs) = not (elem x xs) && noReps xs > isNew :: (Eq a) => [[a]] -> [a] -> Bool > isNew allPs newP = > let x = head newP > f p = elem x p > in not $ or $ map f allPs > filterUniques :: (Eq a) => [[a]] -> [[a]] > filterUniques [] = [] > filterUniques (x:xs) = > if isNew xs x then x:filterUniques xs else filterUniques xs > removeRedundants :: (Eq a) => [[a]] -> [[a]] > removeRedundants xs = > let f a bs = elem (head a) $ tail bs > g x = not $ or $ map (f x) xs > in filter g xs > iterParse :: (Eq a) => StopFun a -> [a] -> [Rule a] -> [Parse a] -> Int -> Int -> [Parse a] > iterParse stopFun dset rules parses count lim = > let iStops = findIndices stopFun parses -- find any finished parses > parses' = concatMap (iterParseStep dset rules) parses > fullParse = iterParse stopFun dset rules (iterParseStep dset rules []) (count+1) lim > recParse = filter (noReps) $ nub $ iterParse stopFun dset rules parses' (count+1) lim > recParse' = filterUniques recParse > in if count >= lim then parses else -- stop because of iterations limit > if null parses then fullParse else -- start from beginning > if null iStops then recParse' else map (parses !!) iStops > parse :: (Eq a) => StopFun a -> [Rule a] -> [a] -> Int -> [Parse a] > parse stopFun rules dset maxIters = iterParse stopFun dset rules [] 0 maxIters > iterParse2 :: (Eq a) => StopFun a -> [a] -> [Rule a] -> [Parse a] -> Int -> Int -> [Parse a] > iterParse2 stopFun dset rules parses count lim = > let iStops = findIndices stopFun parses -- find any finished parses > fParses = map (parses!!) iStops > uParses = map (parses!!) [x | x<-[0..length parses-1], not $ elem x iStops] > parses' = concatMap (iterParseStep dset rules) uParses > fullParse = iterParse2 stopFun dset rules (iterParseStep dset rules []) (count+1) lim > recParse = nub $ iterParse2 stopFun dset rules parses' (count+1) lim > in if null parses then fullParse else > if count >= lim then fParses else (fParses ++ recParse) > parseAll :: (Eq a) => StopFun a -> [Rule a] -> [a] -> Int -> [Parse a] > parseAll stopFun rules dset maxIters = iterParse2 stopFun dset rules [] 0 maxIters ======= DISPLAY ======= > showRHS :: (Show a) => Partial a -> String > showRHS = show . concatMap snd > showParse :: (Show a) => Parse a -> String > showParse [] = [] > showParse [x] = showLevel x ++ "\n" ++ showRHS x ++ "\n\n" > showParse (h:t) = showLevel h ++ "\n" ++ showParse t > showLevel :: (Show a) => Partial a -> String > showLevel [] = [] > showLevel (h:t) = show h ++ " " ++ showLevel t > printParse p = putStr $ showParse p ======= TESTING ======= > testStr = [1,1,1] > testRules = [(1, [1,1]), > (1, [1,2]), > (2, [2,2]), > (1, [1])] > testStop :: StopFun Int > testStop ([(1,_)]:_) = True > testStop _ = False > testP = parse testStop testRules testStr 100 > testP' = parseAll testStop testRules testStr 100 > testStr2 = [1,1,1,2] > testP2 = parse testStop testRules testStr2 100 > testP2' = parseAll testStop testRules testStr2 100