CYK Parsing Module
Donya Quick
Last modified: 22-Jan-2016
> module Kulitta.Learning.Parser where
> import Data.List
> type Rule a = (a, [a])
> type Partial a = [Rule a]
> type Parse a = [Partial a]
1. Given a list of symbols, need a way to find
all possible rule parses.
The following function will find possible rules to apply
to the start of a string (list) of symbols.
> findMatches :: (Eq a) => [Rule a] -> [a] -> [(a, [a])]
> findMatches rules xs =
> let f (lhs, rhs) = take (length rhs) xs == rhs
> in filter f rules
We try brute-force left to right parsing, given a rule list.
> recParse1 :: (Eq a) => [Rule a] -> [a] -> [Partial a]
> recParse1 rules [] = [[]]
> recParse1 rules xs =
> let mats = findMatches rules xs
> f (lhs, rhs) = drop (length rhs) xs
> xs' = map f mats
> finals = map (recParse1 rules) xs'
> finals' = zipWith (\h ts -> map (h:) ts) mats finals
> in filter (okParse xs) $ concat finals'
if null mats then [] else undefined
> okParse :: (Eq a) => [a] -> [(a, [a])] -> Bool
> okParse xs parse = length xs == length (concatMap snd parse)
> nextLevel :: (Eq a) => [Rule a] -> Partial a -> [Partial a]
> nextLevel rules = recParse1 rules . map fst
We need to do the following:
1. First form an initial parse list. Must go from [a] to [Partial a].
2. For each Partial, x, create a new [Partial a], xs.
- for each y in xs, append x to it. Make it a Parse a.
> iterParseStep :: (Eq a) => [a] -> [Rule a] -> Parse a -> [Parse a]
> iterParseStep dset rules [] = map (\x -> [x]) $ recParse1 rules dset
> iterParseStep dset rules parse =
> let theStr = head parse
> nextLevels = nextLevel rules theStr
> in map (\x -> x:parse) nextLevels
> isStart :: (Eq a) => a -> [Rule a] -> Bool
> isStart ssym [(a,bs)] = ssym==a
> isStart ssym _ = False
> type StopFun a = Parse a -> Bool
> noReps :: (Eq a) => [a] -> Bool
> noReps [] = True
> noReps (x:xs) = not (elem x xs) && noReps xs
> isNew :: (Eq a) => [[a]] -> [a] -> Bool
> isNew allPs newP =
> let x = head newP
> f p = elem x p
> in not $ or $ map f allPs
> filterUniques :: (Eq a) => [[a]] -> [[a]]
> filterUniques [] = []
> filterUniques (x:xs) =
> if isNew xs x then x:filterUniques xs else filterUniques xs
> removeRedundants :: (Eq a) => [[a]] -> [[a]]
> removeRedundants xs =
> let f a bs = elem (head a) $ tail bs
> g x = not $ or $ map (f x) xs
> in filter g xs
> iterParse :: (Eq a) => StopFun a -> [a] -> [Rule a] -> [Parse a] -> Int -> Int -> [Parse a]
> iterParse stopFun dset rules parses count lim =
> let iStops = findIndices stopFun parses
> parses' = concatMap (iterParseStep dset rules) parses
> fullParse = iterParse stopFun dset rules (iterParseStep dset rules []) (count+1) lim
> recParse = filter (noReps) $ nub $ iterParse stopFun dset rules parses' (count+1) lim
> recParse' = filterUniques recParse
> in if count >= lim then parses else
> if null parses then fullParse else
> if null iStops then recParse' else map (parses !!) iStops
> parse :: (Eq a) => StopFun a -> [Rule a] -> [a] -> Int -> [Parse a]
> parse stopFun rules dset maxIters = iterParse stopFun dset rules [] 0 maxIters
> iterParse2 :: (Eq a) => StopFun a -> [a] -> [Rule a] -> [Parse a] -> Int -> Int -> [Parse a]
> iterParse2 stopFun dset rules parses count lim =
> let iStops = findIndices stopFun parses
> fParses = map (parses!!) iStops
> uParses = map (parses!!) [x | x<-[0..length parses-1], not $ elem x iStops]
> parses' = concatMap (iterParseStep dset rules) uParses
> fullParse = iterParse2 stopFun dset rules (iterParseStep dset rules []) (count+1) lim
> recParse = nub $ iterParse2 stopFun dset rules parses' (count+1) lim
> in if null parses then fullParse else
> if count >= lim then fParses else (fParses ++ recParse)
> parseAll :: (Eq a) => StopFun a -> [Rule a] -> [a] -> Int -> [Parse a]
> parseAll stopFun rules dset maxIters = iterParse2 stopFun dset rules [] 0 maxIters
======= DISPLAY =======
> showRHS :: (Show a) => Partial a -> String
> showRHS = show . concatMap snd
> showParse :: (Show a) => Parse a -> String
> showParse [] = []
> showParse [x] = showLevel x ++ "\n" ++ showRHS x ++ "\n\n"
> showParse (h:t) = showLevel h ++ "\n" ++ showParse t
> showLevel :: (Show a) => Partial a -> String
> showLevel [] = []
> showLevel (h:t) = show h ++ " " ++ showLevel t
> printParse p = putStr $ showParse p
======= TESTING =======
> testStr = [1,1,1]
> testRules = [(1, [1,1]),
> (1, [1,2]),
> (2, [2,2]),
> (1, [1])]
> testStop :: StopFun Int
> testStop ([(1,_)]:_) = True
> testStop _ = False
> testP = parse testStop testRules testStr 100
> testP' = parseAll testStop testRules testStr 100
> testStr2 = [1,1,1,2]
> testP2 = parse testStop testRules testStr2 100
> testP2' = parseAll testStop testRules testStr2 100