> module Kulitta.Learning.CykParser where
> import Data.List
> import Kulitta.Learning.Parser

CYK Implementation
Donya Quick
Last modified: 22-Jan-2016



type Rule a = (a, [a])

> findProducers :: (Eq a) => [Rule a] -> [a] -> [Rule a]
> findProducers rs str = filter (\(l,r) -> r==str) rs

> cat :: [a] -> [a] -> [[a]]
> cat xs ys = [[x,y] | x<-xs, y<-ys]
 
 
> nextRow :: (Eq a) => [Rule a] -> [[[a]]] -> [[a]]
> nextRow rs rows = 
>     let n = length rows + 1 -- we assume terminal level is not included
>         segs = map (\i -> (i,n-i)) [1..n-1]
>         strs offset (i,j) = cat
>             (rows !! (i-1) !! offset)
>             (rows !! (j-1) !! (offset + i)) 
>         rules offset (i,j) = concatMap (map fst. findProducers rs) 
>                              (strs offset (i,j)) 
>         f offset = nub $ concatMap (rules offset) segs 
>     in  map f [0..length (rows !! 0) - n]


> mkSegs' :: Int -> Int -> [[Int]]
> mkSegs' n m = filter (\s -> sum s == n) $ 
>     makeRange $ take m $ repeat (0,n) where
>     makeRange = foldr (\(l,u) xs -> [(a:b) | a<-[l..u], b<-xs]) [[]]

> mkSegs :: Int -> Int -> [[Int]]
> mkSegs n m = filter (\s -> length s > 1) $ map (filter (>0)) $ mkSegs' n m

mkSegs must be called with:
n = the maximum length of the string in question.
m = the maximum possible number of substrings

n needs to be controlled by both the level (row+1) and
the offset. The formula should be:

n = min (length rows) (length (rows !! 0) - offset)

> toInds :: Int -> [Int] -> [(Int, Int)]
> toInds offset [] = []
> toInds offset (l:ls) = 
>     let row = l - 1
>         col = offset
>     in  if l <= 0 then toInds offset ls 
>         else (row, col) : toInds (offset+l) ls

> toStrs :: [[[a]]] -> [(Int, Int)] -> [[a]]
> toStrs rows [] = [[]]
> toStrs rows ((i,j):cs) = 
>     let strs = toStrs rows cs
>         theCell = if i < length rows && j < length (rows !! i)
>                  then rows !! i !! j
>                  else error ("(toStr) Bad box: ("++show i++", "++show j++")")
>     in  [(x:y) | x<-theCell, y<-strs]

> nextRowM m rs rows = -- m is the # of subdivisions
>     let n = length rows +1 -- the "level"
>         m' = min n m -- the number of substrings (or nonterms)
>         segs = mkSegs n m' -- this is ok
>         offsets = [0..length (rows !! 0) - n] -- all offsets
>         nts o s = concatMap (map fst . findProducers rs) (toStrs rows (toInds o s))
>         f o = concatMap (nts o) segs
>     in  map f offsets

> nextRowM2 m rs rows = -- m is the # of subdivisions
>     let n = length rows +1 -- the "level"
>         m' = min n m -- the number of substrings (or nonterms)
>         segs = mkSegs n m' -- this is ok
>         offsets = [0..length (rows !! 0) - n] -- all offsets
>         nts o s = concatMap (map fst . findProducers rs) (toStrs rows (toInds o s))
>         f o = concatMap (nts o) segs
>     in  map f offsets

> allRowsMS :: (Eq a) => Int -> [Rule a] -> [a] -> [[[a]]]
> allRowsMS m rs str = allRows' rs [fixRow rs $ firstRow' rs str] where
>     allRows' rs rows = if length rows == length (head rows) then rows
>                        else allRows' rs (rows ++ [fixRow rs $ nextRowM m rs rows])

> allRows :: (Eq a) => [Rule a] -> [a] -> [[[a]]]
> allRows rs str = allRows' rs [firstRow rs str] where 
>     allRows' rs rows = if length rows == length (head rows) then rows
>                        else allRows' rs (rows ++ [nextRow rs rows])


> showRows :: (Show a) => [[[a]]] -> String
> showRows rs = 
>     let f line = concatMap g line ++ "\n"
>         g bucket = show bucket ++ "\t"
>     in  concatMap f (reverse rs)

> printRows :: (Show a) => [[[a]]] -> IO ()
> printRows = putStr . showRows

==============

SYNONYM EXTENION

> findSynonyms :: (Eq a) => [Rule a] -> a -> [a]
> findSynonyms rules x = map fst $ filter (\(l,r) -> r==[x]) rules

> findSynRec :: (Eq a) => [Rule a] -> [a] -> [a]
> findSynRec rules syns = 
>     let s = nub (syns ++ concatMap (findSynonyms rules) syns)
>     in  if s == syns then syns else findSynRec rules s


 findSynRec :: (Eq a) => [Rule a] -> a -> [a]
 findSynRec rules x = 
     let s = findSynonyms rules x
         s' = nub (s ++ concatMap (findSynonyms rules) s)
     in  if s == s' then s else nub $ s ++ concatMap (findSynRec rules) s'

> fixSyns :: (Eq a) => [Rule a] -> [a] -> [a]
> fixSyns rules bucket = nub (bucket ++ concatMap (findSynonyms rules) bucket)

> fixRow :: (Eq a) => [Rule a] -> [[a]] -> [[a]]
> fixRow rules row = map (findSynRec rules) row

> allRowsS :: (Eq a) => [Rule a] -> [a] -> [[[a]]]
> allRowsS rs str = allRows' rs [fixRow rs $ firstRow rs str] where
>     allRows' rs rows = if length rows == length (head rows) then rows
>                        else allRows' rs (rows ++ [fixRow rs $ nextRow rs rows])

> firstRowOld rs cs = map (nub . map fst . findProducers rs . \a -> [a]) cs

> firstRow rs cs = map (\c -> [c]) cs

> firstRow' rs [] = []
> firstRow' rs (c:cs) = 
>     let fr0 = nub $ c : (map fst $ findProducers rs [c])
>     in  (if null fr0 then [c] else fr0) : firstRow' rs cs



==============


GENERATING ALL PARSES

 allParses :: [Rule a] -> [[[a]]] -> Int -> Int [[Rule a]]
 allParses rules rows i j = -- i is the row, j is the column
     let f :: a -> [Rule a]
         f x = filter (\(l,r) -> l==x) rules 
     in  undefined

mkSegs :: Int -> Int -> [[Int]]
mkSegs n m = filter (\s -> length s > 1) $ map (filter (>0)) $ mkSegs' n m

mkSegs must be called with:
n = the maximum length of the string in question.
m = the maximum possible number of substrings

Given a rule that we know can be applied, pick the cells it generates.

> getCells :: (Eq a) => [[[a]]] -> Rule a -> Int -> Int -> [[(Int, Int)]]
> getCells rows (lhs, rhs) level offset = 
>     let n = level + 1
>         m = length rhs -- number of syms to genererate
>         segs = mkSegs n m -- get all possible ways to chunk the string
>         inds = map (toInds offset) segs -- :: [[(Int, Int]] turn these into cells
>         -- need to filter the cells now! --
>     in  filter (goodCells rows rhs) inds

> goodCells :: (Eq a) => [[[a]]] -> [a] -> [(Int, Int)] -> Bool
> goodCells rows [] [] = True
> goodCells rows (x:xs) ((i,j):is) = 
>     elem x (rows !! i !! j) && goodCells rows xs is

> appendTo :: (Eq a) => [[[a]]] -> (Int, Int) -> a -> [[[a]]]
> appendTo [] (i,j) x = []
> appendTo xs (i,j) x = 
>     let (preRs, theRow, postRs) = (take i xs, xs !! i, drop (i+1) xs)
>         (preCs, theCell, postCs) = (take j theRow, theRow !! j, drop (j+1) theRow)
>         newCell = nub (x : theCell) -- ensure no duplicates
>     in  preRs ++ (preCs ++ newCell : postCs) : postRs

> appendTo2 :: (Eq a) => [[[a]]] -> (Int, Int) -> [a] -> [[[a]]]
> appendTo2 [] (i,j) x = []
> appendTo2 xs (i,j) x = 
>     let (preRs, theRow, postRs) = (take i xs, xs !! i, drop (i+1) xs)
>         (preCs, theCell, postCs) = (take j theRow, theRow !! j, drop (j+1) theRow)
>         newCell = nub (x ++ theCell) -- ensure no duplicates
>     in  preRs ++ (preCs ++ newCell : postCs) : postRs


The parseDown1 function completes a parse from a particular cell and symbol. We 
assume the symbol is a member of the cell in question.

> doAll = True
 
> parseDown1 :: (Eq a) => [[[a]]] -> [Rule a] -> [[[a]]] -> ((Int, Int), a) -> [[[[a]]]]
> parseDown1 rows rules newRows ((0,j),x) = [appendTo newRows (0,j) x]
> parseDown1 rows rules newRows ((i,j),x) = 
>     let newRows' = appendTo newRows (i,j) x -- put x in the current table
>         pRules = filter (\(l,r) -> l==x && length r > 0) rules -- rules of form A->BC...N
>         sRules = filter (\(l,r) -> l==x && length r == 1) rules -- rules of form A->B
>         f r = getCells rows r i j -- get a rule's target cells (CAN BE >1 LIST!)
>         f2 r@(lhs,rhs) = map (zipWith (\a (b,c) -> ((b,c),a)) rhs) (f r) -- group with coords
>         --pCells type is [[[((Int, Int), a)]]] 
>         pCells = filter (\l -> not $ null l) $ map f2 pRules -- get cells according to pRules 
>         recCall pCell = parseDown1 rows rules newRows' pCell -- recurse on rule expansions
>         pTabs = map (map (map recCall)) pCells -- gives one SET of tables per pCell
>         pTabs' = map (map combineSets) pTabs -- gives one SET of tables per inner pCell list (can be >1)
>         pTabs'' = concat $ concat pTabs' -- flatten lists, the parses are done
>         syns = filter (/=x) $ map (\(l,r) -> head r) $ sRules -- what synonym symbols are there?
>         synResults = concatMap (\a -> parseDown1 rows rules newRows' ((i,j),a)) syns -- recurse on syns
>     in  synResults ++ pTabs''


The parseDown function takes the cyk rows, the grammar's rules, and the start symbol.

> parseDown rows rules s =
>     let n = length $ head rows
>     in  map (map (map reverse)) $ parseDown1 rows rules (emptyRows n) ((n-1,0),s)

 xtrs = [(1,[1,1]), (1,[1,2]), (2,[2,2]), (3, [1]), (1, [1])] :: [Rule Int]
 xstr = [1,1,1,1] :: [Int]
 xp = allRowsMS 2 xtrs xstr
 xtest1 = parseDown xp xtrs 3

> emptyRows n = 
>     if n <= 0 then [] else take n (repeat []) : emptyRows (n-1)

2 x
1 x x
0 x x x
  0 1 2 
  
The combineSets function takes a bunch of table sets, one set per
cell that has been parsed, and finds every combination of them.
  
> combineSets :: (Eq a) => [[[[[a]]]]] -> [[[[a]]]]
> combineSets [] = error "(combineSets) No sets to combine!"
> combineSets [tset] = tset
> combineSets (tset:moreSets) = 
>     let pairs = [(a,b) | a<-tset, b<-combineSets moreSets]
>     in  map (\(a,b) -> combine1 a b) pairs
  

> -- merges N tables
> combineN :: (Eq a) => [[[[a]]]] -> [[[a]]]
> combineN [] = error "(combineN) No tables to combine!"
> combineN [t] = t
> combineN (t:ts) = combine1 t (combineN ts)

> -- merges two tables
> combine1 :: (Eq a) => [[[a]]] -> [[[a]]] -> [[[a]]]
> combine1 tab1 tab2 = 
>     let n = length $ head tab1
>         is = [(i,j) | i<-[0..n-1], j<-[0..n-1], j<=n-i-1]
>         xs = map (\(i,j) -> tab2 !! i !! j) is
>     in  foldUpdate2 tab1 (zip is xs)

> foldUpdate :: (Eq a) => [[[a]]] -> [((Int, Int), a)] -> [[[a]]]
> foldUpdate table [] = table
> foldUpdate table ((c,x):xs) = foldUpdate (appendTo table c x) xs


> foldUpdate2 :: (Eq a) => [[[a]]] -> [((Int, Int), [a])] -> [[[a]]]
> foldUpdate2 table [] = table
> foldUpdate2 table ((c,x):xs) = foldUpdate2 (appendTo2 table c x) xs