-- Copyright (c) David Amos, 2008. All rights reserved.

module Math.Algebra.Group.StringRewriting where

import Data.List as L
import Data.Maybe (catMaybes)

-- REWRITING

rewrite rules word = rewrite' rules word where
    rewrite' (r:rs) xs =
        case rewrite'' r xs of
        Nothing -> rewrite' rs xs
        Just ys -> rewrite' rules ys
    rewrite' [] xs = xs
rewrite'' (l,r) xs =
    case xs `splitSubstring` l of
    Nothing -> Nothing
    Just (a,b) -> Just (a++r++b)

-- given a string x and a substring b, find if possible (a,c) such that xs = abc
splitSubstring xs b = splitSubstring' [] xs where
    splitSubstring' ls [] = Nothing
    splitSubstring' ls (r:rs) =
        if b `L.isPrefixOf` (r:rs)
        then Just (reverse ls, drop (length b) (r:rs))
        else splitSubstring' (r:ls) rs
-- there might be a more efficient way to do this


-- KNUTH-BENDIX

-- given two strings x,y, find if possible a,b,c with x=ab y=bc
findOverlap xs ys = findOverlap' [] xs ys where
    findOverlap' as [] cs = Nothing -- (reverse as, [], cs)
    findOverlap' as (b:bs) cs =
        if (b:bs) `L.isPrefixOf` cs
        then Just (reverse as, b:bs, drop (length (b:bs)) cs)
        else findOverlap' (b:as) bs cs
-- there might be a more efficient way to do this

-- note that findOverlap "abab" "abab" won't find the partial overlap ("ab","ab","ab")

-- Knuth-Bendix algorithm
-- http://en.wikipedia.org/wiki/Knuth-Bendix_algorithm
-- Given a set of rules (assumed already reduced with respect to each other)
-- return a confluent rewrite system
knuthBendix1 rules = knuthBendix' rules pairs where
    pairs = [(lri,lrj) | lri <- rules, lrj <- rules, lri /= lrj]
    knuthBendix' rules [] = rules -- should reduce in some way
    knuthBendix' rules ( ((li,ri),(lj,rj)) : ps) =
        case findOverlap li lj of
        Nothing -> knuthBendix' rules ps
        Just (a,b,c) -> case ordpair (rewrite rules (ri++c)) (rewrite rules (a++rj)) of
                        Nothing -> knuthBendix' rules ps -- they both reduce to the same thing
                        Just rule' -> let rules' = reduce rule' rules
                                          ps' = ps ++ [(rule',rule) | rule <- rules'] ++ [(rule,rule') | rule <- rules']
                                      in knuthBendix' (rule':rules') ps'
                    	-- the new rule comes from seeing that
                    	-- a ++ b ++ c == l1 ++ c -> r1 ++ c (by rule 1)
                    	-- a ++ b ++ c == a ++ l2 -> a ++ r2 (by rule 2)
    reduce rule@(l,r) rules = filter (\(l',r') -> not (L.isInfixOf l l')) rules
        -- [rule' | rule'@(l',r') <- rules, not (l `L.isInfixOf` l')]
-- !! Possible efficiency improvement is to somehow prune the pairs queue, as or after we prune the rules

ordpair x y =
    case shortlex x y of
    LT -> Just (y,x)
    EQ -> Nothing
    GT -> Just (x,y)

shortlex x y = compare (length x, x) (length y, y)

-- for groups, where "letters" will take the form Either a a, we will want a different order, because we will want x^-1 -> x^3 to be the right way round


-- An optimisation - keep the rules ordered smallest first, and process the pairs smallest first
-- Appears to be significantly faster on average
knuthBendix2 rules = map snd $ knuthBendix' rules' pairs where
    rules' = L.sort $ map sizedRule rules
    pairs = L.sort [sizedPair sri srj | sri <- rules', srj <- rules', sri /= srj]
    knuthBendix' rules [] = rules
    knuthBendix' rules ( (s,(li,ri),(lj,rj)) : ps) =
        case findOverlap li lj of
        Nothing -> knuthBendix' rules ps
        Just (a,b,c) -> case ordpair (rewrite (map snd rules) (ri++c)) (rewrite (map snd rules) (a++rj)) of
                        Nothing -> knuthBendix' rules ps -- they both reduce to the same thing
                        Just rule' -> let rules' = reduce (snd rule') rules
                                          -- ps' = L.sort $ ps ++ [sizedPair rule' rule | rule <- rules'] ++ [sizedPair rule rule' | rule <- rules']
                                          ps' = merge ps $ merge [sizedPair rule' rule | rule <- rules'] [sizedPair rule rule' | rule <- rules']
                                     in knuthBendix' (L.insert rule' rules') ps'
    reduce rule@(l,r) rules = filter (\(s',(l',r')) -> not (L.isInfixOf l l')) rules
    -- reduce rule@(l,r) rules = [rule' | rule'@(s',(l',r')) <- rules, not (l `L.isInfixOf` l')]
    ordpair x y =
        let lx = length x; ly = length y in
            case compare (lx,x) (ly,y) of
            LT -> Just (ly,(y,x)); EQ -> Nothing; GT -> Just (lx,(x,y))
    sizedRule (rule@(l,r)) = (length l, rule)
    sizedPair (s1,r1) (s2,r2) = (s1+s2,r1,r2)

-- merge two ordered lists
merge (x:xs) (y:ys) =
    case compare x y of
    LT -> x : merge xs (y:ys)
    GT -> y : merge (x:xs) ys
    EQ -> error "" -- shouldn't happen in our case
merge xs ys = xs++ys

-- Another optimisation - at the stage where we remove some rules, we remove corresponding pairs too
-- Seems to perform about 25% faster on large problems (eg Coxeter groups A4-12, B4-12)
knuthBendix3 rules = knuthBendix' rules' pairs (length rules' + 1) where
    rules' = L.sort $ zipWith (\i (l,r) -> (length l,i,(l,r)) ) [1..] rules
    pairs = L.sort [sizedPair ri rj | ri <- rules', rj <- rules', ri /= rj]
    knuthBendix' rules [] k = map (\(s,i,r) -> r) rules
    knuthBendix' rules ( (s,(i,j),((li,ri),(lj,rj))) : ps) k =
        case findOverlap li lj of
        Nothing -> knuthBendix' rules ps k
        Just (a,b,c) -> case ordpair k (rewrite (map third rules) (ri++c)) (rewrite (map third rules) (a++rj)) of
                        Nothing -> knuthBendix' rules ps k -- they both reduce to the same thing
                        Just rule'@(_,_,(l,r)) ->
                            let (outrules,inrules) = L.partition (\(s',i',(l',r')) -> L.isInfixOf l l') rules
                                removedIndices = map second outrules
                                ps' = [p | p@(s,(i,j),(ri,rj)) <- ps, i `notElem` removedIndices, j `notElem` removedIndices]
                                ps'' = merge ps' $ merge [sizedPair rule' rule | rule <- inrules] [sizedPair rule rule' | rule <- inrules]
                            in knuthBendix' (L.insert rule' inrules) ps'' (k+1)
    ordpair k x y =
        let lx = length x; ly = length y in
            case compare (lx,x) (ly,y) of
            LT -> Just (ly,k,(y,x)); EQ -> Nothing; GT -> Just (lx,k,(x,y))
    second (s,i,r) = i
    third (s,i,r) = r
    sizedPair (si,i,ri) (sj,j,rj) = (si+sj,(i,j),(ri,rj))


-- Version of knuthBendix that makes sure the initial rules are reduced with respect to each other
knuthBendix rules = knuthBendix3 (reduce [] rules) where
    reduce ls (r:rs) = reduce (r: reduce' r ls) (reduce' r rs)
    reduce ls [] = ls
    reduce' r rules = catMaybes [ordpair (rewrite [r] lhs) (rewrite [r] rhs) | (lhs,rhs) <- rules]


-- given generators and rules/relations, list all normal forms
-- the rules are assumed to be a confluent rewrite system
nfs (gs,rs) = nfs' [[]] where
    nfs' [] = [] -- we have run out of words - this is a finite semigroup
    nfs' ws = let ws' = [g:w | g <- gs, w <- ws, not (any (`L.isPrefixOf` (g:w)) (map fst rs))]
              in ws ++ nfs' ws'

elts (gs,rs) = nfs (gs, knuthBendix rs)


-- PRESENTATIONS FOR SOME STANDARD GROUPS
-- Would like to add a few more to this list

newtype S = S Int deriving (Eq,Ord)

instance Show S where
    show (S i) = "s" ++ show i

s_ i = S i
s1 = s_ 1
s2 = s_ 2
s3 = s_ 3

-- D L Johnson, Presentations of Groups, p62

-- symmetric group, generated by adjacent transpositions
_S n = (gs, r ++ s ++ t) where
    gs = map s_ [1..n-1]
    r = [([s_ i, s_ i],[]) | i <- [1..n-1]]
    s = [(concat $ replicate 3 [s_ i, s_ (i+1)],[]) | i <- [1..n-2]]
    t = [([s_ i, s_ j, s_ i, s_ j],[]) | i <- [1..n-1], j <- [i+2..n-1]]

-- http://en.wikipedia.org/wiki/Triangle_group
-- triangle group
tri l m n = ("abc", [("aa",""),("bb",""),("cc",""),("ab" ^ l,""),("bc" ^ n,""),("ca" ^ m,"" )])
    where xs ^ i = concat $ replicate i xs

-- von Dyck groups
-- The subgroup of index 2 of the triangle group of elts that preserve the orientation of the triangle
_D l m n = ("xy", [("x" ^ l,""), ("y" ^ m,""), ("xy" ^ n,"")])
    where xs ^ i = concat $ replicate i xs

-- So 2,3,3 -> tetrahedron; 2,3,4 -> cube/octahedron; 2,3,5 -> dodecahedron/icosahedron
-- 2,2,n, n>=2 -> n-gon bipyramid
-- Other values correspond to Euclidean or Hyperbolic groups, which are infinite