module Data.DAWG.Frozen
(
DAWG
, lookup
, numStates
, index
, byIndex
, hash
, unHash
, empty
, fromList
, fromListWith
, fromLang
, assocs
, keys
, elems
, freeze
) where
import Prelude hiding (lookup)
import Control.Applicative (pure, (<$), (<$>), (<*>))
import Control.Arrow (first, second)
import Data.Binary (Binary, put, get)
import Data.Vector.Binary ()
import qualified Data.IntMap as M
import qualified Data.Vector as V
import qualified Data.DAWG.VMap as VM
import qualified Data.DAWG.Internal as I
import qualified Data.DAWG as D
type Id = Int
data Node a = Node {
value :: !a
, size :: !Int
, edges :: !VM.VMap }
deriving (Show, Eq, Ord)
instance Binary a => Binary (Node a) where
put Node{..} = put value >> put size >> put edges
get = Node <$> get <*> get <*> get
onSym :: Int -> Node a -> Maybe Id
onSym x (Node _ _ es) = VM.lookup x es
edgeList :: Node a -> [(Int, Id)]
edgeList = VM.toList . edges
children :: Node a -> [Id]
children = map snd . edgeList
type DAWG a b = V.Vector (Node (Maybe b))
empty :: DAWG a b
empty = V.singleton (Node Nothing 0 VM.empty)
numStates :: DAWG a b -> Int
numStates = V.length
nodeBy :: Id -> DAWG a b -> Node (Maybe b)
nodeBy i d = d V.! i
lookup :: Enum a => [a] -> DAWG a b -> Maybe b
lookup xs' =
let xs = map fromEnum xs'
in lookup'I xs 0
lookup'I :: [Int] -> Id -> DAWG a b -> Maybe b
lookup'I [] i d = value (nodeBy i d)
lookup'I (x:xs) i d = case onSym x (nodeBy i d) of
Just j -> lookup'I xs j d
Nothing -> Nothing
assocs :: Enum a => DAWG a b -> [([a], b)]
assocs d = map (first (map toEnum)) (assocs'I 0 d)
assocs'I :: Id -> DAWG a b -> [([Int], b)]
assocs'I i d =
here ++ concatMap there (VM.toList (edges n))
where
n = nodeBy i d
here = case value n of
Just x -> [([], x)]
Nothing -> []
there (sym, j) = map (first (sym:)) (assocs'I j d)
keys :: Enum a => DAWG a b -> [[a]]
keys = map fst . assocs
elems :: DAWG a b -> [b]
elems = map snd . assocs'I 0
fromList :: (Enum a, Ord b) => [([a], b)] -> DAWG a b
fromList = freeze . D.fromList
fromListWith :: (Enum a, Ord b) => (b -> b -> b) -> [([a], b)] -> DAWG a b
fromListWith f = freeze . D.fromListWith f
fromLang :: Enum a => [[a]] -> DAWG a ()
fromLang = freeze . D.fromLang
detSize :: DAWG a b -> DAWG a b
detSize d = V.fromList
[ (nodeBy i d) { size = mem i }
| i <- [0 .. numStates d 1] ]
where
add w x = maybe 0 (const 1) w + sum x
mem = ((V.!) . V.fromList) (map det [0 .. numStates d 1])
det i =
let n = nodeBy i d
js = children n
in add (value n) (map mem js)
freeze :: D.DAWG a b -> DAWG a b
freeze d = detSize . V.fromList $
map (stop . oldBy) (M.elems (inverse old2new))
where
old2new = M.fromList $ (D.root d, 0) : zip (nodeIDs d) [1..]
nodeIDs = filter (/= D.root d) . branchIDs
stop = Node <$> onEps <*> pure 0 <*> mkEdges . I.edgeMap
onEps = I.unValue . oldBy . I.eps
mkEdges = VM.fromList . map (second (old2new M.!)) . VM.toList
oldBy i = I.nodeBy i (D.graph d)
branchIDs :: D.DAWG a b -> [I.Id]
branchIDs
= map fst . filter (isBranch . snd)
. M.assocs . I.nodeMap . D.graph
where
isBranch (I.Branch _ _) = True
isBranch _ = False
inverse :: M.IntMap Int -> M.IntMap Int
inverse =
let swap (x, y) = (y, x)
in M.fromList . map swap . M.toList
index :: Enum a => [a] -> DAWG a b -> Maybe Int
index xs = index'I (map fromEnum xs) 0
index'I :: [Int] -> Id -> DAWG a b -> Maybe Int
index'I [] i d = 0 <$ value (nodeBy i d)
index'I (x:xs) i d = case onSym x n of
Just j -> do
x0 <- index'I xs j d
let x1 = maybe 0 (const 1) (value n)
+ (sum . map sizeBy) (before (x, j))
return $ x0 + x1
Nothing -> Nothing
where
n = nodeBy i d
sizeBy = size . flip nodeBy d
before e = map snd . fst $ span (/=e) (edgeList n)
hash :: Enum a => [a] -> DAWG a b -> Maybe Int
hash = index
byIndex :: Enum a => Int -> DAWG a b -> Maybe [a]
byIndex i d = map toEnum <$> byIndex'I i 0 d
byIndex'I :: Int -> Id -> DAWG a b -> Maybe [Int]
byIndex'I ix i d = do
(acc, x, j) <- findChild 0 (edgeList n)
xs <- byIndex'I (ix acc) j d
return (x:xs)
where
n = nodeBy i d
sizeBy = size . flip nodeBy d
findChild acc ((x, j) : js)
| acc < ix = findChild (acc + sizeBy j) js
| otherwise = Just (acc, x, j)
findChild _ [] = Nothing
unHash :: Enum a => Int -> DAWG a b -> Maybe [a]
unHash = byIndex