module Data.DAWG.Static
(
DAWG (..)
, lookup
, numStates
, index
, byIndex
, hash
, unHash
, empty
, fromList
, fromListWith
, fromLang
, freeze
, Weight
, weigh
, assocs
, keys
, elems
) where
import Prelude hiding (lookup)
import Control.Applicative ((<$), (<$>), (<*>), (<|>))
import Control.Arrow (first, second)
import Data.Binary (Binary, put, get)
import Data.Vector.Binary ()
import Data.Vector.Unboxed (Unbox)
import qualified Data.IntMap as M
import qualified Data.Vector as V
import qualified Data.DAWG.VMap as VM
import qualified Data.DAWG.Internal as I
import qualified Data.DAWG as D
type Id = Int
type Sym = Int
type Edge a = (Id, a)
to :: Edge a -> Id
to = fst
label :: Edge a -> a
label = snd
annotate :: a -> Edge b -> Edge a
annotate x (i, _) = (i, x)
labeled :: a -> Id -> Edge a
labeled x i = (i, x)
data Node a b = Node {
value :: !a
, edgeMap :: !(VM.VMap (Edge b)) }
deriving (Show, Eq, Ord)
instance (Unbox b, Binary a, Binary b) => Binary (Node a b) where
put Node{..} = put value >> put edgeMap
get = Node <$> get <*> get
onSym :: Unbox b => Sym -> Node a b -> Maybe (Edge b)
onSym x (Node _ es) = VM.lookup x es
trans :: Unbox b => Node a b -> [(Sym, Edge b)]
trans = VM.toList . edgeMap
edges :: Unbox b => Node a b -> [Edge b]
edges = map snd . trans
children :: Unbox b => Node a b -> [Id]
children = map to . edges
newtype DAWG a b c = DAWG { unDAWG :: V.Vector (Node (Maybe b) c) }
empty :: Unbox c => DAWG a b c
empty = DAWG $ V.singleton (Node Nothing VM.empty)
numStates :: DAWG a b c -> Int
numStates = V.length . unDAWG
nodeBy :: Id -> DAWG a b c -> Node (Maybe b) c
nodeBy i d = unDAWG d V.! i
lookup :: (Unbox c, Enum a) => [a] -> DAWG a b c -> Maybe b
lookup xs' =
let xs = map fromEnum xs'
in lookup'I xs 0
lookup'I :: Unbox c => [Sym] -> Id -> DAWG a b c -> Maybe b
lookup'I [] i d = value (nodeBy i d)
lookup'I (x:xs) i d = case onSym x (nodeBy i d) of
Just e -> lookup'I xs (to e) d
Nothing -> Nothing
assocs :: (Enum a, Unbox c) => DAWG a b c -> [([a], b)]
assocs d = map (first (map toEnum)) (assocs'I 0 d)
assocs'I :: Unbox c => Id -> DAWG a b c -> [([Sym], b)]
assocs'I i d =
here ++ concatMap there (trans n)
where
n = nodeBy i d
here = case value n of
Just x -> [([], x)]
Nothing -> []
there (x, e) = map (first (x:)) (assocs'I (to e) d)
keys :: (Unbox c, Enum a) => DAWG a b c -> [[a]]
keys = map fst . assocs
elems :: Unbox c => DAWG a b c -> [b]
elems = map snd . assocs'I 0
fromList :: (Enum a, Ord b) => [([a], b)] -> DAWG a b ()
fromList = freeze . D.fromList
fromListWith :: (Enum a, Ord b) => (b -> b -> b) -> [([a], b)] -> DAWG a b ()
fromListWith f = freeze . D.fromListWith f
fromLang :: Enum a => [[a]] -> DAWG a () ()
fromLang = freeze . D.fromLang
type Weight = Int
weigh :: Unbox c => DAWG a b c -> DAWG a b Weight
weigh d = (DAWG . V.fromList)
[ Node (value n) (apply ws (trans n))
| i <- [0 .. numStates d 1]
, let n = nodeBy i d
, let ws = accum (children n) ]
where
nodeWeight = ((V.!) . V.fromList) (map detWeight [0 .. numStates d 1])
detWeight i =
let n = nodeBy i d
js = children n
in add (value n) (map nodeWeight js)
add w x = maybe 0 (const 1) w + sum x
accum = init . scanl (+) 0 . map nodeWeight
apply ws ts = VM.fromList
[ (x, annotate w e)
| (w, (x, e)) <- zip ws ts ]
freeze :: D.DAWG a b -> DAWG a b ()
freeze d = DAWG . V.fromList $
map (stop . oldBy) (M.elems (inverse old2new))
where
old2new :: M.IntMap Int
old2new = M.fromList $ (D.root d, 0) : zip (nodeIDs d) [1..]
nodeIDs = filter (/= D.root d) . branchIDs
stop = Node <$> onEps <*> mkEdges . I.edgeMap
onEps = I.unValue . oldBy . I.eps
mkEdges = VM.fromList . map (second mkEdge) . VM.toList
mkEdge = labeled () . (old2new M.!)
oldBy i = I.nodeBy i (D.graph d)
branchIDs :: D.DAWG a b -> [I.Id]
branchIDs
= map fst . filter (isBranch . snd)
. M.assocs . I.nodeMap . D.graph
where
isBranch (I.Branch _ _) = True
isBranch _ = False
inverse :: M.IntMap Int -> M.IntMap Int
inverse =
let swap (x, y) = (y, x)
in M.fromList . map swap . M.toList
index :: Enum a => [a] -> DAWG a b Weight -> Maybe Int
index xs = index'I (map fromEnum xs) 0
index'I :: [Sym] -> Id -> DAWG a b Weight -> Maybe Int
index'I [] i d = 0 <$ value (nodeBy i d)
index'I (x:xs) i d = do
let n = nodeBy i d
v = maybe 0 (const 1) (value n)
e <- onSym x n
w <- index'I xs (to e) d
return (v + w + label e)
hash :: Enum a => [a] -> DAWG a b Weight -> Maybe Int
hash = index
byIndex :: Enum a => Int -> DAWG a b Weight -> Maybe [a]
byIndex ix d = map toEnum <$> byIndex'I ix 0 d
byIndex'I :: Int -> Id -> DAWG a b Weight -> Maybe [Sym]
byIndex'I ix i d
| ix < 0 = Nothing
| otherwise = here <|> there
where
n = nodeBy i d
v = maybe 0 (const 1) (value n)
here
| ix == 0 = [] <$ value (nodeBy i d)
| otherwise = Nothing
there = do
(x, e) <- VM.findLastLE cmp (edgeMap n)
xs <- byIndex'I (ix v label e) (to e) d
return (x:xs)
cmp e = compare (label e) (ix v)
unHash :: Enum a => Int -> DAWG a b Weight -> Maybe [a]
unHash = byIndex