module Data.DAWG.Static
(
DAWG
, lookup
, numStates
, index
, byIndex
, hash
, unHash
, empty
, fromList
, fromListWith
, fromLang
, freeze
, Weight
, weigh
, assocs
, keys
, elems
) where
import Prelude hiding (lookup)
import Control.Applicative ((<$), (<$>), (<|>))
import Control.Arrow (first)
import Data.Binary (Binary, put, get)
import Data.Vector.Binary ()
import Data.Vector.Unboxed (Unbox)
import qualified Data.IntMap as M
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as U
import Data.DAWG.Types
import Data.DAWG.Trans (Trans)
import Data.DAWG.Node (Node)
import qualified Data.DAWG.Trans as T
import qualified Data.DAWG.Trans.Vector as VT
import qualified Data.DAWG.Node as N
import qualified Data.DAWG.Graph as G
import qualified Data.DAWG.Internal as D
import qualified Data.DAWG.Util as Util
newtype DAWG t a b c = DAWG { unDAWG :: V.Vector (Node t b c) }
deriving (Show)
deriving instance (Eq b, Eq c, Unbox b) => Eq (DAWG VT.Trans a b c)
deriving instance (Ord b, Ord c, Unbox b) => Ord (DAWG VT.Trans a b c)
instance (Binary t, Binary b, Binary c, Unbox b) => Binary (DAWG t a b c) where
put = put . unDAWG
get = DAWG <$> get
empty :: (Trans t, Unbox b) => DAWG t a b c
empty = DAWG $ V.fromList
[ N.Branch 1 T.empty U.empty
, N.Leaf Nothing ]
numStates :: DAWG t a b c -> Int
numStates = V.length . unDAWG
nodeBy :: ID -> DAWG t a b c -> Node t b c
nodeBy i d = unDAWG d V.! i
leafValue :: Node t b c -> DAWG t a b c -> Maybe c
leafValue n = N.value . nodeBy (N.eps n)
lookup :: (Enum a, Trans t, Unbox b) => [a] -> DAWG t a b c -> Maybe c
lookup xs' =
let xs = map fromEnum xs'
in lookup'I xs 0
lookup'I :: (Trans t, Unbox b) => [Sym] -> ID -> DAWG t a b c -> Maybe c
lookup'I [] i d = leafValue (nodeBy i d) d
lookup'I (x:xs) i d = case N.onSym x (nodeBy i d) of
Just j -> lookup'I xs j d
Nothing -> Nothing
assocs :: (Enum a, Trans t, Unbox b) => DAWG t a b c -> [([a], c)]
assocs d = map (first (map toEnum)) (assocs'I 0 d)
assocs'I :: (Trans t, Unbox b) => ID -> DAWG t a b c -> [([Sym], c)]
assocs'I i d =
here ++ concatMap there (N.edges n)
where
n = nodeBy i d
here = case leafValue n d of
Just x -> [([], x)]
Nothing -> []
there (x, j) = map (first (x:)) (assocs'I j d)
keys :: (Enum a, Trans t, Unbox b) => DAWG t a b c -> [[a]]
keys = map fst . assocs
elems :: (Trans t, Unbox b) => DAWG t a b c -> [c]
elems = map snd . assocs'I 0
fromList
:: (Enum a, D.MkNode t b)
=> [([a], b)] -> DAWG t a () b
fromList = freeze . D.fromList
fromListWith
:: (Enum a, D.MkNode t b)
=> (b -> b -> b) -> [([a], b)] -> DAWG t a () b
fromListWith f = freeze . D.fromListWith f
fromLang
:: (Enum a, D.MkNode t ())
=> [[a]] -> DAWG t a () ()
fromLang = freeze . D.fromLang
type Weight = Int
weigh :: Trans t => DAWG t a b c -> DAWG t a Weight c
weigh d = (DAWG . V.fromList)
[ branch n ws
| i <- [0 .. numStates d 1]
, let n = nodeBy i d
, let ws = accum (N.children n) ]
where
branch N.Branch{..} ws = N.Branch eps transMap ws
branch N.Leaf{..} _ = N.Leaf value
nodeWeight = ((V.!) . V.fromList) (map detWeight [0 .. numStates d 1])
detWeight i = case nodeBy i d of
N.Leaf w -> maybe 0 (const 1) w
n -> sum . map nodeWeight $ allChildren n
accum = U.fromList . init . scanl (+) 0 . map nodeWeight
allChildren n = N.eps n : N.children n
freeze :: Trans t => D.DAWG t a b -> DAWG t a () b
freeze d = DAWG . V.fromList $
map (N.reID newID . oldBy)
(M.elems (inverse old2new))
where
old2new = M.fromList $ (D.root d, 0) : zip (nodeIDs d) [1..]
newID = (M.!) old2new
nodeIDs = filter (/= D.root d) . map fst . M.assocs . G.nodeMap . D.graph
oldBy i = G.nodeBy i (D.graph d)
inverse :: M.IntMap Int -> M.IntMap Int
inverse =
let swap (x, y) = (y, x)
in M.fromList . map swap . M.toList
index :: (Enum a, Trans t) => [a] -> DAWG t a Weight c -> Maybe Int
index xs = index'I (map fromEnum xs) 0
index'I :: Trans t => [Sym] -> ID -> DAWG t a Weight c -> Maybe Int
index'I [] i d = 0 <$ leafValue (nodeBy i d) d
index'I (x:xs) i d = do
let n = nodeBy i d
u = maybe 0 (const 1) (leafValue n d)
(j, v) <- N.onSym' x n
w <- index'I xs j d
return (u + v + w)
hash :: (Enum a, Trans t) => [a] -> DAWG t a Weight c -> Maybe Int
hash = index
byIndex :: (Enum a, Trans t) => Int -> DAWG t a Weight c -> Maybe [a]
byIndex ix d = map toEnum <$> byIndex'I ix 0 d
byIndex'I :: Trans t => Int -> ID -> DAWG t a Weight c -> Maybe [Sym]
byIndex'I ix i d
| ix < 0 = Nothing
| otherwise = here <|> there
where
n = nodeBy i d
u = maybe 0 (const 1) (leafValue n d)
here
| ix == 0 = [] <$ leafValue (nodeBy i d) d
| otherwise = Nothing
there = do
(k, w) <- Util.findLastLE cmp (N.labelVect n)
(x, j) <- T.byIndex k (N.transMap n)
xs <- byIndex'I (ix u w) j d
return (x:xs)
cmp w = compare w (ix u)
unHash :: (Enum a, Trans t) => Int -> DAWG t a Weight c -> Maybe [a]
unHash = byIndex