module Data.DAWG.Packed (
Node
, fromList
, fromAscList
, fromFile
, char
, endOfWord
, root
, children
, lookupPrefixBy
, lookupPrefix
, memberBy
, member
, toList
, toFile
, pack
, unpack
, NodeVector
, nodeVector
, endOfList
, childIndex
, getNodeAt
) where
import qualified Data.Vector.Unboxed as V
import qualified Data.HashMap.Strict as HM
import qualified Control.Monad.State.Strict as S
import Control.DeepSeq
import Control.Arrow
import Data.Binary
import Data.Vector.Binary
import Data.List (foldl', sort, find)
import Data.Bits
import Data.Word
import Data.Char
import Text.Printf
type NodeVector = V.Vector Word32
data Node = Node {
nodeVector :: !NodeVector,
childIndex :: !Word32,
char :: !Char,
endOfList :: !Bool,
endOfWord :: !Bool} deriving Eq
instance Show Node where
show (Node _ chi val eol eow) = printf
"Node {childIndex = %d, char = %c, endOfList = %s, endOfWord: = %s}"
chi val (show eol) (show eow)
instance Binary Node where
put (Node v chi val eol eow) = put (v, chi, val, eol, eow)
get = do
(v, chi, val, eol, eow) <- get
return (Node v chi val eol eow)
instance NFData Node where
rnf (Node v chi val eol eow) =
rnf v `seq` rnf chi `seq` rnf val `seq` rnf eol `seq` rnf eow `seq` ()
pack :: Char -> Bool -> Bool -> Int -> Word32
pack !val !eol !eow !chi =
fromIntegral (
(chi `shiftL` 10)
.|. (ord val `shiftL` 2)
.|. (fromEnum eol `shiftL` 1)
.|. (fromEnum eow))
unpack :: Word32 -> NodeVector -> Node
unpack !n !v = Node {
nodeVector = v,
childIndex = (n .&. 4294966272) `shiftR` 10,
char = chr $ fromIntegral $ (n .&. 1020) `shiftR` 2,
endOfList = ((n .&. 2) `shiftR` 1) == 1,
endOfWord = (n .&. 1) == 1}
root :: Node -> Node
root !(Node{nodeVector=v}) = unpack (V.unsafeLast v) v
getNodeAt :: NodeVector -> Word32 -> Node
getNodeAt !v !i = unpack (V.unsafeIndex v (fromIntegral i)) v
children :: Node -> [Node]
children !(Node v chi _ _ _)
| chi == 0 = []
| otherwise = go chi where
go !i | endOfList n = [n]
| otherwise = n : go (i + 1)
where n = getNodeAt v i
lookupPrefixBy :: (Char -> Char -> Ordering) -> String -> Node -> Maybe Node
lookupPrefixBy p = go where
go !(x:xs) !n = go xs =<< findNode p x n
go _ !n = Just n
findNode p c n = go' (children n) where
go' (n:ns) = case p c (char n) of
LT -> go' ns
EQ -> Just n
GT -> Nothing
go' _ = Nothing
lookupPrefix :: String -> Node -> Maybe Node
lookupPrefix = lookupPrefixBy compare
memberBy :: (Char -> Char -> Ordering) -> String -> Node -> Bool
memberBy p !xs !n = maybe False endOfWord $ lookupPrefixBy p xs n
member :: String -> Node -> Bool
member = memberBy compare
data Trie = TrieNode {
eow :: !Bool,
val :: !Char,
chd :: ![Trie]}
insert :: String -> Trie -> Trie
insert [] !n = n {eow = True}
insert (x:xs) !n@TrieNode{chd = chd}
| c:cs <- chd,
val c == x = n {chd = insert xs c :cs}
| otherwise = n {chd = insert xs (TrieNode False x []) :chd}
mkTrie :: [String] -> Trie
mkTrie = foldl' (flip insert) (TrieNode False '\0' [])
fromFile :: FilePath -> IO Node
fromFile = decodeFile
toFile :: FilePath -> Node -> IO ()
toFile = encodeFile
toList :: Node -> [String]
toList n = ["" | endOfWord n] ++ (go =<< children n) where
go n = [[char n] | endOfWord n] ++ (map (char n:) . go =<< children n)
reduce :: Trie -> S.State (HM.HashMap [Word32] Int, Int) Word32
reduce !node@TrieNode{..} = do
xs <- mapM reduce chd
(chiMap, i) <- S.get
let proc = \case [] -> (0, [])
[x] -> (1, [x .|. 2])
x:xs -> (succ *** (x:)) (proc xs)
(len, xs') = proc xs
pack val False eow `fmap` maybe
(S.put (HM.insert xs' (i + 1) chiMap, i + len)
>> return (i + 1))
(return)
(HM.lookup xs' chiMap)
trieToNode :: Trie -> Node
trieToNode trie = let
(root, (m, i)) = S.runState (reduce trie) (HM.singleton [] 0, 0)
assocs = (i + 1, root .|. 2): [(i', x) | (xs, i) <- HM.toList m,
(i', x) <- zip [i..] xs]
vec = V.unsafeAccum (flip const) (V.replicate (i + 2) 0) assocs
in unpack (V.unsafeLast vec) vec
fromAscList :: [String] -> Node
fromAscList = trieToNode . mkTrie
fromList :: [String] -> Node
fromList = fromAscList . sort