module Data.RadixTree
( RadixTree (..)
, RadixNode (..)
, CompressedRadixTree
, fromFoldable
, compressBy
, RadixParsing (..)
, search
) where
import Control.Applicative
import Control.DeepSeq
import Data.Data (Data, Typeable)
import Data.Foldable (asum, foldr', toList)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Monoid
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Store ()
import Data.Store.TH (makeStore)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Array as TI (Array)
import qualified Data.Text.Internal as TI (Text (..), text)
import Data.Vector (Vector)
import qualified Data.Vector as V
import Lens.Micro
import Text.Parser.Char (CharParsing (anyChar, text))
import Text.Parser.Combinators (Parsing (try))
data PrefixNode a = Accept !Text !a | Skip !a
deriving (Show, Eq)
newtype Trie = Trie (PrefixNode (Map Char Trie))
deriving (Show, Eq)
newtype CompressedTrie = CompressedTrie (PrefixNode (Map (Seq Char) CompressedTrie))
deriving (Show, Eq)
node :: Lens (PrefixNode a) (PrefixNode b) a b
node = lens
(\x -> case x of { Accept _ t -> t; Skip t -> t })
(\x a -> case x of { Accept l _ -> Accept l a; Skip _ -> Skip a })
leaf :: Text -> Text -> Trie
leaf ft t = go (T.unpack t)
where
go (x:xs) = Trie (Skip (M.singleton x (go xs)))
go [] = Trie (Accept ft M.empty)
insert :: Text -> Text -> Trie -> Trie
insert ft text' (Trie n) = case T.uncons text' of
Just (c, cs) -> Trie ((node %~
M.insertWith
(\_ orig -> Data.RadixTree.insert ft cs orig)
c
(leaf ft cs)) n)
Nothing ->
Trie (n^.node.to (Accept ft))
makeCompressable :: Trie -> CompressedTrie
makeCompressable (Trie n) = CompressedTrie (
over node (M.map makeCompressable . M.mapKeysMonotonic Seq.singleton) n)
compress :: Trie -> CompressedTrie
compress = go . makeCompressable
where
go :: CompressedTrie -> CompressedTrie
go (CompressedTrie n) = case n of
Accept l m -> CompressedTrie (Accept l (M.map go m))
Skip m -> CompressedTrie (Skip (M.foldMapWithKey compress1 m))
compress1 :: Seq Char -> CompressedTrie -> Map (Seq Char) CompressedTrie
compress1 k c@(CompressedTrie n) =
case M.size sm of
0 -> M.singleton k c
1 | Skip _ <- n -> compress1 (k <> k') sm'
where (k', sm') = M.findMax sm
_ -> M.singleton k (go (n & node .~ sm & CompressedTrie))
where sm = n^.node
data RadixNode = RadixNode !Text !RadixTree
deriving (Eq, Show, Typeable, Data)
data RadixTree
=
RadixAccept
!Text
!(Vector RadixNode)
| RadixSkip
!(Vector RadixNode)
deriving (Eq, Show, Typeable, Data)
instance NFData RadixNode where
rnf (RadixNode l t) = rnf l `seq` rnf t
instance NFData RadixTree where
rnf (RadixAccept t v) = t `seq` rnf v
rnf (RadixSkip v) = rnf v
fromTrie :: Trie -> RadixTree
fromTrie = go . compress
where
!z = V.empty
radixNode :: Seq Char -> CompressedTrie -> RadixNode
radixNode l t = RadixNode (T.pack (toList l)) (go t)
mapToVector :: Map k a -> Vector (k, a)
mapToVector m = case M.size m of
0 -> z
sz -> V.fromListN sz (M.toList m)
go :: CompressedTrie -> RadixTree
go (CompressedTrie n) = case n of
Accept l m -> RadixAccept l . V.map (uncurry radixNode) . mapToVector $! m
Skip m -> RadixSkip . V.map (uncurry radixNode) . mapToVector $! m
data TextSlice = TextSlice
{ tsOffset16 :: !Int
, tsLength16 :: !Int
}
magicallySaveSpaceSometimes :: Text -> Text -> Maybe TextSlice
magicallySaveSpaceSometimes full s@(TI.Text _ _ slen) =
case T.breakOn s full of
(TI.Text{}, r@(TI.Text _ remoffs _))
| T.null r -> Nothing
| otherwise -> Just TextSlice{tsOffset16 = remoffs, tsLength16 = slen}
data CompressedRadixTree
= CompressedRadixTree !TI.Array !CompressedRadixTree1
data CompressedRadixTree1
= CompressedRadixAccept
!TextSlice
!(Vector CompressedRadixNode)
| CompressedRadixSkip !(Vector CompressedRadixNode)
data CompressedRadixNode
= CompressedRadixNode !TextSlice !CompressedRadixTree1
instance NFData CompressedRadixNode where
rnf (CompressedRadixNode ts t) = ts `seq` rnf t
instance NFData CompressedRadixTree where
rnf (CompressedRadixTree arr v) = arr `seq` rnf v
instance NFData CompressedRadixTree1 where
rnf (CompressedRadixAccept ts v) = ts `seq` rnf v
rnf (CompressedRadixSkip v) = rnf v
compressBy :: Text -> RadixTree -> Maybe CompressedRadixTree
compressBy full@(TI.Text arr _ _) rt =
CompressedRadixTree arr <$> recompressT rt
where
magic = magicallySaveSpaceSometimes full
recompressN :: RadixNode -> Maybe CompressedRadixNode
recompressN (RadixNode t tree) = CompressedRadixNode <$> magic t <*> recompressT tree
recompressT :: RadixTree -> Maybe CompressedRadixTree1
recompressT (RadixSkip v) = CompressedRadixSkip <$> V.mapM recompressN v
recompressT (RadixAccept t v) = CompressedRadixAccept <$> magic t <*> V.mapM recompressN v
fromFoldable :: Foldable f => f Text -> RadixTree
fromFoldable =
fromTrie . foldr' (\t -> insert t t) (Trie (Skip M.empty))
makeStore ''RadixNode
makeStore ''RadixTree
class RadixParsing radixtree where
parse :: CharParsing m => radixtree -> m Text
search :: (Monad m, CharParsing m, RadixParsing radixtree)
=> radixtree -> m [Text]
search r = go
where
go =
(parse r >>= \x -> (x:) <$> go) <|>
(anyChar >> go) <|>
return []
instance RadixParsing RadixTree where
parse :: CharParsing m => RadixTree -> m Text
parse = go
where
go r = case r of
RadixAccept l nodes
| T.null l -> empty
| otherwise -> asum (V.map parseRadixNode nodes) <|> pure l
RadixSkip nodes -> asum (V.map parseRadixNode nodes)
parseRadixNode (RadixNode prefix tree)
| T.null prefix = go tree
| otherwise = try (text prefix *> go tree)
instance RadixParsing CompressedRadixTree where
parse :: CharParsing m => CompressedRadixTree -> m Text
parse (CompressedRadixTree arr crt) = go crt
where
fromSlice (TextSlice offs len) = TI.text arr offs len
go r = case r of
CompressedRadixAccept ts nodes -> case fromSlice ts of
l | T.null l -> empty
| otherwise -> asum (V.map parseRadixNode nodes) <|> pure l
CompressedRadixSkip nodes -> asum (V.map parseRadixNode nodes)
parseRadixNode (CompressedRadixNode ts tree) = case fromSlice ts of
prefix | T.null prefix -> go tree
| otherwise -> try (text prefix *> go tree)