{-# LANGUAGE TypeFamilies, PatternGuards #-} -- | @TrieQueue e@ is a priority queue @IQueue@ instance satisfying @QueueKey (TrieQueue e) ~ [e]@, with the property that this queue frequently performs better than any other queue -- implementation in this package for keys of type @[e]@. -- -- This particular implementation is highly experimental and possibly a genuinely new data structure. See the source code for details. -- However, for many cases this priority queue may be used for a heap sort that runs faster than the "Data.List" implementation, -- or the vanilla "Data.Queue.PQueue" implementation. module Data.Queue.TrieQueue (TrieQueue) where import Control.Arrow((***)) import Control.Monad import Data.Monoid import Data.Maybe import Data.Queue.Class import Data.Queue.QueueHelpers(fusing') import Data.Queue.TrieQueue.Edge import Data.Queue.TrieQueue.MonoidQueue import Data.Queue.TrieQueue.TrieLabel import GHC.Exts import Prelude hiding (null) -- On the back end it uses something called a /monoid queue/, -- which takes ordered keys associated with monoid values and returns (k, m') pairs where m' is the concatenation of every monoid value associated with k, -- with no guarantees made upon the order of the concatenation. Essentially, it is a priority queue which internally "merges" values with equal keys. -- See Data.Queue.TrieQueue.MonoidQueue for details and a list of alternative implementations. -- After some experimentation, trie edge labels are currently implemented as vanilla lists; however, the implementation is modularized in -- Data.Queue.TrieQueue.EdgeLabel. (Other possible implementations include mergeable deques and Data.Sequence finger trees.) -- A trie, now, consists of an edge label xs, the number of strings ending with that label, and a monoid queue associating characters in the string to -- tries consisting of strings prefixed by that character. This is the key variation in this implementation, and it exploits the fact that -- random-access string lookup is not required in a priority queue: only extract-min and insert, operations perfectly well suited to a monoid queue. -- Note that the monoid values in the monoid queue are themselves tries, which get recursively merged as necessary. data Trie e = Trie (Label e) {-# UNPACK #-} !Int (MQueue e (Trie e)) deriving (Show) data TrieQueue e = TQ Int (Maybe (Trie e)) deriving (Show) -- This monoid instance can now get exploited for great justice by the monoid queue. instance Ord e => Monoid (Trie e) where mempty = Trie mempty 0 mempty mappend = mergeTrie mconcat = fromMaybe mempty . mergeTries {-# INLINE forceOrd #-} forceOrd :: Ord e => Trie e -> x -> x forceOrd t x = cmp t `seq` x where cmp :: Ord e => Trie e -> (e -> e -> Ordering) cmp _ = compare catTrie :: Ord e => Label e -> Trie e -> Trie e xs `catTrie` Trie ys yn yQ = Trie (xs `mappend` ys) yn yQ consTrie :: Ord e => e -> Trie e -> Trie e x `consTrie` Trie xs xn xQ = Trie (x `cons` xs) xn xQ mergeTrie :: Ord e => Trie e -> Trie e -> Trie e xT@(Trie xs0 xn xQ) `mergeTrie` yT@(Trie ys0 yn yQ) = merging xs0 ys0 split (tail xT yT) (tail yT xT) xy where end (Trie _ xn xQ) x xs = x :- Trie xs xn xQ split pfx x xs y ys = let xEnd = end xT x xs; yEnd = end yT y ys in Trie pfx 0 (xEnd `insert` singleton yEnd) tail (Trie xs xn xQ) yT y ys = let yEnd = end yT y ys in Trie xs xn (yEnd `insert` xQ) xy = Trie xs0 (xn + yn) (xQ `merge` yQ) {-# INLINE compactTrie #-} compactTrie :: Ord e => Trie e -> Maybe (Trie e) compactTrie (Trie xs 0 xQ) | null xQ = Nothing | Just (y :- t) <- extractSingle xQ = Just (xs `catTrie` (y `consTrie` t)) compactTrie t = Just t data Acc e = A {-# UNPACK #-} !Int e -- Note that a monoid queue is built up and (sometimes) torn down for each character. If every label on every trie being merged matches -- on the first character, then the monoid queue simply automatically becomes a singleton, a case handled by compactTrie with a specialized -- implementation based on extractSingle. If the labels do not match, or there are tries being merged with empty labels, -- then the monoid queue is exactly what we needed anyway. mergeTries :: Ord e => [Trie e] -> Maybe (Trie e) mergeTries ts0 = compactTrie (Trie mempty nEmpty ([x :- Trie xs xn xQ | Trie (x:xs) xn xQ <- ts0] `insertAll` mergeAll qs)) where A nEmpty qs = foldr procEmpty (A 0 []) ts0 procEmpty (Trie [] n q) (A nEmpty qs) = A (n + nEmpty) (q:qs) procEmpty _ acc = acc --mergeTries = fusing' mergeTrie {-# INLINE fin #-} fin :: Ord e => Trie e -> Maybe (Trie e) fin (Trie _ 0 q) | null q = Nothing fin t = Just t -- If there are strings ending at this label, we obviously process those. Otherwise, we recurse to the first hanging trie from the monoid queue. -- If it is not exhausted, then we can simply replace the value in the monoid queue; if it is exhausted we may possibly compact the trie -- (e.g. if there is now only one child trie and we may in fact combine those edges). extractTrie :: Ord e => Trie e -> (Label e, Maybe (Trie e)) extractTrie (Trie xs (n+1) xQ) = (xs, inline compactTrie (Trie xs n xQ)) extractTrie (Trie xs 0 xQ) | Just (y :- t, xQ') <- extract xQ, (ys, t') <- extractTrie t = (xs `mappend` (y `cons` ys), case t' of Nothing -> inline compactTrie (Trie xs 0 xQ') Just t' -> fin (Trie xs 0 $ replace (y :- t') xQ)) extractTrie _ = error "Failure to detect empty queue" instance Ord e => Monoid (TrieQueue e) where mempty = TQ 0 Nothing TQ n1 t1 `mappend` TQ n2 t2 = TQ (n1 + n2) (t1 `mappend` t2) mconcat ts = TQ (sum [n | TQ n _ <- ts]) (mergeTries [t | TQ _ (Just t) <- ts]) instance Ord e => IQueue (TrieQueue e) where type QueueKey (TrieQueue e) = [e] empty = mempty merge = mappend mergeAll = mconcat singleton = TQ 1 . Just . single insertAll = mappend . fromListTrie fromList = fromListTrie extract (TQ n t) = fmap ((labelToList *** TQ (n-1)) . extractTrie) t null (TQ _ Nothing) = True null _ = False size (TQ n _) = n fromListTrie :: Ord e => [[e]] -> TrieQueue e fromListTrie = liftM2 TQ length (mergeTries . map single) single :: Ord e => [e] -> Trie e single xs = Trie (labelFromList xs) 1 mempty