{-# LANGUAGE TypeFamilies, PatternGuards, GeneralizedNewtypeDeriving #-} -- | @TrieQueue e@ is a priority queue @IQueue@ instance satisfying @QueueKey (TrieQueue e) ~ [e]@, with the property that this queue frequently performs better than any other queue -- implementation in this package for keys of type @[e]@. -- -- This particular implementation is highly experimental and possibly a genuinely new data structure. See the source code for details. -- However, for many cases this priority queue may be used for a heap sort that runs faster than the "Data.List" implementation, -- or the vanilla "Data.Queue.PQueue" implementation. module Data.Queue.TrieQueue (TrieQueue) where import Control.Arrow((***)) import Control.Monad import Data.Semigroup --import Data.Monoid import Data.Maybe import Data.Queue.Class import Data.Queue.QueueHelpers import Data.Queue.Fuse.PHeap --import Data.Queue.TrieQueue.Edge --import Data.Queue.TrieQueue.MonoidQueue import Data.Queue.TrieQueue.TrieLabel import GHC.Exts import Prelude hiding (null) -- On the back end it uses something called a /monoid queue/, -- which takes ordered keys associated with monoid values and returns (k, m') pairs where m' is the concatenation of every monoid value associated with k, -- with no guarantees made upon the order of the concatenation. Essentially, it is a priority queue which internally "merges" values with equal keys. -- See Data.Queue.Fuse.PHeap for details and a list of alternative implementations. -- After some experimentation, trie edge labels are currently implemented as vanilla lists; however, the implementation is modularized in -- Data.Queue.TrieQueue.EdgeLabel. (Other possible implementations include mergeable deques and Data.Sequence finger trees.) -- A trie, now, consists of an edge label xs, the number of strings ending with that label, and a monoid queue associating characters in the string to -- tries consisting of strings prefixed by that character. This is the key variation in this implementation, and it exploits the fact that -- random-access string lookup is not required in a priority queue: only extract-min and insert, operations perfectly well suited to a monoid queue. -- Note that the monoid values in the monoid queue are themselves tries, which get recursively merged as necessary. data Trie e = Trie (Label e) {-# UNPACK #-} !Int (FusePHeap e (Trie e)) deriving (Show) newtype TrieQueue e = TQ (HeapQ (Trie e)) deriving (Monoid, Show) -- This monoid instance can now get exploited for great justice by the monoid queue. instance Ord e => Semigroup (Trie e) where sappend = mergeTrie sconcat = mergeTries --{-# INLINE forceOrd #-} {-forceOrd :: Ord e => Trie e -> x -> x forceOrd t x = cmp t `seq` x where cmp :: Ord e => Trie e -> (e -> e -> Ordering) cmp _ = compare-} catTrie :: Ord e => Label e -> Trie e -> Trie e xs `catTrie` Trie ys yn yQ = Trie (xs `mappend` ys) yn yQ consTrie :: Ord e => e -> Trie e -> Trie e x `consTrie` Trie xs xn xQ = Trie (x `cons` xs) xn xQ mergeTrie :: Ord e => Endo (Trie e) xT@(Trie xs0 xn xQ) `mergeTrie` yT@(Trie ys0 yn yQ) = merging xs0 ys0 split (tail xT yT) (tail yT xT) xy where end (Trie _ xn xQ) x xs = (x, Trie xs xn xQ) split pfx x xs y ys = let xEnd = end xT x xs; yEnd = end yT y ys in Trie pfx 0 (xEnd `insert` singleton yEnd) tail (Trie xs xn xQ) yT y ys = let yEnd = end yT y ys in Trie xs xn (yEnd `insert` xQ) xy = Trie xs0 (xn + yn) (xQ `merge` yQ) --{-# INLINE compactTrie #-} compactTrie :: Ord e => Trie e -> Maybe (Trie e) compactTrie (Trie xs 0 xQ) | null xQ = Nothing | Just (y, t) <- extractSingle xQ = Just (xs `catTrie` (y `consTrie` t)) compactTrie t = Just t data Acc e f = A {-# UNPACK #-} !Int e f -- Note that a monoid queue is built up and (sometimes) torn down for each character. If every label on every trie being merged matches -- on the first character, then the monoid queue simply automatically becomes a singleton, a case handled by compactTrie with a specialized -- implementation based on extractSingle. If the labels do not match, or there are tries being merged with empty labels, -- then the monoid queue is exactly what we needed anyway. mergeTries :: Ord e => Fusion (Trie e) mergeTries ts0 = compactTrie (Trie mempty nEmpty (combine es qs)) where combine es qs = es `insertAll` mergeAll qs A nEmpty qs es = foldl procEmpty (A 0 [] []) ts0 A nEmpty qs es `procEmpty` Trie xs n q = case uncons xs of Nothing -> A (n + nEmpty) (q:qs) es Just (x, xs) -> A nEmpty qs ((x, Trie xs n q):es) --mergeTries = fusing' mergeTrie {-# INLINE fin #-} fin :: Ord e => Trie e -> Maybe (Trie e) fin (Trie _ 0 q) | null q = Nothing fin t = Just t -- If there are strings ending at this label, we obviously process those. Otherwise, we recurse to the first hanging trie from the monoid queue. -- If it is not exhausted, then we can simply replace the value in the monoid queue; if it is exhausted we may possibly compact the trie -- (e.g. if there is now only one child trie and we may in fact combine those edges). extractTrie :: Ord e => Trie e -> (Label e, Maybe (Trie e)) extractTrie (Trie xs (n+1) xQ) = (xs, inline compactTrie (Trie xs n xQ)) extractTrie (Trie xs 0 xQ) | Just (y, t) <- top xQ, (ys, t') <- extractTrie t = (xs `mappend` (y `cons` ys), case t' of Nothing -> delete xQ >>= inline compactTrie . Trie xs 0 Just t' -> Just (Trie xs 0 $ replace t' xQ)) extractTrie _ = error "Failure to detect empty queue" instance Ord e => IQueue (TrieQueue e) where type QueueKey (TrieQueue e) = [e] empty = mempty merge = mappend mergeAll = mconcat singleton = TQ . HQ 1 . pJust . single insertAll = mappend . fromListTrie fromList = fromListTrie extract (TQ (HQ n (Pt t))) = fmap ((labelToList *** (TQ . HQ (n-1) . Pt)) . extractTrie) t null (TQ (HQ _ (Pt Nothing))) = True null _ = False size (TQ (HQ n _)) = n toList (TQ (HQ _ (Pt t))) = maybe [] trieToList t where trieToList (Trie xs xn xQ) = replicate xn xs ++ [xs ++ y:ys | (y, t) <- toList xQ, ys <- trieToList t] toList_ (TQ (HQ _ (Pt t))) = maybe [] trieToList_ t where trieToList_ (Trie xs xn xQ) = replicate xn xs ++ [xs ++ y:ys | (y, t) <- toList_ xQ, ys <- trieToList_ t] fromListTrie :: Ord e => [[e]] -> TrieQueue e fromListTrie = TQ . liftM2 HQ length (Pt . mergeTries . map single) single :: Ord e => [e] -> Trie e single xs = Trie (labelFromList xs) 1 empty