{-# language CPP #-} {-# language BangPatterns, ScopedTypeVariables, UnboxedTuples, MagicHash #-} {-# language DeriveTraversable, StandaloneDeriving #-} {-# language PatternSynonyms #-} {-# language ViewPatterns #-} {-# language LambdaCase #-} {- OPTIONS_GHC -Wall #-} {- OPTIONS_GHC -ddump-simpl #-} module Data.CompactSequence.Queue.Internal where import qualified Data.CompactSequence.Internal.Array as A import Data.CompactSequence.Internal.Array (Array) import qualified Data.CompactSequence.Internal.Size as Sz import Data.CompactSequence.Internal.Size (Size, Twice) import qualified Data.Foldable as F import Data.Function (on) data FD n a = FD1 !(Array n a) | FD2 !(Array n a) !(Array n a) | FD3 !(Array n a) !(Array n a) !(Array n a) -- FD2 and FD3 are safe; FD1 is dangerous. data RD n a = RD0 | RD1 !(Array n a) | RD2 !(Array n a) !(Array n a) -- RD0 and RD1 are safe; RD2 is dangerous. -- Conceptually, we want something like -- -- data Queue n a = Empty | Node !(FD n a) (Queue n a) !(RD n a) -- -- Unfortunately, this is rather wasteful. The Node itself takes -- 4 words, and the digits combined take between 2 and 7. Total: -- between 6 and 11 words. By manually "unpacking" the digits, expanding -- the Queue to 10 constructors, we now have nodes taking -- between 3 and 7 words, a considerable improvement. This kind of -- unpacking, in general, can risk a loss of sharing, leading to -- increased allocation and (in the presence of persistence) increased -- residency. But that doesn't happen here! The worst case for the -- unpacked representation relative to the conceptual one is when -- the frost digit is 3 and we modify the rear digit. In that case, -- we have to copy the three front array pointers rather than a single -- front digit pointer. Consider, for example, changing a 0 digit -- to a 1 digit in the rear. For the conceptual representation, that -- allocates 4 words for the new Node plus 2 words for the new rear -- digit, for a total of 6 words. The unpacked representation -- allocates one word for the new node header, three words to copy the -- front, one word to copy the middle, and one word for the new rear. -- Total: 6. So in the case that's *worst* for the unpacked version, -- the unpacked version still breaks even in allocation, while -- winning the indirection game. So unpacked is the way to go. -- As long as we're doing it this way, we can bake the "no debits -- on children of unsafe nodes" invariant right into the constructors, -- preventing us from messing that up and as a side benefit avoiding -- some double forcing. data Queue n a = Empty | Node10 !(Array n a) !(Queue (Twice n) a) | Node11 !(Array n a) !(Queue (Twice n) a) !(Array n a) | Node12 !(Array n a) !(Queue (Twice n) a) !(Array n a) !(Array n a) | Node20 !(Array n a) !(Array n a) (Queue (Twice n) a) | Node21 !(Array n a) !(Array n a) (Queue (Twice n) a) !(Array n a) | Node22 !(Array n a) !(Array n a) !(Queue (Twice n) a) !(Array n a) !(Array n a) | Node30 !(Array n a) !(Array n a) !(Array n a) (Queue (Twice n) a) | Node31 !(Array n a) !(Array n a) !(Array n a) (Queue (Twice n) a) !(Array n a) | Node32 !(Array n a) !(Array n a) !(Array n a) !(Queue (Twice n) a) !(Array n a) !(Array n a) deriving (Functor, Foldable, Traversable) -- An Empty node is safe. -- A Node node is safe if both its digits are safe. We require that the child queue of an unsafe -- node be in WHNF, and allow no debits on it. -- -- -- To calculate the debit allowance of the child queue of a *safe* node: -- -- To each ancestor of the node, assign 1 if the node is safe and 0 if it is -- unsafe. Calculate the value of the binary number so obtained. For example, -- given -- -- Safe -- Safe -- Dangerous -- Safe -- Node -- -- the *safety value* above Node, sv(Node), is 1*1+1*2+0*4+1*8 = 11 -- -- We allow the child queue of a safe node four times its safety value (for some value of four). -- Gunk to define a `Node` pattern synonym to pretend we -- have real digits. Sadly, where we really want this most, -- it throws GHC's optimizer for a loop and it makes garbage -- code. data Queue_ n a = Empty_ | Node_ !(FD n a) (Queue (Twice n) a) !(RD n a) matchNode :: Queue n a -> Queue_ n a matchNode q = case q of Empty -> Empty_ Node10 x m -> Node_ (FD1 x) m RD0 Node11 x m a -> Node_ (FD1 x) m (RD1 a) Node12 x m a b -> Node_ (FD1 x) m (RD2 a b) Node20 x y m -> Node_ (FD2 x y) m RD0 Node21 x y m a -> Node_ (FD2 x y) m (RD1 a) Node22 x y m a b -> Node_ (FD2 x y) m (RD2 a b) Node30 x y z m -> Node_ (FD3 x y z) m RD0 Node31 x y z m a -> Node_ (FD3 x y z) m (RD1 a) Node32 x y z m a b -> Node_ (FD3 x y z) m (RD2 a b) {-# INLINE matchNode #-} pattern Node :: FD n a -> Queue (Twice n) a -> RD n a -> Queue n a pattern Node pr m sf <- (matchNode -> Node_ pr m sf) where Node (FD1 x) m RD0 = Node10 x m Node (FD1 x) m (RD1 a) = Node11 x m a Node (FD1 x) m (RD2 a b) = Node12 x m a b Node (FD2 x y) m RD0 = Node20 x y m Node (FD2 x y) m (RD1 a) = Node21 x y m a Node (FD2 x y) m (RD2 a b) = Node22 x y m a b Node (FD3 x y z) m RD0 = Node30 x y z m Node (FD3 x y z) m (RD1 a) = Node31 x y z m a Node (FD3 x y z) m (RD2 a b) = Node32 x y z m a b {-# COMPLETE Empty, Node #-} data ViewA n a = EmptyA | ConsA !(Array n a) (Queue n a) data ViewA2 n a = EmptyA2 | ConsA2 !(Array n a) !(Array n a) (Queue n a) singletonA :: Array n a -> Queue n a singletonA sa = Node (FD1 sa) Empty RD0 viewA :: Size n -> Queue n a -> ViewA n a -- Non-cascading viewA !_ Empty = EmptyA viewA !_ (Node (FD3 sa1 sa2 sa3) m sf) = ConsA sa1 $ Node (FD2 sa2 sa3) m sf viewA !_ (Node (FD2 sa1 sa2) m sf) = ConsA sa1 $ Node (FD1 sa2) m sf -- Potentially cascading viewA !n (Node (FD1 sa1) m (RD2 sa2 sa3)) = ConsA sa1 $ case shiftA n m sa2 sa3 of ShiftedA sam1 sam2 m' -> Node (FD2 sam1 sam2) m' RD0 viewA !n (Node (FD1 sa1) m sf) = ConsA sa1 $ case viewA (Sz.twice n) m of EmptyA -> case sf of RD2 sa2 sa3 -> Node (FD2 sa2 sa3) Empty RD0 RD1 sa2 -> singletonA sa2 RD0 -> Empty ConsA sam m' | (sam1, sam2) <- A.splitArray n sam -> Node (FD2 sam1 sam2) m' sf empty :: Queue n a empty = Empty {- We have some number of unsafe nodes followed by a safe node. Any operation that cascades will turn any node it passes into a safe one. Let's first see how debit allowances change. Initially, the prefix contributes no debit allowance. If the last node that changes was a safe one and it becomes unsafe, that reduces the debit allowance below it. All but a logarithmic amount of that reduction is offset by the changes from unsafe to safe nodes above. For each unsafe node, we may perform `s` splitting work and perform or suspend `s` appending work. For purposes of amortized analysis, we can pretend that we perform all of these eagerly. -} snocA :: Size n -> Queue n a -> Array n a -> Queue n a snocA !_ Empty sa = Node (FD1 sa) empty RD0 snocA !n (Node (FD1 sa0) m (RD2 sa1 sa2)) sa3 | ShiftedA sam1 sam2 m' <- shiftA n m sa1 sa2 = Node (FD3 sa0 sam1 sam2) m' (RD1 sa3) snocA !_ (Node pr m RD0) sa = Node pr m (RD1 sa) snocA !_ (Node pr m (RD1 sa1)) sa2 = Node pr m (RD2 sa1 sa2) snocA !n (Node pr m (RD2 sa1 sa2)) sa3 = Node pr (snocA (Sz.twice n) m (A.append n sa1 sa2)) (RD1 sa3) -- | Uncons from a node and snoc onto it. Ensure that if the operation is -- expensive then it leaves the node in a safe configuration. Why do we need -- this? Suppose we have -- -- Two m Two -- -- If we snoc onto this, the operation cascades, and we get -- -- Two m Zero -- -- Then when we view, we get -- -- One m Zero -- -- which is not safe. -- -- Instead, we need to view first, getting -- -- One m Two -- -- immediately, then snoc on, cascading and getting -- -- Three m Zero -- -- which is safe. -- -- If instead we have -- -- One m One -- -- we have to do the opposite: snoc then view. We might as well -- just write a dedicated shifting operation. shiftA :: Size n -> Queue (Twice n) a -> Array n a -> Array n a -> ShiftedA n a -- BLAST AND DARN. I started out using the Node pattern synonym all -- through here. Sadly, GHC was *way* too eager with join point -- transformations and decided to actually pass around front -- and rear digits to make things slow. GRRR. So in this function, -- we use the raw constructors by hand. -- Non-cascading cases shiftA !_ Empty !sa1 !sa2 = ShiftedA sa1 sa2 Empty shiftA !n (Node20 sa1 sa2 m) !sa3 !sa4 = shrift n sa1 $ Node11 sa2 m (A.append n sa3 sa4) shiftA !n (Node21 sa1 sa2 m sa3) !sa4 !sa5 = shrift n sa1 $ Node12 sa2 m sa3 (A.append n sa4 sa5) shiftA !n (Node30 sa1 sa2 sa3 m) !sa4 !sa5 = shrift n sa1 $ Node21 sa2 sa3 m (A.append n sa4 sa5) shiftA !n (Node31 sa1 sa2 sa3 m sa4) !sa5 !sa6 = shrift n sa1 $ Node22 sa2 sa3 m sa4 (A.append n sa5 sa6) -- cascading cases shiftA !n (Node10 sa1 m) !sa3 !sa4 = shrift n sa1 $ case viewA (Sz.twice (Sz.twice n)) m of EmptyA -> Node10 (A.append n sa3 sa4) Empty ConsA sam m' | (sam1, sam2) <- A.splitArray (Sz.twice n) sam -> Node21 sam1 sam2 m' (A.append n sa3 sa4) shiftA !n (Node11 sa1 m sa2) !sa3 !sa4 = shrift n sa1 $ case shiftA (Sz.twice n) m sa2 (A.append n sa3 sa4) of ShiftedA sam1 sam2 m' -> Node20 sam1 sam2 m' shiftA n (Node12 sa1 m sa2 sa3) !sa4 !sa5 = shrift n sa1 $ case shiftA (Sz.twice n) m sa2 sa3 of ShiftedA sam1 sam2 m' -> Node21 sam1 sam2 m' (A.append n sa4 sa5) shiftA n (Node22 sa1 sa2 m sa3 sa4) !sa5 !sa6 = shrift n sa1 $ case shiftA (Sz.twice n) m sa3 sa4 of ShiftedA sam1 sam2 m' -> Node31 sa2 sam1 sam2 m' (A.append n sa5 sa6) shiftA n (Node32 sa1 sa2 sa3 m sa4 sa5) !sa6 !sa7 = shrift n sa1 $ Node21 sa2 sa3 (snocA (Sz.twice (Sz.twice n)) m (A.append (Sz.twice n) sa4 sa5)) (A.append n sa6 sa7) shrift :: Size n -> Array (Twice n) a -> Queue (Twice n) a -> ShiftedA n a shrift n sa q | (sa1, sa2) <- A.splitArray n sa = ShiftedA sa1 sa2 q data ShiftedA n a = ShiftedA !(Array n a) !(Array n a) (Queue (Twice n) a) instance Show a => Show (Queue n a) where showsPrec p xs = showParen (p > 10) $ showString "fromList " . shows (F.toList xs) instance Eq a => Eq (Queue n a) where (==) = (==) `on` F.toList instance Ord a => Ord (Queue n a) where compare = compare `on` F.toList