-- | -- -- Module : Math.DyckWord.Binary -- Copyright : (c) 2017 Johannes Hildén -- License : BSD3 -- Maintainer : johannes@isomorphic.co -- Stability : experimental -- Portability : GHC -- -- == Background -- -- In formal language theory, the /Dyck language/ consists of all strings of -- evenly balanced left and right parentheses, brackets, or some other -- symbols, together with the /empty/ word. Words in this language (named -- after German mathematician Walther von Dyck) are known as /Dyck words/, -- some examples of which are @()()()@, @(())((()))@, and @((()()))()@. -- -- The type of Dyck language considered here is defined over a binary alphabet. -- We will take this alphabet to be the set Σ = {(, )} in the following -- examples. The binary Dyck language is the subset of Σ* (the Kleene -- closure of Σ) of all words that satisfy two conditions: -- -- 1. The number of left brackets must be the same as the number of right -- brackets. -- 2. Going from left to right, for each character read, the total number of -- right brackets visited must be less than or equal to the number of left -- brackets up to the current position. -- -- E.g., @(()(()@ and @())(())()@ are __not__ Dyck words. -- -- When regarded as a combinatorial class—with the 'size' of a word defined as -- the number of bracket pairs it contains—the counting sequence associated -- with the Dyck language is the /Catalan numbers/. -- -- \[ -- \begin{array}{ccl} -- \text{Size} & \text{Count} & \text{Words} -- \\ -- 0 & 1 & \epsilon -- \\ -- 1 & 1 & ⟨⟩ -- \\ -- 2 & 2 & ⟨⟩⟨⟩, \ ⟨⟨⟩⟩ -- \\ -- 3 & 5 & ⟨⟩⟨⟩⟨⟩, \ ⟨⟩⟨⟨⟩⟩, \ ⟨⟨⟩⟩⟨⟩, \ ⟨⟨⟩⟨⟩⟩, \ ⟨⟨⟨⟩⟩⟩ -- \\ -- 4 & 14 & ⟨⟩⟨⟩⟨⟩⟨⟩, \ ⟨⟩⟨⟩⟨⟨⟩⟩, \ ⟨⟩⟨⟨⟩⟩⟨⟩, \ ⟨⟩⟨⟨⟩⟨⟩⟩, \ ⟨⟩⟨⟨⟨⟩⟩⟩, \ ⟨⟨⟩⟩⟨⟩⟨⟩, \ ⟨⟨⟩⟩⟨⟨⟩⟩, \ ⟨⟨⟩⟨⟩⟩⟨⟩, -- \\ -- & & ⟨⟨⟨⟩⟩⟩⟨⟩, \ ⟨⟨⟩⟨⟩⟨⟩⟩, \ ⟨⟨⟩⟨⟨⟩⟩⟩, \ ⟨⟨⟨⟩⟩⟨⟩⟩, \ ⟨⟨⟨⟩⟨⟩⟩⟩, \ ⟨⟨⟨⟨⟩⟩⟩⟩ -- \\ -- 5 & 42 & ⟨⟩⟨⟩⟨⟩⟨⟩⟨⟩, \ ⟨⟩⟨⟩⟨⟩⟨⟨⟩⟩, \ ⟨⟩⟨⟩⟨⟨⟩⟩⟨⟩, \ ⟨⟩⟨⟩⟨⟨⟩⟨⟩⟩, \ ⟨⟩⟨⟩⟨⟨⟨⟩⟩⟩, \ \dots, \ ⟨⟨⟨⟨⟨⟩⟩⟩⟩⟩ -- \\ -- 6 & 132 & \dots -- \end{array} -- \] module Math.DyckWord.Binary ( -- * Types Size , Rank , DyckWord -- * Creating and inspecting Dyck words , empty , size , setAlphabet , concatWords -- ** Textual form , fromText , fromText' , toText -- * Ranking and unranking -- $ranking , rank , rankRelative , unrank , unrankRelative , unrankRelative' , wordsOfSize ) where import Data.Maybe ( fromJust ) import Data.Monoid ( (<>) ) import Data.Text ( Text, unfoldrN ) import Math.DyckWord.Binary.Internal import qualified Data.Text as T -- $ranking -- -- To /rank/ a Dyck word is to determine its position in some ordered sequence -- of words. The dual of this—to produce the Dyck word corresponding to a -- position in said sequence—is called /unranking/. The order we are -- interested in here is known as /shortlex/, and it demands that a smaller -- Dyck word always gets precedence over a bigger one. When comparing words of -- the same size, normal lexicographical order applies. -- -- === Relative vs. absolute rank -- -- In this library, we adopt the following (non-standard) terminology. -- The /relative/ rank of a Dyck word /w/ is its position in the sequence of -- those words with the same size as /w/. By contrast, the /absolute/ rank -- means a word's position in the shortlex sequence of __all__ Dyck words. -- /For example:/ The (absolute) rank of @(((())))@ is 9, but the relative rank -- of the same word is 0, since it is the first word of size four. -- -- \[ -- \begin{array}{lccc} -- \text{Word} & \text{Size} & \text{Rank} & \text{Relative rank} -- \\ -- \epsilon & 0 & 0 & 0 \\ -- ⟨⟩ & 1 & 1 & 0 \\ -- ⟨⟨⟩⟩ & 2 & 2 & 0 \\ -- ⟨⟩⟨⟩ & 2 & 3 & 1 \\ -- ⟨⟨⟨⟩⟩⟩ & 3 & 4 & 0 \\ -- ⟨⟨⟩⟨⟩⟩ & 3 & 5 & 1 \\ -- ⟨⟨⟩⟩⟨⟩ & 3 & 6 & 2 \\ -- ⟨⟩⟨⟨⟩⟩ & 3 & 7 & 3 \\ -- ⟨⟩⟨⟩⟨⟩ & 3 & 8 & 4 \\ -- ⟨⟨⟨⟨⟩⟩⟩⟩ & 4 & 9 & 0 \\ -- ⟨⟨⟨⟩⟨⟩⟩⟩ & 4 & 10 & 1 \\ -- \;\;\;\; \vdots & \vdots & \vdots & \vdots -- \end{array} -- \] -- -- If we let \(r(w)\) denote the relative rank of a word \(w\), and \(C_i\) the -- \(i^{th}\) Catalan number, then the absolute rank \(R(w)\) of \(w\) is given -- by the formula \( R(w) = r(w) + \sum_{i=0}^{s-1} C_i, \) where \(s\) is the -- size of \(w\). -- | See 'size'. type Size = Integer -- | Represents the /rank/ of a Dyck word. type Rank = Integer -- | Opaque Dyck word type. For functions to build Dyck words, see: -- -- * 'empty', -- * 'fromText', -- * 'fromText'', -- * 'unrank', -- * 'unrankRelative', and -- * 'unrankRelative''. -- -- Conceptually, every non-empty Dyck word has the form /(a)b/, where /a/ and -- /b/ are Dyck words. The BNF grammar for this language is given by -- \( \omega = \epsilon \mid ( \omega ) \omega \). data DyckWord = DyckWord { _size :: !Size , _absRank :: !Rank , _relRank :: !Rank , _text :: !Text } deriving (Show) -- | Two Dyck words are considered equal when they have the same /absolute/ -- rank. -- -- >>> fromText' "010011000111" == fromText' "xyxxyyxxxyyy" -- True instance Eq DyckWord where a == b = _absRank a == _absRank b -- | Dyck words of the same size are ordered lexicographically, and a smaller -- Dyck word always gets precedence over a bigger one. -- -- >>> fromText' "0011" < fromText' "0101" -- True -- -- >>> fromText' "0101" > fromText' "01" -- True instance Ord DyckWord where a <= b = _absRank a <= _absRank b -- | The /empty/ Dyck word. empty :: DyckWord empty = DyckWord { _size = 0 , _absRank = 0 , _relRank = 0 , _text = T.empty } -- | The /size/ of a Dyck word is the number of bracket pairs it contains, -- i.e., half of the length of the word's string representation. Inductively, -- it can be defined as -- -- \[ -- \text{size}\;w = -- \begin{cases} -- 0 && \text{if} \; w = \epsilon \\ -- 1 + \text{size}\;u + \text{size}\;v && \text{if} \; w = (u)v. -- \end{cases} -- \] size :: DyckWord -> Size size = _size juxtapose :: DyckWord -> DyckWord -> Text juxtapose a b | compatible = _text a <> _text b | otherwise = _text a <> _text (setAlphabet (firstChar a) (finalChar a) b) where firstChar = T.head . _text finalChar = T.last . _text compatible | 0 == _absRank a = True | 0 == _absRank b = True | otherwise = firstChar a == firstChar b && finalChar a == finalChar b -- | Change the alphabet of a Dyck word. An alphabet has two characters, here -- referred to as the /left/ and /right/ symbol, respectively. For example: -- -- >>> toText (setAlphabet 'x' 'o' (unrank 55)) -- xoxxoxxooo setAlphabet :: Char -- ^ Left symbol -> Char -- ^ Right symbol -> DyckWord -> DyckWord setAlphabet a' b' w = w { _text = f `T.map` t } where t = _text w a = T.head t b = T.last t f c | c == a = a' | c == b = b' | otherwise = error "not a valid dyck word" -- | Concatenate two Dyck words. Corresponds to ordinary string concatenation. -- -- \[ -- \begin{align} -- \epsilon +\!\!\!+\;w &= w -- \\ -- (u)v +\!\!\!+\;w &= (u)[ v +\!\!\!+\; w ] -- \end{align} -- \] -- -- If the two words have different alphabets, the concatenated word will -- inherit the first word's symbol set. concatWords :: DyckWord -> DyckWord -> DyckWord concatWords a b = fromText' (juxtapose a b) instance Monoid DyckWord where mappend = concatWords mempty = empty -- | Return the /absolute/ rank of a Dyck word. That is, its position in the -- (shortlex) ordered sequence of /all/ Dyck words. rank :: DyckWord -> Rank rank = _absRank -- | Return the /relative/ rank of a Dyck word. rankRelative :: DyckWord -> Rank rankRelative = _relRank -- | Parse a 'Text' value to a 'DyckWord'. The result is wrapped in a 'Maybe', -- so that the value becomes 'Nothing' if parsing fails. The alphabet is -- determined by looking at the first and last characters of the input. fromText :: Text -> Either String DyckWord fromText t | T.null t = Right empty | odd len = Left "odd length input" | a == b = err | otherwise = case T.foldr f (Right (0, s, s)) (T.reverse t) of Left e -> Left e Right (r,_,_) -> Right $ DyckWord { _size = s , _absRank = catalanSum (fromIntegral s) + r , _relRank = r , _text = t } where a = T.head t b = T.last t s = fromIntegral (len `div` 2) len = T.length t err = Left "bad input" f c (Right (i, x, y)) | x > y || x < 0 = err | c == a = Right (i, x - 1, y) | c == b = Right (i + catalanTriangle y (x - 1), x, y - 1) f _ _ = err -- | Parse a 'Text' value to a 'DyckWord'. Throws an error if parsing fails. -- This is an unsafe version of 'fromText'. fromText' :: Text -> DyckWord fromText' t = case fromText t of Right r -> r Left _ -> error "not a valid dyck word" -- | Return a textual representation of a 'DyckWord'. toText :: DyckWord -> Text toText = _text -- | Return the /n/-th Dyck word in the (shortlex) ordered sequence of /all/ -- Dyck words. unrank :: Rank -> DyckWord unrank r | r < 0 = error "rank cannot be negative" | otherwise = unrankRelative' s i where (s, i) = sizeOffs r 0 sizeOffs :: Integral a => Integer -> a -> (a, Integer) sizeOffs n x | n < c = (x, n) | otherwise = sizeOffs (n - c) (x + 1) where c = catalan (fromIntegral x) -- | Return the /n/-th Dyck word, restricted to only words of a given size. -- Words are lexicographically ordered. The result is wrapped in a 'Maybe', -- and is equal to 'Nothing' if the given rank is outside of the valid range. unrankRelative :: Size -> Rank -> Maybe DyckWord unrankRelative s r | r < 0 || r >= catalan s = Nothing | otherwise = Just DyckWord { _size = s , _absRank = offset + r , _relRank = r , _text = t } where t = unfoldrN (fromIntegral (2*s)) f (r, s, s) offset = catalanSum (fromIntegral s) f (i, x, y) | 0 == y = Nothing | j >= 0 = Just (')', (j, x, y - 1)) | otherwise = Just ('(', (i, x - 1, y)) where j = i - catalanTriangle y (x - 1) -- | Unsafe version of 'unrankRelative'. This function throws an error if the -- given rank is outside of the valid range. unrankRelative' :: Size -> Rank -> DyckWord unrankRelative' s = fromJust . unrankRelative s -- | Return a lexicographically ordered list with all Dyck words of a specific -- size. wordsOfSize :: Size -> [DyckWord] wordsOfSize = ofSize unrankRelative' ofSize :: (Size -> Integer -> b) -> Size -> [b] ofSize f s = f s <$> [0 .. catalan (fromIntegral s) - 1]