-- |
--
-- Module      : Math.DyckWord.Binary
-- Copyright   : (c) 2017 Johannes Hildén
-- Maintainer  : johannes@isomorphic.co
-- Stability   : experimental
-- Portability : GHC
--
-- == Background
--
-- In formal language theory, the /Dyck language/ consists of all strings of
-- evenly balanced left and right parentheses, brackets, or some other
-- symbols, together with the /empty/ word. Words in this language (named
-- after German mathematician Walther von Dyck) are known as /Dyck words/,
-- some examples of which are @()()()@, @(())((()))@, and @((()()))()@.
--
-- The type of Dyck language considered here is defined over a binary alphabet.
-- We will take this alphabet to be the set &#931; = {(, )} in the following
-- examples. The binary Dyck language is the subset of &#931;* (the Kleene
-- closure of &#931;) of all words that satisfy two conditions:
--
-- 1. The number of left brackets must be the same as the number of right
--    brackets.
-- 2. Going from left to right, for each character read, the total number of
--    right brackets visited must be less than or equal to the number of left
--    brackets up to the current position.
--
-- E.g., @(()(()@ and @())(())()@ are __not__ Dyck words.
--
-- When regarded as a combinatorial class&#8212;with the 'size' of a word defined as
-- the number of bracket pairs it contains&#8212;the counting sequence associated
-- with the Dyck language is the /Catalan numbers/.
--
-- $-- \begin{array}{ccl} -- \text{Size} & \text{Count} & \text{Words} -- \\ -- 0 & 1 & \epsilon -- \\ -- 1 & 1 & ⟨⟩ -- \\ -- 2 & 2 & ⟨⟩⟨⟩, \ ⟨⟨⟩⟩ -- \\ -- 3 & 5 & ⟨⟩⟨⟩⟨⟩, \ ⟨⟩⟨⟨⟩⟩, \ ⟨⟨⟩⟩⟨⟩, \ ⟨⟨⟩⟨⟩⟩, \ ⟨⟨⟨⟩⟩⟩ -- \\ -- 4 & 14 & ⟨⟩⟨⟩⟨⟩⟨⟩, \ ⟨⟩⟨⟩⟨⟨⟩⟩, \ ⟨⟩⟨⟨⟩⟩⟨⟩, \ ⟨⟩⟨⟨⟩⟨⟩⟩, \ ⟨⟩⟨⟨⟨⟩⟩⟩, \ ⟨⟨⟩⟩⟨⟩⟨⟩, \ ⟨⟨⟩⟩⟨⟨⟩⟩, \ ⟨⟨⟩⟨⟩⟩⟨⟩, -- \\ -- & & ⟨⟨⟨⟩⟩⟩⟨⟩, \ ⟨⟨⟩⟨⟩⟨⟩⟩, \ ⟨⟨⟩⟨⟨⟩⟩⟩, \ ⟨⟨⟨⟩⟩⟨⟩⟩, \ ⟨⟨⟨⟩⟨⟩⟩⟩, \ ⟨⟨⟨⟨⟩⟩⟩⟩ -- \\ -- 5 & 42 & ⟨⟩⟨⟩⟨⟩⟨⟩⟨⟩, \ ⟨⟩⟨⟩⟨⟩⟨⟨⟩⟩, \ ⟨⟩⟨⟩⟨⟨⟩⟩⟨⟩, \ ⟨⟩⟨⟩⟨⟨⟩⟨⟩⟩, \ ⟨⟩⟨⟩⟨⟨⟨⟩⟩⟩, \ \dots, \ ⟨⟨⟨⟨⟨⟩⟩⟩⟩⟩ -- \\ -- 6 & 132 & \dots -- \end{array} --$

module Math.DyckWord.Binary (
-- * Types
Size
, Rank
, DyckWord
-- * Creating and inspecting Dyck words
, empty
, size
, setAlphabet
, concatWords
-- ** Textual form
, fromText
, fromText'
, toText
-- * Ranking and unranking
-- $ranking , rank , rankRelative , unrank , unrankRelative , unrankRelative' , wordsOfSize ) where import Data.Maybe ( fromJust ) import Data.Monoid ( (<>) ) import Data.Text ( Text, unfoldrN ) import Math.DyckWord.Binary.Internal import qualified Data.Text as T --$ranking
--
-- To /rank/ a Dyck word is to determine its position in some ordered sequence
-- of words. The dual of this&#8212;to produce the Dyck word corresponding to a
-- position in said sequence&#8212;is called /unranking/. The order we are
-- interested in here is known as /shortlex/, and it demands that a smaller
-- Dyck word always gets precedence over a bigger one. When comparing words of
-- the same size, normal lexicographical order applies.
--
-- === Relative vs. absolute rank
--
-- In this library, we adopt the following (non-standard) terminology.
-- The /relative/ rank of a Dyck word /w/ is its position in the sequence of
-- those words with the same size as /w/. By contrast, the /absolute/ rank
-- means a word's position in the shortlex sequence of __all__ Dyck words.
-- /For example:/ The (absolute) rank of @(((())))@ is 9, but the relative rank
-- of the same word is 0, since it is the first word of size four.
--
-- $-- \begin{array}{lccc} -- \text{Word} & \text{Size} & \text{Rank} & \text{Relative rank} -- \\ -- \epsilon & 0 & 0 & 0 \\ -- ⟨⟩ & 1 & 1 & 0 \\ -- ⟨⟨⟩⟩ & 2 & 2 & 0 \\ -- ⟨⟩⟨⟩ & 2 & 3 & 1 \\ -- ⟨⟨⟨⟩⟩⟩ & 3 & 4 & 0 \\ -- ⟨⟨⟩⟨⟩⟩ & 3 & 5 & 1 \\ -- ⟨⟨⟩⟩⟨⟩ & 3 & 6 & 2 \\ -- ⟨⟩⟨⟨⟩⟩ & 3 & 7 & 3 \\ -- ⟨⟩⟨⟩⟨⟩ & 3 & 8 & 4 \\ -- ⟨⟨⟨⟨⟩⟩⟩⟩ & 4 & 9 & 0 \\ -- ⟨⟨⟨⟩⟨⟩⟩⟩ & 4 & 10 & 1 \\ -- \;\;\;\; \vdots & \vdots & \vdots & \vdots -- \end{array} --$
--
-- If we let $$r(w)$$ denote the relative rank of a word $$w$$, and $$C_i$$ the
-- $$i^{th}$$ Catalan number, then the absolute rank $$R(w)$$ of $$w$$ is given
-- by the formula $$R(w) = r(w) + \sum_{i=0}^{s-1} C_i,$$ where $$s$$ is the
-- size of $$w$$.

-- | See 'size'.
type Size = Integer

-- | Represents the /rank/ of a Dyck word.
type Rank = Integer

-- | Opaque Dyck word type. For functions to build Dyck words, see:
--
--     * 'empty',
--     * 'fromText',
--     * 'fromText'',
--     * 'unrank',
--     * 'unrankRelative', and
--     * 'unrankRelative''.
--
-- Conceptually, every non-empty Dyck word has the form /(a)b/, where /a/ and
-- /b/ are Dyck words. The BNF grammar for this language is given by
-- $$\omega = \epsilon \mid ( \omega ) \omega$$.
data DyckWord = DyckWord
{ _size    :: !Size
, _absRank :: !Rank
, _relRank :: !Rank
, _text    :: !Text
} deriving (Show)

-- | Two Dyck words are considered equal when they have the same /absolute/
--   rank.
--
-- >>> fromText' "010011000111" == fromText' "xyxxyyxxxyyy"
-- True
instance Eq DyckWord where
a == b = _absRank a == _absRank b

-- | Dyck words of the same size are ordered lexicographically, and a smaller
--   Dyck word always gets precedence over a bigger one.
--
-- >>> fromText' "0011" < fromText' "0101"
-- True
--
-- >>> fromText' "0101" > fromText' "01"
-- True
instance Ord DyckWord where
a <= b = _absRank a <= _absRank b

-- | The /empty/ Dyck word.
empty :: DyckWord
empty = DyckWord
{ _size    = 0
, _absRank = 0
, _relRank = 0
, _text    = T.empty }

-- | The /size/ of a Dyck word is the number of bracket pairs it contains,
--   i.e., half of the length of the word's string representation. Inductively,
--   it can be defined as
--
-- $-- \text{size}\;w = -- \begin{cases} -- 0 && \text{if} \; w = \epsilon \\ -- 1 + \text{size}\;u + \text{size}\;v && \text{if} \; w = (u)v. -- \end{cases} --$
size :: DyckWord -> Size
size = _size

juxtapose :: DyckWord -> DyckWord -> Text
juxtapose a b
| compatible = _text a <> _text b
| otherwise  = _text a <> _text (setAlphabet (firstChar a) (finalChar a) b)
where
finalChar = T.last . _text
compatible
| 0 == _absRank a = True
| 0 == _absRank b = True
| otherwise = firstChar a == firstChar b && finalChar a == finalChar b

-- | Change the alphabet of a Dyck word. An alphabet has two characters, here
--   referred to as the /left/ and /right/ symbol, respectively. For example:
--
-- >>> toText (setAlphabet 'x' 'o' (unrank 55))
-- xoxxoxxooo
setAlphabet :: Char       -- ^ Left symbol
-> Char       -- ^ Right symbol
-> DyckWord
-> DyckWord
setAlphabet a' b' w = w { _text = f T.map t }
where
t = _text w
b = T.last t
f c | c == a = a'
| c == b = b'
| otherwise = error "not a valid dyck word"

-- | Concatenate two Dyck words. Corresponds to ordinary string concatenation.
--
-- -- \begin{align} -- \epsilon +\!\!\!+\;w &= w -- \\ -- (u)v +\!\!\!+\;w &= (u)[ v +\!\!\!+\; w ] -- \end{align} --
--
-- If the two words have different alphabets, the concatenated word will
-- inherit the first word's symbol set.
concatWords :: DyckWord -> DyckWord -> DyckWord
concatWords a b = fromText' (juxtapose a b)

instance Monoid DyckWord where
mappend = concatWords
mempty = empty

-- | Return the /absolute/ rank of a Dyck word. That is, its position in the
--   (shortlex) ordered sequence of /all/ Dyck words.
rank :: DyckWord -> Rank
rank = _absRank

-- | Return the /relative/ rank of a Dyck word.
rankRelative :: DyckWord -> Rank
rankRelative = _relRank

-- | Parse a 'Text' value to a 'DyckWord'. The result is wrapped in a 'Maybe',
--   so that the value becomes 'Nothing' if parsing fails. The alphabet is
--   determined by looking at the first and last characters of the input.
fromText :: Text -> Either String DyckWord
fromText t
| T.null t  = Right empty
| odd len   = Left "odd length input"
| a == b    = err
| otherwise = case T.foldr f (Right (0, s, s)) (T.reverse t) of
Left e -> Left e
Right (r,_,_) -> Right $DyckWord { _size = s , _absRank = catalanSum (fromIntegral s) + r , _relRank = r , _text = t } where a = T.head t b = T.last t s = fromIntegral (len div 2) len = T.length t err = Left "bad input" f c (Right (i, x, y)) | x > y || x < 0 = err | c == a = Right (i, x - 1, y) | c == b = Right (i + catalanTriangle y (x - 1), x, y - 1) f _ _ = err -- | Parse a 'Text' value to a 'DyckWord'. Throws an error if parsing fails. -- This is an unsafe version of 'fromText'. fromText' :: Text -> DyckWord fromText' t = case fromText t of Right r -> r Left _ -> error "not a valid dyck word" -- | Return a textual representation of a 'DyckWord'. toText :: DyckWord -> Text toText = _text -- | Return the /n/-th Dyck word in the (shortlex) ordered sequence of /all/ -- Dyck words. unrank :: Rank -> DyckWord unrank r | r < 0 = error "rank cannot be negative" | otherwise = unrankRelative' s i where (s, i) = sizeOffs r 0 sizeOffs :: Integral a => Integer -> a -> (a, Integer) sizeOffs n x | n < c = (x, n) | otherwise = sizeOffs (n - c) (x + 1) where c = catalan (fromIntegral x) -- | Return the /n/-th Dyck word, restricted to only words of a given size. -- Words are lexicographically ordered. The result is wrapped in a 'Maybe', -- and is equal to 'Nothing' if the given rank is outside of the valid range. unrankRelative :: Size -> Rank -> Maybe DyckWord unrankRelative s r | r < 0 || r >= catalan s = Nothing | otherwise = Just DyckWord { _size = s , _absRank = offset + r , _relRank = r , _text = t } where t = unfoldrN (fromIntegral (2*s)) f (r, s, s) offset = catalanSum (fromIntegral s) f (i, x, y) | 0 == y = Nothing | j >= 0 = Just (')', (j, x, y - 1)) | otherwise = Just ('(', (i, x - 1, y)) where j = i - catalanTriangle y (x - 1) -- | Unsafe version of 'unrankRelative'. This function throws an error if the -- given rank is outside of the valid range. unrankRelative' :: Size -> Rank -> DyckWord unrankRelative' s = fromJust . unrankRelative s -- | Return a lexicographically ordered list with all Dyck words of a specific -- size. wordsOfSize :: Size -> [DyckWord] wordsOfSize = ofSize unrankRelative' ofSize :: (Size -> Integer -> b) -> Size -> [b] ofSize f s = f s <$> [0 .. catalan (fromIntegral s) - 1]