{-
A simple pretty printer combinator library.

A pretty printer converts an inductively defined data structure (i.e. a tree)
into a text (i.e. a list of lines).
Indentation of lines reflects the tree form of the data structure.
However, to produce only a minimal number of lines substructures are put
on a single line as far as possible.

The combinators provided here enable simple definition of a pretty printer 
for any tree structure.
The pretty printer requires time linear in the size of the data structure
and space linear in the desired maximal width of the output.
The interface of the library is based on:
  Philip Wadler: A prettier printer, 
  http://cm.bell-labs.com/cm/cs/who/wadler/topics/recent.html
The implementation uses some ideas from
  Derek C Oppen: Prettyprinting, TOPLAS volume 2 number 4, ACM, 1980, 465-483.
(Wadler's implementation performs some backtracking and hence may require time
linear in both the size of the data structure and the maximal width.)
-}
module PrettyLib (nil,(<>),text,line,delimiter,fdelimiter,group,nest,groupNest
                 ,Doc
                 ,parens,brackets,braces,string
                 ,pretty,simple) where


{- Exported definitions ================================================== -}
-- Core pretty printer combinators ------------------------------------------

infixr 6 <>

nil :: Doc
nil = NIL

(<>) :: Doc -> Doc -> Doc
(<>) = (:<>)

text :: String -> Doc
text = TEXT

delimiter :: String -> Doc
delimiter = DELIMITER

{- 
Fill line, that is, 
only make newline if part up to next possible newline does not fit on line.
-}
fdelimiter :: String -> Doc
fdelimiter = FDELIMITER

{-
Force line break. The surrounding groups cannot fit on one line.
-}
line :: Doc
line = LINE

group :: Doc -> Doc
group = GROUP

nest :: Int -> Doc -> Doc
nest = NEST

-- Derived pretty printer combinators ---------------------------------------

{-
The often used combination of group and nest.
-}
groupNest :: Int -> Doc -> Doc
groupNest indentation doc = group (nest indentation doc)

parens :: Doc -> Doc
parens doc = text "(" <> doc <> text ")"

brackets :: Doc -> Doc
brackets doc = text "[" <> doc <> text "]"

braces :: Doc -> Doc
braces doc = text "{" <> doc <> text "}"

string :: String -> Doc
string s = text ('\"' : s ++ "\"")

{- Optimally pretty print the document within the given line width. -}
pretty :: Int -> Doc -> String
pretty width = layout width . normalise . removeFDelimiter . flatten 

{- Never turn a delimiter into a newline. -}

simple :: Doc -> String
simple = token2String . flatten


{- Implementation ========================================================= -}

data Doc = NIL | Doc :<> Doc 
         | TEXT String | DELIMITER String | FDELIMITER String | LINE 
         | GROUP Doc | NEST Int Doc
         deriving Show


data Token = Text String Int 
           | Delimiter String Int Int | FDelimiter String Int Int | Line Int 
           | Open | Close
           deriving Show


{- 
Staightforward conversion of tokens into a string,
ignoring brackets and possiblity of turning delimiters into newlines.
-}
token2String :: [Token] -> String

token2String [] = ""
token2String (Text s _ : tokens) = s ++ token2String tokens
token2String (Delimiter s _ _ : tokens) = s ++ token2String tokens
token2String (FDelimiter s _ _ : tokens) = s ++ token2String tokens
token2String (Line i : tokens) = '\n' : replicate i ' ' ++ token2String tokens
token2String (Open : tokens) = token2String tokens
token2String (Close : tokens) = token2String tokens
 

{-
Convert the tree structure of a document into a stream of tokens.
A group is repesented by an opening and a closing bracket.
-}
flatten :: Doc -> [Token]

flatten doc = go 0 doc []
  where
  go :: Int -> Doc -> [Token] -> [Token]
  {- use accumulator to perform flattening in linear time -}
  go i NIL rest = rest
  go i (d1 :<> d2) rest = go i d1 (go i d2 rest)
  go i (TEXT s) rest = Text s (length s) : rest
  go i (DELIMITER s) rest = Delimiter s (length s) i : rest
  go i (FDELIMITER s) rest = FDelimiter s (length s) i : rest
  go i LINE rest = Line i : rest
  go i (GROUP doc) rest = Open : go i doc (Close : rest)
  go i (NEST j doc) rest = go (i+j) doc rest


{-
Replace FDelimiter s l i by Open, Delimiter s l i, ..., Close.
So that Close is in front of the next (F)Delimiter s l i of the
same group or the next Close or the end.
-}
removeFDelimiter :: [Token] -> [Token]

removeFDelimiter = go 0 [] 
  where
  {-
  Invariants: elements of stack are strictly sorted, largest on top
              nesting depth is greater or equal top element of stack (if ex.)
  -}
  go :: Int {- nesting depth of brackets; without new ones -}
     -> [Int] {- stack for closing brackets that still have to be inserted -}
     -> [Token] {- stream of tokens with FDelimiter -}
     -> [Token] {- stream of tokens without FDelimiter -}
  go _ [] [] = [] -- first argument should be 0
  go _ _  [] = [Close] -- the stack can at most contain one entry
  go d toClose (Open : tokens) = Open : go (d+1) toClose tokens
  go d toClose (Close : tokens) = 
    possiblyClose (Close :) (subtract 1) id d toClose tokens
  go d toClose (FDelimiter s l i : tokens) =
    possiblyClose (\x -> Open : Delimiter s l i : x) id (d:) d toClose tokens
  go d toClose (Delimiter s l i : tokens) =
    possiblyClose (Delimiter s l i :) id id d toClose tokens
  go d toClose (Line i : tokens) =
    possiblyClose (Line i :) id id d toClose tokens
  go d toClose (text : tokens) = text : go d toClose tokens

  possiblyClose :: ([Token] -> [Token])  -- what to put into output stream
                -> (Int -> Int)          -- how to change nesting depth
                -> ([Int] -> [Int])      -- how to change stack
                -> Int                   -- nesting depth
                -> [Int]                 -- stack
                -> [Token]
                -> [Token]
  possiblyClose modOut modDepth modStack d (close : toClose) tokens
    | close == d = Close : modOut (go (modDepth d) (modStack toClose) tokens) 
  possiblyClose modOut modDepth modStack d toClose tokens =
    modOut (go (modDepth d) (modStack toClose) tokens)
  

{-
Normalise a stream of tokens wrt the following rewriting rules
Open, Text s l  ==> Text s l, Open
Close, Text s l ==> Text s l, Close
Then any block of brackets is followed by a Line s l i or the end of the stream
-}
normalise :: [Token] -> [Token]

normalise tokens = go 0 0 tokens
  where
  go :: Int {- number of deferred closing brackets -} 
     -> Int {- number of deferred opening brackets -} 
     -> [Token] {- stream of tokens to normalise -}
     -> [Token] {- normalised stream of tokens -}
  go i _ [] = replicate i Close -- there should be no deferred opening brackets
  go i j (Open : tokens) = go i (j+1) tokens
  go i j (Close : tokens)
    | j == 0    = go (i+1) j tokens
    | otherwise = go i (j-1) tokens
  go i j (deli@(Delimiter _ _ _) : tokens) =
    replicate i Close ++ replicate j Open ++ deli : go 0 0 tokens
  go i j (line@(Line _) : tokens) =
    replicate i Close ++ replicate j Open ++ line : go 0 0 tokens
  go i j (text : tokens) = text : go i j tokens
 

{-
The list of tokens is optimally pretty printed within the given width.
Precondition: between a `Close' and a `Text s l' is always a `Line s l i'.
For efficiency also better if no `Open' is directly followed by `Text s l'.
A normalised token list has this property.
-}
layout :: Int {- width of layout -}
       -> [Token] 
       -> String

layout width tokens = snd $ go width 1 empty1 0 tokens
  where
  go :: Int -- space remaining in current line (in characters)
     -> Int -- current stream position (in characters)
     -> Q1 Int -- queue of maximal streamposition for end of group to fit
     -> Int -- how deep inside a fitting group (<= 0: not in fitting group)
     -> [Token]
     -> (Q2 Bool -- queue which states if groups fit
        ,String) -- optimally layouted output
  go _ _ groupsMaxEnd _ [] = (empty2, "")
  go r p groupsMaxEnd fitDepth (Open : tokens) = (groupFits', string)
    where
    (groupsMaxEnd', fits', groupFits') = cons (r+p) groupsMaxEnd groupsFits
    (groupsFits, string) = go r p groupsMaxEnd' fitDepth' tokens
    fitDepth' = if fitDepth > 0 then succ fitDepth else if fits' then 1 else 0
  go r p groupsMaxEnd fitDepth (Close : tokens) 
    | isEmpty1 groupsMaxEnd = go r p groupsMaxEnd (pred fitDepth) tokens
    | otherwise = (groupFits', string)
    where
    (_, groupsMaxEnd', groupFits') = lview groupsMaxEnd True groupFits
    (groupFits, string) = go r p groupsMaxEnd' (pred fitDepth) tokens
  go r p groupsMaxEnd fitDepth (Delimiter s l i : tokens)  = 
    (groupFits', output ++ string)
    where
    (output, newRemainingSpace) 
      | fitDepth > 0 = (s, r-l)
      | otherwise    = ('\n' : replicate i ' ', width-i)
    (groupFits', string) = 
      checkGroupsMaxEnd newRemainingSpace (p+l) groupsMaxEnd fitDepth tokens 
  go r p groupsMaxEnd fitDepth (Line i : tokens) =
    -- a compulsary newline makes all surrounding blocks not to fit
    (map1To2 (\_ -> False) groupsMaxEnd, '\n' : replicate i ' ' ++ string)
    where
    (_, string) = go (width-i) p empty1 fitDepth tokens
  go r p groupsMaxEnd fitDepth (Text s l : tokens) = (groupFits', s ++ string)
    where
    (groupFits', string) = 
      checkGroupsMaxEnd (r-l) (p+l) groupsMaxEnd fitDepth tokens 


  checkGroupsMaxEnd :: Int -> Int -> Q1 Int -> Int -> [Token] 
                    -> (Q2 Bool, String)
  checkGroupsMaxEnd r p groupsMaxEnd fitDepth tokens 
    | isEmpty1 groupsMaxEnd || p <= maxEnd = 
      go r p groupsMaxEnd fitDepth tokens
    | otherwise = (groupsFits', string)
    where
    (groupsMaxEnd', maxEnd, groupsFits') = rview groupsMaxEnd groupsFits False
    (groupsFits, string) = checkGroupsMaxEnd r p groupsMaxEnd' fitDepth tokens


{- Special double ended queues -------------------------------------------- 

Two related kinds of double ended queue types are defined.
A queue operation always operates on two queues, one of each kind.
Each operation performs an operation on one queue and exactly the inverse
operation on the other queue.
For example, an operation that splits one queue into its first element and
the remaining queue, also puts an element in front of the other queue to obtain
a longer queue.

The inverse operations are very lazy.
Let there be a sequence of queue operations such that the first queue 
resulting from an operation is always the input first queue to the 
*next* operation and the second queue resulting from an operation is always 
the input second queue to the *preceding* operation.
Let the first operation `cons' an element to the front of the first queue 
and the last operation remove exactly this element from the first queue 
(either front or rear). Then the last operation adds another element to 
the second queue and the first operation removes exactly this element.
The operations on the second queue are so lazy that for obtaining this 
element the second queue which is input to the last operation is not
demanded, that is, does not need to be evaluated.

Note that the inverse operations only work correctly, if the two queues
are passed in such a sequence. The implementation does not only assume
that the two queues handled by an operation have the same number of elements,
but also that they have the same internal structure.

Using a monad to ascertain correct passing of queues is possible but 
seems rather restrictive and hides the idea. However, maybe another
kind of sequencing interface would be good to ensure safe usage.

            element                            element
               v                                  ^
  queue1  -> +----+ -> +---------+ ->     -> +---------+ ->
             |cons|    |operation|    ...    |operation|
          <- +----+ <- +---------+ <-     <- +---------+ <- queue2
               v                                  ^
            element'                           element'

Except for the additional lazy inverse operations the implementation agrees 
with the Banker's queue given in:
  Chris Okasaki: Purely Functional Data Structures, CUP, 1998, Section 8.4.2 
-}

{-
q12List (Q1 _ f _ r) = f ++ reverse r
Furthermore, the structure contains the lengths of the two lists
Q2 does not contain lengths
-}

data Q1 a = Q1 !Int [a] !Int [a] deriving Show
data Q2 a = Q2 [a] [a] deriving Show


reverse1 :: Q1 a -> Q1 a
reverse1 (Q1 lenf f lenr r) = Q1 lenr r lenf f

reverse2 :: Q2 a -> Q2 a
reverse2 (Q2 f r) = Q2 r f


empty1 = Q1 0 [] 0 []
empty2 = Q2 [] []


isEmpty1 (Q1 lenf _ lenr _) = lenf + lenr == 0 


map1To2 :: (a -> b) -> Q1 a -> Q2 b
map1To2 g (Q1 _ f _ r) = Q2 (map g f) (map g r)


{-
Keep lenghts of the two lists in balance
-}

check :: Int -> [a] -> Int -> [a] -> Q2 b -> (Q1 a, [b], [b])

check lenf f lenr r q2 =
  if lenf > balanceConstant * lenr + 1 then
    let
      len = lenf + lenr
      lenf' = len `div` 2
      lenr' = len - lenf'
      (f', rf') = splitAt lenf' f
      (r2, rf2) = lsplitAt lenr r2'
    in (Q1 lenf' f' lenr' (r ++ reverse rf')
       ,lappend lenf' f2' (lreverse (lenr'-lenr) rf2)
       ,r2)
   else
    (Q1 lenf f lenr r, f2', r2')
  where
  Q2 f2' r2' = q2
  balanceConstant = 3 :: Int


{-
Put an element in front of the first list.
Inversely, split second list into front element and tail queue.
-}
cons :: a -> Q1 a -> Q2 b -> (Q1 a, b, Q2 b)

cons x (Q1 lenf f lenr r) q2' = (q', head f2, Q2 (tail f2) r2)
  where
  (q', f2, r2) = check (lenf+1) (x:f) lenr r q2'


{-
Split first list into initial queue and rear element.
Inversely, add an element to rear of second list.
-}
rview :: Q1 a -> Q2 b -> b -> (Q1 a, a, Q2 b)

rview (Q1 _ (x:_) _ []) q2' y = (empty1, x, Q2 [y] [])
rview (Q1 _ [] _ []) _ _ = error "empty queue"
rview (Q1 lenf f lenr (x:r)) q2' y = (q', x, Q2 f2 (y:r2))
  where
  (q', f2, r2) = check lenf f (lenr-1) r q2'


{-
Split first list into first element and tail queue.
Inversely, add an element to the front of the second list.
-}
lview :: Q1 a -> b -> Q2 b -> (a, Q1 a, Q2 b)

lview q1 y q2 = (x, reverse1 q1', reverse2 q2')
  where
  (q1', x, q2') = rview (reverse1 q1) (reverse2 q2) y


{-
Some very lazy variants of standard list functions.
They get the length of the result or a list argument as first argument and 
thus are able to construct the list structure of the result without demanding
evaluation of any of its other arguments.
Demanding some list element of the result will naturally lead to 
more demand of the arguments.
-}


{-
The first argument gives the length of the argument/result list.
-}
lreverse :: Int -> [a] -> [a]

lreverse n xs = lreverseAcc n xs []
  where
  lreverseAcc 0 _ acc = acc
  lreverseAcc n xs acc = lreverseAcc (n-1) ys (y:acc)
    where
    y:ys = xs

{-
The first argument gives the length of the first list argument.
-}
lappend :: Int -> [a] -> [a] -> [a]

lappend 0 _ zs = zs
lappend n xs zs = y : lappend (n-1) ys zs
  where
  y:ys = xs


{-
The first argument gives the position at which the input list shall be split.
The list must be at least that long.
-}
lsplitAt :: Int -> [a] -> ([a], [a])
 
lsplitAt 0 xs = ([],xs)
lsplitAt n ys = (x:xs',xs'') 
  where 
  x:xs = ys
  (xs',xs'') = lsplitAt (n-1) xs

{- End PrettyLib ========================================================== -}