%-------------------------------=  --------------------------------------------
\subsection{Poly formatter}
%-------------------------------=  --------------------------------------------

ks, 28.07.2003: This is a new style that is based on the old @math@-style
and is intended to replace @math@ style in a future version. Because the
former @math@ style should remain compatible, I've copied the entire module.
Essentially, there are the same functions here doing the same job, but there
are subtle differences, and they will grow over time \dots

%if codeOnly || showModuleHeader

> module MathPoly               (  module MathPoly, substitute, number  )
> where
>
> import Prelude hiding         (  lines )
> import Data.List              (  partition, nub, sort, transpose )
> import Control.Applicative
> import Control.Arrow          (  (>>>) )
> import Control.Monad          (  (>=>) )
>
> import Verbatim               (  expand, trim )
> import MathCommon
> import Document
> import Directives
> import HsLexer
> import Parser
> import Auxiliaries
> import TeXCommands            (  Lang(..)  )
> -- import Debug.Trace ( trace )

%endif

% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -
\subsubsection{Inline and display code}
% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -

> inline                        :: Lang -> Formats -> Bool -> String -> Either Exc Doc
> inline lang fmts auto         =   fmap unNL
>                               >>> tokenize lang
>                               >=> lift (number 1 1)
>                               >=> when auto (lift (filter (isNotSpace . token)))
>                               >=> lift (partition (\t -> catCode t /= White))
>                               >=> exprParse *** return
>                               >=> lift (substitute fmts auto) *** return
>                               >=> lift (uncurry merge)
>                               >=> lift (fmap token)
>                               >=> when auto (lift addSpaces)
>                               >=> lift (latexs fmts)
>                               >=> lift sub'inline

> display                       :: Lang -> Int -> Formats -> Bool -> Int -> Int -> Stack
>                               -> String -> Either Exc (Doc, Stack)
> display lang line fmts auto sep lat _stack
>                               =   lift trim
>                               >=> lift (expand 0)
>                               >=> tokenize lang
>                               >=> lift (number line 1)
>       --                     |>=> when auto (lift (filter (isNotSpace . token)))|
>                               >=> lift (partition (\t -> catCode t /= White))
>                               >=> exprParse *** return
>                               >=> lift (substitute fmts auto) *** return
>                               >=> lift (uncurry merge)
>                               >=> lift lines
>                               >=> when auto (lift (fmap addSpaces))
>                               >=> lift (\ts -> (autoalign sep ts,ts))
>       --                     |>=> lift (\(x,y) -> trace ((unlines $ map show $ y) ++ "\n" ++ show x) (x,y))|
>                               >=> lift (\(cs,ts) -> let ats = align cs sep lat ts
>                                                         cs' = [("B",0)] ++ cs
>                                                            ++ [("E",error "E column")]
>                                                     in  (autocols cs' ats,ats)
>                                        )
>                               >=> return *** when auto (lift (fmap (fmap (filter (isNotSpace . token)))))
>       --                     |>=> return *** when auto (lift (fmap (fmap (addSpaces . filter (isNotSpace . token)))))|
>                               >=> lift (\((cs,z),ats) -> (cs,(z,ats)))
>                               >=> return *** lift (\(z,ats) -> leftIndent fmts auto z [] ats)
>       -- ks, 17.07.2003: i've changed "stack" into "[]" and thereby disabled
>       -- the global stack for now as it leads to unexepected behaviour
>                               >=> lift (\(cs,(d,stack)) -> (sub'code (columns cs <<>> d),stack))
>
> columns                       :: [(String,Doc)] -> Doc
> columns                       =  foldr (<<>>) Empty
>                               .  map (uncurry sub'column)

% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -
\subsubsection{A very simple Haskell Parser}
% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -

The parser is based on the Smugweb parser.
This variant can handle unbalanced parentheses in some cases (see below).

> exprParse                     :: (CToken tok, Show tok) => [Pos tok] -> Either Exc (Chunk (Pos tok))
> exprParse s                   =  case run (chunk 0) s of
>     Nothing                   -> Left ("syntax error", show s) -- HACK: |show s|
>     Just e                    -> Right e
>
> chunk                         :: (CToken tok) => Int -> Parser (Pos tok) (Chunk (Pos tok))
> chunk d                       =  do a <- many (atom d)
>                                     as <- many (do s <- csep; a' <- many (atom d); return (Delim s : offside a'))
>                                     return (offside a ++ concat as)
>     where offside []          =  []
>           -- old: |opt a =  [Apply a]|
>           offside (a : as)    =  Apply (a : bs) : offside cs
>               where (bs, cs)  =  span (\a' -> col' a < col' a') as
>           col' (Atom a)       =  poscol a
>           col' (Paren a _ _)  =  poscol a
>
> atom                          :: (CToken tok) => Int -> Parser (Pos tok) (Atom (Pos tok))
> atom d                        =   fmap Atom cnoSep
>                               <|> do l <- left
>                                      e <- chunk (d+1)
>                                      r <- right l
>                                      return (Paren l e r)
>                               <|> if d == 0 then do r <- anyright
>                                                     return (Paren (fromToken $ TeX False Empty) [] r)
>                                             else empty

ks, 09.09.2003: Added handling of unbalanced parentheses, surely not in the
most elegant way. Both |chunk| and |atom| now take an integer argument
indicating the nesting level. Only on the top-level unbalanced right
parentheses are accepted. The end of file (end of code block) can be
parsed as an arbitrary amount of right parentheses.

Primitive parser.

> csep, cnoSep, left, anyright  :: (CToken tok) => Parser tok tok
> csep                          =  satisfy (\t -> catCode t == Sep)
> cnoSep                        =  satisfy (\t -> catCode t == NoSep)
> left                          =  satisfy (\t -> case catCode t of Del c -> c `elem` "([{"; _ -> False)
> anyright                      =  satisfy (\t -> case catCode t of Del c -> c `elem` ")]}"; _ -> False)
>
> right                         :: (CToken tok) => tok -> Parser tok tok
> right l                       =   satisfy (\c' -> case (catCode l, catCode c') of
>                                     (Del o, Del c) -> (o,c) `elem` zip "([{" ")]}"
>                                     _     -> False)
>                               <|> do eof
>                                      return (fromToken $ TeX False Empty)

ks, 06.09.2003: Modified the |right| parser to accept the end of file,
to allow for unbalanced parentheses. This behaviour is not (yet) backported
to |math| style. Also added |anyright|.

% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -
\subsubsection{Internal alignment}
% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -

> data Line a                   =  Blank
>                               |  Poly  [((String,Int),a,Bool)]
>
> autoalign                     :: (Show tok,CToken tok) => Int              -- "Trennung"
>                                               -> [[Pos tok]]      -- positionierte tokens per Zeile
>                                               -> [(String,Int)]   -- alignment-info (Name, Spalte)
> autoalign sep toks            =  map (\x -> (show x,x))
>                               .  nub
>                               .  sort
>                               .  concat
>                               .  fmap findCols
>                               $  toks
>   where
>   findCols                    :: (CToken tok,Show tok) => [Pos tok] -> [Col]
>   findCols ts                 =  case {- |trace (show ts)| -}
>                                       (break (\t -> not . isNotSpace . token $ t) ts) of
>       (_, [])                 -> []   -- done
>       (_, [_v])               -> []   -- last token is whitespace, doesn't matter
>       (_, v:v':vs)
>         | posrow v' == 0 && poscol v' == 0
>                               -> findCols (v:vs)  -- skip internal tokens (automatically added spaces)
>         | length (string (token v)) >= sep
>                               -> {- |trace ("found: " ++ show (col v')) $| -} poscol v' : findCols (v':vs)
>         | otherwise           -> {- |trace ("found too short")|            -} findCols (v':vs)

ks, 21.11.2005: I've fixed a bug that was known to me since long ago, but I never got
around to investigate. When a parametrized formatting directive directly precedes a
token that should be aligned, then sometimes that token was not aligned. The reason
was that in |findCols| above, the recursive calls used |vs| instead of |(v':vs)|.

> align                         :: (CToken tok) => [(String,Int)]   -- alignment-info (Name, Spalte)
>                                               -> Int              -- "Trennung"
>                                               -> Int              -- "Traegheit"
>                                               -> [[Pos tok]]      -- positionierte tokens per Zeile
>                                               -> [Line [Pos tok]]
> align cs sep lat toks         =  fmap (\t -> {- |trace (show (map token t) ++ "\n") $| -}
>                                              let res = splitn ("B",0) False cs t
>                                              in  if null [x | x <- t
>                                                          , (posrow x /= 0 || poscol x /= 0) && isNotSpace (token x)]
>                                                      || null res
>                                              then Blank
>                                              else Poly res
>                                       ) toks
>   where
>   splitn _cc _ind [] []       =  []
>   splitn  cc  ind [] ts       =  [(cc,ts,ind)]
>   splitn  cc  ind ((n,i):oas) ts=
>     case span (\t -> poscol t < i) ts of
>       ([], vs)                -> splitn cc ind oas vs
>       (us, [])                -> [(cc,us,ind)]
>       (us, (v:vs))            ->
>         let lu = head [ u | u <- reverse us, poscol u /= 0 || posrow u /= 0 ]
>                                  -- again, we skip automatically added spaces
>             llu = length (string (token lu))
>         in case () of
>             _ | (lat /= 0 && isNotSpace (token lu)) || llu < lat || poscol v /= i
>                                  -- no alignment for this column
>                               -> splitn cc ind oas (us ++ (v:vs))
>               | not (isNotSpace (token lu)) && llu >= sep
>                               -> (cc,us,ind) : splitn (n,i) True oas (v:vs)
>               | otherwise
>                               -> (cc,us,ind) : splitn (n,i) False oas (v:vs)

The function |isInternal| returns |True| iff the argument is a symbol
or a special internal symbol. See @HsLexer@ for the list of special
symbols.

> isInternal                    :: (CToken tok) => tok -> Bool
> isInternal t                  =  case token t of
>     Consym _                  -> True
>     Varsym _                  -> True
>     Special _                 -> True
>     _                         -> False
>
> instance Functor Line where
>     fmap _f Blank             =  Blank
>     fmap  f (Poly ls)         =  Poly (map (\(x,y,z) -> (x,f y,z)) ls)

% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -
\subsubsection{Automatically determining centered columns}
% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -

We use a simple heuristic: a column that contains only single tokens and
at least one ``internal'' token is centered. For centered columns, we create
an additional ``end'' column to make sure that all entries are centered on the
same amount of space.

> autocols                      :: (CToken tok, Show tok) => [(String,Int)]   -- column info
>                                               -> [Line [Pos tok]] -- aligned tokens
>                                               -> ([(String,Doc)],[Col]) -- cols+alignment, plus centered columns
> autocols cs ats               = (\(x,y) -> (concat x,concat y)) $ unzip
>                               $ zipWith3 (\(cn,n) ml ai ->
>                                              if ml <= 2 && ai then ([(cn,sub'centered)
>                                                                     ,(cn ++ "E",sub'dummycol)
>                                                                     ],[n])
>                                                               else ([(cn,sub'left)],[])
>                                          ) cs maxlengths anyinternals
>                                 -- length 2, because space tokens are always there
>     where
>     cts                       = transpose (concatMap (deline cs) ats)
>     maxlengths                = {- |trace (show cts) $ |-} map (maximum . map length) cts
>     anyinternals              = map (any (any isInternal)) cts
>
>     deline                    :: [(String,Int)] -> Line [a] -> [[[a]]]
>     deline _cs' Blank         = []
>     deline  cs' (Poly ls)     = [decol cs' ls]
>
>     decol                     :: [(String, Int)] -> [((String, Int), [a], Bool)] -> [[a]]
>     decol cs' []              = replicate (length cs') []
>     decol ((cn,_):cs') r@(((cn',_),ts,_):rs)
>       | cn' == cn             = ts : decol cs' rs
>       | otherwise             = [] : decol cs' r
>     decol _ _                 = impossible "autocols.decol"

% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -
\subsubsection{Adding spaces}
% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -

Inserting spaces before and after keywords. We use a simple finite
automata with three states: |before b| means before a keyword, |b|
indicates whether to insert a space or not; |after| means immediately
after a keyword (hence |before b| really means not immediately after).

> addSpaces                     :: (CToken tok) => [tok] -> [tok]
> addSpaces ts0                 =  before False ts0
>     where
>     before _b []              =  []
>     before  b (t : ts)        =  case token t of
>         u | not (isNotSpace u)-> t : before b ts
>           | selfSpacing u     -> t : before False ts
>         Special c
>           | c `elem` ",;([{"  -> t : before False ts
>         Keyword _             -> [ fromToken (TeX False sub'space) | b ] ++ t : after ts
>         _                     -> t : before True ts
>
>     after []                  =  []
>     after (t : ts)            =  case token t of
>         u | not (isNotSpace u)-> t : after ts
>           | selfSpacing u     -> t : before False ts
>         Special c
>           | c `elem` ",;([{"  -> fromToken (TeX False sub'space) : t : before False ts
>         Keyword _             -> fromToken (TeX False sub'space) : t : after ts
>         _                     -> fromToken (TeX False sub'space) : t : before True ts

Operators are `self spacing'.

> selfSpacing                   :: Token -> Bool
> selfSpacing (Consym _)        =  True
> selfSpacing (Varsym _)        =  True
> selfSpacing (Op _)            =  True
> -- |selfSpacing (TeX _) =  True|
> selfSpacing _                 =  False

\NB It's not a good idea to regard inline \TeX\ as self spacing -- consider,
for example, a macro like @%format mu = "\mu "@.

% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -
\subsubsection{Left indentation}
% - - - - - - - - - - - - - - - = - - - - - - - - - - - - - - - - - - - - - - -

ks, 16.07.2003: I don't quite understand the meaning of |auto|. Even
if |auto = False|, the stack is still updated.

ks, 16.07.2003: I'm going to implement the following relatively simple
heuristic for indentation. Based on my current experience with the
@poly@-style I think that this is sufficient. If not, one can still
indent explicitly via annotations.

> type Stack                    =  [(Col, Line [Pos Token])]

The stack is a list of pairs of column numbers and tokens. The head
of the list is the largest column number, and the column numbers in
the list appear sorted and descending.

Indentations occur at the beginning of a line, and the position
of the first token of the line is relevant. First, the stack is
adjusted: all elements that have a higher or equal column number
than the current line are removed.

In the now topmost stack element, we then look for the final token
that occurs at a column less than or equal to the current element.
Relative to this token, we indent. Note: this happens in \emph{all}
situations, currently. Perhaps, there are a few situations where
this is not a good idea, but let's see.

As a final step, the current line is placed on the stack.

> leftIndent                    :: Formats -> Bool
>                               -> [Col]        -- centered columns
>                               -> Stack        -- current stack
>                               -> [Line [Pos Token]]
>                               -> (Doc, Stack)
> leftIndent dict _auto z stack0
>                               =  loop True stack0
>   where
>   loop                        :: Bool -> Stack -> [Line [Pos Token]] -> (Doc, Stack)
>   loop _first stack []        =  (Empty, stack)  -- done
>   loop  first stack (l:ls)    =  case l of
>       Blank                   -> loop True stack ls -- ignore blank lines
>    {-| Poly x || trace (show x) False -> undefined |-}
>       Poly []                 -> loop True stack ls -- next line
>       Poly (((_n,_c),[],_ind):rs)
>         | first               -> loop True stack (Poly rs:ls) -- ignore leading blank columns
>       Poly p@(((n,c),ts,_ind):rs)
>         | first               -> -- check indentation
>                                  let -- step 1: shrink stack
>                                      rstack  = dropWhile (\(rc',_) -> rc' >= c) stack
>                                      -- step 2: find relevant column
>                                      (rn,rc) = findrel (n,c) rstack
>                                      -- step 3: place line on stack
>                                      fstack  = (c,l) : rstack
>                                  in mkFromTo fstack rn n rc [fromToken $ TeX False (indent (rn,rc) (n,c))] p ls
>
>
>         | c `elem` z          -> mkFromTo stack n (n ++ "E") c ts rs ls
>                                                     -- treat centered lines special
>       Poly [((n,c),ts,_ind)]  -> mkFromTo stack n "E" c ts [] ls
>                                                     -- last columns
>       Poly (((n,c),ts,_ind):rs@(((nn,_),_,_):_))
>                               -> mkFromTo stack n nn  c ts rs ls
>
>   mkFromTo                    :: Stack -> String -> String -> Col -> [Pos Token] -> [((String, Int), [Pos Token], Bool)] -> [Line [Pos Token]] -> (Doc, Stack)
>   mkFromTo stack bn en _c ts rs ls
>     | bn == en                =  -- this can happen at the beginning of a line due to indentation
>                                  (rest,stack')
>     | otherwise               =  (sub'fromto bn en (latexs dict ts)
>                                     <<>> (if null rs then sep ls else Empty) <<>> rest
>                                  ,stack'
>                                  )
>     where
>       (rest,stack')           =  loop False  -- not first of a line
>                                       stack
>                                       (Poly rs : ls)
>
>
>   findrel                     :: (String,Col) -> Stack -> (String,Col)
>   findrel (n,c) []            =  (n,c)
>   findrel (n,c) ((_,Blank):r) =  findrel (n,c) r  -- should never happen
>   findrel (_n,c) ((_,Poly t):_)
>                               =  case break (\((_n',c'),_,_) -> c' > c) t of
>                                    ([],_)     -> error "findrel: the impossible happened"
>                                    (pre,_)    -> let ((rn,rc),_,_) = last pre
>                                                  in  (rn,rc)
>
>   sep []                      =  Empty
>   sep (Blank : _ )            =  sub'blankline
>   sep (_ : _)                 =  sub'nl
>
>   indent                      :: (String,Int) -> (String,Int) -> Doc
>   indent (_n,c) (_n',c')
>     | c /= c'                 =  sub'indent (Text (show (c' - c)))
>     | otherwise               =  Empty

%
\begin{verbatim}
where |a      =    where |Str c =    [    [    (    {
      |(b, c) =          |c@(..)=    ,    |    ,    ;
                                     ]    ]    )    }
\end{verbatim}