> {-# LANGUAGE OverloadedStrings #-}
> import Data.Char (toLower)
> import Data.Maybe (maybe,maybeToList)
> import Data.Text.Lazy (Text)
> import qualified Data.Text.Lazy as T
> import qualified Data.Text.Lazy.IO as T
> import System.Environment (getArgs,getProgName)
> import System.Exit (exitWith,ExitCode(..))
> import System.IO (hPutStrLn,stderr)
> import System.Console.GetOpt (OptDescr(..),ArgDescr(..),ArgOrder(..),usageInfo,getOpt)


What are literate programs?
===========================

There are several styles of literate programming. Most commonly,
these are: LaTeX-style code tags, Bird tags and Markdown fenced code
blocks.

Each of these styles is characterised by its own set of delimiters:

> data Delim = BeginCode  | EndCode
>            | BirdTag
>            | TildeFence | BacktickFence
>            deriving (Eq)

> instance Show Delim where
>   show BeginCode     = "\\begin{code}"
>   show EndCode       = "\\end{code}"
>   show BirdTag       = ">"
>   show TildeFence    = "~~~"
>   show BacktickFence = "```"

In LaTeX-style, a codeblock is delimited by `\begin{code}` and
`\end{code}` tags.

> beginCode, endCode :: Text
> beginCode = "\\begin{code}"
> endCode   = "\\end{code}"
>
> isBeginCode, isEndCode :: Text -> Bool
> isBeginCode l = beginCode `T.isPrefixOf` l
> isEndCode   l = endCode   `T.isPrefixOf` l


In Bird-style, every line in a codeblock must start with a Bird tag.
A tagged line is defined as *either* a line containing solely the
symbol '>', or a line starting with the symbol '>' followed by at
least one space.

> isBirdTag :: Text -> Bool
> isBirdTag l = (l == ">") || ("> " `T.isPrefixOf` l)

Due to this definition, whenever we strip a bird tag, we also remove
a the first space following it.

> stripBirdTag :: Text -> Text
> stripBirdTag l
>   | l == ">" = ""
>   | otherwise = T.drop 2 l


Lastly, Markdown supports two styles of fenced codeblocks: using
tildes or using backticks.

> tildeFence, backtickFence :: Text
> tildeFence    = "~~~"
> backtickFence = "```"
>
> isTildeFence, isBacktickFence :: Text -> Bool
> isTildeFence    l = tildeFence    `T.isPrefixOf` l
> isBacktickFence l = backtickFence `T.isPrefixOf` l

These two fences have support for adding metadata, in the form of a
CSS-style dictionary (`{#mycode .haskell .numberLines startFrom=100}`)
for long fences or a list of classes for short fences.[^fenced-code-attributes]


In general, we will also need a function that checks, for a given
line, whether it conforms to *any* of the styles.

> isDelim :: Text -> Maybe Delim
> isDelim l
>   | isBeginCode     l = Just BeginCode
>   | isEndCode       l = Just EndCode
>   | isBirdTag       l = Just BirdTag
>   | isTildeFence    l = Just TildeFence
>   | isBacktickFence l = Just BacktickFence
>   | otherwise         = Nothing

And, for the styles which use opening and closing brackets, we will
need a function that checks if these pairs match.

> match :: Delim -> Delim -> Bool
> match BeginCode     EndCode       = True
> match TildeFence    TildeFence    = True
> match BacktickFence BacktickFence = True
> match _             _             = False

Note that Bird-tags are notably absent from the `match` function, as
they are a special case.


What do we want `unlit` to do?
==============================

The `unlit` program that we will implement below will do the following:
it will read a literate program from the standard input---allowing one
or more styles of code block---and emit only the code to the standard
output.

The options for source styles are as follows:

> data Name  = LaTeX | Bird | Markdown deriving (Show)
> data Style = Style { name :: Name, allowed :: [Delim] }
>
> latex, bird, markdown :: Style
> latex    = Style LaTeX    [BeginCode, EndCode]
> bird     = Style Bird     [BirdTag]
> markdown = Style Markdown [BirdTag, TildeFence, BacktickFence]

Additionally, when the source style is set to `Nothing`, the program
will guess the style based on the first delimiter it encounters,
always guessing the most permissive style---i.e. when it encounters a
Bird-tag it will assume that it is dealing with a Markdown-style
literate file and also allow fenced code blocks.

> infer :: Maybe Delim -> Maybe Style -> Maybe Style
> infer  Nothing         Nothing  = Nothing
> infer (Just BeginCode) Nothing  = Just latex
> infer (Just _)         Nothing  = Just markdown
> infer  Nothing        (Just ss) = Just ss
> infer (Just del)      (Just ss) = Just (check del ss)

> check :: Delim -> Style -> Style
> check del ss
>   | del `elem` allowed ss = ss
>   | otherwise = error ("delimiter " ++ show del ++ " disallowed in " ++ show (name ss))

Thus, the `unlit` function will have two parameters: its source style
and the text to convert.

> unlit :: Maybe Style -> [(Int, Text)] -> [Text]
> unlit ss = unlit' ss Nothing

However, the helper function `unlit'` is best thought of as a finite
state automaton, where the states are used to remember the what kind
of code block (if any) the automaton currently is in.

> type State = Maybe Delim

> unlit' :: Maybe Style -> State -> [(Int, Text)] -> [Text]
> unlit' _ _ [] = []
> unlit' ss q ((n, l):ls) = case (q, q') of
>
>   (Nothing      , Nothing)      -> continue
>   (Nothing      , Just BirdTag) -> blockOpen     $ Just (stripBirdTag l)
>   (Just BirdTag , Just BirdTag) -> blockContinue $ stripBirdTag l
>   (Just BirdTag , Nothing)      -> blockClose
>   (Nothing      , Just EndCode) -> spurious EndCode
>   (Nothing      , Just o)       -> blockOpen     $ Nothing
>   (Just o       , Nothing)      -> blockContinue $ l
>   (Just o       , Just c)       -> if o `match` c then blockClose else spurious c
>
>   where
>     q'              = isDelim l
>     continueWith  q = unlit' (infer q' ss) q ls
>     continue        = continueWith q
>     blockOpen     l = maybeToList l ++ continueWith q'
>     blockContinue l = l : continue
>     blockClose      = T.empty : continueWith Nothing
>     spurious      q = error ("at line " ++ show n ++ ": spurious " ++ show q)


What do we want `relit` to do?
==============================

Sadly, no, `relit` won't be able to take source code and
automatically convert it to literate code. I'm not quite up to the
challenge of automatically generating meaningful documentation from
arbitrary code... I wish I was.

What `relit` will do is read a literate file using one style of
delimiters and emit the same file using an other style of delimiters.

> relit :: Maybe Style -> Name -> [(Int, Text)] -> [Text]
> relit ss ts = relit' ss ts Nothing

Again, we will interpret the helper function `relit'` as an
automaton, which remembers the current state. However, we now also
need a function which can emit code blocks in a certain style. For
this purpose we will define a triple of functions.

> emitBirdTag :: Text -> Text
> emitBirdTag l = "> " `T.append` l
>
> emitOpen  :: Name -> Maybe Text -> [Text]
> emitOpen  LaTeX    l = beginCode : maybeToList l
> emitOpen  Bird     l = T.empty : map emitBirdTag (maybeToList l)
> emitOpen  Markdown l = backtickFence : maybeToList l
>
> emitCode  :: Name -> Text -> Text
> emitCode  Bird     l = emitBirdTag l
> emitCode  _        l = l
>
> emitClose :: Name -> Text
> emitClose LaTeX      = endCode
> emitClose Bird       = T.empty
> emitClose Markdown   = backtickFence

Using these simple functions we can easily define the `relit'`
function.

> relit' :: Maybe Style -> Name -> State -> [(Int, Text)] -> [Text]
> relit' _ _ _ [] = []
> relit' ss ts q ((n, l):ls) = case (q, q') of
>
>   (Nothing      , Nothing)      -> l : continue
>   (Nothing      , Just BirdTag) -> blockOpen     $ Just (stripBirdTag l)
>   (Just BirdTag , Just BirdTag) -> blockContinue $ stripBirdTag l
>   (Just BirdTag , Nothing)      -> blockClose
>   (Nothing      , Just EndCode) -> spurious EndCode
>   (Nothing      , Just o)       -> blockOpen     $ Nothing
>   (Just o       , Nothing)      -> blockContinue $ l
>   (Just o       , Just c)       -> if o `match` c then blockClose else spurious c
>
>   where
>     q'              = isDelim l
>     continueWith  q = relit' (infer q' ss) ts q ls
>     continue        = continueWith q
>     blockOpen     l = emitOpen  ts l ++ continueWith q'
>     blockContinue l = emitCode  ts l : continue
>     blockClose      = emitClose ts   : continueWith Nothing
>     spurious      q = error ("at line " ++ show n ++ ": spurious " ++ show q)


Implementing the `main` function
================================

All that remains now is to implement the `main` function. This is
grossly uninteresting, so go look elsewhere.

> str2name :: String -> Maybe Name
> str2name arg = case map toLower arg of
>   "latex"    -> Just LaTeX
>   "bird"     -> Just Bird
>   "markdown" -> Just Markdown
>   "code"     -> Nothing
>   _          -> error ("non-existent style " ++ arg)
>
> name2style LaTeX    = latex
> name2style Bird     = bird
> name2style Markdown = markdown
>
> data Options = Options
>   { optSourceStyle :: Maybe Style
>   , optTargetStyle :: Maybe Name
>   }
>
> defaultOptions :: Options
> defaultOptions = Options Nothing Nothing
>
> options :: [ OptDescr (Options -> IO Options) ]
> options =
>   [ Option "s" ["source"]
>     (ReqArg (\arg opt -> return opt { optSourceStyle = fmap name2style (str2name arg) })
>             "STYLE_NAME")
>     "Source style (latex, bird, markdown)"
>   , Option "t" ["target"]
>     (ReqArg (\arg opt -> return opt { optTargetStyle = str2name arg })
>             "STYLE_NAME")
>     "Target style (latex, bird, markdown)"
>   , Option "h" ["help"]
>     (NoArg  (\_ -> do
>     	        prg <- getProgName
>               hPutStrLn stderr (usageInfo prg options)
>               exitWith ExitSuccess))
>     "Show help"
>   ]
>
> main :: IO ()
> main = do
>   args <- getArgs
>
>   -- parse options
>   let (actions, nonOptions, errors) = getOpt Permute options args
>   opts <- foldl (>>=) (return defaultOptions) actions
>   let Options { optSourceStyle = ss
>               , optTargetStyle = ts } = opts
>
>   -- define unlit/relit
>   let run = maybe (unlit ss) (relit ss) ts
>
>   -- run unlit/relit
>   T.getContents >>= sequence_ . fmap T.putStrLn . run . zip [1..] . T.lines


[^fenced-code-attributes]: http://johnmacfarlane.net/pandoc/demo/example9/pandocs-markdown.html#extension-fenced_code_attributes