module Text.Pandoc.Readers.Textile ( readTextile) where
import Text.Pandoc.Definition
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag )
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
import Text.HTML.TagSoup.Match
import Data.List ( intercalate )
import Data.Char ( digitToInt, isUpper)
import Control.Monad ( guard, liftM, when )
import Text.Printf
import Control.Applicative ((<$>), (*>), (<*), (<$))
import Data.Monoid
import Debug.Trace (trace)
readTextile :: ReaderOptions 
            -> String       
            -> Pandoc
readTextile opts s =
  (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n")
parseTextile :: Parser [Char] ParserState Pandoc
parseTextile = do
  
  oldOpts <- stateOptions `fmap` getState
  updateState $ \state -> state{ stateOptions =
                                   oldOpts{ readerSmart = True
                                          , readerParseRaw = True
                                          , readerOldDashes = True
                                          } }
  many blankline
  startPos <- getPosition
  
  
  let firstPassParser = noteBlock <|> lineClump
  manyTill firstPassParser eof >>= setInput . concat
  setPosition startPos
  st' <- getState
  let reversedNotes = stateNotes st'
  updateState $ \s -> s { stateNotes = reverse reversedNotes }
  
  blocks <- parseBlocks
  return $ Pandoc nullMeta (B.toList blocks) 
noteMarker :: Parser [Char] ParserState [Char]
noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
noteBlock :: Parser [Char] ParserState [Char]
noteBlock = try $ do
  startPos <- getPosition
  ref <- noteMarker
  optional blankline
  contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock)
  endPos <- getPosition
  let newnote = (ref, contents ++ "\n")
  st <- getState
  let oldnotes = stateNotes st
  updateState $ \s -> s { stateNotes = newnote : oldnotes }
  
  return $ replicate (sourceLine endPos  sourceLine startPos) '\n'
parseBlocks :: Parser [Char] ParserState Blocks
parseBlocks = mconcat <$> manyTill block eof
blockParsers :: [Parser [Char] ParserState Blocks]
blockParsers = [ codeBlock
               , header
               , blockQuote
               , hrule
               , commentBlock
               , anyList
               , rawHtmlBlock
               , rawLaTeXBlock'
               , maybeExplicitBlock "table" table
               , maybeExplicitBlock "p" para
               , mempty <$ blanklines
               ]
block :: Parser [Char] ParserState Blocks
block = do
  res <- choice blockParsers <?> "block"
  pos <- getPosition
  tr <- getOption readerTrace
  when tr $
    trace (printf "line %d: %s" (sourceLine pos)
           (take 60 $ show $ B.toList res)) (return ())
  return res
commentBlock :: Parser [Char] ParserState Blocks
commentBlock = try $ do
  string "###."
  manyTill anyLine blanklines
  return mempty
codeBlock :: Parser [Char] ParserState Blocks
codeBlock = codeBlockBc <|> codeBlockPre
codeBlockBc :: Parser [Char] ParserState Blocks
codeBlockBc = try $ do
  string "bc. "
  contents <- manyTill anyLine blanklines
  return $ B.codeBlock (unlines contents)
codeBlockPre :: Parser [Char] ParserState Blocks
codeBlockPre = try $ do
  (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
  result' <- (innerText . parseTags) `fmap` 
               manyTill anyChar (htmlTag (tagClose (=="pre")))
  optional blanklines
  
  let result'' = case result' of
                      '\n':xs -> xs
                      _       -> result'
  
  let result''' = case reverse result'' of
                       '\n':_ -> init result''
                       _      -> result''
  let classes = words $ fromAttrib "class" t
  let ident = fromAttrib "id" t
  let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
  return $ B.codeBlockWith (ident,classes,kvs) result'''
header :: Parser [Char] ParserState Blocks
header = try $ do
  char 'h'
  level <- digitToInt <$> oneOf "123456"
  attr <- attributes
  char '.'
  lookAhead whitespace
  name <- trimInlines . mconcat <$> many inline
  attr' <- registerHeader attr name
  return $ B.headerWith attr' level name
blockQuote :: Parser [Char] ParserState Blocks
blockQuote = try $ do
  string "bq" >> attributes >> char '.' >> whitespace
  B.blockQuote <$> para
hrule :: Parser [Char] st Blocks
hrule = try $ do
  skipSpaces
  start <- oneOf "-*"
  count 2 (skipSpaces >> char start)
  skipMany (spaceChar <|> char start)
  newline
  optional blanklines
  return B.horizontalRule
anyList :: Parser [Char] ParserState Blocks
anyList = try $ anyListAtDepth 1 <* blanklines
anyListAtDepth :: Int -> Parser [Char] ParserState Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
                                orderedListAtDepth depth,
                                definitionList ]
bulletListAtDepth :: Int -> Parser [Char] ParserState Blocks
bulletListAtDepth depth = try $ B.bulletList  <$> many1 (bulletListItemAtDepth depth)
bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'
orderedListAtDepth :: Int -> Parser [Char] ParserState Blocks
orderedListAtDepth depth = try $ do
  items <- many1 (orderedListItemAtDepth depth)
  return $ B.orderedList items
orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'
genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState Blocks
genericListItemAtDepth c depth = try $ do
  count depth (char c) >> attributes >> whitespace
  p <- mconcat <$> many listInline
  newline
  sublist <- option mempty (anyListAtDepth (depth + 1))
  return $ (B.plain p) <> sublist
definitionList :: Parser [Char] ParserState Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem
listStart :: Parser [Char] ParserState ()
listStart = genericListStart '*'
        <|> () <$ genericListStart '#'
        <|> () <$ definitionListStart
genericListStart :: Char -> Parser [Char] st ()
genericListStart c = () <$ try (many1 (char c) >> whitespace)
definitionListStart :: Parser [Char] ParserState Inlines
definitionListStart = try $ do
  char '-'
  whitespace
  trimInlines . mconcat <$>
    many1Till inline (try (string ":=")) <* optional whitespace
listInline :: Parser [Char] ParserState Inlines
listInline = try (notFollowedBy newline >> inline)
         <|> try (endline <* notFollowedBy listStart)
definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks])
definitionListItem = try $ do
  term <- definitionListStart
  def' <- multilineDef <|> inlineDef
  return (term, def')
  where inlineDef :: Parser [Char] ParserState [Blocks]
        inlineDef = liftM (\d -> [B.plain d])
                    $ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline
        multilineDef :: Parser [Char] ParserState [Blocks]
        multilineDef = try $ do
          optional whitespace >> newline
          s <- many1Till anyChar (try (string "=:" >> newline))
          
          ds <- parseFromString parseBlocks (s ++ "\n\n")
          return [ds]
rawHtmlBlock :: Parser [Char] ParserState Blocks
rawHtmlBlock = try $ do
  skipMany spaceChar
  (_,b) <- htmlTag isBlockTag
  optional blanklines
  return $ B.rawBlock "html" b
rawLaTeXBlock' :: Parser [Char] ParserState Blocks
rawLaTeXBlock' = do
  guardEnabled Ext_raw_tex
  B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
para :: Parser [Char] ParserState Blocks
para = B.para . trimInlines . mconcat <$> many1 inline
tableCell :: Parser [Char] ParserState Blocks
tableCell = do
  c <- many1 (noneOf "|\n")
  content <- trimInlines . mconcat <$> parseFromString (many1 inline) c
  return $ B.plain content
tableRow :: Parser [Char] ParserState [Blocks]
tableRow = try $ ( char '|' *>
  (endBy1 tableCell (optional blankline *> char '|')) <* newline)
tableRows :: Parser [Char] ParserState [[Blocks]]
tableRows = many1 tableRow
tableHeaders :: Parser [Char] ParserState [Blocks]
tableHeaders = let separator = (try $ string "|_.") in
  try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline )
table :: Parser [Char] ParserState Blocks
table = try $ do
  headers <- option mempty tableHeaders
  rows <- tableRows
  blanklines
  let nbOfCols = max (length headers) (length $ head rows)
  return $ B.table mempty
    (zip (replicate nbOfCols AlignDefault) (replicate nbOfCols 0.0))
    headers
    rows
maybeExplicitBlock :: String  
                    -> Parser [Char] ParserState Blocks 
                    -> Parser [Char] ParserState Blocks
maybeExplicitBlock name blk = try $ do
  optional $ try $ string name >> attributes >> char '.' >>
    optional whitespace >> optional endline
  blk
inline :: Parser [Char] ParserState Inlines
inline = do
    choice inlineParsers <?> "inline"
inlineParsers :: [Parser [Char] ParserState Inlines]
inlineParsers = [ str
                , whitespace
                , endline
                , code
                , escapedInline
                , inlineMarkup
                , groupedInlineMarkup
                , rawHtmlInline
                , rawLaTeXInline'
                , note
                , link
                , image
                , mark
                , (B.str . (:[])) <$> characterReference
                , smartPunctuation inline
                , symbol
                ]
inlineMarkup :: Parser [Char] ParserState Inlines
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
                      , simpleInline (string "**") B.strong
                      , simpleInline (string "__") B.emph
                      , simpleInline (char '*') B.strong
                      , simpleInline (char '_') B.emph
                      , simpleInline (char '+') B.emph  
                      , simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout
                      , simpleInline (char '^') B.superscript
                      , simpleInline (char '~') B.subscript
                      , simpleInline (char '%') id
                      ]
mark :: Parser [Char] st Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
reg :: Parser [Char] st Inlines
reg = do
  oneOf "Rr"
  char ')'
  return $ B.str "\174"
tm :: Parser [Char] st Inlines
tm = do
  oneOf "Tt"
  oneOf "Mm"
  char ')'
  return $ B.str "\8482"
copy :: Parser [Char] st Inlines
copy = do
  oneOf "Cc"
  char ')'
  return $ B.str "\169"
note :: Parser [Char] ParserState Inlines
note = try $ do
  ref <- (char '[' *> many1 digit <* char ']')
  notes <- stateNotes <$> getState
  case lookup ref notes of
    Nothing   -> fail "note not found"
    Just raw  -> B.note <$> parseFromString parseBlocks raw
markupChars :: [Char]
markupChars = "\\*#_@~-+^|%=[]&"
stringBreakers :: [Char]
stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]"
wordBoundaries :: [Char]
wordBoundaries = markupChars ++ stringBreakers
hyphenedWords :: Parser [Char] ParserState String
hyphenedWords = do
  x <- wordChunk
  xs <-  many (try $ char '-' >> wordChunk)
  return $ intercalate "-" (x:xs)
wordChunk :: Parser [Char] ParserState String
wordChunk = try $ do
  hd <- noneOf wordBoundaries
  tl <- many ( (noneOf wordBoundaries) <|>
               try (notFollowedBy' note *> oneOf markupChars
                     <* lookAhead (noneOf wordBoundaries) ) )
  return $ hd:tl
str :: Parser [Char] ParserState Inlines
str = do
  baseStr <- hyphenedWords
  
  
  fullStr <- option baseStr $ try $ do
    guard $ all isUpper baseStr
    acro <- enclosed (char '(') (char ')') anyChar'
    return $ concat [baseStr, " (", acro, ")"]
  updateLastStrPos
  return $ B.str fullStr
whitespace :: Parser [Char] st Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
endline :: Parser [Char] ParserState Inlines
endline = try $ do
  newline
  notFollowedBy blankline
  notFollowedBy listStart
  notFollowedBy rawHtmlBlock
  return B.linebreak
rawHtmlInline :: Parser [Char] ParserState Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag (const True)
rawLaTeXInline' :: Parser [Char] ParserState Inlines
rawLaTeXInline' = try $ do
  guardEnabled Ext_raw_tex
  B.singleton <$> rawLaTeXInline
link :: Parser [Char] ParserState Inlines
link = try $ do
  bracketed <- (True <$ char '[') <|> return False
  char '"' *> notFollowedBy (oneOf " \t\n\r")
  attr <- attributes
  name <- trimInlines . mconcat <$>
          withQuoteContext InDoubleQuote (many1Till inline (char '"'))
  char ':'
  let stop = if bracketed
                then char ']'
                else lookAhead $ space <|>
                       try (oneOf "!.,;:" *> (space <|> newline))
  url <- manyTill nonspaceChar stop
  let name' = if B.toList name == [Str "$"] then B.str url else name
  return $ if attr == nullAttr
              then B.link url "" name'
              else B.spanWith attr $ B.link url "" name'
image :: Parser [Char] ParserState Inlines
image = try $ do
  char '!' >> notFollowedBy space
  src <- manyTill anyChar' (lookAhead $ oneOf "!(")
  alt <- option "" (try $ (char '(' >> manyTill anyChar' (char ')')))
  char '!'
  return $ B.image src alt (B.str alt)
escapedInline :: Parser [Char] ParserState Inlines
escapedInline = escapedEqs <|> escapedTag
escapedEqs :: Parser [Char] ParserState Inlines
escapedEqs = B.str <$>
  (try $ string "==" *> manyTill anyChar' (try $ string "=="))
escapedTag :: Parser [Char] ParserState Inlines
escapedTag = B.str <$>
  (try $ string "<notextile>" *>
         manyTill anyChar' (try $ string "</notextile>"))
symbol :: Parser [Char] ParserState Inlines
symbol = B.str . singleton <$> (notFollowedBy newline *>
                                notFollowedBy rawHtmlBlock *>
                                oneOf wordBoundaries)
code :: Parser [Char] ParserState Inlines
code = code1 <|> code2
anyChar' :: Parser [Char] ParserState Char
anyChar' =
  satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline)
code1 :: Parser [Char] ParserState Inlines
code1 = B.code <$> surrounded (char '@') anyChar'
code2 :: Parser [Char] ParserState Inlines
code2 = do
  htmlTag (tagOpen (=="tt") null)
  B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
attributes :: Parser [Char] ParserState Attr
attributes = (foldl (flip ($)) ("",[],[])) `fmap` many attribute
attribute :: Parser [Char] ParserState (Attr -> Attr)
attribute = classIdAttr <|> styleAttr <|> langAttr
classIdAttr :: Parser [Char] ParserState (Attr -> Attr)
classIdAttr = try $ do 
  char '('
  ws <- words `fmap` manyTill anyChar' (char ')')
  case reverse ws of
       []                      -> return $ \(_,_,keyvals) -> ("",[],keyvals)
       (('#':ident'):classes') -> return $ \(_,_,keyvals) ->
                                             (ident',classes',keyvals)
       classes'                -> return $ \(_,_,keyvals) ->
                                             ("",classes',keyvals)
styleAttr :: Parser [Char] ParserState (Attr -> Attr)
styleAttr = do
  style <- try $ enclosed (char '{') (char '}') anyChar'
  return $ \(id',classes,keyvals) -> (id',classes,("style",style):keyvals)
langAttr :: Parser [Char] ParserState (Attr -> Attr)
langAttr = do
  lang <- try $ enclosed (char '[') (char ']') alphaNum
  return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals)
surrounded :: Parser [Char] st t   
	    -> Parser [Char] st a    
	    -> Parser [Char] st [a]
surrounded border =
  enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
simpleInline :: Parser [Char] ParserState t           
             -> (Inlines -> Inlines)       
             -> Parser [Char] ParserState Inlines   
simpleInline border construct = try $ do
  st <- getState
  pos <- getPosition
  let afterString = stateLastStrPos st == Just pos
  guard $ not afterString
  border *> notFollowedBy (oneOf " \t\n\r")
  attr <- attributes
  body <- trimInlines . mconcat <$>
          withQuoteContext InSingleQuote
            (manyTill (notFollowedBy newline >> inline)
             (try border <* notFollowedBy alphaNum))
  return $ construct $
        if attr == nullAttr
           then body
           else B.spanWith attr body
groupedInlineMarkup :: Parser [Char] ParserState Inlines
groupedInlineMarkup = try $ do
    char '['
    sp1 <- option mempty $ B.space <$ whitespace
    result <- withQuoteContext InSingleQuote inlineMarkup
    sp2 <- option mempty $ B.space <$ whitespace
    char ']'
    return $ sp1 <> result <> sp2
singleton :: a -> [a]
singleton x = [x]