{-| A parser for the medaiiwki grammar -}
module MediaWikiParser where
import Text.ParserCombinators.Parsec
import qualified Data.List as List
import qualified Data.Map as Map
import MediaWikiParseTree
import MagicStrings
import Control.Monad
import Data.String.HT (trim)
import Data.List.Split (splitOn)
import Data.List (isInfixOf)
import Tools
import Codec.Binary.UTF8.String
import Data.Maybe
import Network.URI
import WikiHelper

{-| flattens out the HTML 'a' tags. That is it replaces each 'a' element with its content. That is everything that is between its opening and its closing tag. The only parameter of this function is the parse tree to be processed. This function returns the parse tree with the 'a' HTML elements flattened |-}

reducea :: [Anything Char] -> [Anything Char]
reducea ll = concat (map go ll)
  where go :: Anything Char -> [Anything Char]
        go (Environment Tag (TagAttr "a" _) l) = l
        go (Environment x y l) = [Environment x y (reducea l)]
        go x = [x]

{-| flattens out the HTML 'div' tags, which have got a 'class' attributes present with the value 'noresize'. That is it replaces each 'div' element with the properties mentioned above by its content. That is everything that is between its opening and its closing tag. The only parameter of this function is the parse tree to be processed. This function returns the parse tree with the 'div' HTML elements with the properties given above flattened |-}

reducediv :: [Anything Char] -> [Anything Char]
reducediv ll = concat (map go ll)
  where go :: Anything Char -> [Anything Char]
        go (Environment Tag (TagAttr "div" m) l)
          | (Map.lookup "class" m) == (Just "noresize") = l
        go (Environment x y l) = [Environment x y (reducea l)]
        go x = [x]

{-| a function to get HTML elements out of a parse tree. The first parameter is name of the tag to be looked for. The second parameter is a key in the attributes of that element that has to be present for the element to be considered. The third parameter is a value that has to be found under the given key in the attributes of the element, in order for the element to be part of the returned output list. The fourth parameter is the parse tree. |-}

deepGet ::
        [Char] -> String -> [Char] -> [Anything a] -> [Anything a]
deepGet t k v ll = concat $ map go ll
  where go (Environment Tag (TagAttr tag m) l)
          | (tag == t) && ((Map.lookup k m) == (Just v)) =
            [Environment Tag (TagAttr tag m) l]
        go (Environment _ _ l) = (deepGet t k v l)
        go _ = []

{-| flattens a part of the parse tree, that is takes the characters found in the tree and turns them into a string dropping all other information in the tree -}

deepFlatten :: [Anything t] -> [Anything t]
deepFlatten ll = concat $ map go ll
  where go (Environment HtmlChar s l) = [Environment HtmlChar s l]
        go (Environment _ _ l) = (deepFlatten l)
        go x = [x]

{-| converts a wiki source document to a parse tree to be converted to LaTeX be treeToLaTeX3. The first parameter is that list of parsers. That is the list of environments to be recognized by the parser. This is usually either only plain HTML, or HTML mixed with mediawiki markup. The second parameter is the source code to be parsed. This function returns a parse tree |-}

parseit :: [MyParser Char] -> String -> [Anything Char]
parseit pp x
  = (parseit2
       (decon2 (remake pp)
          (parseAnything2
             [MyStackFrame{endparser = pzero, startdata = Str "",
                           environment = Root, badparser = \ _ -> pzero, parsernumber = 0,
                           nestingdepth = 0}]
             (remake pp)
             []))
       ('\n' : x))

{-| helper function of parseit, not to be called directly. This function takes the parser for the grammar, in the sense of a parser of the parsec library, (so that is the final combined parser) as first argument. It takes the source code to be parsed (usually HTML of mediawiki markup mixed with HTML) as second and runs the parser on the source code. It returns the resulting parse tree. |-}

parseit2 :: Parser [Anything Char] -> String -> [Anything Char]
parseit2 p input
  = case (parse p "" input) of
        Left _ -> []
        Right x -> x

{-| A parser for one particular element of the mediawiki grammar -}

data MyParser tok = MyParser{bad ::
                             [Anything tok] -> GenParser tok () (),
                             start :: MyStack tok -> GenParser tok () StartData,
                             end :: StartData -> GenParser tok () (), allowed :: [EnvType],
                             self :: EnvType,
                             modify :: StartData -> [Anything tok] -> [Anything tok],
                             reenv :: EnvType -> EnvType}

{-| the stack of the parser. See documentation on MyStackFrame in this module for details. -}

type MyStack tok = [MyStackFrame tok]

{-| A stack frame on the parsers stack. A stack frame represents an environment that was opened. So something like an opening HTML tag. The value endparser. Is a parser that should match exactly the closing bracket of the environment. The value startdata is the return value of the start parser of the enviroment. This stack frame is created immediately after the startparser of an environment has matches and is given the return value of that startparser as startdata. The value environment is the environment this stack frame belongs to in case of an HTML tag this would be Tag. See the type EnvType in the module MediaWikiParseTree for a full list of possible environments. In the parse tree that is finally generated each node with arbitrarily nested children has got an EnvType associated with it. The badparser is a parser that is repeatedly tries while processing the current environment, if it matches the current environment is considered to be invalid. Backtracking occurs an the characters currently under consideration are parser (possible very) different manner. So with badparser you can signal that an environment is invalid if a creating parser (the badparser) matches within the environment. The parserenumber is just a number that uniquely identifies each parsers in the list of parsers active for the whole parsing process. These numbers are usually generated by the remake function. The nestingdepth is a bit of a misnomer. It is a unique number for each stack frame. So each stack frame that is newly created gets a different number. -}

data MyStackFrame tok = MyStackFrame{endparser ::
                                     GenParser tok () (),
                                     startdata :: StartData, environment :: EnvType,
                                     badparser :: [Anything tok] -> GenParser tok () (),
                                     parsernumber :: Int, nestingdepth :: Int}

{-| takes a result returned by parseAnything3 and converts it into a parse tree for further processing. The only purpose of this function is to convert the notation of bracket. The bracket a denoted by the Open and Close parse tree elements of the type Anything be the function parseAnything3. The need to be converted to environments, that is node with children in the parse tree. The environments will be denoted by the 'Environment' data constructors of the type Anything|-}

decon2 ::
         (Monad m) =>
         [(a1, MyParser a)] -> m (t, [Anything a]) -> m [Anything a]
decon2 l x
  = do (_, s) <- x
       return (findMatchingBrackets l (reverse s))

{-| Usually bracket can be close in an order different from the reverse one in which they were opened. But for certain environments this is not allowed, and the order has to be strictly followed. This value is the list of those environments. -}

preserving :: [EnvType]
preserving
  = [Math, Source, Comment, Gallery, NoWiki, NoInclude, BigMath,
     Preformat, TableCap, TableRowSep, TableColSep, TableHeadColSep,
     TemplateInside, Wikitable, TableTag]

{-| Helper function for parseAnyClosingBracket. Should not be called directly. The only parameter is the current parser stack. Returns the depth on the stack of the stack frame whose closing bracket matched. -}

parseAnyClosingBracket2 ::
                          (Show tok, Eq tok, Read tok) =>
                          MyStack tok -> GenParser tok () Integer
parseAnyClosingBracket2
  = (parseAnyClosingBracket3 0) . (List.map (\ x -> endparser x))

{-| Helper function for parseAnyClosingBracket. Should not be called directly. The this will take stack frame by stack frame of the stack. The first parameter is an integer and indicates how many stack frames have allready been take of the stack. Returns the depth on the stack of the stack frame whose closing bracket matched. -}

parseAnyClosingBracket3 ::
                        Integer -> [GenParser tok () ()] -> GenParser tok () Integer
parseAnyClosingBracket3 i (x : xs)
  = try
      (do x
          return i)
      <|> (parseAnyClosingBracket3 (i + 1) xs)
parseAnyClosingBracket3 _ [] = pzero

{-| Remove the n'th elements from a list. n is an integer an given is first parameter. The list to be processed is given as second parameter -}

myremove :: Integer -> [a] -> [a]
myremove _ [] = []
myremove 0 (_ : xs) = myremove (-1) xs
myremove i (x : xs) = x : (myremove (i - 1) xs)

{-| Enumerates a list of parsers. needed to prepare a list of parsers for use with parseAnything2 -}

remake :: [a] -> [(Int, a)]
remake x = zip (iterate (+ 1) 0) x

{-| predicate to test whether the current stack-frame-index is in a stack. The first parameter is the stack-frame-index the second parameter is the stack. Returns true if it could be found -}

isin ::
       (Show tok, Eq tok, Read tok) => Int -> (MyStack tok) -> Bool
isin i s = i `elem` (List.map nestingdepth s)

{-| tries to parse exactly one specific opening bracket. The parameters are identical to the ones of parseAnyOpeningBracket, which the exception of the second parameter. The second parameter is that parser for the bracket currently under consideration. This function 'catch' the BBad 'exception' 'thrown' by parseAnything. In this case it returns pzero, causing the parser to backtrack. -}

parseSpecificOpeningBracket ::
                              (Show tok, Eq tok, Read tok) =>
                              Int ->
                                (Int, MyParser tok) ->
                                  (MyStack tok) ->
                                    [(Int, MyParser tok)] ->
                                      [Anything tok] ->
                                        GenParser tok () (Either2 (MyStack tok, [Anything tok]))
parseSpecificOpeningBracket v (n, x) s l i
  = do r <- do sd <- try (start x s)
               parseAnything (v + 1)
                 (MyStackFrame{endparser = (end x) sd, startdata = sd,
                               environment = self x, badparser = bad x, parsernumber = n,
                               nestingdepth = v}
                    : s)
                 l
                 ((Open (length s) (self x) sd n) : i)
       case r of
           BBad (ss, y) -> if isin v ss then pzero else return (BBad (ss, y))
           _ -> return r

{-| tried to parse any of the opening brackets given by the parsers passed as the third parameter. The first parameter is the stack number (see documentation of the parseAnything function for more details on that). The second parameter is the current parser stack (see documentation of the parseAnything2 function for more details on that). The forth parameter is the list of parsers to b taken into account by the general parsing process. In contrast the third parameter contains only a list of parsers that are allowed to match in the current step of the parsing process. The fifth parameter is the current parser output stream. That is the information returned by the parser up to the current step. It is kind of an accumulator for parser results. -}

parseAnyOpeningBracket ::
                         (Show tok, Eq tok, Read tok) =>
                         Int ->
                           MyStack tok ->
                             [(Int, MyParser tok)] ->
                               [(Int, MyParser tok)] ->
                                 [Anything tok] ->
                                   GenParser tok () (Either2 (MyStack tok, [Anything tok]))
parseAnyOpeningBracket _ _ [] _ _ = pzero
parseAnyOpeningBracket v s (x : xs) l i
  = try (parseSpecificOpeningBracket v x s l i) <|>
      parseAnyOpeningBracket v s xs l i

{-| insert a list of closing brackets into the parser output stream. Later on matching opening and closing brackets will be found and parse tree will be generated this way. The first parameter is an integer it is the number of brackets which should be close. The second parameter is the parser stack. It says which kind of brackets should be closed. It returns a parser output stream just containing the opening brackets -}

generateClosingBrackets ::
                          (Num a, Eq a, Show tok, Eq tok, Read tok) =>
                          a -> MyStack tok -> [Anything tok]
generateClosingBrackets 0 (s : xs)
  = [Close (length xs) (environment s)]
generateClosingBrackets mi (s : xs)
  = (Close (length xs) (environment s)) :
      (generateClosingBrackets (mi - 1) xs)
generateClosingBrackets _ _ = []

{-| insert a list of opening brackets into the parser output stream. Later on matching opening and closing brackets will be found and parse tree will be generated this way. The first parameter is an integer it is the number of brackets which should be opened. The second parameter is the parser stack. It says which kind of brackets should be opened. It returns a parser output stream just containing the opening brackets -}

generateOpeningBrackets ::
                          (Num a, Eq a, Show tok, Eq tok, Read tok) =>
                          a -> MyStack tok -> [Anything tok]
generateOpeningBrackets 0 _ = []
generateOpeningBrackets mi (s : xs)
  = (Open (length xs) (environment s) (startdata s) (parsernumber s))
      : (generateOpeningBrackets (mi - 1) xs)
generateOpeningBrackets _ _ = []

{-| a version of either with the difference that the left and right types are the same. RRight stands for sucessful parse of a token. BBad stands for parse failure in which the next possiblity is tried. -}

data Either2 b = RRight b
               | BBad b

{-| tries to match any of the currently possible closing brackets. Brackets closed in a order different from the reverse to the one in which they were opened are usually possible. And exception are the so called preserving elements, they can only be closed in the correct order. In the general case of this kind of crossbracketing it is necessary to add some opening and closing brackets to the output stream and to take the right stack frame of the stack keeping all others on it in the right order. -}

parseAnyClosingBracket ::
                         (Show tok, Eq tok, Read tok) =>
                         Int ->
                           MyStack tok ->
                             [(Int, MyParser tok)] ->
                               [Anything tok] ->
                                 GenParser tok () (Either2 (MyStack tok, [Anything tok]))
parseAnyClosingBracket v s l i
  = do mi <- try
               (do mmi <- parseAnyClosingBracket2 s
                   guard
                     (case s of
                          (g : _) -> (mmi == 0) ||
                                       ((environment g) == Link2) ||
                                         ((not ((environment g) `elem` preserving)) &&
                                            (not
                                               ((environment (s !! (fromIntegral mmi))) `elem`
                                                  preserving)))
                          [] -> False)
                   return mmi)
       let ss = myremove mi s
       parseAnything v ss l
         ((reverse
             ((generateClosingBrackets mi s) ++
                (reverse (generateOpeningBrackets mi ss))))
            ++ i)

{-| this function tries to match the bad parser of the current environment. If it matches it returns BBAD, otherwise it returns RRight. See also comment of the parserAnything function. -}

trybadparser ::
               (Show tok, Eq tok, Read tok) =>
               MyStack tok ->
                 [Anything tok] -> GenParser tok () (Either2 (MyStack tok, [a2]))
trybadparser s i
  = do x <- case s of
                (g : _) -> (do _ <- (badparser g) i
                               return True)
                             <|> return False
                [] -> return False
       if x == True then return (BBad (s, [])) else
         return (RRight (s, []))

{-| this is the main function of the parser which calls itself recursively. To run the parser you should not call this function directly but rather use parseAnything2. The parameter are the same as the parameters to the parameters to the function parseAnything2. So look at the documentation for their meaning. But there is one additional parameter namely the first one. This is the stack frame number, it is increase for every stack frame and never decreased this way each stack frame has got a unique identifier this way. An other difference is the return type this function returns the always same type as the function parseAnything2, but wrapped in the Either2 monad. The Either2 monad has an additional bit to signal whether the parse was good or bad. The bad bit signals so called bad parser of the current environment has matched signaling that the environment is to be considered invalid, and we have to backtrack. But what we do here is just stop paring and return a successful parse, but return the bad flag as set in the return type. This will propagate through to the parser that was trying to open the environment that caused the current problem. If that recognizes the problem it can flag the environment as failed by returning pzero. So again here we just return BBad. So we kind of throw an exception. And in parseSpecificOpeningBracket we will catch BBad and signal the actual problem by returning pzero and that way kick of backtracking. -}

parseAnything ::
                (Show tok, Eq tok, Read tok) =>
                Int ->
                  MyStack tok ->
                    [(Int, MyParser tok)] ->
                      [Anything tok] ->
                        GenParser tok () (Either2 (MyStack tok, [Anything tok]))
parseAnything v s l i
  = (do eof
        return $ RRight (s, i ++ (generateClosingBrackets (length s) s)))
      <|>
      do nb <- trybadparser s i
         case nb of
             RRight _ -> try (parseAnyClosingBracket v s l i) <|>
                           case s of
                               (g : _) -> do try
                                               (parseAnyOpeningBracket v s
                                                  [x | x <- l,
                                                   (environment g) `elem` (allowed (snd x))]
                                                  l
                                                  i)
                               [] -> return (RRight ([], i))
                           <|>
                           do c <- anyToken
                              parseAnything v s l ((C c) : i)
             BBad _ -> case s of
                           (g : _) -> try
                                        (parseAnyOpeningBracket v s
                                           [x | x <- l, (environment g) `elem` (allowed (snd x))]
                                           l
                                           i)
                                        <|> return (BBad (s, i))
                           [] -> pzero

{-| This is the main entry point of the parse. So the function you need to call when you want to convert the source into the parse tree. The first parameter is the stack. I usually should contain only and exactly the root stack frame. The second parameter is an enumerated list of parsers. You usually take a list like the list parsers from this module and enumerate it by running remake on it. So thats the list of environments the parser is able to recognize. The third parameter is the parse results that have been created so far. Since we are just starting the parse this has to be the empty list. The function returns a parser. See the documentation of the parse module for more details on the type GenParser. Roughly is means that this parser takes an input list whose items are of type tok and that the parsers does not have state (hence the void type '()') and return a tuple. The first elements of that tuple is a stack. Where a new stack frame is added to the stack for each new environment that is found to open by the parser, like an opening HTML tag. And the second elements of the tuple is a parse tree, that is a list of parse tree elements, where each parse tree element may contain sublists of parse tree element. This way it is a real tree. -}

parseAnything2 ::
                 (Show tok, Eq tok, Read tok) =>
                 MyStack tok ->
                   [(Int, MyParser tok)] ->
                     [Anything tok] -> GenParser tok () (MyStack tok, [Anything tok])
parseAnything2 s l i
  = do x <- parseAnything 0 s l i
       case x of
           BBad (_, b) -> return (s, b)
           RRight b -> return b

{-| this find the matching closing bracket for an opening bracket. It returns a tuple. Its first element is the environment created form the given opening bracket together with its closing bracket and the content between opening and closing bracket. Its second elements is the remaining list of parsed elements, after the closing bracket. This list does still contain the Open and Close parser tree elements for opening and closing bracket, and those are not yet converted to environments. This function takes the list of parse tree elements after the opening bracket as first input parameter. It takes the index of the parser that created the opening bracket as second input parameter. That is the index created by the remake function in this module. It takes the size of the stack at the time when the opening bracket was found as third input parameter. It takes the EnvType of the environment of the opening bracket as fourth input parameter. It takes the StartData parse result associated with the opening bracket as fifth parameter. The sixth parameter is the accumulator an should be the empty list when calling this function externally. The seventh parameter is the remaining parse tree after the opening bracket without the opening and closing brackets converted to environments -}

findMatchingClosingBracket ::
                           [(a1, MyParser a)] ->
                             Int ->
                               Int ->
                                 EnvType ->
                                   StartData ->
                                     [Anything a] -> [Anything a] -> (Anything a, [Anything a])
findMatchingClosingBracket l n i e s b ((Close i2 e2) : xs)
  = if (i, e) == (i2, e2) then
      (Environment ((reenv (snd (l !! n))) e) s
         ((modify (snd (l !! n))) s (findMatchingBrackets l (reverse b))),
       xs)
      else findMatchingClosingBracket l n i e s ((Close i2 e2) : b) xs
findMatchingClosingBracket l n i e s b (x : xs)
  = findMatchingClosingBracket l n i e s (x : b) xs
findMatchingClosingBracket l n _ e s b []
  = (Environment ((reenv (snd (l !! n))) e) s
       ((modify (snd (l !! n))) s (findMatchingBrackets l (reverse b))),
     [])

{-| run findMatchingBrackets on the inner part environment given as second parameter. This function takes the enumerated list of parsers created by remake as first input parameter -}

findMatchingBrackets2 ::
                      [(a1, MyParser a)] -> Anything a -> Anything a
findMatchingBrackets2 l (Environment e s b)
  = Environment e s (findMatchingBrackets l b)
findMatchingBrackets2 l xs
  = Environment Root (Str "") (findMatchingBrackets l [xs])

{-| the parser (Anything3) creates a list of parser elements, which is not a tree. The environments which will form the nodes with children in the final tree are denoted as opening and closing brackets in this list. This function takes that list as second input parameter, finds matching pairs of opening and closing brackets and converts the to environments. The opening an closing brackets are already balanced because of the way Anything3 works, that means there is exactly one matching closing bracket for each opening one and they open and close in to proper order. This function takes the enumerated list of parsers as first input parameter, that is the same list also given to the function Anything. -}

findMatchingBrackets ::
                     [(a1, MyParser a)] -> [Anything a] -> [Anything a]
findMatchingBrackets l ((Open i e s n) : xs)
  = let (t, xxs) = findMatchingClosingBracket l n i e s [] xs in
      (findMatchingBrackets2 l t) : (findMatchingBrackets l xxs)
findMatchingBrackets l (x : xs) = x : (findMatchingBrackets l xs)
findMatchingBrackets _ [] = []

{-| a list of environments. Most parsers use this list as their 'allowed' variable. Meaning that the parser is only allowed to match within the environments given in the 'allowed' list -}

everywhere :: [EnvType]
everywhere = [Wikitable] ++ everywheretbl

{-| list containing the Italic and Bold environments, see documentation on the list 'everywhere' in this module -}

bi :: [EnvType]
bi = [Italic, Bold]

{-| list containing the same environments as the list 'everywhere' except the Wikitable environment, see documentation on the list 'everywhere' in this module -}

everywheretbl :: [EnvType]
everywheretbl = bi ++ everywherebi

{-| list containing the same environments as the list 'everywhere' except the environment Wikitable, Bold and Italic, see documentation on the list 'everywhere' in this module -}

everywherebi :: [EnvType]
everywherebi = basicwhere ++ [Wikilink]

{-| list containing the same environments as the list 'everywhere' except the environment Wikitable, Bold, Italic and Wikilink see documentation on the list 'everywhere' in this module -}

basicwhere :: [EnvType]
basicwhere = [Link] ++ verybasicwhere

{-| list containing the same environments as the list 'everywhere' except the environment Wikitable, Bold, Italic, Wikilink and Link see documentation on the list 'everywhere' in this module -}

verybasicwhere :: [EnvType]
verybasicwhere
  = [Itemgroup, Root, Wikiheading, TableCap, Chapter, Tag, TableTag,
     TemplateInside, IncludeOnly]

{-| list containing the environments where the parser linkp is allowed to match. Currently this seems to be everywhere. So this possibly can go away  -}

everywherel :: [EnvType]
everywherel = basicwhere ++ bi ++ [Wikitable, Wikilink]

{-| list containing the same environments as the list 'everywhere' except the Link environment see documentation on the list 'everywhere' in this module -}

everywherel2 :: [EnvType]
everywherel2 = verybasicwhere ++ bi ++ [Wikitable, Wikilink]

{-| list containing the TableColSep and TableHeadColSep environments, see documentation on the list 'everywhere' in this module. the environments mean table header column separator and table column separator -}

wikilinkwhere :: [EnvType]
wikilinkwhere = [TableColSep, TableHeadColSep]

{-| the list of parsers needed for processing the HTML output created by MediaWiki -}

minparsers :: [MyParser Char]
minparsers
  = [doctagparser, metatagparser, supp, subp, dhunurlp, itagparser,
     pagebreakp, htmlcharp, p302p, attrp, greekp, brparser, mytablep,
     mytrsepp, mytcolsepp, mytcapp, mythcolsepp, annop, tagparser,
     tagparserp, tagparser2, tagparser2p, tagparsert, tagparsert,
     tagparser2t, tagparsers, stagparser, commentp, numhtmlp,
     rtagparser]

{-| the list of parsers for parsing contributor information for images on MediaWiki websites -}

htmlminparsers :: [MyParser Char]
htmlminparsers
  = [doctagparser, metatagparser, supp, subp, dhunurlp, itagparser,
     pagebreakp, htmlcharp, p302p, attrp, greekp, brparser, htmytablep,
     htmytrsepp, htmytcolsepp, htmytcapp, htmythcolsepp, tagparser,
     tagparserp, tagparser2, tagparser2p, tagparsert, tagparsert,
     tagparser2t, tagparsers, stagparser, commentp, numhtmlp,
     rtagparser]

{-| the list of parsers needed for processing the image title description so that is the content a html attibutes -}

imgparsers :: [MyParser Char]
imgparsers = [supp, subp, htmlcharp, p302p, greekp, numhtmlp]

{-| the list of parsers needed for parsing source code in the MediaWiki markup language -}

parsers :: [MyParser Char]
parsers
  = [doctagparser, metatagparser, supp, subp, dhunurlp, itagparser,
     chapterp, prep, pagebreakp, htmlcharp, p302p, attrp, greekp,
     brparser, wikilinkp, wikitablep, mytablep, wikiheadingp,
     itempgrouppt, itempgroupp, itemlinep, boldp, italicp, tablecapp,
     tablecapp2, tablecapp3, rowsepp, mytrsepp, colsepp, colsepp2,
     mytcolsepp, mytcapp, headcolsepp, headcolsepp2, mythcolsepp,
     galleryp, imagemapp, nowikip, noincludep, mathp, annop, imagemapp,
     ttagparser, ttagparser2, ttagparsert, ttagparser2t, ttagparsers,
     tagparser, tagparser2, tagparsert, tagparser2t, tagparsers,
     stagparser, commentp, reservedp, templatewikilinkp, wikiparamp,
     wikitemplatep, templateinsideverbatimp, templateinsidep,
     gallerywlp, imagemapwlp, hdevlinep, linkp, linkp2, presectionp,
     presectionpt, numhtmlp, rtagparser]

{-| the parser record, with some fields initialized with default values -}

baseParser :: MyParser tok
baseParser
  = MyParser{bad = \ _ -> pzero, start = undefined,
             end = \ _ -> return (), allowed = everywhere, self = undefined,
             modify = \ _ x -> x, reenv = id}

{-| this function takes a string and returns a parser that matches any of the given strings -}

oneOfTheStrings :: [String] -> Parser String
oneOfTheStrings (x : xs) = try (string x) <|> (oneOfTheStrings xs)
oneOfTheStrings [] = pzero

{-| parses a HTML entity, that is a character escaped with the ampersand notation -}

htmlcharp :: MyParser Char
htmlcharp
  = baseParser{start =
                 \ _ ->
                   do _ <- char '&'
                      s <- (oneOfTheStrings [fst x | x <- htmlchars])
                      _ <- char ';'
                      return (Str (s)),
               allowed = Preformat : SpaceIndent : NoWiki : everywhere,
               self = HtmlChar}

{-| parses a HTML entity, escaped with numeric ampersand notation -}

numhtmlp :: MyParser Char
numhtmlp
  = baseParser{start =
                 \ _ ->
                   do _ <- string "&#"
                      s <- try (many1 digit) <|>
                             do ss <- try (string "x") <|> try (string "X")
                                sss <- try (many1 hexDigit)
                                return $ ss ++ sss
                      _ <- char ';'
                      return (Str (s)),
               allowed = Preformat : SpaceIndent : NoWiki : everywhere,
               self = NumHtml}

{-| parses a HTML #302 character. Special parser needed since it acts on the receding character -}

p302p :: MyParser Char
p302p
  = baseParser{start =
                 \ _ ->
                   do c <- anyChar
                      _ <- string "&#x302;"
                      return (Str (c : [])),
               self = P302}

{-| parses a HTML &sub entity. -}

subp :: MyParser Char
subp
  = baseParser{start =
                 \ _ ->
                   do _ <- string "&sub"
                      c <- anyChar
                      _ <- string ";"
                      return (Str (c : [])),
               self = Sub}

{-| parses a HTML &sup entity. -}

supp :: MyParser Char
supp
  = baseParser{start =
                 \ _ ->
                   do _ <- string "&sup"
                      c <- anyChar
                      _ <- string ";"
                      return (Str (c : [])),
               self = Sup}

{-| parses the start of a new URL. That is the place where a page begin that was downloaded from an URL different from the previous one -}

dhunurlp :: MyParser Char
dhunurlp
  = baseParser{start =
                 \ _ ->
                   do _ <- string "\ndhunparserurl "
                      return (Str ""),
               end = \ _ -> string "\n" >> return (), self = DhunUrl,
               allowed = [Root, Tag]}

{-| parses a Greek HTML entity. So a Greek letter or something similar -}

greekp :: MyParser Char
greekp
  = baseParser{start =
                 \ _ ->
                   do _ <- char '&'
                      s <- (oneOfTheStrings greek)
                      _ <- char ';'
                      return (Str (s)),
               self = Greek}

{-| parses the mediawiki math tag. That is a latex formula in the wiki -}

mathp :: MyParser Char
mathp
  = (maketagparser ["math"]){allowed =
                               SpaceIndent : everywhere ++ wikilinkwhere,
                             self = Math}

annop :: MyParser Char
annop
  = (maketagparser ["annotation"]){allowed =
                                     SpaceIndent : everywhere ++ wikilinkwhere,
                                   self = Math, reenv = const Tag}

{-| parses a new chapter heading -}

chapterp :: MyParser Char
chapterp
  = baseParser{start =
                 \ _ ->
                   do _ <- try
                             (do _ <- string "\n"
                                 many (char ' '))
                      string "dhunincludechaper" >> return (Str ""),
               end = \ _ -> string "/dhunincludechaper" >> return (),
               self = Chapter}

{-| parses a horizontal dividing line -}

hdevlinep :: MyParser Char
hdevlinep
  = baseParser{start =
                 \ _ ->
                   do _ <- string "----"
                      skipMany (string "-")
                      return (Str ""),
               allowed = [Root], self = HDevLine}

{-| parses the mediawiki 'nowiki' tag -}

nowikip :: MyParser Char
nowikip
  = baseParser{start = \ _ -> string "<nowiki>" >> return (Str ""),
               end = \ _ -> string "</nowiki>" >> return (),
               allowed = everywhere ++ wikilinkwhere ++ [SpaceIndent],
               self = NoWiki}

{-| parses the mediawiki 'noinclude' tag -}

noincludep :: MyParser Char
noincludep
  = baseParser{start =
                 \ _ -> string "<noinclude>" >> return (Str ""),
               end =
                 \ _ ->
                   try (string "</noinclude>" >> return ()) <|>
                     lookAhead (eof >> return ()),
               self = NoInclude}

{-| parses the mediawiki 'includeonly' tag -}

includep :: MyParser Char
includep
  = baseParser{start =
                 \ _ -> string "<includeonly>" >> return (Str ""),
               end = \ _ -> string "</includeonly>" >> return (),
               self = IncludeOnly}

{-| parses the mediawiki 'onlyinclude' tag -}

includep2 :: MyParser Char
includep2
  = baseParser{start =
                 \ _ -> string "<onlyinclude>" >> return (Str ""),
               end = \ _ -> string "</onlyinclude>" >> return (),
               self = IncludeOnly}

{-| parses the mediawiki 'gallery' tag -}

galleryp :: MyParser Char
galleryp
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<gallery"
                      skipMany (try (attr))
                      _ <- try (many (try (oneOf " \n"))) <|> return []
                      _ <- char '>'
                      return (Str ""),
               end = \ _ -> string "</gallery>" >> return (), self = Gallery}

{-| parses a wikilink inside a gallery -}

gallerywlp :: MyParser Char
gallerywlp
  = baseParser{bad =
                 \ _ ->
                   do _ <- lookAhead (try (string "</gallery>"))
                      return (),
               start = \ _ -> string "\n" >> return (Str ""),
               end =
                 \ _ ->
                   do _ <- lookAhead (string "\n")
                      return (),
               modify = \ _ x -> dropWhile (== (C ' ')) x, allowed = [Gallery],
               self = Wikilink}

{-| parses the mediawiki 'imagemap' tag -}

imagemapp :: MyParser Char
imagemapp
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<imagemap"
                      skipMany (try (attr))
                      _ <- try (many (try (oneOf " \n"))) <|> return []
                      _ <- char '>'
                      return (Str ""),
               end = \ _ -> string "</imagemap>" >> return (), self = ImageMap}

{-| parses a wikilink inside and imagemap -}

imagemapwlp :: MyParser Char
imagemapwlp
  = baseParser{bad =
                 \ _ ->
                   do _ <- lookAhead (try (string "</imagemap>"))
                      return (),
               start =
                 \ _ ->
                   (do _ <- string "\n"
                       try (lookAhead (string "Image:")) <|>
                         try (lookAhead (string "File:"))
                         <|> (lookAhead (string "Datei:")))
                     >> return (Str ""),
               end =
                 \ _ ->
                   do _ <- lookAhead (string "\n")
                      return (),
               allowed = [ImageMap], self = Wikilink}

{-| matches a sequence of arbitrary characters up to the character (an excluding it) where one of the strings given as first parameter matches -}

myany :: [String] -> Parser String
myany x
  = do b <- (try (lookAhead (oneOfTheStrings x) >> return False)) <|>
              return True
       if b then
         do c <- anyChar
            cs <- (myany x)
            return (c : cs)
         else return ""

{-| parses the mediawiki template -}

wikitemplatep :: MyParser Char
wikitemplatep
  = baseParser{start =
                 \ _ ->
                   do _ <- string "{{"
                      s <- myany ["}}", "|"]
                      return (Str s),
               end = \ _ -> string "}}" >> return (),
               allowed =
                 everywhere ++
                   wikilinkwhere ++ [TemplateInsideVerbatim, SpaceIndent],
               self = Template}

{-| a special stack frame for parsing the inside of a template -}

madframe :: MyStackFrame Char
madframe
  = MyStackFrame{endparser =
                   (try (lookAhead (oneOfTheStrings ["}}", "|", "="]))) >> return (),
                 startdata = Str "", environment = TemplateInside,
                 badparser = \ _ -> pzero, parsernumber = 0, nestingdepth = 0}

{-| parses the inside of a mediawiki template -}

templateinsidep :: MyParser Char
templateinsidep
  = baseParser{start =
                 \ _ ->
                   do _ <- string "|"
                      (_, bb) <- lookAhead
                                   (try (parseAnything2 [madframe] (remake parsers) []))
                      if
                        all
                          (\ g ->
                             case g of
                                 Open _ _ _ _ -> False
                                 _ -> True)
                          bb
                        then
                        try
                          (do s <- myany ["|", "}}", "="]
                              b <- (try (string "=") >> return True) <|> return False
                              if b then return (Str s) else pzero)
                          <|> return (Str "")
                        else return (Str ""),
               end = \ _ -> lookAhead (oneOfTheStrings ["|", "}}"]) >> return (),
               allowed = [Template], self = TemplateInside}

{-| parses the inside of a mediawiki template, it is parser verbatim that means inner structures are parsed, but returned as plain characters in the parse tree. Needed for templates use for source codes -}

templateinsideverbatimp :: MyParser Char
templateinsideverbatimp
  = baseParser{start =
                 \ sta ->
                   case sta of
                       (g : _) -> case (startdata g) of
                                      Str gg -> if
                                                  (trim gg) `elem`
                                                    ["HaskellGHCiExample",
                                                     "\"HaskellGHCi\",Visual Basic .NET: Vorlage:Code",
                                                     "C++-Programmierung/ Vorlage:Syntax", "syntax",
                                                     "Syntax", "C++-Programmierung/ Vorlage:Code",
                                                     "BigJava", "LaTeX/Usage", "LaTeX/LaTeX",
                                                     "Latex Index"]
                                                  then
                                                  do _ <- string "|"
                                                     try
                                                       (do s <- myany ["|", "}}", "="]
                                                           b <- (try (string "=") >> return True)
                                                                  <|> return False
                                                           if b then return (Str s) else pzero)
                                                       <|> return (Str "")
                                                  else pzero
                                      _ -> pzero
                       [] -> pzero,
               end = \ _ -> lookAhead (oneOfTheStrings ["|", "}}"]) >> return (),
               allowed = [Template], self = TemplateInsideVerbatim,
               reenv = const TemplateInside}

{-| parses the inside of a template parameter -}

wikiparamp :: MyParser Char
wikiparamp
  = baseParser{start = \ _ -> string "{{{" >> return (Str ""),
               end = \ _ -> string "}}}" >> return (),
               allowed = everywhere ++ wikilinkwhere ++ [SpaceIndent],
               self = Parameter}

{-| parses a wikilink -}

wikilinkp :: MyParser Char
wikilinkp
  = baseParser{start = \ _ -> string "[[" >> return (Str ""),
               end = \ _ -> string "]]" >> return (),
               bad =
                 \ pchr ->
                   if
                     not $
                       (C '|') `elem`
                         (takeWhile
                            (\ g ->
                               case g of
                                   Open _ Wikilink _ _ -> False
                                   _ -> True)
                            pchr)
                     then
                     do _ <- lookAhead
                               ((try ((string "]") >> notFollowedBy (string "]")) <|>
                                   (try (string "[" >> return ()))
                                   <|> (try (string "\n") >> return ())))
                        return ()
                     else pzero,
               allowed = everywhere ++ wikilinkwhere ++ [SpaceIndent],
               self = Wikilink}

{-| parses a wikilink template for wikipedia links -}

templatewikilinkp :: MyParser Char
templatewikilinkp
  = baseParser{start = \ _ -> string "{{w|" >> return (Str ""),
               end = \ _ -> string "}}" >> return (),
               allowed = everywhere ++ wikilinkwhere,
               modify = \ _ x -> (C 'w') : (C ':') : x, self = Wikilink}

{-| parses a link -}

linkp :: MyParser Char
linkp
  = baseParser{bad =
                 \ _ ->
                   do _ <- lookAhead (do char '\n')
                      return (),
               start =
                 \ _ ->
                   do _ <- string "["
                      skipMany (char ' ')
                      s <- oneOfTheStrings ["http", "https", "mailto", "//"]
                      return (Str (if s == "//" then "http://" else s)),
               end = \ _ -> string "]" >> return (), allowed = everywherel,
               self = Link}

{-| parses a link, in contrast to linkp this does not match inside links and does not require the square bracket notation -}

linkp2 :: MyParser Char
linkp2
  = baseParser{start =
                 \ _ ->
                   do s <- oneOfTheStrings ["http://", "https://"]
                      return (Str s),
               end = \ _ -> lookAhead (oneOf " \n\r\t<>|\"") >> return (),
               allowed = everywherel2, self = Link2, reenv = const Link}

{-| parses a wikitable -}

wikitablep :: MyParser Char
wikitablep
  = baseParser{start =
                 \ _ ->
                   do _ <- try (char '\n') <|> return '\n'
                      skipMany (char ' ')
                      _ <- try (string "{|") <|> try (string "{{(!}}")
                      s <- many (noneOf "\n")
                      return (Str s),
               end =
                 \ _ ->
                   do _ <- string "\n"
                      skipMany (char ' ')
                      _ <- try (string "|}") <|> try (string "{{!)}}")
                      return (),
               self = Wikitable}

{-| parses a heading, can be a chapter heading as well as a section heading and so on  -}

wikiheadingp :: MyParser Char
wikiheadingp
  = baseParser{bad =
                 \ _ ->
                   lookAhead
                     (do _ <- string "\n"
                         return ()),
               start =
                 \ _ ->
                   do a1 <- string "\n="
                      a2 <- try (string "=") <|> return ""
                      a3 <- try (string "=") <|> return ""
                      a4 <- try (string "=") <|> return ""
                      a5 <- try (string "=") <|> return ""
                      a6 <- try (string "=") <|> return ""
                      case (a1 ++ a2 ++ a3 ++ a4 ++ a5 ++ a6) of
                          (_ : xs) -> return (Str xs)
                          [] -> pzero,
               end =
                 \ (Str x) ->
                   do _ <- string x
                      _ <- notFollowedBy (char '=')
                      return (),
               self = Wikiheading, allowed = everywherel2}

{-| parses an italic text -}

italicp :: MyParser Char
italicp
  = baseParser{start =
                 \ _ ->
                   do _ <- string "''"
                      return (Str "''"),
               end =
                 \ _ ->
                   do (do _ <- try (string "''")
                          _ <- notFollowedBy (string "'")
                          return ())
                        <|> notFollowedBy (noneOf "\n")
                      return (),
               allowed = SpaceIndent : Wikitable : Bold : everywherebi,
               self = Italic}

{-| parses a bold text -}

boldp :: MyParser Char
boldp
  = baseParser{start =
                 \ _ ->
                   do _ <- string "'''"
                      return (Str "'''"),
               end =
                 \ _ ->
                   do (do _ <- try (string "'''")
                          return ())
                        <|> notFollowedBy (noneOf "\n")
                      return (),
               allowed = SpaceIndent : Wikitable : Italic : everywherebi,
               self = Bold}

{-| parses a table caption -}

tablecapp :: MyParser Char
tablecapp
  = baseParser{bad =
                 \ _ ->
                   lookAhead
                     (try
                        (do _ <- string "||"
                            return ())
                        <|>
                        try
                          (do _ <- char '\n'
                              _ <- notFollowedBy (try (char '|') <|> (char '!'))
                              return ())
                        <|>
                        do _ <- string "!!"
                           return ()),
               start =
                 \ _ ->
                   do _ <- string "\n"
                      skipMany (char ' ')
                      _ <- string "|+"
                      _ <- notFollowedBy (oneOf "-}")
                      return (Str ""),
               end =
                 \ _ ->
                   lookAhead
                     (do _ <- char '\n'
                         _ <- try (char '|') <|> char '!'
                         notFollowedBy . char $ '}'
                         return ()),
               allowed = [Wikitable], self = TableCap}

{-| parses a table caption with additional parameter given to the beginning of the caption element in the wiki source  -}

tablecapp2 :: MyParser Char
tablecapp2
  = baseParser{start =
                 \ _ ->
                   do _ <- string "\n"
                      skipMany (char ' ')
                      _ <- string "|+"
                      _ <- many (noneOf "|")
                      _ <- char '|'
                      _ <- notFollowedBy (oneOf "-}")
                      return (Str ""),
               end =
                 \ _ ->
                   lookAhead
                     (try
                        (do _ <- char '\n'
                            _ <- try (char '|') <|> char '!'
                            notFollowedBy . char $ '}'
                            return ())
                        <|>
                        (try ((string "||" >> return ())) <|> (string "!!" >> return ()))),
               allowed = [Wikitable], self = TableCap}

{-| parses a table caption -}

tablecapp3 :: MyParser Char
tablecapp3
  = baseParser{start =
                 \ _ ->
                   do _ <- string "\n"
                      skipMany (char ' ')
                      _ <- string "|+"
                      _ <- notFollowedBy (oneOf "-}")
                      return (Str "2"),
               allowed = [Wikitable], self = TableCap}

{-| parses a table row separator -}

rowsepp :: MyParser Char
rowsepp
  = baseParser{start =
                 \ _ ->
                   do _ <- string "\n"
                      skipMany (char ' ')
                      _ <- try (string "|-") <|> try (string "{{!-}}")
                      s <- many (noneOf "\n")
                      return (Str s),
               allowed = [Wikitable], self = TableRowSep}

{-| parses a table column separator, with additional parameters parser to the beginning of the environment. This parser actually parses only the beginning elements and whats inside it See  -}

colsepp :: MyParser Char
colsepp
  = baseParser{bad =
                 \ _ ->
                   lookAhead
                     (try
                        (do _ <- try (string "||") <|> try (string "{{!!}}")
                            return ())
                        <|>
                        try
                          (do _ <- char '\n'
                              return ())
                        <|>
                        do _ <- try (string "!!")
                           return ()),
               start =
                 \ _ ->
                   try
                     (try
                        (do _ <- string "\n"
                            skipMany (char ' ')
                            _ <- try (string "|") <|> try (string "{{!}}")
                            _ <- notFollowedBy (oneOf "-}")
                            return (Str ""))
                        <|>
                        do _ <- try (string "||") <|> try (string "{{!!}}")
                           return (Str "")),
               end =
                 \ _ ->
                   try
                     (do _ <- try (string "|") <|> try (string "{{!}}")
                         _ <- notFollowedBy (oneOf "}|")
                         return ()),
               allowed = [Wikitable], self = TableColSep}

{-| parses a column separator without anything inside it -}

colsepp2 :: MyParser Char
colsepp2
  = baseParser{start =
                 \ _ ->
                   try
                     (do _ <- string "\n"
                         skipMany (char ' ')
                         _ <- try (string "|") <|> try (string "{{!}}")
                         _ <- notFollowedBy (oneOf "-}")
                         return (Str "2"))
                     <|>
                     do _ <- try (string "||") <|> try (string "{{!!}}")
                        return (Str "2"),
               allowed = [Wikitable], self = TableColSep}

{-| parses a table header column separator, with additional parameters parser to the beginning of the environment. This parser actually parses only the beginning elements and whats inside it See  -}

headcolsepp :: MyParser Char
headcolsepp
  = baseParser{bad =
                 \ _ ->
                   lookAhead
                     (try
                        (do _ <- try (string "||") <|> (string "{{!!}}")
                            return ())
                        <|>
                        try
                          (do _ <- char '\n'
                              return ())
                        <|>
                        do _ <- try (string "!!")
                           return ()),
               start =
                 \ _ ->
                   try
                     (do _ <- string "\n"
                         skipMany (char ' ')
                         _ <- string "!"
                         _ <- notFollowedBy (oneOf "-}")
                         return (Str ""))
                     <|>
                     do _ <- string "!!"
                        return (Str ("")),
               end =
                 \ _ ->
                   do _ <- try (char '|') <|> ((string "{{!}}") >> return '|')
                      notFollowedBy (oneOf "-}|")
                      return (),
               allowed = [Wikitable], self = TableHeadColSep}

{-| parses a header column separator without anything inside it -}

headcolsepp2 :: MyParser Char
headcolsepp2
  = baseParser{start =
                 \ _ ->
                   try
                     (do _ <- string "\n"
                         skipMany (char ' ')
                         _ <- string "!"
                         _ <- notFollowedBy (oneOf "-}")
                         return (Str ""))
                     <|>
                     do _ <- string "!!"
                        return (Str ""),
               allowed = [Wikitable], self = TableHeadColSep}

attrinside :: String -> GenParser Char () Char
attrinside x = try (string "&amp;" >> return '&') <|> (noneOf x)

{-| matches a key value pair . So an attribute of an HTML element -}

attr :: GenParser Char () ([Char], [Char])
attr
  = do skipMany1 (oneOf " \n")
       k <- many1 (try (alphaNum) <|> oneOf ":-")
       v <- try
              (do skipMany (oneOf " \n")
                  _ <- char '='
                  skipMany (oneOf " \n")
                  vv <- try
                          (do _ <- try (char '"')
                              vvv <- many (attrinside "\"><")
                              _ <- try (char '"') <|> try (noneOf "<|>") <|> return ' '
                              return vvv)
                          <|>
                          try
                            (do _ <- (char '\'')
                                vvv <- many (attrinside "'><")
                                _ <- try (char '\'') <|> try (noneOf "<|>") <|> return ' '
                                return vvv)
                          <|>
                          (do vvv <- many (attrinside "\"'></| ")
                              _ <- try (char '\'') <|> try (char '"') <|> try (noneOf "</|> ")
                                     <|> return ' '
                              return vvv)
                  return vv)
              <|> return ""
       return (k, v)

{-| Matches a key value pair. So an attribute of an HTML element -}

attrns :: GenParser Char u ([Char], [Char])
attrns
  = do k <- many1 (try (alphaNum) <|> oneOf ":-")
       v <- do _ <- char '='
               skipMany (oneOf " \n")
               vv <- try
                       (do _ <- try (char '"') <|> char '\''
                           vvv <- many (noneOf "\"'><|")
                           _ <- try (char '"') <|> try (noneOf "<|>") <|> return ' '
                           return vvv)
                       <|>
                       (do vvvv <- (noneOf "\"'></| =")
                           vvv <- many (noneOf "\"'></| ")
                           _ <- try (char '"') <|> try (char '\'') <|> try (noneOf "</|> ")
                                  <|> return ' '
                           return (vvvv : vvv))
               return vv
       _ <- try (many (oneOf " \n")) <|> return []
       return (k, v)

{-| Matches a list of key value pairs . So all attributes of an HTML element -}

attrp :: MyParser Char
attrp
  = baseParser{start =
                 \ _ ->
                   do atr <- try (attrns)
                      return (Attr atr),
               allowed = [TableHeadColSep, TableColSep, TableCap],
               self = Attribute}

{-| Parses the HTML 'pre' tag -}

prep :: MyParser Char
prep
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<"
                      p <- try (string "pre") <|> try (string "PRE") <|>
                             try (string "xmp")
                             <|> string "XMP"
                      atr <- many (try (attr))
                      _ <- try (many (oneOf " \n")) <|> return []
                      _ <- char '>'
                      return (TagAttr p (Map.fromList atr)),
               end =
                 \ (TagAttr x _) ->
                   do _ <- char '<'
                      _ <- char '/'
                      _ <- string x
                      _ <- char '>'
                      return (),
               allowed = everywhere, self = Preformat}

{-| Parses the HTML 'br' tag -}

brparser :: MyParser Char
brparser
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<"
                      _ <- try (string "/") <|> return ""
                      _ <- try (string "br") <|> string ("BR")
                      skipMany (try (attr))
                      _ <- try (many (oneOf " \n")) <|> return []
                      _ <- try (string "/") <|> return ""
                      _ <- try (many (oneOf " \n")) <|> return []
                      _ <- char '>'
                      return (TagAttr "br" Map.empty),
               allowed = SpaceIndent : everywhere, self = Tag}

{-| Returns a Parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser does not match inside tables. Use makettagparser for that -}

maketagparser :: [String] -> MyParser Char
maketagparser tags
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<"
                      t <- (oneOfTheStrings tags)
                      atr <- many (try (attr))
                      _ <- try (many (oneOf " \n")) <|> return []
                      _ <- char '>'
                      return (TagAttr (t) (Map.fromList atr)),
               end =
                 \ (TagAttr x _) ->
                   do _ <- char '<'
                      _ <- try (many (oneOf " ")) <|> return []
                      _ <- if x `elem` nonNestTags then try (char '/') <|> (return '/')
                             else (char '/')
                      _ <- string x
                      _ <- char '>'
                      return (),
               allowed = SpaceIndent : everywheretbl, self = Tag}

{-| Parser for the 'meta' tag of HTML -}

metatagparser :: MyParser Char
metatagparser
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<"
                      t <- (oneOfTheStrings ["meta"])
                      atr <- many (try (attr))
                      _ <- try (many (oneOf " \n")) <|> return []
                      _ <- try (char '/') <|> (return '/')
                      _ <- try (char '>') <|> (return '>')
                      return (TagAttr (t) (Map.fromList atr)),
               allowed = SpaceIndent : everywhere, self = Tag}

{-| Parser for the !DOCTYPE tag of HTML -}

doctagparser :: MyParser Char
doctagparser
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<"
                      t <- (oneOfTheStrings ["!DOCTYPE"])
                      _ <- many (noneOf ">")
                      _ <- char '>'
                      return (TagAttr (t) (Map.fromList [])),
               allowed = SpaceIndent : everywhere, self = Tag}

{-| Parser for closing HTML tags that have not matching opening tag. -}

ctagparser :: [String] -> GenParser Char () ()
ctagparser tags
  = do _ <- string "</"
       _ <- (oneOfTheStrings tags)
       _ <- try (many (oneOf " \n")) <|> return []
       _ <- char '>'
       return ()

{-| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only the opening part of the tag. The inside of it is not processed by this parser. The opening tag may also self closing like <br/> because of the tailing /. -}

maketagparser2 :: [String] -> MyParser Char
maketagparser2 tags
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<"
                      t <- (oneOfTheStrings tags)
                      atr <- many (try (attr))
                      _ <- try (many (oneOf " \n")) <|> return []
                      _ <- try (char '/') <|> (return '/')
                      _ <- char '>'
                      return (TagAttr (t) (Map.fromList atr)),
               allowed = Wikitable : SpaceIndent : everywheretbl, self = Tag}

{-| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only the opening part of the tag. The inside of it is not processed by this parser. The opening tag may not be self closing so not like <br/> because of the tailing /. -}

maketagparser3 :: [String] -> MyParser Char
maketagparser3 tags
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<"
                      t <- (oneOfTheStrings tags)
                      atr <- many (try (attr))
                      _ <- try (many (oneOf " \n")) <|> return []
                      _ <- char '>'
                      return (TagAttr (t) (Map.fromList atr)),
               allowed = SpaceIndent : everywheretbl, self = Tag}

{-| tags that can not be nested and are thus allowed to closed by an opening tag instead of a closing one -}

nonNestTags :: [String]
nonNestTags
  = ["tt", "pre", "TT", "PRE", "b", "B", "i", "I", "sc", "SC",
     "code", "CODE"]

{-| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only inside tables. Use maketagparser for that -}

makettagparser :: [String] -> MyParser Char
makettagparser tags
  = baseParser{bad =
                 \ _ ->
                   do _ <- lookAhead (do try (string "\n|") <|> (string "||"))
                      return (),
               start =
                 \ _ ->
                   do _ <- string "<"
                      t <- (oneOfTheStrings tags)
                      atr <- many (try (attr))
                      _ <- try (many (oneOf " \n")) <|> return []
                      _ <- char '>'
                      return (TagAttr (t) (Map.fromList atr)),
               end =
                 \ (TagAttr x _) ->
                   do _ <- char '<'
                      _ <- try (many (oneOf " ")) <|> return []
                      _ <- if x `elem` nonNestTags then try (char '/') <|> (return '/')
                             else (char '/')
                      _ <- string x
                      _ <- char '>'
                      return (),
               allowed = [Wikitable], self = Tag}

{-| Returns a parser that matches all HTML elements, given by the list of strings given as first input parameter. The parser matches only the opening part of the tag. The inside of it is not processed by this parser. The opening tag may also self closing like <br/> because of the tailing /. Matches only inside tables -}

makettagparser2 :: [String] -> MyParser Char
makettagparser2 tags
  = baseParser{bad =
                 \ _ ->
                   do _ <- lookAhead
                             (do _ <- anyChar
                                 try (string "\n|") <|> (string "||"))
                      return (),
               start =
                 \ _ ->
                   do _ <- string "<"
                      t <- (oneOfTheStrings tags)
                      atr <- many (try (attr))
                      _ <- try (many (oneOf " \n")) <|> return []
                      _ <- char '/'
                      _ <- char '>'
                      return (TagAttr (t) (Map.fromList atr)),
               allowed = [Wikitable], self = Tag}

{-| maketagparser for all HTML elements see documentation on function maketagparser -}

tagparser :: MyParser Char
tagparser = maketagparser listOfTags

{-| maketagparser for the 'pre' HTML tag see documentation on function maketagparser -}

tagparserp :: MyParser Char
tagparserp = maketagparser ["pre"]

{-| maketagparser for the HTML tags for HTML tables see documentation on function maketagparser -}

tagparsert :: MyParser Char
tagparsert
  = (maketagparser listOfTableTags){self = TableTag,
                                    reenv = const Tag}

{-| makettagparser for all HTML elements see documentation on function makettagparser -}

ttagparser :: MyParser Char
ttagparser = makettagparser listOfTags

{-| makettagparser for the 'pre' HTML tag see documentation on function makettagparser -}

ttagparserp :: MyParser Char
ttagparserp = makettagparser ["pre"]

{-| makettagparser for the HTML tags for HTML tables see documentation on function makettagparser -}

ttagparsert :: MyParser Char
ttagparsert
  = (makettagparser listOfTableTags){self = TableTag,
                                     reenv = const Tag}

{-| maketagparser2 for all HTML elements see documentation on function maketagparser2 -}

tagparser2 :: MyParser Char
tagparser2 = maketagparser2 listOfTags

{-| maketagparser2 for the 'pre' HTML tag see documentation on function maketagparser2 -}

tagparser2p :: MyParser Char
tagparser2p = maketagparser2 ["pre"]

{-| maketagparser2 for the HTML tags for HTML tables see documentation on function maketagparser2 -}

tagparser2t :: MyParser Char
tagparser2t
  = (maketagparser2 listOfTableTags){self = TableTag,
                                     reenv = const Tag}

{-| makettagparser2 for all HTML elements see documentation on function makettagparser2 -}

ttagparser2 :: MyParser Char
ttagparser2 = makettagparser2 listOfTags

{-| makettagparser2 for the 'pre' HTML tag see documentation on function makettagparser2 -}

ttagparser2p :: MyParser Char
ttagparser2p = makettagparser2 ["pre"]

{-| makettagparser2 for the HTML tags for HTML tables see documentation on function makettagparser2 -}

ttagparser2t :: MyParser Char
ttagparser2t
  = (makettagparser2 listOfTableTags){self = TableTag,
                                      reenv = const Tag}

{-| a parser for mediawiki source extension tags -}

tagparsers :: MyParser Char
tagparsers
  = (maketagparser ["source", "syntaxhighlight"]){self = Source}

{-| a parser for mediawiki source extension tags inside tables -}

ttagparsers :: MyParser Char
ttagparsers
  = (maketagparser ["source", "syntaxhighlight"]){self = Source,
                                                  allowed = [Wikitable]}

{-| a parser for HTML tables -}

mytablep :: MyParser Char
mytablep
  = (maketagparser ["table"]){self = TableTag,
                              reenv = const Wikitable,
                              allowed = Wikitable : SpaceIndent : everywheretbl}

{-| a parser for HTML table rows -}

mytrsepp :: MyParser Char
mytrsepp
  = (maketagparser3 ["tr"]){reenv = const TableRowSep,
                            allowed = everywhere}

{-| a parser for normal HTML table cells -}

mytcolsepp :: MyParser Char
mytcolsepp
  = (maketagparser3 ["td"]){reenv = const TableColSep,
                            allowed = everywhere}

{-| a parser for HTML table captions cells -}

mytcapp :: MyParser Char
mytcapp
  = (maketagparser ["caption"]){reenv = const TableCap,
                                allowed = everywhere}

{-| a parser for HTML table header cells, so th tags -}

mythcolsepp :: MyParser Char
mythcolsepp
  = (maketagparser3 ["th"]){reenv = const TableHeadColSep,
                            allowed = everywhere}

{-| a parser for HTML tables for html parse mode only -}

htmytablep :: MyParser Char
htmytablep = (maketagparser ["table"])

{-| a parser for HTML table rows for html parse mode only -}

htmytrsepp :: MyParser Char
htmytrsepp = (maketagparser ["tr"])

{-| a parser for normal HTML table cells for html parse mode only -}

htmytcolsepp :: MyParser Char
htmytcolsepp = (maketagparser ["td"])

{-| a parser for HTML table captions cells for html parse mode only -}

htmytcapp :: MyParser Char
htmytcapp = (maketagparser ["caption"])

{-| a parser for HTML table header cells, so th tags for html parse mode only -}

htmythcolsepp :: MyParser Char
htmythcolsepp = (maketagparser ["th"])

{-| a parser for closing HTML tags which don't have an opening partner. This parser is only allowed to match within itemization enumerations etc. -}

itagparser :: MyParser Char
itagparser
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<"
                      skipMany (char '/')
                      t <- (oneOfTheStrings ["small"])
                      atr <- many (try (attr))
                      _ <- try (many (oneOf " \n")) <|> return []
                      _ <- char '>'
                      return (TagAttr (t) (Map.fromList atr)),
               allowed = [Itemgroup], self = Tag}

{-| a parser for closing HTML tags which don't have an opening partner -}

rtagparser :: MyParser Char
rtagparser
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<"
                      skipMany (char '/')
                      s <- (oneOfTheStrings (listOfTags ++ listOfTableTags))
                      atr <- many (try (attr))
                      _ <- try (many (oneOf " \n")) <|> return []
                      _ <- char '>'
                      return (TagAttr ("bad" ++ s) (Map.fromList atr)),
               allowed = everywhere, self = Tag}

{-| a parser for HTML opening tags which might be self closing but never have a matching closing partner -}

stagparser :: MyParser Char
stagparser
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<"
                      t <- (oneOfTheStrings (listOfTags ++ listOfTableTags))
                      atr <- many (try (attr))
                      _ <- try (many (oneOf " ")) <|> return []
                      _ <- try (char '/') <|> return 'f'
                      _ <- char '>'
                      return (TagAttr t (Map.fromList atr)),
               self = Tag, allowed = []}

{-| a parser for HTML page breaks -}

pagebreakp :: MyParser Char
pagebreakp
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<div style=\"page-break-before:always\"/>"
                      return (Str ""),
               end = \ _ -> return (), allowed = everywhere, self = PageBreak}

{-| a parser for HTML comments -}

commentp :: MyParser Char
commentp
  = baseParser{start =
                 \ _ ->
                   do _ <- string "<!--"
                      return (Str ""),
               allowed = everywhere ++ [Tag, TemplateInsideVerbatim],
               end =
                 \ _ ->
                   do _ <- string "-->"
                      return (),
               self = Comment}

{-| a parser for mediawiki reserved words -}

reservedp :: MyParser Char
reservedp
  = baseParser{start =
                 \ _ ->
                   do s <- try (string "__NOTOC__") <|>
                             try (string "__NOEDITSECTION__")
                             <|> try (string "__notoc__")
                             <|> try (string "__TOC__")
                             <|> try (string "__Toc__")
                             <|> try (string "__INHALTSVERZEICHNIS__")
                             <|> try (string "__KEIN_INHALTSVERZEICHNIS__")
                             <|> try (string "__FORCETOC__")
                             <|> string "&nbsp;"
                      return (Str s),
               self = Reserved, allowed = SpaceIndent : everywhere}

{-| See documentation on evaluateItemgroup and itemParserLevelTwo-}

itemStartString :: Anything Char -> String
itemStartString (ItemStart x) = x : []
itemStartString _ = ""

{-| See documentation on evaluateItemgroup and itemParserLevelTwo -}

itemStopString :: Anything Char -> String
itemStopString (ItemStop x) = x : []
itemStopString _ = ""

{-| see documentation on evaluateItemgroup. Parser to parse the ItemStart and ItemStop elements which are essentially bracket into environments which are essentially nodes with children in the parse tree -}

itemParserLevelTwo :: MyParser (Anything Char)
itemParserLevelTwo
  = MyParser{bad = \ _ -> pzero,
             start =
               \ _ ->
                 do pos <- getPosition
                    z <- token show (\ _ -> pos)
                           (\ x -> if (itemStartString x == []) then Nothing else Just x)
                    return (Str (itemStartString z)),
             end =
               \ (Str y) ->
                 do pos <- getPosition
                    _ <- token show (\ _ -> pos)
                           (\ x -> if (itemStopString x == y) then Just x else Nothing)
                    return (),
             allowed = [Root, ItemEnv], self = ItemEnv, modify = \ _ x -> x,
             reenv = id}

{-| see documentation on evaluateItemgroup. helper function to generate a list of only opening or only closing brackets, that is ItemStart or ItemStop values. The string given as first parameter states which kinds of bracket shall be opened or closed. For Openning brackets you have to call this function ItemStart as second and True as third parameter. For closing brackets you have to use ItemStop and False -}

generateEnvironmentTagsHelper ::
                              String -> (Char -> Anything Char) -> Bool -> [Anything Char]
generateEnvironmentTagsHelper (c : cs) t b
  = if b then
      (t c) : ([Item c]) ++ (generateEnvironmentTagsHelper cs t b) else
      (generateEnvironmentTagsHelper cs t b) ++ [(t c)]
generateEnvironmentTagsHelper [] _ _ = []

{-| see documentation on evaluateItemgroup and insertEnvironmentTags. The first parameter is a string consisting of the characters #:* that is the begging of the last Itemline. The second is the same for the current ItemLine. This function return a list of ItemStart and ItemStop values for the difference between the first and the second parameter -}

generateEnvironmentTags :: String -> String -> [Anything Char]
generateEnvironmentTags (o : os) (n : ns)
  = if (o == n) then generateEnvironmentTags os ns else
      (generateEnvironmentTagsHelper (o : os) ItemStop False) ++
        (generateEnvironmentTagsHelper (n : ns) ItemStart True)
generateEnvironmentTags (o : os) []
  = generateEnvironmentTagsHelper (o : os) ItemStop False
generateEnvironmentTags [] (n : ns)
  = generateEnvironmentTagsHelper (n : ns) ItemStart True
generateEnvironmentTags [] [] = []

{-| see documentation on evaluateItemgroup. The first parameter is a string consisting of the characters #:* that is the begging of the last Itemline. The second parameter is the content of an Itemgroup. This function returns the parse tree with the ItemStart and ItemStop values inserted   -}

insertEnvironmentTags ::
                      String -> [Anything Char] -> [Anything Char]
insertEnvironmentTags s ((Environment ItemLine (Str x) _) : xs)
  = (generateEnvironmentTags s x) ++
      ((if
          (((length s) > (length x)) &&
             (if length (generateEnvironmentTags s x) > 0 then
                (case last (generateEnvironmentTags s x) of
                     Item _ -> False
                     _ -> True)
                else True)
               && (x /= ""))
            || (s == x)
          then [Item (last s)] else [])
         ++ insertEnvironmentTags x xs)
insertEnvironmentTags s (x : xs) = x : insertEnvironmentTags s xs
insertEnvironmentTags _ [] = []

{-| see documentation on evaluateItemgroup. Inserts the ItemStart and ItemStop values as preparation for running the second level parse on the content of an ItemGroup. The first pararmenter is a string consisting of the characters #:* that is the begging of the first Itemline. The second parameter is the content of an Itemgroup. This function returns the parse tree with the ItemStart and ItemStop values inserted  -}

toEnvironmentTags :: String -> [Anything Char] -> [Anything Char]
toEnvironmentTags s l
  = insertEnvironmentTags ""
      ((Environment ItemLine (Str s) []) :
         l ++ [(Environment ItemLine (Str "") [])])

{-| see documentation on evaluateItemgroup. Get the second level parse result, back to the first level parse result -}

convertFromParsingLevelTwoToLevelOne ::
                                     [Anything (Anything Char)] -> [Anything Char]
convertFromParsingLevelTwoToLevelOne ((C x) : xs)
  = x : convertFromParsingLevelTwoToLevelOne xs
convertFromParsingLevelTwoToLevelOne ((Environment Root _ x) : xs)
  = (convertFromParsingLevelTwoToLevelOne x) ++
      (convertFromParsingLevelTwoToLevelOne xs)
convertFromParsingLevelTwoToLevelOne
  ((Environment ItemEnv s l) : xs)
  = (Environment ItemEnv s
       (findMatchingBrackets (remake parsers)
          ((convertFromParsingLevelTwoToLevelOne l))))
      : convertFromParsingLevelTwoToLevelOne xs
convertFromParsingLevelTwoToLevelOne (_ : xs)
  = (convertFromParsingLevelTwoToLevelOne xs)
convertFromParsingLevelTwoToLevelOne [] = []

{-| see documentation on evaluateItemgroup. Runs the parser itemParserLevelTwo on the inner part of an ItemGroup -}

runItemGroupPraserLevelTwo ::
                           GenParser (Anything Char) () [Anything (Anything Char)] ->
                             [Anything Char] -> [Anything (Anything Char)]
runItemGroupPraserLevelTwo p input
  = case (parse p "" input) of
        Left _ -> []
        Right x -> x

{-| this is for parsing itemization. It is implemented as two step process in the first step Itemgroup is created with ItemLine s in it. And ItemLine is a line starting with any combination of *#: . And Itemgroup is a sequence of lines of that kind. After this is done this function gets called with the content of an itemgroup. In the second step ItemStart and ItemStop elements are created, those are essentially the bracketed of the opening an closing part of the enumerations itemizations etc. . The function toEnvironmentTags does the insertion of the ItemStop and ItemStart values and remove the ItemLine values. parseAnything2 is run on it using the itemParserLevelTwo. This is turned into the parse tree by being passed to decon2 and put out of the parser monad by runItemGroupPraserLevelTwo. The resulting parse tree is one of type [Anything (Anything Char)] this needs to be but one level down to [Anything Char] this is done be convertFromParsingLevelTwoToLevelOne. The first parameter is a string consisting of the characters #:* that is the begging of the first Itemline. The second parameter is the content of an Itemgroup  -}

evaluateItemgroup :: String -> [Anything Char] -> [Anything Char]
evaluateItemgroup s l
  = convertFromParsingLevelTwoToLevelOne
      (runItemGroupPraserLevelTwo
         (decon2 (remake [itemParserLevelTwo])
            (parseAnything2
               [MyStackFrame{endparser = pzero, startdata = Str "",
                             environment = Root, badparser = \ _ -> pzero, parsernumber = 0,
                             nestingdepth = 0}]
               (remake [itemParserLevelTwo])
               []))
         (toEnvironmentTags s l))

{-| a parser for a group of lines starting with one of *:;# representing in enumeration itemization etc. . This particular parser is allowed to match nearly everywhere -}

itempgroupp :: MyParser Char
itempgroupp
  = MyParser{bad = \ _ -> pzero,
             start =
               \ _ ->
                 (do _ <- string "\n"
                     a <- many1 (oneOf ['*', ':', ';', '#'])
                     return (Str a)),
             end =
               \ _ ->
                 lookAhead
                   (try
                      (do _ <- string "\n"
                          notFollowedBy ((oneOf ['*', ':', ';', '#'])))),
             allowed = [Root, Wikitable, TemplateInside, Tag], self = Itemgroup,
             modify = \ (Str x) -> evaluateItemgroup x, reenv = id}

{-| a parser for a group of lines starting with one of *:;# representing in enumeration itemization etc. . This particular parser is only allowed to match within templates -}

itempgrouppt :: MyParser Char
itempgrouppt
  = MyParser{bad = \ _ -> pzero,
             start =
               \ _ ->
                 do _ <- string "\n"
                    a <- many1 (oneOf ['*', ':', ';', '#'])
                    return (Str a),
             end =
               \ _ ->
                 (try
                    (lookAhead (string "}}") <|>
                       (lookAhead (string "\n" >> (many (char ' ')) >> string "|")))
                    >> return ()),
             allowed = [TemplateInside], self = Itemgroup,
             modify = \ (Str x) -> evaluateItemgroup x, reenv = id}

{-| a parser for a preformat created by indenting with space -}

presectionp :: MyParser Char
presectionp
  = MyParser{bad = \ _ -> try (string "}}") >> (return ()),
             start =
               \ _ ->
                 do _ <- string "\n"
                    _ <- char ' '
                    return (Str ""),
             end =
               \ _ ->
                 lookAhead
                   (do _ <- string "\n"
                       notFollowedBy (char ' ')),
             allowed = [Root, Tag], self = SpaceIndent,
             modify = \ _ x -> (C ' ') : x, reenv = id}

{-| a parser for a preformat created by indenting with space withing templates -}

presectionpt :: MyParser Char
presectionpt
  = MyParser{bad = \ _ -> try (string "}}") >> (return ()),
             start =
               \ _ ->
                 do _ <- string "\n"
                    _ <- notFollowedBy ((many1 (char ' ')) >> (char '|'))
                    _ <- char ' '
                    return (Str ""),
             end =
               \ _ ->
                 lookAhead
                   (do _ <- string "\n"
                       notFollowedBy (char ' ')),
             allowed = [TemplateInside], self = SpaceIndent,
             modify = \ _ x -> (C ' ') : x, reenv = id}

{-| a parser for a line starting with one of *:;# representing in enumeration itemization etc. -}

itemlinep :: MyParser Char
itemlinep
  = MyParser{bad = \ _ -> pzero,
             start =
               \ _ ->
                 do _ <- string "\n"
                    a <- many1 (oneOf ['*', ':', ';', '#'])
                    return (Str a),
             end = \ _ -> (return ()), allowed = [Itemgroup], self = ItemLine,
             modify = \ _ x -> x, reenv = id}

{-| takes a parse tree that was created form the HTML returned by MediaWiki when being requested for the print version of a wiki page. And returns a modified version of that parse tree ready for being converted to LaTeX with treeToLaTeX3 |-}

printPrepareTree :: [Anything Char] -> [Anything Char]
printPrepareTree ll = concat (map printPrepareNode ll)
  where printPrepareNode :: Anything Char -> [Anything Char]
        printPrepareNode (Environment Tag (TagAttr "div" mm) l)
          | (Map.lookup "class" mm) == (Just "thumbinner") =
            case
              do (m, llll) <- case filter (mypred "a") (reducediv l) of
                                  [(Environment Tag (TagAttr "a" mmm) lll)] -> return (mmm, lll)
                                  _ -> mzero
                 tt <- case filter (mypred "div") (reducediv l) of
                           [(Environment Tag (TagAttr "div" mmm) tt)] | (Map.lookup "class"
                                                                           mmm)
                                                                          == (Just "thumbcaption")
                                                                        ->
                                                                        return .
                                                                          (dropWhile (== (C '\n')))
                                                                            . (filter magnpred)
                                                                          $ tt
                           _ -> mzero
                 return $ imgfun m (printPrepareTree llll) (Just (printPrepareTree tt))
              of
                Just x -> x
                _ -> printPrepareTree l
        printPrepareNode (Environment Wikitable (TagAttr "table" m) _)
          | (Map.lookup "class" m) == (Just "toc") = []
        printPrepareNode (Environment Tag (TagAttr "tbody" _) x)
          = printPrepareTree x
        printPrepareNode (Environment Tag (TagAttr "div" m) _)
          | ((Map.lookup "class" m) == (Just "toc") ||
               (Map.lookup "id" m) == (Just "toc"))
            = []
        printPrepareNode (Environment Wikitable (TagAttr "table" m) _)
          | (Map.lookup "class" m) == (Just "navbox") = []
        printPrepareNode (Environment Tag (TagAttr "title" _) _) = []
        printPrepareNode (Environment Tag (TagAttr "div" m) _)
          | (Map.lookup "id" m) == (Just "siteSub") = []
        printPrepareNode (Environment Tag (TagAttr "div" m) _)
          | (Map.lookup "id" m) == (Just "catlinks") = []
        printPrepareNode (Environment Tag (TagAttr "div" m) _)
          | (Map.lookup "id" m) == (Just "jump-to-nav") = []
        printPrepareNode (Environment Tag (TagAttr "div" m) _)
          | (Map.lookup "id" m) == (Just "footer") = []
        printPrepareNode (Environment Tag (TagAttr "div" m) _)
          | (Map.lookup "id" m) == (Just "p-lang") = []
        printPrepareNode (Environment Tag (TagAttr "div" m) _)
          | (Map.lookup "id" m) == (Just "contentSub") = []
        printPrepareNode (Environment Tag (TagAttr "span" m) _)
          | (Map.lookup "style" m) == (Just "display:none") = []
        printPrepareNode (Environment Tag (TagAttr "div" m) _)
          | (Map.lookup "class" m) == (Just "printfooter") = []
        printPrepareNode (Environment Tag (TagAttr "span" m) _)
          | (Map.lookup "class" m) == (Just "mw-cite-backlink") = []
        printPrepareNode (Environment Tag (TagAttr "a" m) _)
          | (Map.lookup "class" m) == (Just "mw-jump-link") = []
        printPrepareNode (Environment Tag (TagAttr "div" m) _)
          | (Map.lookup "id" m) == (Just "mw-navigation") = []
        printPrepareNode (Environment Tag (TagAttr "sup" m) l)
          | (Map.lookup "class" m) == (Just "reference") =
            [Environment Tag (TagAttr "sup" m) (reducea l)]
        printPrepareNode (Environment Tag (TagAttr "li" m) l)
          | (Map.lookup "class" m) == (Just "gallerybox") =
            case
              (do llll <- case deepGet "div" "class" "thumb" l of
                              [Environment Tag (TagAttr "div" _) lll] -> return lll
                              _ -> mzero
                  (mmm, lll) <- case deepGet "a" "class" "image" llll of
                                    [Environment Tag (TagAttr "a" mmm) lll] -> return (mmm, lll)
                                    _ -> mzero
                  te <- case deepGet "div" "class" "gallerytext" l of
                            [Environment Tag (TagAttr "div" _) te] -> return te
                            _ -> mzero
                  return $ imgfun mmm (printPrepareTree lll) (Just te))
              of
                Just x -> x
                _ -> printPrepareTree l
        printPrepareNode (Environment TableHeadColSep (TagAttr t m) l)
          = [Environment TableHeadColSep (TagAttr t m)
               (l ++
                  (map (\ x -> Environment Attribute (Attr x) []) (Map.toList m)))]
        printPrepareNode (Environment TableColSep (TagAttr t m) l)
          = [Environment TableColSep (TagAttr t m)
               (l ++
                  (map (\ x -> Environment Attribute (Attr x) []) (Map.toList m)))]
        printPrepareNode (Environment TableRowSep (TagAttr t m) l)
          = [Environment TableRowSep (TagAttr t m)
               (l ++
                  (map (\ x -> Environment Attribute (Attr x) []) (Map.toList m)))]
        printPrepareNode (Environment Tag (TagAttr "pre" m) l)
          = [Environment Preformat (TagAttr "pre" m) l]
        printPrepareNode (Environment Tag (TagAttr "span" m) _)
          | (Map.lookup "class" m) == (Just "editsection") = []
        printPrepareNode (Environment Tag (TagAttr "span" m) _)
          | (Map.lookup "class" m) == (Just "mw-editsection") = []
        printPrepareNode (Environment Tag (TagAttr "a" m) l)
          = case (Map.lookup "class" m) of
                (Just "image") -> imgfun m (printPrepareTree l) Nothing
                _ -> case (Map.lookup "class" m) of
                         (Just "external free") -> [Environment Tag (TagAttr "a" m) []]
                         _ -> [Environment Tag (TagAttr "a" m) l]
        printPrepareNode (Environment Tag (TagAttr "div" m) _)
          | (Map.lookup "class" m) == (Just "bodyContent") = []
        printPrepareNode (Environment Tag (TagAttr "math" m) l)
          = case deepGet "annotation" "encoding" "application/x-tex" l of
                [Environment Tag (TagAttr "annotation" _) x] -> [Environment Math
                                                                   (TagAttr "math" m)
                                                                   (map C
                                                                      (replace2
                                                                         (replace2
                                                                            (replace2
                                                                               (shallowFlatten x)
                                                                               "&amp;"
                                                                               "&")
                                                                            "&lt;"
                                                                            "<")
                                                                         "&gt;"
                                                                         ">"))]
                _ -> []
        printPrepareNode (Environment Tag (TagAttr "img" m) l)
          | (Map.lookup "class" m) `elem`
              (map Just
                 ["tex", "mwe-math-fallback-png-inline tex",
                  "mwe-math-fallback-image-inline tex"])
            =
            case Map.lookup "alt" m of
                Just x -> [Environment Math (TagAttr "math" m)
                             (map C
                                (replace2 (replace2 (replace2 x "&amp;" "&") "&lt;" "<") "gt;"
                                   ">"))]
                Nothing -> [(Environment Tag (TagAttr "img" m)
                               (printPrepareTree l))]
        printPrepareNode (Environment Tag (TagAttr "div" m) l)
          = case
              do c <- Map.lookup "class" m
                 guard $ ((isInfixOf "source" c) || (isInfixOf "highlight" c))
                 return $
                   Environment Source
                     (TagAttr "source"
                        (Map.fromList [("lang", (takeWhile (/= ' ') c))]))
                     (deepFlatten l)
              of
                Nothing -> [Environment Tag (TagAttr "div" m) (printPrepareTree l)]
                Just x -> [x]
        printPrepareNode (Environment x y l)
          = [Environment x y (printPrepareTree l)]
        printPrepareNode x = [x]
        
        mypred :: String -> Anything Char -> Bool
        mypred x y
          = case y of
                (Environment Tag (TagAttr z _) _) | z == x -> True
                _ -> False
        
        magnpred :: Anything Char -> Bool
        magnpred y
          = case y of
                (Environment Tag (TagAttr "div" m) _) | (Map.lookup "class" m) ==
                                                          (Just "magnify")
                                                        -> False
                _ -> True
        unEsc x
          = let z = unEscapeString x in
              if isUTF8Encoded z then decodeString z else z
        imgfun m l tt
          = maybeToList $
              do t <- case
                        (fmap (parseit imgparsers) (Map.lookup "title" m)) `mplus` tt of
                          Just x -> return $ [C '|'] ++ x
                          Nothing -> return []
                 h <- case filter (mypred "img") l of
                          [(Environment Tag (TagAttr "img" mmm) _)] -> case
                                                                         Map.lookup "src" mmm of
                                                                           Just s -> if
                                                                                       length
                                                                                         (reverse
                                                                                            (splitOn
                                                                                               "/"
                                                                                               s))
                                                                                         >= 2
                                                                                       then
                                                                                       return
                                                                                         ("File:" ++
                                                                                            (unEsc
                                                                                               ((reverse
                                                                                                   (splitOn
                                                                                                      "/"
                                                                                                      s))
                                                                                                  !!
                                                                                                  (if
                                                                                                     "thumb"
                                                                                                       `elem`
                                                                                                       splitOn
                                                                                                         "/"
                                                                                                         s
                                                                                                     then
                                                                                                     1
                                                                                                     else
                                                                                                     0))))
                                                                                       else mzero
                                                                           _ -> mzero
                          _ -> mzero
                 w <- case
                        case filter (mypred "img") l of
                            [(Environment Tag (TagAttr "img" mmm) _)] -> do ma <- Map.lookup
                                                                                    "width"
                                                                                    mmm
                                                                            guard $ tt == Nothing
                                                                            return ma
                            _ -> Nothing
                        of
                          Just x -> return $ [C '|'] ++ (map C (x ++ "px"))
                          Nothing -> return []
                 return (Environment Wikilink (Str "") ((map C h) ++ w ++ t))