This parser emits the data structure of the original tagsoup package by Neil Mitchell.
module Text.HTML.TagSoup.HT.ParserNM (
  ) where

import qualified Text.HTML.TagSoup as TagSoup
import qualified Text.HTML.TagSoup.HT.Parser      as Parser
import qualified Text.HTML.TagSoup.HT.Tag         as Custom
import qualified Text.HTML.TagSoup.HT.PositionTag as PosTag
import qualified Text.XML.Basic.Position    as Position
import qualified Text.XML.Basic.ProcessingInstruction as PI
import qualified Text.XML.Basic.Attribute   as Attr
import qualified Text.XML.Basic.Name        as Name
import qualified Text.XML.Basic.Name.MixedCase as NameMC

import Data.Accessor.Basic ((^.), )

runSoup :: String -> [TagSoup.Tag]
runSoup =
   concatMap convertTag .
   (\x -> x :: [PosTag.T NameMC.T String]) .

convertTag ::
   (Name.Tag name, Name.Attribute name) =>
   PosTag.T name String -> [TagSoup.Tag]
convertTag (PosTag.Cons pos tag) =
   TagSoup.TagPosition (pos ^. Position.row) (pos ^. Position.column) :
   (case tag of
      Custom.Open name attrs ->
         TagSoup.TagOpen (Name.toString name) (map Attr.toPair attrs)
      Custom.Close name ->
         TagSoup.TagClose (Name.toString name)
      Custom.Text text ->
         TagSoup.TagText text
      Custom.Comment text ->
         TagSoup.TagComment text
      Custom.Special name content ->
         TagSoup.TagOpen ('!' : Name.toString name) [("",content)]
      Custom.Processing name p ->
         TagSoup.TagOpen ('?' : Name.toString name) $
         case p of
            PI.Known attrs     -> map Attr.toPair attrs
            PI.Unknown content -> [("",content)]
      Custom.Warning text    -> TagSoup.TagWarning text) :