module Text.XML.WraXML.Tree.TagSoup where
import qualified Text.HTML.TagSoup as Tag
import qualified Text.HTML.TagSoup as Match
import Text.HTML.TagSoup (Tag(..), )
import qualified Text.XML.WraXML.Tree.Literal as XmlTree
import qualified Text.HTML.Basic.Tag as TagH
import qualified Text.XML.Basic.Position as Position
import qualified Text.XML.Basic.Attribute as Attr
import qualified Text.XML.Basic.ProcessingInstruction as PI
import qualified Text.XML.Basic.Name.LowerCase as NameLC
import qualified Text.XML.Basic.Name as Name
import qualified Text.XML.WraXML.Tree.LazyParser as ParserU
import qualified Text.ParserCombinators.Poly.Lazy as Parser
import Data.Bool.HT (select, )
import Control.Monad (liftM, )
import qualified Data.List as List
type PosTag = (Position.T, Tag String)
defaultFilename :: String
defaultFilename :: String
defaultFilename = String
"input"
attachPos :: [Tag String] -> [PosTag]
attachPos :: [Tag String] -> [PosTag]
attachPos =
forall a b. (a, b) -> b
snd forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall (t :: * -> *) s a b.
Traversable t =>
(s -> a -> (s, b)) -> s -> t a -> (s, t b)
List.mapAccumL
(\T
pos Tag String
t ->
let newPos :: T
newPos =
case Tag String
t of
Tag.TagPosition Row
setRow Row
setColumn ->
String -> Row -> Row -> T
Position.new String
defaultFilename Row
setRow Row
setColumn
Tag String
_ -> T
pos
in (T
newPos, (T
newPos, Tag String
t)))
(String -> T
Position.initialize String
defaultFilename)
removeMetaPos :: [PosTag] -> [PosTag]
removeMetaPos :: [PosTag] -> [PosTag]
removeMetaPos =
forall a. (a -> Bool) -> [a] -> [a]
filter (\(T
_pos,Tag String
tag) -> case Tag String
tag of
TagPosition Row
_ Row
_ -> Bool
False
Tag String
_ -> Bool
True)
type XmlTree name string = XmlTree.T Position.T name string
toXmlTreesString ::
(Name.Tag name, Name.Attribute name) =>
[Tag String] -> [XmlTree name String]
toXmlTreesString :: forall name.
(Tag name, Attribute name) =>
[Tag String] -> [XmlTree name String]
toXmlTreesString =
forall name.
(Tag name, Attribute name) =>
[PosTag] -> [XmlTree name String]
toXmlTreesAux forall b c a. (b -> c) -> (a -> b) -> a -> c
.
[PosTag] -> [PosTag]
removeMetaPos forall b c a. (b -> c) -> (a -> b) -> a -> c
.
[Tag String] -> [PosTag]
attachPos forall b c a. (b -> c) -> (a -> b) -> a -> c
.
forall str. StringLike str => [Tag str] -> [Tag str]
Tag.canonicalizeTags
toXmlTreesAux ::
(Name.Tag name, Name.Attribute name) =>
[PosTag] -> [XmlTree name String]
toXmlTreesAux :: forall name.
(Tag name, Attribute name) =>
[PosTag] -> [XmlTree name String]
toXmlTreesAux =
forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall t a. Parser t a -> [t] -> (a, [t])
Parser.runParser (forall t a. Parser t a -> Parser t [a]
ParserU.manyLazy forall name i.
(Tag name, Attribute name) =>
Parser i (T i name String)
parseBranch)
type Parser i a = Parser.Parser (i, Tag String) a
parseBranch ::
(Name.Tag name, Name.Attribute name) =>
Parser i (XmlTree.T i name String)
parseBranch :: forall name i.
(Tag name, Attribute name) =>
Parser i (T i name String)
parseBranch =
do (i
ti,Tag String
t) <- forall t. Parser t t
Parser.next
case Tag String
t of
TagText String
text -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall i str name. i -> str -> T i name str
XmlTree.literalIndex i
ti String
text)
TagOpen (Char
'?':String
target) [Attribute String]
attrs ->
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
forall name i str.
(Tag name, Attribute name) =>
i -> String -> T name str -> T i name str
XmlTree.processingIndex i
ti String
target forall a b. (a -> b) -> a -> b
$
case [Attribute String]
attrs of
[(String
"",String
str)] ->
forall name string. String -> T name string
PI.Unknown String
str
[Attribute String]
_ ->
forall name string. [T name string] -> T name string
PI.Known forall a b. (a -> b) -> a -> b
$
forall a b. (a -> b) -> [a] -> [b]
map (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall name string.
Attribute name =>
String -> string -> T name string
Attr.new) [Attribute String]
attrs
TagOpen String
"![CDATA[" [(String
"",String
str)] ->
forall (m :: * -> *) a. Monad m => a -> m a
return (forall i name str. i -> String -> T i name str
XmlTree.cdataIndex i
ti String
str)
TagOpen String
name [Attribute String]
attrs ->
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM
(forall name i str.
(Tag name, Attribute name) =>
i -> String -> [(String, str)] -> [T i name str] -> T i name str
XmlTree.tagIndexAttr i
ti String
name [Attribute String]
attrs)
(forall name i.
(Tag name, Attribute name) =>
T i name String -> Name name -> Parser i [T i name String]
parseSubTrees
(forall i name str. i -> String -> T i name str
XmlTree.warningIndex i
ti
String
"unexpected end of file") (forall name. C name => String -> name
Name.fromString String
name))
TagClose String
name ->
let makeWarning ::
(Name.Tag name) =>
i -> TagH.Name name -> XmlTree.T i name String
makeWarning :: forall name i. Tag name => i -> Name name -> T i name String
makeWarning i
i Name name
n =
forall i name str. i -> String -> T i name str
XmlTree.warningIndex i
i forall a b. (a -> b) -> a -> b
$
if forall name. Tag name => Name name -> Bool
TagH.isSloppy Name name
n
then String
"misplaced sloppy closing tag </" forall a. [a] -> [a] -> [a]
++ String
name forall a. [a] -> [a] -> [a]
++ String
">"
else String
"isolated closing tag </" forall a. [a] -> [a] -> [a]
++ String
name forall a. [a] -> [a] -> [a]
++ String
">"
in forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall name i. Tag name => i -> Name name -> T i name String
makeWarning i
ti forall a b. (a -> b) -> a -> b
$ forall name. C name => String -> name
Name.fromString String
name
TagComment String
cmt ->
forall (m :: * -> *) a. Monad m => a -> m a
return (forall i name str. i -> String -> T i name str
XmlTree.commentIndex i
ti String
cmt)
TagWarning String
msg ->
forall (m :: * -> *) a. Monad m => a -> m a
return (forall i name str. i -> String -> T i name str
XmlTree.warningIndex i
ti String
msg)
TagPosition Row
_ Row
_ ->
forall a. HasCallStack => String -> a
error String
"Unexpected position information. Please filter it out!"
parseSubTrees ::
(Name.Tag name, Name.Attribute name) =>
XmlTree.T i name String -> TagH.Name name -> Parser i [XmlTree.T i name String]
parseSubTrees :: forall name i.
(Tag name, Attribute name) =>
T i name String -> Name name -> Parser i [T i name String]
parseSubTrees T i name String
warn Name name
name =
forall t a. Parser t a -> Parser t a
ParserU.force forall a b. (a -> b) -> a -> b
$
if forall name. Tag name => Name name -> Bool
TagH.isEmpty Name name
name
then
(forall t. (t -> Bool) -> Parser t t
Parser.satisfy (forall str. Eq str => str -> Tag str -> Bool
Match.isTagCloseName (forall name. C name => name -> String
Name.toString Name name
name) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return [])
forall t a. Parser t a -> Parser t a -> Parser t a
`Parser.onFail` forall (m :: * -> *) a. Monad m => a -> m a
return []
else
forall t a. Parser t [a] -> a -> Parser t a -> Parser t [a]
ParserU.manyFinallyAppend
(forall name i.
(Tag name, Attribute name) =>
Name name -> Parser i [T i name String]
parseTerminator Name name
name)
T i name String
warn forall name i.
(Tag name, Attribute name) =>
Parser i (T i name String)
parseBranch
parseTerminator ::
(Name.Tag name, Name.Attribute name) =>
TagH.Name name -> Parser i [XmlTree.T i name String]
parseTerminator :: forall name i.
(Tag name, Attribute name) =>
Name name -> Parser i [T i name String]
parseTerminator Name name
name =
do c :: (i, Tag String)
c@(i
ci,Tag String
ct) <- forall t. Parser t t
Parser.next
let retry :: b -> Parser (i, Tag String) b
retry b
warns = forall t. [t] -> Parser t ()
Parser.reparse [(i, Tag String)
c] forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return b
warns
case Tag String
ct of
TagClose String
closeName_ ->
let closeName :: Name name
closeName = forall name. C name => String -> name
Name.fromString String
closeName_
in forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. a -> [(Bool, a)] -> a
select
((forall name. Tag name => Name name -> Bool
TagH.isEmpty Name name
closeName,
forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$ String
"the tag <" forall a. [a] -> [a] -> [a]
++ String
closeName_ forall a. [a] -> [a] -> [a]
++ String
"> closes nothing.") forall a. a -> [a] -> [a]
:
(Name name
name forall name. Tag name => Name name -> Name name -> Bool
`TagH.isInnerOf` Name name
closeName, forall {b}. b -> Parser (i, Tag String) b
retry []) forall a. a -> [a] -> [a]
:
(Name name
nameforall a. Eq a => a -> a -> Bool
==Name name
closeName, forall (m :: * -> *) a. Monad m => a -> m a
return []) forall a. a -> [a] -> [a]
:
(forall name. Tag name => Name name -> Bool
TagH.isSloppy Name name
name,
forall {b}. b -> Parser (i, Tag String) b
retry [forall i name str. i -> String -> T i name str
XmlTree.warningIndex i
ci forall a b. (a -> b) -> a -> b
$
String
"sloppy tag <"
forall a. [a] -> [a] -> [a]
++ forall name. C name => name -> String
Name.toString Name name
name forall a. [a] -> [a] -> [a]
++ String
"> closed by </"
forall a. [a] -> [a] -> [a]
++ String
closeName_ forall a. [a] -> [a] -> [a]
++ String
">"]) forall a. a -> [a] -> [a]
:
(forall name. Tag name => Name name -> Bool
TagH.isSloppy Name name
closeName,
forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$ String
"ignore sloppy closing tag </" forall a. [a] -> [a] -> [a]
++ forall name. C name => name -> String
Name.toString Name name
name forall a. [a] -> [a] -> [a]
++ String
">") forall a. a -> [a] -> [a]
:
[])
(forall (m :: * -> *) a. Monad m => a -> m a
return [forall i name str. i -> String -> T i name str
XmlTree.warningIndex i
ci forall a b. (a -> b) -> a -> b
$
String
"open tag <" forall a. [a] -> [a] -> [a]
++ forall name. C name => name -> String
Name.toString Name name
name forall a. [a] -> [a] -> [a]
++
String
"> and close tag </" forall a. [a] -> [a] -> [a]
++ forall name. C name => name -> String
Name.toString Name name
closeName forall a. [a] -> [a] -> [a]
++
String
"> do not match"])
TagOpen String
openName [Attribute String]
_ ->
if forall name. C name => String -> name
Name.fromString String
openName forall name. Tag name => Name name -> Name name -> Bool
`TagH.closes` Name name
name
then forall {b}. b -> Parser (i, Tag String) b
retry []
else forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$
String
"open tag <" forall a. [a] -> [a] -> [a]
++ String
openName forall a. [a] -> [a] -> [a]
++
String
"> does not close tag <" forall a. [a] -> [a] -> [a]
++ forall name. C name => name -> String
Name.toString Name name
name forall a. [a] -> [a] -> [a]
++ String
">"
Tag String
_ -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail forall a b. (a -> b) -> a -> b
$ String
"not a termination of a tag"
parseOptions :: Tag.ParseOptions String
parseOptions :: ParseOptions String
parseOptions =
forall str. StringLike str => ParseOptions str
Tag.parseOptions
{optTagPosition :: Bool
Tag.optTagPosition = Bool
True,
optTagWarning :: Bool
Tag.optTagWarning = Bool
True}
example :: [XmlTree NameLC.T String]
example :: [XmlTree T String]
example =
forall name.
(Tag name, Attribute name) =>
[Tag String] -> [XmlTree name String]
toXmlTreesString forall a b. (a -> b) -> a -> b
$
forall str. StringLike str => ParseOptions str -> str -> [Tag str]
Tag.parseTagsOptions ParseOptions String
parseOptions forall a b. (a -> b) -> a -> b
$
String
"<html><b><font>test</b></font></html>"