{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
module Text.Seonbi.ContentTypes
( ContentType
, HtmlTransformer
, TextTransformer
, asCommonMarkTransformer
, asHtmlTransformer
, asHtmlTransformer'
, asPlainTextTransformer
, asXhtmlTransformer
, contentTypeFromText
, contentTypes
, contentTypeText
, transformWithContentType
) where
#if MIN_VERSION_base(4,13,0)
import Prelude hiding (MonadFail)
#endif
import Control.Monad (forM)
import Control.Monad.Fail (MonadFail)
import Data.Maybe (fromMaybe, isNothing)
import Data.List
import Text.Read (readMaybe)
import CMark
import Data.CaseInsensitive
import Data.Set
import Data.Text as ST
import Data.Text.Lazy as LT
import Data.Text.Lazy.Builder
import HTMLEntities.Builder
import HTMLEntities.Decoder
import Text.Seonbi.Html
import Text.Seonbi.Html.Tag (headingLevel, headingTag')
import qualified Text.Seonbi.Html.TagStack as TagStack
type HtmlTransformer m
= (Monad m, MonadFail m) => [HtmlEntity] -> m [HtmlEntity]
type TextTransformer m
= (Monad m, MonadFail m) => LT.Text -> m LT.Text
type TransformerTransformer m
= (Monad m, MonadFail m) => HtmlTransformer m -> TextTransformer m
asHtmlTransformer'
:: (Monad m, MonadFail m)
=> Bool
-> TransformerTransformer m
asHtmlTransformer' :: Bool -> TransformerTransformer m
asHtmlTransformer' Bool
xhtml HtmlTransformer m
transformer Text
htmlText = do
case Text -> Result [HtmlEntity]
scanHtml Text
htmlText of
Done Text
"" [HtmlEntity]
input -> do
[HtmlEntity]
output <- HtmlTransformer m
[HtmlEntity] -> m [HtmlEntity]
transformer [HtmlEntity]
input
Text -> m Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> m Text) -> Text -> m Text
forall a b. (a -> b) -> a -> b
$ [HtmlEntity] -> Text
printHtml' [HtmlEntity]
output
Result [HtmlEntity]
_ ->
String -> m Text
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"failed to parse input"
where
printHtml' :: [HtmlEntity] -> LT.Text
printHtml' :: [HtmlEntity] -> Text
printHtml'
| Bool
xhtml = [HtmlEntity] -> Text
printXhtml
| Bool
otherwise = [HtmlEntity] -> Text
printHtml
asHtmlTransformer :: (Monad m, MonadFail m) => TransformerTransformer m
asHtmlTransformer :: HtmlTransformer m -> TextTransformer m
asHtmlTransformer = Bool -> TransformerTransformer m
forall (m :: * -> *).
(Monad m, MonadFail m) =>
Bool -> TransformerTransformer m
asHtmlTransformer' Bool
False
asXhtmlTransformer :: (Monad m, MonadFail m) => TransformerTransformer m
asXhtmlTransformer :: HtmlTransformer m -> TextTransformer m
asXhtmlTransformer = Bool -> TransformerTransformer m
forall (m :: * -> *).
(Monad m, MonadFail m) =>
Bool -> TransformerTransformer m
asHtmlTransformer' Bool
True
asPlainTextTransformer :: (Monad m, MonadFail m) => TransformerTransformer m
asPlainTextTransformer :: HtmlTransformer m -> TextTransformer m
asPlainTextTransformer HtmlTransformer m
transformer Text
text' = do
let escaped :: Text
escaped = Builder -> Text
toLazyText (Builder -> Text) -> Builder -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Builder
HTMLEntities.Builder.text (Text -> Builder) -> Text -> Builder
forall a b. (a -> b) -> a -> b
$ Text -> Text
LT.toStrict Text
text'
let entities :: [HtmlEntity]
entities = [HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
TagStack.empty (Text -> HtmlEntity) -> Text -> HtmlEntity
forall a b. (a -> b) -> a -> b
$ Text -> Text
LT.toStrict Text
escaped]
[HtmlEntity]
output <- HtmlTransformer m
[HtmlEntity] -> m [HtmlEntity]
transformer [HtmlEntity]
entities
Text -> m Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> m Text) -> Text -> m Text
forall a b. (a -> b) -> a -> b
$ [HtmlEntity] -> Text
printText [HtmlEntity]
output
asCommonMarkTransformer :: (Monad m, MonadFail m) => TransformerTransformer m
asCommonMarkTransformer :: HtmlTransformer m -> TextTransformer m
asCommonMarkTransformer HtmlTransformer m
transformer Text
input = do
let inputNode :: Node
inputNode = [CMarkOption] -> Text -> Node
commonmarkToNode [Item [CMarkOption]
CMarkOption
optSourcePos, Item [CMarkOption]
CMarkOption
optUnsafe] (Text -> Node) -> Text -> Node
forall a b. (a -> b) -> a -> b
$
Text -> Text
LT.toStrict Text
input
[HtmlEntity]
inputEntities <- HtmlTagStack -> Node -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTagStack -> Node -> m [HtmlEntity]
fromNode [] Node
inputNode
[HtmlEntity]
outputEntities <- HtmlTransformer m
[HtmlEntity] -> m [HtmlEntity]
transformer ([HtmlEntity] -> m [HtmlEntity]) -> [HtmlEntity] -> m [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ [HtmlEntity] -> [HtmlEntity]
normalizeText [HtmlEntity]
inputEntities
let outputNodes :: [Node]
outputNodes = [HtmlEntity] -> [Node]
toNode [HtmlEntity]
outputEntities
let outputNode :: Node
outputNode = case [Node]
outputNodes of
[node :: Item [Node]
node@(Node _ DOCUMENT _)] -> Item [Node]
Node
node
[Node]
nodes -> Maybe PosInfo -> NodeType -> [Node] -> Node
Node Maybe PosInfo
forall a. Maybe a
Nothing NodeType
DOCUMENT [Node]
nodes
Text -> m Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> m Text) -> Text -> m Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
LT.fromStrict (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
[CMarkOption] -> Maybe Int -> Node -> Text
nodeToCommonmark [Item [CMarkOption]
CMarkOption
optSourcePos , Item [CMarkOption]
CMarkOption
optUnsafe] Maybe Int
forall a. Maybe a
Nothing Node
outputNode
where
fromNode :: (Monad m, MonadFail m) => HtmlTagStack -> Node -> m [HtmlEntity]
fromNode :: HtmlTagStack -> Node -> m [HtmlEntity]
fromNode HtmlTagStack
stack (Node Maybe PosInfo
posInfo NodeType
nodeType [Node]
children) = case NodeType
nodeType of
NodeType
DOCUMENT ->
HtmlTag -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTag -> m [HtmlEntity]
nodeWithChildren HtmlTag
Article
NodeType
THEMATIC_BREAK -> [HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return
[ HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag HtmlTagStack
stack HtmlTag
HR (Maybe PosInfo -> Text
posAttr Maybe PosInfo
posInfo)
, HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag HtmlTagStack
stack HtmlTag
HR
]
NodeType
PARAGRAPH ->
HtmlTag -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTag -> m [HtmlEntity]
nodeWithChildren HtmlTag
P
NodeType
BLOCK_QUOTE ->
HtmlTag -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTag -> m [HtmlEntity]
nodeWithChildren HtmlTag
BlockQuote
HTML_BLOCK Text
html ->
case Text -> Result [HtmlEntity]
scanHtml (Text -> Result [HtmlEntity]) -> Text -> Result [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ Text -> Text
LT.fromStrict Text
html of
Done Text
"" [HtmlEntity]
input' -> [HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return ([HtmlEntity] -> m [HtmlEntity]) -> [HtmlEntity] -> m [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ [HtmlEntity] -> [HtmlEntity]
rebaseStack [HtmlEntity]
input'
Result [HtmlEntity]
_ -> [HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return [HtmlTagStack -> Text -> HtmlEntity
HtmlCdata HtmlTagStack
stack Text
html]
CUSTOM_BLOCK Text
_ Text
_ ->
[HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return []
CODE_BLOCK Text
info Text
text' -> [HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return
[ HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag HtmlTagStack
stack HtmlTag
Pre (Maybe PosInfo -> Text
posAttr Maybe PosInfo
posInfo Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Text -> Text
forall a. Show a => Text -> a -> Text
attr' Text
"info" Text
info)
, HtmlTagStack -> Text -> HtmlEntity
HtmlCdata (HtmlTag -> HtmlTagStack
nextStack HtmlTag
Pre) Text
text'
, HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag HtmlTagStack
stack HtmlTag
Pre
]
HEADING Int
level ->
HtmlTag -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTag -> m [HtmlEntity]
nodeWithChildren (HtmlTag -> m [HtmlEntity]) -> HtmlTag -> m [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ Int -> HtmlTag
headingTag' Int
level
LIST ListAttributes
listAttrs ->
HtmlTag -> Text -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTag -> Text -> m [HtmlEntity]
nodeWithChildren' HtmlTag
UL (Text -> m [HtmlEntity]) -> Text -> m [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ Text -> ListAttributes -> Text
forall a. Show a => Text -> a -> Text
attr' Text
"list-attrs" ListAttributes
listAttrs
NodeType
ITEM ->
HtmlTag -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTag -> m [HtmlEntity]
nodeWithChildren HtmlTag
LI
TEXT Text
text' ->
[HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return [HtmlTagStack -> Text -> HtmlEntity
HtmlCdata HtmlTagStack
stack Text
text']
NodeType
SOFTBREAK -> [HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return
[ HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag HtmlTagStack
stack HtmlTag
BR (Maybe PosInfo -> Text
posAttr Maybe PosInfo
posInfo Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Bool -> Text
forall a. Show a => Text -> a -> Text
attr' Text
"softbreak" Bool
True)
, HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag HtmlTagStack
stack HtmlTag
BR
]
NodeType
LINEBREAK -> [HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return
[ HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag HtmlTagStack
stack HtmlTag
BR (Maybe PosInfo -> Text
posAttr Maybe PosInfo
posInfo)
, HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag HtmlTagStack
stack HtmlTag
BR
]
HTML_INLINE Text
html ->
case Text -> Result [HtmlEntity]
scanHtml (Text -> Result [HtmlEntity]) -> Text -> Result [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ Text -> Text
LT.fromStrict Text
html of
Done Text
"" [HtmlEntity]
input' -> [HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return ([HtmlEntity] -> m [HtmlEntity]) -> [HtmlEntity] -> m [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ [HtmlEntity] -> [HtmlEntity]
rebaseStack [HtmlEntity]
input'
Result [HtmlEntity]
_ -> [HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return [HtmlTagStack -> Text -> HtmlEntity
HtmlCdata HtmlTagStack
stack Text
html]
CUSTOM_INLINE Text
_ Text
_ ->
[HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return []
CODE Text
text' -> [HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return
[ HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag HtmlTagStack
stack HtmlTag
Code (Maybe PosInfo -> Text
posAttr Maybe PosInfo
posInfo)
, HtmlTagStack -> Text -> HtmlEntity
HtmlCdata (HtmlTag -> HtmlTagStack
nextStack HtmlTag
Code) Text
text'
, HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag HtmlTagStack
stack HtmlTag
Code
]
NodeType
EMPH ->
HtmlTag -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTag -> m [HtmlEntity]
nodeWithChildren HtmlTag
Em
NodeType
STRONG ->
HtmlTag -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTag -> m [HtmlEntity]
nodeWithChildren HtmlTag
Strong
LINK Text
href Text
title ->
HtmlTag -> Text -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTag -> Text -> m [HtmlEntity]
nodeWithChildren' HtmlTag
A (Text -> m [HtmlEntity]) -> Text -> m [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Text
rawAttr Text
" href" Text
href Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Text -> Text
rawAttr Text
" title" Text
title
IMAGE Text
src Text
title ->
HtmlTag -> Text -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTag -> Text -> m [HtmlEntity]
nodeWithChildren' HtmlTag
Img (Text -> m [HtmlEntity]) -> Text -> m [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Text
rawAttr Text
" src" Text
src Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Text -> Text
rawAttr Text
" title" Text
title
where
nextStack :: HtmlTag -> HtmlTagStack
nextStack :: HtmlTag -> HtmlTagStack
nextStack = (HtmlTag -> HtmlTagStack -> HtmlTagStack
`TagStack.push` HtmlTagStack
stack)
nodeWithChildren :: (Monad m, MonadFail m) => HtmlTag -> m [HtmlEntity]
nodeWithChildren :: HtmlTag -> m [HtmlEntity]
nodeWithChildren HtmlTag
tag' = HtmlTag -> Text -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTag -> Text -> m [HtmlEntity]
nodeWithChildren' HtmlTag
tag' Text
""
nodeWithChildren' :: (Monad m, MonadFail m)
=> HtmlTag -> ST.Text -> m [HtmlEntity]
nodeWithChildren' :: HtmlTag -> Text -> m [HtmlEntity]
nodeWithChildren' HtmlTag
tag' Text
extraAttrs = do
[[HtmlEntity]]
mid <- [Node] -> (Node -> m [HtmlEntity]) -> m [[HtmlEntity]]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [Node]
children ((Node -> m [HtmlEntity]) -> m [[HtmlEntity]])
-> (Node -> m [HtmlEntity]) -> m [[HtmlEntity]]
forall a b. (a -> b) -> a -> b
$ do
HtmlTagStack -> Node -> m [HtmlEntity]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
HtmlTagStack -> Node -> m [HtmlEntity]
fromNode (HtmlTag -> HtmlTagStack
nextStack HtmlTag
tag')
let middle :: [HtmlEntity]
middle = [[HtmlEntity]] -> [HtmlEntity]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
Data.List.concat [[HtmlEntity]]
mid
[HtmlEntity] -> m [HtmlEntity]
forall (m :: * -> *) a. Monad m => a -> m a
return ([HtmlEntity] -> m [HtmlEntity]) -> [HtmlEntity] -> m [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ HtmlTagStack -> HtmlTag -> Text -> HtmlEntity
HtmlStartTag HtmlTagStack
stack HtmlTag
tag' (Maybe PosInfo -> Text
posAttr Maybe PosInfo
posInfo Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
extraAttrs) HtmlEntity -> [HtmlEntity] -> [HtmlEntity]
forall a. a -> [a] -> [a]
:
[HtmlEntity]
middle [HtmlEntity] -> [HtmlEntity] -> [HtmlEntity]
forall a. [a] -> [a] -> [a]
++ [HtmlTagStack -> HtmlTag -> HtmlEntity
HtmlEndTag HtmlTagStack
stack HtmlTag
tag']
rebase' :: HtmlTagStack -> HtmlTagStack
rebase' :: HtmlTagStack -> HtmlTagStack
rebase' = HtmlTagStack -> HtmlTagStack -> HtmlTagStack -> HtmlTagStack
TagStack.rebase [] HtmlTagStack
stack
rebaseStack :: [HtmlEntity] -> [HtmlEntity]
rebaseStack :: [HtmlEntity] -> [HtmlEntity]
rebaseStack = (HtmlEntity -> HtmlEntity) -> [HtmlEntity] -> [HtmlEntity]
forall a b. (a -> b) -> [a] -> [b]
Prelude.map (\HtmlEntity
e -> HtmlEntity
e { tagStack :: HtmlTagStack
tagStack = HtmlTagStack -> HtmlTagStack
rebase' (HtmlTagStack -> HtmlTagStack) -> HtmlTagStack -> HtmlTagStack
forall a b. (a -> b) -> a -> b
$ HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
e })
toNode :: [HtmlEntity] -> [Node]
toNode :: [HtmlEntity] -> [Node]
toNode [] = []
toNode (HtmlEntity
x:[HtmlEntity]
xs) = case HtmlEntity
x of
HtmlComment HtmlTagStack
_ Text
comment' ->
Maybe PosInfo -> NodeType -> [Node] -> Node
Node Maybe PosInfo
forall a. Maybe a
Nothing (Text -> NodeType
htmlNode Text
comment') [] Node -> [Node] -> [Node]
forall a. a -> [a] -> [a]
: [HtmlEntity] -> [Node]
toNode [HtmlEntity]
xs
HtmlCdata HtmlTagStack
_ Text
cdata ->
Maybe PosInfo -> NodeType -> [Node] -> Node
Node Maybe PosInfo
forall a. Maybe a
Nothing (Text -> NodeType
TEXT Text
cdata) [] Node -> [Node] -> [Node]
forall a. a -> [a] -> [a]
: [HtmlEntity] -> [Node]
toNode [HtmlEntity]
xs
HtmlText HtmlTagStack
_ Text
rawText' ->
Maybe PosInfo -> NodeType -> [Node] -> Node
Node Maybe PosInfo
forall a. Maybe a
Nothing (Text -> NodeType
TEXT (Text -> NodeType) -> Text -> NodeType
forall a b. (a -> b) -> a -> b
$ Text -> Text
unescape Text
rawText') [] Node -> [Node] -> [Node]
forall a. a -> [a] -> [a]
: [HtmlEntity] -> [Node]
toNode [HtmlEntity]
xs
HtmlEndTag HtmlTagStack
_ HtmlTag
_ ->
[HtmlEntity] -> [Node]
toNode [HtmlEntity]
xs
start :: HtmlEntity
start@(HtmlStartTag HtmlTagStack
stack HtmlTag
tag' Text
attrs) ->
let ([HtmlEntity]
children', [HtmlEntity]
rest) = (HtmlEntity -> Bool)
-> [HtmlEntity] -> ([HtmlEntity], [HtmlEntity])
forall a. (a -> Bool) -> [a] -> ([a], [a])
Data.List.break (HtmlTagStack -> HtmlTag -> HtmlEntity -> Bool
endOf HtmlTagStack
stack HtmlTag
tag') [HtmlEntity]
xs
([HtmlEntity]
end, [HtmlEntity]
rest') = case [HtmlEntity]
rest of
end' :: HtmlEntity
end'@(HtmlEndTag HtmlTagStack
endStack HtmlTag
endTag):[HtmlEntity]
afterEnd ->
if HtmlTagStack
endStack HtmlTagStack -> HtmlTagStack -> Bool
forall a. Eq a => a -> a -> Bool
== HtmlTagStack
stack Bool -> Bool -> Bool
&& HtmlTag
endTag HtmlTag -> HtmlTag -> Bool
forall a. Eq a => a -> a -> Bool
== HtmlTag
tag'
then ([Item [HtmlEntity]
HtmlEntity
end'], [HtmlEntity]
afterEnd)
else ([], [HtmlEntity]
rest)
[HtmlEntity]
_ -> ([], [HtmlEntity]
rest)
posInfo :: Maybe PosInfo
posInfo = Maybe PosInfo -> Maybe (Maybe PosInfo) -> Maybe PosInfo
forall a. a -> Maybe a -> a
fromMaybe Maybe PosInfo
forall a. Maybe a
Nothing
(Text -> Text -> Maybe (Maybe PosInfo)
forall a. Read a => Text -> Text -> Maybe a
getAttr Text
attrs Text
"posinfo" :: Maybe (Maybe PosInfo))
softbreak :: Maybe Bool
softbreak = Text -> Text -> Maybe Bool
forall a. Read a => Text -> Text -> Maybe a
getAttr Text
attrs Text
"softbreak" :: Maybe Bool
childrenHtmlNode :: NodeType
childrenHtmlNode = Text -> NodeType
htmlNode (Text -> NodeType) -> Text -> NodeType
forall a b. (a -> b) -> a -> b
$ Text -> Text
LT.toStrict (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ [HtmlEntity] -> Text
printHtml ([HtmlEntity] -> Text) -> [HtmlEntity] -> Text
forall a b. (a -> b) -> a -> b
$
HtmlEntity
start HtmlEntity -> [HtmlEntity] -> [HtmlEntity]
forall a. a -> [a] -> [a]
: [HtmlEntity]
children' [HtmlEntity] -> [HtmlEntity] -> [HtmlEntity]
forall a. [a] -> [a] -> [a]
++ [HtmlEntity]
end
nodeType :: NodeType
nodeType = case HtmlTag
tag' of
HtmlTag
Article -> NodeType
DOCUMENT
HtmlTag
BlockQuote -> NodeType
BLOCK_QUOTE
HtmlTag
HR -> NodeType
THEMATIC_BREAK
HtmlTag
P -> NodeType
PARAGRAPH
HtmlTag
Pre -> case Text -> Text -> Maybe Text
forall a. Read a => Text -> Text -> Maybe a
getAttr Text
"info" Text
attrs of
Just Text
info -> Text -> Text -> NodeType
CODE_BLOCK Text
info (Text -> NodeType) -> Text -> NodeType
forall a b. (a -> b) -> a -> b
$ [HtmlEntity] -> Text
printText' [HtmlEntity]
children'
Maybe Text
Nothing -> NodeType
childrenHtmlNode
HtmlTag
UL ->
NodeType
-> (ListAttributes -> NodeType) -> Maybe ListAttributes -> NodeType
forall b a. b -> (a -> b) -> Maybe a -> b
maybe NodeType
childrenHtmlNode ListAttributes -> NodeType
LIST (Text -> Text -> Maybe ListAttributes
forall a. Read a => Text -> Text -> Maybe a
getAttr Text
attrs Text
"list-attrs")
HtmlTag
LI -> NodeType
ITEM
HtmlTag
BR ->
if Maybe Bool
softbreak Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
True then NodeType
SOFTBREAK else NodeType
LINEBREAK
HtmlTag
Code -> Text -> NodeType
CODE (Text -> NodeType) -> Text -> NodeType
forall a b. (a -> b) -> a -> b
$ [HtmlEntity] -> Text
printText' [HtmlEntity]
children'
HtmlTag
Em -> NodeType
EMPH
HtmlTag
Strong -> NodeType
STRONG
HtmlTag
A -> Text -> Text -> NodeType
LINK
(Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
ST.empty (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Maybe Text
getRawAttr Text
attrs Text
"href")
(Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
ST.empty (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Maybe Text
getRawAttr Text
attrs Text
"title")
HtmlTag
Img -> Text -> Text -> NodeType
IMAGE
(Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
ST.empty (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Maybe Text
getRawAttr Text
attrs Text
"src")
(Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
ST.empty (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Maybe Text
getRawAttr Text
attrs Text
"title")
HtmlTag
_ ->
NodeType -> (Int -> NodeType) -> Maybe Int -> NodeType
forall b a. b -> (a -> b) -> Maybe a -> b
maybe NodeType
childrenHtmlNode Int -> NodeType
HEADING (HtmlTag -> Maybe Int
headingLevel HtmlTag
tag')
(NodeType
nodeType', [Node]
nodeChildren) =
if Maybe PosInfo -> Bool
forall a. Maybe a -> Bool
isNothing Maybe PosInfo
posInfo Bool -> Bool -> Bool
&& Maybe Bool -> Bool
forall a. Maybe a -> Bool
isNothing Maybe Bool
softbreak
then (NodeType
childrenHtmlNode, [])
else (NodeType
nodeType, [HtmlEntity] -> [Node]
toNode [HtmlEntity]
children')
in
Maybe PosInfo -> NodeType -> [Node] -> Node
Node Maybe PosInfo
posInfo NodeType
nodeType' [Node]
nodeChildren Node -> [Node] -> [Node]
forall a. a -> [a] -> [a]
: [HtmlEntity] -> [Node]
toNode [HtmlEntity]
rest'
where
block :: Bool
block :: Bool
block = case HtmlTagStack -> Maybe HtmlTag
TagStack.last (HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
x) of
Just HtmlTag
A -> Bool
False
Just HtmlTag
Em -> Bool
False
Just HtmlTag
H1 -> Bool
False
Just HtmlTag
H2 -> Bool
False
Just HtmlTag
H3 -> Bool
False
Just HtmlTag
H4 -> Bool
False
Just HtmlTag
H5 -> Bool
False
Just HtmlTag
H6 -> Bool
False
Just HtmlTag
P -> Bool
False
Just HtmlTag
Strong -> Bool
False
Just HtmlTag
tag' -> Maybe Int -> Bool
forall a. Maybe a -> Bool
isNothing (HtmlTag -> Maybe Int
headingLevel HtmlTag
tag')
Maybe HtmlTag
_ -> Bool
True
htmlNode :: ST.Text -> NodeType
htmlNode :: Text -> NodeType
htmlNode
| Bool
block = Text -> NodeType
HTML_BLOCK
| Bool
otherwise = Text -> NodeType
HTML_INLINE
unescape :: ST.Text -> ST.Text
unescape :: Text -> Text
unescape = Text -> Text
toStrict (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> Text
toLazyText (Builder -> Text) -> (Text -> Builder) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Builder
htmlEncodedText
rawAttr :: ST.Text -> ST.Text -> ST.Text
rawAttr :: Text -> Text -> Text
rawAttr Text
name Text
value = Text -> Text -> Text
ST.append Text
name (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
toStrict (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$
Text
"=\"" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Builder -> Text
toLazyText (Text -> Builder
HTMLEntities.Builder.text Text
value) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"\""
attr :: Show a => ST.Text -> a -> ST.Text
attr :: Text -> a -> Text
attr Text
name a
value =
Text -> Text -> Text
rawAttr (Text
"data-seonbi-cmark-" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
name) (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ String -> Text
ST.pack (a -> String
forall a. Show a => a -> String
show a
value)
attr' :: Show a => ST.Text -> a -> ST.Text
attr' :: Text -> a -> Text
attr' Text
name = Char -> Text -> Text
ST.cons Char
' ' (Text -> Text) -> (a -> Text) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> a -> Text
forall a. Show a => Text -> a -> Text
attr Text
name
posAttr :: Maybe PosInfo -> ST.Text
posAttr :: Maybe PosInfo -> Text
posAttr = Text -> Maybe PosInfo -> Text
forall a. Show a => Text -> a -> Text
attr Text
"posinfo"
getRawAttr :: HtmlRawAttrs -> ST.Text -> Maybe ST.Text
getRawAttr :: Text -> Text -> Maybe Text
getRawAttr Text
attrs Text
name =
case Text -> Text -> (Text, Text)
ST.breakOn Text
prefix Text
attrs of
(Text
_, Text
"") -> Maybe Text
forall a. Maybe a
Nothing
(Text
_, Text
head') ->
case (Char -> Bool) -> Text -> (Text, Text)
ST.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"') (Int -> Text -> Text
ST.drop (Text -> Int
ST.length Text
prefix) Text
head') of
(Text
_, Text
"") -> Maybe Text
forall a. Maybe a
Nothing
(Text
value, Text
_) ->
Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
toStrict (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Builder -> Text
toLazyText (Builder -> Text) -> Builder -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Builder
htmlEncodedText Text
value
where
prefix :: ST.Text
prefix :: Text
prefix = Text
name Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"=\""
getAttr :: Read a => HtmlRawAttrs -> ST.Text -> Maybe a
getAttr :: Text -> Text -> Maybe a
getAttr Text
attrs Text
name =
case Text -> Text -> Maybe Text
getRawAttr Text
attrs (Text
"data-seonbi-cmark-" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
name) of
Maybe Text
Nothing -> Maybe a
forall a. Maybe a
Nothing
Just Text
value -> String -> Maybe a
forall a. Read a => String -> Maybe a
readMaybe (String -> Maybe a) -> String -> Maybe a
forall a b. (a -> b) -> a -> b
$ Text -> String
ST.unpack Text
value
endOf :: HtmlTagStack -> HtmlTag -> HtmlEntity -> Bool
endOf :: HtmlTagStack -> HtmlTag -> HtmlEntity -> Bool
endOf HtmlTagStack
stack HtmlTag
tag' (HtmlEndTag HtmlTagStack
endStack HtmlTag
endTag) =
Bool -> Bool
not (HtmlTagStack
endStack HtmlTagStack -> HtmlTagStack -> Bool
`TagStack.descendsFrom` HtmlTagStack
stack) Bool -> Bool -> Bool
|| HtmlTag
endTag HtmlTag -> HtmlTag -> Bool
forall a. Eq a => a -> a -> Bool
== HtmlTag
tag'
endOf HtmlTagStack
_ HtmlTag
_ HtmlEntity
_ = Bool
False
printText' :: [HtmlEntity] -> ST.Text
printText' :: [HtmlEntity] -> Text
printText' = Text -> Text
toStrict (Text -> Text) -> ([HtmlEntity] -> Text) -> [HtmlEntity] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [HtmlEntity] -> Text
printText
type ContentType = CI ST.Text
contentTypeFromText :: ST.Text -> ContentType
contentTypeFromText :: Text -> ContentType
contentTypeFromText = Text -> ContentType
forall s. FoldCase s => s -> CI s
mk
contentTypeText :: ContentType -> ST.Text
contentTypeText :: ContentType -> Text
contentTypeText = ContentType -> Text
forall s. CI s -> s
original
newtype TransformerTransformer' m =
TransformerTransformer' (TransformerTransformer m)
transformers :: (Monad m, MonadFail m)
=> [(ContentType, TransformerTransformer' m)]
transformers :: [(ContentType, TransformerTransformer' m)]
transformers =
[ (ContentType
"text/html", TransformerTransformer m -> TransformerTransformer' m
forall (m :: * -> *).
TransformerTransformer m -> TransformerTransformer' m
TransformerTransformer' TransformerTransformer m
forall (m :: * -> *).
(Monad m, MonadFail m) =>
TransformerTransformer m
asHtmlTransformer)
, (ContentType
"application/xhtml+xml", TransformerTransformer m -> TransformerTransformer' m
forall (m :: * -> *).
TransformerTransformer m -> TransformerTransformer' m
TransformerTransformer' TransformerTransformer m
forall (m :: * -> *).
(Monad m, MonadFail m) =>
TransformerTransformer m
asXhtmlTransformer)
, (ContentType
"text/plain", TransformerTransformer m -> TransformerTransformer' m
forall (m :: * -> *).
TransformerTransformer m -> TransformerTransformer' m
TransformerTransformer' TransformerTransformer m
forall (m :: * -> *).
(Monad m, MonadFail m) =>
TransformerTransformer m
asPlainTextTransformer)
, (ContentType
"text/markdown", TransformerTransformer m -> TransformerTransformer' m
forall (m :: * -> *).
TransformerTransformer m -> TransformerTransformer' m
TransformerTransformer' TransformerTransformer m
forall (m :: * -> *).
(Monad m, MonadFail m) =>
TransformerTransformer m
asCommonMarkTransformer)
]
contentTypes :: Set ContentType
contentTypes :: Set ContentType
contentTypes = ([ContentType] -> Set ContentType
forall a. Ord a => [a] -> Set a
Data.Set.fromList ([ContentType] -> Set ContentType)
-> ([(ContentType, TransformerTransformer' IO)] -> [ContentType])
-> [(ContentType, TransformerTransformer' IO)]
-> Set ContentType
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((ContentType, TransformerTransformer' IO) -> ContentType)
-> [(ContentType, TransformerTransformer' IO)] -> [ContentType]
forall a b. (a -> b) -> [a] -> [b]
Prelude.map (ContentType, TransformerTransformer' IO) -> ContentType
forall a b. (a, b) -> a
fst)
([(ContentType, TransformerTransformer' IO)]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
[(ContentType, TransformerTransformer' m)]
transformers :: [(ContentType, TransformerTransformer' IO)])
getTransformerTransformer :: (Monad m, MonadFail m)
=> ContentType
-> Maybe (TransformerTransformer' m)
getTransformerTransformer :: ContentType -> Maybe (TransformerTransformer' m)
getTransformerTransformer ContentType
contentType =
(ContentType, TransformerTransformer' m)
-> TransformerTransformer' m
forall a b. (a, b) -> b
snd ((ContentType, TransformerTransformer' m)
-> TransformerTransformer' m)
-> Maybe (ContentType, TransformerTransformer' m)
-> Maybe (TransformerTransformer' m)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((ContentType, TransformerTransformer' m) -> Bool)
-> [(ContentType, TransformerTransformer' m)]
-> Maybe (ContentType, TransformerTransformer' m)
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
Data.List.find ((ContentType -> ContentType -> Bool
forall a. Eq a => a -> a -> Bool
== ContentType
contentType) (ContentType -> Bool)
-> ((ContentType, TransformerTransformer' m) -> ContentType)
-> (ContentType, TransformerTransformer' m)
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ContentType, TransformerTransformer' m) -> ContentType
forall a b. (a, b) -> a
fst) [(ContentType, TransformerTransformer' m)]
forall (m :: * -> *).
(Monad m, MonadFail m) =>
[(ContentType, TransformerTransformer' m)]
transformers
transformWithContentType
:: (Monad m, MonadFail m)
=> ContentType
-> HtmlTransformer m
-> LT.Text
-> m LT.Text
transformWithContentType :: ContentType -> HtmlTransformer m -> Text -> m Text
transformWithContentType ContentType
contentType HtmlTransformer m
transformer Text
inputText =
case ContentType -> Maybe (TransformerTransformer' m)
forall (m :: * -> *).
(Monad m, MonadFail m) =>
ContentType -> Maybe (TransformerTransformer' m)
getTransformerTransformer ContentType
contentType of
Maybe (TransformerTransformer' m)
Nothing -> String -> m Text
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> m Text) -> String -> m Text
forall a b. (a -> b) -> a -> b
$ Text -> String
ST.unpack (Text -> String) -> Text -> String
forall a b. (a -> b) -> a -> b
$
Text
"unknown content type: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> ContentType -> Text
contentTypeText ContentType
contentType
Just (TransformerTransformer' TransformerTransformer m
transformTransformer) ->
TransformerTransformer m
HtmlTransformer m -> Text -> m Text
transformTransformer HtmlTransformer m
transformer Text
inputText