{-# LANGUAGE OverloadedStrings #-}
module Text.Seonbi.Html.Lang
( LangHtmlEntity (..)
, LanguageTag
, annotateWithLang
, extractLang
, isKorean
, isNeverKorean
) where
import Control.Applicative
import Data.Char (isSpace)
import Data.Maybe
import Data.Attoparsec.Text
import Data.Text
import Text.Seonbi.Html.Entity
import Text.Seonbi.Html.Tag (HtmlTag)
type LanguageTag = Text
extractLang
:: HtmlRawAttrs
-> Maybe LanguageTag
HtmlRawAttrs
attrs =
case Parser (Maybe HtmlRawAttrs)
-> HtmlRawAttrs -> Either String (Maybe HtmlRawAttrs)
forall a. Parser a -> HtmlRawAttrs -> Either String a
parseOnly Parser (Maybe HtmlRawAttrs)
parser' HtmlRawAttrs
attrs of
Right (Just HtmlRawAttrs
lang') ->
let lt :: HtmlRawAttrs
lt = HtmlRawAttrs -> HtmlRawAttrs
toLower (HtmlRawAttrs -> HtmlRawAttrs)
-> (HtmlRawAttrs -> HtmlRawAttrs) -> HtmlRawAttrs -> HtmlRawAttrs
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HtmlRawAttrs -> HtmlRawAttrs
strip (HtmlRawAttrs -> HtmlRawAttrs)
-> (HtmlRawAttrs -> HtmlRawAttrs) -> HtmlRawAttrs -> HtmlRawAttrs
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HtmlRawAttrs -> HtmlRawAttrs
normalizeEntities (HtmlRawAttrs -> HtmlRawAttrs) -> HtmlRawAttrs -> HtmlRawAttrs
forall a b. (a -> b) -> a -> b
$ HtmlRawAttrs
lang'
in if HtmlRawAttrs -> Bool
Data.Text.null HtmlRawAttrs
lt then Maybe HtmlRawAttrs
forall a. Maybe a
Nothing else HtmlRawAttrs -> Maybe HtmlRawAttrs
forall a. a -> Maybe a
Just HtmlRawAttrs
lt
Either String (Maybe HtmlRawAttrs)
_ -> Maybe HtmlRawAttrs
forall a. Maybe a
Nothing
where
parser' :: Parser (Maybe Text)
parser' :: Parser (Maybe HtmlRawAttrs)
parser' = do
Parser ()
skipSpace
[Maybe HtmlRawAttrs]
attrs' <- Parser (Maybe HtmlRawAttrs)
langAttr Parser (Maybe HtmlRawAttrs)
-> Parser HtmlRawAttrs Char
-> Parser HtmlRawAttrs [Maybe HtmlRawAttrs]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`sepBy` Parser HtmlRawAttrs Char
space
Parser ()
skipSpace
Maybe HtmlRawAttrs -> Parser (Maybe HtmlRawAttrs)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe HtmlRawAttrs -> Parser (Maybe HtmlRawAttrs))
-> Maybe HtmlRawAttrs -> Parser (Maybe HtmlRawAttrs)
forall a b. (a -> b) -> a -> b
$ [HtmlRawAttrs] -> Maybe HtmlRawAttrs
forall a. [a] -> Maybe a
listToMaybe ([HtmlRawAttrs] -> Maybe HtmlRawAttrs)
-> [HtmlRawAttrs] -> Maybe HtmlRawAttrs
forall a b. (a -> b) -> a -> b
$ [Maybe HtmlRawAttrs] -> [HtmlRawAttrs]
forall a. [Maybe a] -> [a]
catMaybes [Maybe HtmlRawAttrs]
attrs'
langAttr :: Parser (Maybe Text)
langAttr :: Parser (Maybe HtmlRawAttrs)
langAttr = do
(Bool
isLang, Bool
cont) <- Parser (Bool, Bool)
attrIsLang
HtmlRawAttrs
value <- if Bool
cont then Parser HtmlRawAttrs
attrValue else HtmlRawAttrs -> Parser HtmlRawAttrs
forall (m :: * -> *) a. Monad m => a -> m a
return HtmlRawAttrs
""
Maybe HtmlRawAttrs -> Parser (Maybe HtmlRawAttrs)
forall (m :: * -> *) a. Monad m => a -> m a
return (if Bool
isLang then HtmlRawAttrs -> Maybe HtmlRawAttrs
forall a. a -> Maybe a
Just HtmlRawAttrs
value else Maybe HtmlRawAttrs
forall a. Maybe a
Nothing)
attrIsLang :: Parser (Bool, Bool)
attrIsLang :: Parser (Bool, Bool)
attrIsLang = [Parser (Bool, Bool)] -> Parser (Bool, Bool)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ HtmlRawAttrs -> Parser HtmlRawAttrs
asciiCI HtmlRawAttrs
"lang=" Parser HtmlRawAttrs -> Parser (Bool, Bool) -> Parser (Bool, Bool)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Bool, Bool) -> Parser (Bool, Bool)
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool
True, Bool
True)
, do { HtmlRawAttrs
_ <- (Char -> Bool) -> Parser HtmlRawAttrs
takeWhile1 (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'=')
; Maybe Char
eq <- Parser HtmlRawAttrs Char -> Parser HtmlRawAttrs (Maybe Char)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Char -> Parser HtmlRawAttrs Char
char Char
'=')
; (Bool, Bool) -> Parser (Bool, Bool)
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool
False, Maybe Char -> Bool
forall a. Maybe a -> Bool
isJust Maybe Char
eq)
}
]
attrValue :: Parser Text
attrValue :: Parser HtmlRawAttrs
attrValue = [Parser HtmlRawAttrs] -> Parser HtmlRawAttrs
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
[ do { (Char -> Bool) -> Parser ()
skip (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"'); HtmlRawAttrs
v <- (Char -> Bool) -> Parser HtmlRawAttrs
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"'); (Char -> Bool) -> Parser ()
skip (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'"'); HtmlRawAttrs -> Parser HtmlRawAttrs
forall (m :: * -> *) a. Monad m => a -> m a
return HtmlRawAttrs
v }
, do { (Char -> Bool) -> Parser ()
skip (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'')
; HtmlRawAttrs
v <- (Char -> Bool) -> Parser HtmlRawAttrs
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\'')
; (Char -> Bool) -> Parser ()
skip (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\''); HtmlRawAttrs -> Parser HtmlRawAttrs
forall (m :: * -> *) a. Monad m => a -> m a
return HtmlRawAttrs
v
}
, (Char -> Bool) -> Parser HtmlRawAttrs
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace)
]
normalizeEntities :: Text -> Text
normalizeEntities :: HtmlRawAttrs -> HtmlRawAttrs
normalizeEntities
= HtmlRawAttrs -> HtmlRawAttrs -> HtmlRawAttrs -> HtmlRawAttrs
Data.Text.replace HtmlRawAttrs
"‐" HtmlRawAttrs
"-"
(HtmlRawAttrs -> HtmlRawAttrs)
-> (HtmlRawAttrs -> HtmlRawAttrs) -> HtmlRawAttrs -> HtmlRawAttrs
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HtmlRawAttrs -> HtmlRawAttrs -> HtmlRawAttrs -> HtmlRawAttrs
Data.Text.replace HtmlRawAttrs
"‐" HtmlRawAttrs
"-"
(HtmlRawAttrs -> HtmlRawAttrs)
-> (HtmlRawAttrs -> HtmlRawAttrs) -> HtmlRawAttrs -> HtmlRawAttrs
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HtmlRawAttrs -> HtmlRawAttrs -> HtmlRawAttrs -> HtmlRawAttrs
Data.Text.replace HtmlRawAttrs
"‐" HtmlRawAttrs
"-"
(HtmlRawAttrs -> HtmlRawAttrs)
-> (HtmlRawAttrs -> HtmlRawAttrs) -> HtmlRawAttrs -> HtmlRawAttrs
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HtmlRawAttrs -> HtmlRawAttrs -> HtmlRawAttrs -> HtmlRawAttrs
Data.Text.replace HtmlRawAttrs
"‐" HtmlRawAttrs
"-"
(HtmlRawAttrs -> HtmlRawAttrs)
-> (HtmlRawAttrs -> HtmlRawAttrs) -> HtmlRawAttrs -> HtmlRawAttrs
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HtmlRawAttrs -> HtmlRawAttrs -> HtmlRawAttrs -> HtmlRawAttrs
Data.Text.replace HtmlRawAttrs
"‐" HtmlRawAttrs
"-"
data LangHtmlEntity = LangHtmlEntity
{
LangHtmlEntity -> Maybe HtmlRawAttrs
lang :: Maybe LanguageTag
, LangHtmlEntity -> HtmlEntity
entity :: HtmlEntity
} deriving (Int -> LangHtmlEntity -> ShowS
[LangHtmlEntity] -> ShowS
LangHtmlEntity -> String
(Int -> LangHtmlEntity -> ShowS)
-> (LangHtmlEntity -> String)
-> ([LangHtmlEntity] -> ShowS)
-> Show LangHtmlEntity
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [LangHtmlEntity] -> ShowS
$cshowList :: [LangHtmlEntity] -> ShowS
show :: LangHtmlEntity -> String
$cshow :: LangHtmlEntity -> String
showsPrec :: Int -> LangHtmlEntity -> ShowS
$cshowsPrec :: Int -> LangHtmlEntity -> ShowS
Show, LangHtmlEntity -> LangHtmlEntity -> Bool
(LangHtmlEntity -> LangHtmlEntity -> Bool)
-> (LangHtmlEntity -> LangHtmlEntity -> Bool) -> Eq LangHtmlEntity
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: LangHtmlEntity -> LangHtmlEntity -> Bool
$c/= :: LangHtmlEntity -> LangHtmlEntity -> Bool
== :: LangHtmlEntity -> LangHtmlEntity -> Bool
$c== :: LangHtmlEntity -> LangHtmlEntity -> Bool
Eq)
annotateWithLang :: [HtmlEntity] -> [LangHtmlEntity]
annotateWithLang :: [HtmlEntity] -> [LangHtmlEntity]
annotateWithLang =
[(HtmlTag, Maybe HtmlRawAttrs)] -> [HtmlEntity] -> [LangHtmlEntity]
annotate []
where
annotate :: [(HtmlTag, Maybe LanguageTag)]
-> [HtmlEntity]
-> [LangHtmlEntity]
annotate :: [(HtmlTag, Maybe HtmlRawAttrs)] -> [HtmlEntity] -> [LangHtmlEntity]
annotate [(HtmlTag, Maybe HtmlRawAttrs)]
_ [] = []
annotate [(HtmlTag, Maybe HtmlRawAttrs)]
stack (x :: HtmlEntity
x@HtmlStartTag { tag :: HtmlEntity -> HtmlTag
tag = HtmlTag
tag', rawAttributes :: HtmlEntity -> HtmlRawAttrs
rawAttributes = HtmlRawAttrs
attrs } : [HtmlEntity]
xs) =
Maybe HtmlRawAttrs -> HtmlEntity -> LangHtmlEntity
LangHtmlEntity Maybe HtmlRawAttrs
thisLang HtmlEntity
x LangHtmlEntity -> [LangHtmlEntity] -> [LangHtmlEntity]
forall a. a -> [a] -> [a]
: [(HtmlTag, Maybe HtmlRawAttrs)] -> [HtmlEntity] -> [LangHtmlEntity]
annotate [(HtmlTag, Maybe HtmlRawAttrs)]
nextStack [HtmlEntity]
xs
where
parentLang :: Maybe LanguageTag
parentLang :: Maybe HtmlRawAttrs
parentLang = case [(HtmlTag, Maybe HtmlRawAttrs)]
stack of
(HtmlTag
_, Maybe HtmlRawAttrs
l):[(HtmlTag, Maybe HtmlRawAttrs)]
_ -> Maybe HtmlRawAttrs
l
[(HtmlTag, Maybe HtmlRawAttrs)]
_ -> Maybe HtmlRawAttrs
forall a. Maybe a
Nothing
thisLang :: Maybe LanguageTag
thisLang :: Maybe HtmlRawAttrs
thisLang = HtmlRawAttrs -> Maybe HtmlRawAttrs
extractLang HtmlRawAttrs
attrs Maybe HtmlRawAttrs -> Maybe HtmlRawAttrs -> Maybe HtmlRawAttrs
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Maybe HtmlRawAttrs
parentLang
nextStack :: [(HtmlTag, Maybe LanguageTag)]
nextStack :: [(HtmlTag, Maybe HtmlRawAttrs)]
nextStack = (HtmlTag
tag', Maybe HtmlRawAttrs
thisLang) (HtmlTag, Maybe HtmlRawAttrs)
-> [(HtmlTag, Maybe HtmlRawAttrs)]
-> [(HtmlTag, Maybe HtmlRawAttrs)]
forall a. a -> [a] -> [a]
: [(HtmlTag, Maybe HtmlRawAttrs)]
stack
annotate [(HtmlTag, Maybe HtmlRawAttrs)]
stack (x :: HtmlEntity
x@HtmlEndTag { tag :: HtmlEntity -> HtmlTag
tag = HtmlTag
tag' } : [HtmlEntity]
xs) =
Maybe HtmlRawAttrs -> HtmlEntity -> LangHtmlEntity
LangHtmlEntity Maybe HtmlRawAttrs
thisLang HtmlEntity
x LangHtmlEntity -> [LangHtmlEntity] -> [LangHtmlEntity]
forall a. a -> [a] -> [a]
: [(HtmlTag, Maybe HtmlRawAttrs)] -> [HtmlEntity] -> [LangHtmlEntity]
annotate [(HtmlTag, Maybe HtmlRawAttrs)]
nextStack [HtmlEntity]
xs
where
([(HtmlTag, Maybe HtmlRawAttrs)]
nextStack, Maybe HtmlRawAttrs
thisLang) = case [(HtmlTag, Maybe HtmlRawAttrs)]
stack of
[] -> ([], Maybe HtmlRawAttrs
forall a. Maybe a
Nothing)
s :: [(HtmlTag, Maybe HtmlRawAttrs)]
s@((HtmlTag
t, Maybe HtmlRawAttrs
lang'):[(HtmlTag, Maybe HtmlRawAttrs)]
ys) ->
(if HtmlTag
t HtmlTag -> HtmlTag -> Bool
forall a. Eq a => a -> a -> Bool
== HtmlTag
tag' then [(HtmlTag, Maybe HtmlRawAttrs)]
ys else [(HtmlTag, Maybe HtmlRawAttrs)]
s, Maybe HtmlRawAttrs
lang')
annotate [(HtmlTag, Maybe HtmlRawAttrs)]
stack (HtmlEntity
x : [HtmlEntity]
xs) =
Maybe HtmlRawAttrs -> HtmlEntity -> LangHtmlEntity
LangHtmlEntity Maybe HtmlRawAttrs
parentLang HtmlEntity
x LangHtmlEntity -> [LangHtmlEntity] -> [LangHtmlEntity]
forall a. a -> [a] -> [a]
: [(HtmlTag, Maybe HtmlRawAttrs)] -> [HtmlEntity] -> [LangHtmlEntity]
annotate [(HtmlTag, Maybe HtmlRawAttrs)]
stack [HtmlEntity]
xs
where
parentLang :: Maybe LanguageTag
parentLang :: Maybe HtmlRawAttrs
parentLang = case [(HtmlTag, Maybe HtmlRawAttrs)]
stack of
(HtmlTag
_, Maybe HtmlRawAttrs
l):[(HtmlTag, Maybe HtmlRawAttrs)]
_ -> Maybe HtmlRawAttrs
l
[(HtmlTag, Maybe HtmlRawAttrs)]
_ -> Maybe HtmlRawAttrs
forall a. Maybe a
Nothing
isKorean :: LanguageTag -> Bool
isKorean :: HtmlRawAttrs -> Bool
isKorean HtmlRawAttrs
lang' =
HtmlRawAttrs
l HtmlRawAttrs -> HtmlRawAttrs -> Bool
forall a. Eq a => a -> a -> Bool
== HtmlRawAttrs
"ko" Bool -> Bool -> Bool
|| HtmlRawAttrs
l HtmlRawAttrs -> HtmlRawAttrs -> Bool
forall a. Eq a => a -> a -> Bool
== HtmlRawAttrs
"kor" Bool -> Bool -> Bool
||
HtmlRawAttrs
"ko-" HtmlRawAttrs -> HtmlRawAttrs -> Bool
`isPrefixOf` HtmlRawAttrs
l Bool -> Bool -> Bool
||
HtmlRawAttrs
"kor-" HtmlRawAttrs -> HtmlRawAttrs -> Bool
`isPrefixOf` HtmlRawAttrs
l
where
l :: Text
l :: HtmlRawAttrs
l = HtmlRawAttrs -> HtmlRawAttrs
toLower HtmlRawAttrs
lang'
isNeverKorean :: Maybe LanguageTag -> Bool
isNeverKorean :: Maybe HtmlRawAttrs -> Bool
isNeverKorean Maybe HtmlRawAttrs
Nothing = Bool
False
isNeverKorean (Just HtmlRawAttrs
lang') = Bool -> Bool
not (HtmlRawAttrs -> Bool
isKorean HtmlRawAttrs
lang')