{-# LANGUAGE OverloadedStrings #-} {-| Description: Token processing rules within the @\@ section. Copyright: (c) 2020 Sam May License: MPL-2.0 Maintainer: ag.eitilt@gmail.com Stability: stable Portability: portable -} module Web.Mangrove.Parse.Tree.InHead ( treeInHead ) where import qualified Control.Applicative as A import qualified Control.Monad as N import qualified Control.Monad.Trans.State as N.S import qualified Data.Bifunctor as F.B import qualified Data.ByteString.Short as BS.SH import qualified Data.HashMap.Strict as M import qualified Data.Text as T import qualified Data.Text.Encoding as T import Web.Mangrove.Parse.Common.Error import Web.Mangrove.Parse.Tokenize.Common import Web.Mangrove.Parse.Tree.Common import Web.Mangrove.Parse.Tree.InText import Web.Mangrove.Parse.Tree.Patch import Web.Willow.Common.Encoding import Web.Willow.Common.Encoding.Character import Web.Willow.Common.Encoding.Labels import Web.Willow.Common.Encoding.Sniffer import Web.Willow.Common.Parser import Web.Willow.Common.Parser.Switch import {-# SOURCE #-} Web.Mangrove.Parse.Tree.InBody import Control.Applicative ( (<|>) ) -- | __HTML:__ -- @[the "in head" insertion mode] -- (https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead)@ -- -- The parsing instructions corresponding to the 'InHead' section of the state -- machine. treeInHead :: TreeBuilder TreeOutput treeInHead = next >>= switch [ If isWhitespace insertCharacter , If isComment insertComment , If isDoctype $ \t' -> packTreeErrors [UnexpectedDoctype $ tokenDocumentType t'] t' , If (isStartTag ["html"]) $ \t' -> do push t' treeInBody , If (isStartTag ["base", "basefont", "bgsound", "link"]) insertNullElement , If (isStartTag ["meta"]) $ \t' -> do insert <- insertNullElement t' change' <- A.optional $ changeEncoding t' return $ case change' of Just change -> insert |++| change Nothing -> insert , If (isStartTag ["title"]) genericRCDataElement , If (isStartTag ["noscript"]) $ \t' -> do state <- N.S.get if scriptingEnabled state then genericRawTextElement t' else do switchMode InHeadNoscript insertElement t' , If (isStartTag ["noframes", "style"]) genericRawTextElement , If (isStartTag ["script"]) $ \t' -> do N.S.modify $ \state -> state { originalInsertionMode = Just $ insertionMode state } switchMode InText insert <- insertElement . mapTokenState t' $ \state -> state { currentState = ScriptDataState } return insert , If (isEndTag ["head"]) $ \t' -> do switchMode AfterHead packTree t' softCloseCurrentNode_ , If (isEndTag ["body", "html", "br"]) anythingElse , If (isStartTag ["template"]) $ \t' -> do insert <- insertElement t' insertFormattingMarker setFramesetNotOk switchMode InTemplate pushTemplateMode InTemplate return insert , If (isEndTag ["template"]) $ \t' -> do generate <- generateEndTags thoroughlyImpliedEndTags current <- currentNode let errF = case nodeIsElement "template" <$> current of Just True -> id _ -> consTreeError_ UnexpectedElementWithImpliedEndTag clear <- closeElement "template" clearFormattingElements popTemplateMode resetInsertionMode packTree t' $ errF generate ++ clear , If (isStartTag ["head"]) $ packTreeErrors [NestedSingletonElement] , If isAnyEndTag $ \t' -> packTreeErrors [UnmatchedEndTag $ tokenElement t'] t' , Else anythingElse ] where anythingElse t' = do push t' switchMode AfterHead packTree_ softCloseCurrentNode_ changeEncoding t' = do let d = tokenTag t' state = tokenState t' >>= decoderState . fst enc' <- maybe A.empty return $ do e <- changeEncodingCharset d <|> changeEncodingContentType d return $ case e of Utf16be -> Utf8 Utf16le -> Utf8 UserDefined -> Windows1252 enc -> enc case state of Just s -> case decoderEncoding s of Utf16be -> putDecoderState t' $ setEncodingCertain Utf16be s Utf16le -> putDecoderState t' $ setEncodingCertain Utf16be s enc | enc == enc' -> putDecoderState t' $ setEncodingCertain enc' s _ -> case decoderConfidence s of Tentative _ rec | not $ encodingEquivalent rec enc' -> do _ <- putDecoderState t' $ initialDecoderState enc' restartParsing $ streamStart rec _ -> putDecoderState t' $ initialDecoderState enc' Nothing -> putDecoderState t' $ initialDecoderState enc' changeEncodingCharset d = do charset <- M.lookup "charset" $ tagAttributes d lookupEncoding charset changeEncodingContentType d = do httpEquiv <- M.lookup "http-equiv" $ tagAttributes d N.unless (T.map toAsciiLower httpEquiv == "content-type") A.empty content <- M.lookup "content" $ tagAttributes d extractEncoding $ T.encodeUtf8 content encodingEquivalent rec enc = uncurry (==) . F.B.bimap (map $ parseChar enc) (map T.singleton) . unzip . M.toList $ parsedChars rec parseChar enc = fst . decode' (initialDecoderState enc) . BS.SH.fromShort putDecoderState t' decState = flip packTree [] . mapTokenState' t' . fmap . F.B.first $ \state -> state { decoderState_ = Right $ Just decState }