{-# LANGUAGE StrictData #-} {-# OPTIONS_GHC -Wno-orphans #-} module HTML where import Control.Monad (join) import Data.Bool (bool) import Data.Foldable (traverse_) import Data.Set (Set) import Data.Text (Text) import Lasercutter import Text.HTML.TagSoup (Tag(TagText, TagOpen)) import Text.HTML.TagSoup.Tree type HTML = TagTree Text data Selector = Both Selector Selector | Alt Selector Selector | Negate Selector | HasTag Text | WithAttr Text (Maybe Text -> Bool) matchSelector :: Selector -> TagTree Text -> Bool matchSelector :: Selector -> TagTree Text -> Bool matchSelector (Both Selector se1 Selector se2) TagTree Text tt = Selector -> TagTree Text -> Bool matchSelector Selector se1 TagTree Text tt Bool -> Bool -> Bool && Selector -> TagTree Text -> Bool matchSelector Selector se2 TagTree Text tt matchSelector (Alt Selector se1 Selector se2) TagTree Text tt = Selector -> TagTree Text -> Bool matchSelector Selector se1 TagTree Text tt Bool -> Bool -> Bool || Selector -> TagTree Text -> Bool matchSelector Selector se2 TagTree Text tt matchSelector (Negate Selector se) TagTree Text tt = Bool -> Bool not (Bool -> Bool) -> Bool -> Bool forall a b. (a -> b) -> a -> b $ Selector -> TagTree Text -> Bool matchSelector Selector se TagTree Text tt matchSelector (HasTag Text txt) (TagBranch Text txt' [Attribute Text] _ [TagTree Text] _) = Text txt Text -> Text -> Bool forall a. Eq a => a -> a -> Bool == Text txt' matchSelector (HasTag Text txt) (TagLeaf (TagOpen Text txt' [Attribute Text] _)) = Text txt Text -> Text -> Bool forall a. Eq a => a -> a -> Bool == Text txt' matchSelector (HasTag Text _) (TagLeaf Tag Text _) = Bool False matchSelector (WithAttr Text txt Maybe Text -> Bool f) (TagBranch Text _ [Attribute Text] x0 [TagTree Text] _) = Maybe Text -> Bool f (Maybe Text -> Bool) -> Maybe Text -> Bool forall a b. (a -> b) -> a -> b $ Text -> [Attribute Text] -> Maybe Text forall a b. Eq a => a -> [(a, b)] -> Maybe b lookup Text txt [Attribute Text] x0 matchSelector (WithAttr Text txt Maybe Text -> Bool f) (TagLeaf (TagOpen Text _ [Attribute Text] attrs)) = Maybe Text -> Bool f (Maybe Text -> Bool) -> Maybe Text -> Bool forall a b. (a -> b) -> a -> b $ Text -> [Attribute Text] -> Maybe Text forall a b. Eq a => a -> [(a, b)] -> Maybe b lookup Text txt [Attribute Text] attrs matchSelector (WithAttr Text _ Maybe Text -> Bool _) (TagLeaf Tag Text _) = Bool False instance IsTree (TagTree t) where getChildren :: TagTree t -> [TagTree t] getChildren (TagBranch t _ [Attribute t] _ [TagTree t] tts) = [TagTree t] tts getChildren (TagLeaf Tag t _) = [] at :: Text -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) a at :: Text -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) a at Text t Parser bc (TagTree Text) a p = Parser bc (TagTree Text) (Maybe a) -> Parser bc (TagTree Text) a forall bc t a. Parser bc t (Maybe a) -> Parser bc t a expect (Parser bc (TagTree Text) (Maybe a) -> Parser bc (TagTree Text) a) -> Parser bc (TagTree Text) (Maybe a) -> Parser bc (TagTree Text) a forall a b. (a -> b) -> a -> b $ Maybe a -> Maybe a -> Bool -> Maybe a forall a. a -> a -> Bool -> a bool (Maybe a -> Maybe a -> Bool -> Maybe a) -> Parser bc (TagTree Text) (Maybe a) -> Parser bc (TagTree Text) (Maybe a -> Bool -> Maybe a) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> Maybe a -> Parser bc (TagTree Text) (Maybe a) forall (f :: * -> *) a. Applicative f => a -> f a pure Maybe a forall a. Maybe a Nothing Parser bc (TagTree Text) (Maybe a -> Bool -> Maybe a) -> Parser bc (TagTree Text) (Maybe a) -> Parser bc (TagTree Text) (Bool -> Maybe a) forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> (a -> Maybe a) -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) (Maybe a) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap a -> Maybe a forall a. a -> Maybe a Just Parser bc (TagTree Text) a p Parser bc (TagTree Text) (Bool -> Maybe a) -> Parser bc (TagTree Text) Bool -> Parser bc (TagTree Text) (Maybe a) forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> (TagTree Text -> Bool) -> Parser bc (TagTree Text) Bool forall t a bc. (t -> a) -> Parser bc t a proj (Selector -> TagTree Text -> Bool matchSelector (Selector -> TagTree Text -> Bool) -> Selector -> TagTree Text -> Bool forall a b. (a -> b) -> a -> b $ Text -> Selector HasTag Text t) textOf :: TagTree a -> Maybe a textOf :: TagTree a -> Maybe a textOf = \case TagLeaf (TagText a txt) -> a -> Maybe a forall a. a -> Maybe a Just a txt TagTree a _ -> Maybe a forall a. Maybe a Nothing text :: Parser bc (TagTree a) (Maybe a) text :: Parser bc (TagTree a) (Maybe a) text = (TagTree a -> Maybe a) -> Parser bc (TagTree a) (Maybe a) forall t a bc. (t -> a) -> Parser bc t a proj TagTree a -> Maybe a forall a. TagTree a -> Maybe a textOf getText :: Parser bc (TagTree Text) Text getText :: Parser bc (TagTree Text) Text getText = Parser bc (TagTree Text) (Maybe Text) -> Parser bc (TagTree Text) Text forall bc t a. Parser bc t (Maybe a) -> Parser bc t a expect Parser bc (TagTree Text) (Maybe Text) forall bc a. Parser bc (TagTree a) (Maybe a) text example :: TagTree Text example :: TagTree Text example = Text -> [Attribute Text] -> [TagTree Text] -> TagTree Text forall str. str -> [Attribute str] -> [TagTree str] -> TagTree str TagBranch Text "html" [(Text "lang", Text "en")] [ Text -> [Attribute Text] -> [TagTree Text] -> TagTree Text forall str. str -> [Attribute str] -> [TagTree str] -> TagTree str TagBranch Text "head" [] [ Text -> [Attribute Text] -> [TagTree Text] -> TagTree Text forall str. str -> [Attribute str] -> [TagTree str] -> TagTree str TagBranch Text "title" [] [ Tag Text -> TagTree Text forall str. Tag str -> TagTree str TagLeaf (Tag Text -> TagTree Text) -> Tag Text -> TagTree Text forall a b. (a -> b) -> a -> b $ Text -> Tag Text forall str. str -> Tag str TagText Text "Hello World!" ] , Text -> [Attribute Text] -> [TagTree Text] -> TagTree Text forall str. str -> [Attribute str] -> [TagTree str] -> TagTree str TagBranch Text "style" [(Text "type", Text "text/css")] [ Tag Text -> TagTree Text forall str. Tag str -> TagTree str TagLeaf (Tag Text -> TagTree Text) -> Tag Text -> TagTree Text forall a b. (a -> b) -> a -> b $ Text -> Tag Text forall str. str -> Tag str TagText Text "css" ] ] , Text -> [Attribute Text] -> [TagTree Text] -> TagTree Text forall str. str -> [Attribute str] -> [TagTree str] -> TagTree str TagBranch Text "body" [] [ Text -> [Attribute Text] -> [TagTree Text] -> TagTree Text forall str. str -> [Attribute str] -> [TagTree str] -> TagTree str TagBranch Text "h1" [] [ Tag Text -> TagTree Text forall str. Tag str -> TagTree str TagLeaf (Tag Text -> TagTree Text) -> Tag Text -> TagTree Text forall a b. (a -> b) -> a -> b $ Text -> Tag Text forall str. str -> Tag str TagText Text "Hi" ] , Text -> [Attribute Text] -> [TagTree Text] -> TagTree Text forall str. str -> [Attribute str] -> [TagTree str] -> TagTree str TagBranch Text "p" [(Text "id", Text "lorem")] [ Tag Text -> TagTree Text forall str. Tag str -> TagTree str TagLeaf (Tag Text -> TagTree Text) -> Tag Text -> TagTree Text forall a b. (a -> b) -> a -> b $ Text -> Tag Text forall str. str -> Tag str TagText Text "lorem ipsum" ] , Text -> [Attribute Text] -> [TagTree Text] -> TagTree Text forall str. str -> [Attribute str] -> [TagTree str] -> TagTree str TagBranch Text "p" [] [ Tag Text -> TagTree Text forall str. Tag str -> TagTree str TagLeaf (Tag Text -> TagTree Text) -> Tag Text -> TagTree Text forall a b. (a -> b) -> a -> b $ Text -> Tag Text forall str. str -> Tag str TagText Text "more p" , Text -> [Attribute Text] -> [TagTree Text] -> TagTree Text forall str. str -> [Attribute str] -> [TagTree str] -> TagTree str TagBranch Text "b" [] [ Tag Text -> TagTree Text forall str. Tag str -> TagTree str TagLeaf (Tag Text -> TagTree Text) -> Tag Text -> TagTree Text forall a b. (a -> b) -> a -> b $ Text -> Tag Text forall str. str -> Tag str TagText Text "bold" ] , Tag Text -> TagTree Text forall str. Tag str -> TagTree str TagLeaf (Tag Text -> TagTree Text) -> Tag Text -> TagTree Text forall a b. (a -> b) -> a -> b $ Text -> Tag Text forall str. str -> Tag str TagText Text "done" ] , Text -> [Attribute Text] -> [TagTree Text] -> TagTree Text forall str. str -> [Attribute str] -> [TagTree str] -> TagTree str TagBranch Text "script" [] [ Tag Text -> TagTree Text forall str. Tag str -> TagTree str TagLeaf (Tag Text -> TagTree Text) -> Tag Text -> TagTree Text forall a b. (a -> b) -> a -> b $ Text -> Tag Text forall str. str -> Tag str TagText Text "dont want no scripts" ] ] ] chroot :: Selector -> Parser bc HTML a -> Parser bc HTML a chroot :: Selector -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) a chroot Selector s = Parser bc (TagTree Text) [a] -> Parser bc (TagTree Text) a forall bc t a. Parser bc t [a] -> Parser bc t a one (Parser bc (TagTree Text) [a] -> Parser bc (TagTree Text) a) -> (Parser bc (TagTree Text) a -> Parser bc (TagTree Text) [a]) -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) a forall b c a. (b -> c) -> (a -> b) -> a -> c . Selector -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) [a] forall bc a. Selector -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) [a] chroots Selector s chroots :: Selector -> Parser bc HTML a -> Parser bc HTML [a] chroots :: Selector -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) [a] chroots = (TagTree Text -> Bool) -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) [a] forall t bc a. (t -> Bool) -> Parser bc t a -> Parser bc t [a] target ((TagTree Text -> Bool) -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) [a]) -> (Selector -> TagTree Text -> Bool) -> Selector -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) [a] forall b c a. (b -> c) -> (a -> b) -> a -> c . Selector -> TagTree Text -> Bool matchSelector texts :: Parser bc HTML [Text] texts :: Parser bc (TagTree Text) [Text] texts = (TagTree Text -> Maybe Text) -> Parser bc (TagTree Text) [Text] forall t a bc. (t -> Maybe a) -> Parser bc t [a] targetMap TagTree Text -> Maybe Text forall a. TagTree a -> Maybe a textOf texts' :: Selector -> Parser bc HTML [Text] texts' :: Selector -> Parser bc (TagTree Text) [Text] texts' Selector sel = ([[Maybe Text]] -> [Text]) -> Parser bc (TagTree Text) [[Maybe Text]] -> Parser bc (TagTree Text) [Text] forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap ([Maybe Text] -> [Text] forall (f :: * -> *) a. Filterable f => f (Maybe a) -> f a catMaybes ([Maybe Text] -> [Text]) -> ([[Maybe Text]] -> [Maybe Text]) -> [[Maybe Text]] -> [Text] forall b c a. (b -> c) -> (a -> b) -> a -> c . [[Maybe Text]] -> [Maybe Text] forall (m :: * -> *) a. Monad m => m (m a) -> m a join) (Parser bc (TagTree Text) [[Maybe Text]] -> Parser bc (TagTree Text) [Text]) -> Parser bc (TagTree Text) [[Maybe Text]] -> Parser bc (TagTree Text) [Text] forall a b. (a -> b) -> a -> b $ (TagTree Text -> Bool) -> Parser bc (TagTree Text) [Maybe Text] -> Parser bc (TagTree Text) [[Maybe Text]] forall t bc a. (t -> Bool) -> Parser bc t a -> Parser bc t [a] target (Selector -> TagTree Text -> Bool matchSelector Selector sel) (Parser bc (TagTree Text) [Maybe Text] -> Parser bc (TagTree Text) [[Maybe Text]]) -> Parser bc (TagTree Text) [Maybe Text] -> Parser bc (TagTree Text) [[Maybe Text]] forall a b. (a -> b) -> a -> b $ Parser bc (TagTree Text) (Maybe Text) -> Parser bc (TagTree Text) [Maybe Text] forall bc t a. Parser bc t a -> Parser bc t [a] onChildren Parser bc (TagTree Text) (Maybe Text) forall bc a. Parser bc (TagTree a) (Maybe a) text isText :: HTML -> Bool isText :: TagTree Text -> Bool isText (TagLeaf (TagText Text _)) = Bool True isText TagTree Text _ = Bool False textNoScript :: Parser (Set Text) HTML [Text] textNoScript :: Parser (Set Text) (TagTree Text) [Text] textNoScript = [Parser (Set Text) (TagTree Text) [Text]] -> Parser (Set Text) (TagTree Text) [Text] forall (t :: * -> *) (f :: * -> *) a. (Foldable t, Alternative f) => t (f a) -> f a asum [ Text -> Parser (Set Text) (TagTree Text) [Text] -> Parser (Set Text) (TagTree Text) [Text] forall bc a. Text -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) a at Text "h1" Parser (Set Text) (TagTree Text) [Text] forall bc. Parser bc (TagTree Text) [Text] texts , Text -> Parser (Set Text) (TagTree Text) [Text] -> Parser (Set Text) (TagTree Text) [Text] forall bc a. Text -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) a at Text "p" Parser (Set Text) (TagTree Text) [Text] forall bc. Parser bc (TagTree Text) [Text] texts , Text -> Parser (Set Text) (TagTree Text) [Text] -> Parser (Set Text) (TagTree Text) [Text] forall bc a. Text -> Parser bc (TagTree Text) a -> Parser bc (TagTree Text) a at Text "b" Parser (Set Text) (TagTree Text) [Text] forall bc. Parser bc (TagTree Text) [Text] texts ] getTag :: HTML -> [Text] getTag :: TagTree Text -> [Text] getTag (TagBranch Text txt [Attribute Text] _ [TagTree Text] _) = Text -> [Text] forall (f :: * -> *) a. Applicative f => a -> f a pure Text txt getTag (TagLeaf (TagOpen Text txt [Attribute Text] _)) = Text -> [Text] forall (f :: * -> *) a. Applicative f => a -> f a pure Text txt getTag TagTree Text _ = [Text] forall a. Monoid a => a mempty main :: IO () main :: IO () main = ([Text] -> IO ()) -> Maybe [Text] -> IO () forall (t :: * -> *) (f :: * -> *) a b. (Foldable t, Applicative f) => (a -> f b) -> t a -> f () traverse_ ((Text -> IO ()) -> [Text] -> IO () forall (t :: * -> *) (f :: * -> *) a b. (Foldable t, Applicative f) => (a -> f b) -> t a -> f () traverse_ Text -> IO () forall a. Show a => a -> IO () print) (Maybe [Text] -> IO ()) -> Maybe [Text] -> IO () forall a b. (a -> b) -> a -> b $ (TagTree Text -> [Text]) -> TagTree Text -> Parser [Text] (TagTree Text) [Text] -> Maybe [Text] forall bc t a. (Monoid bc, IsTree t) => (t -> bc) -> t -> Parser bc t a -> Maybe a runParser TagTree Text -> [Text] getTag TagTree Text example (Parser [Text] (TagTree Text) [Text] -> Maybe [Text]) -> Parser [Text] (TagTree Text) [Text] -> Maybe [Text] forall a b. (a -> b) -> a -> b $ ([Maybe Text] -> [Text]) -> Parser [Text] (TagTree Text) [Maybe Text] -> Parser [Text] (TagTree Text) [Text] forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap [Maybe Text] -> [Text] forall (f :: * -> *) a. Filterable f => f (Maybe a) -> f a catMaybes (Parser [Text] (TagTree Text) [Maybe Text] -> Parser [Text] (TagTree Text) [Text]) -> Parser [Text] (TagTree Text) [Maybe Text] -> Parser [Text] (TagTree Text) [Text] forall a b. (a -> b) -> a -> b $ (TagTree Text -> Bool) -> Parser [Text] (TagTree Text) (Maybe Text) -> Parser [Text] (TagTree Text) [Maybe Text] forall t bc a. (t -> Bool) -> Parser bc t a -> Parser bc t [a] target TagTree Text -> Bool isText (Parser [Text] (TagTree Text) (Maybe Text) -> Parser [Text] (TagTree Text) [Maybe Text]) -> Parser [Text] (TagTree Text) (Maybe Text) -> Parser [Text] (TagTree Text) [Maybe Text] forall a b. (a -> b) -> a -> b $ Maybe Text -> Maybe Text -> Bool -> Maybe Text forall a. a -> a -> Bool -> a bool (Maybe Text -> Maybe Text -> Bool -> Maybe Text) -> Parser [Text] (TagTree Text) (Maybe Text) -> Parser [Text] (TagTree Text) (Maybe Text -> Bool -> Maybe Text) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> Parser [Text] (TagTree Text) (Maybe Text) forall bc a. Parser bc (TagTree a) (Maybe a) text Parser [Text] (TagTree Text) (Maybe Text -> Bool -> Maybe Text) -> Parser [Text] (TagTree Text) (Maybe Text) -> Parser [Text] (TagTree Text) (Bool -> Maybe Text) forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> Maybe Text -> Parser [Text] (TagTree Text) (Maybe Text) forall (f :: * -> *) a. Applicative f => a -> f a pure Maybe Text forall a. Maybe a Nothing Parser [Text] (TagTree Text) (Bool -> Maybe Text) -> Parser [Text] (TagTree Text) Bool -> Parser [Text] (TagTree Text) (Maybe Text) forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b <*> ([Text] -> Bool) -> Parser [Text] (TagTree Text) [Text] -> Parser [Text] (TagTree Text) Bool forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap ((Text -> [Text] -> Bool forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool `elem` [Text "script", Text "style"]) (Text -> Bool) -> ([Text] -> Text) -> [Text] -> Bool forall b c a. (b -> c) -> (a -> b) -> a -> c . [Text] -> Text forall a. [a] -> a head) Parser [Text] (TagTree Text) [Text] forall bc t. Parser bc t bc breadcrumbs