{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} module Main (main) where import Text.HTML.Scalpel.Core import Control.Applicative import Control.Monad (guard) import Data.List (isInfixOf) import System.Exit import Test.HUnit import qualified Text.HTML.TagSoup as TagSoup import qualified Text.Regex.TDFA main = exit . failures =<< runTestTT (TestList [scrapeTests]) exit :: Int -> IO () exit 0 = exitSuccess exit n = exitWith $ ExitFailure n re :: String -> Text.Regex.TDFA.Regex re = Text.Regex.TDFA.makeRegex scrapeTests = "scrapeTests" ~: TestList [ scrapeTest "foo" (Just ["foo"]) (htmls ("a" @: [])) , scrapeTest "foobar" (Just ["foo", "bar"]) (htmls ("a" @: [])) , scrapeTest "foo" (Just ["foo"]) (htmls ("a" @: [])) , scrapeTest "foo" (Just ["foo", "foo"]) (htmls ("a" @: [])) , scrapeTest "foo" (Just []) (htmls ("b" @: [])) , scrapeTest "foo" (Just [""]) (htmls ("a" @: [])) , scrapeTest "foobar" (Just ["bar"]) (htmls ("a" @: ["key" @= "value"])) , scrapeTest "foo" (Just ["foo"]) (htmls ("a" // "b" @: [] // "c")) , scrapeTest "foobarbaz" (Just ["foo", "bar"]) (htmls ("a" // "b")) , scrapeTest "foo" (Just ["foo"]) (htmls ("a" @: [hasClass "a"])) , scrapeTest "foo" (Just []) (htmls ("a" @: [hasClass "c"])) , scrapeTest "foobarbaz" (Just ["foo", "baz"]) (texts ("a" @: [notP $ hasClass "a"])) , scrapeTest "foo" (Just ["foo"]) (htmls ("a" @: ["key" @=~ re "va(foo|bar|lu)e"])) , scrapeTest "foobar" (Just ["foo", "bar"]) (htmls ("a" @: [AnyAttribute @= "value"])) , scrapeTest "foobar" (Just ["bar"]) (htmls ("a" @: [AnyAttribute @= "value"])) , scrapeTest "foobar" (Just ["foo", "bar"]) (htmls (AnyTag @: [AnyAttribute @= "value"])) , scrapeTest "foobar" (Just ["bar"]) (htmls (AnyTag @: [AnyAttribute @= "value"])) , scrapeTest "123" (Just ["2", "3"]) (htmls (AnyTag @: [match (==)])) , scrapeTest "foo" (Just "foo") (text "a") , scrapeTest "foobar" (Just "foo") (text "a") , scrapeTest "foobar" (Just ["foo", "bar"]) (texts "a") , scrapeTest "foobar" (Just [True, False]) (map (== "foo") <$> texts "a") , scrapeTest "" (Just "foo") (attr "key" "a") , scrapeTest "" (Just "baz") (attr "key2" $ "a" @: ["key1" @= "bar"]) , scrapeTest "foobar" (Just ["foo"]) (chroot "a" $ texts "b") , scrapeTest "foobar" (Just ["foo", "bar"]) (chroots "a" $ text "b") , scrapeTest "foobar" (Just "foo") (text ("a" // "b") <|> text ("a" // "c")) , scrapeTest "foobar" (Just "bar") (text ("a" // "d") <|> text ("a" // "c")) , scrapeTest "" (Just "foobar") (attr "src" "img") , scrapeTest "" (Just "foobar") (attr "src" "img") , scrapeTest "foobar" (Just ["foo", "bar"]) (texts "a") , scrapeTest "foobar" (Just ["foo", "bar"]) (texts "A") , scrapeTest "foo" (Just ["foo"]) (texts $ "A" @: ["b" @= "C"]) , scrapeTest "foo" (Just []) (texts $ "A" @: ["b" @= "c"]) , scrapeTest "foobarbaz" (Just ["foo", "baz"]) (texts ("a" @: [notP $ "b" @= "C"])) , scrapeTest "foo" (Just "foo") (html "a") , scrapeTest "
" (Just "
  • 1
  • ") (html "li") , scrapeTest "
    " (Just "
    ") (html "div") , scrapeTest "foobar" (Just ["foo","bar"]) (htmls "a") , scrapeTest "
    " (Just ["
  • 1
  • ", "
  • 2
  • "]) (htmls "li") , scrapeTest "
    " (Just ["
    "]) (htmls "div") , scrapeTest "123" (Just "123") (innerHTML anySelector) , scrapeTest "" (Just "") (innerHTML anySelector) , scrapeTest "foobar" (Just ["foo","bar"]) (innerHTMLs "a") , scrapeTest "foobarbaz" (Just "bar") (chroot "a" $ do t <- text anySelector guard ("b" `isInfixOf` t) html anySelector) , scrapeTest "
    inner text
    " (Just ["inner"]) (attrs "id" ("div" // "div")) , scrapeTest "
    " (Just ["b", "c"]) (attrs "id" ("div" // "div")) , scrapeTest "12345" (Just "12345") (text anySelector) , scrapeTest "1" Nothing $ do "Bad pattern" <- text "a" return "OK" , scrapeTest "1" (Just "OK") $ do "1" <- text "a" return "OK" , scrapeTest "

    A

    B

    C

    " (Just [(0, "A"), (1, "B"), (2, "C")]) (chroots ("article" // "p") $ do index <- position content <- text anySelector return (index, content)) , scrapeTest "

    A

    B

    C

    " (Just [[(0, "A")], [(0, "B"), (1, "C")]]) (chroots "article" $ chroots "p" $ do index <- position content <- text anySelector return (index, content)) , scrapeTest "

    p1

    p2

    p3

    p4

    " (Just ["p1", "p2", "p3", "p4"]) (texts "p") , scrapeTest "123" (Just ["1","2","3"]) (texts "a") , scrapeTest "123" (Just ["1","2","3"]) (texts $ "a" // "b") , scrapeTest "123" (Just ["1","2","3"]) (texts "b") ] scrapeTest :: (Eq a, Show a) => String -> Maybe a -> Scraper String a -> Test scrapeTest html expected scraper = label ~: expected @=? actual where label = "scrape (" ++ show html ++ ")" actual = scrape scraper (TagSoup.parseTags html)