-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Extracts the text of the main article from an HTML document -- -- Give readability an HTML document and it will detect and -- extract the text of the article while removing everything unnecessary like -- menus, advertisements or sidebars. It is more or less a reimplementation -- of python-readability. @package readability @version 0.1.0.0 module Readability.Helper innerText :: Cursor -> Text content' :: Cursor -> [Text] elin :: Element -> [Name] -> Bool getElement :: Node -> Maybe Element guarded :: Alternative f => (a -> Bool) -> a -> f a module Readability.Title -- | Extracts the title (title) from an HTML document and -- normalizes it by removing excess whitespace. title :: Document -> Maybe Text -- | Attempts to find a shortened version of the title without the page name. shortTitle :: Document -> Maybe Text module Readability.Types -- | Result of processing HTML through readability. data Article Article :: Document -> Maybe Text -> Maybe Text -> Article -- | Body of the article from the original HTML. [summary] :: Article -> Document -- | Title of the original HTML, if found. [title] :: Article -> Maybe Text -- | Possibly simplified title. 
[shortTitle] :: Article -> Maybe Text data Settings Settings :: (Name -> Bool) -> Settings -- | Remove HTML attributes for which the function returns true. [reRemoveAttributes] :: Settings -> Name -> Bool newtype Scores Scores :: Map Node (Cursor, Double) -> Scores [scoreMap] :: Scores -> Map Node (Cursor, Double) alterScores :: (Maybe (Cursor, Double) -> Maybe (Cursor, Double)) -> Node -> Scores -> Scores emptyScores :: Scores lookupCursor :: Node -> Scores -> Maybe Cursor lookupScore :: Node -> Scores -> Maybe Double mapScores :: (Cursor -> Double -> (Cursor, Double)) -> Scores -> Scores maxScore :: Scores -> (Cursor, Double) nullScores :: Scores -> Bool instance GHC.Show.Show Readability.Types.Scores instance GHC.Show.Show Readability.Types.Article module Readability.Metrics classWeight :: Element -> Double contentScore :: Cursor -> Maybe Double linkDensity :: Cursor -> Double scoreAncestor :: Cursor -> Double scoreParagraph :: Scores -> Cursor -> Scores scoreParagraphs :: [Cursor] -> Maybe Scores textualSibling :: Cursor -> Bool module Readability.Clean cleanElement :: Bool -> Element -> Element paradivs :: Node -> Node -- | Remove elements that do not contribute to the article. -- -- Removes: -- -- -- -- Preserves: -- -- sanitizeNode :: Settings -> Scores -> Node -> Maybe Node module Readability.Internal summary :: Settings -> Document -> Maybe Document -- | Attempts to find a shortened version of the title without the page name. shortTitle :: Document -> Maybe Text -- | Extracts the title (title) from an HTML document and -- normalizes it by removing excess whitespace. title :: Document -> Maybe Text rootSummary :: Settings -> Bool -> Element -> Maybe Element module Readability -- | Result of processing HTML through readability. data Article Article :: Document -> Maybe Text -> Maybe Text -> Article -- | Body of the article from the original HTML. [summary] :: Article -> Document -- | Title of the original HTML, if found. [title] :: Article -> Maybe Text -- | Possibly simplified title. 
[shortTitle] :: Article -> Maybe Text -- | Extracts the article from HTML in a ByteString. fromByteString :: ByteString -> Maybe Article -- | Extracts the article from HTML represented as an HTML document. fromDocument :: Document -> Maybe Article -- | Extracts the article from HTML in the given file. fromFile :: FilePath -> IO (Maybe Article) -- | Extracts the article from HTML in the given text. fromText :: Text -> Maybe Article