{-# LANGUAGE OverloadedStrings #-}

module Readability.Internal
  ( Readability.Internal.summary,
    Readability.Title.shortTitle,
    Readability.Title.title,
    rootSummary,
  )
where

import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import Readability.Clean
import Readability.Helper
import Readability.Metrics
import Readability.Title
import Readability.Types
import Text.XML
import Text.XML.Cursor
import Prelude hiding (div)

summary :: Settings -> Document -> Maybe Document
summary s doc = document <$> rootSummary s True (documentRoot doc)

rootSummary :: Settings -> Bool -> Element -> Maybe Element
rootSummary s ruthless root =
  if ruthless && T.length (innerText clean) <= 250
    then rootSummary s False root
    else cleanedArticle
  where
    clean = fromNode $ paradivs $ NodeElement (cleanElement ruthless root)
    candidates = clean $// checkName (`elem` ["p", "pre", "td"])

    cleanedArticle = case scoreParagraphs candidates of
      Nothing -> Nothing
      Just scores ->
        let (bestCursor, score) = maxScore scores
            article = getArticle bestCursor score scores
         in sanitizeNode s scores article >>= getElement

getArticle :: Cursor -> Double -> Scores -> Node
getArticle best score scores = html $ body $ div $ node <$> filter f siblings
  where
    siblings = precedingSibling best ++ best : followingSibling best
    threshold = 10 `max` score * 0.2
    f c =
      node c == node best
        || any (>= threshold) (lookupScore (node c) scores)
        || (any (\e -> elementName e == "p") (getElement (node c)) && textualSibling c)

html, body :: Node -> Node
html n = NodeElement $ Element "html" Map.empty [n]
body n = NodeElement $ Element "body" Map.empty [n]

div :: [Node] -> Node
div ns = NodeElement $ Element "div" Map.empty ns

document :: Element -> Document
document e = Document (Prologue [] Nothing []) e []