{-# LANGUAGE OverloadedStrings #-}

module Data.PublicSuffix
    ( publicSuffix
    , registeredDomain
    ) where

import           Data.Function
import           Data.List
import           Data.Monoid

import           Data.PublicSuffix.Types
import           Data.PublicSuffix.Rules

import           Prelude


-- | Convert a domain into a list of labels. This is essentialy splitting the
-- string on dots ('.'). However, the Haskell base library doesn't have a split
-- function. Sigh...
toLabels :: String -> [String]
toLabels [] = [""]
toLabels x =
    let (y, z) = break (== '.') x in
    y : (if z == "" then [] else toLabels $ drop 1 z)


-- | A domain is said to match a rule if and only if all of the following
-- conditions are met:
--
--  - When the domain and rule are split into corresponding labels, that the
--    domain contains as many or more labels than the rule.
--
--  - Beginning with the right-most labels of both the domain and the rule,
--    and continuing for all labels in the rule, one finds that for every pair,
--    either they are identical, or that the label from the rule is "*".

matchRule :: [String] -> Rule -> Bool
matchRule domainLabels rule =
    domainLabelsLength >= ruleLabelsLength &&
    all labelMatches (zip (ruleLabels rule) domainLabels)

  where
    ruleLabelsLength   = length $ ruleLabels rule
    domainLabelsLength = length domainLabels


-- | True if the label from the rule matches a label from the domain.
labelMatches :: (String, String) -> Bool
labelMatches ("*"      , _          ) = True
labelMatches (ruleLabel, domainLabel) = ruleLabel == domainLabel



-- | Return the public suffix of the given domain name (a dot-delimited unicode
-- string). The public suffix is the part of a domain which should be protected.
--
-- Notes:
--
--  - The domain MUST NOT start with a dot. Normalize the domain before passing
--    it to functions in this module.
--  - The domain MUST NOT be in punycode encoding. The matching of domain labels
--    is done on the original encoding, as specified in the upstream
--    publicsuffix list.
--
--
-- In particular that means applications SHOULD reject:
--
--  - HTTP cookies which try to set domain to a public suffix.
--  - X509 wildcard certificates which try to match all subdomains of a public
--    suffix.

publicSuffix :: String -> String
publicSuffix domain =
    -- Algorithm (see https://publicsuffix.org/list/)
    --
    --   Match domain against all rules and take note of the matching ones.
    --   If no rules match, the prevailing rule is "*".
    --   If more than one rule matches, the prevailing rule is the one which is an exception rule.
    --   If there is no matching exception rule, the prevailing rule is the one with the most labels.
    --   If the prevailing rule is a exception rule, modify it by removing the leftmost label.
    --   The public suffix is the set of labels from the domain which match the labels of the prevailing rule, using the matching algorithm above.
    --   The registered or registrable domain is the public suffix plus one additional label.

    mconcat $ intersperse "." $ reverse $ take numMatchingLabels domainLabels

  where
    rule              = prevailingRule domainLabels
    domainLabels      = reverse $ toLabels domain
    numMatchingLabels = length $ takeWhile labelMatches $ zip (ruleLabels rule) domainLabels


prevailingRule :: [String] -> Rule
prevailingRule domainLabels = case filter (matchRule domainLabels) rules of
    []  -> Rule False ["*"]
    [x] -> x
    xs  -> case filter isException xs of
        []   -> head $ reverse $ sortBy (compare `on` (length . ruleLabels)) xs
        ex:_ -> Rule (isException ex) (init $ ruleLabels ex)



-- | Return the domain that was registered or is registrable by a user. These
-- domains are fully controlled by users, and applications SHOULD accept
-- cookies and wildcard certificates for those.

registeredDomain :: String -> Maybe String
registeredDomain domain = if domain == suffix
    then Nothing
    else Just $ mconcat $ intersperse "." $ reverse $ take (suffixLabelsLength + 1) domainLabels

  where
    suffix             = publicSuffix domain
    suffixLabelsLength = length $ toLabels suffix
    domainLabels       = reverse $ toLabels domain