-- | Module providing pattern matching and capturing on 'Identifier's.
-- 'Pattern's come in two kinds:
--
-- * Simple glob patterns, like @foo\/*@;
--
-- * Custom, arbitrary predicates of the type @Identifier -> Bool@.
--
-- They both have advantages and disadvantages. By default, globs are used,
-- unless you construct your 'Pattern' using the 'predicate' function.
--
-- A very simple pattern could be, for example, @foo\/bar@. This pattern will
-- only match the exact @foo\/bar@ identifier.
--
-- To match more than one identifier, there are different captures that one can
-- use:
--
-- * @*@: matches at most one element of an identifier;
--
-- * @**@: matches one or more elements of an identifier.
--
-- Some examples:
--
-- * @foo\/*@ will match @foo\/bar@ and @foo\/foo@, but not @foo\/bar\/qux@;
--
-- * @**@ will match any identifier;
--
-- * @foo\/**@ will match @foo\/bar@ and @foo\/bar\/qux@, but not @bar\/foo@;
--
-- * @foo\/*.html@ will match all HTML files in the @foo\/@ directory.
--
-- The 'capture' function allows the user to get access to the elements captured
-- by the capture elements in the pattern.
--
-- Like an 'Identifier', a 'Pattern' also has a type parameter. This is simply
-- an extra layer of safety, and can be discarded using the 'castPattern'
-- function.
--
module Hakyll.Core.Identifier.Pattern
    ( -- * The pattern type
      Pattern
    , castPattern

      -- * Creating patterns
    , parseGlob
    , predicate
    , list
    , regex
    , inGroup
    , complement

      -- * Applying patterns
    , matches
    , filterMatches
    , capture
    , fromCapture
    , fromCaptures
    ) where

import Data.List (isPrefixOf, inits, tails)
import Control.Arrow ((&&&), (>>>))
import Control.Monad (msum)
import Data.Maybe (isJust, fromMaybe)
import Data.Monoid (Monoid, mempty, mappend)

import GHC.Exts (IsString, fromString)
import Text.Regex.TDFA ((=~~))

import Hakyll.Core.Identifier

-- | One base element of a pattern
--
data GlobComponent = Capture
                   | CaptureMany
                   | Literal String
                   deriving (Eq, Show)

-- | Type that allows matching on identifiers
--
data Pattern a = Glob [GlobComponent]
               | Predicate (Identifier a -> Bool)
               | List [Identifier a]

instance IsString (Pattern a) where
    fromString = parseGlob

instance Monoid (Pattern a) where
    mempty = Predicate (const True)
    p1 `mappend` p2 = Predicate $ \i -> matches p1 i && matches p2 i

-- | Discard the phantom type parameter
--
castPattern :: Pattern a -> Pattern b
castPattern (Glob g)      = Glob g
castPattern (Predicate p) = Predicate $ p . castIdentifier
castPattern (List l)      = List $ map castIdentifier l
{-# INLINE castPattern #-}

-- | Parse a pattern from a string
--
parseGlob :: String -> Pattern a
parseGlob = Glob . parse'
  where
    parse' str =
        let (chunk, rest) = break (`elem` "\\*") str
        in case rest of
            ('\\' : x   : xs) -> Literal (chunk ++ [x]) : parse' xs
            ('*'  : '*' : xs) -> Literal chunk : CaptureMany : parse' xs
            ('*'  : xs)       -> Literal chunk : Capture : parse' xs
            xs                -> Literal chunk : Literal xs : []

-- | Create a 'Pattern' from an arbitrary predicate
--
-- Example:
--
-- > predicate (\i -> matches "foo/*" i && not (matches "foo/bar" i))
--
predicate :: (Identifier a -> Bool) -> Pattern a
predicate = Predicate

-- | Create a 'Pattern' from a list of 'Identifier's it should match
--
list :: [Identifier a] -> Pattern a
list = List

-- | Create a 'Pattern' from a regex
--
-- Example:
--
-- > regex "^foo/[^x]*$
--
regex :: String -> Pattern a
regex str = predicate $ fromMaybe False . (=~~ str) . toFilePath

-- | Create a 'Pattern' which matches if the identifier is in a certain group
-- (or in no group)
--
inGroup :: Maybe String -> Pattern a
inGroup group = predicate $ (== group) . identifierGroup

-- | Inverts a pattern, e.g.
--
-- > complement "foo/bar.html"
--
-- will match /anything/ except @\"foo\/bar.html\"@
--
complement :: Pattern a -> Pattern a
complement p = predicate (not . matches p)

-- | Check if an identifier matches a pattern
--
matches :: Pattern a -> Identifier a -> Bool
matches (Glob p)      = isJust . capture (Glob p)
matches (Predicate p) = (p $)
matches (List l)      = (`elem` l)

-- | Given a list of identifiers, retain only those who match the given pattern
--
filterMatches :: Pattern a -> [Identifier a] -> [Identifier a]
filterMatches = filter . matches

-- | Split a list at every possible point, generate a list of (init, tail)
-- cases. The result is sorted with inits decreasing in length.
--
splits :: [a] -> [([a], [a])]
splits = inits &&& tails >>> uncurry zip >>> reverse

-- | Match a glob against a pattern, generating a list of captures
--
capture :: Pattern a -> Identifier a -> Maybe [String]
capture (Glob p) (Identifier _ i) = capture' p i
capture _        _                = Nothing

-- | Internal verion of 'capture'
--
capture' :: [GlobComponent] -> String -> Maybe [String]
capture' [] [] = Just []  -- An empty match
capture' [] _  = Nothing  -- No match
capture' (Literal l : ms) str
    -- Match the literal against the string
    | l `isPrefixOf` str = capture' ms $ drop (length l) str
    | otherwise          = Nothing
capture' (Capture : ms) str =
    -- Match until the next /
    let (chunk, rest) = break (== '/') str
    in msum $ [ fmap (i :) (capture' ms (t ++ rest)) | (i, t) <- splits chunk ]
capture' (CaptureMany : ms) str =
    -- Match everything
    msum $ [ fmap (i :) (capture' ms t) | (i, t) <- splits str ]
    
-- | Create an identifier from a pattern by filling in the captures with a given
-- string
--
-- Example:
--
-- > fromCapture (parseGlob "tags/*") "foo"
--
-- Result:
--
-- > "tags/foo"
--
fromCapture :: Pattern a -> String -> Identifier a
fromCapture pattern = fromCaptures pattern . repeat

-- | Create an identifier from a pattern by filling in the captures with the
-- given list of strings
--
fromCaptures :: Pattern a -> [String] -> Identifier a
fromCaptures (Glob p) = Identifier Nothing . fromCaptures' p
fromCaptures _        = error $
    "Hakyll.Core.Identifier.Pattern.fromCaptures: fromCaptures only works " ++
    "on simple globs!"

-- | Internally used version of 'fromCaptures'
--
fromCaptures' :: [GlobComponent] -> [String] -> String
fromCaptures' []        _ = mempty
fromCaptures' (m : ms) [] = case m of
    Literal l -> l `mappend` fromCaptures' ms []
    _         -> error $  "Hakyll.Core.Identifier.Pattern.fromCaptures': "
                       ++ "identifier list exhausted"
fromCaptures' (m : ms) ids@(i : is) = case m of
    Literal l -> l `mappend` fromCaptures' ms ids
    _         -> i `mappend` fromCaptures' ms is