{-# LANGUAGE PatternGuards #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.CSL.Proc.Disamb
-- Copyright   :  (c) Andrea Rossato
-- License     :  BSD-style (see LICENSE)
--
-- Maintainer  :  Andrea Rossato <andrea.rossato@unitn.it>
-- Stability   :  unstable
-- Portability :  unportable
--
-- This module provides functions for processing the evaluated
-- 'Output' for citation disambiguation.
--
-- Describe the disambiguation process.
--
-----------------------------------------------------------------------------

module Text.CSL.Proc.Disamb where

import Control.Arrow ( (&&&), (>>>) )
import Data.List ( (\\), elemIndex, sortBy, mapAccumL
                 , nub, groupBy, isPrefixOf )
import Data.Maybe
import Data.Ord ( comparing )

import Text.CSL.Eval
import Text.CSL.Reference
import Text.CSL.Style

-- | Given the 'Style', the list of references and the citation
-- groups, disambiguate citations according to the style options.
disambCitations :: Style -> [Reference] -> [[(String, String)]] -> [CitationGroup]
disambCitations s bibs cs
    = citOutput
    where
      -- the list of the position and the reference of each citation
      -- for each citation group.
      refs   = getRefs bibs cs
      -- citation groups
      groups  = map (procGroup s) refs
      -- citation data of duplicates
      duplics = getDuplCiteData groups

      -- check the options set in the style
      disOpts  = getCitDisambOptions s
      hasNames = "disambiguate-add-names"       `elem` disOpts
      hasGName = "disambiguate-add-givenname"   `elem` disOpts
      hasYSuff = "disambiguate-add-year-suffix" `elem` disOpts

      -- citations for which adding a name is not enough.
      needYSuff = map (allTheSame . map disambData) duplics
      needName  = concatMap fst . filter (not . snd) $ zip duplics needYSuff

      -- apply name disambiguation options
      addNames  = disambWithNames hasNames hasGName needName

      -- the list of citations that need re-evaluation with the
      -- \"disambiguate\" condition set to 'True'
      reEval       = let chk = if hasYSuff then filter ((==) [] . citYear) else id
                     in  chk . concatMap fst . filter snd $ zip duplics needYSuff
      reEvaluated  = if or (query hasIfDis s) && reEval /= []
                     then map (uncurry $ reEvaluate s reEval) $ zip refs groups
                     else groups

      -- generate the final FormattedOutput.
      citOutput    = if disOpts /= []
                     then if hasYSuff
                          then proc addYearSuffix $ proc (updateOutput addNames) reEvaluated
                          else proc (updateOutput addNames) reEvaluated
                     else groups

-- | Disambiguate a list of 'CiteData' by adding names to the list of
-- contributors, thus overriding @et-al@ options, if the first 'Bool'
-- is 'True', and/or by adding given-names if the second 'Bool' is
-- 'True'.
disambWithNames :: Bool -> Bool -> [CiteData] -> [CiteData]
disambWithNames bn bg needName
    | bn && bg  = let (n ,ns ) = done   addLNames
                      (n',ns') = done $ addGNames ns
                  in   n ++ n' ++ addGNames ns'
    | bn        = fst . done $ addLNames
    | bg        = fst . done $ addGNames needName
    | otherwise = []
    where
      -- do name disambiguation
      disSolved = zip needName (disambiguate $ map disambData needName)
      addLNames = map (\(c,n) -> c { disambed = head n }) disSolved
      addGNames = map (\ c    -> c { disambed = addGivenNames (disambed c) })

      chkdup = same . proc rmGivenNames . map disambed
      need a = map fst . filter (not . snd) . zip a
      done a = (,) (need a $ chkdup a) (a \\ (need a $ chkdup a))

-- | Given the list of 'CiteData' with the disambiguated field set
-- update the evaluated citations by setting the contributor list
-- accordingly.
updateOutput :: [CiteData] -> Output -> Output
updateOutput m o
    | OContrib k x _ <- o = case elemIndex (CD k x [] [] []) m of
                              Just i -> OContrib k (disambed $ m !! i) []
                              _      -> o
    | otherwise           = o

-- | Given the CSL 'Style' and the list of 'Reference's coupled with
-- their position, generate a 'CitationGroup'. The citations are
-- sorted according to the 'Style'.
procGroup :: Style -> [(String,Reference)] -> CitationGroup
procGroup (Style {citation = c, csMacros = ms , csTerms = ts})
    = CG (layFormat $ citLayout c) (layDelim $ citLayout c) . concat . process
    where
      format (p,r) = evalLayout (citLayout c) False ts ms (citOptions c) p r
      sort    = evalSorting ts ms (citOptions c) (citSort c) . snd
      process = map fst . sortBy (comparing snd) . map (format &&& sort)


-- | Evaluate again a citation group with the 'EvalState' 'disamb'
-- field set to 'True' (for matching the @\"disambiguate\"@
-- condition).
reEvaluate :: Style -> [CiteData] -> [(String,Reference)] -> CitationGroup -> CitationGroup
reEvaluate (Style {citation = c, csMacros = ms , csTerms = ts}) l pr (CG f d os)
    = CG f d . flip concatMap (zip pr os) $
      \((p,r),out) -> if citeKey r `elem` map key l
                      then evalLayout (citLayout c) True ts ms (citOptions c) p r
                      else [out]

-- | Check if the 'Style' has any conditional for disambiguation. In
-- this case the conditional will be try after all other
-- disambiguation strategies have failed. To be used with the generic
-- 'query' function.
hasIfDis :: IfThen -> [Bool]
hasIfDis o
    | IfThen (Condition {disambiguation = d}) _ _ <- o = [d /= []]
    | otherwise                                        = [False  ]

-- | Get the list of disambiguation options set in the 'Style' for
-- citations.
getCitDisambOptions :: Style -> [String]
getCitDisambOptions
    = map fst . filter ((==) "true" . snd) .
      filter (isPrefixOf "disambiguate" . fst) . citOptions . citation

-- | Set the locator, the 'citationNumber' and the position for each
-- citation, with the list of 'Reference's and the list of citation
-- groups (citation keys with locators).
getRefs :: [Reference] -> [[(String, String)]] -> [[(String, Reference)]]
getRefs r = map (map $ getReference' r) . generatePosition

-- | Group citation data (with possible alternative names) of
-- citations which have a duplicate (same 'collision' and same
-- 'citYear').
getDuplCiteData :: [CitationGroup] -> [[CiteData]]
getDuplCiteData g
    = groupBy (\a b -> collide a == collide b) . sortBy  (comparing collide) $ duplicates
    where
      collide    = proc rmGivenNames . collision
      citeData   = nub $ concatMap (mapGroupOutput getCiteData) g
      duplicates = filter (collide &&& citYear >>> flip elem (getDuplNamesYear g)) citeData

-- | For an evaluated citation get its 'CiteData'. The disambiguated
-- citation and the year fields are empty.
getCiteData :: Output -> [CiteData]
getCiteData
    = query contribs &&& years >>>
      zipData
    where
      yearsQ  = query getYears
      years o = if yearsQ o /= [] then yearsQ o else [""]
      zipData = uncurry . zipWith $ \c y -> c {citYear = y}
      contribs o
          | OContrib k x xs <- o = [CD k x xs [] []]
          | otherwise            = []

-- | The contributors diambiguation data, the list of names and
-- give-names, and the citation year ('OYear').
type NamesYear = ([Output],String)

-- | Get the contributors list ('OContrib') and the year occurring in
-- more then one citation.
getDuplNamesYear :: [CitationGroup] -> [NamesYear]
getDuplNamesYear
    = nub . catMaybes . snd . mapAccumL dupl [] . getData
    where
      getData  = concat . map nub . map (mapGroupOutput getNamesYear)
      dupl a c = if snd c `elem` map snd a
                 then if fst c `elem` map fst a then (a,Nothing) else (c:a,Just $ snd c)
                 else if fst c `elem` map fst a then (a,Nothing) else (c:a,Nothing     )

-- | Get the list of citation keys coupled with their 'NamesYear' in
-- the evaluated 'Output'.
getNamesYear :: Output -> [(String,NamesYear)]
getNamesYear
    = proc rmGivenNames        >>>
      query contribs &&& years >>>
      zipData
    where
      yearsQ  = query getYears
      years o = if yearsQ o /= [] then yearsQ o else [""]
      zipData = uncurry . zipWith $ \(k,c) y -> (,) k (c,y)
      contribs o
          | OContrib k x _ <- o = [(k,x)]
          | otherwise           = []

getYears :: Output -> [String]
getYears o
    | OYear x _ _ <- o = [x]
    | otherwise        = []


-- List Utilities

-- | Try to disambiguate a list of lists by returning the first non
-- colliding element, if any, of each list:
--
-- > disambiguate [[1,2],[1,3],[2]] = [[2],[3],[2]]
disambiguate :: (Eq a) => [[a]] -> [[a]]
disambiguate [] = []
disambiguate l
    = if  hasMult l && not (allTheSame l) && hasDuplicates heads
      then disambiguate (rest l)
      else heads
    where
      heads = map head' l
      rest  = map (\(b,x) -> if b then tail' x else head' x) . zip (same heads)

      hasMult []     = False
      hasMult (x:xs) = length x > 1 || hasMult xs

      tail' [x] = [x]
      tail'  x  = if null x then x else tail x

-- | For each element a list of 'Bool': 'True' if the element has a
-- duplicate in the list:
--
-- > same [1,2,1] = [True,False,True]
same :: Eq a => [a] -> [Bool]
same [] = []
same l
    = map (flip elem dupl) l
    where
      dupl = catMaybes . snd . macc [] $ l
      macc = mapAccumL $ \a x -> if x `elem` a then (a,Just x) else (x:a,Nothing)

hasDuplicates :: Eq a => [a] -> Bool
hasDuplicates = or . same

allTheSame :: Eq a => [a] -> Bool
allTheSame = and . same