{-# LANGUAGE PatternGuards #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.CSL.Proc.Disamb
-- Copyright   :  (c) Andrea Rossato
-- License     :  BSD-style (see LICENSE)
--
-- Maintainer  :  Andrea Rossato <andrea.rossato@unitn.it>
-- Stability   :  unstable
-- Portability :  unportable
--
-- This module provides functions for processing the evaluated
-- 'Output' for citation disambiguation.
--
-- Describe the disambiguation process.
--
-----------------------------------------------------------------------------

module Text.CSL.Proc.Disamb where

import Control.Arrow ( (&&&), (>>>), second )
import Data.Char ( chr )
import Data.List ( elemIndex, find, findIndex, sortBy, mapAccumL
                 , nub, groupBy, isPrefixOf )
import Data.Maybe
import Data.Ord ( comparing )

import Text.CSL.Eval
import Text.CSL.Output.Plain
import Text.CSL.Reference
import Text.CSL.Style

-- | Given the 'Style', the list of references and the citation
-- groups, disambiguate citations according to the style options.
disambCitations :: Style -> [Reference] -> Citations -> [CitationGroup]
                -> ([(String, String)], [CitationGroup])
disambCitations s bibs cs groups
   = (,) yearSuffs citOutput
    where
      -- utils
      when_ b f = if b then f else []
      filter_ f = concatMap (map fst) . map (filter f) . map (uncurry zip)

      -- the list of the position and the reference of each citation
      -- for each citation group.
      refs   = processCites bibs cs
      -- name data of name duplicates
      nameDupls = getDuplNameData groups
      -- citation data of ambiguous cites
      duplics   = getDuplCiteData hasNamesOpt $ addGName groups

      -- check the options set in the style
      --isByCite = getOptionVal "givenname-disambiguation-rule" (citOptions . citation $ s) == "by-cite"
      disOpts  = getCitDisambOptions s
      hasNamesOpt = "disambiguate-add-names"       `elem` disOpts
      hasGNameOpt = "disambiguate-add-givenname"   `elem` disOpts
      hasYSuffOpt = "disambiguate-add-year-suffix" `elem` disOpts

      withNames = flip map duplics $ same . proc rmNameHash . proc rmGivenNames .
                  map (if hasNamesOpt then disambData else return . disambYS)

      needNames = filter_ (not . snd) $ zip (map disambAddNames duplics) withNames
      needYSuff = filter_        snd  $ zip (map disambAddLast  duplics) withNames

      newNames :: [CiteData]
      newNames  = when_ hasNamesOpt $ needNames ++ needYSuff

      newGName :: [NameData]
      newGName  = when_ hasGNameOpt $ concatMap disambAddGivenNames nameDupls

      -- the list of citations that need re-evaluation with the
      -- \"disambiguate\" condition set to 'True'
      reEval      = let chk = if hasYSuffOpt then filter ((==) [] . citYear) else id
                    in  chk needYSuff
      reEvaluated = if or (query hasIfDis s) && reEval /= []
                    then map (uncurry $ reEvaluate s reEval) $ zip refs groups
                    else groups

      addGName  = proc (updateOutput []       newGName)
      addNames  = proc (updateOutput newNames newGName)

      withYearS = if hasYSuffOpt
                  then map (mapCitationGroup $ setYearSuffCollision hasNamesOpt needYSuff) $ reEvaluated
                  else rmYearSuff $ reEvaluated

      yearSuffs = when_ hasYSuffOpt . generateYearSuffix bibs . query getYearSuffixes $ withYearS

      processed = if hasYSuffOpt
                  then proc (updateYearSuffixes yearSuffs) .
                       addNames $ withYearS
                  else addNames $ withYearS

      citOutput = if disOpts /= [] then processed else groups

mapDisambData :: (Output -> Output) -> CiteData -> CiteData
mapDisambData f (CD k c ys d r y) = CD k c ys (proc f d) r y

mapCitationGroup :: ([Output] -> [Output]) -> CitationGroup ->  CitationGroup
mapCitationGroup f (CG cs fm d os) = CG cs fm d (zip (map fst os) . f $ map snd os)

disambAddNames :: [CiteData] -> [CiteData]
disambAddNames needName = addLNames
    where
      disSolved = zip needName (disambiguate $ map disambData needName)
      addLNames = map (\(c,n) -> c { disambed = if null n then collision c else head n }) disSolved

disambAddLast :: [CiteData] -> [CiteData]
disambAddLast = map last_
    where
      last_ c = c { disambed   = if disambData c /= [] then last $ disambData c else collision  c }

disambAddGivenNames :: [NameData] -> [NameData]
disambAddGivenNames needName = addGName
    where
      disSolved = zip needName (disambiguate $ map nameDisambData needName)
      addGName = map (\(c,n) -> c { nameDataSolved = if null n then nameCollision c else head n }) disSolved

-- | Given the list of 'CiteData' with the disambiguated field set
-- update the evaluated citations by setting the contributor list
-- accordingly.
updateOutput :: [CiteData] -> [NameData] -> Output -> Output
updateOutput c n o
    | OContrib k r x  _ d <- o = case elemIndex (CD k (clean x) [] [] [] []) c of
                                   Just i -> OContrib k r (disambed $ c !! i) [] d
                                   _      -> o
    | OName _ _ [] _ <- o = o
    | OName k x _  f <- o = case elemIndex (ND k (clean x) [] []) n of
                              Just i -> OName k (nameDataSolved $ n !! i) [] f
                              _      -> o
    | otherwise           = o
    where
      clean = proc rmGivenNames

-- | Evaluate again a citation group with the 'EvalState' 'disamb'
-- field set to 'True' (for matching the @\"disambiguate\"@
-- condition).
reEvaluate :: Style -> [CiteData] -> [(Cite, Reference)] -> CitationGroup -> CitationGroup
reEvaluate (Style {citation = ct, csMacros = ms , styleLocale = lo}) l cr (CG a f d os)
    = CG a f d . flip concatMap (zip cr os) $
      \((c,r),out) -> if refId r `elem` map key l
                      then return . second (flip Output emptyFormatting) $
                           (,) c $ evalLayout (citLayout ct) (EvalCite c) True lo ms (citOptions ct) r
                      else [out]

-- | Check if the 'Style' has any conditional for disambiguation. In
-- this case the conditional will be try after all other
-- disambiguation strategies have failed. To be used with the generic
-- 'query' function.
hasIfDis :: IfThen -> [Bool]
hasIfDis o
    | IfThen (Condition {disambiguation = d}) _ _ <- o = [d /= []]
    | otherwise                                        = [False  ]

-- | Get the list of disambiguation options set in the 'Style' for
-- citations.
getCitDisambOptions :: Style -> [String]
getCitDisambOptions
    = map fst . filter ((==) "true" . snd) .
      filter (isPrefixOf "disambiguate" . fst) . citOptions . citation

-- | Group citation data (with possible alternative names) of
-- citations which have a duplicate (same 'collision' and same
-- 'citYear'). If the first 'Bool' is 'False', then we need to
-- retrieve data for year suffix disambiguation.
getDuplCiteData :: Bool -> [CitationGroup] -> [[CiteData]]
getDuplCiteData b g
    = groupBy (\x y -> collide x == collide y) . sortBy (comparing collide) $ duplicates
    where
      whatToGet  = if b then collision else disambYS
      collide    = proc rmNameHash . proc rmGivenNames . whatToGet
      citeData   = nub $ concatMap (mapGroupOutput $ getCiteData) g
      duplicates = filter (collide &&& citYear >>> flip elem (getDuplNamesYear b g)) citeData

-- | For an evaluated citation get its 'CiteData'. The disambiguated
-- citation and the year fields are empty.
getCiteData :: Output -> [CiteData]
getCiteData
    = contribs &&& years >>>
      zipData
    where
      contribs x = if query contribsQ x /= []
                   then query contribsQ x
                   else [CD [] [] [] [] [] []]
      yearsQ  = query getYears
      years o = if yearsQ o /= [] then yearsQ o else [([],[])]
      zipData = uncurry . zipWith $ \c y -> if key c /= []
                                            then c {citYear = snd y}
                                            else c {key     = fst y
                                                   ,citYear = snd y}
      contribsQ o
          | OContrib k _ c d dd <- o = [CD k c d dd [] []]
          | otherwise                = []

-- | The contributors diambiguation data, the list of names and
-- give-names, and the citation year ('OYear').
type NamesYear = ([Output],String)

-- | Get the contributors list ('OContrib') and the year occurring in
-- more then one citation.
getDuplNamesYear :: Bool -> [CitationGroup] -> [NamesYear]
getDuplNamesYear b
    = nub . catMaybes . snd . mapAccumL dupl [] . getData
    where
      getData  = nub . concatMap (mapGroupOutput $ getNamesYear b)
      dupl a c = if snd c `elem` a
                 then (a,Just $ snd c) else (snd c:a,Nothing)

-- | Get the list of citation keys coupled with their 'NamesYear' in
-- the evaluated 'Output'. If the 'Bool' is 'False' then the function
-- retrieves the names used in citations not occuring for the first
-- time.
getNamesYear :: Bool -> Output -> [(String,NamesYear)]
getNamesYear b
    = proc rmNameHash    >>>
      proc rmGivenNames  >>>
      contribs &&& years >>>
      zipData
    where
      contribs x = if query contribsQ x /= []
                   then query contribsQ x
                   else [([],[])]
      yearsQ  = query getYears
      years o = if yearsQ o /= [] then yearsQ o else [([],[])]
      zipData = uncurry . zipWith $ \(k,c) y -> if k /= []
                                                then (,) k       (c, snd y)
                                                else (,) (fst y) (c, snd y)
      contribsQ o
          | OContrib k _ c d _ <- o = [(k,if b then c else d)]
          | otherwise               = []

getYears :: Output -> [(String,String)]
getYears o
    | OYear x k _ <- o = [(k,x)]
    | otherwise        = []

getDuplNameData :: [CitationGroup] -> [[NameData]]
getDuplNameData g
    = groupBy (\a b -> collide a == collide b) . sortBy (comparing collide) $ duplicates
    where
      collide    = nameCollision
      nameData   = nub $ concatMap (mapGroupOutput getName) g
      duplicates = filter (flip elem (getDuplNames g) . collide) nameData

getDuplNames :: [CitationGroup] -> [[Output]]
getDuplNames xs
    = nub . catMaybes . snd . mapAccumL dupl [] . getData $ xs
    where
      getData = concatMap (mapGroupOutput getName)
      dupl a c = if nameCollision c `elem` map nameCollision a
                 then (a,Just $ nameCollision c)
                 else (c:a,Nothing)

getName :: Output -> [NameData]
getName = query getName'
    where
      getName' o
          | OName i n ns _ <- o = [ND i n (n:ns) []]
          | otherwise           = []

generateYearSuffix :: [Reference] -> [(String, [Output])] -> [(String,String)]
generateYearSuffix refs
    = concatMap (flip zip suffs) .
      -- sort clashing cites using their position in the sorted bibliography
      getFst . map sort' . map (filter ((/=) 0 . snd)) . map (map getP) .
      -- group clashing cites
      getFst . map nub . groupBy (\a b -> snd a == snd b) . sort' . filter ((/=) [] . snd)
    where
      sort'  :: (Ord a, Ord b) => [(a,b)] -> [(a,b)]
      sort'  = sortBy (comparing snd)
      getFst = map $ map fst
      getP k = case findIndex ((==) k . refId) refs of
                   Just x -> (k, x + 1)
                   _      -> (k,     0)
      suffs = l ++ [x ++ y | x <- l, y <- l ]
      l = map (return . chr) [97..122]

setYearSuffCollision :: Bool -> [CiteData] -> [Output] -> [Output]
setYearSuffCollision b cs = proc (setYS cs) . (map $ \x -> if hasYearSuf x then x else addYearSuffix x)
    where
      setYS c o
          | OYearSuf _ k _ f <- o = OYearSuf [] k (getCollision k c) f
          | otherwise             = o
      collide = if b then disambed else disambYS
      getCollision k c = case find ((==) k . key) c of
                           Just x -> if collide x == []
                                     then [OStr (citYear x) emptyFormatting]
                                     else collide x
                           _      -> []

updateYearSuffixes :: [(String, String)] -> Output -> Output
updateYearSuffixes yss o
 | OYearSuf _ k c f <- o = case lookup k yss of
                             Just x -> OYearSuf x k c f
                             _      -> ONull
 | otherwise             = o

getYearSuffixes :: Output -> [(String,[Output])]
getYearSuffixes o
    | OYearSuf _ k c _ <- o = [(k,c)]
    | otherwise             = []

rmYearSuff :: [CitationGroup] -> [CitationGroup]
rmYearSuff = proc rmYS
    where
      rmYS o
          | OYearSuf _ _ _ _  <- o = ONull
          | otherwise              = o

-- List Utilities

-- | Try to disambiguate a list of lists by returning the first non
-- colliding element, if any, of each list:
--
-- > disambiguate [[1,2],[1,3],[2]] = [[2],[3],[2]]
disambiguate :: (Eq a) => [[a]] -> [[a]]
disambiguate [] = []
disambiguate l
    = if  hasMult l && not (allTheSame l) && hasDuplicates heads
      then disambiguate (rest l)
      else heads
    where
      heads = map head' l
      rest  = map (\(b,x) -> if b then tail_ x else head' x) . zip (same heads)

      hasMult []     = False
      hasMult (x:xs) = length x > 1 || hasMult xs

      tail_ [x] = [x]
      tail_  x  = if null x then x else tail x

-- | For each element a list of 'Bool': 'True' if the element has a
-- duplicate in the list:
--
-- > same [1,2,1] = [True,False,True]
same :: Eq a => [a] -> [Bool]
same [] = []
same l
    = map (flip elem dupl) l
    where
      dupl = catMaybes . snd . macc [] $ l
      macc = mapAccumL $ \a x -> if x `elem` a then (a,Just x) else (x:a,Nothing)

hasDuplicates :: Eq a => [a] -> Bool
hasDuplicates = or . same

allTheSame :: Eq a => [a] -> Bool
allTheSame = and . same