{-# LANGUAGE PatternGuards #-} ----------------------------------------------------------------------------- -- | -- Module : Text.CSL.Proc.Disamb -- Copyright : (c) Andrea Rossato -- License : BSD-style (see LICENSE) -- -- Maintainer : Andrea Rossato -- Stability : unstable -- Portability : unportable -- -- This module provides functions for processing the evaluated -- 'Output' for citation disambiguation. -- -- Describe the disambiguation process. -- ----------------------------------------------------------------------------- module Text.CSL.Proc.Disamb where import Control.Arrow ( (&&&), (>>>) ) import Data.List ( (\\), elemIndex, sortBy, mapAccumL , nub, groupBy, isPrefixOf ) import Data.Maybe import Data.Ord ( comparing ) import Text.CSL.Eval import Text.CSL.Reference import Text.CSL.Style -- | Given the 'Style', the list of references and the citation -- groups, disambiguate citations according to the style options. disambCitations :: Style -> [Reference] -> [[(String, String)]] -> [CitationGroup] disambCitations s bibs cs = citOutput where -- the list of the position and the reference of each citation -- for each citation group. refs = getRefs bibs cs -- citation groups groups = map (procGroup s) refs -- citation data of duplicates duplics = getDuplCiteData groups -- check the options set in the style disOpts = getCitDisambOptions s hasNames = "disambiguate-add-names" `elem` disOpts hasGName = "disambiguate-add-givenname" `elem` disOpts hasYSuff = "disambiguate-add-year-suffix" `elem` disOpts -- citations for which adding a name is not enough. needYSuff = map (allTheSame . map disambData) duplics needName = concatMap fst . filter (not . snd) $ zip duplics needYSuff -- apply name disambiguation options addNames = disambWithNames hasNames hasGName needName -- the list of citations that need re-evaluation with the -- \"disambiguate\" condition set to 'True' reEval = let chk = if hasYSuff then filter ((==) [] . citYear) else id in chk . concatMap fst . filter snd $ zip duplics needYSuff reEvaluated = if or (query hasIfDis s) && reEval /= [] then map (uncurry $ reEvaluate s reEval) $ zip refs groups else groups -- generate the final FormattedOutput. citOutput = if disOpts /= [] then if hasYSuff then proc addYearSuffix $ proc (updateOutput addNames) reEvaluated else proc (updateOutput addNames) reEvaluated else groups -- | Disambiguate a list of 'CiteData' by adding names to the list of -- contributors, thus overriding @et-al@ options, if the first 'Bool' -- is 'True', and/or by adding given-names if the second 'Bool' is -- 'True'. disambWithNames :: Bool -> Bool -> [CiteData] -> [CiteData] disambWithNames bn bg needName | bn && bg = let (n ,ns ) = done addLNames (n',ns') = done $ addGNames ns in n ++ n' ++ addGNames ns' | bn = fst . done $ addLNames | bg = fst . done $ addGNames needName | otherwise = [] where -- do name disambiguation disSolved = zip needName (disambiguate $ map disambData needName) addLNames = map (\(c,n) -> c { disambed = head n }) disSolved addGNames = map (\ c -> c { disambed = addGivenNames (disambed c) }) chkdup = same . proc rmGivenNames . map disambed need a = map fst . filter (not . snd) . zip a done a = (,) (need a $ chkdup a) (a \\ (need a $ chkdup a)) -- | Given the list of 'CiteData' with the disambiguated field set -- update the evaluated citations by setting the contributor list -- accordingly. updateOutput :: [CiteData] -> Output -> Output updateOutput m o | OContrib k x _ <- o = case elemIndex (CD k x [] [] []) m of Just i -> OContrib k (disambed $ m !! i) [] _ -> o | otherwise = o -- | Given the CSL 'Style' and the list of 'Reference's coupled with -- their position, generate a 'CitationGroup'. The citations are -- sorted according to the 'Style'. procGroup :: Style -> [(String,Reference)] -> CitationGroup procGroup (Style {citation = c, csMacros = ms , csTerms = ts}) = CG (layFormat $ citLayout c) (layDelim $ citLayout c) . concat . process where format (p,r) = evalLayout (citLayout c) False ts ms (citOptions c) p r sort = evalSorting ts ms (citOptions c) (citSort c) . snd process = map fst . sortBy (comparing snd) . map (format &&& sort) -- | Evaluate again a citation group with the 'EvalState' 'disamb' -- field set to 'True' (for matching the @\"disambiguate\"@ -- condition). reEvaluate :: Style -> [CiteData] -> [(String,Reference)] -> CitationGroup -> CitationGroup reEvaluate (Style {citation = c, csMacros = ms , csTerms = ts}) l pr (CG f d os) = CG f d . flip concatMap (zip pr os) $ \((p,r),out) -> if citeKey r `elem` map key l then evalLayout (citLayout c) True ts ms (citOptions c) p r else [out] -- | Check if the 'Style' has any conditional for disambiguation. In -- this case the conditional will be try after all other -- disambiguation strategies have failed. To be used with the generic -- 'query' function. hasIfDis :: IfThen -> [Bool] hasIfDis o | IfThen (Condition {disambiguation = d}) _ _ <- o = [d /= []] | otherwise = [False ] -- | Get the list of disambiguation options set in the 'Style' for -- citations. getCitDisambOptions :: Style -> [String] getCitDisambOptions = map fst . filter ((==) "true" . snd) . filter (isPrefixOf "disambiguate" . fst) . citOptions . citation -- | Set the locator, the 'citationNumber' and the position for each -- citation, with the list of 'Reference's and the list of citation -- groups (citation keys with locators). getRefs :: [Reference] -> [[(String, String)]] -> [[(String, Reference)]] getRefs r = map (map $ getReference' r) . generatePosition -- | Group citation data (with possible alternative names) of -- citations which have a duplicate (same 'collision' and same -- 'citYear'). getDuplCiteData :: [CitationGroup] -> [[CiteData]] getDuplCiteData g = groupBy (\a b -> collide a == collide b) . sortBy (comparing collide) $ duplicates where collide = proc rmGivenNames . collision citeData = nub $ concatMap (mapGroupOutput getCiteData) g duplicates = filter (collide &&& citYear >>> flip elem (getDuplNamesYear g)) citeData -- | For an evaluated citation get its 'CiteData'. The disambiguated -- citation and the year fields are empty. getCiteData :: Output -> [CiteData] getCiteData = query contribs &&& years >>> zipData where yearsQ = query getYears years o = if yearsQ o /= [] then yearsQ o else [""] zipData = uncurry . zipWith $ \c y -> c {citYear = y} contribs o | OContrib k x xs <- o = [CD k x xs [] []] | otherwise = [] -- | The contributors diambiguation data, the list of names and -- give-names, and the citation year ('OYear'). type NamesYear = ([Output],String) -- | Get the contributors list ('OContrib') and the year occurring in -- more then one citation. getDuplNamesYear :: [CitationGroup] -> [NamesYear] getDuplNamesYear = nub . catMaybes . snd . mapAccumL dupl [] . getData where getData = concat . map nub . map (mapGroupOutput getNamesYear) dupl a c = if snd c `elem` map snd a then if fst c `elem` map fst a then (a,Nothing) else (c:a,Just $ snd c) else if fst c `elem` map fst a then (a,Nothing) else (c:a,Nothing ) -- | Get the list of citation keys coupled with their 'NamesYear' in -- the evaluated 'Output'. getNamesYear :: Output -> [(String,NamesYear)] getNamesYear = proc rmGivenNames >>> query contribs &&& years >>> zipData where yearsQ = query getYears years o = if yearsQ o /= [] then yearsQ o else [""] zipData = uncurry . zipWith $ \(k,c) y -> (,) k (c,y) contribs o | OContrib k x _ <- o = [(k,x)] | otherwise = [] getYears :: Output -> [String] getYears o | OYear x _ _ <- o = [x] | otherwise = [] -- List Utilities -- | Try to disambiguate a list of lists by returning the first non -- colliding element, if any, of each list: -- -- > disambiguate [[1,2],[1,3],[2]] = [[2],[3],[2]] disambiguate :: (Eq a) => [[a]] -> [[a]] disambiguate [] = [] disambiguate l = if hasMult l && not (allTheSame l) && hasDuplicates heads then disambiguate (rest l) else heads where heads = map head' l rest = map (\(b,x) -> if b then tail' x else head' x) . zip (same heads) hasMult [] = False hasMult (x:xs) = length x > 1 || hasMult xs tail' [x] = [x] tail' x = if null x then x else tail x -- | For each element a list of 'Bool': 'True' if the element has a -- duplicate in the list: -- -- > same [1,2,1] = [True,False,True] same :: Eq a => [a] -> [Bool] same [] = [] same l = map (flip elem dupl) l where dupl = catMaybes . snd . macc [] $ l macc = mapAccumL $ \a x -> if x `elem` a then (a,Just x) else (x:a,Nothing) hasDuplicates :: Eq a => [a] -> Bool hasDuplicates = or . same allTheSame :: Eq a => [a] -> Bool allTheSame = and . same