{-# LANGUAGE PatternGuards #-} #if MIN_VERSION_base(4,9,0) {-# OPTIONS_GHC -fno-warn-redundant-constraints #-} #endif ----------------------------------------------------------------------------- -- | -- Module : Text.CSL.Proc.Disamb -- Copyright : (c) Andrea Rossato -- License : BSD-style (see LICENSE) -- -- Maintainer : Andrea Rossato -- Stability : unstable -- Portability : unportable -- -- This module provides functions for processing the evaluated -- 'Output' for citation disambiguation. -- -- Describe the disambiguation process. -- ----------------------------------------------------------------------------- module Text.CSL.Proc.Disamb where import Control.Arrow (second, (&&&), (>>>)) import Data.List (elemIndex, find, findIndex, groupBy, isPrefixOf, mapAccumL, nub, nubBy, sortBy) import Data.Maybe import Data.Ord (comparing) import Text.CSL.Eval import Text.CSL.Reference import Text.CSL.Style import Text.CSL.Util (proc, query) -- | Given the 'Style', the list of references and the citation -- groups, disambiguate citations according to the style options. disambCitations :: Style -> [Reference] -> Citations -> [CitationGroup] -> ([(String, String)], [CitationGroup]) disambCitations s bibs cs groups = (,) yearSuffs citOutput where -- utils when_ b f = if b then f else [] filter_ f = concatMap (map fst) . map (filter f) . map (uncurry zip) -- the list of the position and the reference of each citation -- for each citation group. refs = processCites bibs cs -- name data of name duplicates nameDupls = getDuplNameData groups -- citation data of ambiguous cites duplics = getDuplCiteData giveNameDisamb hasNamesOpt hasYSuffOpt groups -- check the options set in the style disOpts = getCitDisambOptions s hasNamesOpt = "disambiguate-add-names" `elem` disOpts hasGNameOpt = "disambiguate-add-givenname" `elem` disOpts hasYSuffOpt = "disambiguate-add-year-suffix" `elem` disOpts giveNameDisamb = case getOptionVal "givenname-disambiguation-rule" (citOptions (citation s)) of "by-cite" -> ByCite "all-names" -> AllNames "all-names-with-initials" -> AllNames "primary-name" -> PrimaryName "primary-name-with-initials" -> PrimaryName _ -> ByCite -- default as of CSL 1.0 clean = if hasGNameOpt then id else proc rmHashAndGivenNames withNames = flip map duplics $ same . clean . map (if hasNamesOpt then disambData else return . disambYS) needNames = filter_ (not . snd) $ zip duplics withNames needYSuff = filter_ snd $ zip duplics withNames newNames :: [CiteData] newNames = when_ (hasNamesOpt || hasGNameOpt) $ disambAddNames giveNameDisamb $ needNames ++ if hasYSuffOpt && giveNameDisamb == NoGiven then [] else needYSuff newGName :: [NameData] newGName = when_ hasGNameOpt $ concatMap disambAddGivenNames nameDupls -- the list of citations that need re-evaluation with the -- \"disambiguate\" condition set to 'True' reEval = let chk = if hasYSuffOpt then filter ((==) [] . citYear) else id in chk needYSuff reEvaluated = if or (query hasIfDis s) && not (null reEval) then zipWith (reEvaluate s reEval) refs groups else groups withYearS = addNames $ if hasYSuffOpt then map (mapCitationGroup $ setYearSuffCollision hasNamesOpt needYSuff) $ reEvaluated else rmYearSuff $ reEvaluated yearSuffs = when_ hasYSuffOpt . generateYearSuffix bibs . concatMap getYearSuffixes $ withYearS addNames = proc (updateContrib giveNameDisamb newNames newGName) processed = if hasYSuffOpt then proc (updateYearSuffixes yearSuffs) $ withYearS else withYearS citOutput = if disOpts /= [] then processed else reEvaluated mapDisambData :: (Output -> Output) -> CiteData -> CiteData mapDisambData f (CD k c ys d r s y) = CD k c ys (proc f d) r s y mapCitationGroup :: ([Output] -> [Output]) -> CitationGroup -> CitationGroup mapCitationGroup f (CG cs fm d os) = CG cs fm d (zip (map fst os) . f $ map snd os) data GiveNameDisambiguation = NoGiven -- TODO this is no longer used? | ByCite | AllNames | PrimaryName deriving (Show, Eq) disambAddNames :: GiveNameDisambiguation -> [CiteData] -> [CiteData] disambAddNames b needName = addLNames where clean = if b == NoGiven then proc rmHashAndGivenNames else id disSolved = zip needName' . disambiguate . map disambData $ needName' needName' = nub' needName [] addLNames = map (\(c,n) -> c { disambed = if null n then collision c else head n }) disSolved nub' [] r = r nub' (x:xs) r = case elemIndex (disambData $ clean x) (map (disambData . clean) r) of Nothing -> nub' xs (x:r) Just i -> let y = r !! i in nub' xs (y {sameAs = key x : sameAs y} : filter (/= y) r) disambAddGivenNames :: [NameData] -> [NameData] disambAddGivenNames needName = addGName where disSolved = zip needName (disambiguate $ map nameDisambData needName) addGName = map (\(c,n) -> c { nameDataSolved = if null n then nameCollision c else head n }) disSolved updateContrib :: GiveNameDisambiguation -> [CiteData] -> [NameData] -> Output -> Output updateContrib g c n o | OContrib k r s d dd <- o = case filter (key &&& sameAs >>> uncurry (:) >>> elem k) c of x:_ | clean (disambData x) == clean (d:dd) -> OContrib k r (map processGNames $ disambed x) [] dd _ | null c, AllNames <- g -> OContrib k r (map processGNames s) d dd | otherwise -> o | otherwise = o where clean = if g == NoGiven then proc rmHashAndGivenNames else id processGNames = if g /= NoGiven then updateOName n else id updateOName :: [NameData] -> Output -> Output updateOName n o | OName _ _ [] _ <- o = o | OName k x _ f <- o = case elemIndex (ND k (clean x) [] []) n of Just i -> OName emptyAgent (nameDataSolved $ n !! i) [] f _ -> o | otherwise = o where clean = proc rmGivenNames -- | Evaluate again a citation group with the 'EvalState' 'disamb' -- field set to 'True' (for matching the @\"disambiguate\"@ -- condition). reEvaluate :: Style -> [CiteData] -> [(Cite, Maybe Reference)] -> CitationGroup -> CitationGroup reEvaluate (Style {citation = ct, csMacros = ms , styleLocale = lo, styleAbbrevs = as}) l cr (CG a f d os) = CG a f d . flip concatMap (zip cr os) $ \((c,mbr),out) -> case mbr of Just r | unLiteral (refId r) `elem` lkeys -> return . second (flip Output emptyFormatting) $ (,) c $ evalLayout (citLayout ct) (EvalCite c) True lo ms (citOptions ct) as mbr _ -> [out] where lkeys = map key l -- | Check if the 'Style' has any conditional for disambiguation. In -- this case the conditional will be try after all other -- disambiguation strategies have failed. To be used with the generic -- 'query' function. hasIfDis :: IfThen -> [Bool] hasIfDis (IfThen (Condition {disambiguation = (_:_)}) _ _) = [True] hasIfDis _ = [False] -- | Get the list of disambiguation options set in the 'Style' for -- citations. getCitDisambOptions :: Style -> [String] getCitDisambOptions = map fst . filter ((==) "true" . snd) . filter (isPrefixOf "disambiguate" . fst) . citOptions . citation -- | Group citation data (with possible alternative names) of -- citations which have a duplicate (same 'collision', and same -- 'citYear' if year suffix disambiiguation is used). If the first -- 'Bool' is 'False', then we need to retrieve data for year suffix -- disambiguation. The second 'Bool' is 'True' when comparing both -- year and contributors' names for finding duplicates (when the -- year-suffix option is set). getDuplCiteData :: GiveNameDisambiguation -> Bool -> Bool -> [CitationGroup] -> [[CiteData]] getDuplCiteData giveNameDisamb b1 b2 g = groupBy (\x y -> collide x == collide y) . sortBy (comparing collide) $ duplicates where whatToGet = if b1 then collision else disambYS collide = proc (rmExtras giveNameDisamb) . proc rmHashAndGivenNames . whatToGet citeData = nubBy (\a b -> collide a == collide b && key a == key b) $ concatMap (mapGroupOutput $ getCiteData) g duplicates = [c | c <- citeData , d <- citeData , collides c d] collides x y = x /= y && (collide x == collide y) && (not b2 || citYear x == citYear y) rmExtras :: GiveNameDisambiguation -> [Output] -> [Output] rmExtras g os | Output x _ : xs <- os = case rmExtras g x of [] -> rmExtras g xs ys -> ys ++ rmExtras g xs | OContrib _ _ (y:ys) _ _ : xs <- os = if g == PrimaryName then OContrib [] [] [y] [] [] : rmExtras g xs else OContrib [] [] (y:ys) [] [] : rmExtras g xs | OYear{} : xs <- os = rmExtras g xs | OYearSuf{} : xs <- os = rmExtras g xs | OLabel{} : xs <- os = rmExtras g xs | ODel _ : xs <- os = rmExtras g xs | OLoc _ _ : xs <- os = rmExtras g xs | x : xs <- os = x : rmExtras g xs | otherwise = [] -- | For an evaluated citation get its 'CiteData'. The disambiguated -- citation and the year fields are empty. Only the first list of -- contributors' disambiguation data are collected for disambiguation -- purposes. getCiteData :: Output -> [CiteData] getCiteData out = (contribs &&& years >>> zipData) out where contribs x = case query contribsQ x of [] -> [CD [] [out] [] [] [] [] []] -- allow title to disambiguate xs -> xs years o = case query getYears o of [] -> [([],[])] r -> r zipData = uncurry . zipWith $ \c y -> if key c /= [] then c {citYear = snd y} else c {key = fst y ,citYear = snd y} contribsQ o | OContrib k _ _ d dd <- o = [CD k [out] d (d:dd) [] [] []] | otherwise = [] getYears :: Output -> [(String,String)] getYears o | OYear x k _ <- o = [(k,x)] | otherwise = [] getDuplNameData :: [CitationGroup] -> [[NameData]] getDuplNameData g = groupBy (\a b -> collide a == collide b) . sortBy (comparing collide) $ duplicates where collide = nameCollision nameData = nub $ concatMap (mapGroupOutput getName) g duplicates = filter (flip elem (getDuplNames g) . collide) nameData getDuplNames :: [CitationGroup] -> [[Output]] getDuplNames xs = nub . catMaybes . snd . mapAccumL dupl [] . getData $ xs where getData = concatMap (mapGroupOutput getName) dupl a c = if nameCollision c `elem` map nameCollision a then (a,Just $ nameCollision c) else (c:a,Nothing) getName :: Output -> [NameData] getName = query getName' where getName' o | OName i n ns _ <- o = [ND i n (n:ns) []] | otherwise = [] generateYearSuffix :: [Reference] -> [(String, [Output])] -> [(String,String)] generateYearSuffix refs = concatMap (`zip` suffs) . -- sort clashing cites using their position in the sorted bibliography getFst . map (sort' . (filter ((/=) 0 . snd)) . (map getP)) . -- group clashing cites getFst . filter (\grp -> length grp >= 2) . map nub . groupBy (\a b -> snd a == snd b) . sort' . filter (not . null . snd) where sort' :: (Ord a, Ord b) => [(a,b)] -> [(a,b)] sort' = sortBy (comparing snd) getFst = map $ map fst getP k = case findIndex ((==) k . unLiteral . refId) refs of Just x -> (k, x + 1) _ -> (k, 0) suffs = letters ++ [x ++ y | x <- letters, y <- letters ] letters = map (:[]) ['a'..'z'] setYearSuffCollision :: Bool -> [CiteData] -> [Output] -> [Output] setYearSuffCollision b cs = proc (setYS cs) . (map $ \x -> if hasYearSuf x then x else addYearSuffix x) where setYS c o | OYearSuf _ k _ f <- o = OYearSuf [] k (getCollision k c) f | otherwise = o collide = if b then disambed else disambYS getCollision k c = case find ((==) k . key) c of Just x -> if collide x == [] then [OStr (citYear x) emptyFormatting] else collide x _ -> [] updateYearSuffixes :: [(String, String)] -> Output -> Output updateYearSuffixes yss o | OYearSuf _ k c f <- o = case lookup k yss of Just x -> OYearSuf x k c f _ -> ONull | otherwise = o getYearSuffixes :: CitationGroup -> [(String,[Output])] getYearSuffixes (CG _ _ _ d) = map go d where go (c,x) = (citeId c, relevant False [x]) relevant :: Bool -> [Output] -> [Output] -- bool is true if has contrib -- we're only interested in OContrib and OYear, -- unless there is no OContrib relevant c (Output xs _ : rest) = relevant c xs ++ relevant c rest relevant c (OYear n _ _ : rest) = OStr n emptyFormatting : relevant c rest relevant c (ODate xs : rest) = relevant c xs ++ relevant c rest relevant False (OStr s _ : rest) = OStr s emptyFormatting : relevant False rest relevant False (OSpace : rest) = OSpace : relevant False rest relevant False (OPan ils : rest) = OPan ils : relevant False rest relevant _ (OContrib _ _ v _ _ : rest ) = relevant False v ++ relevant True rest relevant c (OName _ v _ _ : rest ) = relevant c v ++ relevant c rest relevant c (_ : rest) = relevant c rest relevant _ [] = [] rmYearSuff :: [CitationGroup] -> [CitationGroup] rmYearSuff = proc rmYS where rmYS o | OYearSuf _ _ _ _ <- o = ONull | otherwise = o -- List Utilities -- | Try to disambiguate a list of lists by returning the first non -- colliding element, if any, of each list: -- -- > disambiguate [[1,2],[1,3],[2]] = [[2],[3],[2]] disambiguate :: (Eq a) => [[a]] -> [[a]] disambiguate [] = [] disambiguate l = if hasMult l && not (allTheSame l) && hasDuplicates heads then disambiguate (rest l) else heads where heads = map (take 1) l rest = map (\(b,x) -> if b then tail_ x else take 1 x) . zip (same heads) hasMult [] = False hasMult (x:xs) = length x > 1 || hasMult xs tail_ [x] = [x] tail_ x = if null x then x else tail x -- | For each element a list of 'Bool': 'True' if the element has a -- duplicate in the list: -- -- > same [1,2,1] = [True,False,True] same :: Eq a => [a] -> [Bool] same l = map (`elem` dupl) l where dupl = catMaybes . snd . macc [] $ l macc = mapAccumL $ \a x -> if x `elem` a then (a,Just x) else (x:a,Nothing) hasDuplicates :: Eq a => [a] -> Bool hasDuplicates = or . same allTheSame :: Eq a => [a] -> Bool allTheSame [] = True allTheSame (x:xs) = all (== x) xs -- | Add the year suffix to the year. Needed for disambiguation. addYearSuffix :: Output -> Output addYearSuffix o | OYear y k f <- o = Output [OYear y k emptyFormatting, OYearSuf [] k [] emptyFormatting] f | ODate (x:xs) <- o = if any hasYear xs then Output (x : [addYearSuffix $ ODate xs]) emptyFormatting else addYearSuffix (Output (x:xs) emptyFormatting) | Output (x:xs) f <- o = if any hasYearSuf (x : xs) then Output (x : xs) f else if hasYear x then Output (addYearSuffix x : xs) f else Output (x : [addYearSuffix $ Output xs emptyFormatting]) f | otherwise = o hasYear :: Output -> Bool hasYear = not . null . query getYear where getYear o | OYear _ _ _ <- o = [o] | otherwise = [] hasYearSuf :: Output -> Bool hasYearSuf = not . null . query getYearSuf where getYearSuf :: Output -> [String] getYearSuf o | OYearSuf _ _ _ _ <- o = ["a"] | otherwise = [] -- | Removes all given names and name hashes from OName elements. rmHashAndGivenNames :: Output -> Output rmHashAndGivenNames (OName _ s _ f) = OName emptyAgent s [] f rmHashAndGivenNames o = o rmGivenNames :: Output -> Output rmGivenNames (OName a s _ f) = OName a s [] f rmGivenNames o = o -- | Add, with 'proc', a give name to the family name. Needed for -- disambiguation. addGivenNames :: [Output] -> [Output] addGivenNames = addGN True where addGN _ [] = [] addGN b (o:os) | OName i _ xs f <- o , xs /= [] = if b then OName i (head xs) (tail xs) f : addGN False os else o:os | otherwise = o : addGN b os -- | Map the evaluated output of a citation group. mapGroupOutput :: (Output -> [a]) -> CitationGroup -> [a] mapGroupOutput f (CG _ _ _ os) = concatMap f $ map snd os