-----------------------------------------------------------------------------
-- |
-- Module      :  Data.List.UniqueUnsorted
-- Copyright   :  (c) Volodymyr Yashchenko
-- License     :  BSD3
--
-- Maintainer  :  ualinuxcn@gmail.com
-- Stability   :  Unstable
-- Portability :  portable
--
-- Library provides functions to find unique and duplicate elements in the list.
-- Unlike Unique or UniqueStrict modules this one uses Data.HashMap.Strict for calculation.
--
-- The elements in the list can be unsorted (do not have an instance of Ord class, but Hashable is needed).
-- This implementation is good for ByteStrings.

module Data.List.UniqueUnsorted
        ( isUnique
        , isRepeated
        , removeDuplicates
        , repeated
        , repeatedBy
        , unique
        , allUnique
        , count
        , count_
        , occurrences
        )
        where

import           Data.Hashable
import qualified Data.HashMap.Strict as HS (HashMap, filter, fromListWith, keys,
                                            toList, lookup, map, foldr)

import qualified Data.HashSet        as DHS (toList, fromList)

import qualified Data.IntMap.Strict  as IM (fromAscListWith, fromListWith, toAscList)


countMap :: (Hashable a, Eq a) => [a] -> HS.HashMap a Int
countMap :: [a] -> HashMap a Int
countMap = (Int -> Int -> Int) -> [(a, Int)] -> HashMap a Int
forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> [(k, v)] -> HashMap k v
HS.fromListWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) ([(a, Int)] -> HashMap a Int)
-> ([a] -> [(a, Int)]) -> [a] -> HashMap a Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([a] -> [Int] -> [(a, Int)]) -> [Int] -> [a] -> [(a, Int)]
forall a b c. (a -> b -> c) -> b -> a -> c
flip [a] -> [Int] -> [(a, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip (Int -> [Int]
forall a. a -> [a]
repeat Int
1)

-- | 'isUnique' function is to check whether the given element is unique in the list or not.
--
-- It returns Nothing when the element does not present in the list. Examples:
--
-- > isUnique 'f' "foo bar" == Just True
-- > isUnique 'o' "foo bar" == Just False
-- > isUnique '!' "foo bar" == Nothing
--
-- Since 0.4.7.2
--

isUnique :: (Hashable a, Eq a) => a -> [a] -> Maybe Bool
isUnique :: a -> [a] -> Maybe Bool
isUnique a
x = (Int -> Bool) -> Maybe Int -> Maybe Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1) (Maybe Int -> Maybe Bool)
-> ([a] -> Maybe Int) -> [a] -> Maybe Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> HashMap a Int -> Maybe Int
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HS.lookup a
x (HashMap a Int -> Maybe Int)
-> ([a] -> HashMap a Int) -> [a] -> Maybe Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> HashMap a Int
forall a. (Hashable a, Eq a) => [a] -> HashMap a Int
countMap

-- | 'isRepeated' is a reverse function to 'isUnique'
--
-- Since 0.4.7.2

isRepeated :: (Hashable a, Eq a) => a -> [a] -> Maybe Bool
isRepeated :: a -> [a] -> Maybe Bool
isRepeated a
x = (Bool -> Bool) -> Maybe Bool -> Maybe Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Bool -> Bool
not (Maybe Bool -> Maybe Bool)
-> ([a] -> Maybe Bool) -> [a] -> Maybe Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> [a] -> Maybe Bool
forall a. (Hashable a, Eq a) => a -> [a] -> Maybe Bool
isUnique a
x

-- | 'removeDuplicates' removes the duplicates of elements. Example:
--
-- > removeDuplicates "foo bar" == " abrfo"

removeDuplicates :: (Hashable a, Eq a) => [a] -> [a]
removeDuplicates :: [a] -> [a]
removeDuplicates = HashSet a -> [a]
forall a. HashSet a -> [a]
DHS.toList (HashSet a -> [a]) -> ([a] -> HashSet a) -> [a] -> [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> HashSet a
forall a. (Eq a, Hashable a) => [a] -> HashSet a
DHS.fromList

-- | The 'repeatedBy' function behaves just like 'repeated', except it uses a user-supplied equality predicate.
--
-- > repeatedBy (>2) "This is the test line" == " stei"

repeatedBy :: (Hashable a, Eq a) => (Int -> Bool) -> [a] -> [a]
repeatedBy :: (Int -> Bool) -> [a] -> [a]
repeatedBy Int -> Bool
p = HashMap a Int -> [a]
forall k v. HashMap k v -> [k]
HS.keys (HashMap a Int -> [a]) -> ([a] -> HashMap a Int) -> [a] -> [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Bool) -> HashMap a Int -> HashMap a Int
forall v k. (v -> Bool) -> HashMap k v -> HashMap k v
HS.filter Int -> Bool
p (HashMap a Int -> HashMap a Int)
-> ([a] -> HashMap a Int) -> [a] -> HashMap a Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> HashMap a Int
forall a. (Hashable a, Eq a) => [a] -> HashMap a Int
countMap

-- | 'repeated' finds only the elements that are present more than once in the list. Example:
--
-- >  repeated  "foo bar" == "o"

repeated :: (Hashable a, Eq a) => [a] -> [a]
repeated :: [a] -> [a]
repeated = (Int -> Bool) -> [a] -> [a]
forall a. (Hashable a, Eq a) => (Int -> Bool) -> [a] -> [a]
repeatedBy (Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>Int
1)

-- | 'unique' gets only unique elements, that do not have duplicates.
--
-- > unique  "foo bar" == " abrf"

unique :: (Hashable a, Eq a) => [a] -> [a]
unique :: [a] -> [a]
unique = (Int -> Bool) -> [a] -> [a]
forall a. (Hashable a, Eq a) => (Int -> Bool) -> [a] -> [a]
repeatedBy (Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
==Int
1)


-- | 'allUnique' checks whether all elements of the list are unique
--
-- > allUnique "foo bar" == False
-- > allUnique ['a'..'z'] == True
-- > allUnique [] == True (!)
-- Since 0.4.7.2

allUnique :: (Hashable a, Eq a) => [a] -> Bool
allUnique :: [a] -> Bool
allUnique = (Bool -> Bool -> Bool) -> Bool -> HashMap a Bool -> Bool
forall v a k. (v -> a -> a) -> a -> HashMap k v -> a
HS.foldr Bool -> Bool -> Bool
(&&) Bool
True (HashMap a Bool -> Bool) -> ([a] -> HashMap a Bool) -> [a] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Bool) -> HashMap a Int -> HashMap a Bool
forall v1 v2 k. (v1 -> v2) -> HashMap k v1 -> HashMap k v2
HS.map (Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
==Int
1) (HashMap a Int -> HashMap a Bool)
-> ([a] -> HashMap a Int) -> [a] -> HashMap a Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> HashMap a Int
forall a. (Hashable a, Eq a) => [a] -> HashMap a Int
countMap

-- | 'count' of each element in the list. Example:
--
-- > count "This is the test line" == [(' ',4),('s',3),('T',1),('t',3),('e',3),('h',2),('i',3),('l',1),('n',1)]

count :: (Hashable a, Eq a) => [a] -> [(a, Int)]
count :: [a] -> [(a, Int)]
count = HashMap a Int -> [(a, Int)]
forall k v. HashMap k v -> [(k, v)]
HS.toList (HashMap a Int -> [(a, Int)])
-> ([a] -> HashMap a Int) -> [a] -> [(a, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> HashMap a Int
forall a. (Hashable a, Eq a) => [a] -> HashMap a Int
countMap

-- | 'count_' of each elements in the list, it sorts by their number. Example:
--
-- >  count_ "This is the test line" == [('n',1),('l',1),('T',1),('h',2),('i',3),('e',3),('t',3),('s',3),(' ',4)]

count_ :: (Hashable a, Eq a) => [a] -> [(a, Int)]
count_ :: [a] -> [(a, Int)]
count_ = IntMap [a] -> [(a, Int)]
forall a. IntMap [a] -> [(a, Int)]
fromIntMap (IntMap [a] -> [(a, Int)])
-> ([a] -> IntMap [a]) -> [a] -> [(a, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(a, Int)] -> IntMap [a]
forall a. [(a, Int)] -> IntMap [a]
toIntMap ([(a, Int)] -> IntMap [a])
-> ([a] -> [(a, Int)]) -> [a] -> IntMap [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HashMap a Int -> [(a, Int)]
forall k v. HashMap k v -> [(k, v)]
HS.toList (HashMap a Int -> [(a, Int)])
-> ([a] -> HashMap a Int) -> [a] -> [(a, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> HashMap a Int
forall a. (Hashable a, Eq a) => [a] -> HashMap a Int
countMap
  where toIntMap :: [(a, Int)] -> IntMap [a]
toIntMap   = ([a] -> [a] -> [a]) -> [(Int, [a])] -> IntMap [a]
forall a. (a -> a -> a) -> [(Int, a)] -> IntMap a
IM.fromListWith [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
(++) ([(Int, [a])] -> IntMap [a])
-> ([(a, Int)] -> [(Int, [a])]) -> [(a, Int)] -> IntMap [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((a, Int) -> (Int, [a])) -> [(a, Int)] -> [(Int, [a])]
forall a b. (a -> b) -> [a] -> [b]
map (\(a
x,Int
y) -> (Int
y,[a
x]))
        fromIntMap :: IntMap [a] -> [(a, Int)]
fromIntMap = ((Int, [a]) -> [(a, Int)]) -> [(Int, [a])] -> [(a, Int)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\(Int
x,[a]
y) -> [a] -> [Int] -> [(a, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [a]
y ([Int] -> [(a, Int)]) -> [Int] -> [(a, Int)]
forall a b. (a -> b) -> a -> b
$ Int -> [Int]
forall a. a -> [a]
repeat Int
x) ([(Int, [a])] -> [(a, Int)])
-> (IntMap [a] -> [(Int, [a])]) -> IntMap [a] -> [(a, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IntMap [a] -> [(Int, [a])]
forall a. IntMap a -> [(Int, a)]
IM.toAscList

-- | 'occurrences' like 'count' or 'count_' but shows the list of elements that occur X times
--
-- > occurrences "This is the test line" == [(1,"Tln"),(2,"h"),(3,"eist"),(4," ")]
-- Since 0.4.7.5
--

occurrences :: (Hashable a, Eq a) => [a] -> [(Int, [a])]
occurrences :: [a] -> [(Int, [a])]
occurrences = IntMap [a] -> [(Int, [a])]
forall a. IntMap a -> [(Int, a)]
IM.toAscList (IntMap [a] -> [(Int, [a])])
-> ([a] -> IntMap [a]) -> [a] -> [(Int, [a])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([a] -> [a] -> [a]) -> [(Int, [a])] -> IntMap [a]
forall a. (a -> a -> a) -> [(Int, a)] -> IntMap a
IM.fromAscListWith [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
(++) ([(Int, [a])] -> IntMap [a])
-> ([a] -> [(Int, [a])]) -> [a] -> IntMap [a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((a, Int) -> (Int, [a])) -> [(a, Int)] -> [(Int, [a])]
forall a b. (a -> b) -> [a] -> [b]
map (\(a
k , Int
x) -> (Int
x, [a
k]) ) ([(a, Int)] -> [(Int, [a])])
-> ([a] -> [(a, Int)]) -> [a] -> [(Int, [a])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> [(a, Int)]
forall a. (Hashable a, Eq a) => [a] -> [(a, Int)]
count_