-- |
-- Module      :  String.Languages.UniquenessPeriods.Vector
-- Copyright   :  (c) OleksandrZhabenko 2020
-- License     :  MIT
-- Stability   :  Experimental
-- Maintainer  :  olexandr543@yahoo.com
--
-- Generalization of the uniqueness-periods and uniqueness-periods-general
-- packages functionality.
--

{-# LANGUAGE BangPatterns, FlexibleInstances, MultiParamTypeClasses #-}

module String.Languages.UniquenessPeriods.Vector where

import qualified Data.Vector as V

-- | A request for one of the three computations of this module. Each
-- constructor carries the arguments of the function it is dispatched to by
-- the 'UniquenessGeneral' instance below.
data UniquenessGeneral1 a b
  = UG1 a [b] (V.Vector b) -- ^ Arguments of 'uniquenessPeriodsVector1'.
  | UG2 a [b] (V.Vector b) -- ^ Arguments of 'uniquenessPeriodsVector2'.
  | UG3 [b] (V.Vector b) -- ^ Arguments of 'uniquenessPeriodsVector3'.
  deriving Eq

-- | Conversion of a request of type @a@ into its result of type @b@.
class UniquenessGeneral a b where
  get :: a -> b

-- | The common result type: every distinct value paired with a list of 'Int'
-- computed from the positions of its occurrences.
type UniquenessGeneral2 a = V.Vector ([Int], a)

-- | Runs the function that corresponds to the constructor of the request.
instance (Eq a) => UniquenessGeneral (UniquenessGeneral1 Bool a) (UniquenessGeneral2 a) where
  get (UG1 y whspss v) = uniquenessPeriodsVector1 y whspss v
  get (UG2 y whspss v) = uniquenessPeriodsVector2 y whspss v
  get (UG3 whspss v) = uniquenessPeriodsVector3 whspss v

-- | List of 'Int' in the result is a list of indexes for the occurrences of the value of the @a@ (usually, @a@ is a sound representation or its duration).
-- The first 'Bool' argument defines whether to apply the filtering for the (possibly) not informative \"whitespace symbols\" given as the
-- second argument list. The resulting 'V.Vector' is sorted in the order of the first occurrence of each of the @a@ (usually, @a@ is the sound
-- representation, or its duration, or some other of its characteristics) in the given third argument.
--
-- Every step removes the first remaining element together with all later elements equal to it,
-- so the function terminates even if '==' is not reflexive for @a@ (such a value just forms a group on its own).
uniquenessPeriodsVector1 :: Eq a => Bool -> [a] -> V.Vector a -> UniquenessGeneral2 a
uniquenessPeriodsVector1 y whspss v
  | V.null v = V.empty
  | otherwise = dropWhitespaces . V.unfoldr nextGroup . V.indexed $ v
  where
    -- Optional removal of the groups that belong to the \"whitespace symbols\".
    dropWhitespaces
      | y = V.filter (\(_, !zs) -> zs `notElem` whspss)
      | otherwise = id
    -- Splits the remaining (index, value) pairs into the group of the first
    -- value and the rest. The head is taken out explicitly, so the group is
    -- never empty and the seed gets strictly shorter on every step.
    nextGroup !x
      | V.null x = Nothing
      | otherwise =
          let (i0, e) = V.head x
              (same, rest) = V.partition (\(_, xs) -> xs == e) (V.tail x)
          in Just ((i0 : V.toList (V.map fst same), e), rest)

-- | List of 'Int' in the result is a list of distances between the consecutive occurrences of the @a@ (usually, @a@ is a sound representation or its duration)
-- in the given 'V.Vector'. The first 'Bool' argument defines whether to apply the filtering for the (possibly) not informative
-- \"whitespace symbols\" given as the second argument list; the same filtering also removes the values that occur just once (their list of distances is empty).
-- The resulting 'V.Vector' is sorted in the order of the first occurrence of each of
-- the @a@ (usually, @a@ is the sound representation or its duration, or some other of its characteristics) in the given third argument.
--
-- Every step removes the first remaining element together with all later elements equal to it,
-- so the function terminates even if '==' is not reflexive for @a@.
uniquenessPeriodsVector2 :: Eq a => Bool -> [a] -> V.Vector a -> UniquenessGeneral2 a
uniquenessPeriodsVector2 y whspss v
  | V.null v = V.empty
  | otherwise = keepInformative . V.unfoldr nextGroup . V.indexed $ v
  where
    -- Optional removal of the single occurrences and of the \"whitespace symbols\".
    keepInformative
      | y = V.filter (\(ys, !zs) -> not (null ys) && zs `notElem` whspss)
      | otherwise = id
    -- Splits the remaining (index, value) pairs into the group of the first
    -- value and the rest, and turns the positions of the group into the
    -- differences between the neighbouring positions.
    nextGroup !x
      | V.null x = Nothing
      | otherwise =
          let (i0, e) = V.head x
              (same, rest) = V.partition (\(_, xs) -> xs == e) (V.tail x)
              positions = i0 : V.toList (V.map fst same)
          in Just ((zipWith subtract positions (drop 1 positions), e), rest)

-- | List of 'Int' in the result is a list of distances between the consecutive occurrences of the @a@ (usually, @a@ is a sound representation or its duration)
-- in the given 'V.Vector'. But unlike the 'uniquenessPeriodsVector2' function it finds out only the distances for the repeated not \"whitespace symbols\"
-- occurring in different sublists separated with these \"whitespace symbols\". Therefore, it is much more sensitive to the words order than the former one.
-- The resulting 'V.Vector' is sorted in the order of the first occurrence of each of
-- the @a@ (usually, @a@ is the sound representation or its duration, or some other of its characteristics) in the given second argument.
uniquenessPeriodsVector3
  :: Eq a
  => [a] -- ^ The \"whitespace symbols\": they separate the sublists and never appear in the result.
  -> V.Vector a -- ^ The data to be analysed.
  -> UniquenessGeneral2 a -- ^ Values with a non-empty list of distances, in the order of their first occurrence.
uniquenessPeriodsVector3 whspss v
  | V.null v = V.empty
  | otherwise =
      V.filter (\(ys, !zs) -> not (null ys) && (zs `notElem` whspss))
        . V.unfoldr nextGroup
        . V.indexed
        $ v
  where
    -- Ascending positions of all the \"whitespace symbols\" in the input.
    separators = V.toList . V.findIndices (`elem` whspss) $ v
    -- Splits the remaining (index, value) pairs into the group of the first
    -- value and the rest. The head is taken out explicitly, so the group is
    -- never empty and the seed gets strictly shorter on every step; this keeps
    -- the unfolding total and terminating even if '==' is not reflexive.
    nextGroup !x
      | V.null x = Nothing
      | otherwise =
          let (i0, e) = V.head x
              (same, rest) = V.partition (\(_, xs) -> xs == e) (V.tail x)
              positions = i0 : V.toList (V.map fst same)
          in Just ((helpUPV3 separators positions, e), rest)

-- | Is used inside the 'uniquenessPeriodsVector3'. Both list arguments of non-negative numbers (if not empty) must be sorted in the ascending order.
-- The first argument is the list of separator positions, the second one is the list of positions of some value.
-- For two neighbouring positions @x@ and @y@ the difference @y - x@ is emitted if the current separator lies strictly between them.
-- The result is empty as soon as there are no separators left or fewer than two positions left.
helpUPV3 :: [Int] -> [Int] -> [Int]
helpUPV3 (z:zs) (x:y:xs)
  | separated = (y - x) : helpUPV3 zs (y:xs) -- the pair is split by @z@: record the distance
  | y > z = helpUPV3 zs (x:y:xs) -- @z@ cannot split this pair nor the later ones: drop it
  | otherwise = helpUPV3 (z:zs) (y:xs) -- the pair ends not after @z@: move to the next pair
  where
    -- Direct comparisons instead of the sign of @(x - z) * (y - z)@: the same
    -- condition, but without a product that could overflow.
    separated = (x < z && z < y) || (y < z && z < x)
helpUPV3 _ _ = []