module Cluster
    ( getClusterIdentity
    , groupBy'
    ) where
import qualified Data.Map.Strict as Map
import qualified Data.Sequence as Seq
import qualified Data.Foldable as F
import qualified Data.Text as T
import Data.Fasta.Text
import Types
negHamming :: T.Text -> T.Text -> Double
negHamming x = sum . map (\(x, y) -> if x == y then 1 else 0) . T.zip x
getClusterIdentity :: Identity
                -> Seq.Seq FastaSequence
                -> ClusterMap
getClusterIdentity identity = ClusterMap
                            . Map.fromList
                            . zip [1..]
                            . clusterIdentityGo identity
clusterIdentityGo :: Identity
                  -> Seq.Seq FastaSequence
                  -> [Seq.Seq FastaSequence]
clusterIdentityGo identity = F.toList
                           . groupBy' (compareSeqs identity)
groupBy' :: (a -> a -> Bool) -> Seq.Seq a -> Seq.Seq (Seq.Seq a)
groupBy' _ (Seq.null -> True) = Seq.empty
groupBy' f (Seq.viewl -> x Seq.:< xs) = eqX Seq.<| groupBy' f neqX
  where
    (!eqX, !neqX) = eqTo f Seq.empty (Seq.singleton x) xs
eqTo :: (a -> a -> Bool)
     -> Seq.Seq a
     -> Seq.Seq a
     -> Seq.Seq a
     -> (Seq.Seq a, Seq.Seq a)
eqTo _ acc (Seq.null -> True) zs = (acc, zs)
eqTo f acc (Seq.viewl -> x Seq.:< xs) zs =
    eqTo f (x Seq.<| acc) (eqX Seq.>< xs) neqX
  where
    (!eqX, !neqX) = Seq.partition (f x) zs
compareSeqs :: Identity -> FastaSequence -> FastaSequence -> Bool
compareSeqs identity x y = getIdentity x y >= identity
getIdentity :: FastaSequence -> FastaSequence -> Identity
getIdentity xs ys = Identity
                  . (* 100)
                  . (/ (fromIntegral . T.length . fastaSeq $ xs))
                  . negHamming (fastaSeq xs)
                  $ (fastaSeq ys)