-- |
--
-- TODO need to check performance of 'drop' vs 'unsafeDrop'
--
-- TODO use 'Word' instead of 'Int' -- but check performance due to all
-- those conversions

module Data.SuffixStructure.NaiveArray where

import           Data.ByteString (ByteString(..))
import           Data.Int
import           Data.IntMap.Strict (IntMap(..))
import           Data.ListLike (ListLike)
import           Data.Vector.Unboxed (Vector(..))
import           Data.Word
import qualified Data.ByteString as B
import qualified Data.IntMap.Strict as IM
import qualified Data.ListLike as LL
import qualified Data.Vector.Algorithms.AmericanFlag as AA
import qualified Data.Vector.Algorithms.Intro as AI
import qualified Data.Vector.Unboxed as VU

import           Data.SuffixStructure.ESA



-- | Create Suffix Array via Introsort

genSA :: (ListLike ll a, Ord ll, Eq a) => ll -> SA
genSA ll = SA sa lcp lcpLong where
  sa      = VU.modify (AI.sortBy srt) $ VU.enumFromN 0 (LL.length ll)
  (lcp,lcpLong) = buildLCP ll sa
  srt i j = LL.drop i ll `compare` LL.drop j ll
{-# INLINE genSA #-}

-- | Create Suffix Array via American Flag sort

genSAaf :: (ListLike ll a, Ord ll, AA.Lexicographic ll, Eq a) => ll -> SA
genSAaf ll = SA sa lcp lcpLong where
  sa       = VU.modify (AA.sortBy srt strp bckt rdx) $ VU.enumFromN 0 (LL.length ll)
  (lcp,lcpLong) = buildLCP ll sa
  srt i j  = LL.drop i ll `compare` LL.drop j ll
  strp _ i = i >= LL.length ll
  bckt     = AA.size ll
  rdx i _  = AA.index i ll
{-# INLINE genSAaf #-}

-- | Build LCP array

buildLCP :: (ListLike ll a, Eq a) => ll -> VU.Vector Int -> (VU.Vector Int8, IM.IntMap Int)
buildLCP inp sa = (lcp,lcpLong) where
  lcp = (-1) `VU.cons` VU.zipWith golcp sa (VU.tail sa)
  lcpLong = VU.foldl' golcpLong IM.empty $ VU.zip4 (VU.enumFromN 1 $ VU.length sa) sa (VU.tail sa) (VU.tail lcp)
  golcp p k = let cpl = commonPrefixLength (LL.drop p inp) (LL.drop k inp)
              in  if   cpl <= 127
                  then fromIntegral cpl
                  else (-2)
  golcpLong im (k,s,t,l)
    | l== -2    = IM.insert k (commonPrefixLength (LL.drop s inp) (LL.drop t inp)) im
    | otherwise = im
{-# INLINE buildLCP #-}

-- | Return the shared prefix of two strings.

commonPrefix :: (ListLike ll a, Eq a) => ll -> ll -> ll
commonPrefix xs ys = LL.take k xs where
  k = commonPrefixLength xs ys
{-# INLINE commonPrefixLength #-}

-- | Return the length of the common prefix.

commonPrefixLength :: (ListLike ll a, Eq a) => ll -> ll -> Int
commonPrefixLength = go 0 where
  go !k !x !y
    | LL.null x || LL.null y = k
    | LL.head x == LL.head y = go (k+1) (LL.tail x) (LL.tail y)
    | otherwise              = k
{-# INLINE commonPrefix #-}