module Hoogle.DataBase.SubstrSearch
    (SubstrSearch, createSubstrSearch
    ,searchSubstrSearch
    ,completionsSubstrSearch
    ) where

import Data.Binary.Defer
import qualified Data.ByteString as BS
import qualified Data.ByteString.Unsafe as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Binary as Bin
import qualified Data.Binary.Get as Bin
import qualified Data.Binary.Put as Bin
import qualified Data.Set as Set
import General.Base
import Hoogle.Type.All
import Hoogle.Score.All


-- idea for speed improvement
-- store as one long bytestring with \0 between the words, then do findSubstrings to find the indexes
-- store the lengths in a separate bytestring then use index to step through them, retrieving the data as Word8 via foldl
-- store the links in another bytestring with the lengths, but only unpack them when they are needed
-- can even make length==0 code for it's the same string as before, to compress it and reduce searching
-- was previously ~ 0.047 seconds

{-
Description:

Data is stored flattened. For default we expect ~200Kb of disk usage.
-}

-- keys are sorted after being made lower case
data SubstrSearch a = SubstrSearch
    {text :: BS.ByteString -- all the bytestrings, in preference order
    ,lens :: BS.ByteString -- a list of lengths
    ,inds :: Int -> a -- a way of retrieving each index
    }


-- | Create a substring search index. Values are returned in order where possible.
createSubstrSearch :: [(String,a)] -> SubstrSearch a
createSubstrSearch xs = SubstrSearch
    (BSC.pack $ concat ts2)
    (BS.pack $ map fromIntegral ls2)
    (is !!)
    where
        (ts,is) = unzip $ map (first $ map toLower) xs
        (ts2,ls2) = f "" ts

        f x (y:ys) = first (y:) $ second (length y:) $ f y ys
        f x [] = ([],[])


data S a = S
    {sCount :: !Int -- which one are we on
    ,sFocus :: !BS.ByteString -- where we are in the string
    ,sPrefix :: ![(a,EntryView,Score)] -- the prefixes
    ,sInfix :: ![(a,EntryView,Score)] -- the infixes
    }


searchSubstrSearch :: SubstrSearch a -> String -> [(a, EntryView, Score)]
searchSubstrSearch x y = reverse (sPrefix sN) ++ reverse (sInfix sN)
    where
        view = FocusOn y
        match = bsMatch (BSC.pack $ map toLower y)
        sN = BS.foldl f s0 $ lens x
        s0 = S 0 (text x) [] []

        f s ii = addCount $ moveFocus i $ maybe id addMatch t s
            where t = match i $ BS.unsafeTake i $ sFocus s
                  i = fromIntegral ii

        addCount s = s{sCount=sCount s+1}
        moveFocus i s = s{sFocus=BS.unsafeDrop i $ sFocus s}
        addMatch MatchSubstr s = s{sInfix =(inds x $ sCount s,view,textScore MatchSubstr):sInfix s}
        addMatch t s = s{sPrefix=(inds x $ sCount s,view,textScore t):sPrefix s}


data S2 = S2
    {_s2Focus :: !BS.ByteString -- where we are in the string
    ,s2Result :: Set.Set BS.ByteString
    }

completionsSubstrSearch :: SubstrSearch a -> String -> [String]
completionsSubstrSearch x y = map (\x -> y ++ drop ny (BSC.unpack x)) $ take 10 $ Set.toAscList $
                              s2Result $ BS.foldl f (S2 (text x) Set.empty) $ lens x
    where
        ny = length y
        ly = BSC.pack $ map toLower y
        f (S2 foc res) ii = S2 (BS.unsafeDrop i foc) (if ly `BS.isPrefixOf` x then Set.insert x res else res)
            where x = BS.unsafeTake i foc
                  i = fromIntegral ii


instance Show a => Show (SubstrSearch a) where
    show x = "SubstrSearch"

instance (Bin.Binary a, BinaryDeferGet a, FixedBinary a) => BinaryDefer (SubstrSearch a) where
    put x = putDefer $ putLazyByteString $ Bin.runPut $ putBinary Bin.put x

    get = do
        g <- binaryDeferGet
        x <- getDefer getLazyByteString
        return $ Bin.runGet (getBinary (fixedSize $ tyUnGet g) g) x
        where
            tyUnGet :: Bin.Get a -> a
            tyUnGet = undefined


putBinary :: (a -> Bin.Put) -> SubstrSearch a -> Bin.Put
putBinary p x = do
    Bin.put $ text x
    Bin.put $ lens x
    Bin.put $ fromLBS $ Bin.runPut $ mapM_ (p . inds x) [0.. fromIntegral (BS.length $ lens x) - 1]


getBinary :: Int -> Bin.Get a -> Bin.Get (SubstrSearch a)
getBinary size g = do
    text <- Bin.get
    lens <- Bin.get
    indsData <- Bin.get
    let inds i = Bin.runGet g $ toLBS $ BS.take size $ BS.drop (i * size) indsData
    return $ SubstrSearch text lens inds


fromLBS = BS.concat . LBS.toChunks
toLBS = LBS.fromChunks . return


-- if first word is empty, always return Exact/Prefix
-- if first word is a single letter, do elemIndex
-- if first word is multiple, do isPrefixOf's but only up until n from the end
-- partially apply on the first word
bsMatch :: BS.ByteString -> Int -> BS.ByteString -> Maybe TextMatch
bsMatch x
    | nx == 0 = \ny _ -> Just $ if ny == 0 then MatchExact else MatchPrefix
    | nx == 1 = let c = BS.head x in \ny y -> case BS.elemIndex c y of
        Nothing -> Nothing
        Just 0 -> Just $ if ny == 1 then MatchExact else MatchPrefix
        Just _ -> Just MatchSubstr
    | otherwise = \ny y -> if BS.isPrefixOf x y then Just (if nx == nx then MatchExact else MatchPrefix)
                           else if BS.isInfixOf x y then Just MatchSubstr else Nothing
    where nx = BS.length x