{-# LANGUAGE ViewPatterns, TupleSections, RecordWildCards, ScopedTypeVariables #-} {-# LANGUAGE DeriveDataTypeable, GeneralizedNewtypeDeriving, BangPatterns, GADTs #-} module Output.Types(writeTypes, searchTypes, searchTypesDebug) where {- Approach: Each signature is stored, along with a fingerprint A quick search finds the most promising 100 fingerprints A slow search ranks the 100 items, excluding some -} import qualified Data.Map.Strict as Map import qualified Data.ByteString.Char8 as BS import qualified Data.Vector.Storable as V import qualified Data.Vector.Storable.Mutable as VM import Data.Binary import Data.Maybe import Data.List.Extra import Data.Tuple.Extra import Data.Generics.Uniplate.Data import Data.Data import System.FilePath import System.IO.Extra import Control.Monad.Extra import Foreign.Storable import Control.Applicative import Numeric.Extra import Prelude import Input.Item import General.Store import General.IString import General.Util writeTypes :: StoreWrite -> Maybe FilePath -> [(Maybe TargetId, Item)] -> IO () writeTypes store debug xs = do let debugger ext body = whenJust debug $ \file -> writeFileUTF8 (file <.> ext) body inst <- return $ Map.fromListWith (+) [(fromIString x,1) | (_, IInstance (Sig _ [TCon x _])) <- xs] xs <- writeDuplicates store [(i, fromIString <$> t) | (Just i, ISignature t) <- xs] names <- writeNames store debugger inst xs xs <- return $ map (lookupNames names (error "Unknown name in writeTypes")) xs writeFingerprints store xs writeSignatures store xs searchTypes :: StoreRead -> Sig String -> [TargetId] searchTypes store q = concatMap (expandDuplicates $ readDuplicates store) $ searchFingerprints store names 100 $ lookupNames names name0 q -- map unknown fields to name0, i.e. _ where names = readNames store searchTypesDebug :: StoreRead -> (String, Sig String) -> [(String, Sig String)] -> [String] searchTypesDebug store query answers = intercalate [""] $ f False "Query" query : zipWith (\i -> f True ("Answer " ++ show i)) [1..] answers where qsig = lookupNames names name0 $ snd query names = readNames store f match name (raw, sig) = [name ++ ": " ++ raw ,"Sig String: " ++ prettySig sig ,"Sig Name: " ++ prettySig (fmap prettyName sn) ,"Fingerprint: " ++ prettyFingerprint fp] ++ if not match then [] else ["Cost: " ++ maybe "X, no match" show (matchFingerprint qsig fp) ,"Explain: " ++ showExplain (matchFingerprintDebug qsig fp)] where sn = lookupNames names name0 sig fp = toFingerprint sn showExplain = intercalate ", " . map g . sortOn (either (const minBound) (negate . snd)) g (Left s) = "X " ++ s g (Right (s, x)) = show x ++ " " ++ s --------------------------------------------------------------------- -- NAME/CTOR INFORMATION data TypesNames a where TypesNames :: TypesNames (BS.ByteString, V.Vector Name) deriving Typeable -- Must be a unique Name per String. -- First 0-99 are variables, rest are constructors. -- More popular type constructors have higher numbers. -- There are currently about 14K names, so about 25% of the bit patterns are taken newtype Name = Name Word16 deriving (Eq,Ord,Show,Data,Typeable,Storable,Binary) name0 = Name 0 -- use to represent _ isCon, isVar :: Name -> Bool isVar (Name x) = x < 100 isCon = not . isVar prettyName :: Name -> String prettyName x@(Name i) | x == name0 = "_" | isVar x = "v" ++ show i | otherwise = "C" ++ show i -- | Give a name a popularity, where 0 is least popular, 1 is most popular popularityName :: Name -> Double popularityName (Name n) | isVar $ Name n = error "Can't call popularityName on a Var" | otherwise = fromIntegral (n - 100) / fromIntegral (maxBound - 100 :: Word16) newtype Names = Names {lookupName :: String -> Maybe Name} lookupNames :: Names -> Name -> Sig String -> Sig Name lookupNames Names{..} def (Sig ctx typ) = Sig (map f ctx) (map g typ) where vars = nubOrd $ "_" : [x | Ctx _ x <- ctx] ++ [x | TVar x _ <- universeBi typ] var x = Name $ min 99 $ fromIntegral $ fromMaybe (error "lookupNames") $ elemIndex x vars con = fromMaybe def . lookupName f (Ctx a b) = Ctx (con $ '~':a) (var b) g (TCon x xs) = TCon (con x) $ map g xs g (TVar x xs) = TVar (var x) $ map g xs writeNames :: StoreWrite -> (String -> String -> IO ()) -> Map.Map String Int -> [Sig String] -> IO Names writeNames store debug inst xs = do let sigNames (Sig ctx typ) = nubOrd ['~':x | Ctx x _ <- ctx] ++ nubOrd [x | TCon x _ <- universeBi typ] -- want to rank highly instances that have a lot of types, and a lot of definitions -- eg Eq is used and defined a lot. Constructor is used in 3 places but defined a lot. let freq :: Map.Map String Int = -- how many times each identifier occurs Map.unionWith (\typ sig -> sig + min sig typ) (Map.mapKeysMonotonic ('~':) inst) $ Map.fromListWith (+) $ map (,1::Int) $ concatMap sigNames xs let names = spreadNames $ Map.toList freq debug "names" $ unlines [s ++ " = " ++ show n ++ " (" ++ show (freq Map.! s) ++ " uses)" | (s,n) <- names] names <- return $ sortOn fst names storeWrite store TypesNames (BS.pack $ intercalate "\0" $ map fst names, V.fromList $ map snd names) let mp2 = Map.fromAscList names return $ Names $ \x -> Map.lookup x mp2 -- | Given a list of names, spread them out uniquely over the range [Name 100 .. Name maxBound] -- Aim for something with a count of p to be at position (p / pmax) linear interp over the range spreadNames :: [(a, Int)] -> [(a, Name)] spreadNames [] = [] spreadNames (reverse . sortOn snd -> xs@((_,limit):_)) = check $ f (99 + fromIntegral (length xs)) maxBound xs where check xs | all (isCon . snd) xs && length (nubOrd $ map snd xs) == length xs = xs | otherwise = error "Invalid spreadNames" -- I can only assign values between mn and mx inclusive f :: Word16 -> Word16 -> [(a, Int)] -> [(a, Name)] f !mn !mx [] = [] f mn mx ((a,i):xs) = (a, Name real) : f (mn-1) (real-1) xs where real = fromIntegral $ max mn $ min mx ideal ideal = mn + floor (fromIntegral (min commonNameThreshold i) * fromIntegral (mx - mn) / fromIntegral (min commonNameThreshold limit)) -- WARNING: Magic constant. -- Beyond this count names don't accumulate extra points for being common. -- Ensures that things like Bool (4523 uses) ranks much higher than ShakeOptions (24 uses) by not having -- [] (10237 uses) skew the curve too much and use up all the available bits of discrimination. commonNameThreshold = 1024 readNames :: StoreRead -> Names readNames store = Names $ \x -> Map.lookup (BS.pack x) mp where mp = Map.fromAscList $ zip (BS.split '\0' s) $ V.toList n (s, n) = storeRead store TypesNames --------------------------------------------------------------------- -- DUPLICATION INFORMATION data TypesDuplicates a where TypesDuplicates :: TypesDuplicates (Jagged TargetId) deriving Typeable newtype Duplicates = Duplicates {expandDuplicates :: Int -> [TargetId]} -- writeDuplicates xs == nub (map snd xs) -- all duplicates are removed, order of first element is preserved -- (i,x) <- zip [0..] (writeDuplicates xs); expandDuplicates i == map fst (filter ((==) x . snd) xs) -- given the result at position i, expandDuplicates gives the TargetId's related to it writeDuplicates :: Ord a => StoreWrite -> [(TargetId, Sig a)] -> IO [Sig a] writeDuplicates store xs = do -- s=signature, t=targetid, p=popularity (incoing index), i=index (outgoing index) xs <- return $ map (second snd) $ sortOn (fst . snd) $ Map.toList $ Map.fromListWith (\(x1,x2) (y1,y2) -> (, x2 ++ y2) $! min x1 y1) [(s,(p,[t])) | (p,(t,s)) <- zip [0::Int ..] xs] -- give a list of TargetId's at each index storeWrite store TypesDuplicates $ jaggedFromList $ map (reverse . snd) xs return $ map fst xs readDuplicates :: StoreRead -> Duplicates readDuplicates store = Duplicates $ V.toList . ask where ask = jaggedAsk $ storeRead store TypesDuplicates --------------------------------------------------------------------- -- FINGERPRINT INFORMATION data TypesFingerprints a where TypesFingerprints :: TypesFingerprints (V.Vector Fingerprint) deriving Typeable data Fingerprint = Fingerprint {fpRare1 :: {-# UNPACK #-} !Name -- Most rare ctor, or 0 if no rare stuff ,fpRare2 :: {-# UNPACK #-} !Name -- 2nd rare ctor ,fpRare3 :: {-# UNPACK #-} !Name -- 3rd rare ctor ,fpArity :: {-# UNPACK #-} !Word8 -- Artiy, where 0 = CAF ,fpTerms :: {-# UNPACK #-} !Word8 -- Number of terms (where 255 = 255 and above) } deriving (Eq,Show,Typeable) prettyFingerprint :: Fingerprint -> String prettyFingerprint Fingerprint{..} = "arity=" ++ show fpArity ++ ", terms=" ++ show fpTerms ++ ", rarity=" ++ unwords (map prettyName [fpRare1, fpRare2, fpRare3]) {-# INLINE fpRaresFold #-} fpRaresFold :: (b -> b -> b) -> (Name -> b) -> Fingerprint -> b fpRaresFold g f Fingerprint{..} = f fpRare1 `g` f fpRare2 `g` f fpRare3 instance Storable Fingerprint where sizeOf _ = 64 alignment _ = 4 peekByteOff ptr i = Fingerprint <$> peekByteOff ptr (i+0) <*> peekByteOff ptr (i+2) <*> peekByteOff ptr (i+4) <*> peekByteOff ptr (i+6) <*> peekByteOff ptr (i+7) pokeByteOff ptr i Fingerprint{..} = do pokeByteOff ptr (i+0) fpRare1 >> pokeByteOff ptr (i+2) fpRare2 >> pokeByteOff ptr (i+4) fpRare3 pokeByteOff ptr (i+6) fpArity >> pokeByteOff ptr (i+7) fpTerms toFingerprint :: Sig Name -> Fingerprint toFingerprint sig = Fingerprint{..} where fpRare1:fpRare2:fpRare3:_ = sort (nubOrd $ filter isCon $ universeBi sig) ++ [name0,name0,name0] fpArity = fromIntegral $ min 255 $ max 0 $ pred $ length $ sigTy sig fpTerms = fromIntegral $ min 255 $ length (universeBi sig :: [Name]) writeFingerprints :: StoreWrite -> [Sig Name] -> IO () writeFingerprints store xs = storeWrite store TypesFingerprints $ V.fromList $ map toFingerprint xs data MatchFingerprint a ma = MatchFingerprint {mfpAdd :: a -> a -> a ,mfpAddM :: ma -> ma -> ma ,mfpJust :: a -> ma ,mfpCost :: String -> Int -> a ,mfpMiss :: String -> ma } matchFingerprint :: Sig Name -> Fingerprint -> Maybe Int matchFingerprint = matchFingerprintEx MatchFingerprint{..} where mfpAdd = (+) mfpAddM = liftM2 (+) mfpJust = Just mfpCost _ x = x mfpMiss _ = Nothing matchFingerprintDebug :: Sig Name -> Fingerprint -> [Either String (String, Int)] matchFingerprintDebug = matchFingerprintEx MatchFingerprint{..} where mfpAdd = (++) mfpAddM = (++) mfpJust = id mfpCost s x = [Right (s,x)] mfpMiss s = [Left s] {-# INLINE matchFingerprintEx #-} matchFingerprintEx :: forall a ma . MatchFingerprint a ma -> Sig Name -> Fingerprint -> ma -- lower is better matchFingerprintEx MatchFingerprint{..} sig@(toFingerprint -> target) = \candidate -> arity (fpArity candidate) `mfpAddM` terms (fpTerms candidate) `mfpAddM` rarity candidate where -- CAFs must match perfectly, otherwise too many is better than too few arity | ta == 0 = \ca -> if ca == 0 then mfpJust $ mfpCost "arity equal" 0 else mfpMiss "arity different and query a CAF" -- searching for a CAF | otherwise = \ca -> case fromIntegral $ ca - ta of _ | ca == 0 -> mfpMiss "arity different and answer a CAF" -- searching for a CAF 0 -> mfpJust $ mfpCost "arity equal" 0 -- perfect match -1 -> mfpJust $ mfpCost "arity 1 to remove" 1000 -- not using something the user carefully wrote n | n > 0 && allowMore -> mfpJust $ mfpCost ("arity " ++ show n ++ " to add with wildcard") $ 300 * n -- user will have to make up a lot, but they said _ in their search 1 -> mfpJust $ mfpCost "arity 1 to add" 300 -- user will have to make up an extra param 2 -> mfpJust $ mfpCost "arity 2 to add" 900 -- user will have to make up two params _ -> mfpMiss "" where ta = fpArity target allowMore = TVar name0 [] `elem` sigTy sig -- missing terms are a bit worse than invented terms, but it's fairly balanced, clip at large numbers terms = \ct -> case fromIntegral $ ct - tt of n | abs n > 20 -> mfpMiss $ "terms " ++ show n ++ " different" -- too different | n == 0 -> mfpJust $ mfpCost "terms equal" 0 | n > 0 -> mfpJust $ mfpCost ("terms " ++ show n ++ " to add") $ n * 10 -- candidate has more terms | otherwise -> mfpJust $ mfpCost ("terms " ++ show (-n) ++ " to remove") $ abs n * 12 -- candidate has less terms where tt = fpTerms target -- given two fingerprints, you have three sets: -- Those in common; those in one but not two; those in two but not one -- those that are different rarity = \cr -> let tr = target in mfpJust $ differences 5000 400 tr cr `mfpAdd` -- searched for T but its not in the candidate, bad if rare, not great if common differences 1000 50 cr tr -- T is in the candidate but I didn't search for it, bad if rare, OK if common where fpRaresElem :: Name -> Fingerprint -> Bool fpRaresElem !x = fpRaresFold (||) (== x) differences :: Double -> Double -> Fingerprint -> Fingerprint -> a differences !rare !common !want !have = fpRaresFold mfpAdd f want where f n | fpRaresElem n have = mfpCost ("term in common " ++ prettyName n) 0 | n == name0 = mfpCost ("term _ missing") $ floor rare -- should this be common? | otherwise = let p = popularityName n in mfpCost ("term " ++ prettyName n ++ " (" ++ showDP 2 p ++ ") missing") $ floor $ (p*common) + ((1-p)*rare) searchFingerprints :: StoreRead -> Names -> Int -> Sig Name -> [Int] searchFingerprints store names n sig = map snd $ takeSortOn fst n [(v, i) | (i,f) <- zip [0..] fs, Just v <- [test f]] where fs = V.toList $ storeRead store TypesFingerprints :: [Fingerprint] test = matchFingerprint sig --------------------------------------------------------------------- -- SIGNATURES data TypesSigPositions a where TypesSigPositions :: TypesSigPositions (V.Vector Word32) deriving Typeable data TypesSigData a where TypesSigData :: TypesSigData BS.ByteString deriving Typeable writeSignatures :: StoreWrite -> [Sig Name] -> IO () writeSignatures store xs = do v <- VM.new $ length xs forM_ (zip [0..] xs) $ \(i,x) -> do let b = encodeBS x storeWritePart store TypesSigData b VM.write v i $ fromIntegral $ BS.length b v <- V.freeze v storeWrite store TypesSigPositions v