{-# LANGUAGE ForeignFunctionInterface #-}
--------------------------------------------------------------------------
-- |
-- Module:     System.Cpuid
-- Copyright:  (c) 2008,2010 Martin Grabmueller
-- License:    GPL
-- 
-- Maintainer:  martin@grabmueller.de
-- Stability:   provisional
-- Portability: non-portable (requires IA-32 processor)
--
-- This module provides the function "cpuid" for accessing the cpuid
-- instruction on modern IA-32 processors.  Additionally, some convenience
-- functions are provided, which perform some of the (really complicated and
-- obstruse) decoding.
--
-- As an example, you may use the following program to measure the overhead
-- of calling this function:
--
-- > module Main(main) where
-- > 
-- > import Text.Printf
-- > import System.Cpuid
-- > 
-- > main :: IO ()
-- > main =
-- >     do (a, b, c, d) <- cpuid 0
-- >        printf "basic CPUID usage: EAX=0: %8x %8x %8x %8x\n\n" a b c d
-- >        vs <- vendorString
-- >        printf "Vendor string: %s\n\n" vs
-- >        vs <- brandString
-- >        printf "Brand string: %s\n\n" vs
-- >        printf "Cache information:\n"
-- >        infos <- cacheInfo
-- >        mapM_ (\ v -> putStrLn $ "  " ++ show v) infos
-- >        putStrLn ""
-- >        ProcessorInfo{piFamily = fam, piModel = mod, piStepping = step, piType = typ} <- processorInfo
-- >        printf "processor info: family: %d, model: %d, stepping: %d, processor type: %d\n" fam mod step typ
--------------------------------------------------------------------------
module System.Cpuid
    (-- * Data types
     Associativity(..),
     PageSize(..),
     Ways(..),
     Entries(..),
     CacheSize(..),
     CacheInfo(..),
     LineSize(..),
     MuOps(..),
     BytesPerSector(..),
     ProcessorInfo(..),
     -- * Functions
     cpuid, 
     processorInfo,
     vendorString, 
     brandString,
     cacheInfo) where

import Foreign
import Foreign.C.String

foreign import ccall unsafe "cpuid" _cpuid :: Word32 -> Ptr Word32 -> Ptr Word32 -> Ptr Word32 -> Ptr Word32 -> IO ()

-- | Execute the @cpuid@ instructions with the given argument
-- in the EAX register.  Return the values of the registers
-- EAX, EBX, ECX and EDX in that order.
cpuid :: Word32 -> IO (Word32, Word32, Word32, Word32)
cpuid val =
    alloca (\ a -> 
    alloca (\ b -> 
    alloca (\ c -> 
    alloca (\ d -> do _cpuid val a b c d
                      aa <- peek a
                      bb <- peek b
                      cc <- peek c
                      dd <- peek d
                      return (aa, bb, cc, dd)))))

-- | Execute the @cpuid@ instruction and return the vendor
-- string reported by that instruction.
vendorString :: IO String
vendorString =
    do (_, b, c, d) <- cpuid 0
       allocaBytes 20 (\ str -> do let str' :: Ptr Word32
                                       str' = castPtr str 
                                   poke str' b
                                   poke (str' `plusPtr` 4) d
                                   poke (str' `plusPtr` 8) c
                                   peekCStringLen (str, 12))

-- | Execute the @cpuid@ instruction and return the brand string
-- (processor name and maximum frequency) reported by that
-- instruction.
brandString :: IO String
brandString =
    do allocaBytes 80
         (\ str -> 
              do let str' :: Ptr Word32
                     str' = castPtr str 
                 (a1, b1, c1, d1) <- cpuid 0x80000002
                 poke str' a1
                 poke (str' `plusPtr` 4) b1
                 poke (str' `plusPtr` 8) c1
                 poke (str' `plusPtr` 12) d1
                 (a2, b2, c2, d2) <- cpuid 0x80000003
                 poke (str' `plusPtr` 16) a2
                 poke (str' `plusPtr` 20) b2
                 poke (str' `plusPtr` 24) c2
                 poke (str' `plusPtr` 28) d2
                 (a3, b3, c3, d3) <- cpuid 0x80000004
                 poke (str' `plusPtr` 32) a3
                 poke (str' `plusPtr` 36) b3
                 poke (str' `plusPtr` 40) c3
                 poke (str' `plusPtr` 44) d3
                 peekCStringLen (str, 16 * 3))

-- | Number of entries in a TLB.
data Entries = Entries Int
                     deriving (Show)

-- | Associativity in a set-associative cache.
data Ways = Ways Int
                     deriving (Show)

-- | MuOps in a processors trace cache.
data MuOps = MuOps Int
                     deriving (Show)

-- | Page size. Some entries can have alternative page sizes,
-- therefore the complicated type.
data PageSize = PageSize Int
              | PageSizeOr PageSize PageSize
                 deriving (Show)

-- | Cache size. Some entries can have alternative cache sizes,
-- therefore the complicated type.
data CacheSize = CacheSize Int
               | CacheSizeOr CacheSize CacheSize
                 deriving (Show)

-- | Line size in a cache.
data LineSize = LineSize Int
                     deriving (Show)

-- | Bytes per sector in a cache.  
data BytesPerSector = BytesPerSector Int
                     deriving (Show)

-- | Cache associativity.  For some entries, this is not specified in
-- the manual.  We report these as 'DirectMapped'.
data Associativity = SetAssociative Ways
                   | DirectMapped
                     deriving (Show)

-- | Information for caches and TLBs.
data CacheInfo = InstructionTLB (Maybe CacheSize) PageSize Associativity Entries -- ^ Configuration of code TLB.
               | DataTLB (Maybe CacheSize) PageSize Associativity Entries -- ^ Configuration of data TLB.
               | FirstLevelICache CacheSize Associativity LineSize -- ^ First-level code cache configuration.
               | FirstLevelDCache CacheSize Associativity LineSize -- ^ First-level code cache configuration.
               | NoSecondLevelCache -- ^ No second level support.
               | SecondLevelCache CacheSize Associativity LineSize (Maybe BytesPerSector) -- ^ Second-level cache configuration.
               | NoThirdLevelCache -- ^ No third level support.
               | NoSecondOrThirdLevelCache -- ^ Internal use only.
               | ThirdLevelCache CacheSize Associativity LineSize (Maybe BytesPerSector) -- ^ Second-level cache configuration.
               | TraceCache MuOps Associativity -- ^ Trace cache (1st-level code cache) configuration.
               | Prefetching Int -- ^ Prefetching information.
                 deriving (Show)

-- | Fetch all available cache information from the processor, using
-- the @cpuid@ instruction.  The list is not ordered.
cacheInfo :: IO [CacheInfo]
cacheInfo =
    do (a, b, c, d) <- cpuid 2
       collectCacheDescriptors (fromIntegral ((a .&. 0xff) - 1)) [] (a .&. complement 0xff) b c d

-- | Call the @cpuid@ instruction as often as needed and merge the values.
collectCacheDescriptors :: Int -> [CacheInfo] -> Word32 -> Word32 -> Word32 -> Word32 -> IO [CacheInfo]
collectCacheDescriptors 0 infos a b c d = return $ postProcess $ updateCD infos [a, b, c, d]
collectCacheDescriptors n infos a b c d =
    do let infos' = updateCD infos [a, b, c, d]
       (a', b', c', d') <- cpuid 2
       collectCacheDescriptors (n - 1) infos' a' b' c' d'

-- | Convert the strange 0x40 code to valie entries.
postProcess :: [CacheInfo] -> [CacheInfo]
postProcess infos = 
    let has2ndLevel = any (\ info -> case info of
                                       SecondLevelCache{} -> True
                                       _ -> False) infos
        hasNo2ndOr3rd = any (\ info -> case info of
                                       NoSecondOrThirdLevelCache{} -> True
                                       _ -> False) infos
        infos' = filter (\ info -> case info of
                                     NoSecondOrThirdLevelCache{} -> False
                                     _ -> True) infos
    in if has2ndLevel
          then if hasNo2ndOr3rd 
                 then NoThirdLevelCache : infos'
                 else infos'
          else NoSecondLevelCache : infos'

-- | Convert the values from the registers to cache information
-- records.
updateCD :: [CacheInfo] -> [Word32] -> [CacheInfo]
updateCD infos wss = updateBytes infos (splitBytes wss)
  where splitBytes :: [Word32] -> [Word8]
        splitBytes [] = []
        splitBytes (w:ws) | w .&. 0x80000000 /= 0 = splitBytes ws
        splitBytes (w:ws) = [fromIntegral $ w .&. 0xff,
                             fromIntegral $ (w `shiftR` 8) .&. 0xff,
                             fromIntegral $ (w `shiftR` 16) .&. 0xff,
                             fromIntegral $ (w `shiftR` 24) .&. 0xff] ++ splitBytes ws
        updateBytes :: [CacheInfo] -> [Word8] -> [CacheInfo]
        updateBytes infos' [] = infos'
        updateBytes infos' (b:bs) = case lookup b cacheTable of
                                      Nothing -> updateBytes infos' bs
                                      Just info -> updateBytes (info : infos') bs

-- | Convert kBytes to bytes.
kByte :: Int -> Int
kByte b = b * 1024

-- | Convert mBytes to bytes.
mByte :: Int -> Int
mByte b = b * 1024 * 1024

-- | Table of cache configuration.  Information from the CPUID
-- documentation in the /IA-32 Intel Architecture Software Developer's
-- Manual Volumes 2A/.
cacheTable :: [(Word8, CacheInfo)]
cacheTable =
    [
     (0x01, InstructionTLB Nothing (PageSize (kByte 4)) (SetAssociative (Ways 4)) (Entries 32)),
     (0x02, InstructionTLB Nothing (PageSize (mByte 4)) (SetAssociative (Ways 4)) (Entries 2)),
     (0x03, DataTLB Nothing (PageSize (kByte 4)) (SetAssociative (Ways 4)) (Entries 64)),
     (0x04, DataTLB Nothing (PageSize (mByte 4)) (SetAssociative (Ways 4)) (Entries 8)),
     (0x06, FirstLevelICache (CacheSize (kByte 4)) (SetAssociative (Ways 4)) (LineSize 32)),
     (0x08, FirstLevelICache (CacheSize (kByte 16)) (SetAssociative (Ways 4)) (LineSize 32)),
     (0x0a, FirstLevelDCache (CacheSize (kByte 8)) (SetAssociative (Ways 2)) (LineSize 32)),
     (0x0c, FirstLevelDCache (CacheSize (kByte 16)) (SetAssociative (Ways 4)) (LineSize 32)),
     (0x22, ThirdLevelCache (CacheSize (kByte 512)) (SetAssociative (Ways 4)) (LineSize 64) (Just (BytesPerSector 2))),
     (0x23, ThirdLevelCache (CacheSize (mByte 1)) (SetAssociative (Ways 8)) (LineSize 64) (Just (BytesPerSector 2))),
     (0x25, ThirdLevelCache (CacheSize (mByte 2)) (SetAssociative (Ways 8)) (LineSize 64) (Just (BytesPerSector 2))),
     (0x29, ThirdLevelCache (CacheSize (mByte 4)) (SetAssociative (Ways 8)) (LineSize 64) (Just (BytesPerSector 2))),
     (0x2c, FirstLevelDCache (CacheSize (kByte 32)) (SetAssociative (Ways 8)) (LineSize 64)),
     (0x30, FirstLevelICache (CacheSize (kByte 32)) (SetAssociative (Ways 8)) (LineSize 64)),
     (0x40, NoSecondOrThirdLevelCache),
     (0x41, SecondLevelCache (CacheSize (kByte 128)) (SetAssociative (Ways 4)) (LineSize 32) Nothing),
     (0x42, SecondLevelCache (CacheSize (kByte 256)) (SetAssociative (Ways 4)) (LineSize 32) Nothing),
     (0x43, SecondLevelCache (CacheSize (kByte 512)) (SetAssociative (Ways 4)) (LineSize 32) Nothing),
     (0x44, SecondLevelCache (CacheSize (mByte 1)) (SetAssociative (Ways 4)) (LineSize 32) Nothing),
     (0x45, SecondLevelCache (CacheSize (mByte 2)) (SetAssociative (Ways 4)) (LineSize 32) Nothing),
     (0x46, ThirdLevelCache (CacheSize (mByte 4)) (SetAssociative (Ways 4)) (LineSize 64) Nothing),
     (0x47, ThirdLevelCache (CacheSize (mByte 8)) (SetAssociative (Ways 8)) (LineSize 64) Nothing),
     (0x50, InstructionTLB (Just (CacheSize (kByte 4))) (PageSizeOr (PageSize (mByte 2)) (PageSize (mByte 4))) DirectMapped (Entries 64)),
     (0x51, InstructionTLB (Just (CacheSize (kByte 4))) (PageSizeOr (PageSize (mByte 2)) (PageSize (mByte 4))) DirectMapped (Entries 128)),
     (0x52, InstructionTLB (Just (CacheSize (kByte 4))) (PageSizeOr (PageSize (mByte 2)) (PageSize (mByte 4))) DirectMapped (Entries 256)),
     (0x5b, DataTLB (Just (CacheSize (kByte 4))) (PageSize (mByte 4)) DirectMapped (Entries 64)),
     (0x5c, DataTLB (Just (CacheSize (kByte 4))) (PageSize (mByte 4)) DirectMapped (Entries 128)),
     (0x5d, DataTLB (Just (CacheSize (kByte 4))) (PageSize (mByte 4)) DirectMapped (Entries 256)),
     (0x60, FirstLevelDCache (CacheSize (kByte 16)) (SetAssociative (Ways 8)) (LineSize 64)),
     (0x66, FirstLevelDCache (CacheSize (kByte 8)) (SetAssociative (Ways 4)) (LineSize 64)),
     (0x67, FirstLevelDCache (CacheSize (kByte 16)) (SetAssociative (Ways 4)) (LineSize 64)),
     (0x68, FirstLevelDCache (CacheSize (kByte 32)) (SetAssociative (Ways 4)) (LineSize 64)),
     (0x70, TraceCache (MuOps (kByte 12)) (SetAssociative (Ways 8))),
     (0x71, TraceCache (MuOps (kByte 16)) (SetAssociative (Ways 8))),
     (0x72, TraceCache (MuOps (kByte 32)) (SetAssociative (Ways 8))),
     (0x78, SecondLevelCache (CacheSize (mByte 1)) (SetAssociative (Ways 4)) (LineSize 64) Nothing),
     (0x79, SecondLevelCache (CacheSize (kByte 128)) (SetAssociative (Ways 8)) (LineSize 64) (Just (BytesPerSector 2))),
     (0x7a, SecondLevelCache (CacheSize (kByte 256)) (SetAssociative (Ways 8)) (LineSize 64) (Just (BytesPerSector 2))),
     (0x7b, SecondLevelCache (CacheSize (kByte 512)) (SetAssociative (Ways 8)) (LineSize 64) (Just (BytesPerSector 2))),
     (0x7c, SecondLevelCache (CacheSize (mByte 1)) (SetAssociative (Ways 8)) (LineSize 64) (Just (BytesPerSector 2))),
     (0x7d, SecondLevelCache (CacheSize (mByte 2)) (SetAssociative (Ways 8)) (LineSize 64) Nothing),
     (0x7f, SecondLevelCache (CacheSize (kByte 512)) (SetAssociative (Ways 2)) (LineSize 64) Nothing),
     (0x82, SecondLevelCache (CacheSize (kByte 256)) (SetAssociative (Ways 8)) (LineSize 32) Nothing),
     (0x83, SecondLevelCache (CacheSize (kByte 512)) (SetAssociative (Ways 8)) (LineSize 32) Nothing),
     (0x84, SecondLevelCache (CacheSize (mByte 1)) (SetAssociative (Ways 8)) (LineSize 32) Nothing),
     (0x85, SecondLevelCache (CacheSize (mByte 2)) (SetAssociative (Ways 8)) (LineSize 32) Nothing),
     (0x86, SecondLevelCache (CacheSize (kByte 512)) (SetAssociative (Ways 4)) (LineSize 64) Nothing),
     (0x87, SecondLevelCache (CacheSize (mByte 1)) (SetAssociative (Ways 8)) (LineSize 64) Nothing),
     (0xb0, InstructionTLB Nothing (PageSize (kByte 4)) (SetAssociative (Ways 4)) (Entries 128)),
     (0xb3, DataTLB Nothing (PageSize (kByte 4)) (SetAssociative (Ways 4)) (Entries 128)),
     (0xf0, Prefetching 64),
     (0xf1, Prefetching 128)
    ]


-- | Processor information.
data ProcessorInfo = ProcessorInfo{piFamily :: Int, -- ^ Processor family.
                                   piModel :: Int, -- ^ Processor model.
                                   piStepping :: Int, -- ^ Processor stepping.
                                   piType :: Int --  ^ Processor type.
                                  }

-- | Retrieve basic processor information from the processor using the
-- @cpuid@ instruction.
processorInfo :: IO ProcessorInfo
processorInfo =
    do (a, _, _, _) <- cpuid 1
       let stepping = a .&. 0xf
           model = (a `shiftR` 4) .&. 0xf
           family = (a `shiftR` 8) .&. 0x0f
           extFamily = (a `shiftR` 20) .&. 0xff
           typ = (a `shiftR` 12) .&. 0x3
       return (ProcessorInfo{piFamily = fromIntegral (if family /= 0xf
                                                      then family
                                                      else family + extFamily),
                             piModel = fromIntegral model,
                             piStepping = fromIntegral stepping,
                             piType = fromIntegral typ})