{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveTraversable #-} {- | Parse an Intrinsics file and generate a Haskell interface to every intrinsic. This is currently only tested and used for IntrinsicsX86.td and relies on the flat structure of IntrinsicsX86.td. In contrast to that, IntrinsicsPowerPC.td uses custom classes and thus cannot be processed by this program. A safer way would be to invoke the llvm-tblgen utility in some way. 1. We could write some Haskell or C++ code, that queries the intrinsics from the include/llvm/Intrinsics.h interface. 2. We could write a custom variant of llvm-tblgen with a back-end that creates the Haskell interface for intrinsics. This can be written in C++ or we have to call the TableGen library functions from Haskell somehow. 3. We could ask llvm-tblgen for a list of all records and parse its output. This requires no C++ coding, but we rely on the output format of @-print-records@. > llvm-tblgen -I /usr/local/llvm-3.1/include /usr/local/llvm-3.1/include/llvm/Intrinsics.td -print-records -} module Main where import qualified Text.ParserCombinators.Parsec.Token as T import qualified Text.ParserCombinators.Parsec.Language as L import qualified Text.ParserCombinators.Parsec as Parsec import Text.ParserCombinators.Parsec (CharParser, (<|>), ) import qualified Control.Monad.Trans.Writer as MW import qualified Data.Map as M import qualified Data.Set as S import qualified Data.Traversable as Trav import qualified Data.Foldable as Fold import qualified Data.List.HT as ListHT import qualified Data.List as List import qualified Data.Char as Char import Control.Monad (mzero, ) import Control.Functor.HT (void, ) import Data.Maybe (fromMaybe, ) import qualified System.IO as IO data Intrinsic typ = Intrinsic Name Name (FunctionType typ) deriving (Show, Functor, Fold.Foldable, Trav.Traversable) data FunctionType typ = FunctionType [typ] [typ] deriving (Show, Functor, Fold.Foldable, Trav.Traversable) type Name = String data QualName = QualName String String String newtype LLVMType = LLVMType String deriving (Show) newtype HaskellType = HaskellType {haskellTypeDecons :: String} deriving (Show, Eq, Ord) gccBuiltinPrefix :: String gccBuiltinPrefix = "__builtin_ia32_" unsignedFunctions :: S.Set String unsignedFunctions = S.fromList $ map (gccBuiltinPrefix++) $ "packusdw128" : "packusdw256" : "packuswb128" : "packuswb256" : "paddusb128" : "paddusb256" : "paddusw128" : "paddusw256" : "phminposuw128" : "pmaddubsw128" : "pmaddubsw256" : "pmaxub128" : "pmaxub256" : "pmaxud128" : "pmaxud256" : "pmaxuw128" : "pmaxuw256" : "pminub128" : "pminub256" : "pminud128" : "pminud256" : "pminuw128" : "pminuw256" : "pmulhuw128" : "pmulhuw256" : "pmuludq128" : "pmuludq256" : "psubusb128" : "psubusb256" : "psubusw128" : "psubusw256" : "vphaddubd" : "vphaddubq" : "vphaddubw" : "vphaddudq" : "vphadduwd" : "vphadduwq" : -- it's only the flag set that is unsigned -- the floating point operands are always signed "roundps" : "roundpd" : "roundps256" : "roundpd256" : "roundss" : "roundsd" : "cmpps" : "cmppd" : "cmpps256" : "cmppd256" : "cmpss" : "cmpsd" : [] translateType :: Bool -> LLVMType -> MW.Writer (M.Map HaskellType HaskellType) HaskellType translateType signed (LLVMType llvmTypeStr) = let formatQType (mqual, typ) = maybe "" (++".") mqual ++ typ returnType shortType longType = do MW.tell (M.singleton shortType longType) return shortType composedType = do vec <- Parsec.optionMaybe $ do void $ Parsec.char 'v' Parsec.many1 Parsec.digit prim <- Parsec.choice $ (do void $ Parsec.char 'i' fmap (\n -> if signed then (Just "I", "Int"++n) else (Just "W", "Word"++n)) $ Parsec.many1 Parsec.digit) : (do void $ Parsec.char 'f' n <- Parsec.many1 Parsec.digit case n of "32" -> return (Nothing, "Float") "64" -> return (Nothing, "Double") _ -> return $ (Just "LLVM", "FP" ++ n)) : [] return $ case vec of Nothing -> return $ HaskellType $ "LLVM.Value " ++ formatQType prim Just d -> returnType (HaskellType $ "V" ++ d ++ snd prim) (HaskellType $ "LLVM.Value (LLVM.Vector TypeNum.D" ++ d ++ " " ++ formatQType prim ++ ")") p = do void $ Parsec.string "llvm_" haskType <- Parsec.choice $ (Parsec.string "x86mmx" >> return (returnType (HaskellType "MMX") (HaskellType "LLVM.Value (LLVM.Vector TypeNum.D8 W.Word8)"))) : (Parsec.string "ptr" >> return (return (HaskellType "LLVM.Value (Ptr ())"))) : composedType : [] void $ Parsec.string "_ty" return haskType in case Parsec.parse p "" llvmTypeStr of Left _msg -> let typeSyn = HaskellType $ case llvmTypeStr of c:cs -> Char.toUpper c : cs _ -> "" in do MW.tell (M.singleton typeSyn (HaskellType "LLVM.Value ()")) return typeSyn Right act -> act splitName :: Name -> QualName splitName name = let p = do void $ Parsec.string "int_" arch <- Parsec.many1 Parsec.alphaNum void $ Parsec.char '_' feature <- Parsec.many1 Parsec.alphaNum void $ Parsec.char '_' stem <- Parsec.many1 Parsec.anyChar return $ QualName arch feature stem in case Parsec.parse p "" name of Left _msg -> QualName "" "" name Right qname -> qname featureMap :: M.Map String String featureMap = M.fromList $ ("sse", "sse1") : ("aesni", "aes") : ("3dnow", "amd3dnow") : ("3dnowa", "amd3dnowa") : [] formatIntrinsicInHaskell :: Intrinsic HaskellType -> String formatIntrinsicInHaskell (Intrinsic name gccblt (FunctionType parameters results)) = let (QualName _arch feature stem) = splitName name dotStem = map (\c -> case c of '_' -> '.'; _ -> c) stem haskName = fromMaybe gccblt $ ListHT.maybePrefixOf gccBuiltinPrefix gccblt resultStr = if null results then "LLVM.Value ()" else List.intercalate ", " $ map haskellTypeDecons results in unlines $ (haskName ++ " :: Ext.T (" ++ concatMap (\(HaskellType typ) -> typ ++ " -> ") parameters ++ "LLVM.CodeGenFunction r (" ++ resultStr ++ "))") : (haskName ++ " = Ext.intrinsic ExtX86." ++ M.findWithDefault feature feature featureMap ++ " " ++ show dotStem) : [] convertIntrinsics :: [Intrinsic LLVMType] -> String convertIntrinsics intrinsics = unlines $ "{- Do not edit! This file was created with the PrepareIntrinsics tool. -}" : "module LLVM.Extra.Extension.X86Auto where" : "" : "import qualified LLVM.Extra.Extension as Ext" : "import qualified LLVM.Extra.ExtensionCheck.X86 as ExtX86" : "import qualified LLVM.Core as LLVM" : "import qualified Types.Data.Num as TypeNum" : "import qualified Data.Int as I" : "import qualified Data.Word as W" : "import Foreign.Ptr (Ptr, )" : "" : case MW.runWriter $ mapM (\intr@(Intrinsic _ gccblt _) -> Trav.traverse (translateType (not $ S.member gccblt unsignedFunctions)) intr) $ filter (\(Intrinsic _ gccblt _) -> not $ null gccblt) intrinsics of (funcs, types) -> (map (\(HaskellType short, HaskellType long) -> "type " ++ short ++ " = " ++ long) $ M.toList types) ++ "" : (map formatIntrinsicInHaskell funcs) lexer :: T.TokenParser st lexer = T.makeTokenParser $ L.emptyDef { L.commentStart = "/*", L.commentEnd = "*/", L.commentLine = "//", L.nestedComments = False, L.identStart = identifierStart, L.identLetter = identifierLetter, L.opStart = mzero, L.opLetter = mzero, L.caseSensitive = True, L.reservedNames = [ "let", "def", "in" ], L.reservedOpNames = [ "=", ":", "," ] } identifierStart, identifierLetter :: CharParser st Char identifierStart = Parsec.letter <|> Parsec.char '_' identifierLetter = Parsec.alphaNum <|> Parsec.char '_' <|> Parsec.char '.' gccBuiltin :: CharParser st String gccBuiltin = do T.reserved lexer "GCCBuiltin" T.angles lexer $ T.stringLiteral lexer llvmType :: CharParser st LLVMType llvmType = fmap LLVMType $ T.identifier lexer intrinsic :: CharParser st (FunctionType LLVMType) intrinsic = Parsec.between (T.reserved lexer "Intrinsic") (T.semi lexer) $ T.angles lexer $ do results <- T.brackets lexer $ T.commaSep lexer llvmType void $ T.comma lexer parameters <- T.brackets lexer $ T.commaSep lexer llvmType Parsec.optional $ do void $ T.comma lexer _attributes <- T.brackets lexer $ T.commaSep lexer $ T.identifier lexer return () return $ FunctionType parameters results letBlock :: CharParser st [Intrinsic LLVMType] letBlock = do T.reserved lexer "let" T.reserved lexer "TargetPrefix" void $ T.symbol lexer "=" _prefix <- T.stringLiteral lexer T.reserved lexer "in" T.braces lexer $ Parsec.many $ do T.reserved lexer "def" name <- T.identifier lexer void $ T.colon lexer gccblt <- Parsec.option "" $ do gccblt <- gccBuiltin void $ T.comma lexer return gccblt intr <- intrinsic return $ Intrinsic name gccblt intr parser :: CharParser st [Intrinsic LLVMType] parser = fmap concat $ Parsec.many1 letBlock main :: IO () main = do parsed <- Parsec.parseFromFile (T.whiteSpace lexer >> parser) "/usr/local/llvm-3.1/include/llvm/IntrinsicsX86.td" case parsed of Left msg -> IO.hPutStrLn IO.stderr $ show msg Right intrinsics -> writeFile "src/LLVM/Extra/Extension/X86Auto.hs" $ convertIntrinsics intrinsics