module JarFind
( Class(..)
, Member(..)
, Access(..)
, ClassFileSource(..)
, Location(..)
, parseClassFile
, parseFileSource
) where
import Codec.Archive.Zip
import qualified Data.ByteString.Lazy as B
import Control.Monad
import Control.Applicative ((<$>))
import Data.List
import Data.Bits
import Data.Maybe
import Data.Char
import Data.Array.ST
import Data.Array.Unboxed
import Data.Binary
import Data.Binary.Get
import System.Console.GetOpt
import System.Environment
import Text.Regex.TDFA
data Class = Class { clsName :: String
, clsAccess :: Access
, clsIsInterface :: Bool
, clsMembers :: [Member]
}
deriving Show
data Member = Method { mName :: String
, mAccess :: Access
, mSig :: String
}
| Field { mName :: String
, mAccess :: Access
, mSig :: String
}
deriving Show
data Access = Public
| Private
| Protected
| Package
deriving Eq
instance Show Access where
show Public = "public"
show Private = "private"
show Protected = "protected"
show Package = ""
data ConstantPool = ConstantPool
{ cpPos :: UArray Word16 Word16
, cpData :: B.ByteString
}
getUTF8FromCP :: ConstantPool -> Word16 -> B.ByteString
getUTF8FromCP cp@(ConstantPool cpPos cpData) i = res
where pos = cpPos!i
begin = fromIntegral $ pos+3
len = fromIntegral $ getWord16FromCP cp (pos+1)
res = B.take len (B.drop begin cpData)
getWord16FromCP :: Integral a => ConstantPool -> a -> Word16
getWord16FromCP (ConstantPool cpPos cpData) iData =
(do skip (fromIntegral iData); readWord16) `runGet` cpData
readByte :: Get Word8
readByte = get
readWord16 :: Get Word16
readWord16 = get
readInt16 :: Get Int
readInt16 = fromIntegral `liftM` readWord16
readWord32 :: Get Word32
readWord32 = get
parseClassFile :: B.ByteString -> Class
parseClassFile = runGet $ do
skip (4 + 2 + 2)
cpSize <- readWord16
cp <- parseConstantPool cpSize
accessFlags <- readWord16
thisClass <- readWord16
let classNameIndex = getWord16FromCP cp (1 + (cpPos cp)!thisClass)
let className = getUTF8FromCP cp classNameIndex
let access = flagsToAccess accessFlags
let isInterface = (0 /= accessFlags .&. 0x0200)
skip 2
ifCount <- readInt16
skip (2 * ifCount)
fieldCount <- readInt16
fields <- replicateM fieldCount (parseMember cp Field decryptType)
methodCount <- readInt16
methods <- replicateM methodCount (parseMember cp Method decryptSig)
return (Class (bToString className) access isInterface (fields++methods))
parseMember :: ConstantPool ->
(String -> Access -> String -> Member) ->
(B.ByteString -> B.ByteString) ->
Get Member
parseMember cp ctor decryptor = do
accessFlags <- readWord16
nameIndex <- readWord16
descrIndex <- readWord16
attCount <- readInt16
replicateM attCount $ do
skip 2
attLen <- readWord32
skip (fromIntegral attLen)
return $ ctor (bToString (getUTF8FromCP cp nameIndex))
(flagsToAccess accessFlags)
(bToString (decryptor (getUTF8FromCP cp descrIndex)))
data List' a = Nil'
| (:!) !a
!(List' a)
infixr 5 :!
toArray' (lo,hi) list = do
arr <- newArray_ (lo,hi)
fill hi list arr
return arr
fill i list arr = fill' i list
where fill' !i (b:!bs') = writeArray arr i b >> fill' (i1) bs'
fill' !i Nil' = return ()
parseConstantPool :: Word16 -> Get ConstantPool
parseConstantPool n = do
bs <- getRemainingLazyByteString
cpPosList <- parseConstantPool' (n1) 0 (0 :! Nil')
cpPosArray <- return $ runSTUArray (toArray' (0,n1) cpPosList)
return (ConstantPool cpPosArray bs)
parseConstantPool' :: Word16 -> Word16 -> List' Word16 -> Get (List' Word16)
parseConstantPool' 0 pos res = return res
parseConstantPool' n pos res = do
tag <- readByte
let skipRet n = do skip n; return n
delta <- fromIntegral `liftM` case tag of
1 -> do len <- readInt16
skip len
return (len+2)
2 -> error "Tag 2 is unused"
3 -> skipRet 4
4 -> skipRet 4
5 -> skipRet 8
6 -> skipRet 8
7 -> skipRet 2
8 -> skipRet 2
9 -> skipRet 4
10 -> skipRet 4
11 -> skipRet 4
12 -> skipRet 4
_ -> error $ "Unknown tag "++show tag++" at position "++show pos++" in CP"
let n' = if (tag==5 || tag==6) then (n2) else (n1)
let p = pos+delta+1
p `seq` case tag of
5 -> parseConstantPool' n' p (pos :! (0 :! res))
6 -> parseConstantPool' n' p (pos :! (0 :! res))
_ -> parseConstantPool' n' p (pos :! res)
decryptType :: B.ByteString -> B.ByteString
decryptType = onString (fst . decryptType')
decryptType' :: String -> (String, String)
decryptType' "" = ("","")
decryptType' (a:s) = case a of
'B' -> ("byte",s)
'C' -> ("char",s)
'D' -> ("double",s)
'F' -> ("float",s)
'I' -> ("int",s)
'J' -> ("long",s)
'S' -> ("short",s)
'Z' -> ("boolean",s)
'V' -> ("void",s)
'[' -> let (t,s') = decryptType' s in (t++"[]",s')
'L' -> go "" s
where go c (';':s) = (reverse c, s)
go c ('/':s) = go ('.':c) s
go c (a:s) = go (a:c) s
go _ _ = error $ "Unrecognized type ending with "++s
_ -> error $ "Unrecognized type: "++s
decryptSig :: B.ByteString -> B.ByteString
decryptSig = onString decryptSig'
decryptSig' :: String -> String
decryptSig' s = concat (if null parTypes
then ["()"]
else (intersperse " -> " parTypes))
++ " -> " ++ retType
where ('(':params, ')':ret) = break (==')') s
retType = fst (decryptType' ret)
parTypes = go [] params
go ps "" = reverse ps
go ps s = let (p, s') = decryptType' s in go (p:ps) s'
flagsToAccess :: Word16 -> Access
flagsToAccess w | 0 /= w.&.0x0001 = Public
| 0 /= w.&.0x0002 = Private
| 0 /= w.&.0x0004 = Protected
| otherwise = Package
bToString :: B.ByteString -> String
bToString = map (chr . fromIntegral) . B.unpack
bFromString :: String -> B.ByteString
bFromString = B.pack . map (fromIntegral . ord)
onString :: (String -> String) -> (B.ByteString -> B.ByteString)
onString f = bFromString . f . bToString
data ClassFileSource = ClassFile { path :: FilePath }
| JarFile { path :: FilePath }
| ClassPath { paths :: [ClassFileSource] }
data Location = InFile FilePath
| InJar { jarPath :: FilePath
, innerPath :: FilePath
}
deriving Show
parseFileSource :: ClassFileSource -> IO [(Location, Class)]
parseFileSource (ClassFile path) = do
f <- parseClassFile <$> B.readFile path
return [(InFile path, f)]
parseFileSource (JarFile jar) = do
archive <- toArchive <$> B.readFile jar
return [(InJar jar fileName, parseClassFile $ fromEntry entry)
| entry <- zEntries archive,
let fileName = eRelativePath entry,
".class" `isSuffixOf` fileName]
parseFileSource (ClassPath paths) = concat <$> (sequence $ parseFileSource <$> paths)