-- ------------------------------------------------------------ {- | Module : ColorizeSourceCode Copyright : Copyright (C) 2009 Uwe Schmidt License : BSD3 Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Portability: portable Colorize Source Code Supports Java and Haskell -} -- ------------------------------------------------------------ module Main where import Control.Arrow import Data.List import Data.Maybe import System import System.IO -- import the IO and commandline option stuff import System.Environment import System.Console.GetOpt import System.Exit import Text.Regex.XMLSchema.String import Text.XML.HXT.Arrow import Text.XML.HXT.Parser.XhtmlEntities -- ------------------------------------------------------------ data Process = P { inFilter :: String -> String , tokenRE :: Regex , markupRE :: Regex -> Regex , formatToken :: (String, String) -> String , formatDoc :: [String] -> String , outFilter :: String -> String , input :: Handle , output :: Handle , inputFile :: String } defaultProcess = P { inFilter = id , tokenRE = plainRE , markupRE = id , formatToken = uncurry (++) , formatDoc = unlines , outFilter = id , input = stdin , output = stdout , inputFile = " " } -- ------------------------------------------------------------ main :: IO () main = do argv <- getArgs p <- evalArgs (getOpt Permute options argv) s <- hGetContents (input p) hPutStr (output p) (process p s) hFlush (output p) hClose (output p) exitWith ExitSuccess options :: [OptDescr (String, String)] options = [ Option "h?" ["help"] (NoArg ("help", "1")) "this message" , Option "" ["plain"] (NoArg ("plain", "1")) "don't colorize lines" , Option "" ["haskell"] (NoArg ("haskell", "1")) "colorize haskell" , Option "" ["java"] (NoArg ("java", "1")) "colorize java" , Option "" ["cpp"] (NoArg ("cpp", "1")) "colorize C or C++" , Option "" ["sh"] (NoArg ("sh", "1")) "colorize sh or bash" , Option "" ["ruby"] (NoArg ("ruby", "1")) "colorize ruby" , Option "" ["bnf"] (NoArg ("bnf", "1")) "colorize extended BNF grammar rules" , Option "" ["ppl"] (NoArg ("ppl", "1")) "colorize ppl" , Option "" ["pplass"] (NoArg ("pplass", "1")) "colorize ppl assembler" , Option "n" ["number"] (NoArg ("number", "1")) "with line numbers" , Option "t" ["tabs"] (NoArg ("tabs", "1")) "substitute tabs by blanks" , Option "m" ["markup"] (NoArg ("markup", "1")) "text contains embedded markup" , Option "e" ["erefs"] (NoArg ("erefs", "1")) "resolve HTML entity refs before processing" , Option "o" ["output"] (ReqArg ((,) "output") "FILE") "output file, \"-\" stands for stdout" , Option "s" ["scan"] (NoArg ("scan", "1")) "just scan input, for testing" , Option "x" ["html"] (NoArg ("html", "1")) "html output" , Option "f" ["full"] (NoArg ("full", "1")) "full HTML document with header and css" ] exitErr :: String -> IO a exitErr msg = do hPutStrLn stderr msg usage exitWith (ExitFailure (-1)) evalArgs (opts, files, errs) | not (null errs) = exitErr ("illegal arguments " ++ show errs) | null files = evalOpts opts defaultProcess | not (null fns) = exitErr ("only one input file allowed") | otherwise = do inp <- openFile fn ReadMode evalOpts opts (defaultProcess { input = inp , inputFile = fn } ) where (fn:fns) = files evalOpts :: [(String, String)] -> Process -> IO Process evalOpts [] res = return res evalOpts (o:os) res = do res' <- evalOpt o res evalOpts os res' evalOpt :: (String, String) -> Process -> IO Process evalOpt ("help","1") _ = do usage exitWith ExitSuccess evalOpt ("output", "-") p = return $ p {output = stdout} evalOpt ("output", fn) p = do outp <- openFile fn WriteMode return $ p {output = outp} evalOpt ("haskell","1") p = return $ p { tokenRE = haskellRE } evalOpt ("java", "1") p = return $ p { tokenRE = javaRE } evalOpt ("cpp", "1") p = return $ p { tokenRE = cppRE } evalOpt ("sh", "1") p = return $ p { tokenRE = shRE } evalOpt ("ruby", "1") p = return $ p { tokenRE = rubyRE } evalOpt ("bnf", "1") p = return $ p { tokenRE = bnfRE } evalOpt ("ppl", "1") p = return $ p { tokenRE = pplRE } evalOpt ("pplass", "1") p = return $ p { tokenRE = pplassRE } evalOpt ("plain", "1") p = return $ p { tokenRE = plainRE } evalOpt ("scan", "1") p = return $ p { tokenRE = plainRE , formatToken = uncurry formatTok , formatDoc = formatHList } evalOpt ("number", "1") p = return $ p { formatDoc = numberLines >>> formatDoc p } evalOpt ("tabs", "1") p = return $ p { inFilter = inFilter p >>> substTabs } evalOpt ("erefs", "1") p = return $ p { inFilter = resolveHtmlEntities >>> inFilter p } evalOpt ("markup", "1") p = return $ p { markupRE = addMarkup } evalOpt ("html", "1") p = return $ p { formatToken = formatHtmlTok , formatDoc = formatHtmlDoc } evalOpt ("full", "1") p = return $ p { outFilter = outFilter p >>> fullHtml (inputFile p) } usage :: IO () usage = hPutStrLn stderr use where use = usageInfo header options header = "colorizeSourceCode - colorize source code with HTML, version 0.1.1" -- ------------------------------------------------------------ process :: Process -> String -> String process p = inFilter p >>> tokenizeSubexRE (markupRE p (tokenRE p)) >>> map (formatToken p) >>> concat >>> lines >>> formatDoc p >>> outFilter p addMarkup :: Regex -> Regex addMarkup = mkElse (parseRegex . mkLE $ markupT) tokenizeLines :: String -> [(String, String)] tokenizeLines = map (\ l -> ("",l ++ "\n")) . lines numberLines :: [String] -> [String] numberLines = zipWith addNum [1..] where addNum i l = "" ++ fmt 4 i ++ "" ++ l fmt l = sed (const " ") " " . reverse . take l . reverse . (replicate l ' ' ++) . show substTabs :: String -> String substTabs = subs 0 subs _ "" = "" subs i (x:xs) | x == '\t' = replicate (8 - (i `mod` 8)) ' ' ++ subs 0 xs | x == '\n' = x : subs 0 xs | otherwise = x : subs (i+1) xs -- ------------------------------------------------------------ resolveHtmlEntities :: String -> String resolveHtmlEntities = sed (replaceEntity . drop 1 . init) "&\\i\\c*;" where replaceEntity e = maybe ("&" ++ e ++ ";") ((:[]) . toEnum) . lookup e $ xhtmlEntities -- ------------------------------------------------------------ formatHList :: [String] -> String formatHList = ("[" ++) . (++ "\n]") . intercalate "\n, " formatTok :: String -> String -> String formatTok kw tok = " (" ++ show kw ++ ",\t" ++ show tok ++ "\t)\n" formatHtmlDoc = map (("