module Main where -- TODO -- ---- -- o figure out how to use utf8-string for the decoding with -- errors [this lets us get rid of the UTF8 module] -- o colourise badly encoded characters (switch to something -- easier to type than guillmets) -- o fancy flags import Control.Monad (forM_) import Data.Bits import qualified Data.ByteString as B import Data.Char (isPrint) import Data.List (intersperse, intercalate, inits) import Data.List.Split (split, keepDelimsR, whenElt) import Data.Word (Word8) import System.Environment (getArgs) import System.IO (stdin, hClose, IOMode(ReadMode), openBinaryFile) import qualified Numeric as N import UTF8 (decodeOne, Error(..)) main :: IO () main = do args <- getArgs hs <- case args of [] -> return [stdin] _ -> mapM (\f -> openBinaryFile f ReadMode) args forM_ hs $ \h -> do bs <- B.unpack `fmap` B.hGetContents h putStr . unlines $ map (dump h_size) $ concatMap (clump h_size) $ split (keepDelimsR (whenElt isNewline)) $ decode bs hClose h where clump sz = clumpBy (length . showHex . decoratedBytes) sz 1 h_size = 40 -- keep in mind that each byte takes 2 chars a -- space; and that each code point takes at -- most 4 bytes. So we need (3 * 4 * n) just -- to display the hex part of the window isNewline :: HexChar -> Bool isNewline (HexChar c _) = c == '\n' isNewline _ = False dump :: Int -> [HexChar] -> String dump hex_sz cs = hexPart ++ padding ++ charPart where hexPart = intercalate " " (map showHex dbs) dbs = map decoratedBytes cs charPart = concatMap show cs padding = replicate (hex_sz - length hexPart) ' ' -- | break a list of items into sublists of length < the clump -- size, taking into consideration that each item in the clump -- will have a sep-size padding interspersed -- -- any item whose length is greater than the clump size -- is put into a clump by itself -- -- given a length function -- @clumpBy (length.show) 8 ["hello", "this", "is", "a", "list"]@ clumpBy :: (a -> Int) -> Int -> Int -> [a] -> [[a]] clumpBy f l sep items = iter [] items where iter acc [] = reverse acc iter acc cs = case break toobig (drop 1 $ inits cs) of ([],_) -> next 1 -- first too big (_,[]) -> iter (cs:acc) [] -- none too big (_,(x:_)) -> next (length x - 1) where next n = iter (take n cs : acc) (drop n cs) toobig x = (sum . intersperse sep . map f) x > l -- ---------------------------------------------------------------------- -- -- ---------------------------------------------------------------------- data ByteSequence = ByteSequence [Word8] Char -- bytes and sep decoratedBytes :: HexChar -> ByteSequence decoratedBytes c = ByteSequence bytes seps where bytes = getBytes c seps = case c of HexChar _ _ -> '-' HexError _ _ -> '#' -- ------------------------------------------------------------------- -- -- ------------------------------------------------------------------- class ShowHex a where showHex :: a -> String instance ShowHex Int where showHex x = N.showHex x "" instance ShowHex Word8 where showHex w = showHex big ++ showHex small where small, big :: Int small = fromIntegral $ w .&. 0x0f big = fromIntegral $ shiftR w 4 instance ShowHex HexChar where showHex h = concatMap showHex (getBytes h) instance ShowHex ByteSequence where showHex (ByteSequence bs s) = intercalate [s] (map showHex bs) -- ------------------------------------------------------------------- -- -- ------------------------------------------------------------------- data HexChar = HexChar Char [Word8] | HexError (Error,Int) [Word8] instance Show HexChar where show (HexChar '\r' _) = noshow show (HexChar '\n' _) = noshow show (HexChar '\t' _) = noshow show (HexChar c _) | isPrint c = [c] show (HexChar _ _) = noshow show (HexError _ _) = noshow noshow :: String noshow = "." getBytes :: HexChar -> [Word8] getBytes (HexChar _ ws) = ws getBytes (HexError _ ws) = ws decode :: [Word8] -> [HexChar] decode bytes = iter 0 [] bytes where iter :: Int -> [HexChar] -> [Word8] -> [HexChar] iter _ cs [] = reverse cs iter idx cs bs = case decodeOne bs of (Left e, n, rest) -> let hc = HexError (e,idx) (take n bs) in iter (idx+n) (hc:cs) rest (Right c, n, rest) -> let hc = HexChar c (take n bs) in iter (idx+n) (hc:cs) rest