-- Executable : 'only' -- Copyright : (C) 2008 Andrew Robbins -- Maintainer : Andrew Robbins -- Stability : experimental module Main where import Control.Monad (when, unless) import Data.IORef import Data.Maybe import Data.List import System.Console.GetOpt import System.Environment (getArgs) import System.Exit import System.IO import System.IO.Unsafe import Text.ParserCombinators.Parsec import Text.Regex import qualified Data.Foldable as Fold -- global stuff progName = "only" -- usage stuff version = "only (GNU Only) 0.0.6.0\n" header = unlines [ version, "Usage:\n", " only -w EXPR FILES... Print words matching EXPR", " only -l EXPR FILES... Print lines matching EXPR", " only -l EXPR -w WORD FILES... Print words matching WORD from lines matching EXPR", "\nOptions:"] footer = unlines [ "\nExpressions:\n", " The matching expressions [EXPR] are one of:", "\tNUMBER Token (word/line) selection", "\t/REGEX/NUMBER Token (word/line) relative to match (0-based, +/-)", "\tNUMBER/REGEX/ Select which matches to print (1-based, +)", "\tNUMBER/REGEX/NUMBER Both", "\t/REGEX/:/REGEX/ Between two expressions", "", " where [NUMBER] is one of:", "\tAT One", "\tFROM:TO Range", "\tFROM:TO;STEP Range with step", "\tFROM;NEXT:TO Range with implicit step", "\tL,I,S,T,I,N,G Explicit listing"] {- manpage = unlines [ "\t/[REGEX]/=/[TEMPLATE]/"] "\tAlthough this utility is very similar to 'grep' or 'sed'", "in functionality, the '-A' and '-B' options are missing.", "However, the same functionality exists with the EXPR field,", "because '/patt/0:3' means -A3 and '/patt/-3:0' means -B3.", "", "[REGEX] is an Extended Regular Expression", "", -} options = [ Option ['h', '?'] ["help"] (NoArg ('h', "")) "Help and usage" -- , Option ['H'] ["help-man"] (NoArg ('H', "")) "Manpage and documentation" -- , Option ['F'] ["filenames"] (NoArg ('F', "")) "Print filenames" -- not implemented , Option ['V'] ["version"] (NoArg ('V', "")) "Verison number" -- , Option ['v'] ["invert"] (NoArg ('v', "")) "Invert match" -- not implemented -- , Option ['i'] ["insensitive"] (NoArg ('i', "")) "Case insensitive" -- not implemented -- , Option ['r'] ["recursive"] (NoArg ('r', "")) "Recursive descent" -- not implemented -- , Option ['N'] ["negative"] (NoArg ('N', "")) "Default to - (+ to override, like tail)" -- not implemented -- , Option ['s'] ["sort"] (NoArg ('s', "")) "Sort all indices before processing" -- not implemented -- , Option ['t'] ["type"] (ReqArg (\x -> ('t', x)) "NAME") "Custom expression type" -- , Option ['b'] ["bits"] (ReqArg (\x -> ('b', x)) "EXPR") "Bit expression" -- , Option ['B'] ["bytes"] (ReqArg (\x -> ('B', x)) "EXPR") "Byte expression" -- , Option ['c'] ["chars"] (ReqArg (\x -> ('c', x)) "EXPR") "Character expression" , Option ['W'] ["wordsep"] (ReqArg (\x -> ('W', x)) "EXPR") "Word seperator" , Option ['w'] ["words"] (ReqArg (\x -> ('w', x)) "EXPR") "Word expression" , Option ['l'] ["lines"] (ReqArg (\x -> ('l', x)) "EXPR") "Line expression"] -- , Option ['f'] ["files"] (ReqArg (\x -> ('f', x)) "EXPR") "File expression" -- , Option ['m'] ["match"] (ReqArg (\x -> ('m', x)) "EXPR") -- "Matching expression (see manpage for more)" -- data stuff type Match = (Int, String) data OnlyExpr = OnlyRange [Int] String String Int | OnlyExpr { oeAbs :: [Int], oePat :: String, oeRel :: [Int]} deriving (Read, Show, Eq) data OnlyCtx = OnlyLeaf | OnlyCtx { ocValue :: String, ocParts :: [OnlyCtx]} deriving (Read, Show, Eq) data OnlyMode = NoMode | FileMode -- not implemented | ByteMode String -- not implemented | CharMode String -- not implemented | WordMode String [Char] | LineMode String deriving (Read, Show, Eq) initCtx :: String -> OnlyCtx initCtx s = OnlyCtx s [OnlyLeaf] showCtx ctx = do putStrLn "---" print ctx -- parser stuff parseNum :: CharParser () Int parseNum = do -- this is where to implement '--negative' behaviour sign <- option "" (char '-' >> return "-") num <- many1 digit return (read $ sign ++ num) parseNums :: CharParser () [Int] parseNums = do lists <- sepBy ((try nexts) <|> (try steps) <|> (try range) <|> number) (char ',' >> return []) return (concat lists) where number :: CharParser () [Int] number = do num <- parseNum return [num] range :: CharParser () [Int] range = do start <- parseNum char ':' end <- parseNum return [start .. end] steps :: CharParser () [Int] steps = do start <- parseNum char ':' end <- parseNum char ';' step <- parseNum return [start, (start + step) .. end] nexts :: CharParser () [Int] nexts = do start <- parseNum char ';' next <- parseNum char ':' end <- parseNum return [start, next .. end] parseRegex :: CharParser () OnlyExpr parseRegex = (try pattRange) <|> (try pattIndex) <|> (try number) <|> word where word = do str <- many anyChar return (OnlyExpr [] str []) number = do abs <- parseNums if abs == [] then word else return (OnlyExpr abs "" []) regexChar = do ch <- noneOf $ ".,:;" ++ ['A'..'Z'] ++ ['a'..'z'] return ch regex = do ch <- regexChar pat <- many $ noneOf [ch] char ch return pat pattIndex = do -- N/regex/M expression abs <- parseNums pat <- regex rel <- parseNums return (OnlyExpr abs pat rel) pattRange = do -- N/begin/:/end/ expression abs <- parseNums pat1 <- regex -- rel1 <- parseNum -- only one number allowed! char ':' pat2 <- regex rel2 <- parseNum -- only one number allowed! return (OnlyRange abs pat1 pat2 rel2) readRegex :: FilePath -> String -> IO OnlyExpr readRegex path expr = if expr == "" then return (OnlyExpr [] "" []) else case parse parseRegex path expr of Left err -> ioError$userError (show err) Right oe -> return oe -- mode stuff {-# NOINLINE modesRef #-} {-# NOINLINE modesMod #-} {-# NOINLINE modesSet #-} {-# NOINLINE modesGet #-} {-# NOINLINE modeAdd #-} modesRef :: IORef [OnlyMode] modesRef = unsafePerformIO (newIORef []) modesMod :: ([OnlyMode] -> [OnlyMode]) -> IO () modesMod = modifyIORef modesRef modesSet :: [OnlyMode] -> IO () modesSet = writeIORef modesRef modesGet :: IO [OnlyMode] modesGet = readIORef modesRef modeAdd :: OnlyMode -> IO () modeAdd x = modifyIORef modesRef (++ [x]) -- file stuff {-# NOINLINE filesRef #-} {-# NOINLINE filesGet #-} {-# NOINLINE fileAdd #-} filesRef :: IORef [FilePath] filesRef = unsafePerformIO (newIORef []) filesGet :: IO [FilePath] filesGet = readIORef filesRef fileAdd :: FilePath -> IO () fileAdd x = modifyIORef filesRef (++ [x]) {-# NOINLINE fileRef #-} {-# NOINLINE fileSet #-} {-# NOINLINE fileGet #-} fileRef :: IORef FilePath fileRef = unsafePerformIO (newIORef "") fileSet :: FilePath -> IO () fileSet = writeIORef fileRef fileGet :: IO FilePath fileGet = readIORef fileRef -- exit stuff exitStr e s = do putStr s ; exitWith e showVersion = exitStr ExitSuccess version help = exitStr ExitSuccess (usageInfo header options ++ footer) --helpMan = exitStr ExitSuccess manpage helpExit msg = putStrLn msg >> help >> return ([], []) notImpl = exitStr ExitSuccess "not implemented" -- option stuff type Opt = (Char, String) processOpt :: [Opt] -> Opt -> IO () processOpt opts (key, value) = case key of 'h' -> help 'V' -> showVersion -- 'H' -> helpMan -- 'b' -> modeAdd $ ByteMode value -- 'c' -> modeAdd $ CharMode value 'W' -> return () 'w' -> modeAdd $ WordMode value $ maybe [] id (lookup 'W' opts) 'l' -> modeAdd $ LineMode value -- -- main logic stuff -- doFile :: [OnlyMode] -> FilePath -> IO OnlyCtx doFile [] "-" = getContents >>= (return . initCtx) doFile [] path = readFile path >>= (return . initCtx) doFile modes path = do fileSet path ctx <- doFile [] path ctx <- Fold.foldlM doMode ctx modes ctx <- Fold.foldrM unMode ctx modes putStr (ocValue ctx) return ctx doMode :: OnlyCtx -> OnlyMode -> IO OnlyCtx doMode context mode = case context of OnlyCtx _ [OnlyLeaf] -> case mode of WordMode expr ch -> doSep (sepWith ch) expr context mode LineMode expr -> doSep lines expr context mode FileMode -> return context OnlyCtx _ ctxs -> do ctxs' <- mapM (\ctx -> doMode (initCtx (ocValue ctx)) mode) ctxs return (context {ocParts = ctxs'}) doSep :: (String -> [String]) -> String -> OnlyCtx -> OnlyMode -> IO OnlyCtx doSep sep expr ctx@(OnlyCtx orig _) mode = do -- initial state let full = map (\n -> (n, getIndex seps n "")) [1..length seps] seps = sep orig -- normalize expression path <- fileGet oexpr <- readRegex path expr parts <- case oexpr of oe@(OnlyExpr abs pat rel) -> return [doParts full oe] oe@(OnlyRange _ _ _ _) -> do let oes = doRange full oe return (map (doParts full) oes) -- final processing return (ctx {ocParts = concat parts}) doAbs :: [a] -> [Int] -> [Int] doAbs some abs = if abs == [] then [1..length some] else abs doRel :: [Int] -> [Int] doRel rel = if rel == [] then [0] else rel doParts :: [Match] -> OnlyExpr -> [OnlyCtx] doParts full oe@(OnlyExpr abs pat rel) = parts where abs2ls :: [Match] -> Int -> Match abs2ls ms n = getIndex ms n (-1, "") rel2ls :: [Match] -> [Match] -> Match -> Int -> Match rel2ls fs as (na, _) nr = tupleLookup n fs where n = na + nr tupleLookup n xs = (n, (fromMaybe "")$lookup n xs) abss = [abs2ls some a | a <- doAbs some abs] rels = [rel2ls full abss a r | r <- doRel rel, a <- abss] parts = map (initCtx . snd) rels some = doMatch oe full doRange :: [Match] -> OnlyExpr -> [OnlyExpr] doRange full (OnlyRange abs pat1 pat2 rel) = oes where oes = map (m2exp ls) (doAbs ls abs) ls = filter (/=[]) $ map (toRange match2) match1 match1 = doMatch (OnlyExpr [] pat1 []) full match2 = doMatch (OnlyExpr [] pat2 []) full m2exp ls k = OnlyExpr [k] pat1 (getIndex ls k []) nextAfter :: Int -> [Match] -> [Match] nextAfter i ms = dropWhile (\(j,_) -> i >= j) ms toRange :: [Match] -> Match -> [Int] toRange ms m@(i,_) = case nextAfter i ms of (j,_):_ -> [0 .. j - i + rel] [] -> [] doMatch :: OnlyExpr -> [Match] -> [Match] doMatch oe = if pat == "" then id else filter (isJust . (matchRegex re) . snd) where noJust (Just []) = False noJust (Just _) = True noJust Nothing = False pat = oePat oe re = mkRegex pat unMode :: OnlyMode -> OnlyCtx -> IO OnlyCtx unMode mode ctx@(OnlyCtx _ [OnlyLeaf]) = return ctx unMode mode context = do let parts = ocParts context (if leaf parts then simplify else recurse) mode context where leaf ps = all (\c -> ocParts c == [OnlyLeaf]) ps recurse mode context = do parts <- mapM (unMode mode) (ocParts context) return (context {ocParts = parts}) simplify mode context = case mode of WordMode _ ch -> unSep (unsepWith ch) context LineMode _ -> unSep unlines context _ -> return context unSep :: ([String] -> String) -> OnlyCtx -> IO OnlyCtx unSep unsep context = return $ context {ocParts = [OnlyLeaf], ocValue = unsep $ map ocValue (ocParts context)} split :: Eq a => a -> [a] -> [[a]] split c = map(tail).groupBy(const(c/=)).(c:) sepWith :: [Char] -> String -> [String] sepWith [c] str = split c str sepWith [] str = words str unsepWith :: [Char] -> [String] -> String unsepWith [c] ls = concat $ intersperse [c] ls unsepWith [] ls = unwords ls getIndex :: [a] -> Int -> a -> a getIndex xs n nil = if n < 1 then xs !! ((length xs) + n) else if n > (length xs) then nil else xs !! (n - 1) main :: IO () main = do -- get arguments args <- getArgs let optErrs = getOpt Permute options args (opts, nonOpts) <- case optErrs of ([], [], _) -> helpExit "No arguments!" (op, n, []) -> return (op, n) (_, _, err) -> ioError$userError (concat err) -- process options mapM (processOpt opts) opts mapM fileAdd nonOpts -- handle some edge cases when (nonOpts == []) $ helpExit "No files!" >> return () when (opts == []) $ modeAdd (LineMode "") -- do the tasks fileList <- filesGet modeList <- modesGet contexts <- mapM (doFile modeList) fileList -- possibly more return ()