{-
Copyright (C) 2010-2015 Dr. Alistair Ward
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
-}
{- |
[@AUTHOR@] Dr. Alistair Ward
[@DESCRIPTION@]
* Contains the entry-point to the program.
* Provides command-line access to "ExtendedRegExChar".
* Defines the permissible command-line arguments; aping /egrep/, but with the addition of /--verbose/, which enables one to see the details of the data-capture.
[@TODO@] Coordinate the output of 'printVersion' with the definition in '.cabal'; this may not be feasible.
-}
module Main(main) where
import qualified Data.Default
import qualified Data.List
import qualified Data.Version
import qualified Grecce.CommandOptions as CommandOptions
import qualified Grecce.Grep as Grep
import qualified Grecce.Test.Assert.RegExOptsChar as Test.Assert.RegExOptsChar
import qualified Grecce.Test.Performance.ExtendedRegEx as Test.Performance.ExtendedRegEx
import qualified Grecce.Test.Performance.ExtendedRegExTestsNegative as Test.Performance.ExtendedRegExTestsNegative
import qualified Grecce.Test.Performance.ExtendedRegExTestsPositive as Test.Performance.ExtendedRegExTestsPositive
import qualified Paths_regexchar as Paths -- Either local stub, or package-instance autogenerated by 'Setup.hs build'.
import qualified RegExDot.CompilationOptions
import qualified RegExDot.ExecutionOptions
import qualified System.Console.GetOpt as G
import qualified System.Environment
import qualified System.Exit
import qualified System.Info
import qualified System.IO
import qualified System.IO.Error
-- | Used to thread user-defined command-line options, through the list of functions which implement them; each action receives the options accumulated so far, & returns (in the IO-monad) the options to pass to the next action.
type CommandLineAction = CommandOptions.CommandOptions -> IO CommandOptions.CommandOptions -- Supplied as the type-argument to 'G.OptDescr'.
{- |
	* Parses the whole of the specified string into a value of the required type.

	* Succeeds only when there's exactly one parse, which leaves no unconsumed input.

	* Otherwise calls 'error', with a message composed from the specified prefix followed by the quoted offending string.
-}
read' :: Read a => String -> String -> a
read' errorMessage s
  | [(value, "")] <- reads s = value
  | otherwise = error $ errorMessage ++ show s
-- | Parses a command-line argument via @read'@; should the parse fail, the resulting error-message is prefixed to identify a command-line argument as the culprit.
readCommandArg :: Read a => String -> a
readCommandArg s = read' "failed to parse command-line argument " s
{- |
	* Parses the command-line arguments, to determine 'CommandOptions.CommandOptions'.

	* Each recognised flag maps to a 'CommandLineAction'; these are applied in the order specified, starting from 'Data.Default.def'.

	* Because 'G.RequireOrder' is used, option-processing stops at the first non-option argument.

	* The remaining arguments are passed, with the accumulated options, to 'Grep.grep';
	according to the original author's description, they're interpreted as a /regex/, followed by the names of the files to which it is applied.

	* Any error reported by 'G.getOpt', is thrown as a user-error, with the usage-message appended.
-}
main :: IO ()
main = do
  progName <- System.Environment.getProgName

  let
    -- The help-text, built from the option-descriptions below.
    usageMessage :: String
    usageMessage = "Usage:\t" ++ G.usageInfo progName optDescrList ++ " [, ...]"

    -- Define the command-line options, & the 'CommandLineAction's used to handle them.
    optDescrList :: [G.OptDescr CommandLineAction]
    optDescrList = [
      -- String [String] (G.ArgDescr CommandLineAction) String
      G.Option "?" ["help"] (G.NoArg $ const printUsage) "Output:\tdisplay this help-text & then exit.",
      G.Option "" ["verbose"] (G.NoArg $ return {-to IO-monad-} . CommandOptions.setVerbose) ("Output:\tsee how the input data was captured by the RegEx; default '" ++ show (CommandOptions.verbose Data.Default.def) ++ "'. CAVEAT: to be effective, it should precede other options."),
      G.Option "" ["version"] (G.NoArg $ const printVersion) "Output:\tprint version-information & then exit.",
      G.Option "c" ["count"] (G.NoArg $ return {-to IO-monad-} . CommandOptions.setCountMatches) ("Output:\tprint only a count of matching lines per file; default '" ++ show (CommandOptions.countMatches Data.Default.def) ++ "'."),
      G.Option "l" ["files-with-matches"] (G.NoArg $ return {-to IO-monad-} . CommandOptions.setListFilesWithMatches) ("Output:\tlist names of files containing at least one match; default '" ++ show (CommandOptions.listFilesWithMatches Data.Default.def) ++ "'."),
      G.Option "L" ["files-without-match"] (G.NoArg $ return {-to IO-monad-} . CommandOptions.setListFilesWithoutMatch) ("Output:\tlist names of files not containing any matches; default '" ++ show (CommandOptions.listFilesWithoutMatch Data.Default.def) ++ "'."),
      G.Option "n" ["number"] (G.NoArg $ return {-to IO-monad-} . CommandOptions.setPrependLineNumbers) ("Output:\tprepend the line-number of the input, to the output; default '" ++ show (CommandOptions.prependLineNumbers Data.Default.def) ++ "'."),
      G.Option "v" ["invert-match"] (G.NoArg $ return {-to IO-monad-} . CommandOptions.setInvertMatch) ("Output:\tselect non-matching lines; default '" ++ show (CommandOptions.invertMatch Data.Default.def) ++ "'."),
      G.Option "" ["printCurrentSettings"] (G.NoArg printCurrentSettings) "Output:\tdisplay the currently defined options & then exit.",
      -- N.B.: the placeholder given to 'G.ReqArg' only affects the usage-text; a null one would leave "--regex=" dangling.
      G.Option "e" ["regex"] (setExtendedRegExChar `G.ReqArg` "RegEx") "Input:\tdefine the regex.",
      G.Option "f" ["file"] (readExtendedRegExCharFromFile `G.ReqArg` "File-path") "Input:\tread the regex from file.",
      G.Option "" ["performance"] (
        extendedRegExCharPerformance `G.ReqArg` "(Int,Either [TestName])"
      ) (
        "Test:\tcompare performance with other RegEx-engines, using the specified pair (, Left " ++ show [
          minBound :: Test.Performance.ExtendedRegExTestsNegative.TestName .. maxBound
        ] ++ " or Right " ++ show [
          minBound :: Test.Performance.ExtendedRegExTestsPositive.TestName .. maxBound
        ] ++ "). N.B.: specify this option last."
      ),
      G.Option "" ["assert"] (assertFromFile `G.ReqArg` "File-path") "Test:\trun the assertions, defined in the specified file, & then exit. N.B.: specify this option last.",
      G.Option "" ["assertPosix"] (assertPosixFromFile `G.ReqArg` "File-path") "Test:\trun the assertions, defined in the specified file, against Text.Regex.Posix, & then exit. N.B.: specify this option last.",
      G.Option "" ["abortTrialRepetitionsOnInherentFailure"] (setExecutionOption abortTrialRepetitionsOnInherentFailure `G.ReqArg` "Bool") ("Performance:\tif an alternative can't match, irrespective of the subsequent concatenation, then terminate futile trial repetitions; default '" ++ show (RegExDot.ExecutionOptions.abortTrialRepetitionsOnInherentFailure Data.Default.def) ++ "'."),
      -- G.Option "" ["abortTrialRepetitionsOnZeroConsumption"] (setExecutionOption abortTrialRepetitionsOnZeroConsumption `G.ReqArg` "Bool") ("Performance:\tcheck for zero data-consumption by the n-th repetition of an alternative, before considering (n + 1); default '" ++ show (RegExDot.ExecutionOptions.abortTrialRepetitionsOnZeroConsumption Data.Default.def) ++ "'."),
      -- G.Option "" ["bypassInputDataForLiberalConsumer"] (setExecutionOption bypassInputDataForLiberalConsumer `G.ReqArg` "Bool") ("Performance:\tbypass reading of the unconsumed input data, if the remaining RegEx can consume a sufficient quantity of anything; default '" ++ show (RegExDot.ExecutionOptions.bypassInputDataForLiberalConsumer Data.Default.def) ++ "'."),
      G.Option "" ["catchIncompatibleAnchors"] (setExecutionOption catchIncompatibleAnchors `G.ReqArg` "Bool") ("Performance:\tavoid futile trial solutions, involving repetitions of anchored alternatives, which must consume data; default '" ++ show (RegExDot.ExecutionOptions.catchIncompatibleAnchors Data.Default.def) ++ "'."),
      G.Option "" ["checkExistenceOfInelasticTail"] (setExecutionOption checkExistenceOfInelasticTail `G.ReqArg` "Bool") ("Performance:\tif the regex ends in one or more Meta-data, whose repetition-range is precisely defined, check that this sequence exists at the end of the input data; default '" ++ show (RegExDot.ExecutionOptions.checkExistenceOfInelasticTail Data.Default.def) ++ "'."),
      G.Option "" ["checkForUnconsumableData"] (setExecutionOption checkForUnconsumableData `G.ReqArg` "Bool") ("Performance:\tcheck whether there's no possibility of consuming some of the input data; default '" ++ show (RegExDot.ExecutionOptions.checkForUnconsumableData Data.Default.def) ++ "'."),
      G.Option "" ["moderateGreed"] (setExecutionOption moderateGreed `G.ReqArg` "Bool") ("Performance:\tgreedily consume data, only up to the limit beyond which, future requirements would be compromised; default '" ++ show (RegExDot.ExecutionOptions.moderateGreed Data.Default.def) ++ "'."),
      G.Option "" ["permitReorderingOfAlternatives"] (setExecutionOption permitReorderingOfAlternatives `G.ReqArg` "Bool") ("Performance:\tpermit alternatives to be re-ordered, in an attempt to process the cheapest first; default '" ++ show (RegExDot.ExecutionOptions.permitReorderingOfAlternatives Data.Default.def) ++ "'."),
      G.Option "" ["preferAlternativesWhichFeedTheGreedy"] (setExecutionOption preferAlternativesWhichFeedTheGreedy `G.ReqArg` "Bool") ("Preference:\tprefer solutions in which the choice of alternatives directs data from non-greedy to greedy consumers; default '" ++ show (RegExDot.ExecutionOptions.preferAlternativesWhichFeedTheGreedy Data.Default.def) ++ "'."),
      G.Option "" ["preferAlternativesWhichMimickUnrolling"] (setExecutionOption preferAlternativesWhichMimickUnrolling `G.ReqArg` "Bool") ("Preference:\tprefer solutions in which the choice of alternatives consumes data like the unrolled repeatable group; default '" ++ show (RegExDot.ExecutionOptions.preferAlternativesWhichMimickUnrolling Data.Default.def) ++ "'."),
      G.Option "" ["preferFewerRepeatedAlternatives"] (setExecutionOption preferFewerRepeatedAlternatives `G.ReqArg` "Bool") ("Preference:\tprefer solutions employing fewer repetitions of alternatives, to discourage the capture of null lists; default '" ++ show (RegExDot.ExecutionOptions.preferFewerRepeatedAlternatives Data.Default.def) ++ "'."),
      G.Option "" ["unrollRepeatedSingletonAlternative"] (setExecutionOption unrollRepeatedSingletonAlternative `G.ReqArg` "Bool") ("Performance:\tunroll repetitions of singleton alternatives; this doesn't affect the result; default '" ++ show (RegExDot.ExecutionOptions.unrollRepeatedSingletonAlternative Data.Default.def) ++ "'."),
      G.Option "" ["useFirstMatchAmongAlternatives"] (setExecutionOption useFirstMatchAmongAlternatives `G.ReqArg` "Bool") ("Performance:\trather than performing an exhaustive search for the optimal choice amongst alternatives, merely select the first that matches; conform to Perl rather than Posix; default '" ++ show (RegExDot.ExecutionOptions.useFirstMatchAmongAlternatives Data.Default.def) ++ "'."),
      G.Option "" ["validateMinConsumptionOfAlternatives"] (setExecutionOption validateMinConsumptionOfAlternatives `G.ReqArg` "Bool") ("Performance:\twhen the number of repetitions of a group of alternatives is precisely specified, check the availability of the resulting minimum data-requirement; default '" ++ show (RegExDot.ExecutionOptions.validateMinConsumptionOfAlternatives Data.Default.def) ++ "'."),
      G.Option "" ["complyStrictlyWithPosix"] (setCompilationOption complyStrictlyWithPosix `G.ReqArg` "Bool") ("Output:\tdefine the offset of captured data, corresponding to a sub-expression which matched zero times, as the artificial value -1 specified by Posix; currently only affects results of 'assert'; default '" ++ show (RegExDot.CompilationOptions.complyStrictlyWithPosix Data.Default.def) ++ "'.")
      ] where
        -- Both write to stderr, then terminate the process successfully; they never return any options.
        printVersion, printUsage :: IO CommandOptions.CommandOptions
        printVersion = System.IO.hPutStrLn System.IO.stderr (
          showString progName . showChar '-' . showsVersion Paths.version . showString "\n\nCompiled by " . showString System.Info.compilerName . showChar '-' . showsVersion System.Info.compilerVersion . showString ".\n\nCopyright (C) 2010-2017 " . showString author . showString ".\nThis program comes with ABSOLUTELY NO WARRANTY.\nThis is free software, and you are welcome to redistribute it under certain conditions.\n\nWritten by " $ showString author "."
          ) >> System.Exit.exitSuccess where
            author :: String
            author = "Dr. Alistair Ward"

            -- Renders the version-branch as dot-separated integers.
            showsVersion :: Data.Version.Version -> ShowS
            showsVersion = foldr (.) id . Data.List.intersperse (showChar '.') . map shows . Data.Version.versionBranch

        printUsage = System.IO.hPutStrLn System.IO.stderr usageMessage >> System.Exit.exitSuccess

        -- Prints the options accumulated so far to stdout, then terminates the process successfully.
        printCurrentSettings :: CommandLineAction
        printCurrentSettings commandOptions = print commandOptions >> System.Exit.exitSuccess

        extendedRegExCharPerformance, readExtendedRegExCharFromFile, assertFromFile, assertPosixFromFile, setExtendedRegExChar :: String -> CommandLineAction

        -- Runs the performance-test described by the parsed pair, using the current execution-options, then exits.
        extendedRegExCharPerformance s commandOptions = (
          uncurry $ Test.Performance.ExtendedRegEx.run (CommandOptions.executionOptions commandOptions)
          ) (readCommandArg s) >> System.Exit.exitSuccess

        -- Takes the regex from the first non-empty line of the file; a file lacking any such line is reported explicitly, rather than by the opaque failure of 'head'.
        readExtendedRegExCharFromFile fileName commandOptions = do
          contents <- readFile fileName

          case dropWhile null $ lines contents of
            regExLine : _ -> setExtendedRegExChar regExLine commandOptions
            [] -> System.IO.Error.ioError . System.IO.Error.userError $ "no regex was found in file " ++ show fileName ++ "."

        -- Runs the tests read from the file, using the current compilation & execution options, then exits.
        assertFromFile fileName commandOptions = do
          tests <- Test.Assert.RegExOptsChar.readTests fileName

          exitReportingFailures $ Test.Assert.RegExOptsChar.findFailures (CommandOptions.compilationOptions commandOptions) (CommandOptions.executionOptions commandOptions) tests

        -- As 'assertFromFile', but using 'Test.Assert.RegExOptsChar.findFailuresPosix', which takes no options.
        assertPosixFromFile fileName _ = do
          tests <- Test.Assert.RegExOptsChar.readTests fileName

          exitReportingFailures $ Test.Assert.RegExOptsChar.findFailuresPosix tests

        -- Exits successfully when there are no failures; otherwise prints each failure to stdout & exits with status 2.
        -- NOTE(review): assumes that the failures are returned as a list; confirm against "Grecce.Test.Assert.RegExOptsChar".
        exitReportingFailures :: Show failure => [failure] -> IO a
        exitReportingFailures failedTests
          | null failedTests = System.Exit.exitSuccess
          | otherwise = do
              mapM_ print failedTests

              System.Exit.exitWith $ System.Exit.ExitFailure 2

        -- Records the parsed regex, but refuses to overwrite one which has already been defined.
        setExtendedRegExChar s commandOptions = case CommandOptions.extendedRegExChar commandOptions of
          Just extendedRegExChar -> error $ "The regex has already been defined as /" ++ show extendedRegExChar ++ "/."
          _ -> return {-to IO-monad-} commandOptions { CommandOptions.extendedRegExChar = Just $ readCommandArg s }

        -- Each mutator parses its string-argument, & stores the result in the eponymous field of the execution-options.
        abortTrialRepetitionsOnInherentFailure, {-abortTrialRepetitionsOnZeroConsumption, bypassInputDataForLiberalConsumer,-} catchIncompatibleAnchors, checkExistenceOfInelasticTail, checkForUnconsumableData, moderateGreed, permitReorderingOfAlternatives, preferAlternativesWhichFeedTheGreedy, preferAlternativesWhichMimickUnrolling, preferFewerRepeatedAlternatives, unrollRepeatedSingletonAlternative, useFirstMatchAmongAlternatives, validateMinConsumptionOfAlternatives :: CommandOptions.ExecutionOptionsMutator
        abortTrialRepetitionsOnInherentFailure s e = e { RegExDot.ExecutionOptions.abortTrialRepetitionsOnInherentFailure = readCommandArg s }
        -- abortTrialRepetitionsOnZeroConsumption s e = e { RegExDot.ExecutionOptions.abortTrialRepetitionsOnZeroConsumption = readCommandArg s }
        -- bypassInputDataForLiberalConsumer s e = e { RegExDot.ExecutionOptions.bypassInputDataForLiberalConsumer = readCommandArg s }
        catchIncompatibleAnchors s e = e { RegExDot.ExecutionOptions.catchIncompatibleAnchors = readCommandArg s }
        checkExistenceOfInelasticTail s e = e { RegExDot.ExecutionOptions.checkExistenceOfInelasticTail = readCommandArg s }
        checkForUnconsumableData s e = e { RegExDot.ExecutionOptions.checkForUnconsumableData = readCommandArg s }
        moderateGreed s e = e { RegExDot.ExecutionOptions.moderateGreed = readCommandArg s }
        permitReorderingOfAlternatives s e = e { RegExDot.ExecutionOptions.permitReorderingOfAlternatives = readCommandArg s }
        preferAlternativesWhichFeedTheGreedy s e = e { RegExDot.ExecutionOptions.preferAlternativesWhichFeedTheGreedy = readCommandArg s }
        preferAlternativesWhichMimickUnrolling s e = e { RegExDot.ExecutionOptions.preferAlternativesWhichMimickUnrolling = readCommandArg s }
        preferFewerRepeatedAlternatives s e = e { RegExDot.ExecutionOptions.preferFewerRepeatedAlternatives = readCommandArg s }
        unrollRepeatedSingletonAlternative s e = e { RegExDot.ExecutionOptions.unrollRepeatedSingletonAlternative = readCommandArg s }
        useFirstMatchAmongAlternatives s e = e { RegExDot.ExecutionOptions.useFirstMatchAmongAlternatives = readCommandArg s }
        validateMinConsumptionOfAlternatives s e = e { RegExDot.ExecutionOptions.validateMinConsumptionOfAlternatives = readCommandArg s }

        -- As above, but for the compilation-options.
        complyStrictlyWithPosix :: CommandOptions.CompilationOptionsMutator
        complyStrictlyWithPosix s e = e { RegExDot.CompilationOptions.complyStrictlyWithPosix = readCommandArg s }

        -- Lift the pure option-setters exported by "Grecce.CommandOptions", into 'CommandLineAction's.
        setCompilationOption :: CommandOptions.CompilationOptionsMutator -> String -> CommandLineAction
        setCompilationOption compilationOptionsMutator s = return {-to IO-monad-} . CommandOptions.setCompilationOption compilationOptionsMutator s

        setExecutionOption :: CommandOptions.ExecutionOptionsMutator -> String -> CommandLineAction
        setExecutionOption executionOptionsMutator s = return {-to IO-monad-} . CommandOptions.setExecutionOption executionOptionsMutator s

  args <- System.Environment.getArgs

  -- G.getOpt :: G.ArgOrder CommandLineAction -> [G.OptDescr CommandLineAction] -> [String] -> ([CommandLineAction], [String], [String])
  case G.getOpt G.RequireOrder optDescrList args of
    (commandLineActions, nonOptions, []) -> do
      commandOptions <- Data.List.foldl' (>>=) (return {-to IO-monad-} Data.Default.def) commandLineActions -- Sequentially transform the 'CommandOptions', using 'CommandLineAction's corresponding to the specified command-line flags.

      Grep.grep commandOptions nonOptions -- Perform the requested match, against the list of input-data files.
    (_, _, errors) -> System.IO.Error.ioError . System.IO.Error.userError $ concat errors ++ usageMessage -- Throw.