module Smap.Flags
( Hdl(Std, File)
, SetDescriptor(Keyed, UnKeyed)
, Command(Union, Subtract, Intersect, Xor)
, Accuracy(Approximate, Exact)
, command
)
where
import qualified Data.Attoparsec.Text as A
import qualified Data.Text as Text
import qualified Options.Applicative as O
import Control.Applicative (many, (<|>))
import Crypto.MAC.SipHash (SipKey(..))
data Hdl = Std
| File FilePath
data SetDescriptor = Keyed Hdl Hdl
| UnKeyed Hdl
data Accuracy = Approximate SipKey
| Exact
data Command = Union Accuracy [SetDescriptor] Hdl
| Subtract Accuracy SetDescriptor [SetDescriptor] Hdl
| Intersect Accuracy [SetDescriptor] Hdl
| Xor Accuracy [SetDescriptor] Hdl
hdl :: A.Parser Hdl
hdl = stdin <|> path
where
stdin = Std <$ A.char '-'
path = File . Text.unpack <$> A.takeWhile (/= ',')
setExplanation :: String
setExplanation = "Use '-' for stdin. Use +keyfile,valfile for separate keys and values."
multiSetExplanation :: String
multiSetExplanation = "Can specify 0 or more files. " ++ setExplanation
outHdl :: O.Parser Hdl
outHdl = O.option
(aToO hdl)
(O.metavar "OUTFILE" <> O.short 'o' <> O.long "out" <> O.value Std <> O.help "Defaults to stdout."
)
inFiles :: O.Parser [SetDescriptor]
inFiles = many $ O.argument (aToO setDescriptor) (O.metavar "FILE*" <> O.help multiSetExplanation)
setDescriptor :: A.Parser SetDescriptor
setDescriptor =
(keyed <|> unkeyed)
<* (A.endOfInput A.<?> "more filepath characters than expected")
A.<?> "Could not parse filepath"
where
unkeyed = UnKeyed <$> hdl
keyed = do
_ <- A.char '+'
k <- hdl
_ <- A.char ','
v <- hdl
return (Keyed k v)
aToO :: A.Parser a -> O.ReadM a
aToO p = O.eitherReader (A.parseOnly p . Text.pack)
accuracy :: O.Parser Accuracy
accuracy = approx <|> exact
where
approx = O.flag'
(Approximate (SipKey 0 0))
(O.short 'a' <> O.long "approximate" <> O.help
(concat
[ "For deduplication, store a 64-bit siphash rather than the whole line. "
, "Can save memory, depending on the set operation"
]
)
)
exact = pure Exact
catCommand :: O.Mod O.CommandFields Command
catCommand = O.command "cat" $ O.info
(value O.<**> O.helper)
(O.fullDesc <> O.progDesc "Set union. Output everything from the input sets, deduplicated.")
where value = Union <$> accuracy <*> inFiles <*> outHdl
subCommand :: O.Mod O.CommandFields Command
subCommand = O.command "sub" $ O.info
(value O.<**> O.helper)
(O.fullDesc <> O.progDesc
(concat
[ "Set subtraction. "
, "Given stream A and sets B,C,..., output values in A but not in B,C,... "
, "Does not deduplicate values from A."
]
)
)
where
value = Subtract <$> accuracy <*> plus <*> minuses <*> outHdl
plus = O.argument (aToO setDescriptor) (O.metavar "PLUSFILE" <> O.help setExplanation)
minuses =
many $ O.argument (aToO setDescriptor) (O.metavar "MINUSFILE*" <> O.help multiSetExplanation)
intersectCommand :: O.Mod O.CommandFields Command
intersectCommand = O.command "int" $ O.info
(value O.<**> O.helper)
(O.fullDesc <> O.progDesc
"Set intersection. Output values which are in all input sets. Deduplicates values."
)
where value = Intersect <$> accuracy <*> inFiles <*> outHdl
xorCommand :: O.Mod O.CommandFields Command
xorCommand = O.command "xor" $ O.info
(value O.<**> O.helper)
(O.fullDesc <> O.progDesc
(concat
[ "Set xor/symmetric difference. "
, "Given files A and B, output all values which are A or B but not both. "
, "Given more than 2 files, output everything that appears in an odd number of files."
]
)
)
where value = Xor <$> accuracy <*> inFiles <*> outHdl
command :: IO Command
command = O.execParser
(O.info
((O.subparser (catCommand <> subCommand <> intersectCommand <> xorCommand)) O.<**> O.helper)
(O.fullDesc <> O.progDesc "Use -h for help. Available commands: cat, sub, int, xor.")
)