module Smap.Flags
  ( Hdl(Std, File)
  , SetDescriptor(Keyed, UnKeyed)
  , Command(Union, Subtract, Intersect, Xor)
  , Accuracy(Approximate, Exact)
  , command
  )
where

import qualified Data.Attoparsec.Text as A
import qualified Data.Text as Text
import qualified Options.Applicative as O
import Control.Applicative (many, (<|>))
import Crypto.MAC.SipHash (SipKey(..))

data Hdl = Std -- stdin/stdout
         | File FilePath

data SetDescriptor = Keyed Hdl Hdl -- Keys are in the first file, values in the second (map behavior)
                   | UnKeyed Hdl -- key = value (set behavior)

data Accuracy = Approximate SipKey -- Don't keep the actual value as a key, just its hash
              | Exact -- Keep the actual value as a key

data Command = Union Accuracy [SetDescriptor] Hdl
             | Subtract Accuracy SetDescriptor [SetDescriptor] Hdl
             | Intersect Accuracy [SetDescriptor] Hdl
             | Xor Accuracy [SetDescriptor] Hdl

hdl :: A.Parser Hdl
hdl = stdin <|> path
 where
  stdin = Std <$ A.char '-'
  path  = File . Text.unpack <$> A.takeWhile (/= ',')

setExplanation :: String
setExplanation = "Use '-' for stdin. Use +keyfile,valfile for separate keys and values."

multiSetExplanation :: String
multiSetExplanation = "Can specify 0 or more files. " ++ setExplanation

outHdl :: O.Parser Hdl
outHdl = O.option
  (aToO hdl)
  (O.metavar "OUTFILE" <> O.short 'o' <> O.long "out" <> O.value Std <> O.help "Defaults to stdout."
  )

inFiles :: O.Parser [SetDescriptor]
inFiles = many $ O.argument (aToO setDescriptor) (O.metavar "FILE*" <> O.help multiSetExplanation)

setDescriptor :: A.Parser SetDescriptor
setDescriptor =
  (keyed <|> unkeyed)
    <*    (A.endOfInput A.<?> "more filepath characters than expected")
    A.<?> "Could not parse filepath"
 where
  unkeyed = UnKeyed <$> hdl
  keyed   = do
    _ <- A.char '+'
    k <- hdl
    _ <- A.char ','
    v <- hdl
    return (Keyed k v)

aToO :: A.Parser a -> O.ReadM a
aToO p = O.eitherReader (A.parseOnly p . Text.pack)

accuracy :: O.Parser Accuracy
accuracy = approx <|> exact
 where
  approx = O.flag'
    (Approximate (SipKey 0 0))
    (O.short 'a' <> O.long "approximate" <> O.help
      (concat
        [ "For deduplication, store a 64-bit siphash rather than the whole line. "
        , "Can save memory, depending on the set operation"
        ]
      )
    )
  exact = pure Exact

catCommand :: O.Mod O.CommandFields Command
catCommand = O.command "cat" $ O.info
  (value O.<**> O.helper)
  (O.fullDesc <> O.progDesc "Set union. Output everything from the input sets, deduplicated.")
  where value = Union <$> accuracy <*> inFiles <*> outHdl

subCommand :: O.Mod O.CommandFields Command
subCommand = O.command "sub" $ O.info
  (value O.<**> O.helper)
  (O.fullDesc <> O.progDesc
    (concat
      [ "Set subtraction. "
      , "Given stream A and sets B,C,..., output values in A but not in B,C,... "
      , "Does not deduplicate values from A."
      ]
    )
  )
 where
  value = Subtract <$> accuracy <*> plus <*> minuses <*> outHdl
  plus  = O.argument (aToO setDescriptor) (O.metavar "PLUSFILE" <> O.help setExplanation)
  minuses =
    many $ O.argument (aToO setDescriptor) (O.metavar "MINUSFILE*" <> O.help multiSetExplanation)

intersectCommand :: O.Mod O.CommandFields Command
intersectCommand = O.command "int" $ O.info
  (value O.<**> O.helper)
  (O.fullDesc <> O.progDesc
    "Set intersection. Output values which are in all input sets. Deduplicates values."
  )
  where value = Intersect <$> accuracy <*> inFiles <*> outHdl

xorCommand :: O.Mod O.CommandFields Command
xorCommand = O.command "xor" $ O.info
  (value O.<**> O.helper)
  (O.fullDesc <> O.progDesc
    (concat
      [ "Set xor/symmetric difference. "
      , "Given files A and B, output all values which are A or B but not both. "
      , "Given more than 2 files, output everything that appears in an odd number of files."
      ]
    )
  )
  where value = Xor <$> accuracy <*> inFiles <*> outHdl

command :: IO Command
command = O.execParser
  (O.info
    ((O.subparser (catCommand <> subCommand <> intersectCommand <> xorCommand)) O.<**> O.helper)
    (O.fullDesc <> O.progDesc "Use -h for help. Available commands: cat, sub, int, xor.")
  )