{-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE LambdaCase #-} {-# OPTIONS_GHC -fno-warn-unused-imports #-} module Text.Enum.Text ( EnumText(..) , UsingEnumText(..) , TextParsable(..) , EnumTextConfig(..) , defaultEnumTextConfig ) where import Control.Monad (void) import Control.Monad.Fail as CMF import Data.Array import qualified Data.Attoparsec.Text as AP import qualified Data.ByteString.Char8 as B import Data.Coerce import Data.Hashable import Data.Possibly import Data.Scientific (toBoundedInteger) import qualified Data.HashMap.Strict as HM import qualified Data.Text as T import qualified Data.Text.Encoding as TE import Data.Text.Read import Data.Time import Fmt import Text.Read {- | Our toolkit for enumerated types which should be defined as follows: @ import Fmt import Text.Enum.Text data Foo = FOO_bar | FOO_bar_baz deriving (Bounded,Enum,Eq,Ord,Show) instance EnumText Foo instance Buildable Foo where build = buildEnumText instance TextParsable Foo where parseText = parseEnumText @ With the @DeriveAnyClass@ language extension you can list @EnumText@ in the @deriving@ clause, and with @DerivingVia@ (available from GHC 8.6.1) you can derive @via@ @UsingEnumText@ as follows: @ {\-\# LANGUAGE DeriveAnyClass #-\} {\-\# LANGUAGE DerivingVia #-\} import Fmt import Text.Enum.Text data Foo = FOO_bar | FOO_bar_baz deriving (Bounded,Enum,EnumText,Eq,Ord,Show) deriving (Buildable,TextParsable) via UsingEnumText Foo @ -} class ( Buildable e , Bounded e , Enum e , Eq e , Ord e , Show e , TextParsable e ) => EnumText e where -- | Configures the textual representation of @e@ generated by renderEnumText. configEnumText :: e -> EnumTextConfig configEnumText _ = defaultEnumTextConfig -- | Generate the standard textual representation according to -- 'configEnumText' by default. renderEnumText :: e -> T.Text renderEnumText e = enumTextArray ! I e -- | Sames as 'renderEnumText', but generating a 'Builder'. buildEnumText :: e -> Builder buildEnumText = build . renderEnumText -- | Parses an @e@ according to the 'renderEnumText' render. parseEnumText :: T.Text -> Possibly e parseEnumText txt = maybe (Left msg) Right $ HM.lookup txt hashmap_t where msg = "parseEnumText: enumeration not recognised: "++show txt -- | A cassava field encoder, using 'the renderEnumText' format. toFieldEnumText :: e -> B.ByteString toFieldEnumText e = enumByteStringArray ! I e -- | A cassava field parser using the 'renderEnumText' format. fromFieldEnumText_ :: MonadFail m => B.ByteString -> m e fromFieldEnumText_ bs = maybe (CMF.fail msg) return $ HM.lookup bs hashmap_b where msg = "fromFieldEnumText_: enumeration not recognised: "++show bs -- | For hashing @e@ with the 'renderEnumText' representation. hashWithSaltEnumText :: Int -> e -> Int hashWithSaltEnumText n = hashWithSalt n . toFieldEnumText newtype UsingEnumText a = UsingEnumText { _UsingEnumText :: a } instance EnumText a => Buildable (UsingEnumText a) where build (UsingEnumText x) = buildEnumText x instance EnumText a => TextParsable (UsingEnumText a) where parseText x = UsingEnumText <$> parseEnumText x ------------------------------------------------------------------------------- -- EnumTextConfig, defaultEnumTextConfig ------------------------------------------------------------------------------- -- | Configures the default implementation of 'renderEnumText' data EnumTextConfig = EnumTextConfig { _etc_text_prep :: T.Text -> T.Text -- ^ applied to the output of 'show' -- once converted to 'T.Text'; by -- default strips each data -- constructor up to and including -- the first '_' , _etc_char_prep :: Char -> Char -- ^ applied to each character of -- the outpout of '_etc_text_prep' -- (by default flips underscores (@_@) -- to dashes (@-@) } -- | The default 'configEnumText' for 'EnumText': -- -- * '_etc_text_prep' removes the prefix up to and including the first -- underscore ('_') -- * '_etc_char_prep' flips the underscores (@_@) to dashes (@-@) defaultEnumTextConfig :: EnumTextConfig defaultEnumTextConfig = EnumTextConfig { _etc_text_prep = defaultTextPrep , _etc_char_prep = defaultCharPrep } defaultTextPrep :: T.Text -> T.Text defaultTextPrep txt = case T.uncons $ T.dropWhile (/='_') txt of Just (_,rst) | not $ T.null rst -> rst _ -> error $ "defaultTextPrep: bad data constructor: "++T.unpack txt defaultCharPrep :: Char -> Char defaultCharPrep c = case c of '_' -> '-' _ -> c ------------------------------------------------------------------------------- -- TextParsable ------------------------------------------------------------------------------- -- | a class for 'T.Text' parsers. class TextParsable a where parseText :: T.Text -> Possibly a instance TextParsable T.Text where parseText = return -- | Robust 'TextParsable' instance for 'UTCTime'. -- -- Examples: -- -- >>> show <$> parseText @UTCTime (T.pack "2020-01-01 10:12:30") -- Right "2020-01-01 10:12:30 UTC" -- -- >>> show <$> parseText @UTCTime (T.pack "2020-01-01T10:12:30") -- Right "2020-01-01 10:12:30 UTC" -- -- >>> show <$> parseText @UTCTime (T.pack "2020-01-01T10:12:30Z") -- Right "2020-01-01 10:12:30 UTC" -- instance TextParsable UTCTime where parseText = parseTextUsing parseUTC "UTCTime" instance TextParsable Day where parseText = parseTextRead "Day" instance TextParsable Int where parseText = parseDecimal instance a ~ Char => TextParsable [a] where parseText = return . T.unpack instance TextParsable a => TextParsable (Maybe a) where parseText = \case "" -> Right Nothing s -> Just <$> parseText s ------------------------------------------------------------------------------- -- arrays ------------------------------------------------------------------------------- newtype I a = I { _I :: a } deriving (Eq,Ord) instance EnumText e => Ix (I e) where range (l,h) = coerce [_I l.._I h] index (l,_) x = fromEnum (_I x) - fromEnum (_I l) inRange (l,h) x = _I l <= _I x && _I x <= _I h -- | array of texts constructed with 'configEnumText' enumTextArray :: forall e . EnumText e => Array (I e) T.Text enumTextArray = listArray (I minBound,I maxBound) [ T.map _etc_char_prep $ _etc_text_prep $ T.pack $ show e | e <- [minBound..maxBound :: e] ] where EnumTextConfig{..} = configEnumText (minBound :: e) -- | array of 'B.ByteString' generated from 'renderEnumText' enumByteStringArray :: forall e . EnumText e => Array (I e) B.ByteString enumByteStringArray = listArray (I minBound,I maxBound) [ TE.encodeUtf8 $ renderEnumText e | e <- [minBound..maxBound :: e] ] ------------------------------------------------------------------------------- -- hashmaps ------------------------------------------------------------------------------- -- | 'T.Text' 'HM.HashMap' based on 'renderEnumText' representation hashmap_t :: EnumText e => HM.HashMap T.Text e hashmap_t = HM.fromList [ (renderEnumText c,c) | c <- [minBound..maxBound] ] -- | 'B.ByteString' 'HM.HashMap' based on 'renderEnumText' representation hashmap_b :: EnumText e => HM.HashMap B.ByteString e hashmap_b = HM.fromList [ (TE.encodeUtf8 $ renderEnumText c,c) | c <- [minBound..maxBound] ] ------------------------------------------------------------------------------- -- internal parsers ------------------------------------------------------------------------------- -- | parse a decimal integer using the "Text.Read" toolkit parseDecimal :: T.Text -> Possibly Int parseDecimal txt = either (Left . typeError "integer") return $ do (x,r) <- signed decimal txt case T.null r of True -> return x False -> Left $ "residual input: " ++ T.unpack r -- | Convert a 'Read' parser into a 'TextParsable' -- -- Examples: -- -- >>> parseTextRead @Int "Int" (T.pack "10") -- Right 10 -- -- >>> parseTextRead @Int "Int" (T.pack "a") -- Left "failed to parse Int: \"a\"" -- parseTextRead :: Read a => String -- ^ name of type bing parsed (for failure message) -> T.Text -- ^ 'T.Text' to be parsed -> Possibly a parseTextRead = parseTextUsing (readMaybe . T.unpack) -- | Use the given function to turn the input string into a 'TextParsable' -- -- Examples: -- -- >>> parseTextUsing @Int (readMaybe . T.unpack) "Int" (T.pack "10") -- Right 10 -- -- >>> parseTextUsing @Int (readMaybe . T.unpack) "Int" (T.pack "a") -- Left "failed to parse Int: \"a\"" -- parseTextUsing :: (T.Text -> Maybe a) -> String -- ^ name of type bing parsed (for failure message) -> T.Text -- ^ 'T.Text' to be parsed -> Possibly a parseTextUsing maybeParser ty_s txt = maybe (Left $ typeError ty_s $ show str) Right $ maybeParser txt where str = T.unpack txt typeError :: String -> String -> String typeError ty_s msg = "failed to parse "++ty_s++": "++msg ------------------------------------------------------------------------------- -- support for parsing UTCTime (see enum-text#1) ------------------------------------------------------------------------------- -- The following has been cribbed from the \"Data.API.Time\" module from the -- \"api-tools\" package. -- | Parse text as a 'UTCTime' in ISO 8601 format or a number of slight -- variations thereof (the @T@ may be replaced with a space, and the seconds, -- milliseconds and/or @Z@ timezone indicator may optionally be omitted). -- -- Time zone designations other than @Z@ for UTC are not currently supported. parseUTC :: T.Text -> Maybe UTCTime parseUTC t = case AP.parseOnly (parserUTCTime <* AP.endOfInput) t of Left _ -> Nothing Right r -> Just r parserUTCTime :: AP.Parser UTCTime parserUTCTime = do day <- parserDay void $ AP.skip (\c -> c == ' ' || c == 'T') time <- parserTime mb_offset <- parserTimeZone pure (maybe id addUTCTime mb_offset $ UTCTime day time) -- | Parser for @YYYY-MM-DD@ format. parserDay :: AP.Parser Day parserDay = do y :: Int <- AP.decimal <* AP.char '-' m :: Int <- AP.decimal <* AP.char '-' d :: Int <- AP.decimal case fromGregorianValid (fromIntegral y) m d of Just x -> pure x Nothing -> fail "invalid date" -- | Parser for times in the format @HH:MM@, @HH:MM:SS@ or @HH:MM:SS.QQQ...@. parserTime :: AP.Parser DiffTime parserTime = do h :: Int <- AP.decimal void $ AP.char ':' m :: Int <- AP.decimal c <- AP.peekChar s <- case c of Just ':' -> AP.anyChar *> AP.scientific _ -> pure 0 case toBoundedInteger (10^(12::Int) * (s + fromIntegral (60*(m + 60*h)))) of Just n -> pure (picosecondsToDiffTime (fromIntegral (n :: Int))) Nothing -> fail "seconds out of range" -- | Parser for time zone indications such as @Z@, @ UTC@ or an explicit offset -- like @+HH:MM@ or @-HH@. Returns 'Nothing' for UTC. Local times (without a -- timezone designator) are assumed to be UTC. If there is an explicit offset, -- returns its negation. parserTimeZone :: AP.Parser (Maybe NominalDiffTime) parserTimeZone = do c <- AP.option 'Z' AP.anyChar case c of 'Z' -> pure Nothing ' ' -> "UTC" *> pure Nothing '+' -> parse_offset True '-' -> parse_offset False _ -> fail "unexpected time zone character" where parse_offset pos = do hh :: Int <- read <$> AP.count 2 AP.digit AP.option () (AP.skip (== ':')) mm :: Int <- AP.option 0 (read <$> AP.count 2 AP.digit) let v = (if pos then negate else id) ((hh*60 + mm) * 60) return (Just (fromIntegral v))