{-# LANGUAGE CPP, DataKinds, KindSignatures, MagicHash,
             ScopedTypeVariables, TemplateHaskell, TypeFamilies,
             ViewPatterns #-}
-- | Support for representing so-called categorical variables: a
-- (usually small) finite set of textual values. We map these onto
-- regular Haskell data types and offer help to generate useful type
-- class instances for such types.
module Frames.Categorical where
import Control.Applicative (ZipList(..))
import Control.DeepSeq (NFData(..))
import Control.Monad (MonadPlus(mzero))
import Data.Char (toUpper)
import Data.Readable (Readable(..))
import Data.Set (Set)
import qualified Data.Set as S
import Data.String (IsString(..))
import Data.Text (Text)
import qualified Data.Text as T
import Data.Typeable (Typeable)
import Data.Vector.Unboxed.Deriving
import Data.Vinyl.Functor (Const(..))
import Data.Word
import qualified Data.Vector.Unboxed as VU
import Frames.ColumnTypeable
import Frames.InCore (VectorFor)
import Frames.ShowCSV
import Frames.Utils
import GHC.Exts (Proxy#, proxy#)
import GHC.TypeNats
import Language.Haskell.TH

-- | A categorical variable can take on one of a finite number of
-- textual names. Any value of type @Categorical n@ has no more than
-- @n@ variants.
newtype Categorical (n :: Nat) = Categorical { forall (n :: Nat). Categorical n -> Set Text
categories :: Set Text }
  deriving (Categorical n -> Categorical n -> Bool
forall (n :: Nat). Categorical n -> Categorical n -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Categorical n -> Categorical n -> Bool
$c/= :: forall (n :: Nat). Categorical n -> Categorical n -> Bool
== :: Categorical n -> Categorical n -> Bool
$c== :: forall (n :: Nat). Categorical n -> Categorical n -> Bool
Eq, Int -> Categorical n -> ShowS
forall (n :: Nat). Int -> Categorical n -> ShowS
forall (n :: Nat). [Categorical n] -> ShowS
forall (n :: Nat). Categorical n -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Categorical n] -> ShowS
$cshowList :: forall (n :: Nat). [Categorical n] -> ShowS
show :: Categorical n -> String
$cshow :: forall (n :: Nat). Categorical n -> String
showsPrec :: Int -> Categorical n -> ShowS
$cshowsPrec :: forall (n :: Nat). Int -> Categorical n -> ShowS
Show, Typeable)

-- | Ensure the first character of a 'String' is uppercase.
cap :: String -> String
cap :: ShowS
cap [] = []
cap (Char
c : String
cs) = Char -> Char
toUpper Char
c forall a. a -> [a] -> [a]
: String
cs

-- | Helper for working with 'derivingUnbox'. Takes the name of the
-- type and the number of variants in the sum type in order to
-- determine a compact representation.
unboxDecls :: String -> Int -> DecsQ
unboxDecls :: String -> Int -> DecsQ
unboxDecls String
name Int
numVariants =
  String -> TypeQ -> ExpQ -> ExpQ -> DecsQ
derivingUnbox String
name
                [t|() => $(conT (mkName name)) -> $(conT repTy)|]
                [|fromIntegral . fromEnum|]
                [|toEnum . fromIntegral|]
  where repTy :: Name
repTy
          | Int
numVariants forall a. Ord a => a -> a -> Bool
< Int
2forall a b. (Num a, Integral b) => a -> b -> a
^(Int
8 :: Int) = ''Word8
          | Int
numVariants forall a. Ord a => a -> a -> Bool
< Int
2forall a b. (Num a, Integral b) => a -> b -> a
^(Int
16 :: Int) = ''Word16
          | Int
numVariants forall a. Ord a => a -> a -> Bool
< Int
2forall a b. (Num a, Integral b) => a -> b -> a
^(Int
32 :: Int) = ''Word32
          | Bool
otherwise = ''Word64

-- | Generate a splice with data type declaration and associated
-- instances for type suitable for representing a categorical
-- variable. This is a type that maps between a finite set of textual
-- names and Haskell data constructors. Usage: @declareCategorical
-- typeName optionalConPrefix variantNames@ will produce a data type
-- with name @typeName@ and data constructors whose names are a
-- concatenation of @optionalConPrefix@ and each element of
-- @variantNames@.
declareCategorical :: String -> Maybe String -> [String] -> Q [Dec]
declareCategorical :: String -> Maybe String -> [String] -> DecsQ
declareCategorical (ShowS
cap -> String
name) (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ShowS
cap -> Maybe String
prefix) [String]
variants =
  ([ Dec
dataDecl, Dec
iIsString, Dec
iReadable, Dec
iParseable
   , Dec
iShowCSV, Dec
iVectorFor, Dec
iNFData ] forall a. [a] -> [a] -> [a]
++)
  forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Int -> DecsQ
unboxDecls String
name (forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
variants)
  where variantCons :: [Name]
variantCons = forall a b. (a -> b) -> [a] -> [b]
map (String -> Name
mkName forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
sanitizeTypeName forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall b a. b -> (a -> b) -> Maybe a -> b
maybe forall a. a -> a
id forall a. [a] -> [a] -> [a]
(++) Maybe String
prefix forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
cap) [String]
variants
        onVariants :: (String -> Name -> a) -> [a]
        onVariants :: forall a. (String -> Name -> a) -> [a]
onVariants String -> Name -> a
f =
          forall a. ZipList a -> [a]
getZipList (String -> Name -> a
f forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. [a] -> ZipList a
ZipList [String]
variants forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall a. [a] -> ZipList a
ZipList [Name]
variantCons)
        nameName :: Name
nameName = String -> Name
mkName forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
sanitizeTypeName forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack forall a b. (a -> b) -> a -> b
$ String
name
        fromStringClause :: String -> Name -> Clause
fromStringClause String
variant Name
variantCon =
          [Pat] -> Body -> [Dec] -> Clause
Clause [Lit -> Pat
LitP (String -> Lit
StringL String
variant)] (Exp -> Body
NormalB (Name -> Exp
ConE Name
variantCon)) []
        showCSVClause :: String -> Name -> Clause
showCSVClause String
variant Name
variantCon =
#if MIN_VERSION_template_haskell(2,18,0)
          [Pat] -> Body -> [Dec] -> Clause
Clause [Name -> [Type] -> [Pat] -> Pat
ConP Name
variantCon [] []]
#else
          Clause [ConP variantCon []]
#endif
                 (Exp -> Body
NormalB (Exp -> Exp -> Exp
AppE (Name -> Exp
VarE 'T.pack) (Lit -> Exp
LitE (String -> Lit
StringL String
variant))))
                 []
        readableGuarded :: Name -> String -> Name -> (Guard, Exp)
        readableGuarded :: Name -> String -> Name -> (Guard, Exp)
readableGuarded Name
argName String
variant Name
variantCon =
          ( Exp -> Guard
NormalG (Maybe Exp -> Exp -> Maybe Exp -> Exp
InfixE (forall a. a -> Maybe a
Just (Name -> Exp
VarE Name
argName))
                    (Name -> Exp
VarE '(==))
                    (forall a. a -> Maybe a
Just (Exp -> Exp -> Exp
AppE (Name -> Exp
VarE 'T.pack) (Lit -> Exp
LitE (String -> Lit
StringL String
variant)))))
          , Exp -> Exp -> Exp
AppE (Name -> Exp
VarE 'return ) (Name -> Exp
ConE Name
variantCon) )
        dataDecl :: Dec
dataDecl = [Type]
-> Name
-> [TyVarBndr ()]
-> Maybe Type
-> [Con]
-> [DerivClause]
-> Dec
DataD [] Name
nameName [] forall a. Maybe a
Nothing
                         (forall a b. (a -> b) -> [a] -> [b]
map (forall a b c. (a -> b -> c) -> b -> a -> c
flip Name -> [BangType] -> Con
NormalC []) [Name]
variantCons)
                         [Maybe DerivStrategy -> [Type] -> DerivClause
DerivClause forall a. Maybe a
Nothing [ Name -> Type
ConT ''Eq
                                              , Name -> Type
ConT ''Enum
                                              , Name -> Type
ConT ''Bounded
                                              , Name -> Type
ConT ''Ord
                                              , Name -> Type
ConT ''Show ]]
        iIsString :: Dec
iIsString =
          Maybe Overlap -> [Type] -> Type -> [Dec] -> Dec
InstanceD forall a. Maybe a
Nothing [] (Type -> Type -> Type
AppT (Name -> Type
ConT ''IsString) (Name -> Type
ConT Name
nameName))
                    [Name -> [Clause] -> Dec
FunD 'fromString
                          (forall a. (String -> Name -> a) -> [a]
onVariants String -> Name -> Clause
fromStringClause)]
        iReadable :: Dec
iReadable =
          let argName :: Name
argName = String -> Name
mkName String
"t"
              clauses :: [(Guard, Exp)]
clauses = forall a. (String -> Name -> a) -> [a]
onVariants (Name -> String -> Name -> (Guard, Exp)
readableGuarded Name
argName)
              clausesTotal :: [(Guard, Exp)]
clausesTotal = [(Guard, Exp)]
clauses forall a. [a] -> [a] -> [a]
++ [(Exp -> Guard
NormalG (Name -> Exp
ConE 'True), Name -> Exp
VarE 'mzero)]
          in Maybe Overlap -> [Type] -> Type -> [Dec] -> Dec
InstanceD forall a. Maybe a
Nothing [] (Type -> Type -> Type
AppT (Name -> Type
ConT ''Readable) (Name -> Type
ConT Name
nameName))
                       [Name -> [Clause] -> Dec
FunD 'fromText
                             [[Pat] -> Body -> [Dec] -> Clause
Clause [Name -> Pat
VarP Name
argName] ([(Guard, Exp)] -> Body
GuardedB [(Guard, Exp)]
clausesTotal) []]]
        iParseable :: Dec
iParseable =
          Maybe Overlap -> [Type] -> Type -> [Dec] -> Dec
InstanceD forall a. Maybe a
Nothing [] (Type -> Type -> Type
AppT (Name -> Type
ConT ''Parseable) (Name -> Type
ConT Name
nameName)) []
        iShowCSV :: Dec
iShowCSV =
          Maybe Overlap -> [Type] -> Type -> [Dec] -> Dec
InstanceD forall a. Maybe a
Nothing [] (Type -> Type -> Type
AppT (Name -> Type
ConT ''ShowCSV) (Name -> Type
ConT Name
nameName))
                    [Name -> [Clause] -> Dec
FunD 'showCSV (forall a. (String -> Name -> a) -> [a]
onVariants String -> Name -> Clause
showCSVClause)]
        iVectorFor :: Dec
iVectorFor =
#if __GLASGOW_HASKELL__ >= 808
          TySynEqn -> Dec
TySynInstD (Maybe [TyVarBndr ()] -> Type -> Type -> TySynEqn
TySynEqn forall a. Maybe a
Nothing (Type -> Type -> Type
AppT (Name -> Type
ConT ''VectorFor) (Name -> Type
ConT Name
nameName)) (Name -> Type
ConT ''VU.Vector))
#else
          TySynInstD ''VectorFor (TySynEqn [ConT nameName] (ConT ''VU.Vector))
#endif
        iNFData :: Dec
iNFData =
          let argName :: Name
argName = String -> Name
mkName String
"x"
          in Maybe Overlap -> [Type] -> Type -> [Dec] -> Dec
InstanceD forall a. Maybe a
Nothing [] (Type -> Type -> Type
AppT (Name -> Type
ConT ''NFData) (Name -> Type
ConT Name
nameName))
                       [Name -> [Clause] -> Dec
FunD 'rnf [[Pat] -> Body -> [Dec] -> Clause
Clause [Name -> Pat
VarP Name
argName]
                                  (Exp -> Body
NormalB
                                   (Exp -> Exp -> Exp
AppE (Exp -> Exp -> Exp
AppE (Name -> Exp
VarE 'seq) (Name -> Exp
VarE Name
argName))
                                         ([Maybe Exp] -> Exp
TupE [])))
                                  []]]

instance KnownNat n => Parseable (Categorical n) where
  parse :: forall (m :: * -> *).
MonadPlus m =>
Text -> m (Parsed (Categorical n))
parse Text
txt = forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Parsed a
Possibly (forall (n :: Nat). Set Text -> Categorical n
Categorical (forall a. a -> Set a
S.singleton Text
txt)))
  parseCombine :: forall (m :: * -> *).
MonadPlus m =>
Parsed (Categorical n)
-> Parsed (Categorical n) -> m (Parsed (Categorical n))
parseCombine Parsed (Categorical n)
p1 Parsed (Categorical n)
p2
    | forall a. Set a -> Int
S.size Set Text
catCombined forall a. Ord a => a -> a -> Bool
<= Int
maxVariants =
      forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Parsed a
Possibly (forall (n :: Nat). Set Text -> Categorical n
Categorical Set Text
catCombined))
    | Bool
otherwise = forall (m :: * -> *) a. MonadPlus m => m a
mzero
    where getCats :: Parsed (Categorical n) -> Set Text
getCats = forall (n :: Nat). Categorical n -> Set Text
categories forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Parsed a -> a
parsedValue
          catCombined :: Set Text
catCombined = forall a. Ord a => Set a -> Set a -> Set a
S.union (forall {n :: Nat}. Parsed (Categorical n) -> Set Text
getCats Parsed (Categorical n)
p1) (forall {n :: Nat}. Parsed (Categorical n) -> Set Text
getCats Parsed (Categorical n)
p2)
          maxVariants :: Int
          maxVariants :: Int
maxVariants = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a. Integral a => a -> Integer
toInteger (forall (n :: Nat). KnownNat n => Proxy# n -> Nat
natVal' (forall {k} (a :: k). Proxy# a
proxy# :: Proxy# n)))
  representableAsType :: Parsed (Categorical n)
-> Const (Either (String -> DecsQ) Type) (Categorical n)
representableAsType (forall a. Set a -> [a]
S.toList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (n :: Nat). Categorical n -> Set Text
categories forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Parsed a -> a
parsedValue -> [Text]
cats) =
    forall k a (b :: k). a -> Const a b
Const (forall a b. a -> Either a b
Left (\String
n -> String -> Maybe String -> [String] -> DecsQ
declareCategorical String
n (forall a. a -> Maybe a
Just String
n) (forall a b. (a -> b) -> [a] -> [b]
map Text -> String
T.unpack [Text]
cats)))