{-# LANGUAGE CPP, DataKinds, KindSignatures, MagicHash,
             ScopedTypeVariables, TemplateHaskell, TypeFamilies,
             ViewPatterns #-}
-- | Support for representing so-called categorical variables: a
-- (usually small) finite set of textual values. We map these onto
-- regular Haskell data types and offer help to generate useful type
-- class instances for such types.
module Frames.Categorical where
import Control.Applicative (ZipList(..))
import Control.DeepSeq (NFData(..))
import Control.Monad (MonadPlus(mzero))
import Data.Char (toUpper)
import Data.Readable (Readable(..))
import Data.Set (Set)
import qualified Data.Set as S
import Data.String (IsString(..))
import Data.Text (Text)
import qualified Data.Text as T
import Data.Typeable (Typeable)
import Data.Vector.Unboxed.Deriving
import Data.Vinyl.Functor (Const(..))
import Data.Word
import qualified Data.Vector.Unboxed as VU
import Frames.ColumnTypeable
import Frames.InCore (VectorFor)
import Frames.ShowCSV
import Frames.Utils
import GHC.Exts (Proxy#, proxy#)
import GHC.TypeNats
import Language.Haskell.TH

-- | A categorical variable can take on one of a finite number of
-- textual names. Any value of type @Categorical n@ has no more than
-- @n@ variants.
newtype Categorical (n :: Nat) = Categorical { Categorical n -> Set Text
categories :: Set Text }
  deriving (Categorical n -> Categorical n -> Bool
(Categorical n -> Categorical n -> Bool)
-> (Categorical n -> Categorical n -> Bool) -> Eq (Categorical n)
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall (n :: Nat). Categorical n -> Categorical n -> Bool
/= :: Categorical n -> Categorical n -> Bool
$c/= :: forall (n :: Nat). Categorical n -> Categorical n -> Bool
== :: Categorical n -> Categorical n -> Bool
$c== :: forall (n :: Nat). Categorical n -> Categorical n -> Bool
Eq, Int -> Categorical n -> ShowS
[Categorical n] -> ShowS
Categorical n -> String
(Int -> Categorical n -> ShowS)
-> (Categorical n -> String)
-> ([Categorical n] -> ShowS)
-> Show (Categorical n)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall (n :: Nat). Int -> Categorical n -> ShowS
forall (n :: Nat). [Categorical n] -> ShowS
forall (n :: Nat). Categorical n -> String
showList :: [Categorical n] -> ShowS
$cshowList :: forall (n :: Nat). [Categorical n] -> ShowS
show :: Categorical n -> String
$cshow :: forall (n :: Nat). Categorical n -> String
showsPrec :: Int -> Categorical n -> ShowS
$cshowsPrec :: forall (n :: Nat). Int -> Categorical n -> ShowS
Show, Typeable)

-- | Ensure the first character of a 'String' is uppercase.
cap :: String -> String
cap :: ShowS
cap [] = []
cap (Char
c : String
cs) = Char -> Char
toUpper Char
c Char -> ShowS
forall a. a -> [a] -> [a]
: String
cs

-- | Helper for working with 'derivingUnbox'. Takes the name of the
-- type and the number of variants in the sum type in order to
-- determine a compact representation.
unboxDecls :: String -> Int -> DecsQ
unboxDecls :: String -> Int -> DecsQ
unboxDecls String
name Int
numVariants =
  String -> TypeQ -> ExpQ -> ExpQ -> DecsQ
derivingUnbox String
name
                [t|() => $(conT (mkName name)) -> $(conT repTy)|]
                [|fromIntegral . fromEnum|]
                [|toEnum . fromIntegral|]
  where repTy :: Name
repTy
          | Int
numVariants Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2Int -> Int -> Int
forall a b. (Num a, Integral b) => a -> b -> a
^(Int
8 :: Int) = ''Word8
          | Int
numVariants Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2Int -> Int -> Int
forall a b. (Num a, Integral b) => a -> b -> a
^(Int
16 :: Int) = ''Word16
          | Int
numVariants Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
2Int -> Int -> Int
forall a b. (Num a, Integral b) => a -> b -> a
^(Int
32 :: Int) = ''Word32
          | Bool
otherwise = ''Word64

-- | Generate a splice with data type declaration and associated
-- instances for type suitable for representing a categorical
-- variable. This is a type that maps between a finite set of textual
-- names and Haskell data constructors. Usage: @declareCategorical
-- typeName optionalConPrefix variantNames@ will produce a data type
-- with name @typeName@ and data constructors whose names are a
-- concatenation of @optionalConPrefix@ and each element of
-- @variantNames@.
declareCategorical :: String -> Maybe String -> [String] -> Q [Dec]
declareCategorical :: String -> Maybe String -> [String] -> DecsQ
declareCategorical (ShowS
cap -> String
name) (ShowS -> Maybe String -> Maybe String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ShowS
cap -> Maybe String
prefix) [String]
variants =
  ([ Dec
dataDecl, Dec
iIsString, Dec
iReadable, Dec
iParseable
   , Dec
iShowCSV, Dec
iVectorFor, Dec
iNFData ] [Dec] -> [Dec] -> [Dec]
forall a. [a] -> [a] -> [a]
++)
  ([Dec] -> [Dec]) -> DecsQ -> DecsQ
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Int -> DecsQ
unboxDecls String
name ([String] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
variants)
  where variantCons :: [Name]
variantCons = (String -> Name) -> [String] -> [Name]
forall a b. (a -> b) -> [a] -> [b]
map (String -> Name
mkName (String -> Name) -> ShowS -> String -> Name
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack (Text -> String) -> (String -> Text) -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
sanitizeTypeName (Text -> Text) -> (String -> Text) -> String -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack (String -> Text) -> ShowS -> String -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS -> (String -> ShowS) -> Maybe String -> ShowS
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ShowS
forall a. a -> a
id String -> ShowS
forall a. [a] -> [a] -> [a]
(++) Maybe String
prefix ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
cap) [String]
variants
        onVariants :: (String -> Name -> a) -> [a]
        onVariants :: (String -> Name -> a) -> [a]
onVariants String -> Name -> a
f =
          ZipList a -> [a]
forall a. ZipList a -> [a]
getZipList (String -> Name -> a
f (String -> Name -> a) -> ZipList String -> ZipList (Name -> a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [String] -> ZipList String
forall a. [a] -> ZipList a
ZipList [String]
variants ZipList (Name -> a) -> ZipList Name -> ZipList a
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [Name] -> ZipList Name
forall a. [a] -> ZipList a
ZipList [Name]
variantCons)
        nameName :: Name
nameName = String -> Name
mkName (String -> Name) -> ShowS -> String -> Name
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack (Text -> String) -> (String -> Text) -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
sanitizeTypeName (Text -> Text) -> (String -> Text) -> String -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack (String -> Name) -> String -> Name
forall a b. (a -> b) -> a -> b
$ String
name
        fromStringClause :: String -> Name -> Clause
fromStringClause String
variant Name
variantCon =
          [Pat] -> Body -> [Dec] -> Clause
Clause [Lit -> Pat
LitP (String -> Lit
StringL String
variant)] (Exp -> Body
NormalB (Name -> Exp
ConE Name
variantCon)) []
        showCSVClause :: String -> Name -> Clause
showCSVClause String
variant Name
variantCon =
          [Pat] -> Body -> [Dec] -> Clause
Clause [Name -> [Pat] -> Pat
ConP Name
variantCon []]
                 (Exp -> Body
NormalB (Exp -> Exp -> Exp
AppE (Name -> Exp
VarE 'T.pack) (Lit -> Exp
LitE (String -> Lit
StringL String
variant))))
                 []
        readableGuarded :: Name -> String -> Name -> (Guard, Exp)
        readableGuarded :: Name -> String -> Name -> (Guard, Exp)
readableGuarded Name
argName String
variant Name
variantCon =
          ( Exp -> Guard
NormalG (Maybe Exp -> Exp -> Maybe Exp -> Exp
InfixE (Exp -> Maybe Exp
forall a. a -> Maybe a
Just (Name -> Exp
VarE Name
argName))
                    (Name -> Exp
VarE '(==))
                    (Exp -> Maybe Exp
forall a. a -> Maybe a
Just (Exp -> Exp -> Exp
AppE (Name -> Exp
VarE 'T.pack) (Lit -> Exp
LitE (String -> Lit
StringL String
variant)))))
          , Exp -> Exp -> Exp
AppE (Name -> Exp
VarE 'return ) (Name -> Exp
ConE Name
variantCon) )
        dataDecl :: Dec
dataDecl = Cxt
-> Name
-> [TyVarBndr]
-> Maybe Kind
-> [Con]
-> [DerivClause]
-> Dec
DataD [] Name
nameName [] Maybe Kind
forall a. Maybe a
Nothing
                         ((Name -> Con) -> [Name] -> [Con]
forall a b. (a -> b) -> [a] -> [b]
map ((Name -> [BangType] -> Con) -> [BangType] -> Name -> Con
forall a b c. (a -> b -> c) -> b -> a -> c
flip Name -> [BangType] -> Con
NormalC []) [Name]
variantCons)
                         [Maybe DerivStrategy -> Cxt -> DerivClause
DerivClause Maybe DerivStrategy
forall a. Maybe a
Nothing [ Name -> Kind
ConT ''Eq
                                              , Name -> Kind
ConT ''Enum
                                              , Name -> Kind
ConT ''Bounded
                                              , Name -> Kind
ConT ''Ord
                                              , Name -> Kind
ConT ''Show ]]
        iIsString :: Dec
iIsString =
          Maybe Overlap -> Cxt -> Kind -> [Dec] -> Dec
InstanceD Maybe Overlap
forall a. Maybe a
Nothing [] (Kind -> Kind -> Kind
AppT (Name -> Kind
ConT ''IsString) (Name -> Kind
ConT Name
nameName))
                    [Name -> [Clause] -> Dec
FunD 'fromString
                          ((String -> Name -> Clause) -> [Clause]
forall a. (String -> Name -> a) -> [a]
onVariants String -> Name -> Clause
fromStringClause)]
        iReadable :: Dec
iReadable =
          let argName :: Name
argName = String -> Name
mkName String
"t"
              clauses :: [(Guard, Exp)]
clauses = (String -> Name -> (Guard, Exp)) -> [(Guard, Exp)]
forall a. (String -> Name -> a) -> [a]
onVariants (Name -> String -> Name -> (Guard, Exp)
readableGuarded Name
argName)
              clausesTotal :: [(Guard, Exp)]
clausesTotal = [(Guard, Exp)]
clauses [(Guard, Exp)] -> [(Guard, Exp)] -> [(Guard, Exp)]
forall a. [a] -> [a] -> [a]
++ [(Exp -> Guard
NormalG (Name -> Exp
ConE 'True), Name -> Exp
VarE 'mzero)]
          in Maybe Overlap -> Cxt -> Kind -> [Dec] -> Dec
InstanceD Maybe Overlap
forall a. Maybe a
Nothing [] (Kind -> Kind -> Kind
AppT (Name -> Kind
ConT ''Readable) (Name -> Kind
ConT Name
nameName))
                       [Name -> [Clause] -> Dec
FunD 'fromText
                             [[Pat] -> Body -> [Dec] -> Clause
Clause [Name -> Pat
VarP Name
argName] ([(Guard, Exp)] -> Body
GuardedB [(Guard, Exp)]
clausesTotal) []]]
        iParseable :: Dec
iParseable =
          Maybe Overlap -> Cxt -> Kind -> [Dec] -> Dec
InstanceD Maybe Overlap
forall a. Maybe a
Nothing [] (Kind -> Kind -> Kind
AppT (Name -> Kind
ConT ''Parseable) (Name -> Kind
ConT Name
nameName)) []
        iShowCSV :: Dec
iShowCSV =
          Maybe Overlap -> Cxt -> Kind -> [Dec] -> Dec
InstanceD Maybe Overlap
forall a. Maybe a
Nothing [] (Kind -> Kind -> Kind
AppT (Name -> Kind
ConT ''ShowCSV) (Name -> Kind
ConT Name
nameName))
                    [Name -> [Clause] -> Dec
FunD 'showCSV ((String -> Name -> Clause) -> [Clause]
forall a. (String -> Name -> a) -> [a]
onVariants String -> Name -> Clause
showCSVClause)]
        iVectorFor :: Dec
iVectorFor =
#if __GLASGOW_HASKELL__ >= 808
          TySynEqn -> Dec
TySynInstD (Maybe [TyVarBndr] -> Kind -> Kind -> TySynEqn
TySynEqn Maybe [TyVarBndr]
forall a. Maybe a
Nothing (Kind -> Kind -> Kind
AppT (Name -> Kind
ConT ''VectorFor) (Name -> Kind
ConT Name
nameName)) (Name -> Kind
ConT ''VU.Vector))
#else
          TySynInstD ''VectorFor (TySynEqn [ConT nameName] (ConT ''VU.Vector))
#endif
        iNFData :: Dec
iNFData =
          let argName :: Name
argName = String -> Name
mkName String
"x"
          in Maybe Overlap -> Cxt -> Kind -> [Dec] -> Dec
InstanceD Maybe Overlap
forall a. Maybe a
Nothing [] (Kind -> Kind -> Kind
AppT (Name -> Kind
ConT ''NFData) (Name -> Kind
ConT Name
nameName))
                       [Name -> [Clause] -> Dec
FunD 'rnf [[Pat] -> Body -> [Dec] -> Clause
Clause [Name -> Pat
VarP Name
argName]
                                  (Exp -> Body
NormalB
                                   (Exp -> Exp -> Exp
AppE (Exp -> Exp -> Exp
AppE (Name -> Exp
VarE 'seq) (Name -> Exp
VarE Name
argName))
                                         ([Maybe Exp] -> Exp
TupE [])))
                                  []]]

instance KnownNat n => Parseable (Categorical n) where
  parse :: Text -> m (Parsed (Categorical n))
parse Text
txt = Parsed (Categorical n) -> m (Parsed (Categorical n))
forall (m :: * -> *) a. Monad m => a -> m a
return (Categorical n -> Parsed (Categorical n)
forall a. a -> Parsed a
Possibly (Set Text -> Categorical n
forall (n :: Nat). Set Text -> Categorical n
Categorical (Text -> Set Text
forall a. a -> Set a
S.singleton Text
txt)))
  parseCombine :: Parsed (Categorical n)
-> Parsed (Categorical n) -> m (Parsed (Categorical n))
parseCombine Parsed (Categorical n)
p1 Parsed (Categorical n)
p2
    | Set Text -> Int
forall a. Set a -> Int
S.size Set Text
catCombined Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
maxVariants =
      Parsed (Categorical n) -> m (Parsed (Categorical n))
forall (m :: * -> *) a. Monad m => a -> m a
return (Categorical n -> Parsed (Categorical n)
forall a. a -> Parsed a
Possibly (Set Text -> Categorical n
forall (n :: Nat). Set Text -> Categorical n
Categorical Set Text
catCombined))
    | Bool
otherwise = m (Parsed (Categorical n))
forall (m :: * -> *) a. MonadPlus m => m a
mzero
    where getCats :: Parsed (Categorical n) -> Set Text
getCats = Categorical n -> Set Text
forall (n :: Nat). Categorical n -> Set Text
categories (Categorical n -> Set Text)
-> (Parsed (Categorical n) -> Categorical n)
-> Parsed (Categorical n)
-> Set Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parsed (Categorical n) -> Categorical n
forall a. Parsed a -> a
parsedValue
          catCombined :: Set Text
catCombined = Set Text -> Set Text -> Set Text
forall a. Ord a => Set a -> Set a -> Set a
S.union (Parsed (Categorical n) -> Set Text
forall (n :: Nat). Parsed (Categorical n) -> Set Text
getCats Parsed (Categorical n)
p1) (Parsed (Categorical n) -> Set Text
forall (n :: Nat). Parsed (Categorical n) -> Set Text
getCats Parsed (Categorical n)
p2)
          maxVariants :: Int
          maxVariants :: Int
maxVariants = Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Natural -> Integer
forall a. Integral a => a -> Integer
toInteger (Proxy# n -> Natural
forall (n :: Nat). KnownNat n => Proxy# n -> Natural
natVal' (Proxy# n
forall k (a :: k). Proxy# a
proxy# :: Proxy# n)))
  representableAsType :: Parsed (Categorical n)
-> Const (Either (String -> DecsQ) Kind) (Categorical n)
representableAsType (Set Text -> [Text]
forall a. Set a -> [a]
S.toList (Set Text -> [Text])
-> (Parsed (Categorical n) -> Set Text)
-> Parsed (Categorical n)
-> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Categorical n -> Set Text
forall (n :: Nat). Categorical n -> Set Text
categories (Categorical n -> Set Text)
-> (Parsed (Categorical n) -> Categorical n)
-> Parsed (Categorical n)
-> Set Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parsed (Categorical n) -> Categorical n
forall a. Parsed a -> a
parsedValue -> [Text]
cats) =
    Either (String -> DecsQ) Kind
-> Const (Either (String -> DecsQ) Kind) (Categorical n)
forall k a (b :: k). a -> Const a b
Const ((String -> DecsQ) -> Either (String -> DecsQ) Kind
forall a b. a -> Either a b
Left (\String
n -> String -> Maybe String -> [String] -> DecsQ
declareCategorical String
n (String -> Maybe String
forall a. a -> Maybe a
Just String
n) ((Text -> String) -> [Text] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Text -> String
T.unpack [Text]
cats)))