module Hunt.Index.Schema
(
Schema
, ContextSchema (..)
, ContextType (..)
, ContextTypes
, CValidator (..)
, CNormalizer (..)
, normalize'
, ctText
, ctTextSimple
, ctInt
, ctDate
, ctPosition
, ctPositionRTree
, cnUpperCase
, cnLowerCase
, cnZeroFill
)
where
import Control.Applicative
import Control.Monad (mzero)
import Data.Aeson
import Data.Binary hiding (Word)
import Data.Default
import qualified Data.List as L
import Data.Map hiding (null)
import Data.Maybe (isNothing)
import Data.Text hiding (null)
import qualified Data.Text as T
import Data.Text.Binary ()
import Hunt.Common.BasicTypes
import qualified Hunt.Index as Ix
import Hunt.Index.IndexImpl (IndexImpl, mkIndex)
import Hunt.Index.InvertedIndex
import Hunt.Index.PrefixTreeIndex ( PrefixTreeIndexInt
, PrefixTreeIndexDate
, SimplePrefixTreeIndex )
import Hunt.Index.PrefixTreeIndex2Dim ( PrefixTreeIndexPosition )
import Hunt.Index.RTreeIndex
import qualified Hunt.Index.Schema.Normalize.Date as Date
import qualified Hunt.Index.Schema.Normalize.Int as Int
import qualified Hunt.Index.Schema.Normalize.Position as Pos
import Hunt.Utility
type Schema
= Map Context ContextSchema
data ContextSchema = ContextSchema
{
cxRegEx :: Maybe RegEx
, cxNormalizer :: [CNormalizer]
, cxWeight :: Weight
, cxDefault :: Bool
, cxType :: ContextType
}
deriving Show
instance Default ContextSchema where
def = ContextSchema Nothing [] 1.0 True def
type ContextTypes = [ContextType]
data ContextType = CType
{
ctName :: Text
, ctRegEx :: RegEx
, ctValidate :: CValidator
, ctIxImpl :: IndexImpl
}
deriving Show
instance Default ContextType where
def = ctText
ctText :: ContextType
ctText = CType
{ ctName = "text"
, ctRegEx = "\\w*"
, ctValidate = def
, ctIxImpl = def
}
ctTextSimple :: ContextType
ctTextSimple = CType
{ ctName = "text-small"
, ctRegEx = "\\w*"
, ctValidate = def
, ctIxImpl = simplePT
}
ctInt :: ContextType
ctInt = CType
{ ctName = "int"
, ctRegEx = "([-]?[0-9]*)"
, ctValidate = CValidator $ Int.isInt
, ctIxImpl = intInv
}
ctDate :: ContextType
ctDate = CType
{ ctName = "date"
, ctRegEx = "[0-9]{4}-((0[1-9])|(1[0-2]))-((0[1-9])|([12][0-9])|(3[01]))"
, ctValidate = CValidator $ Date.isAnyDate . unpack
, ctIxImpl = dateInv
}
ctPosition :: ContextType
ctPosition = CType
{ ctName = "position"
, ctRegEx = "-?(90(\\.0*)?|[1-8]?[0-9](\\.[0-9]*)?)--?((180(\\.0*)?)|(1[0-7][0-9])|([1-9]?[0-9]))(\\.[0-9]*)?"
, ctValidate = CValidator $ Pos.isPosition
, ctIxImpl = positionInv
}
ctPositionRTree :: ContextType
ctPositionRTree = CType
{ ctName = "position-rtree"
, ctRegEx = "-?(90(\\.0*)?|[1-8]?[0-9](\\.[0-9]*)?)--?((180(\\.0*)?)|(1[0-7][0-9])|([1-9]?[0-9]))(\\.[0-9]*)?"
, ctValidate = CValidator $ Pos.isPosition
, ctIxImpl = positionRTree
}
instance Default IndexImpl where
def = defaultInv
defaultInv :: IndexImpl
defaultInv = mkIndex (Ix.empty :: InvertedIndex)
simplePT :: IndexImpl
simplePT = mkIndex (Ix.empty :: SimplePrefixTreeIndex)
intInv :: IndexImpl
intInv = mkIndex (Ix.empty :: PrefixTreeIndexInt)
dateInv :: IndexImpl
dateInv = mkIndex (Ix.empty :: PrefixTreeIndexDate)
positionInv :: IndexImpl
positionInv = mkIndex (Ix.empty :: PrefixTreeIndexPosition)
positionRTree :: IndexImpl
positionRTree = mkIndex (Ix.empty :: SimpleRTreeIndex)
data CValidator = CValidator { validate :: Word -> Bool }
instance Default CValidator where
def = CValidator $ const True
instance Show CValidator where
show _ = "CValidator"
data CNormalizer = CNormalizer
{ cnName :: Text
, normalize :: Text -> Text
}
normalize' :: [CNormalizer] -> Word -> Word
normalize' ns = L.foldl' (\f2 (CNormalizer _ f1) -> f1 . f2) id $ ns
instance Show CNormalizer where
show = unpack . cnName
instance Default CNormalizer where
def = CNormalizer "" id
cnUpperCase :: CNormalizer
cnUpperCase = CNormalizer "UpperCase" T.toUpper
cnLowerCase :: CNormalizer
cnLowerCase = CNormalizer "LowerCase" T.toLower
cnZeroFill :: CNormalizer
cnZeroFill = CNormalizer "ZeroFill" Int.normalizeToText
instance FromJSON ContextType where
parseJSON (String s) = return $ def { ctName = s }
parseJSON _ = mzero
instance ToJSON ContextType where
toJSON (CType n _ _ _) = String n
instance FromJSON CNormalizer where
parseJSON (String s) = return $ def { cnName = s }
parseJSON _ = mzero
instance ToJSON CNormalizer where
toJSON (CNormalizer n _) = String n
instance FromJSON ContextSchema where
parseJSON (Object o) = do
r <- o .:? "regexp"
n <- o .:? "normalizers" .!= []
w <- o .:? "weight" .!= 1.0
d <- o .:? "default" .!= True
ct <- o .: "type"
return $ ContextSchema r n w d ct
parseJSON _ = mzero
instance ToJSON ContextSchema where
toJSON (ContextSchema r n w d ct) = object' $
[ "type" .== ct
, "weight" .=? w .\. (== 1.0)
, "regexp" .=? r .\. isNothing
, "normalizers" .=? n .\. null
, "default" .=? d .\. id
]
instance Binary ContextSchema where
get = ContextSchema <$> get <*> get <*> get <*> get <*> get
put (ContextSchema a b c d e) = put a >> put b >> put c >> put d >> put e
instance Binary ContextType where
put (CType n _ _ _) = put n
get = get >>= \n -> return $ def { ctName = n }
instance Binary CNormalizer where
put (CNormalizer n _) = put n
get = get >>= \n -> return $ def { cnName = n }