-- Copyright (c) 2016-present, Facebook, Inc. -- All rights reserved. -- -- This source code is licensed under the BSD-style license found in the -- LICENSE file in the root directory of this source tree. An additional grant -- of patent rights can be found in the PATENTS file in the same directory. {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE ExistentialQuantification #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE NoRebindableSyntax #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE StandaloneDeriving #-} module Duckling.Types where import Control.DeepSeq import Data.Aeson import qualified Data.ByteString.Lazy as LB import Data.GADT.Compare import Data.Hashable import Data.Maybe import Data.String import Data.Text (Text) import qualified Data.Text.Encoding as Text import Data.Typeable ((:~:)(Refl), Typeable) import GHC.Generics import Prelude import qualified Text.Regex.Base as R import qualified Text.Regex.PCRE as PCRE import Duckling.Dimensions.Types import Duckling.Resolve -- ----------------------------------------------------------------- -- Token data Token = forall a . (Resolve a, Eq a, Hashable a, Show a, NFData a) => Token (Dimension a) a deriving instance Show Token instance Eq Token where Token d1 v1 == Token d2 v2 = case geq d1 d2 of Just Refl -> v1 == v2 Nothing -> False instance Hashable Token where hashWithSalt s (Token dim v) = hashWithSalt s (dim, v) instance NFData Token where rnf (Token _ v) = rnf v isDimension :: Dimension a -> Token -> Bool isDimension dim (Token dim' _) = isJust $ geq dim dim' data Node = Node { nodeRange :: Range , token :: Token , children :: [Node] , rule :: Maybe Text } deriving (Eq, Generic, Hashable, Show, NFData) data ResolvedToken = Resolved { range :: Range , node :: Node , jsonValue :: Value } deriving (Eq, Show) instance Ord ResolvedToken where compare (Resolved range1 _ json1) (Resolved range2 _ json2) = case compare range1 range2 of EQ -> compare (toJText json1) (toJText json2) z -> z data Candidate = Candidate ResolvedToken Double Bool deriving (Eq, Show) instance Ord Candidate where compare (Candidate (Resolved{range = Range s1 e1, node = Node{token = Token d1 _}}) score1 t1) (Candidate (Resolved{range = Range s2 e2, node = Node{token = tok2}}) score2 t2) | isDimension d1 tok2 = case starts of EQ -> case ends of EQ -> compare score1 score2 z -> z LT -> case ends of LT -> EQ _ -> GT GT -> case ends of GT -> EQ _ -> LT | t1 == t2 = compRange | t1 && compRange == GT = GT | t2 && compRange == LT = LT | otherwise = EQ where starts = compare s1 s2 ends = compare e1 e2 -- a > b if a recovers b compRange = case starts of EQ -> ends LT -> case ends of LT -> EQ _ -> GT GT -> case ends of GT -> EQ _ -> LT data Range = Range Int Int deriving (Eq, Ord, Generic, Hashable, Show, NFData) type Production = [Token] -> Maybe Token type Predicate = Token -> Bool data PatternItem = Regex PCRE.Regex | Predicate Predicate type Pattern = [PatternItem] data Rule = Rule { name :: Text , pattern :: Pattern , prod :: Production } instance Show Rule where show (Rule name _ _) = show name data Entity = Entity { dim :: Text , body :: Text , value :: Value , start :: Int , end :: Int } deriving (Eq, Generic, Show, NFData) instance ToJSON Entity where toEncoding = genericToEncoding defaultOptions toJText :: ToJSON x => x -> Text toJText = Text.decodeUtf8 . LB.toStrict . encode -- ----------------------------------------------------------------- -- Predicates helpers regex :: String -> PatternItem regex = Regex . R.makeRegexOpts compOpts execOpts where compOpts = PCRE.defaultCompOpt + PCRE.compCaseless + PCRE.compUTF8 execOpts = PCRE.defaultExecOpt dimension :: Typeable a => Dimension a -> PatternItem dimension value = Predicate $ isDimension value