-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.


{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Numeral rules for Arabic: spelled-out number words, digit strings with
-- separators, and the composition rules that combine them.
module Duckling.Numeral.AR.Rules
  ( rules
  ) where

import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text

import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral

-- | The word for four (either alef spelling, optional feminine ending);
-- always produces @integer 4@.
ruleInteger5 :: Rule
ruleInteger5 = Rule
  { name = "integer 4"
  , pattern =
    [ regex "([أا]ربع[ةه]?)"
    ]
  , prod = \_ -> integer 4
  }

-- | @<hundreds> و <numeral>@: a multiple of one hundred (100..900), the
-- conjunction "و", then a numeral accepted by @numberBetween 1 100@.
-- The result is the sum of the two values.
ruleInteger23 :: Rule
ruleInteger23 = Rule
  { name = "integer 101..999"
  , pattern =
    [ oneOf [100, 200 .. 900]
    , regex "و"
    , numberBetween 1 100
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = v1}:
       _:
       Token Numeral NumeralData{TNumeral.value = v2}:
       _) -> double $ v1 + v2
      _ -> Nothing
  }

-- | The compound word for twelve (several spellings, optional space before
-- the "ten" part); always produces @integer 12@.
ruleInteger18 :: Rule
ruleInteger18 = Rule
  { name = "integer 12"
  , pattern =
    [ regex "([إا]?ثن(ت)?[يىا] ?عشر[ةه]?)"
    ]
  , prod = \_ -> integer 12
  }

-- | Stem of each "tens" word mapped to its tens digit. 'ruleInteger19'
-- multiplies the looked-up digit by ten, which is why the stem "عشر" maps
-- to 2 (giving 20) rather than to 10.
digitsMap :: HashMap Text Integer
digitsMap = HashMap.fromList
  [ ("عشر", 2)
  , ("ثلاث", 3)
  , ("اربع", 4)
  , ("أربع", 4)
  , ("خمس", 5)
  , ("ست", 6)
  , ("سبع", 7)
  , ("ثمان", 8)
  , ("تسع", 9)
  ]

-- | A tens word: one of the stems in 'digitsMap' followed by the suffix
-- "ون" or "ين". The first capture group (the stem) is looked up in
-- 'digitsMap' and multiplied by ten; an unknown stem yields 'Nothing'.
ruleInteger19 :: Rule
ruleInteger19 = Rule
  { name = "integer (20..90)"
  , pattern =
    [ regex "(عشر|ثلاث|[أا]ربع|خمس|ست|سبع|ثمان|تسع)(ون|ين)"
    ]
  , prod = \tokens -> case tokens of
      Token RegexMatch (GroupMatch (match:_)):_ ->
        (* 10) <$> HashMap.lookup match digitsMap >>= integer
      _ -> Nothing
  }

-- | The dual form of "hundred" (either case ending); always produces
-- @integer 200@.
ruleInteger200 :: Rule
ruleInteger200 = Rule
  { name = "integer (200)"
  , pattern =
    [ regex "مائتان|مائتين"
    ]
  , prod = const $ integer 200
  }

-- | @<units> و <tens>@: a numeral accepted by @numberBetween 1 10@, the
-- conjunction "و", then a multiple of ten (20..90). The result is the sum
-- of the two values.
ruleInteger22 :: Rule
ruleInteger22 = Rule
  { name = "integer 21..99"
  , pattern =
    [ numberBetween 1 10
    , regex "و"
    , oneOf [20, 30 .. 90]
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = v1}:
       _:
       Token Numeral NumeralData{TNumeral.value = v2}:
       _) -> double $ v1 + v2
      _ -> Nothing
  }

-- | A numeral accepted by @numberBetween 3 10@ immediately followed by a
-- numeral whose value is exactly 10. The result is the first value plus 10.
-- NOTE(review): the rule name suggests 'numberBetween' excludes its upper
-- bound; confirm against its definition.
ruleInteger21 :: Rule
ruleInteger21 = Rule
  { name = "integer (13..19)"
  , pattern =
    [ numberBetween 3 10
    , numberWith TNumeral.value (== 10)
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = v}:_) -> double $ v + 10
      _ -> Nothing
  }

-- | Digits with comma-separated groups of three and a fractional part
-- (for example @1,234.5@). The commas are stripped before the text is
-- handed to 'parseDouble'.
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
  { name = "decimal with thousands separator"
  , pattern =
    [ regex "(\\d+(,\\d\\d\\d)+\\.\\d+)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        parseDouble (Text.replace "," Text.empty match) >>= double
      _ -> Nothing
  }

-- | Any numeral followed by a numeral flagged as multipliable; the two
-- tokens are combined by the 'multiply' helper.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern =
    [ dimension Numeral
    , numberWith TNumeral.multipliable id
    ]
  , prod = \tokens -> case tokens of
      (token1:token2:_) -> multiply token1 token2
      _ -> Nothing
  }

-- | The compound word for eleven (several spellings of the first part,
-- separated from the "ten" part by a single space); always produces
-- @integer 11@.
ruleInteger15 :: Rule
ruleInteger15 = Rule
  { name = "integer 11"
  , pattern =
    [ regex "([إاأ]حد[يى]? عشر[ةه]?)"
    ]
  , prod = \_ -> integer 11
  }

-- | A digit string containing a decimal point, with optional integer part
-- (for example @.5@ or @12.75@); parsed by 'parseDecimal'.
ruleDecimalNumeral :: Rule
ruleDecimalNumeral = Rule
  { name = "decimal number"
  , pattern =
    [ regex "(\\d*\\.\\d+)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) -> parseDecimal True match
      _ -> Nothing
  }

-- | Words for hundred, thousand and million. Each recognised spelling
-- produces a multipliable numeral carrying the matching grain (2, 3 or 6).
--
-- The pattern deliberately has no bare-alef alternative: an alternative that
-- can succeed on a lone "ا" placed before @[آا]لاف@ wins the ordered
-- alternation for the input "الاف", captures only "ا", and makes the
-- "الاف" entry below unreachable.
--
-- NOTE(review): the first alternative also admits the "ي" spellings (such as
-- "مية"), which have no entry below and therefore yield 'Nothing'. Confirm
-- whether they should map to one hundred.
rulePowersOfTen :: Rule
rulePowersOfTen = Rule
  { name = "powers of tens"
  , pattern =
    [ regex "(ما?[ئي][ةه]|مئات|[أا]لف|[آا]لاف|ملايين)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        case Text.toLower match of
          "مئة" -> hundred
          "مئه" -> hundred
          "مائة" -> hundred
          "مائه" -> hundred
          "مئات" -> hundred
          "ألف" -> thousand
          "الف" -> thousand
          "الاف" -> thousand
          "آلاف" -> thousand
          "ملايين" -> million
          _ -> Nothing
      _ -> Nothing
  }
  where
    -- Builds a multipliable numeral token with the given value and grain.
    powerOfTen value grain =
      double value >>= withGrain grain >>= withMultipliable
    hundred = powerOfTen 1e2 2
    thousand = powerOfTen 1e3 3
    million = powerOfTen 1e6 6

-- | The word for two (masculine or feminine, either case ending); always
-- produces @integer 2@.
ruleInteger3 :: Rule
ruleInteger3 = Rule
  { name = "integer 2"
  , pattern =
    [ regex "[إا]ثنت?[اي]ن"
    ]
  , prod = \_ -> integer 2
  }

-- | The word for nine (optional feminine ending); always produces
-- @integer 9@.
ruleInteger13 :: Rule
ruleInteger13 = Rule
  { name = "integer 9"
  , pattern =
    [ regex "تسع[ةه]?"
    ]
  , prod = \_ -> integer 9
  }

-- | The word for eight in several spellings; always produces @integer 8@.
-- NOTE(review): the lazy optional alef (@ا??@) in the pattern is unusual;
-- confirm it is intentional and not an encoding artefact.
ruleInteger12 :: Rule
ruleInteger12 = Rule
  { name = "integer 8"
  , pattern =
    [ regex "ثما??ني?[ةه]?"
    ]
  , prod = \_ -> integer 8
  }

-- | A literal @-@ followed by any numeral; the numeral's value is negated.
ruleNumeralsPrefixWithMinus :: Rule
ruleNumeralsPrefixWithMinus = Rule
  { name = "numbers prefix with -, minus"
  , pattern =
    [ regex "-"
    , dimension Numeral
    ]
  , prod = \tokens -> case tokens of
      (_:Token Numeral NumeralData{TNumeral.value = v}:_) ->
        double (v * (- 1))
      _ -> Nothing
  }

-- | The word for five (optional feminine ending); always produces
-- @integer 5@.
ruleInteger7 :: Rule
ruleInteger7 = Rule
  { name = "integer 5"
  , pattern =
    [ regex "خمس[ةه]?"
    ]
  , prod = \_ -> integer 5
  }

-- | The word for ten (optional feminine ending); always produces
-- @integer 10@.
ruleInteger14 :: Rule
ruleInteger14 = Rule
  { name = "integer 10"
  , pattern =
    [ regex "عشر[ةه]?"
    ]
  , prod = \_ -> integer 10
  }

-- | The word for six (optional feminine ending); always produces
-- @integer 6@.
ruleInteger9 :: Rule
ruleInteger9 = Rule
  { name = "integer 6"
  , pattern =
    [ regex "ست[ةه]?"
    ]
  , prod = \_ -> integer 6
  }

-- | The word for zero; always produces @integer 0@.
ruleInteger :: Rule
ruleInteger = Rule
  { name = "integer 0"
  , pattern =
    [ regex "صفر"
    ]
  , prod = \_ -> integer 0
  }

-- | The word for three (optional feminine ending); always produces
-- @integer 3@.
ruleInteger4 :: Rule
ruleInteger4 = Rule
  { name = "integer 3"
  , pattern =
    [ regex "(ثلاث[ةه]?)"
    ]
  , prod = \_ -> integer 3
  }

-- | The word for one (optional feminine ending); always produces
-- @integer 1@.
ruleInteger2 :: Rule
ruleInteger2 = Rule
  { name = "integer 1"
  , pattern =
    [ regex "واحد[ةه]?"
    ]
  , prod = \_ -> integer 1
  }

-- | The word for seven (optional feminine ending); always produces
-- @integer 7@.
ruleInteger11 :: Rule
ruleInteger11 = Rule
  { name = "integer 7"
  , pattern =
    [ regex "سبع[ةه]?"
    ]
  , prod = \_ -> integer 7
  }

-- | @<numeral> فاصلة <numeral>@: a spoken decimal point between two
-- numerals, where the second must not carry a grain. The result is the first
-- value plus @decimalsToDouble@ of the second.
-- NOTE(review): 'decimalsToDouble' presumably turns the digits into a
-- fraction below one; confirm against its definition.
ruleNumeralDotNumeral :: Rule
ruleNumeralDotNumeral = Rule
  { name = "number dot number"
  , pattern =
    [ dimension Numeral
    , regex "فاصل[ةه]"
    , Predicate $ not . hasGrain
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = v1}:
       _:
       Token Numeral NumeralData{TNumeral.value = v2}:
       _) -> double $ v1 + decimalsToDouble v2
      _ -> Nothing
  }

-- | One to three digits followed by one to five comma-separated groups of
-- three digits (for example @1,234,567@). The commas are stripped before
-- the text is handed to 'parseDouble'.
ruleIntegerWithThousandsSeparator :: Rule
ruleIntegerWithThousandsSeparator = Rule
  { name = "integer with thousands separator ,"
  , pattern =
    [ regex "(\\d{1,3}(,\\d\\d\\d){1,5})"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        parseDouble (Text.replace "," Text.empty match) >>= double
      _ -> Nothing
  }

-- | Every rule defined in this module, exported as the Arabic numeral
-- rule set.
rules :: [Rule]
rules =
  [ ruleDecimalNumeral
  , ruleDecimalWithThousandsSeparator
  , ruleInteger
  , ruleInteger11
  , ruleInteger12
  , ruleInteger13
  , ruleInteger14
  , ruleInteger15
  , ruleInteger18
  , ruleInteger19
  , ruleInteger2
  , ruleInteger21
  , ruleInteger22
  , ruleInteger23
  , ruleInteger3
  , ruleInteger4
  , ruleInteger5
  , ruleInteger7
  , ruleInteger9
  , ruleIntegerWithThousandsSeparator
  , ruleMultiply
  , ruleNumeralDotNumeral
  , ruleNumeralsPrefixWithMinus
  , rulePowersOfTen
  , ruleInteger200
  ]