-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.

{-# LANGUAGE GADTs #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE NoRebindableSyntax #-}

module Duckling.Engine
  ( parseAndResolve
  , lookupRegexAnywhere
  , runDuckling
  ) where

import Control.DeepSeq
import Control.Monad.Extra
import Data.Aeson (toJSON)
import Data.ByteString (ByteString)
import Data.Functor.Identity
import Data.Maybe
import Data.Text (Text)
import Prelude
import qualified Data.Array as Array
import qualified Data.Foldable as Foldable
import qualified Data.List as L
import qualified Text.Regex.PCRE as PCRE

import Duckling.Dimensions.Types
import Duckling.Regex.Types
import Duckling.Resolve
import Duckling.Types
import Duckling.Types.Document (Document)
import Duckling.Types.Stash (Stash)
import qualified Duckling.Engine.Regex as Regex
import qualified Duckling.Types.Document as Document
import qualified Duckling.Types.Stash as Stash

-- -----------------------------------------------------------------
-- Engine

type Duckling a = Identity a

runDuckling :: Duckling a -> a
runDuckling :: Duckling a -> a
runDuckling Duckling a
ma = Duckling a -> a
forall a. Identity a -> a
runIdentity Duckling a
ma

parseAndResolve :: [Rule] -> Text -> Context -> Options -> [ResolvedToken]
parseAndResolve :: [Rule] -> Text -> Context -> Options -> [ResolvedToken]
parseAndResolve [Rule]
rules Text
input Context
context Options
options =
  (Node -> Maybe ResolvedToken) -> [Node] -> [ResolvedToken]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (Context -> Options -> Node -> Maybe ResolvedToken
resolveNode Context
context Options
options) ([Node] -> [ResolvedToken])
-> ([Node] -> [Node]) -> [Node] -> [ResolvedToken]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Node] -> [Node]
forall a. NFData a => a -> a
force ([Node] -> [ResolvedToken]) -> [Node] -> [ResolvedToken]
forall a b. (a -> b) -> a -> b
$ Stash -> [Node]
Stash.toPosOrderedList (Stash -> [Node]) -> Stash -> [Node]
forall a b. (a -> b) -> a -> b
$
  Duckling Stash -> Stash
forall a. Identity a -> a
runDuckling (Duckling Stash -> Stash) -> Duckling Stash -> Stash
forall a b. (a -> b) -> a -> b
$ [Rule] -> Document -> Duckling Stash
parseString [Rule]
rules (Text -> Document
Document.fromText Text
input)

produce :: Match -> Maybe Node
produce :: Match -> Maybe Node
produce (Rule
_, Int
_, []) = Maybe Node
forall a. Maybe a
Nothing
produce (Rule Text
name Pattern
_ Production
production, Int
_, etuor :: [Node]
etuor@(Node {nodeRange :: Node -> Range
nodeRange = Range Int
_ Int
e}:[Node]
_)) = do
  let route :: [Node]
route = [Node] -> [Node]
forall a. [a] -> [a]
reverse [Node]
etuor
  Token
token <- Maybe Token -> Maybe Token
forall a. NFData a => a -> a
force (Maybe Token -> Maybe Token) -> Maybe Token -> Maybe Token
forall a b. (a -> b) -> a -> b
$ Production
production Production -> Production
forall a b. (a -> b) -> a -> b
$ (Node -> Token) -> [Node] -> [Token]
forall a b. (a -> b) -> [a] -> [b]
map Node -> Token
token [Node]
route
  case [Node]
route of
    (Node {nodeRange :: Node -> Range
nodeRange = Range Int
p Int
_}:[Node]
_) -> Node -> Maybe Node
forall a. a -> Maybe a
Just Node :: Range -> Token -> [Node] -> Maybe Text -> Node
Node
      { nodeRange :: Range
nodeRange = Int -> Int -> Range
Range Int
p Int
e
      , token :: Token
token = Token
token
      , children :: [Node]
children = [Node]
route
      , rule :: Maybe Text
rule = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
name
      }
    [] -> Maybe Node
forall a. Maybe a
Nothing

-- | Handle a regex match at a given position
lookupRegex :: Document -> PCRE.Regex -> Int -> Duckling [Node]
lookupRegex :: Document -> Regex -> Int -> Duckling [Node]
lookupRegex Document
doc Regex
_regex Int
position | Int
position Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Document -> Int
Document.length Document
doc = [Node] -> Duckling [Node]
forall (m :: * -> *) a. Monad m => a -> m a
return []
lookupRegex Document
doc Regex
regex Int
position =
  Document
-> Regex
-> Int
-> (Regex -> ByteString -> Maybe MatchArray)
-> Duckling [Node]
forall (t :: * -> *).
Foldable t =>
Document
-> Regex
-> Int
-> (Regex -> ByteString -> t MatchArray)
-> Duckling [Node]
lookupRegexCommon Document
doc Regex
regex Int
position Regex -> ByteString -> Maybe MatchArray
forall regex source.
RegexLike regex source =>
regex -> source -> Maybe MatchArray
Regex.matchOnce

-- | Handle a regex match anywhere in the text
lookupRegexAnywhere :: Document -> PCRE.Regex -> Duckling [Node]
lookupRegexAnywhere :: Document -> Regex -> Duckling [Node]
lookupRegexAnywhere Document
doc Regex
regex = Document
-> Regex
-> Int
-> (Regex -> ByteString -> [MatchArray])
-> Duckling [Node]
forall (t :: * -> *).
Foldable t =>
Document
-> Regex
-> Int
-> (Regex -> ByteString -> t MatchArray)
-> Duckling [Node]
lookupRegexCommon Document
doc Regex
regex Int
0 Regex -> ByteString -> [MatchArray]
forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
Regex.matchAll

{-# INLINE lookupRegexCommon #-}
-- INLINE bloats the code a bit, but the code is better
lookupRegexCommon
  :: Foldable t
  => Document
  -> PCRE.Regex
  -> Int
  -> (PCRE.Regex -> ByteString -> t PCRE.MatchArray)
  -> Duckling [Node]
lookupRegexCommon :: Document
-> Regex
-> Int
-> (Regex -> ByteString -> t MatchArray)
-> Duckling [Node]
lookupRegexCommon Document
doc Regex
regex Int
position Regex -> ByteString -> t MatchArray
matchFun = [Node] -> Duckling [Node]
forall (m :: * -> *) a. Monad m => a -> m a
return [Node]
nodes
  where
  -- See Note [Regular expressions and Text] to understand what's going
  -- on here
  (ByteString
substring, (Int, Int) -> Text
rangeToText, Int -> Int -> (Int, Int)
translateRange) =
    Document
-> Int
-> (ByteString, (Int, Int) -> Text, Int -> Int -> (Int, Int))
Document.byteStringFromPos Document
doc Int
position
  nodes :: [Node]
nodes = (MatchArray -> Maybe Node) -> [MatchArray] -> [Node]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe ([(Int, Int)] -> Maybe Node
f ([(Int, Int)] -> Maybe Node)
-> (MatchArray -> [(Int, Int)]) -> MatchArray -> Maybe Node
forall b c a. (b -> c) -> (a -> b) -> a -> c
. MatchArray -> [(Int, Int)]
forall i e. Array i e -> [e]
Array.elems)
    ([MatchArray] -> [Node]) -> [MatchArray] -> [Node]
forall a b. (a -> b) -> a -> b
$ t MatchArray -> [MatchArray]
forall (t :: * -> *) a. Foldable t => t a -> [a]
Foldable.toList
    (t MatchArray -> [MatchArray]) -> t MatchArray -> [MatchArray]
forall a b. (a -> b) -> a -> b
$ Regex -> ByteString -> t MatchArray
matchFun Regex
regex ByteString
substring
  f :: [(Int, Int)] -> Maybe Node
  f :: [(Int, Int)] -> Maybe Node
f [] = Maybe Node
forall a. Maybe a
Nothing
  f ((Int
0,Int
0):[(Int, Int)]
_) = Maybe Node
forall a. Maybe a
Nothing
  f ((Int
bsStart, Int
bsLen):[(Int, Int)]
groups) =
    if Document -> Int -> Int -> Bool
Document.isRangeValid Document
doc Int
start Int
end
      then Node -> Maybe Node
forall a. a -> Maybe a
Just Node
node
      else Maybe Node
forall a. Maybe a
Nothing
    where
    textGroups :: [Text]
textGroups = ((Int, Int) -> Text) -> [(Int, Int)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Int, Int) -> Text
rangeToText [(Int, Int)]
groups
    (Int
start, Int
end) = Int -> Int -> (Int, Int)
translateRange Int
bsStart Int
bsLen
    node :: Node
node = Node :: Range -> Token -> [Node] -> Maybe Text -> Node
Node
      { nodeRange :: Range
nodeRange = Int -> Int -> Range
Range Int
start Int
end
      , token :: Token
token = Dimension GroupMatch -> GroupMatch -> Token
forall a.
(Resolve a, Eq a, Hashable a, Show a, NFData a) =>
Dimension a -> a -> Token
Token Dimension GroupMatch
RegexMatch ([Text] -> GroupMatch
GroupMatch [Text]
textGroups)
      , children :: [Node]
children = []
      , rule :: Maybe Text
rule = Maybe Text
forall a. Maybe a
Nothing
      }

-- | Handle one PatternItem at a given position
lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
lookupItem Document
doc (Regex Regex
re) Stash
_ Int
position =
  (Node -> Bool) -> [Node] -> [Node]
forall a. (a -> Bool) -> [a] -> [a]
filter (Int -> Document -> Node -> Bool
isPositionValid Int
position Document
doc) ([Node] -> [Node]) -> Duckling [Node] -> Duckling [Node]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
  Document -> Regex -> Int -> Duckling [Node]
lookupRegex Document
doc Regex
re Int
position
lookupItem Document
doc (Predicate Predicate
p) Stash
stash Int
position =
  [Node] -> Duckling [Node]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Node] -> Duckling [Node]) -> [Node] -> Duckling [Node]
forall a b. (a -> b) -> a -> b
$
  (Node -> Bool) -> [Node] -> [Node]
forall a. (a -> Bool) -> [a] -> [a]
filter (Predicate
p Predicate -> (Node -> Token) -> Node -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> Token
token) ([Node] -> [Node]) -> [Node] -> [Node]
forall a b. (a -> b) -> a -> b
$
  (Node -> Bool) -> [Node] -> [Node]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Int -> Document -> Node -> Bool
isPositionValid Int
position Document
doc) ([Node] -> [Node]) -> [Node] -> [Node]
forall a b. (a -> b) -> a -> b
$
  Stash -> Int -> [Node]
Stash.toPosOrderedListFrom Stash
stash Int
position

-- | Handle one PatternItem anywhere in the text
lookupItemAnywhere :: Document -> PatternItem -> Stash -> Duckling [Node]
lookupItemAnywhere :: Document -> PatternItem -> Stash -> Duckling [Node]
lookupItemAnywhere Document
doc (Regex Regex
re) Stash
_ = Document -> Regex -> Duckling [Node]
lookupRegexAnywhere Document
doc Regex
re
lookupItemAnywhere Document
_doc (Predicate Predicate
p) Stash
stash =
  [Node] -> Duckling [Node]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Node] -> Duckling [Node]) -> [Node] -> Duckling [Node]
forall a b. (a -> b) -> a -> b
$ (Node -> Bool) -> [Node] -> [Node]
forall a. (a -> Bool) -> [a] -> [a]
filter (Predicate
p Predicate -> (Node -> Token) -> Node -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> Token
token) ([Node] -> [Node]) -> [Node] -> [Node]
forall a b. (a -> b) -> a -> b
$ Stash -> [Node]
Stash.toPosOrderedList Stash
stash

isPositionValid :: Int -> Document -> Node -> Bool
isPositionValid :: Int -> Document -> Node -> Bool
isPositionValid Int
position Document
sentence Node{nodeRange :: Node -> Range
nodeRange = Range Int
start Int
_} =
  Document -> Int -> Int -> Bool
Document.isAdjacent Document
sentence Int
position Int
start

-- | A match is full if its rule pattern is empty.
-- (rule, endPosition, reversedRoute)
type Match = (Rule, Int, [Node])

-- | Recursively augments `matches`.
-- Discards partial matches stuck by a regex.
matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
matchAll Document
sentence Stash
stash [Match]
matches = (Match -> Duckling [Match]) -> [Match] -> Duckling [Match]
forall (m :: * -> *) a b. Monad m => (a -> m [b]) -> [a] -> m [b]
concatMapM Match -> Duckling [Match]
mkNextMatches [Match]
matches
  where
    mkNextMatches :: Match -> Duckling [Match]
    mkNextMatches :: Match -> Duckling [Match]
mkNextMatches match :: Match
match@(Rule {pattern :: Rule -> Pattern
pattern = []}, Int
_, [Node]
_) = [Match] -> Duckling [Match]
forall (m :: * -> *) a. Monad m => a -> m a
return [ Match
match ]
    mkNextMatches match :: Match
match@(Rule {pattern :: Rule -> Pattern
pattern = PatternItem
p:Pattern
_}, Int
_, [Node]
_) = do
      [Match]
nextMatches <- Document -> Stash -> [Match] -> Duckling [Match]
matchAll Document
sentence Stash
stash ([Match] -> Duckling [Match])
-> Duckling [Match] -> Duckling [Match]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Document -> Stash -> Match -> Duckling [Match]
matchFirst Document
sentence Stash
stash Match
match
      [Match] -> Duckling [Match]
forall (m :: * -> *) a. Monad m => a -> m a
return ([Match] -> Duckling [Match]) -> [Match] -> Duckling [Match]
forall a b. (a -> b) -> a -> b
$ case PatternItem
p of
        Regex Regex
_ -> [Match]
nextMatches
        Predicate Predicate
_ -> Match
matchMatch -> [Match] -> [Match]
forall a. a -> [a] -> [a]
:[Match]
nextMatches

-- | Returns all matches matching the first pattern item of `match`,
-- resuming from a Match position
matchFirst :: Document -> Stash -> Match -> Duckling [Match]
matchFirst :: Document -> Stash -> Match -> Duckling [Match]
matchFirst Document
_ Stash
_ (Rule {pattern :: Rule -> Pattern
pattern = []}, Int
_, [Node]
_) = [Match] -> Duckling [Match]
forall (m :: * -> *) a. Monad m => a -> m a
return []
matchFirst Document
sentence Stash
stash (rule :: Rule
rule@Rule{pattern :: Rule -> Pattern
pattern = PatternItem
p : Pattern
ps}, Int
position, [Node]
route) =
  (Node -> Match) -> [Node] -> [Match]
forall a b. (a -> b) -> [a] -> [b]
map ([Node] -> Rule -> Node -> Match
mkMatch [Node]
route Rule
newRule) ([Node] -> [Match]) -> Duckling [Node] -> Duckling [Match]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Document -> PatternItem -> Stash -> Int -> Duckling [Node]
lookupItem Document
sentence PatternItem
p Stash
stash Int
position
  where
  newRule :: Rule
newRule = Rule
rule { pattern :: Pattern
pattern = Pattern
ps }

-- | Returns all matches matching the first pattern item of `match`,
-- starting anywhere
matchFirstAnywhere :: Document -> Stash -> Rule -> Duckling [Match]
matchFirstAnywhere :: Document -> Stash -> Rule -> Duckling [Match]
matchFirstAnywhere Document
_sentence Stash
_stash Rule {pattern :: Rule -> Pattern
pattern = []} = [Match] -> Duckling [Match]
forall (m :: * -> *) a. Monad m => a -> m a
return []
matchFirstAnywhere Document
sentence Stash
stash rule :: Rule
rule@Rule{pattern :: Rule -> Pattern
pattern = PatternItem
p : Pattern
ps} =
  (Node -> Match) -> [Node] -> [Match]
forall a b. (a -> b) -> [a] -> [b]
map ([Node] -> Rule -> Node -> Match
mkMatch [] Rule
newRule) ([Node] -> [Match]) -> Duckling [Node] -> Duckling [Match]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Document -> PatternItem -> Stash -> Duckling [Node]
lookupItemAnywhere Document
sentence PatternItem
p Stash
stash
  where
  newRule :: Rule
newRule = Rule
rule { pattern :: Pattern
pattern = Pattern
ps }

{-# INLINE mkMatch #-}
mkMatch :: [Node] -> Rule -> Node -> Match
mkMatch :: [Node] -> Rule -> Node -> Match
mkMatch [Node]
route Rule
newRule (node :: Node
node@Node {nodeRange :: Node -> Range
nodeRange = Range Int
_ Int
pos'}) =
  [Node]
newRoute [Node] -> Match -> Match
`seq` (Rule
newRule, Int
pos', [Node]
newRoute)
  where newRoute :: [Node]
newRoute = Node
nodeNode -> [Node] -> [Node]
forall a. a -> [a] -> [a]
:[Node]
route

-- | Finds new matches resulting from newly added tokens.
-- Produces new tokens from full matches.
parseString1
  :: [Rule] -> Document -> Stash -> Stash -> [Match]
  -> Duckling (Stash, [Match])
parseString1 :: [Rule]
-> Document
-> Stash
-> Stash
-> [Match]
-> Duckling (Stash, [Match])
parseString1 [Rule]
rules Document
sentence Stash
stash Stash
new [Match]
matches = do
  -- Recursively match patterns.
  -- Find which `matches` can advance because of `new`.
  [Match]
newPartial <- (Match -> Duckling [Match]) -> [Match] -> Duckling [Match]
forall (m :: * -> *) a b. Monad m => (a -> m [b]) -> [a] -> m [b]
concatMapM (Document -> Stash -> Match -> Duckling [Match]
matchFirst Document
sentence Stash
new) [Match]
matches

  -- Find new matches resulting from newly added tokens (`new`)
  [Match]
newMatches <- (Rule -> Duckling [Match]) -> [Rule] -> Duckling [Match]
forall (m :: * -> *) a b. Monad m => (a -> m [b]) -> [a] -> m [b]
concatMapM (Document -> Stash -> Rule -> Duckling [Match]
matchFirstAnywhere Document
sentence Stash
new) [Rule]
rules

  ([Match]
full, [Match]
partial) <- (Match -> Bool) -> [Match] -> ([Match], [Match])
forall a. (a -> Bool) -> [a] -> ([a], [a])
L.partition (\(Rule {Pattern
pattern :: Pattern
pattern :: Rule -> Pattern
pattern}, Int
_, [Node]
_) -> Pattern -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null Pattern
pattern)
    ([Match] -> ([Match], [Match]))
-> Duckling [Match] -> Identity ([Match], [Match])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Document -> Stash -> [Match] -> Duckling [Match]
matchAll Document
sentence Stash
stash ([Match]
newPartial [Match] -> [Match] -> [Match]
forall a. [a] -> [a] -> [a]
++ [Match]
newMatches)

  -- Produce full matches as new tokens
  (Stash, [Match]) -> Duckling (Stash, [Match])
forall (m :: * -> *) a. Monad m => a -> m a
return ( [Node] -> Stash
Stash.fromList ([Node] -> Stash) -> [Node] -> Stash
forall a b. (a -> b) -> a -> b
$ (Match -> Maybe Node) -> [Match] -> [Node]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Match -> Maybe Node
produce [Match]
full
         , [Match]
partial [Match] -> [Match] -> [Match]
forall a. [a] -> [a] -> [a]
++ [Match]
matches
         )

-- | Produces all tokens recursively.
saturateParseString
  :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
saturateParseString :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
saturateParseString [Rule]
rules Document
sentence Stash
stash Stash
new [Match]
matches = do
  (Stash
new', [Match]
matches') <- [Rule]
-> Document
-> Stash
-> Stash
-> [Match]
-> Duckling (Stash, [Match])
parseString1 [Rule]
rules Document
sentence Stash
stash Stash
new [Match]
matches
  let stash' :: Stash
stash' = Stash -> Stash -> Stash
Stash.union Stash
stash Stash
new'
  if Stash -> Bool
Stash.null Stash
new'
    then Stash -> Duckling Stash
forall (m :: * -> *) a. Monad m => a -> m a
return Stash
stash
    else [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
saturateParseString [Rule]
rules Document
sentence Stash
stash' Stash
new' [Match]
matches'

parseString :: [Rule] -> Document -> Duckling Stash
parseString :: [Rule] -> Document -> Duckling Stash
parseString [Rule]
rules Document
sentence = do
  (Stash
new, [Match]
partialMatches) <-
    -- One the first pass we try all the rules
    [Rule]
-> Document
-> Stash
-> Stash
-> [Match]
-> Duckling (Stash, [Match])
parseString1 [Rule]
rules Document
sentence Stash
Stash.empty Stash
Stash.empty []
  if Stash -> Bool
Stash.null Stash
new
    then Stash -> Duckling Stash
forall (m :: * -> *) a. Monad m => a -> m a
return Stash
Stash.empty
    else
    -- For subsequent passes, we only try rules starting with a predicate.
    [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
saturateParseString [Rule]
headPredicateRules Document
sentence Stash
new Stash
new [Match]
partialMatches
  where
  headPredicateRules :: [Rule]
headPredicateRules =
    [ Rule
rule | rule :: Rule
rule@Rule{pattern :: Rule -> Pattern
pattern = (Predicate Predicate
_ : Pattern
_)} <- [Rule]
rules ]

resolveNode :: Context -> Options -> Node -> Maybe ResolvedToken
resolveNode :: Context -> Options -> Node -> Maybe ResolvedToken
resolveNode Context
context Options
options n :: Node
n@Node{token :: Node -> Token
token = (Token Dimension a
dim a
dd), nodeRange :: Node -> Range
nodeRange = Range
r}
  = do
  (ResolvedValue a
val, Bool
latent) <- Context -> Options -> a -> Maybe (ResolvedValue a, Bool)
forall a.
Resolve a =>
Context -> Options -> a -> Maybe (ResolvedValue a, Bool)
resolve Context
context Options
options a
dd
  ResolvedToken -> Maybe ResolvedToken
forall a. a -> Maybe a
Just Resolved :: Range -> Node -> ResolvedVal -> Bool -> ResolvedToken
Resolved
    { range :: Range
range = Range
r
    , node :: Node
node = Node
n
    , rval :: ResolvedVal
rval = Dimension a -> ResolvedValue a -> ResolvedVal
forall a.
(Resolve a, Eq (ResolvedValue a), Show (ResolvedValue a),
 ToJSON (ResolvedValue a)) =>
Dimension a -> ResolvedValue a -> ResolvedVal
RVal Dimension a
dim ResolvedValue a
val
    , isLatent :: Bool
isLatent = Bool
latent
    }