module GF.Text.Clitics (getClitics,getCliticsText) where

import Data.List

-- AR 6/2/2011
-- Analyse word as stem+clitic whenever 
--   (1) clitic is in clitic list
--   (2) either 
--      (a) stem is in Lexicon
--      (b) stem can be analysed as stem0+clitic0
-- 
-- Examples: 
--   Italian amarmi = amar+mi
--   Finnish autossanikohan = autossa+ni+kohan
--
-- The analysis gives all results, including the case where the whole word is in Lexicon.
-- 
-- The clitics in the list are expected to be reversed.

getClitics :: (String -> Bool) -> [String] -> String -> [[String]]
getClitics :: (String -> Bool) -> [String] -> String -> [[String]]
getClitics String -> Bool
isLex [String]
rclitics = ([String] -> [String]) -> [[String]] -> [[String]]
forall a b. (a -> b) -> [a] -> [b]
map ([String] -> [String]
forall a. [a] -> [a]
reverse ([String] -> [String])
-> ([String] -> [String]) -> [String] -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> String) -> [String] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map String -> String
forall a. [a] -> [a]
reverse) ([[String]] -> [[String]])
-> (String -> [[String]]) -> String -> [[String]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [[String]]
clits (String -> [[String]])
-> (String -> String) -> String -> [[String]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. [a] -> [a]
reverse where
  clits :: String -> [[String]]
clits String
rword = String -> [[String]] -> [[String]]
ifLex String
rword [String
rclitString -> [String] -> [String]
forall a. a -> [a] -> [a]
:[String]
more | 
                  String
rclit <- [String]
rclitics, String
stem <- String -> String -> [String]
forall a. Eq a => [a] -> [a] -> [[a]]
splits String
rclit String
rword, [String]
more <- String -> [[String]]
clits String
stem]
  splits :: [a] -> [a] -> [[a]]
splits [a]
c = [[a]] -> ([a] -> [[a]]) -> Maybe [a] -> [[a]]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] [a] -> [[a]]
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe [a] -> [[a]]) -> ([a] -> Maybe [a]) -> [a] -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> [a] -> Maybe [a]
forall a. Eq a => [a] -> [a] -> Maybe [a]
stripPrefix [a]
c

  ifLex :: String -> [[String]] -> [[String]]
ifLex String
w [[String]]
ws = if String -> Bool
isLex (String -> String
forall a. [a] -> [a]
reverse String
w) then [String
w] [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: [[String]]
ws else [[String]]
ws


getCliticsText :: (String -> Bool) -> [String] -> [String] -> [String]
getCliticsText :: (String -> Bool) -> [String] -> [String] -> [String]
getCliticsText String -> Bool
isLex [String]
rclitics = 
  ([String] -> String) -> [[String]] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map [String] -> String
unwords ([[String]] -> [String])
-> ([String] -> [[String]]) -> [String] -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[String]] -> [[String]]
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence ([[String]] -> [[String]])
-> ([String] -> [[String]]) -> [String] -> [[String]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> [String]) -> [String] -> [[String]]
forall a b. (a -> b) -> [a] -> [b]
map (([String] -> String) -> [[String]] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map [String] -> String
render ([[String]] -> [String])
-> (String -> [[String]]) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> Bool) -> [String] -> String -> [[String]]
getClitics String -> Bool
isLex [String]
rclitics) 
 where
  render :: [String] -> String
render = [String] -> String
unwords ([String] -> String)
-> ([String] -> [String]) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String] -> [String]
forall a. a -> [a] -> [a]
intersperse String
"&+"


-- example

--getClitics1 = getClitics exlex1 exclits1
--exlex1   = flip elem ["auto", "naise", "rahan","maa","maahan","maahankaan"]
--exclits1 = map reverse ["ni","ko","han","pas","nsa","kin","kaan"]