{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveGeneric  #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# OPTIONS_HADDOCK hide #-}

-- | Defines the interfaces for Pusan National University spell check service.
module Language.Hanspell.PnuSpellChecker
    ( PnuSpellChecker
    , spellCheckByPnu
    , pnuSpellCheckerMaxWords
    ) where

import qualified Data.ByteString.UTF8 as BU
import qualified Data.ByteString.Lazy.UTF8 as BLU

import Data.List
import Data.List.Split
import Network.HTTP.Types.Status
import Text.Regex
import Debug.Trace
import Control.Monad.Trans.Maybe
import Control.Monad.Trans.Class
import Network.HTTP.Client
import Network.HTTP.Client.TLS
import Data.Aeson
import GHC.Generics

import Language.Hanspell.Typo
import Language.Hanspell.Decoder

-- | Defines a class for 'spellCheckByPnu' function overloading.
class Monad m => PnuSpellChecker m where

    -- | Requests spell check to PNU server, parses the responses, 
    -- and returns @m [Typo]@. @spellCheckByPnu@ has two return types.
    -- One is @MaybeT IO [Typo]@, and the other is @IO [Typo]@. 
    --
    -- @
    -- import Language.Hanspell 
    --
    -- example = do
    --     let sentence = "위에계신분, 잘들리세요?"
    --     typos <- spellCheckByPnu sentence
    --     mapM_ (putStrLn . typoToStringWithStyle False) typos
    -- @
    --
    -- The expected output is:
    --
    -- @
    -- 위에계신분 -> 위에 계신 분
    -- '계신 분'으로 띄어 씁니다.
    -- 
    -- 잘들리세요 -> 잘 들리세요
    -- 부사는 뒤의 말과 띄어 써야 합니다.
    -- (예)
    -- 곧돌아오마 (x)-> 곧 돌아오마 (o)
    -- 부디건강해라(x) -> 부디 건강해라(o)
    -- 어서오십시오(x) -> 어서 오십시오(o)
    -- ...
    -- @
    spellCheckByPnu :: String -> m [Typo]

-- | Obssesive version returning @MaybeT IO [Typo]@.
instance PnuSpellChecker (MaybeT IO) where
    spellCheckByPnu :: String -> MaybeT IO [Typo]
spellCheckByPnu String
text = String -> [Typo]
htmlToTypos (String -> [Typo]) -> MaybeT IO String -> MaybeT IO [Typo]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> MaybeT IO String
requestToPnu String
text

-- | Bold version returning @IO [Typo]@.
instance PnuSpellChecker IO where
    spellCheckByPnu :: String -> IO [Typo]
spellCheckByPnu String
text = do
        Maybe [Typo]
maybe <- MaybeT IO [Typo] -> IO (Maybe [Typo])
forall (m :: * -> *) a. MaybeT m a -> m (Maybe a)
runMaybeT (MaybeT IO [Typo] -> IO (Maybe [Typo]))
-> MaybeT IO [Typo] -> IO (Maybe [Typo])
forall a b. (a -> b) -> a -> b
$ String -> [Typo]
htmlToTypos (String -> [Typo]) -> MaybeT IO String -> MaybeT IO [Typo]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> MaybeT IO String
requestToPnu String
text
        case Maybe [Typo]
maybe of 
            Maybe [Typo]
Nothing -> [Typo] -> IO [Typo]
forall (m :: * -> *) a. Monad m => a -> m a
return []
            Just [Typo]
typos -> [Typo] -> IO [Typo]
forall (m :: * -> *) a. Monad m => a -> m a
return [Typo]
typos

-- | Maximum words count for a 'spellCheckByPnu' request.
pnuSpellCheckerMaxWords :: Int
pnuSpellCheckerMaxWords :: Int
pnuSpellCheckerMaxWords = Int
295
 
-- 'spellCheckByPnu' prints the message below when the HTTP status code is 
-- not 200. Mainly due to timeout.
pnuConnectError :: String
pnuConnectError :: String
pnuConnectError = 
    String
"-- 한스펠 오류: 부산대 서버의 접속 오류로 일부 문장 교정에 실패했습니다."

-- 'spellCheckByPnu' prints the message below when the response is not of 
-- spell checker. Mainly due to the changes of service URL.
invalidResponseFromPnu :: String
invalidResponseFromPnu :: String
invalidResponseFromPnu = 
    String
"-- 한스펠 오류: 부산대 서비스가 유효하지 않은 양식을 반환했습니다. ("
    String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
pnuSpellCheckUrl String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
")"

-- Pnu spell checker URL.
--
-- Try `curl -H "Content-Type: application/x-www-form-urlencoded" \
-- -X POST http://speller.cs.pusan.ac.kr/results -d \
-- --data-urlencode "text1=안녕 하세요"`.
pnuSpellCheckUrl :: String
pnuSpellCheckUrl :: String
pnuSpellCheckUrl = String
"http://speller.cs.pusan.ac.kr/results"

-- For convenience
gsub :: String -> String -> String -> String
gsub :: String -> String -> String -> String
gsub String
from String
to String
text = Regex -> String -> String -> String
subRegex (String -> Regex
mkRegex String
from) String
text String
to

-- Requests spell check to the server, check the responses, and returns it. 
-- When the status code is not 200, or the response is not of spell checker, 
-- traces error message, and returns Nothing.
requestToPnu :: String -> MaybeT IO String
requestToPnu :: String -> MaybeT IO String
requestToPnu String
text = if [String] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (String -> [String]
words String
text) then String -> MaybeT IO String
forall (m :: * -> *) a. Monad m => a -> m a
return String
"" else do
    -- Walkaround for PNU server's weired logic
    let text' :: String
text' = String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
"\r\n" ([String] -> String) -> (String -> [String]) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> [String]
forall a. Eq a => [a] -> [a] -> [[a]]
splitOn String
"\n" (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String
text
    Manager
manager <- IO Manager -> MaybeT IO Manager
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO Manager -> MaybeT IO Manager)
-> IO Manager -> MaybeT IO Manager
forall a b. (a -> b) -> a -> b
$ ManagerSettings -> IO Manager
newManager ManagerSettings
tlsManagerSettings
    let pair :: [(ByteString, ByteString)]
pair = [(ByteString
"text1",String -> ByteString
BU.fromString String
text')]
    Request
initialRequest <- IO Request -> MaybeT IO Request
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO Request -> MaybeT IO Request)
-> IO Request -> MaybeT IO Request
forall a b. (a -> b) -> a -> b
$ String -> IO Request
forall (m :: * -> *). MonadThrow m => String -> m Request
parseRequest String
pnuSpellCheckUrl
    let request :: Request
request = ([(ByteString, ByteString)] -> Request -> Request
urlEncodedBody [(ByteString, ByteString)]
pair Request
initialRequest) { method :: ByteString
method = ByteString
"POST" }
    Response ByteString
response <- IO (Response ByteString) -> MaybeT IO (Response ByteString)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO (Response ByteString) -> MaybeT IO (Response ByteString))
-> IO (Response ByteString) -> MaybeT IO (Response ByteString)
forall a b. (a -> b) -> a -> b
$ Request -> Manager -> IO (Response ByteString)
httpLbs Request
request Manager
manager
    let errCode :: Int
errCode = Status -> Int
statusCode (Response ByteString -> Status
forall body. Response body -> Status
responseStatus Response ByteString
response)

    let pnuResponseInfix :: String
pnuResponseInfix = String
"<title>한국어 맞춤법/문법 검사기</title>"
    if Int
errCode Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
200
       then let body :: String
body = ByteString -> String
BLU.toString (Response ByteString -> ByteString
forall body. Response body -> body
responseBody Response ByteString
response)
             in if String
pnuResponseInfix String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isInfixOf` String
body
                   then String -> MaybeT IO String
forall (m :: * -> *) a. Monad m => a -> m a
return String
body
                   else String -> MaybeT IO String -> MaybeT IO String
forall a. String -> a -> a
trace String
invalidResponseFromPnu (IO (Maybe String) -> MaybeT IO String
forall (m :: * -> *) a. m (Maybe a) -> MaybeT m a
MaybeT (IO (Maybe String) -> MaybeT IO String)
-> IO (Maybe String) -> MaybeT IO String
forall a b. (a -> b) -> a -> b
$ Maybe String -> IO (Maybe String)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe String
forall a. Maybe a
Nothing)
       else String -> MaybeT IO String -> MaybeT IO String
forall a. String -> a -> a
trace (String
pnuConnectError String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" ("String -> String -> String
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
errCode String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
")")
            (IO (Maybe String) -> MaybeT IO String
forall (m :: * -> *) a. m (Maybe a) -> MaybeT m a
MaybeT (IO (Maybe String) -> MaybeT IO String)
-> IO (Maybe String) -> MaybeT IO String
forall a b. (a -> b) -> a -> b
$ Maybe String -> IO (Maybe String)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe String
forall a. Maybe a
Nothing)

-- PNU response format for a Typo.
data PnuTypo = PnuTypo
    { PnuTypo -> String
help :: String
    , PnuTypo -> Int
errorIdx :: Int
    , PnuTypo -> Int
correctMethod :: Int
    , PnuTypo -> Int
start :: Int
    , PnuTypo -> Int
end :: Int
    , PnuTypo -> String
orgStr :: String
    , PnuTypo -> String
candWord :: String
    } deriving (Int -> PnuTypo -> String -> String
[PnuTypo] -> String -> String
PnuTypo -> String
(Int -> PnuTypo -> String -> String)
-> (PnuTypo -> String)
-> ([PnuTypo] -> String -> String)
-> Show PnuTypo
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
showList :: [PnuTypo] -> String -> String
$cshowList :: [PnuTypo] -> String -> String
show :: PnuTypo -> String
$cshow :: PnuTypo -> String
showsPrec :: Int -> PnuTypo -> String -> String
$cshowsPrec :: Int -> PnuTypo -> String -> String
Show, (forall x. PnuTypo -> Rep PnuTypo x)
-> (forall x. Rep PnuTypo x -> PnuTypo) -> Generic PnuTypo
forall x. Rep PnuTypo x -> PnuTypo
forall x. PnuTypo -> Rep PnuTypo x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep PnuTypo x -> PnuTypo
$cfrom :: forall x. PnuTypo -> Rep PnuTypo x
Generic, [PnuTypo] -> Encoding
[PnuTypo] -> Value
PnuTypo -> Encoding
PnuTypo -> Value
(PnuTypo -> Value)
-> (PnuTypo -> Encoding)
-> ([PnuTypo] -> Value)
-> ([PnuTypo] -> Encoding)
-> ToJSON PnuTypo
forall a.
(a -> Value)
-> (a -> Encoding)
-> ([a] -> Value)
-> ([a] -> Encoding)
-> ToJSON a
toEncodingList :: [PnuTypo] -> Encoding
$ctoEncodingList :: [PnuTypo] -> Encoding
toJSONList :: [PnuTypo] -> Value
$ctoJSONList :: [PnuTypo] -> Value
toEncoding :: PnuTypo -> Encoding
$ctoEncoding :: PnuTypo -> Encoding
toJSON :: PnuTypo -> Value
$ctoJSON :: PnuTypo -> Value
ToJSON, Value -> Parser [PnuTypo]
Value -> Parser PnuTypo
(Value -> Parser PnuTypo)
-> (Value -> Parser [PnuTypo]) -> FromJSON PnuTypo
forall a.
(Value -> Parser a) -> (Value -> Parser [a]) -> FromJSON a
parseJSONList :: Value -> Parser [PnuTypo]
$cparseJSONList :: Value -> Parser [PnuTypo]
parseJSON :: Value -> Parser PnuTypo
$cparseJSON :: Value -> Parser PnuTypo
FromJSON)

-- PNU response format for Typos.
data PnuTypos = PnuTypos
    { PnuTypos -> String
str :: String
    , PnuTypos -> [PnuTypo]
errInfo :: [PnuTypo]
    , PnuTypos -> Int
idx :: Int
    } deriving (Int -> PnuTypos -> String -> String
[PnuTypos] -> String -> String
PnuTypos -> String
(Int -> PnuTypos -> String -> String)
-> (PnuTypos -> String)
-> ([PnuTypos] -> String -> String)
-> Show PnuTypos
forall a.
(Int -> a -> String -> String)
-> (a -> String) -> ([a] -> String -> String) -> Show a
showList :: [PnuTypos] -> String -> String
$cshowList :: [PnuTypos] -> String -> String
show :: PnuTypos -> String
$cshow :: PnuTypos -> String
showsPrec :: Int -> PnuTypos -> String -> String
$cshowsPrec :: Int -> PnuTypos -> String -> String
Show, (forall x. PnuTypos -> Rep PnuTypos x)
-> (forall x. Rep PnuTypos x -> PnuTypos) -> Generic PnuTypos
forall x. Rep PnuTypos x -> PnuTypos
forall x. PnuTypos -> Rep PnuTypos x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep PnuTypos x -> PnuTypos
$cfrom :: forall x. PnuTypos -> Rep PnuTypos x
Generic, [PnuTypos] -> Encoding
[PnuTypos] -> Value
PnuTypos -> Encoding
PnuTypos -> Value
(PnuTypos -> Value)
-> (PnuTypos -> Encoding)
-> ([PnuTypos] -> Value)
-> ([PnuTypos] -> Encoding)
-> ToJSON PnuTypos
forall a.
(a -> Value)
-> (a -> Encoding)
-> ([a] -> Value)
-> ([a] -> Encoding)
-> ToJSON a
toEncodingList :: [PnuTypos] -> Encoding
$ctoEncodingList :: [PnuTypos] -> Encoding
toJSONList :: [PnuTypos] -> Value
$ctoJSONList :: [PnuTypos] -> Value
toEncoding :: PnuTypos -> Encoding
$ctoEncoding :: PnuTypos -> Encoding
toJSON :: PnuTypos -> Value
$ctoJSON :: PnuTypos -> Value
ToJSON, Value -> Parser [PnuTypos]
Value -> Parser PnuTypos
(Value -> Parser PnuTypos)
-> (Value -> Parser [PnuTypos]) -> FromJSON PnuTypos
forall a.
(Value -> Parser a) -> (Value -> Parser [a]) -> FromJSON a
parseJSONList :: Value -> Parser [PnuTypos]
$cparseJSONList :: Value -> Parser [PnuTypos]
parseJSON :: Value -> Parser PnuTypos
$cparseJSON :: Value -> Parser PnuTypos
FromJSON)

-- Parses the response HTML to [Typo].
htmlToTypos :: String -> [Typo]
htmlToTypos :: String -> [Typo]
htmlToTypos String
body =
    case Regex -> String -> Maybe [String]
matchRegex (String -> Regex
mkRegex String
"^\tdata = (.*);") String
body of 
        Maybe [String]
Nothing -> []
        Just [String
jsonText] -> (PnuTypo -> Typo) -> [PnuTypo] -> [Typo]
forall a b. (a -> b) -> [a] -> [b]
map PnuTypo -> Typo
pnuTypoToTypo [PnuTypo]
pnuTypos
          where
            Just [PnuTypos]
pnuTyopsList = ByteString -> Maybe [PnuTypos]
forall a. FromJSON a => ByteString -> Maybe a
decode (ByteString -> Maybe [PnuTypos])
-> (String -> ByteString) -> String -> Maybe [PnuTypos]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ByteString
BLU.fromString (String -> Maybe [PnuTypos]) -> String -> Maybe [PnuTypos]
forall a b. (a -> b) -> a -> b
$ String
jsonText
            pnuTypos :: [PnuTypo]
pnuTypos = [[PnuTypo]] -> [PnuTypo]
forall a. Monoid a => [a] -> a
mconcat ([[PnuTypo]] -> [PnuTypo])
-> ([PnuTypos] -> [[PnuTypo]]) -> [PnuTypos] -> [PnuTypo]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (PnuTypos -> [PnuTypo]) -> [PnuTypos] -> [[PnuTypo]]
forall a b. (a -> b) -> [a] -> [b]
map PnuTypos -> [PnuTypo]
errInfo ([PnuTypos] -> [PnuTypo]) -> [PnuTypos] -> [PnuTypo]
forall a b. (a -> b) -> a -> b
$ [PnuTypos]
pnuTyopsList

-- Converts PnuTypo (response JSON) to Typo.
pnuTypoToTypo :: PnuTypo -> Typo
pnuTypoToTypo :: PnuTypo -> Typo
pnuTypoToTypo PnuTypo
pnuTypo = 
    Typo :: String -> String -> [String] -> String -> String -> Typo
Typo { errorType :: String
errorType = String
""
         , token :: String
token = String -> String
decodeEntity (String -> String) -> (PnuTypo -> String) -> PnuTypo -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PnuTypo -> String
orgStr (PnuTypo -> String) -> PnuTypo -> String
forall a b. (a -> b) -> a -> b
$ PnuTypo
pnuTypo
         , suggestions :: [String]
suggestions = String -> String -> [String]
forall a. Eq a => [a] -> [a] -> [[a]]
splitOn String
"|" (String -> [String]) -> (String -> String) -> String -> [String]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
decodeEntity (String -> [String]) -> String -> [String]
forall a b. (a -> b) -> a -> b
$ String
suggestions'
         , context :: String
context = String
""
         , info :: String
info = String -> String
decodeEntity (String -> String) -> (PnuTypo -> String) -> PnuTypo -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> String -> String
gsub String
"\n\\(예\\) " String
"(예)\n"
                (String -> String) -> (PnuTypo -> String) -> PnuTypo -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> String -> String
gsub String
" *<br/> *" String
"\n" (String -> String) -> (PnuTypo -> String) -> PnuTypo -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"\n") (String -> String) -> (PnuTypo -> String) -> PnuTypo -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PnuTypo -> String
help (PnuTypo -> String) -> PnuTypo -> String
forall a b. (a -> b) -> a -> b
$ PnuTypo
pnuTypo
         } where
    suggestions' :: String
suggestions' = if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (String -> Bool) -> (PnuTypo -> String) -> PnuTypo -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PnuTypo -> String
candWord (PnuTypo -> Bool) -> PnuTypo -> Bool
forall a b. (a -> b) -> a -> b
$ PnuTypo
pnuTypo
                      then PnuTypo -> String
orgStr PnuTypo
pnuTypo
                      else PnuTypo -> String
candWord PnuTypo
pnuTypo