{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-}
{-# OPTIONS_HADDOCK hide #-}
module Language.Hanspell.DaumSpellChecker
( DaumSpellChecker
, spellCheckByDaum
, daumSpellCheckerMaxChars
) where
import qualified Data.ByteString.UTF8 as BU
import qualified Data.ByteString.Lazy.UTF8 as BLU
import Data.List
import Data.List.Split
import Network.HTTP.Types.Status
import Text.Regex
import Debug.Trace
import Control.Monad.Trans.Maybe
import Control.Monad.Trans.Class
import Network.HTTP.Client
import Network.HTTP.Client.TLS
import Language.Hanspell.Typo
import Language.Hanspell.Decoder
class Monad m => DaumSpellChecker m where
spellCheckByDaum :: String -> m [Typo]
instance DaumSpellChecker (MaybeT IO) where
spellCheckByDaum text = htmlToTypos <$> requestToDaum text
instance DaumSpellChecker IO where
spellCheckByDaum text = do
maybe <- runMaybeT $ htmlToTypos <$> requestToDaum text
case maybe of
Nothing -> return []
Just typos -> return typos
daumSpellCheckerMaxChars :: Int
daumSpellCheckerMaxChars = 1000
daumConnectError :: String
daumConnectError =
"-- 한스펠 오류: 다음 서버의 접속 오류로 일부 문장 교정에 실패했습니다."
invalidResponseFromDaum :: String
invalidResponseFromDaum =
"-- 한스펠 오류: 다음 서비스가 유효하지 않은 양식을 반환했습니다. ("
++ daumSpellCheckUrl ++ ")"
daumSpellCheckUrl :: String
daumSpellCheckUrl = "https://dic.daum.net/grammar_checker.do"
requestToDaum :: String -> MaybeT IO String
requestToDaum text = do
manager <- lift $ newManager tlsManagerSettings
let pair = [("sentence",BU.fromString text)]
initialRequest <- lift $ parseRequest daumSpellCheckUrl
let request = (urlEncodedBody pair initialRequest) { method = "POST" }
response <- lift $ httpLbs request manager
let errCode = statusCode (responseStatus response)
let daumResponseInfix = "=\"screen_out\">맞춤법 검사기 본문</h2>"
if errCode == 200
then let body = BLU.toString (responseBody response)
in if daumResponseInfix `isInfixOf` body
then return body
else trace invalidResponseFromDaum (MaybeT $ return Nothing)
else trace (daumConnectError ++ " ("++ show errCode ++ ")")
(MaybeT $ return Nothing)
htmlToTypos :: String -> [Typo]
htmlToTypos body =
let stripped = head . splitOn "<span class=\"info_byte\">" $ body
splitted = tail . splitOn "data-error-type" $ stripped
in map htmlToTypo splitted
htmlToTypo :: String -> Typo
htmlToTypo body = Typo { errorType = decodeEntity (splitted!!1)
, token = decodeEntity (splitted!!3)
, suggestions = [decodeEntity (splitted!!5)]
, context = decodeEntity (splitted!!7)
, info = info'''
} where
gsub from to text = subRegex (mkRegex from) text to
splitted = splitOn "\"" $ head (lines body)
info' = splitOn "<div>" body!!1
info'' = head (splitOn "<span class=\"info_byte\">" info')
info''' = gsub "^[ \n][ \n]*" ""
. gsub "<[^>]*>" ""
. gsub "<br[^>]*>" "\n"
. gsub "</span><span class.*\n" ""
. gsub "<a href=\"#none\".*\n" ""
. gsub "^<span>.*\n" ""
. gsub "<strong class.*\n" ""
. gsub ".*strong class=.tit_help.>예문</strong.*\n" "(예)"
. gsub "\t" ""
$ info''