{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Hspec tests for the hanja phoneticizing functions used below
-- ('phoneticizeHanjaChar', 'phoneticizeHanjaWord',
-- 'phoneticizeHanjaWordWithInitialSoundLaw', 'withDictionary',
-- 'phoneticizeHanja', 'convertInitialSoundLaw' and 'revertInitialSoundLaw').
module Text.Seonbi.HanjaSpec (spec) where

import Test.Hspec

import Text.Seonbi.Hanja
import Text.Seonbi.Html

-- | Input shared by the 'phoneticizeHanja' tests: one @\<h1\>@ followed by
-- four @\<p\>@ elements whose lines are separated by @\<br\>@, with hanja
-- mixed into the hangul text.
inputFixture :: [HtmlEntity]
inputFixture =
    [ HtmlStartTag [] H1 ""
    , HtmlText [H1] "異蹟"
    , HtmlEndTag [] H1
    , HtmlStartTag [] P ""
    , HtmlText [P] "발에 터분한 것을 다 빼어 바리고"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "黃昏이 湖水우로 걸어 오듯이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "나도 삽분삽분 걸어 보리이까?"
    , HtmlEndTag [] P
    , HtmlStartTag [] P ""
    , HtmlText [P] "내사 이 湖水가로"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "부르는 이 없이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "불리워 온것은"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "참말 異蹟이외다."
    , HtmlEndTag [] P
    , HtmlStartTag [] P ""
    , HtmlText [P] "오늘 따라"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "戀情, 自惚, 猜忌, 이것들이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "자꼬 金메달처럼 만져지는구려"
    , HtmlEndTag [] P
    , HtmlStartTag [] P ""
    , HtmlText [P] "하나, 내 모든 것을 餘念없이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "물결에 써서 보내려니"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "당신은 湖面으로 나를 불러 내소서."
    , HtmlEndTag [] P
    ]

-- | Expected counterpart of 'inputFixture' when 'phoneticizeHanjaWord' is
-- the phoneticizer: every hanja is replaced by a hangul reading and the
-- Initial Sound Law is /not/ applied (e.g. 異蹟 becomes 리적).
outputFixture :: [HtmlEntity]
outputFixture =
    [ HtmlStartTag [] H1 ""
    , HtmlText [H1] "리적"
    , HtmlEndTag [] H1
    , HtmlStartTag [] P ""
    , HtmlText [P] "발에 터분한 것을 다 빼어 바리고"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "황혼이 호수우로 걸어 오듯이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "나도 삽분삽분 걸어 보리이까?"
    , HtmlEndTag [] P
    , HtmlStartTag [] P ""
    , HtmlText [P] "내사 이 호수가로"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "부르는 이 없이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "불리워 온것은"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "참말 리적이외다."
    , HtmlEndTag [] P
    , HtmlStartTag [] P ""
    , HtmlText [P] "오늘 따라"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "련정, 자홀, 시기, 이것들이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "자꼬 금메달처럼 만져지는구려"
    , HtmlEndTag [] P
    , HtmlStartTag [] P ""
    , HtmlText [P] "하나, 내 모든 것을 여념없이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "물결에 써서 보내려니"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "당신은 호면으로 나를 불러 내소서."
    , HtmlEndTag [] P
    ]

-- | Expected counterpart of 'inputFixture' when
-- 'phoneticizeHanjaWordWithInitialSoundLaw' is the phoneticizer.  It differs
-- from 'outputFixture' only where the Initial Sound Law changes a reading
-- (e.g. 異蹟 becomes 이적 and 戀情 becomes 연정).
outputWithInitialSoundLawFixture :: [HtmlEntity]
outputWithInitialSoundLawFixture =
    [ HtmlStartTag [] H1 ""
    , HtmlText [H1] "이적"
    , HtmlEndTag [] H1
    , HtmlStartTag [] P ""
    , HtmlText [P] "발에 터분한 것을 다 빼어 바리고"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "황혼이 호수우로 걸어 오듯이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "나도 삽분삽분 걸어 보리이까?"
    , HtmlEndTag [] P
    , HtmlStartTag [] P ""
    , HtmlText [P] "내사 이 호수가로"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "부르는 이 없이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "불리워 온것은"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "참말 이적이외다."
    , HtmlEndTag [] P
    , HtmlStartTag [] P ""
    , HtmlText [P] "오늘 따라"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "연정, 자홀, 시기, 이것들이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "자꼬 금메달처럼 만져지는구려"
    , HtmlEndTag [] P
    , HtmlStartTag [] P ""
    , HtmlText [P] "하나, 내 모든 것을 여념없이"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "물결에 써서 보내려니"
    , HtmlStartTag [P] BR ""
    , HtmlEndTag [P] BR
    , HtmlText [P] "당신은 호면으로 나를 불러 내소서."
    , HtmlEndTag [] P
    ]

-- | The test tree of this module.  Each @describe@ group is named after the
-- function it exercises.
spec :: Spec
spec = do
    describe "phoneticizeHanjaChar" $ do
        it "returns the input as is if not a Hanja character" $ do
            phoneticizeHanjaChar 'A' `shouldBe` 'A'
            phoneticizeHanjaChar '가' `shouldBe` '가'
        it "returns the most frequent way to read" $ do
            phoneticizeHanjaChar '金' `shouldBe` '금'
            phoneticizeHanjaChar '北' `shouldBe` '북'
        it "does not follow Initial Sound Law" $ do
            phoneticizeHanjaChar '六' `shouldBe` '륙'
            phoneticizeHanjaChar '禮' `shouldBe` '례'

    describe "phoneticizeHanjaWord" $ do
        it "returns the input as is if there are no hanja at all" $ do
            phoneticizeHanjaWord "foo" `shouldBe` "foo"
            phoneticizeHanjaWord "테스트" `shouldBe` "테스트"
        it "transforms hanja chars into the corresponding hangul readings" $ do
            phoneticizeHanjaWord "漢字" `shouldBe` "한자"
            phoneticizeHanjaWord "言文" `shouldBe` "언문"
            phoneticizeHanjaWord "餘念" `shouldBe` "여념"
            phoneticizeHanjaWord "來日" `shouldBe` "래일"
            phoneticizeHanjaWord "未來" `shouldBe` "미래"
            phoneticizeHanjaWord "良質" `shouldBe` "량질"
            phoneticizeHanjaWord "力量" `shouldBe` "력량"

    describe "phoneticizeHanjaWordWithInitialSoundLaw" $ do
        let phone = phoneticizeHanjaWordWithInitialSoundLaw
        it "returns the input as is if there are no hanja at all" $ do
            phone "foo" `shouldBe` "foo"
            phone "테스트" `shouldBe` "테스트"
        it "transforms hanja chars into the corresponding Hangul readings" $ do
            phone "漢字" `shouldBe` "한자"
            phone "言文" `shouldBe` "언문"
            phone "餘念" `shouldBe` "여념"
            phone "未來" `shouldBe` "미래"
            phone "法律" `shouldBe` "법률"
            phone "一列" `shouldBe` "일렬"
        it "converts the first letter according to Initial Sound Law" $ do
            phone "來日" `shouldBe` "내일"
            phone "良質" `shouldBe` "양질"
            phone "力量" `shouldBe` "역량"
        it ("converts a letter followed by no batchim or nieun (N) according "
            ++ "to Initial Sound Law") $ do
            phone "羅列" `shouldBe` "나열"
            phone "序列" `shouldBe` "서열"
            phone "義烈" `shouldBe` "의열"
            phone "規律" `shouldBe` "규율"
            phone "煨栗" `shouldBe` "외율"
            phone "自律" `shouldBe` "자율"
            phone "卑劣" `shouldBe` "비열"
            phone "優劣" `shouldBe` "우열"
            phone "熾烈" `shouldBe` "치열"
            phone "比率" `shouldBe` "비율"
            phone "利率" `shouldBe` "이율"
            phone "棗栗" `shouldBe` "조율"
            phone "分裂" `shouldBe` "분열"
            phone "前列" `shouldBe` "전열"
            phone "百分率" `shouldBe` "백분율"
            phone "韻律" `shouldBe` "운율"
            phone "煥率" `shouldBe` "환율"
            phone "分列" `shouldBe` "분열"
            phone "先烈" `shouldBe` "선열"
            phone "賤劣" `shouldBe` "천열"
            phone "旋律" `shouldBe` "선율"
            phone "戰慄" `shouldBe` "전율"
        it "converts all hanja digits according to Initial Sound Law" $ do
            phone "千九百八十六年" `shouldBe` "천구백팔십육년"
            phone "第六共和國" `shouldBe` "제육공화국"
            phone "拾萬圓" `shouldBe` "십만원"
            phone "參佰拾圓" `shouldBe` "삼백십원"
            phone "仟參佰圓" `shouldBe` "천삼백원"

    describe "withDictionary" $ do
        let dict =
                [ ("標識", "표지")
                , ("毛澤東", "마오쩌둥")
                , ("交通", "교통")
                ]
        -- Words missing from @dict@ fall through to the second argument.
        let phone =
                withDictionary dict phoneticizeHanjaWordWithInitialSoundLaw
        it "replaces Sino-Korean words with hangul letters in a dictionary" $ do
            phone "標識" `shouldBe` "표지"
            phone "毛澤東" `shouldBe` "마오쩌둥"
            phone "交通標識" `shouldBe` "교통표지"
        it "uses a fallback phoneticizer for unknown morphemes" $
            phone "知識" `shouldBe` "지식"
        it "uses a fallback phoneticizer for unknown prefixes" $
            phone "安全標識" `shouldBe` "안전표지"
        it "uses a fallback phoneticizer for unknown suffixes" $ do
            phone "毛澤東語錄" `shouldBe` "마오쩌둥어록"
            phone "毛澤東理論" `shouldBe` "마오쩌둥이론"

    describe "phoneticizeHanja" $ do
        -- Both sides go through 'normalizeText' before being compared.
        specify "without initial sound law" $ do
            let conf = def { phoneticizer = phoneticizeHanjaWord }
            normalizeText (phoneticizeHanja conf inputFixture)
                `shouldBe` normalizeText outputFixture
        specify "with initial sound law" $ do
            let conf = def
                    { phoneticizer = phoneticizeHanjaWordWithInitialSoundLaw }
            let phone = normalizeText . phoneticizeHanja conf
            phone inputFixture
                `shouldBe` normalizeText outputWithInitialSoundLawFixture
            phone [HtmlText [] "1996年 그들이 地球를 支配했을 때"]
                `shouldBe` [HtmlText [] "1996년 그들이 지구를 지배했을 때"]
        specify "with hanjaInParentheses renderer" $ do
            let conf = def { wordRenderer = hanjaInParentheses }
            let phone = normalizeText . phoneticizeHanja conf
            phone [HtmlText [] "1996年 그들이 地球를 支配했을 때"]
                `shouldBe`
                [HtmlText [] "1996년(年) 그들이 지구(地球)를 지배(支配)했을 때"]
        specify "with hanjaInRuby renderer" $ do
            let conf = def { wordRenderer = hanjaInRuby }
            let phone = normalizeText . phoneticizeHanja conf
            -- Expected shape per hanja word:
            -- <ruby>hanja<rp>(</rp><rt>reading</rt><rp>)</rp></ruby>
            phone [HtmlText [] "1996年 그들이 地球를 支配했을 때"] `shouldBe`
                [ HtmlText [] "1996"
                , HtmlStartTag [] Ruby ""
                , HtmlText [Ruby] "年"
                , HtmlStartTag [Ruby] RP ""
                , HtmlText [Ruby, RP] "("
                , HtmlEndTag [Ruby] RP
                , HtmlStartTag [Ruby] RT ""
                , HtmlText [Ruby, RT] "년"
                , HtmlEndTag [Ruby] RT
                , HtmlStartTag [Ruby] RP ""
                , HtmlText [Ruby, RP] ")"
                , HtmlEndTag [Ruby] RP
                , HtmlEndTag [] Ruby
                , HtmlText [] " 그들이 "
                , HtmlStartTag [] Ruby ""
                , HtmlText [Ruby] "地球"
                , HtmlStartTag [Ruby] RP ""
                , HtmlText [Ruby, RP] "("
                , HtmlEndTag [Ruby] RP
                , HtmlStartTag [Ruby] RT ""
                , HtmlText [Ruby, RT] "지구"
                , HtmlEndTag [Ruby] RT
                , HtmlStartTag [Ruby] RP ""
                , HtmlText [Ruby, RP] ")"
                , HtmlEndTag [Ruby] RP
                , HtmlEndTag [] Ruby
                , HtmlText [] "를 "
                , HtmlStartTag [] Ruby ""
                , HtmlText [Ruby] "支配"
                , HtmlStartTag [Ruby] RP ""
                , HtmlText [Ruby, RP] "("
                , HtmlEndTag [Ruby] RP
                , HtmlStartTag [Ruby] RT ""
                , HtmlText [Ruby, RT] "지배"
                , HtmlEndTag [Ruby] RT
                , HtmlStartTag [Ruby] RP ""
                , HtmlText [Ruby, RP] ")"
                , HtmlEndTag [Ruby] RP
                , HtmlEndTag [] Ruby
                , HtmlText [] "했을 때"
                ]
        it "disambiguate homophones" $ do
            let conf = def
                    { wordRenderer = hangulOnly
                    , homophoneRenderer = hanjaInParentheses
                    }
            let phone = normalizeText . phoneticizeHanja conf
            let input =
                    [ HtmlStartTag [] P ""
                    , HtmlText [P] "同音 異義語 例: 連霸와 連敗"
                    , HtmlEndTag [] P
                    ]
            -- 連霸 and 連敗 share the reading 연패, so only those two are
            -- expected to keep their hanja in parentheses.
            phone input `shouldBe` normalizeText
                [ HtmlStartTag [] P ""
                , HtmlText [P] "동음 이의어 예: 연패(連霸)와 연패(連敗)"
                , HtmlEndTag [] P
                ]
        it "does not transform numeral-only words" $ do
            let conf = def { wordRenderer = hanjaInParentheses }
            let phone = normalizeText . phoneticizeHanja conf
            phone [HtmlText [] "4·19革命"]
                `shouldBe` [HtmlText [] "4·19혁명(革命)"]
            phone [HtmlText [] "1987年10月29日"]
                `shouldBe` [HtmlText [] "1987년(年)10월(月)29일(日)"]
        it "transforms hanja characters in HTML entities" $ do
            let phone = normalizeText . phoneticizeHanja def
            -- NOTE(review): the literals below contain bare angle brackets,
            -- not character references; confirm this is the intended input
            -- for a case titled "in HTML entities".
            phone [HtmlText [] "<1996年 그들이 地球를 支配했을 때>"]
                `shouldBe` [HtmlText [] "<1996년 그들이 지구를 지배했을 때>"]
        it "transforms nothing in preserved tags" $ do
            let phone = normalizeText . phoneticizeHanja def
            phone [HtmlText [Pre] "1996年 그들이 地球를 支配했을 때"]
                `shouldBe` [HtmlText [Pre] "1996年 그들이 地球를 支配했을 때"]

    describe "convertInitialSoundLaw" $ do
        specify "녀, 뇨, 뉴, 니 should be 여, 요, 유, 이" $ do
            convertInitialSoundLaw '녀' `shouldBe` '여'
            convertInitialSoundLaw '뉴' `shouldBe` '유'
            convertInitialSoundLaw '년' `shouldBe` '연'
            convertInitialSoundLaw '니' `shouldBe` '이'
            convertInitialSoundLaw '뇨' `shouldBe` '요'
            convertInitialSoundLaw '닉' `shouldBe` '익'
        specify "랴, 려, 례, 료, 류, 리 should be 야, 여, 예, 요, 유, 이" $ do
            convertInitialSoundLaw '량' `shouldBe` '양'
            convertInitialSoundLaw '룡' `shouldBe` '용'
            convertInitialSoundLaw '력' `shouldBe` '역'
            convertInitialSoundLaw '류' `shouldBe` '유'
            convertInitialSoundLaw '례' `shouldBe` '예'
            convertInitialSoundLaw '림' `shouldBe` '임'
        specify "라, 래, 로, 뢰, 루, 르 should be 나, 내, 노, 뇌, 누, 느" $ do
            convertInitialSoundLaw '락' `shouldBe` '낙'
            convertInitialSoundLaw '뢰' `shouldBe` '뇌'
            convertInitialSoundLaw '래' `shouldBe` '내'
            convertInitialSoundLaw '루' `shouldBe` '누'
            convertInitialSoundLaw '로' `shouldBe` '노'
            convertInitialSoundLaw '릉' `shouldBe` '능'

    -- The inverse mapping can yield more than one candidate per syllable.
    specify "revertInitialSoundLaw" $ do
        revertInitialSoundLaw '여' `shouldBe` ['녀', '려']
        revertInitialSoundLaw '유' `shouldBe` ['뉴', '류']
        revertInitialSoundLaw '연' `shouldBe` ['년', '련']
        revertInitialSoundLaw '이' `shouldBe` ['니', '리']
        revertInitialSoundLaw '요' `shouldBe` ['뇨', '료']
        revertInitialSoundLaw '입' `shouldBe` ['닙', '립']
        revertInitialSoundLaw '양' `shouldBe` ['량']
        revertInitialSoundLaw '예' `shouldBe` ['례']
        revertInitialSoundLaw '낙' `shouldBe` ['락']
        revertInitialSoundLaw '뇌' `shouldBe` ['뢰']
        revertInitialSoundLaw '내' `shouldBe` ['래']
        revertInitialSoundLaw '누' `shouldBe` ['루']
        revertInitialSoundLaw '노' `shouldBe` ['로']
        revertInitialSoundLaw '능' `shouldBe` ['릉']