{-# LANGUAGE OverloadedStrings #-}

-- | Conversion of half-width (hankaku) katakana and the accompanying
-- half-width punctuation (U+FF61..U+FF9F) into their full-width (zenkaku)
-- equivalents.
module Text.Han2Zen.Text
  ( han2zen
  ) where

import Data.Maybe (fromMaybe)
import qualified Data.Text as T

-- | Replace every half-width katakana character in the text with its
-- full-width form.
--
-- A base kana immediately followed by a half-width voiced (U+FF9E) or
-- semi-voiced (U+FF9F) sound mark is merged into the single precomposed
-- full-width kana when one exists. A mark that cannot be merged with the
-- preceding character is converted on its own to the spacing full-width mark
-- (U+309B / U+309C). All other characters are passed through unchanged.
han2zen :: T.Text -> T.Text
han2zen = T.pack . map hanKanaToZenKana . chars

-- | Split the text into conversion units: a two-character chunk for a base
-- kana plus a sound mark that can be merged with it, and a one-character chunk
-- for everything else.
--
-- A pair is only grouped when 'composeKana' knows it, so every chunk produced
-- here is accepted by 'hanKanaToZenKana'. 'T.uncons' is used instead of
-- 'T.length', which is linear in the length of the text and would make the
-- whole traversal quadratic.
chars :: T.Text -> [T.Text]
chars txt = case T.uncons txt of
  Nothing -> []
  Just (c, rest) -> case T.uncons rest of
    Just (mark, rest')
      | isDakuten mark
      , Just _ <- composeKana c mark ->
          T.pack [c, mark] : chars rest'
    _ -> T.singleton c : chars rest

-- | True for the half-width voiced (dakuten) and semi-voiced (handakuten)
-- sound marks.
isDakuten :: Char -> Bool
isDakuten c = c == dakuten || c == handakuten

-- | Convert one chunk produced by 'chars' into a full-width character.
--
-- * A single character is looked up in the plain table; characters that are
--   not half-width kana are returned unchanged.
-- * A base kana followed by a sound mark is mapped to the precomposed kana.
--
-- Any other input (empty text, an unknown pair, three or more characters)
-- is a programming error and raises 'error'; 'chars' never produces one.
hanKanaToZenKana :: T.Text -> Char
hanKanaToZenKana cs = case T.unpack cs of
  [c] -> fromMaybe c (lookup c plainTable)
  [c, mark]
    | Just z <- composeKana c mark -> z
  _ -> error . T.unpack $ T.append "syntax error:" cs

-- | Precomposed full-width kana for a half-width base character followed by
-- a half-width sound mark, if such a kana exists.
composeKana :: Char -> Char -> Maybe Char
composeKana base mark = lookup (base, mark) voicedTable

-- | One-to-one mapping from half-width characters to full-width characters.
plainTable :: [(Char, Char)]
plainTable = zip hanKana zenKana

-- | Mapping from (half-width base, half-width sound mark) to the precomposed
-- full-width kana.
voicedTable :: [((Char, Char), Char)]
voicedTable =
  [((b, dakuten), z) | (b, z) <- zip daku dakuzen]
    ++ [((b, handakuten), z) | (b, z) <- zip handaku handakuzen]
    ++ [(('\xFF73', dakuten), 'ヴ')] -- half-width U + dakuten

-- | Half-width voiced sound mark (U+FF9E).
dakuten :: Char
dakuten = '\xFF9E'

-- | Half-width semi-voiced sound mark (U+FF9F).
handakuten :: Char
handakuten = '\xFF9F'

-- Half-width characters are written by code point rather than as literals:
-- they are easily confused with (or silently normalised to) their full-width
-- counterparts, which would turn the tables below into no-ops.

-- | Half-width punctuation, katakana and sound marks, U+FF61..U+FF9F, in the
-- same order as 'zenKana'.
hanKana :: String
hanKana = ['\xFF61' .. '\xFF9F']

-- | Full-width counterparts of 'hanKana', position by position. The last two
-- entries are the spacing voiced / semi-voiced sound marks; they are needed
-- for marks that do not merge with the preceding character.
zenKana :: String
zenKana = "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン\x309B\x309C"

-- | Half-width kana that can take the voiced sound mark
-- (KA..TO and HA..HO rows), in the same order as 'dakuzen'.
daku :: String
daku = ['\xFF76' .. '\xFF84'] ++ ['\xFF8A' .. '\xFF8E']

-- | Full-width voiced kana corresponding to 'daku'.
dakuzen :: String
dakuzen = "ガギグゲゴザジズゼゾダヂヅデドバビブベボ"

-- | Half-width kana that can take the semi-voiced sound mark (HA..HO row),
-- in the same order as 'handakuzen'.
handaku :: String
handaku = ['\xFF8A' .. '\xFF8E']

-- | Full-width semi-voiced kana corresponding to 'handaku'.
handakuzen :: String
handakuzen = "パピプペポ"