{-# LANGUAGE OverloadedStrings#-}
module Text.Han2Zen.Text
( han2zen
) where
import qualified Data.Text as T
-- | Convert a text unit by unit.
--
-- The input is cut into units by 'chars', every unit is turned into one
-- character by 'hanKanaToZenKana', and the characters are packed back into
-- a single text.  Any unit that 'hanKanaToZenKana' rejects makes the result
-- raise that function's 'error' when it is forced.
han2zen :: T.Text -> T.Text
han2zen = T.pack . map hanKanaToZenKana . chars
-- | Split a text into conversion units.
--
-- Every unit is a single character, except that a character immediately
-- followed by a sound mark (as decided by 'isDakuten') is kept together
-- with that mark as one two-character unit.  The empty text yields @[]@.
--
-- The text is walked with 'T.uncons' instead of 'T.length' / 'T.index':
-- 'T.length' is linear in the text, so calling it on every step made the
-- split quadratic whenever the optimiser's rewrite rule did not fire.
chars :: T.Text -> [T.Text]
chars txt = case T.uncons txt of
  Nothing -> []
  Just (_, rest)
    | startsWithMark rest -> pair : chars afterPair
    | otherwise -> T.take 1 txt : chars rest
  where
    -- Only evaluated in the two-character branch.
    (pair, afterPair) = T.splitAt 2 txt
    -- True when the remaining text begins with a sound mark.
    startsWithMark = maybe False (isDakuten . fst) . T.uncons
-- | Tell whether a character is one of the two sound-mark characters that
-- 'chars' attaches to the character preceding them.
isDakuten :: Char -> Bool
isDakuten c = c == '゙' || c == '゚'
-- | Map one conversion unit to a single character.
--
-- * A unit listed in the table below yields the character on the right.
-- * Any other unit made of exactly one character yields that character
--   unchanged.
-- * Everything else (the empty text, or a longer unit that is not in the
--   table) calls 'error' with @"syntax error:"@ followed by the unit.
--
-- NOTE(review): the module name suggests the table keys are meant to be the
-- half-width forms; many keys here read the same as their values, so confirm
-- that the literals' encoding is what was intended.
hanKanaToZenKana :: T.Text -> Char
hanKanaToZenKana unit = case unit of
  "。" -> '。'
  "「" -> '「'
  "」" -> '」'
  "、" -> '、'
  "・" -> '・'
  "ヲ" -> 'ヲ'
  "ァ" -> 'ァ'
  "ィ" -> 'ィ'
  "ゥ" -> 'ゥ'
  "ェ" -> 'ェ'
  "ォ" -> 'ォ'
  "ャ" -> 'ャ'
  "ュ" -> 'ュ'
  "ョ" -> 'ョ'
  "ッ" -> 'ッ'
  "ー" -> 'ー'
  "ア" -> 'ア'
  "イ" -> 'イ'
  "ウ" -> 'ウ'
  "エ" -> 'エ'
  "オ" -> 'オ'
  "カ" -> 'カ'
  "キ" -> 'キ'
  "ク" -> 'ク'
  "ケ" -> 'ケ'
  "コ" -> 'コ'
  "サ" -> 'サ'
  "シ" -> 'シ'
  "ス" -> 'ス'
  "セ" -> 'セ'
  "ソ" -> 'ソ'
  "タ" -> 'タ'
  "チ" -> 'チ'
  "ツ" -> 'ツ'
  "テ" -> 'テ'
  "ト" -> 'ト'
  "ナ" -> 'ナ'
  "ニ" -> 'ニ'
  "ヌ" -> 'ヌ'
  "ネ" -> 'ネ'
  "ノ" -> 'ノ'
  "ハ" -> 'ハ'
  "ヒ" -> 'ヒ'
  "フ" -> 'フ'
  "ヘ" -> 'ヘ'
  "ホ" -> 'ホ'
  "マ" -> 'マ'
  "ミ" -> 'ミ'
  "ム" -> 'ム'
  "メ" -> 'メ'
  "モ" -> 'モ'
  "ヤ" -> 'ヤ'
  "ユ" -> 'ユ'
  "ヨ" -> 'ヨ'
  "ラ" -> 'ラ'
  "リ" -> 'リ'
  "ル" -> 'ル'
  "レ" -> 'レ'
  "ロ" -> 'ロ'
  "ワ" -> 'ワ'
  "ン" -> 'ン'
  "゙" -> '゛'
  "゚" -> '゜'
  "ガ" -> 'ガ'
  "ギ" -> 'ギ'
  "グ" -> 'グ'
  "ゲ" -> 'ゲ'
  "ゴ" -> 'ゴ'
  "ザ" -> 'ザ'
  "ジ" -> 'ジ'
  "ズ" -> 'ズ'
  "ゼ" -> 'ゼ'
  "ゾ" -> 'ゾ'
  "ダ" -> 'ダ'
  "ヂ" -> 'ヂ'
  "ヅ" -> 'ヅ'
  "デ" -> 'デ'
  "ド" -> 'ド'
  "バ" -> 'バ'
  "ビ" -> 'ビ'
  "ブ" -> 'ブ'
  "ベ" -> 'ベ'
  "ボ" -> 'ボ'
  "パ" -> 'パ'
  "ピ" -> 'ピ'
  "プ" -> 'プ'
  "ペ" -> 'ペ'
  "ポ" -> 'ポ'
  "ヴ" -> 'ヴ'
  -- Not in the table: pass a lone character through, reject the rest.
  _ -> case T.uncons unit of
    Just (only, rest) | T.null rest -> only
    _ -> error (T.unpack (T.append "syntax error:" unit))
-- | Punctuation, kana and the two sound marks, as a flat list of characters.
--
-- Has the same number of entries as 'zenKana'; the two appear to be
-- position-aligned source/target tables (the final two entries are the
-- ones 'isDakuten' recognises).
-- NOTE(review): not exported and not referenced by any definition visible
-- in this file — possibly left over from an earlier implementation; confirm
-- before removing.
hanKana :: String
hanKana = "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚"
-- | Punctuation, kana and the two spacing sound marks, as a flat list of
-- characters.
--
-- Has the same number of entries as 'hanKana'; the two appear to be
-- position-aligned source/target tables.
-- NOTE(review): not exported and not referenced by any definition visible
-- in this file — confirm whether it is still needed.
zenKana :: String
zenKana = "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゛゜"
-- | The twenty base kana (ka, sa, ta and ha rows) whose voiced counterparts
-- are listed, in the same order, in 'dakuzen'.
-- NOTE(review): not exported and not referenced by any definition visible
-- in this file — confirm whether it is still needed.
daku :: String
daku = "カキクケコサシスセソタチツテトハヒフヘホ"
-- | The twenty voiced kana (ga, za, da and ba rows), in the same order as
-- their base forms in 'daku'.
-- NOTE(review): not exported and not referenced by any definition visible
-- in this file — confirm whether it is still needed.
dakuzen :: String
dakuzen = "ガギグゲゴザジズゼゾダヂヅデドバビブベボ"
-- | The five ha-row base kana whose semi-voiced counterparts are listed, in
-- the same order, in 'handakuzen'.
-- NOTE(review): not exported and not referenced by any definition visible
-- in this file — confirm whether it is still needed.
handaku :: String
handaku = "ハヒフヘホ"
-- | The five semi-voiced kana (pa row), in the same order as their base
-- forms in 'handaku'.
-- NOTE(review): not exported and not referenced by any definition visible
-- in this file — confirm whether it is still needed.
handakuzen :: String
handakuzen = "パピプペポ"