-- -------------------------------------------------------------------------- -- $Revision: 262 $ $Date: 2007-04-12 12:19:50 +0200 (Thu, 12 Apr 2007) $ -- -------------------------------------------------------------------------- -- | -- -- Module : Encode.Arabic.Buckwalter -- Copyright : Otakar Smrz 2005-2007 -- License : GPL -- -- Maintainer : otakar.smrz mff.cuni.cz -- Stability : provisional -- Portability : portable -- -- Tim Buckwalter's notation is a one-to-one transliteration of the Arabic -- script for Modern Standard Arabic, using lower ASCII characters to encode -- the graphemes of the original script. This system has been very popular in -- Natural Language Processing, however, there are limits to its -- applicability due to numerous non-alphabetic codes involved. -- -- /Encode::Arabic::Buckwalter/ in Perl: -- module Encode.Arabic.Buckwalter ( -- * Types Buckwalter (..) ) where import Encode import PureFP.OrdMap --import Data.Map (Map) --import qualified Data.Map as Map import Version version = revised "$Revision: 262 $" data Buckwalter = Buckwalter | Tim deriving (Enum, Show) instance Encoding Buckwalter where encode _ = recode (recoder decoded encoded) decode _ = recode (recoder encoded decoded) --makeMapWith f = Map.fromListWith f --lookupWith f m x = Map.findWithDefault (f x) x m recode :: (Eq a, Enum a, Enum b, Ord a, OrdMap m) => m a b -> [a] -> [b] recode xry xs = [ lookupWith ((toEnum . fromEnum) x) xry x | x <- xs ] --recode xry xs = [ lookupWith (toEnum . fromEnum) xry x | x <- xs ] recoder :: Ord a => [a] -> [b] -> Map a b recoder xs ys = makeMapWith const (zip xs ys) decoded :: [UPoint] decoded = map toEnum ( [] ++ [0x0640] ++ [0x0623, 0x0624, 0x0625] ++ [0x060C, 0x061B, 0x061F] ++ [0x0621, 0x0622] ++ [0x0626 .. 0x063A] ++ [0x0641 .. 0x064A] ++ [0x067E, 0x0686, 0x06A4, 0x06AF] ++ [0x0660 .. 0x0669] ++ [0x0671] ++ [0x0651] ++ [0x064B .. 0x0650] ++ [0x0670] ++ [0x0652] ) encoded :: [Char] encoded = map id ( [] ++ "_" ++ "OWI" -- ">&<" ++ ",;?" ++ "'|" ++ "}AbptvjHxd*rzs$SDTZEg" ++ "fqklmnhwYy" ++ "PJVG" ++ ['0' .. '9'] ++ "{" -- "A" ++ "~" ++ "FNKaui" ++ "`" ++ "o" )