-- --------------------------------------------------------------------------
--  $Revision: 262 $ $Date: 2007-04-12 12:19:50 +0200 (Thu, 12 Apr 2007) $
-- --------------------------------------------------------------------------

-- |
--
-- Module      :  Encode.Arabic.Buckwalter
-- Copyright   :  Otakar Smrz 2005-2007
-- License     :  GPL
--
-- Maintainer  :  otakar.smrz mff.cuni.cz
-- Stability   :  provisional
-- Portability :  portable
--
-- Tim Buckwalter's notation is a one-to-one transliteration of the Arabic
-- script for Modern Standard Arabic, using lower ASCII characters to encode
-- the graphemes of the original script. This system has been very popular in
-- Natural Language Processing, however, there are limits to its
-- applicability due to numerous non-alphabetic codes involved.
--
-- /Encode::Arabic::Buckwalter/ in Perl:
-- <http://search.cpan.org/dist/Encode-Arabic/lib/Encode/Arabic/Buckwalter.pm>


module Encode.Arabic.Buckwalter (

        -- * Types

        Buckwalter (..)

    ) where


import Encode

import PureFP.OrdMap

--import Data.Map (Map)
--import qualified Data.Map as Map

import Version

version = revised "$Revision: 262 $"


data Buckwalter = Buckwalter | Tim

    deriving (Enum, Show)


instance Encoding Buckwalter where

    encode _ = recode (recoder decoded encoded)

    decode _ = recode (recoder encoded decoded)


--makeMapWith f = Map.fromListWith f
--lookupWith f m x = Map.findWithDefault (f x) x m


recode :: (Eq a, Enum a, Enum b, Ord a, OrdMap m) => m a b -> [a] -> [b]

recode xry xs = [ lookupWith ((toEnum . fromEnum) x) xry x | x <- xs ]
--recode xry xs = [ lookupWith (toEnum . fromEnum) xry x | x <- xs ]


recoder :: Ord a => [a] -> [b] -> Map a b

recoder xs ys = makeMapWith const (zip xs ys)


decoded :: [UPoint]

decoded = map toEnum ( []

            ++ [0x0640]
            ++ [0x0623, 0x0624, 0x0625]
            ++ [0x060C, 0x061B, 0x061F]
            ++ [0x0621, 0x0622] ++ [0x0626 .. 0x063A] ++ [0x0641 .. 0x064A]
            ++ [0x067E, 0x0686, 0x06A4, 0x06AF]
            ++ [0x0660 .. 0x0669]
            ++ [0x0671]
            ++ [0x0651]
            ++ [0x064B .. 0x0650] ++ [0x0670]
            ++ [0x0652]

            )


encoded :: [Char]

encoded = map id ( []

            ++ "_"
            ++ "OWI"
            -- ">&<"
            ++ ",;?"
            ++ "'|" ++ "}AbptvjHxd*rzs$SDTZEg" ++ "fqklmnhwYy"
            ++ "PJVG"
            ++ ['0' .. '9']
            ++ "{"
            -- "A"
            ++ "~"
            ++ "FNKaui" ++ "`"
            ++ "o"

            )