-- -------------------------------------------------------------------------- -- $Revision: 190 $ $Date: 2007-01-27 14:47:15 +0100 (Sat, 27 Jan 2007) $ -- -------------------------------------------------------------------------- -- | -- -- Module : Encode.Arabic.ArabTeX.ZDMG -- Copyright : Otakar Smrz 2005-2007 -- License : GPL -- -- Maintainer : otakar.smrz mff.cuni.cz -- Stability : provisional -- Portability : portable -- -- /Encode::Arabic::ArabTeX::ZDMG/ in Perl: -- -- -- "Encode.Extend" -- "Encode.Mapper" module Encode.Arabic.ArabTeX.ZDMG ( -- * Types ZDMG (..) -- * Functions ) where import Encode import Encode.Mapper import Encode.Extend import FunParsing.OrdMap import Version version = revised "$Revision: 190 $" data ZDMG = ZDMG | ArabTeX_ZDMG deriving (Enum, Show) instance Encoding ZDMG where encode _ = error "Encode.Arabic.ArabTeX.ZDMG: 'encode' not implemented" decode _ = concat . parseFull decoderParsing . concat . parseLongest decoderMapping encoder :: Mapper UPoint [[[Char]]] encoder = error "Encode.Arabic.ArabTeX.ZDMG: 'encoder' not implemented" -- -------------------------------------------------------------------------- -- Extend and ExtEnv Definitions -- -------------------------------------------------------------------------- data Mode = NonePlus | Novocalize | Vocalize | Fullvocalize deriving (Eq, Ord) data Env i = Env { envQuote :: Bool, envMode :: Mode, envWasla :: Bool, envVerb :: Bool, envCap :: Bool } setQuote q (Env _ m w v c) = Env q m w v c setMode m (Env q _ w v c) = Env q m w v c setWasla w (Env q m _ v c) = Env q m w v c setVerb v (Env q m w _ c) = Env q m w v c setCap c (Env q m w v _) = Env q m w v c instance ExtEnv Env where initEnv = Env False Vocalize False False False -- -------------------------------------------------------------------------- -- Parsing Definitions -- -------------------------------------------------------------------------- type Parsing = Extend Env [Char] ([UPoint] -> [UPoint]) type Environ = Env [Char] consControl :: OrdMap m => [Char] -> [m [Char] [UPoint]] -> Environ -> [[UPoint]] consControl x l e = if envCap e then [ capFirst n | n <- noChange ] else noChange where noChange = lookupList x l capFirst [] = [] capFirst (x:xs) = (toEnum . flip (-) 0x0020 . fromEnum) x : xs vowelControl :: OrdMap m => [Char] -> [Char] -> [m [Char] [UPoint]] -> Environ -> [[UPoint]] vowelControl c x l e = if envCap e then consControl x l e else case c of "W" -> case sukun ? x of Just s -> if envQuote e then theWasla else [[]] Nothing -> if envQuote e then [ j ++ n | j <- justAlif, n <- noChange ] else case envMode e of Fullvocalize -> if envWasla e then [ j ++ drop 1 n | j <- theWasla, n <- noChange ] else [ n | n <- noChange ] Vocalize -> if envWasla e then [ j ++ drop 1 n | j <- justAlif, n <- noChange ] else [ n | n <- noChange ] _ -> [ j ++ f | j <- justAlif, f <- filterIt ] "|" -> case sukun ? x of Just s -> if envQuote e then noChange else [[]] Nothing -> if envQuote e then filterIt else noChange _ -> case sukun ? x of Just s -> if envMode e > Vocalize && not (envQuote e) || not (envMode e > Vocalize) && envQuote e then noChange else [[]] Nothing -> if envMode e > Novocalize && not (envQuote e) || not (envMode e > Novocalize) && envQuote e then noChange else filterIt where theWasla = lookupList "W" [wasla] justAlif = lookupList "W" [wasla] --[silent] noChange = lookupList x l filterIt = [ filter (flip all ([0x064B .. 0x0650] ++ [0x0656, 0x0657, 0x0670]) . ((/=) . fromEnum)) s | s <- lookupList x l ] shaddaControl :: (OrdMap m, Ord s) => s -> [m s [a]] -> Environ -> [[a]] shaddaControl x l e = if envMode e > NonePlus then lookupList x l else [[]] infixr 7 `plus` -- infixr 9 . -- infixr 5 ++ plus :: (a -> b) -> (c -> a) -> c -> b plus = (.) decoderParsing :: Extend Env [Char] [UPoint] decoderParsing = (fmap (foldr ($) []) . again) $ parseHyphen <|> parseDoubleCons <|> parseSingleCons <|> parseInitVowel <|> parseWhite <|> parsePunct <|> parseDigit <|> parseCap <|> parseControl <|> parseAnything <|> returnError returnError :: Parsing returnError = do x <- inspectIList sat (const True) return (error (show x)) parseAnything :: Parsing parseAnything = do x <- sat (const True) return ((++) (map (toEnum . fromEnum) x)) parseNothing :: Parsing parseNothing = return id parseCap = do lower ["\\cap"] [] processControl "cap" parseControl = do i <- inspectIList case i of [] -> zero (c : s) -> case c of '\\' : t : r -> do returnIList s processControl (t : r) "\\" -> fail "Single \\" _ -> zero processControl :: [Char] -> Parsing processControl t = do e <- inspectEList let envList = case e of [] -> error "Empty environment" (q : r) -> case t of "{" -> q : q : r "}" -> case r of [] -> error "Minus group" _ -> r "\"" -> setQuote True q : r "cap" -> setCap True q : r "fullvocalize" -> setMode Fullvocalize q : r "full" -> setMode Fullvocalize q : r "vocalize" -> setMode Vocalize q : r "nosukuun" -> setMode Vocalize q : r "novocalize" -> setMode Novocalize q : r "novowels" -> setMode Novocalize q : r "none" -> setMode Novocalize q : r "noshadda" -> setMode NonePlus q : r "noneplus" -> setMode NonePlus q : r "setverb" -> setVerb True q : r "setarab" -> setVerb False q : r _ -> error "Weird control sequence" returnEList envList parseNothing parseInitVowel = do v <- oneof [vowel] -- x <- upper ["W"] [silent] -- depends on 'vowelControl' y <- upperWith (vowelControl "W") [v] [vowel] completeSyllable y parseSyllVowel :: [Char] -> ([UPoint] -> [UPoint]) -> Parsing parseSyllVowel c x = do v <- oneof [vowel] <|> return "" y <- upperWith (vowelControl c) [v] [vowel, sukun] completeSyllable (x `plus` y) completeSyllable :: ([UPoint] -> [UPoint]) -> Parsing completeSyllable x = do resetEnv setQuote False resetEnv setWasla True resetEnv setCap False return x parseSingleCons = do c <- oneof [consonant, extra, invis] x <- upperWith consControl [c] [consonant, extra, invis] resetEnv setCap False parseSyllVowel c x <|> do c <- oneof [minor] x <- upper [c] [minor] parseSyllVowel c x parseDoubleCons = do c <- oneof [consonant, extra, invis] lower [c] [] x <- upperWith consControl [c] [consonant, extra, invis] resetEnv setCap False y <- upperWith shaddaControl [c] [consonant, extra, invis] -- ["*"] [shadda] parseSyllVowel c (x `plus` y) <|> do c <- oneof [minor] lower [c] [] x <- upper [c] [minor] y <- upperWith shaddaControl [c] [minor] -- ["*"] [shadda] parseSyllVowel c (x `plus` y) parseHyphen = do lower ["-"] [] upper ["-"] [hyphen] parseDigit = do d <- oneof [digit] upper [d] [digit] parseWhite = do w <- oneof [white] upper [w] [white] parsePunct = do p <- oneof [punct] resetEnv setWasla False upper [p] [punct] -- -------------------------------------------------------------------------- -- Mapping Definitions -- -------------------------------------------------------------------------- type Mapping = Mapper Char (Quit Char [[Char]]) pairs :: (OrdMap m, Ord s) => [m s a] -> [(s, a)] pairs l = concat [ assocs i | i <- l ] elems :: (OrdMap m, Ord s) => [m s a] -> [s] elems l = (map fst . concat) [ assocs i | i <- l ] quote :: OrdMap m => [m [Char] a] -> [[Char]] quote = map ("\"" ++) . elems decoderMapping :: Mapper Char (Quit Char [[Char]]) decoderMapping = defineMapping ( pairs [ sunny, moony, minor, extra, invis, empty, -- digit, punct, white, vowel ] ) <+> rules `others` (\ s -> (Just . return) ([], [[s]])) -- <+> "" |.| error "Illegal symbol" defineMapping :: [([Char], [a])] -> Mapping defineMapping = foldr (listing . mapping) zero where listing = (<+>) mapping (encoded, _) = symbols encoded symbols = fmap (((,) []) . (: [])) . syms whites :: Mapper Char (Quit Char Char) whites = (fmap ((,) []) . anySymbol) [' ', '\r', '\v', '\f'] -- [' ', '\n', '\r', '\t', '\v', '\f'] rules :: Mapping rules = "N_A" |-| "NY" |:| [] |+| "_A" |-| "Y" |:| [] |+| ruleVerbalSilentAlif |+| ruleInternalTaaaa -- |+| ruleLiWithArticle |+| ruleDefArticle |+| ruleIndefArticle |+| ruleMultiVowel |+| ruleHyphenedVowel |+| ruleWhitePlusControl |+| ruleIgnoreCapControl |+| ruleControlSequence |+| rulePunctuation rulePunctuation = "-" |.| ["-"] |+| "\"" |.| ["\\\""] |+| "\\\"" |.| ["\""] ruleVerbalSilentAlif = "aWA" |-| "aw" |:| [] |+| "aW" |-| "aw" |:| [] |+| "UA" |-| "U" |:| [] |+| "uW" |-| "U" |:| [] ruleWhitePlusControl = "{" |.| ["\\{"] |+| "}" |.| ["\\}"] |+| "\\{" |.| ["{"] |+| "\\}" |.| ["}"] |+| "\\\\" |.| ["\\\\"] |+| "\\" |.| ["\\"] <+> sym '\\' <.> some whites <-> [" "] <+> some whites <-> [" "] ruleIgnoreCapControl = anyof [ "l" ++ "-" ++ c ++ "\\cap " |-| "l" ++ "-" ++ c ++ "\\cap " |:| [] |+| "l" ++ "-" ++ c ++ "\\cap " ++ c |-| "-\\cap " ++ c |:| [c] |+| c ++ "-" ++ "\\cap " |-| c ++ "-" ++ "\\cap " |:| [] |+| c ++ "-" ++ "\\cap " ++ c |-| "-" ++ "\\cap " ++ c |:| [c] |+| "l" ++ "-\\cap " ++ c ++ "\\cap " |-| "l" ++ "-\\cap " ++ c ++ "\\cap " |:| [] |+| "l" ++ "-\\cap " ++ c ++ "\\cap " ++ c |-| "-\\cap " ++ c |:| ["\\cap", c] |+| "l" ++ "-\\cap " ++ c ++ c |-| "-\\cap " ++ c |:| [c] | c <- elems [sunny, moony] ] ruleControlSequence = do x <- sym '\\' <:> some (anySymbol (['A'..'Z'] ++ ['a'..'z'])) many whites return ([], [x]) ruleLiWithArticle = anyof [ "l" ++ v ++ "-a" ++ c ++ "-" ++ c |-| "l" ++ v ++ c ++ "-" ++ c |:| [] | c <- elems [sunny, moony], c /= "l", v <- elems [vowel, sukun] ++ quote [vowel, sukun] ] |+| anyof [ "l" ++ v ++ "-a" ++ c ++ "-" ++ c |-| "l" ++ v ++ "|-" ++ c ++ c |:| [] |+| "l" ++ v ++ "-a" ++ c ++ "-" ++ c ++ c |-| "l" ++ v ++ "|-" ++ c ++ c |:| [] |+| "l" ++ v ++ "-a" ++ c ++ "-" |-| "l" ++ v ++ c ++ "-" |:| [] |+| "l" ++ v ++ "-a" ++ c ++ c |-| "l" ++ v ++ "|-" ++ c ++ c |:| [] | c <- elems [sunny, moony], c == "l", v <- elems [vowel, sukun] ++ quote [vowel, sukun] ] ruleDefArticle = anyof [ "l" ++ "-" ++ c ++ c |-| "-" ++ c |:| [c] | c <- elems [sunny, moony] ] {- foldr (\ c -> (|+|) ( "l" ++ "-" ++ c ++ c |-| "-" ++ c |:| [c] ) ) zero (elems [sunny, moony]) -} ruleIndefArticle = "NA" |-| "N" |:| [] |+| "NU" |-| "N" |:| [] |+| "NY" |-| "N" |:| [] ruleMultiVowel = -- "iy" |-| "I" |:| [] |+| -- "uw" |-| "U" |:| [] |+| "ii" |-| "I" |:| [] |+| "uu" |-| "U" |:| [] |+| "aa" |-| "A" |:| [] -- -- |+| anyof [ -- -- "iy" ++ v |-| "y" ++ v |:| ["i"] |+| -- "uw" ++ v |-| "w" ++ v |:| ["u"] -- -- | v <- elems [vowel] ++ quote [vowel, sukun] ] ruleHyphenedVowel = anyof [ "-" ++ v |-| v |:| [] |+| -- "iy-" ++ v |-| "y-" ++ v |:| ["i"] |+| -- "uw-" ++ v |-| "w-" ++ v |:| ["u"] |+| "W-" ++ v |-| "W" |:| [v] | v <- elems [vowel] ++ quote [vowel] ] |+| anyof [ "-" ++ v ++ c |-| v ++ c |:| ["-"] |+| -- "iy-" ++ v ++ c |-| "-" ++ v ++ c |:| ["I"] |+| -- "uw-" ++ v ++ c |-| "-" ++ v ++ c |:| ["U"] |+| "W-" ++ v ++ c |-| v ++ c |:| ["W", "-"] | c <- elems [sunny, moony, invis], v <- elems [vowel] ++ quote [vowel] ] ruleInternalTaaaa = anyof [ "H" ++ v |-| "H" |:| [] | v <- elems [vowel] ++ quote [vowel, sukun] ] |+| anyof [ "T" ++ v ++ c |-| "t" ++ v ++ c |:| [] |+| "H" ++ v ++ c |-| "t" ++ v ++ c |:| [] | c <- elems [sunny, moony, minor, invis], v <- elems [vowel, sukun] ++ quote [vowel, sukun] ] -- -------------------------------------------------------------------------- -- LowerUp Definitions -- -------------------------------------------------------------------------- type LowerUp = Map [Char] [UPoint] unionMap :: (OrdMap m, Ord s, Ord a) => [m s a] -> m s a unionMap = unionMapWith (\ x y -> if compare x y == EQ then error "Inconsistent mapping in the Maps" else y) define :: [([Char], [Int])] -> LowerUp define l = makeMapWith const [ (x, map toEnum y) | (x, y) <- l ] consonant :: LowerUp consonant = unionMap [sunny, moony] sunny = define [ ( "t", [ 0x0074 ] ), ( "_t", [ 0x0074, 0x0331 ] ), ( "d", [ 0x0064 ] ), ( "_d", [ 0x0064, 0x0331 ] ), ( "r", [ 0x0072 ] ), ( "z", [ 0x007A ] ), ( "s", [ 0x0073 ] ), ( "^s", [ 0x0073, 0x030C ] ), ( ".s", [ 0x0073, 0x0323 ] ), ( ".d", [ 0x0064, 0x0323 ] ), ( ".t", [ 0x0074, 0x0323 ] ), ( ".z", [ 0x007A, 0x0323 ] ), ( "l", [ 0x006C ] ), ( "n", [ 0x006E ] ) ] invis = define [ ( "|", [ ] ) ] empty = define [ ( "", [ ] ) ] hyphen = define [ ( "-", [ 0x002D ] ) ] sukun = define [ ( "", [ ] ), ( "+", [ ] ) ] shadda = define [ ( "*", [ ] ) ] wasla = define [ ( "W", [ 0x02BC ] ) ] extra = define [ ( "T", [ 0x0074 ] ), ( "H", [ 0x0068 ] ), ( "N", [ 0x006E ] ), ( "W", [ ] ) ] {- hamza = define [ ( "'A", [ 0x0622 ] ), ( "'a", [ 0x0623 ] ), ( "'i", [ 0x0625 ] ), ( "'w", [ 0x0624 ] ), ( "'y", [ 0x0626 ] ), ( "'|", [ 0x0621 ] ) ] -} minor = define [ ( "'", [ 0x02BE ] ), -- [ 0x02BC ] ( "`", [ 0x02BF ] ) -- [ 0x02BB ] ] moony = define [ ( "b", [ 0x0062 ] ), ( "^g", [ 0x0067, 0x030C ] ), ( ".h", [ 0x0068, 0x0323 ] ), ( "_h", [ 0x0068, 0x032E ] ), ( ".g", [ 0x0067, 0x0307 ] ), ( "f", [ 0x0066 ] ), ( "q", [ 0x0071 ] ), ( "k", [ 0x006B ] ), ( "m", [ 0x006D ] ), ( "h", [ 0x0068 ] ), ( "w", [ 0x0077 ] ), ( "y", [ 0x0079 ] ), ( "p", [ 0x0070 ] ), ( "v", [ 0x0076 ] ), ( "g", [ 0x0067 ] ), ( "c", [ 0x0063 ] ), ( "^c", [ 0x0063, 0x030C ] ), ( ",c", [ 0x0063, 0x0301 ] ), ( "^z", [ 0x007A, 0x030C ] ), ( "^n", [ 0x006E, 0x0303 ] ), ( "^l", [ 0x006C, 0x0303 ] ), ( ".r", [ 0x0072, 0x0307 ] ) ] vowel = define [ ( "a", [ 0x0061 ] ), ( "_a", [ 0x0061, 0x0304 ] ), ( "_aA", [ 0x0061, 0x0304 ] ), ( "_aY", [ 0x0061, 0x0304 ] ), ( "_aU", [ 0x0061, 0x0304 ] ), ( "_aI", [ 0x0061, 0x0304 ] ), ( "A", [ 0x0061, 0x0304 ] ), ( "^A", [ 0x0061, 0x0304 ] ), ( "e", [ 0x0065 ] ), ( "E", [ 0x0065, 0x0304 ] ), ( "i", [ 0x0069 ] ), ( "_i", [ 0x0069, 0x0304 ] ), ( "I", [ 0x0069, 0x0304 ] ), ( "^I", [ 0x0069, 0x0304 ] ), ( "_I", [ 0x0069 ] ), ( "o", [ 0x006F ] ), ( "O", [ 0x006F, 0x0304 ] ), ( "u", [ 0x0075 ] ), ( "_u", [ 0x0075, 0x0304 ] ), ( "U", [ 0x0075, 0x0304 ] ), ( "^U", [ 0x0075, 0x0304 ] ), ( "_U", [ 0x0075 ] ), ( "Y", [ 0x0061, 0x0304 ] ) ] digit = define [ ( "0", [ 0x0030 ] ), ( "1", [ 0x0031 ] ), ( "2", [ 0x0032 ] ), ( "3", [ 0x0033 ] ), ( "4", [ 0x0034 ] ), ( "5", [ 0x0035 ] ), ( "6", [ 0x0036 ] ), ( "7", [ 0x0037 ] ), ( "8", [ 0x0038 ] ), ( "9", [ 0x0039 ] ) ] white = define [ ( " ", [ 0x0020 ] ), ( "\n", [ 0x000A ] ), ( "\r", [ 0x000D ] ), ( "\t", [ 0x0009 ] ), ( "\v", [ 0x000B ] ), ( "\f", [ 0x000C ] ) ] punct = define [ ( ".", [ 0x002E ] ), ( ":", [ 0x003A ] ), ( "!", [ 0x0021 ] ), ( ",", [ 0x002C ] ), ( ";", [ 0x003B ] ), ( "?", [ 0x003F ] ) ]