-- | Decoding of XML numeric character references and ligature-aware
-- splitting of strings into glyph-sized pieces.
module Graphics.SVGFonts.CharReference (charsFromFullName, characterStrings) where

import Control.Applicative ((<|>), many)
import Data.Attoparsec.Text
import qualified Data.Text as T
import Data.List (sortBy)

-- | Parse one unit of input and return its code point.
--
-- Alternatives are tried in order:
--
--   1. a hexadecimal reference (@&#x@ digits @;@),
--   2. a decimal reference (@&#@ digits @;@),
--   3. any single literal character.
--
-- A reference that is malformed, or whose value lies above the largest
-- 'Char' code point, is not treated as a reference: the parser falls through
-- to alternative 3 and the text is consumed literally, one character at a
-- time. This keeps every returned 'Int' safe to convert with 'toEnum'.
charRef :: Parser Int
charRef =
  ( numericRef "&#x" hexadecimal
      <|> numericRef "&#" decimal
      <|> fmap fromEnum anyChar
  )
    <?> "character reference"
  where
    -- Digits are read as 'Integer' so that an overlong digit run cannot
    -- wrap around into a small, seemingly valid 'Int'.
    numericRef :: String -> Parser Integer -> Parser Int
    numericRef prefix digits = do
      _ <- string (T.pack prefix)
      n <- digits
      _ <- char ';'
      -- 'hexadecimal' and 'decimal' accept no sign, so only the upper
      -- bound needs checking.
      if n <= toInteger (fromEnum (maxBound :: Char))
        then return (fromInteger n)
        else fail "character reference out of range"

-- | One or more 'charRef's; fails on empty input.
charRefs :: Parser [Int]
charRefs = many1 charRef

-- | Run 'charRefs' over the text, yielding @[]@ when the parser fails
-- (which, given the catch-all alternative in 'charRef', means empty input).
fromCharRefs :: T.Text -> [Int]
fromCharRefs = either (const []) id . parseOnly charRefs

-- | Parsing of xml character references.
--
--   I.e. \"\&\#x2e\;\&\#x2e\;\&\#x2e\;\" is converted into a list of three Chars.
--
--   \"ffb\" is also parsed and converted into three Chars (not changing it).
--
--   References that are malformed or name a code point beyond the 'Char'
--   range are left in the output as literal text.
charsFromFullName :: String -> String
charsFromFullName str = map toEnum (fromCharRefs (T.pack str))

-- | Split a string into the pieces that each represent one glyph.
--
--   A string represents a glyph, i.e. the ligature \"ffi\" is a string that
--   represents the ligature glyph ffi.
--
--   At every position the longest matching entry of @ligs@ is taken; when
--   none matches, a single character is taken instead. Empty strings in
--   @ligs@ are ignored: an empty ligature would match everywhere without
--   consuming input and the repetition would never terminate.
characterStrings :: String -> [String] -> [T.Text]
characterStrings str ligs
  | null usable = map T.singleton str
  | otherwise = either (const []) id (parseOnly glyphs (T.pack str))
  where
    usable = filter (not . null) ligs

    -- Longest first, so that "ffi" is preferred over "ff".
    longestFirst = sortBy (\a b -> compare (length b) (length a)) usable

    glyphs = many (ligature <|> singleChar)

    -- attoparsec backtracks on failure, so the alternatives can be chained
    -- directly without wrapping each in 'try'.
    ligature = choice (map (string . T.pack) longestFirst)

    singleChar = fmap T.singleton anyChar