-- |
-- Attoparsec parsers.
module HTMLEntities.Parser where

import Data.Attoparsec.Text
import qualified Data.Text as Text
import qualified HTMLEntities.NameTable as NameTable
import HTMLEntities.Prelude

-- |
-- A parser of a single entity, including the leading @&@ and the
-- trailing @;@.
--
-- Parses numeric encoding:
--
-- >>> mapM_ Data.Text.IO.putStrLn $ Data.Attoparsec.Text.parseOnly htmlEntity "&#169;"
-- ©
--
-- as well as the named entities:
--
-- >>> mapM_ Data.Text.IO.putStrLn $ Data.Attoparsec.Text.parseOnly htmlEntity "&copy;"
-- ©
{-# INLINE htmlEntity #-}
htmlEntity :: Parser Text
htmlEntity =
  char '&' *> htmlEntityBody <* char ';'

-- |
-- A parser of the body of a single entity, i.e. the part between
-- the @&@ and the @;@.
--
-- Parses numeric encoding, either decimal or hexadecimal (the
-- hexadecimal marker may be @x@ or @X@):
--
-- >>> mapM_ Data.Text.IO.putStrLn $ Data.Attoparsec.Text.parseOnly htmlEntityBody "#169"
-- ©
--
-- >>> mapM_ Data.Text.IO.putStrLn $ Data.Attoparsec.Text.parseOnly htmlEntityBody "#xA9"
-- ©
--
-- as well as the named entities:
--
-- >>> mapM_ Data.Text.IO.putStrLn $ Data.Attoparsec.Text.parseOnly htmlEntityBody "copy"
-- ©
--
-- A numeric reference greater than @0x10FFFF@ makes the parser fail
-- instead of raising an exception from 'chr'.
{-# INLINEABLE htmlEntityBody #-}
htmlEntityBody :: Parser Text
htmlEntityBody =
  numeric <|> named
  where
    -- @#@ followed by a decimal number, or by @x@/@X@ and a
    -- hexadecimal number.
    numeric =
      char '#' *> (decimal <|> ((char 'x' <|> char 'X') *> hexadecimal))
        >>= fromCodePoint
    -- The number is parsed as an 'Integer' so that an overly long digit
    -- sequence cannot wrap around into a valid code point, and it is
    -- range-checked because 'chr' is partial above 0x10FFFF.
    fromCodePoint :: Integer -> Parser Text
    fromCodePoint codePoint
      | codePoint <= 0x10FFFF =
          pure (Text.singleton (chr (fromIntegral codePoint)))
      | otherwise = empty
    -- A non-empty run of 'isAlpha' characters, resolved through the
    -- name table; an unknown name fails the parser.
    -- NOTE(review): names containing digits can never match here, since
    -- only 'isAlpha' characters are consumed -- confirm whether the name
    -- table holds such names.
    named =
      takeWhile1 isAlpha >>= liftMaybe . NameTable.lookupTextByName
    liftMaybe =
      maybe empty pure