{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE LambdaCase #-}
module Haskus.Format.Text.Unicode.UCDParser
( parseCodePointValue
, parseCodePoint
, parseCodePointRange
, parseCodePointValueOrRange
, parseCommentLine
, skipCommentLines
, parseFile
, stripComments
, parseBlocks
, parseDerivedName
)
where
import Text.Megaparsec
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L
import Language.Haskell.TH
import Language.Haskell.TH.Syntax
import Haskus.Format.Text.Unicode.CodePoint
import Haskus.Utils.Flow
type Parser = Parsec () String
parseCodePointValue :: Parser CodePoint
parseCodePointValue = do
v <- L.hexadecimal
let c = CodePoint v
when (v > 0x10FFFF) $ do
error ("Parsed invalid CodePoint: " ++ show c)
return c
parseCodePointRange :: Parser CodePointRange
parseCodePointRange = do
r1 <- parseCodePointValue
void <| string ".."
r2 <- parseCodePointValue
return (CodePointRange r1 r2)
parseCodePointValueOrRange :: Parser (Either CodePoint CodePointRange)
parseCodePointValueOrRange = do
(Right <$> try parseCodePointRange)
<|> (Left <$> parseCodePointValue)
parseCodePoint :: Parser CodePoint
parseCodePoint = do
void <| string "U+"
parseCodePointValue
parseCommentLine :: Parser String
parseCommentLine = do
void <| string "#"
anySingle `manyTill` (void eol <|> try eof)
skipCommentLines :: Parser a -> Parser [a]
skipCommentLines p = do
skipMany (eol <|> parseCommentLine)
atEnd >>= \case
True -> return []
False -> do
x <- p
xs <- skipCommentLines p
return (x:xs)
parseFile :: Lift a => FilePath -> Parser a -> ExpQ
parseFile fp p = do
addDependentFile fp
str <- liftIO (readFile fp)
case runParser p fp str of
Right e -> [| e |]
Left err -> fail (show err)
stripComments :: Parser [String]
stripComments = skipCommentLines (anySingle `manyTill` eol)
parseBlocks :: Parser [(CodePointRange,String)]
parseBlocks = skipCommentLines parseLine
where
parseLine = do
r <- parseCodePointRange
void <| string "; "
n <- anySingle `manyTill` eol
return (r,n)
parseDerivedName :: Parser [(Either CodePoint CodePointRange,String)]
parseDerivedName = skipCommentLines parseLine
where
parseLine = do
e <- parseCodePointValueOrRange
space
void <| string "; "
n <- anySingle `manyTill` (void eol <|> try eof)
return (e,n)