{-# LANGUAGE LambdaCase, NoMonomorphismRestriction, FlexibleContexts, RankNTypes, Safe, DeriveGeneric, DeriveDataTypeable, CPP, StandaloneDeriving #-} {-# OPTIONS_HADDOCK prune #-} ----------------------------------------------------------------------------- -- | -- Module : Text.Parse.Units -- Copyright : (C) 2014 Richard Eisenberg -- License : BSD-style (see LICENSE) -- Stability : experimental -- Portability : non-portable -- -- This module defines a parser for unit expressions. The syntax for -- these expressions is like F#'s. There are four arithmetic operators -- (@*@, @/@, @^@, and juxtaposition). Exponentiation binds the -- tightest, and it allows an integer to its right (possibly with -- minus signs and parentheses). Next tightest is juxtaposition, which -- indicates multiplication. Because juxtaposition binds tighter than -- division, the expressions @m/s^2@ and @m/s s@ are -- equivalent. Multiplication and division bind the loosest and are -- left-associative, meaning that @m/s*s@ is equivalent to @(m/s)*s@, -- probably not what you meant. Parentheses in unit expressions are -- allowed, of course. -- -- Within a unit string (that is, a unit with an optional prefix), -- there may be ambiguity. If a unit string can be interpreted as a -- unit without a prefix, that parsing is preferred. Thus, @min@ would -- be minutes, not milli-inches (assuming appropriate prefixes and -- units available.) There still may be ambiguity between unit -- strings, even interpreting the string as a prefix and a base -- unit. If a unit string is amiguous in this way, it is rejected. -- For example, if we have prefixes @da@ and @d@ and units @m@ and -- @am@, then @dam@ is ambiguous like this. ----------------------------------------------------------------------------- module Text.Parse.Units ( -- * Parsing units UnitExp(..), parseUnit, -- * Symbol tables SymbolTable(..), PrefixTable, UnitTable, mkSymbolTable, unsafeMkSymbolTable, universalSymbolTable, lex, unitStringParser -- these are pruned from the Haddock output ) where import Prelude hiding ( lex, div ) import GHC.Generics (Generic) import Text.Parsec hiding ( tab ) import Text.Parsec.String import Text.Parsec.Pos import qualified Data.Map.Strict as Map import qualified Data.MultiMap as MM import Control.Monad.Reader import Control.Arrow hiding ( app) import Data.Data (Data) import Data.Maybe import Data.Char #if __GLASGOW_HASKELL__ < 709 import Data.Typeable ( Typeable ) #endif ---------------------------------------------------------------------- -- Basic combinators ---------------------------------------------------------------------- -- copied from GHC partitionWith :: (a -> Either b c) -> [a] -> ([b], [c]) partitionWith _ [] = ([],[]) partitionWith f (x:xs) = case f x of Left b -> (b:bs, cs) Right c -> (bs, c:cs) where (bs,cs) = partitionWith f xs ---------------------------------------------------------------------- -- Extra parser combinators ---------------------------------------------------------------------- -- | @experiment p@ runs @p@. If @p@ succeeds, @experiment@ returns the -- result of running @p@. If @p@ fails, then @experiment@ returns @Nothing@. -- In either case, no input is consumed and @experiment@ never fails. experiment :: Stream s m t => ParsecT s u m a -> ParsecT s u m (Maybe a) experiment = lookAhead . optionMaybe . try consumeAll :: (Stream s m t, Show t) => ParsecT s u m a -> ParsecT s u m a consumeAll p = do result <- p eof return result nochar :: Stream s m Char => Char -> ParsecT s u m () nochar = void . char ---------------------------------------------------------------------- -- Datatypes ---------------------------------------------------------------------- data Op = NegO | MultO | DivO | PowO | OpenP | CloseP instance Show Op where show NegO = "-" show MultO = "*" show DivO = "/" show PowO = "^" show OpenP = "(" show CloseP = ")" data Token = UnitT String | NumberT Integer | OpT Op instance Show Token where show (UnitT s) = s show (NumberT i) = show i show (OpT op) = show op -- | Parsed unit expressions, parameterized by a prefix identifier type and -- a unit identifier type data UnitExp pre u = Unity -- ^ "1" | Unit (Maybe pre) u -- ^ a unit with, perhaps, a prefix | Mult (UnitExp pre u) (UnitExp pre u) | Div (UnitExp pre u) (UnitExp pre u) | Pow (UnitExp pre u) Integer deriving (Eq, Ord, Generic, Data) #if __GLASGOW_HASKELL__ < 709 deriving instance Typeable UnitExp #endif instance (Show pre, Show u) => Show (UnitExp pre u) where show Unity = "1" show (Unit (Just pre) u) = show pre ++ " :@ " ++ show u show (Unit Nothing u) = show u show (Mult e1 e2) = "(" ++ show e1 ++ " :* " ++ show e2 ++ ")" show (Div e1 e2) = "(" ++ show e1 ++ " :/ " ++ show e2 ++ ")" show (Pow e i) = show e ++ " :^ " ++ show i ---------------------------------------------------------------------- -- Lexer ---------------------------------------------------------------------- type Lexer = Parser unitL :: Lexer Token unitL = UnitT `fmap` (many1 letter) opL :: Lexer Token opL = fmap OpT $ do { nochar '-'; return NegO } <|> do { nochar '*'; return MultO } <|> do { nochar '/'; return DivO } <|> do { nochar '^'; return PowO } <|> do { nochar '('; return OpenP } <|> do { nochar ')'; return CloseP } numberL :: Lexer Token numberL = (NumberT . read) `fmap` (many1 digit) lexer1 :: Lexer Token lexer1 = unitL <|> opL <|> numberL lexer :: Lexer [Token] lexer = do spaces choice [ do eof "" return [] , do tok <- lexer1 spaces toks <- lexer return (tok : toks) ] lex :: String -> Either ParseError [Token] lex = parse lexer "" ---------------------------------------------------------------------- -- Symbol tables ---------------------------------------------------------------------- -- | A finite mapping from prefix spellings to prefix identifiers (of -- unspecified type @pre@). All prefix spellings must be strictly alphabetic. type PrefixTable pre = Map.Map String pre -- | A mapping from unit spellings to unit identifiers (of unspecified type -- @u@). All unit spellings must be strictly alphabetic. type UnitTable u = String -> Maybe u -- | A "symbol table" for the parser, mapping prefixes and units to their -- representations. data SymbolTable pre u = SymbolTable { prefixTable :: PrefixTable pre , unitTable :: UnitTable u } deriving (Generic) -- | Build a 'Map' from an association list, checking for ambiguity unambFromList :: (Ord a, Show b) => [(a,b)] -> Either [(a,[String])] (Map.Map a b) unambFromList list = let multimap = MM.fromList list assocs = MM.assocs multimap (errs, goods) = partitionWith (\(key, vals) -> case vals of [val] -> Right (key, val) _ -> Left (key, map show vals)) assocs result = Map.fromList goods in if null errs then Right result else Left errs -- | Build a symbol table from prefix mappings and unit mappings. The prefix mapping -- can be empty. This function checks to make sure that the strings are not -- inherently ambiguous and are purely alphabetic. mkSymbolTable :: (Show pre, Show u) => [(String, pre)] -- ^ Association list of prefixes -> [(String, u)] -- ^ Association list of units -> Either String (SymbolTable pre u) mkSymbolTable prefixes units = let bad_strings = filter (not . all isLetter) (map fst prefixes ++ map fst units) in if not (null bad_strings) then Left $ "All prefixes and units must be composed entirely of letters.\nThe following are illegal: " ++ show bad_strings else let result = do prefixTab <- unambFromList prefixes unitTab <- unambFromList units return $ SymbolTable { prefixTable = prefixTab, unitTable = flip Map.lookup unitTab } in left ((++ error_suffix) . concatMap mk_error_string) result where mk_error_string :: Show x => (String, [x]) -> String mk_error_string (k, vs) = "The label `" ++ k ++ "' is assigned to the following meanings:\n" ++ show vs ++ "\n" error_suffix = "This is ambiguous. Please fix before building a unit parser." -- | Make a symbol table without checking for ambiguity or non-purely -- alphabetic strings. The prefixes must be a (potentially empty) -- finite map, but the units mapping need not be finite. -- Note that this is unsafe in that the resulting parser may behave -- unpredictably. It surely won't launch the rockets, though. unsafeMkSymbolTable :: PrefixTable pre -> UnitTable u -> SymbolTable pre u unsafeMkSymbolTable = SymbolTable -- | A symbol table that accepts all unit strings, but supports no prefixes. universalSymbolTable :: SymbolTable a String universalSymbolTable = SymbolTable Map.empty Just ---------------------------------------------------------------------- -- Unit string parser ---------------------------------------------------------------------- -- We assume that no symbol table is inherently ambiguous! type GenUnitStringParser pre u = ParsecT String () (Reader (SymbolTable pre u)) type UnitStringParser_UnitExp = forall pre u. (Show pre, Show u) => GenUnitStringParser pre u (UnitExp pre u) -- parses just a unit (no prefix) justUnitP :: GenUnitStringParser pre u u justUnitP = do full_string <- getInput units <- asks unitTable case units full_string of Nothing -> fail (full_string ++ " does not match any known unit") Just u -> return u -- parses a unit and prefix, failing in the case of ambiguity prefixUnitP :: UnitStringParser_UnitExp prefixUnitP = do prefixTab <- asks prefixTable let assocs = Map.assocs prefixTab -- these are in the right order results <- catMaybes `liftM` mapM (experiment . parse_one) assocs full_string <- getInput case results of [] -> fail $ "No known interpretation for " ++ full_string [(pre_name, unit_name)] -> return $ Unit (Just pre_name) unit_name lots -> fail $ "Multiple possible interpretations for " ++ full_string ++ ":\n" ++ (concatMap (\(pre_name, unit_name) -> " " ++ show pre_name ++ " :@ " ++ show unit_name ++ "\n") lots) where parse_one :: (String, pre) -> GenUnitStringParser pre u (pre, u) parse_one (pre, name) = do void $ string pre unit_name <- justUnitP return (name, unit_name) -- parse a unit string unitStringParser :: UnitStringParser_UnitExp unitStringParser = try (Unit Nothing `liftM` justUnitP) <|> prefixUnitP ---------------------------------------------------------------------- -- Unit expression parser ---------------------------------------------------------------------- type GenUnitParser pre u = ParsecT [Token] () (Reader (SymbolTable pre u)) type UnitParser a = forall pre u. GenUnitParser pre u a type UnitParser_UnitExp = forall pre u. (Show pre, Show u) => GenUnitParser pre u (UnitExp pre u) -- move a source position past a token updatePosToken :: SourcePos -> Token -> [Token] -> SourcePos updatePosToken pos (UnitT unit_str) _ = updatePosString pos unit_str updatePosToken pos (NumberT i) _ = updatePosString pos (show i) updatePosToken pos (OpT _) _ = incSourceColumn pos 1 -- parse a Token uToken :: (Token -> Maybe a) -> UnitParser a uToken = tokenPrim show updatePosToken -- consume an lparen lparenP :: UnitParser () lparenP = uToken $ \case OpT OpenP -> Just () _ -> Nothing -- consume an rparen rparenP :: UnitParser () rparenP = uToken $ \case OpT CloseP -> Just () _ -> Nothing -- parse a unit string unitStringP :: String -> UnitParser_UnitExp unitStringP str = do symbolTable <- ask case flip runReader symbolTable $ runParserT unitStringParser () "" str of Left err -> fail (show err) Right e -> return e -- parse a number, possibly negated and nested in parens numP :: UnitParser Integer numP = do lparenP n <- numP rparenP return n <|> do uToken $ \case OpT NegO -> Just () _ -> Nothing negate `liftM` numP <|> do uToken $ \case NumberT i -> Just i _ -> Nothing -- parse an exponentiation, like "^2" powP :: GenUnitParser pre u (UnitExp pre u -> UnitExp pre u) powP = option id $ do uToken $ \case OpT PowO -> Just () _ -> Nothing n <- numP return $ flip Pow n -- parse a unit, possibly with an exponent unitP :: UnitParser_UnitExp unitP = do n <- numP case n of 1 -> return Unity _ -> unexpected $ "number " ++ show n <|> do unit_str <- uToken $ \case UnitT unit_str -> Just unit_str _ -> Nothing u <- unitStringP unit_str maybe_pow <- powP return $ maybe_pow u -- parse a "unit factor": either a juxtaposed sequence of units -- or a paranthesized unit exp. unitFactorP :: UnitParser_UnitExp unitFactorP = do lparenP unitExp <- parser rparenP return unitExp <|> (foldl1 Mult `liftM` many1 unitP) -- parse * or / opP :: GenUnitParser pre u (UnitExp pre u -> UnitExp pre u -> UnitExp pre u) opP = uToken $ \case OpT MultO -> Just Mult OpT DivO -> Just Div _ -> Nothing -- parse a whole unit expression parser :: UnitParser_UnitExp parser = chainl unitFactorP opP Unity -- | Parse a unit expression, interpreted with respect the given symbol table. -- Returns either an error message or the successfully-parsed unit expression. parseUnit :: (Show pre, Show u) => SymbolTable pre u -> String -> Either String (UnitExp pre u) parseUnit tab s = left show $ do toks <- lex s flip runReader tab $ runParserT (consumeAll parser) () "" toks