{-# OPTIONS_GHC -fno-cse #-} module Data.RDF.Utils ( FastString(uniq, value), mkFastString, equalFS, compareFS, s2b, b2s, hPutStrRev, hPutStrLnRev, canonicalize, maybeHead ) where import qualified Data.ByteString.Lazy as BL import Data.ByteString.Lazy.Char8(ByteString) import qualified Data.ByteString.Lazy.Char8 as B import Data.Map(Map) import qualified Data.Map as Map import System.IO import Data.IORef import System.IO.Unsafe(unsafePerformIO) bs_newline :: ByteString bs_newline = B.pack "\n" -- |A safe version of head that returns 'Nothing' for an empty list or 'Just (head lst)' for -- a non-empty list. maybeHead :: [a] -> Maybe a maybeHead lst | null lst = Nothing | otherwise = Just (head lst) -- |'FastString' is a bytestring-based string type that provides constant-time equality -- testing. -- -- A 'FastString' value consists of a unique identifier and a (strict) 'ByteString' value. -- The unique identifier is used for constant-time equality testing, and all other operations -- are provided by the 'ByteString' value itself. -- -- 'FastString' values are created by the 'mkFastString' function, which maintains a table -- of all created values, and reuses old values whenever possible. The 'ByteString' is -- maintained internally in reverse order of the string passed to 'mkFastString'; this -- is to provide faster comparison testing for unequal values, since it is very common in -- RDF to have URIs that are equal apart from the last few characters (localname). data FastString = FS { uniq :: !Int, value :: !ByteString } instance Show FastString where show = B.unpack . B.reverse . value -- |Two 'FastString' values are equal iff they have the same unique identifer. instance Eq FastString where (==) = equalFS {-# INLINE equalFS #-} equalFS :: FastString -> FastString -> Bool equalFS fs1 fs2 = uniq fs1 == uniq fs2 -- |Two 'FastString' values are equal if they have the same unique identifier, -- and are otherwise ordered using the natural ordering of 'ByteString' in the -- internal (reversed) representation. instance Ord FastString where compare = compareFS {-# INLINE compareFS #-} compareFS :: FastString -> FastString -> Ordering compareFS fs1 fs2 = if uniq fs1 == uniq fs2 then EQ else compare (value fs1) (value fs2) -- |A convenience function for converting from a bytestring to a string. {-# INLINE b2s #-} b2s :: ByteString -> String b2s = B.unpack -- |A convenience function for converting from a string to a bytestring. {-# INLINE s2b #-} s2b :: String -> ByteString s2b = B.pack -- |Write to the handle the reversed value of the bytestring, with no newline. {-# INLINE hPutStrRev #-} hPutStrRev :: Handle -> ByteString -> IO () hPutStrRev h bs = BL.hPutStr h (B.reverse bs) -- |Write to the handle the reversed value of the bytestring, followed by -- a newline. {-# INLINE hPutStrLnRev #-} hPutStrLnRev :: Handle -> ByteString -> IO () hPutStrLnRev h bs = BL.hPutStr h (B.reverse bs) >> BL.hPutStr h bs_newline -- |Return a 'FastString' value for the given 'ByteString', reusing a 'FastString' -- if one has been created for equal bytestrings, or creating a new one if necessary. -- The 'FastString' values created maintain the invariant that two values have the -- same unique identifier (accessible via 'uniq') iff their respective bytestring -- values are equal. -- -- The unique identifier is only for the given session, and equal 'ByteString' values -- will generally not be assigned the same identifier under different processes and -- different executions. {-# NOINLINE mkFastString #-} mkFastString :: ByteString -> FastString mkFastString bs = unsafePerformIO $ do m <- readIORef fsMap let mFs = Map.lookup bs m case mFs of Just fs -> return fs Nothing -> newFastString bs >>= \fs -> writeIORef fsMap (Map.insert bs fs m) >> return fs -- |Canonicalize the given 'ByteString' value using the 'FastString' -- as the datatype URI. {-# NOINLINE canonicalize #-} canonicalize :: FastString -> ByteString -> ByteString canonicalize typeFs litValue = case Map.lookup typeFs canonicalizerTable of Nothing -> litValue Just fn -> fn litValue {-# INLINE newFastString #-} newFastString :: ByteString -> IO FastString newFastString str = do curr <- readIORef fsCounter modifyIORef fsCounter (+1) return $! FS curr (B.reverse str) {-# NOINLINE fsCounter #-} fsCounter :: IORef Int fsCounter = unsafePerformIO $ newIORef 0 {-# NOINLINE fsMap #-} fsMap :: IORef (Map ByteString FastString) fsMap = unsafePerformIO $ newIORef Map.empty -- A table of mappings from a FastString URI (reversed as -- they are) to a function that canonicalizes a ByteString -- assumed to be of that type. {-# NOINLINE canonicalizerTable #-} canonicalizerTable :: Map FastString (ByteString -> ByteString) canonicalizerTable = Map.fromList [(integerUri, _integerStr), (doubleUri, _doubleStr), (decimalUri, _decimalStr)] where integerUri = mkFsUri "http://www.w3.org/2001/XMLSchema#integer" decimalUri = mkFsUri "http://www.w3.org/2001/XMLSchema#decimal" doubleUri = mkFsUri "http://www.w3.org/2001/XMLSchema#double" mkFsUri :: String -> FastString mkFsUri uri = mkFastString . s2b $! uri _integerStr, _decimalStr, _doubleStr :: ByteString -> ByteString _integerStr = B.dropWhile (== '0') -- exponent: [eE] ('-' | '+')? [0-9]+ -- ('-' | '+') ? ( [0-9]+ '.' [0-9]* exponent | '.' ([0-9])+ exponent | ([0-9])+ exponent ) _doubleStr s = B.pack $ show (read $ B.unpack s :: Double) -- ('-' | '+')? ( [0-9]+ '.' [0-9]* | '.' ([0-9])+ | ([0-9])+ ) _decimalStr s = -- haskell double parser doesn't handle '1.'.., case B.last s of -- so we add a zero if that's the case and then parse '.' -> f (s `B.snoc` '0') _ -> f s where f s' = B.pack $ show (read $ B.unpack s' :: Double)