Bio/PDB/EventParser/FastParse.hs

{-# LANGUAGE ScopedTypeVariables, MagicHash, BangPatterns #-}
-- | Utility functions for faster parsing using 'Data.ByteString.Internal'.
module Bio.PDB.EventParser.FastParse(strtof, trim, trimFront)
where

import qualified Data.ByteString.Char8    as BS
import qualified Data.ByteString.Internal as BSI
import qualified Data.ByteString.Unsafe   as BSU

--import Char(ord)
import GHC.Base(Char(..), Int(..))
import GHC.Prim

-- | Alias for 'ord' to make sure it is inlined.
-- Why on earth GHC 7.4 doesn't inline functions like ord?! They should normally compile to no-ops!!!
{-# INLINE ord #-}
ord :: Char -> Int
ord (C# c#) = I# (ord# c#)

-- | Faster 'String' to 'Float' conversion.
-- Top level function dealing with negation.
{-# INLINE strtof #-}
strtof :: BS.ByteString -> Maybe Double
strtof bs | BS.null bs = Nothing
strtof bs              = case chr of 
                               '-'                      -> strtof0 True 0 rest -- this allows for "-" == 0.0
                               _   | dv >= 0 && dv <= 9 -> strtof0 False dv rest
                               ' '                      -> strtof rest
                               _                        -> Nothing
  where chr  = BSI.w2c $ BSU.unsafeHead bs
        dv   = digitValue chr
        rest = BSU.unsafeTail bs

-- | Value of a decimal digit.
{-# INLINE digitValue #-}
digitValue :: Char -> Int
digitValue !c = ord c - ord '0'

-- | Strict negation.
{-# INLINE final #-}
{-# ANN final ("HLint: ignore Too strict if" :: [Char]) #-}
final :: Bool -> Double -> Maybe Double
final !sign !f = if sign then Just (-f) else Just f

-- | First helper function for fast 'ByteString' to 'Float' conversion
-- This function deals with things before the comma.
{-# INLINE strtof0 #-}
strtof0 :: Bool -> Int -> BS.ByteString -> Maybe Double
strtof0 !sign !f !bs | BS.null bs = final sign (fromIntegral f :: Double)
strtof0  sign  f  bs = case chr of _   | dv >= 0 && dv <= 9 -> strtof0 sign (10 * f + dv) rest
                                   '.'                      -> strtof1 sign 0 f rest
                                   _                        -> Nothing
  where chr  = BSI.w2c $ BSU.unsafeHead bs
        rest = BSU.unsafeTail bs
        dv   = digitValue chr

-- | Second helper function for fast 'ByteString' to 'Float' conversion
-- This function deals with things after the comma.
{-# INLINE strtof1 #-}
strtof1 :: Bool -> Int -> Int -> BS.ByteString -> Maybe Double
strtof1 !sign !e !f !bs | BS.null bs = makeDouble sign e f
strtof1  sign  e  f  bs              = case chr of
                                         _   | dv >= 0 && dv <= 9 -> strtof1 sign (e+1) (f*10 + dv) rest
                                         ' '                      -> fs `seq` checkSpaces fs rest
                                         _                        -> Nothing
  where chr  = BSI.w2c $ BSU.unsafeHead bs
        dv   = digitValue chr
        fs   = makeDouble sign e f
        rest = BSU.unsafeTail bs

-- | Finalize parsing of 'Double' and construct result.
{-# INLINE makeDouble #-}
makeDouble !sign !e !f = final sign (fromIntegral f * 0.1 ** fromIntegral e)

-- | Checks that remaining part of 'ByteString' is pure spaces,
-- and return 'Nothing' if there is something else (for error handling.)
{-# INLINE checkSpaces #-}
checkSpaces ::  Maybe a -> BS.ByteString -> Maybe a
checkSpaces !result !blanks = if BS.all (==' ') blanks then result else Nothing

-- | Trim spaces in front of the 'ByteString'.
{-# INLINE trimFront #-}
trimFront !s | BS.null s = s
trimFront !s = if BSU.unsafeHead s == 32 -- space
                 then trimFront $ BSU.unsafeTail s
                 else s
-- No idea why it is faster than BS.span version?

-- | Trim spaces at the end of the 'ByteString'.
{-# INLINE trimRear #-}
trimRear !s | BS.null s = s
trimRear !s = if BSU.unsafeIndex s (BS.length s - 1) == 32
                then trimRear $ butlast s
                else s

-- | Discard last character within 'ByteString' without checking. 
butlast (BSI.PS fp o l) = BSI.PS fp o (l-1)

-- | Trim spaces in the front and at the end of 'ByteString'.
trim !s = trimRear $ trimFront s