root/compiler/utils/StringBuffer.lhs

Revision c838658103c644ef6c32e92025b1d4999aa0f9bd, 8.1 KB (checked in by David Terei <davidterei@…>, 12 months ago)

SafeHaskell: Fix validation errors when unsafe base used

  • Property mode set to 100644
Line 
1%
2% (c) The University of Glasgow 2006
3% (c) The University of Glasgow, 1997-2006
4%
5
6Buffers for scanning string input stored in external arrays.
7
8\begin{code}
9{-# LANGUAGE BangPatterns #-}
10{-# OPTIONS_GHC -O -funbox-strict-fields #-}
11-- We always optimise this, otherwise performance of a non-optimised
12-- compiler is severely affected
13
14module StringBuffer
15       (
16        StringBuffer(..),
17        -- non-abstract for vs\/HaskellService
18
19         -- * Creation\/destruction
20        hGetStringBuffer,
21        hGetStringBufferBlock,
22        appendStringBuffers,
23        stringToStringBuffer,
24
25        -- * Inspection
26        nextChar,
27        currentChar,
28        prevChar,
29        atEnd,
30
31        -- * Moving and comparison
32        stepOn,
33        offsetBytes,
34        byteDiff,
35
36        -- * Conversion
37        lexemeToString,
38        lexemeToFastString,
39
40         -- * Parsing integers
41        parseUnsignedInteger,
42       ) where
43
44#include "HsVersions.h"
45
46import Encoding
47import FastString hiding ( buf )
48import FastTypes
49import FastFunctions
50
51import System.IO                ( hGetBuf, hFileSize,IOMode(ReadMode), hClose
52                                , Handle, hTell, openBinaryFile )
53import System.IO.Unsafe         ( unsafePerformIO )
54
55import GHC.Exts
56
57#if __GLASGOW_HASKELL__ >= 701
58import Foreign.Safe
59#else
60import Foreign hiding           ( unsafePerformIO )
61#endif
62
63-- -----------------------------------------------------------------------------
64-- The StringBuffer type
65
66-- |A StringBuffer is an internal pointer to a sized chunk of bytes.
67-- The bytes are intended to be *immutable*.  There are pure
68-- operations to read the contents of a StringBuffer.
69--
70-- A StringBuffer may have a finalizer, depending on how it was
71-- obtained.
72--
73data StringBuffer
74 = StringBuffer {
75     buf :: {-# UNPACK #-} !(ForeignPtr Word8),
76     len :: {-# UNPACK #-} !Int,        -- length
77     cur :: {-# UNPACK #-} !Int         -- current pos
78  }
79  -- The buffer is assumed to be UTF-8 encoded, and furthermore
80  -- we add three '\0' bytes to the end as sentinels so that the
81  -- decoder doesn't have to check for overflow at every single byte
82  -- of a multibyte sequence.
83
84instance Show StringBuffer where
85        showsPrec _ s = showString "<stringbuffer("
86                      . shows (len s) . showString "," . shows (cur s)
87                      . showString ")>"
88
89-- -----------------------------------------------------------------------------
90-- Creation / Destruction
91
92hGetStringBuffer :: FilePath -> IO StringBuffer
93hGetStringBuffer fname = do
94   h <- openBinaryFile fname ReadMode
95   size_i <- hFileSize h
96   let size = fromIntegral size_i
97   buf <- mallocForeignPtrArray (size+3)
98   withForeignPtr buf $ \ptr -> do
99     r <- if size == 0 then return 0 else hGetBuf h ptr size
100     hClose h
101     if (r /= size)
102        then ioError (userError "short read of file")
103        else newUTF8StringBuffer buf ptr size
104
105hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
106hGetStringBufferBlock handle wanted
107    = do size_i <- hFileSize handle
108         offset_i <- hTell handle
109         let size = min wanted (fromIntegral $ size_i-offset_i)
110         buf <- mallocForeignPtrArray (size+3)
111         withForeignPtr buf $ \ptr ->
112             do r <- if size == 0 then return 0 else hGetBuf handle ptr size
113                if r /= size
114                   then ioError (userError $ "short read of file: "++show(r,size,size_i,handle))
115                   else newUTF8StringBuffer buf ptr size
116
117newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
118newUTF8StringBuffer buf ptr size = do
119  pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
120  -- sentinels for UTF-8 decoding
121  let
122      sb0 = StringBuffer buf size 0
123      (first_char, sb1) = nextChar sb0
124        -- skip the byte-order mark if there is one (see #1744)
125        -- This is better than treating #FEFF as whitespace,
126        -- because that would mess up layout.  We don't have a concept
127        -- of zero-width whitespace in Haskell: all whitespace codepoints
128        -- have a width of one column.
129  return (if first_char == '\xfeff' then sb1 else sb0)
130
131appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
132appendStringBuffers sb1 sb2
133    = do newBuf <- mallocForeignPtrArray (size+3)
134         withForeignPtr newBuf $ \ptr ->
135          withForeignPtr (buf sb1) $ \sb1Ptr ->
136           withForeignPtr (buf sb2) $ \sb2Ptr ->
137             do copyArray ptr (sb1Ptr `advancePtr` cur sb1) sb1_len
138                copyArray (ptr `advancePtr` sb1_len) (sb2Ptr `advancePtr` cur sb2) sb2_len
139                pokeArray (ptr `advancePtr` size) [0,0,0]
140                return (StringBuffer newBuf size 0)
141    where sb1_len = calcLen sb1
142          sb2_len = calcLen sb2
143          calcLen sb = len sb - cur sb
144          size =  sb1_len + sb2_len
145
146stringToStringBuffer :: String -> StringBuffer
147stringToStringBuffer str =
148 unsafePerformIO $ do
149  let size = utf8EncodedLength str
150  buf <- mallocForeignPtrArray (size+3)
151  withForeignPtr buf $ \ptr -> do
152    utf8EncodeString ptr str
153    pokeArray (ptr `plusPtr` size :: Ptr Word8) [0,0,0]
154    -- sentinels for UTF-8 decoding
155  return (StringBuffer buf size 0)
156
157-- -----------------------------------------------------------------------------
158-- Grab a character
159
160-- Getting our fingers dirty a little here, but this is performance-critical
161{-# INLINE nextChar #-}
162nextChar :: StringBuffer -> (Char,StringBuffer)
163nextChar (StringBuffer buf len (I# cur#)) =
164  inlinePerformIO $ do
165    withForeignPtr buf $ \(Ptr a#) -> do
166        case utf8DecodeChar# (a# `plusAddr#` cur#) of
167          (# c#, b# #) ->
168             let cur' = I# (b# `minusAddr#` a#) in
169             return (C# c#, StringBuffer buf len cur')
170
171currentChar :: StringBuffer -> Char
172currentChar = fst . nextChar
173
174prevChar :: StringBuffer -> Char -> Char
175prevChar (StringBuffer _   _   0)   deflt = deflt
176prevChar (StringBuffer buf _   cur) _     =
177  inlinePerformIO $ do
178    withForeignPtr buf $ \p -> do
179      p' <- utf8PrevChar (p `plusPtr` cur)
180      return (fst (utf8DecodeChar p'))
181
182-- -----------------------------------------------------------------------------
183-- Moving
184
185stepOn :: StringBuffer -> StringBuffer
186stepOn s = snd (nextChar s)
187
188offsetBytes :: Int -> StringBuffer -> StringBuffer
189offsetBytes i s = s { cur = cur s + i }
190
191byteDiff :: StringBuffer -> StringBuffer -> Int
192byteDiff s1 s2 = cur s2 - cur s1
193
194atEnd :: StringBuffer -> Bool
195atEnd (StringBuffer _ l c) = l == c
196
197-- -----------------------------------------------------------------------------
198-- Conversion
199
200lexemeToString :: StringBuffer -> Int {-bytes-} -> String
201lexemeToString _ 0 = ""
202lexemeToString (StringBuffer buf _ cur) bytes =
203  inlinePerformIO $
204    withForeignPtr buf $ \ptr ->
205      utf8DecodeString (ptr `plusPtr` cur) bytes
206
207lexemeToFastString :: StringBuffer -> Int {-bytes-} -> FastString
208lexemeToFastString _ 0 = nilFS
209lexemeToFastString (StringBuffer buf _ cur) len =
210   inlinePerformIO $
211     withForeignPtr buf $ \ptr ->
212       return $! mkFastStringBytes (ptr `plusPtr` cur) len
213
214-- -----------------------------------------------------------------------------
215-- Parsing integer strings in various bases
216{-
217byteOff :: StringBuffer -> Int -> Char
218byteOff (StringBuffer buf _ cur) i =
219  inlinePerformIO $ withForeignPtr buf $ \ptr -> do
220--    return $! cBox (indexWord8OffFastPtrAsFastChar
221--                         (pUnbox ptr) (iUnbox (cur+i)))
222--or
223--    w <- peek (ptr `plusPtr` (cur+i))
224--    return (unsafeChr (fromIntegral (w::Word8)))
225-}
226-- | XXX assumes ASCII digits only (by using byteOff)
227parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
228parseUnsignedInteger (StringBuffer buf _ cur) len radix char_to_int
229  = inlinePerformIO $ withForeignPtr buf $ \ptr -> return $! let
230    --LOL, in implementations where the indexing needs slow unsafePerformIO,
231    --this is less (not more) efficient than using the IO monad explicitly
232    --here.
233    !ptr' = pUnbox ptr
234    byteOff i = cBox (indexWord8OffFastPtrAsFastChar ptr' (iUnbox (cur + i)))
235    go i x | i == len  = x
236           | otherwise = case byteOff i of
237               char -> go (i + 1) (x * radix + toInteger (char_to_int char))
238  in go 0 0
239
240\end{code}
Note: See TracBrowser for help on using the browser.