root/compiler/main/HeaderInfo.hs

Revision 6f1a4327263385d8056d7cf754ee357d2b14c24b, 12.8 KB (checked in by Simon Marlow <marlowsd@…>, 7 weeks ago)

fix quadratic performance issue with long module names (#5981)

  • Property mode set to 100644
Line 
1-----------------------------------------------------------------------------
2--
3-- | Parsing the top of a Haskell source file to get its module name,
4-- imports and options.
5--
6-- (c) Simon Marlow 2005
7-- (c) Lemmih 2006
8--
9-----------------------------------------------------------------------------
10
11{-# OPTIONS -fno-warn-tabs #-}
12-- The above warning supression flag is a temporary kludge.
13-- While working on this module you are encouraged to remove it and
14-- detab the module (please do the detabbing in a separate patch). See
15--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
16-- for details
17
18module HeaderInfo ( getImports
19                  , mkPrelImports -- used by the renamer too
20                  , getOptionsFromFile, getOptions
21                  , optionsErrorMsgs,
22                    checkProcessArgsResult ) where
23
24#include "HsVersions.h"
25
26import RdrName
27import HscTypes
28import Parser           ( parseHeader )
29import Lexer
30import FastString
31import HsSyn
32import Module
33import PrelNames
34import StringBuffer
35import SrcLoc
36import DynFlags
37import ErrUtils
38import Util
39import Outputable
40import Pretty           ()
41import Maybes
42import Bag              ( emptyBag, listToBag, unitBag )
43import MonadUtils
44import Exception
45
46import Control.Monad
47import System.IO
48import System.IO.Unsafe
49import Data.List
50
51------------------------------------------------------------------------------
52
53-- | Parse the imports of a source file.
54--
55-- Throws a 'SourceError' if parsing fails.
56getImports :: DynFlags
57           -> StringBuffer -- ^ Parse this.
58           -> FilePath     -- ^ Filename the buffer came from.  Used for
59                           --   reporting parse error locations.
60           -> FilePath     -- ^ The original source filename (used for locations
61                           --   in the function result)
62           -> IO ([Located (ImportDecl RdrName)], [Located (ImportDecl RdrName)], Located ModuleName)
63              -- ^ The source imports, normal imports, and the module name.
64getImports dflags buf filename source_filename = do
65  let loc  = mkRealSrcLoc (mkFastString filename) 1 1
66  case unP parseHeader (mkPState dflags buf loc) of
67    PFailed span err -> parseError span err
68    POk pst rdr_module -> do
69      let _ms@(_warns, errs) = getMessages pst
70      -- don't log warnings: they'll be reported when we parse the file
71      -- for real.  See #2500.
72          ms = (emptyBag, errs)
73      -- logWarnings warns
74      if errorsFound dflags ms
75        then throwIO $ mkSrcErr errs
76        else
77          case rdr_module of
78            L _ (HsModule mb_mod _ imps _ _ _) ->
79              let
80                main_loc = srcLocSpan (mkSrcLoc (mkFastString source_filename) 1 1)
81                mod = mb_mod `orElse` L main_loc mAIN_NAME
82                (src_idecls, ord_idecls) = partition (ideclSource.unLoc) imps
83
84                     -- GHC.Prim doesn't exist physically, so don't go looking for it.
85                ordinary_imps = filter ((/= moduleName gHC_PRIM) . unLoc . ideclName . unLoc) 
86                                        ord_idecls
87
88                implicit_prelude = xopt Opt_ImplicitPrelude dflags
89                implicit_imports = mkPrelImports (unLoc mod) main_loc implicit_prelude imps
90              in
91              return (src_idecls, implicit_imports ++ ordinary_imps, mod)
92
93mkPrelImports :: ModuleName 
94              -> SrcSpan    -- Attribute the "import Prelude" to this location
95              -> Bool -> [LImportDecl RdrName]
96              -> [LImportDecl RdrName]
97-- Consruct the implicit declaration "import Prelude" (or not)
98--
99-- NB: opt_NoImplicitPrelude is slightly different to import Prelude ();
100-- because the former doesn't even look at Prelude.hi for instance
101-- declarations, whereas the latter does.
102mkPrelImports this_mod loc implicit_prelude import_decls
103  | this_mod == pRELUDE_NAME
104   || explicit_prelude_import
105   || not implicit_prelude
106  = []
107  | otherwise = [preludeImportDecl]
108  where
109      explicit_prelude_import
110       = notNull [ () | L _ (ImportDecl { ideclName = mod
111                                        , ideclPkgQual = Nothing }) 
112                          <- import_decls
113                      , unLoc mod == pRELUDE_NAME ]
114
115      preludeImportDecl :: LImportDecl RdrName
116      preludeImportDecl
117        = L loc $ ImportDecl { ideclName      = L loc pRELUDE_NAME,
118                               ideclPkgQual   = Nothing,
119                               ideclSource    = False,
120                               ideclSafe      = False,  -- Not a safe import
121                               ideclQualified = False,
122                               ideclImplicit  = True,   -- Implicit!
123                               ideclAs        = Nothing,
124                               ideclHiding    = Nothing  }
125
126parseError :: SrcSpan -> MsgDoc -> IO a
127parseError span err = throwOneError $ mkPlainErrMsg span err
128
129--------------------------------------------------------------
130-- Get options
131--------------------------------------------------------------
132
133-- | Parse OPTIONS and LANGUAGE pragmas of the source file.
134--
135-- Throws a 'SourceError' if flag parsing fails (including unsupported flags.)
136getOptionsFromFile :: DynFlags
137                   -> FilePath            -- ^ Input file
138                   -> IO [Located String] -- ^ Parsed options, if any.
139getOptionsFromFile dflags filename
140    = Exception.bracket
141              (openBinaryFile filename ReadMode)
142              (hClose)
143              (\handle -> do
144                  opts <- fmap getOptions' $ lazyGetToks dflags' filename handle
145                  seqList opts $ return opts)
146    where -- We don't need to get haddock doc tokens when we're just
147          -- getting the options from pragmas, and lazily lexing them
148          -- correctly is a little tricky: If there is "\n" or "\n-"
149          -- left at the end of a buffer then the haddock doc may
150          -- continue past the end of the buffer, despite the fact that
151          -- we already have an apparently-complete token.
152          -- We therefore just turn Opt_Haddock off when doing the lazy
153          -- lex.
154          dflags' = dopt_unset dflags Opt_Haddock
155
156blockSize :: Int
157-- blockSize = 17 -- for testing :-)
158blockSize = 1024
159
160lazyGetToks :: DynFlags -> FilePath -> Handle -> IO [Located Token]
161lazyGetToks dflags filename handle = do
162  buf <- hGetStringBufferBlock handle blockSize
163  unsafeInterleaveIO $ lazyLexBuf handle (pragState dflags buf loc) False blockSize
164 where
165  loc  = mkRealSrcLoc (mkFastString filename) 1 1
166
167  lazyLexBuf :: Handle -> PState -> Bool -> Int -> IO [Located Token]
168  lazyLexBuf handle state eof size = do
169    case unP (lexer return) state of
170      POk state' t -> do
171        -- pprTrace "lazyLexBuf" (text (show (buffer state'))) (return ())
172        if atEnd (buffer state') && not eof
173           -- if this token reached the end of the buffer, and we haven't
174           -- necessarily read up to the end of the file, then the token might
175           -- be truncated, so read some more of the file and lex it again.
176           then getMore handle state size
177           else case t of
178                  L _ ITeof -> return [t]
179                  _other    -> do rest <- lazyLexBuf handle state' eof size
180                                  return (t : rest)
181      _ | not eof   -> getMore handle state size
182        | otherwise -> return [L (RealSrcSpan (last_loc state)) ITeof]
183                         -- parser assumes an ITeof sentinel at the end
184
185  getMore :: Handle -> PState -> Int -> IO [Located Token]
186  getMore handle state size = do
187     -- pprTrace "getMore" (text (show (buffer state))) (return ())
188     let new_size = size * 2
189       -- double the buffer size each time we read a new block.  This
190       -- counteracts the quadratic slowdown we otherwise get for very
191       -- large module names (#5981)
192     nextbuf <- hGetStringBufferBlock handle new_size
193     if (len nextbuf == 0) then lazyLexBuf handle state True new_size else do
194     newbuf <- appendStringBuffers (buffer state) nextbuf
195     unsafeInterleaveIO $ lazyLexBuf handle state{buffer=newbuf} False new_size
196
197
198getToks :: DynFlags -> FilePath -> StringBuffer -> [Located Token]
199getToks dflags filename buf = lexAll (pragState dflags buf loc)
200 where
201  loc  = mkRealSrcLoc (mkFastString filename) 1 1
202
203  lexAll state = case unP (lexer return) state of
204                   POk _      t@(L _ ITeof) -> [t]
205                   POk state' t -> t : lexAll state'
206                   _ -> [L (RealSrcSpan (last_loc state)) ITeof]
207
208
209-- | Parse OPTIONS and LANGUAGE pragmas of the source file.
210--
211-- Throws a 'SourceError' if flag parsing fails (including unsupported flags.)
212getOptions :: DynFlags
213           -> StringBuffer -- ^ Input Buffer
214           -> FilePath     -- ^ Source filename.  Used for location info.
215           -> [Located String] -- ^ Parsed options.
216getOptions dflags buf filename
217    = getOptions' (getToks dflags filename buf)
218
219-- The token parser is written manually because Happy can't
220-- return a partial result when it encounters a lexer error.
221-- We want to extract options before the buffer is passed through
222-- CPP, so we can't use the same trick as 'getImports'.
223getOptions' :: [Located Token]      -- Input buffer
224            -> [Located String]     -- Options.
225getOptions' toks
226    = parseToks toks
227    where 
228          getToken (L _loc tok) = tok
229          getLoc (L loc _tok) = loc
230
231          parseToks (open:close:xs)
232              | IToptions_prag str <- getToken open
233              , ITclose_prag       <- getToken close
234              = map (L (getLoc open)) (words str) ++
235                parseToks xs
236          parseToks (open:close:xs)
237              | ITinclude_prag str <- getToken open
238              , ITclose_prag       <- getToken close
239              = map (L (getLoc open)) ["-#include",removeSpaces str] ++
240                parseToks xs
241          parseToks (open:close:xs)
242              | ITdocOptions str <- getToken open
243              , ITclose_prag     <- getToken close
244              = map (L (getLoc open)) ["-haddock-opts", removeSpaces str]
245                ++ parseToks xs
246          parseToks (open:xs)
247              | ITdocOptionsOld str <- getToken open
248              = map (L (getLoc open)) ["-haddock-opts", removeSpaces str]
249                ++ parseToks xs
250          parseToks (open:xs)
251              | ITlanguage_prag <- getToken open
252              = parseLanguage xs
253          parseToks _ = []
254          parseLanguage (L loc (ITconid fs):rest)
255              = checkExtension (L loc fs) :
256                case rest of
257                  (L _loc ITcomma):more -> parseLanguage more
258                  (L _loc ITclose_prag):more -> parseToks more
259                  (L loc _):_ -> languagePragParseError loc
260                  [] -> panic "getOptions'.parseLanguage(1) went past eof token"
261          parseLanguage (tok:_)
262              = languagePragParseError (getLoc tok)
263          parseLanguage []
264              = panic "getOptions'.parseLanguage(2) went past eof token"
265
266-----------------------------------------------------------------------------
267
268-- | Complain about non-dynamic flags in OPTIONS pragmas.
269--
270-- Throws a 'SourceError' if the input list is non-empty claiming that the
271-- input flags are unknown.
272checkProcessArgsResult :: MonadIO m => [Located String] -> m ()
273checkProcessArgsResult flags
274  = when (notNull flags) $
275      liftIO $ throwIO $ mkSrcErr $ listToBag $ map mkMsg flags
276    where mkMsg (L loc flag)
277              = mkPlainErrMsg loc $
278                  (text "unknown flag in  {-# OPTIONS_GHC #-} pragma:" <+>
279                   text flag)
280
281-----------------------------------------------------------------------------
282
283checkExtension :: Located FastString -> Located String
284checkExtension (L l ext)
285-- Checks if a given extension is valid, and if so returns
286-- its corresponding flag. Otherwise it throws an exception.
287 =  let ext' = unpackFS ext in
288    if ext' `elem` supportedLanguagesAndExtensions
289    then L l ("-X"++ext')
290    else unsupportedExtnError l ext'
291
292languagePragParseError :: SrcSpan -> a
293languagePragParseError loc =
294  throw $ mkSrcErr $ unitBag $
295     (mkPlainErrMsg loc $
296       vcat [ text "Cannot parse LANGUAGE pragma"
297            , text "Expecting comma-separated list of language options,"
298            , text "each starting with a capital letter"
299            , nest 2 (text "E.g. {-# LANGUAGE RecordPuns, Generics #-}") ])
300
301unsupportedExtnError :: SrcSpan -> String -> a
302unsupportedExtnError loc unsup =
303  throw $ mkSrcErr $ unitBag $
304    mkPlainErrMsg loc $
305        text "Unsupported extension: " <> text unsup $$
306        if null suggestions then empty else text "Perhaps you meant" <+> quotedListWithOr (map text suggestions)
307  where
308     suggestions = fuzzyMatch unsup supportedLanguagesAndExtensions
309
310
311optionsErrorMsgs :: [String] -> [Located String] -> FilePath -> Messages
312optionsErrorMsgs unhandled_flags flags_lines _filename
313  = (emptyBag, listToBag (map mkMsg unhandled_flags_lines))
314  where unhandled_flags_lines = [ L l f | f <- unhandled_flags, 
315                                          L l f' <- flags_lines, f == f' ]
316        mkMsg (L flagSpan flag) = 
317            ErrUtils.mkPlainErrMsg flagSpan $
318                    text "unknown flag in  {-# OPTIONS_GHC #-} pragma:" <+> text flag
Note: See TracBrowser for help on using the browser.