{- |
Copyright: (c) 2020-2022 Kowainik
SPDX-License-Identifier: MPL-2.0
Maintainer: Kowainik <xrom.xkov@gmail.com>

Parser for Haskell Modules to get all Haskell Language Extensions used.
-}

module Extensions.Module
       ( parseFile
       , parseSource
       , parseSourceWithPath

         -- * Internal Parsers
       , extensionsP
       , singleExtensionsP
       , extensionP
       , languagePragmaP
       , optionsGhcP
       , pragmaP
       , commentP
       , cppP
       ) where

import Data.ByteString (ByteString)
import Data.Char (toLower, toUpper)
import Data.Either (partitionEithers)
import Data.Foldable (traverse_)
import Data.Functor ((<&>))
import Data.List (nub)
import Data.List.NonEmpty (NonEmpty (..))
import System.Directory (doesFileExist)
import Text.Parsec (alphaNum, between, char, eof, many, many1, manyTill, noneOf, oneOf, parse,
                    sepBy1, skipMany, try, (<|>))
import Text.Parsec.ByteString (Parser)
import Text.Parsec.Char (anyChar, endOfLine, letter, newline, space, spaces, string)
import Text.Read (readMaybe)

import Extensions.Types (ModuleParseError (..), OnOffExtension (..), ParsedExtensions (..),
                         SafeHaskellExtension (..), readOnOffExtension)

import qualified Data.ByteString as BS


-- | Internal data type for known and unknown extensions.
data ParsedExtension
    = KnownExtension OnOffExtension
    | SafeExtension SafeHaskellExtension
    | UnknownExtension String

handleParsedExtensions :: [ParsedExtension] -> Either ModuleParseError ParsedExtensions
handleParsedExtensions :: [ParsedExtension] -> Either ModuleParseError ParsedExtensions
handleParsedExtensions = ([String], [Either SafeHaskellExtension OnOffExtension])
-> Either ModuleParseError ParsedExtensions
handleResult forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. [Either a b] -> ([a], [b])
partitionEithers forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map ParsedExtension
-> Either String (Either SafeHaskellExtension OnOffExtension)
toEither
  where
    toEither :: ParsedExtension -> Either String (Either SafeHaskellExtension OnOffExtension)
    toEither :: ParsedExtension
-> Either String (Either SafeHaskellExtension OnOffExtension)
toEither (UnknownExtension String
ext) = forall a b. a -> Either a b
Left String
ext
    toEither (KnownExtension OnOffExtension
ext)   = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right OnOffExtension
ext
    toEither (SafeExtension SafeHaskellExtension
ext)    = forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left SafeHaskellExtension
ext

    -- Make sure that there is no conflicting 'SafeHaskellExtension's.
    handleResult
        :: ([String], [Either SafeHaskellExtension OnOffExtension])
        -> Either ModuleParseError ParsedExtensions
    handleResult :: ([String], [Either SafeHaskellExtension OnOffExtension])
-> Either ModuleParseError ParsedExtensions
handleResult ([String]
unknown, [Either SafeHaskellExtension OnOffExtension]
knownAndSafe) = case [String]
unknown of
        []   -> let ([SafeHaskellExtension]
safe, [OnOffExtension]
known) = forall a b. [Either a b] -> ([a], [b])
partitionEithers [Either SafeHaskellExtension OnOffExtension]
knownAndSafe in case forall a. Eq a => [a] -> [a]
nub [SafeHaskellExtension]
safe of
            []   -> forall a b. b -> Either a b
Right ParsedExtensions
                { parsedExtensionsAll :: [OnOffExtension]
parsedExtensionsAll = [OnOffExtension]
known
                , parsedExtensionsSafe :: Maybe SafeHaskellExtension
parsedExtensionsSafe = forall a. Maybe a
Nothing
                }
            [SafeHaskellExtension
s]  -> forall a b. b -> Either a b
Right ParsedExtensions
                { parsedExtensionsAll :: [OnOffExtension]
parsedExtensionsAll = [OnOffExtension]
known
                , parsedExtensionsSafe :: Maybe SafeHaskellExtension
parsedExtensionsSafe = forall a. a -> Maybe a
Just SafeHaskellExtension
s
                }
            SafeHaskellExtension
s:[SafeHaskellExtension]
ss -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ NonEmpty SafeHaskellExtension -> ModuleParseError
ModuleSafeHaskellConflict forall a b. (a -> b) -> a -> b
$ SafeHaskellExtension
s forall a. a -> [a] -> NonEmpty a
:| [SafeHaskellExtension]
ss
        String
x:[String]
xs -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ NonEmpty String -> ModuleParseError
UnknownExtensions forall a b. (a -> b) -> a -> b
$ String
x forall a. a -> [a] -> NonEmpty a
:| [String]
xs

{- | By the given file path, reads the file and returns 'ParsedExtensions', if
parsing succeeds.
-}
parseFile :: FilePath -> IO (Either ModuleParseError ParsedExtensions)
parseFile :: String -> IO (Either ModuleParseError ParsedExtensions)
parseFile String
file = String -> IO Bool
doesFileExist String
file forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Bool
hasFile ->
    if Bool
hasFile
    then String -> ByteString -> Either ModuleParseError ParsedExtensions
parseSourceWithPath String
file forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO ByteString
BS.readFile String
file
    else forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String -> ModuleParseError
FileNotFound String
file

{- | By the given file path and file source content, returns 'ParsedExtensions',
if parsing succeeds.

This function takes a path to a Haskell source file. The path is only used for
error message. Pass empty string or use 'parseSource', if you don't have a path
to a Haskell module.
-}
parseSourceWithPath :: FilePath -> ByteString -> Either ModuleParseError ParsedExtensions
parseSourceWithPath :: String -> ByteString -> Either ModuleParseError ParsedExtensions
parseSourceWithPath String
path ByteString
src = case forall s t a.
Stream s Identity t =>
Parsec s () a -> String -> s -> Either ParseError a
parse Parser [ParsedExtension]
extensionsP String
path ByteString
src of
    Left ParseError
err         -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ ParseError -> ModuleParseError
ParsecError ParseError
err
    Right [ParsedExtension]
parsedExts -> [ParsedExtension] -> Either ModuleParseError ParsedExtensions
handleParsedExtensions [ParsedExtension]
parsedExts

{- | By the given file source content, returns 'ParsedExtensions', if parsing
succeeds.
-}
parseSource :: ByteString -> Either ModuleParseError ParsedExtensions
parseSource :: ByteString -> Either ModuleParseError ParsedExtensions
parseSource = String -> ByteString -> Either ModuleParseError ParsedExtensions
parseSourceWithPath String
"SourceName"

{- | The main parser of 'ParsedExtension'.

It parses language pragmas or comments until end of file or the first line with
the function/import/module name.
-}
extensionsP :: Parser [ParsedExtension]
extensionsP :: Parser [ParsedExtension]
extensionsP = forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
    ( Parser ()
newLines forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*>
      forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill
        (forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try Parser [ParsedExtension]
singleExtensionsP forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a. Parser [a]
optionsGhcP forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a. Parser [a]
commentP forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a. Parser [a]
cppP)
        (forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (() forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
endOfLine forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
letter))
    )

{- | Single LANGUAGE pragma parser.

@
 \{\-# LANGUAGE XXX
  , YYY ,
  ZZZ
 #\-\}
@
-}
singleExtensionsP :: Parser [ParsedExtension]
singleExtensionsP :: Parser [ParsedExtension]
singleExtensionsP =
    forall a. Parser a -> Parser a
languagePragmaP (forall a. Parser a -> Parser [a]
commaSep (Parser ()
nonExtP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParsecT ByteString () Identity ParsedExtension
extensionP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
nonExtP) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces)

nonExtP :: Parser ()
nonExtP :: Parser ()
nonExtP = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany (forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a. Parser [a]
cppP forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a. Parser [a]
commentP)

{- | Parses all known and unknown 'OnOffExtension's or 'SafeHaskellExtension's.
-}
extensionP :: Parser ParsedExtension
extensionP :: ParsecT ByteString () Identity ParsedExtension
extensionP = (forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
alphaNum forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m ()
spaces) forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> \String
txt ->
    case String -> Maybe OnOffExtension
readOnOffExtension String
txt of
        Just OnOffExtension
ext -> OnOffExtension -> ParsedExtension
KnownExtension OnOffExtension
ext
        Maybe OnOffExtension
Nothing  -> case forall a. Read a => String -> Maybe a
readMaybe @SafeHaskellExtension String
txt of
            Just SafeHaskellExtension
ext -> SafeHaskellExtension -> ParsedExtension
SafeExtension SafeHaskellExtension
ext
            Maybe SafeHaskellExtension
Nothing  -> String -> ParsedExtension
UnknownExtension String
txt

{- | Parser for standard language pragma keywords: @\{\-\# LANGUAGE XXX \#\-\}@
-}
languagePragmaP :: Parser a -> Parser a
languagePragmaP :: forall a. Parser a -> Parser a
languagePragmaP = forall a. Parser () -> Parser a -> Parser a
pragmaP forall a b. (a -> b) -> a -> b
$ String -> Parser ()
istringP String
"LANGUAGE"

{- | Parser for GHC options pragma keywords: @\{\-\# OPTIONS_GHC YYY \#\-\}@
-}
optionsGhcP :: Parser [a]
optionsGhcP :: forall a. Parser [a]
optionsGhcP = [] forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall a. Parser a -> Parser a
optionsGhcPragmaP (forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 ParsecT ByteString () Identity String
ghcOptionP)
  where
    ghcOptionP :: Parser String
    ghcOptionP :: ParsecT ByteString () Identity String
ghcOptionP = Parser ()
newLines forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 (forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
alphaNum forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'-') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
newLines

optionsGhcPragmaP :: Parser a -> Parser a
optionsGhcPragmaP :: forall a. Parser a -> Parser a
optionsGhcPragmaP = forall a. Parser () -> Parser a -> Parser a
pragmaP forall a b. (a -> b) -> a -> b
$ String -> Parser ()
istringP String
"OPTIONS_GHC"

-- | Parser for case-insensitive strings.
istringP :: String -> Parser ()
istringP :: String -> Parser ()
istringP = forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
(a -> f b) -> t a -> f ()
traverse_ forall a b. (a -> b) -> a -> b
$ \Char
c -> forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
oneOf [Char -> Char
toUpper Char
c, Char -> Char
toLower Char
c]

{- | Parser for GHC pragmas with a given pragma word.
-}
pragmaP :: Parser () -> Parser a -> Parser a
pragmaP :: forall a. Parser () -> Parser a -> Parser a
pragmaP Parser ()
pragmaNameP Parser a
p = forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between
    (forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{-#") (forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"#-}" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
newLines)
    (Parser ()
newLines forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
nonExtP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
pragmaNameP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
newLines forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser a
p forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
newLines)

-- | Comma separated parser. Newlines and spaces are allowed around comma.
commaSep :: Parser a -> Parser [a]
commaSep :: forall a. Parser a -> Parser [a]
commaSep Parser a
p = Parser a
p forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
`sepBy1` forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (Parser ()
newLines forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
',' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
newLines)

{- | Haskell comment parser.
Supports both single-line comments:

  @
  -- I am a single comment
  @

and multi-line comments:

  @
  \{\- I
  AM
  MULTILINE
  \-\}
  @
-}
commentP :: Parser [a]
commentP :: forall a. Parser [a]
commentP = Parser ()
newLines forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a. Parser [a]
singleLineCommentP forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a. Parser [a]
multiLineCommentP) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
newLines
  where
    singleLineCommentP :: Parser [a]
    singleLineCommentP :: forall a. Parser [a]
singleLineCommentP = [] forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$
        (forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"--" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
anyChar (forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (() forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
endOfLine) forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof))

    multiLineCommentP :: Parser [a]
    multiLineCommentP :: forall a. Parser [a]
multiLineCommentP = [] forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$
        (forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{-" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
anyChar (forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"-}"))

{- | CPP syntax parser.

  @
  #if \_\_GLASGOW_HASKELL\_\_ < 810
  -- Could be more Language pragmas that should be parsed
  #endif
  @
-}
cppP :: Parser [a]
cppP :: forall a. Parser [a]
cppP =
    [] forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
newline
       forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'#' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
noneOf String
"-")
       forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
anyChar (forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
endOfLine)
       forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
newLines

-- | Any combination of spaces and newlines.
newLines :: Parser ()
newLines :: Parser ()
newLines = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany (forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
space forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
endOfLine)