{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module GhcTags.CTag.Parser
( parseTagsFile
, parseTagLine
, parseTag
, parseHeader
) where
import Control.Arrow ((***))
import Control.Applicative (many, (<|>))
import Data.Attoparsec.Text (Parser, (<?>))
import qualified Data.Attoparsec.Text as AT
import Data.Functor (void, ($>))
import Data.Text (Text)
import qualified Data.Text as Text
import System.FilePath (FilePath)
import GhcTags.Tag
import qualified GhcTags.Utils as Utils
import GhcTags.CTag.Header
import GhcTags.CTag.Utils
parseTag :: Parser CTag
parseTag =
(\tagName tagFilePath tagAddr (tagKind, tagFields)
-> Tag { tagName
, tagFilePath
, tagAddr
, tagKind
, tagFields
, tagDefinition = NoTagDefinition
})
<$> parseTagName
<* separator
<*> parseFileName
<* separator
<*> parseTagAddress
<*> (
((,) <$> ( separator *> parseKindField )
<*> ( separator *> parseFields <* endOfLine
<|>
endOfLine $> mempty)
)
<|> curry id NoKind
<$> ( separator *> parseFields <* endOfLine
<|>
endOfLine $> mempty
)
<|> curry (charToTagKind *** id)
<$> ( separator *> AT.satisfy notTabOrNewLine )
<*> ( separator *> parseFields <* endOfLine
<|>
endOfLine $> mempty
)
<|> endOfLine $> (NoKind, mempty)
)
where
separator :: Parser Char
separator = AT.char '\t'
parseTagName :: Parser TagName
parseTagName = TagName <$> AT.takeWhile (/= '\t')
<?> "parsing tag name failed"
parseFileName :: Parser FilePath
parseFileName = Text.unpack <$> AT.takeWhile (/= '\t')
parseExCommand :: Parser ExCommand
parseExCommand = (\x -> ExCommand $ Text.take (Text.length x - 1) x)
<$> AT.scan "" go
<* AT.anyChar
where
go :: String -> Char -> Maybe String
go !s c |
take (length Utils.endOfLine) (c : s)
== reverse Utils.endOfLine
= Nothing
|
l == "\";" = Nothing
| otherwise = Just l
where
l = take 2 (c : s)
parseTagAddress :: Parser CTagAddress
parseTagAddress =
TagLine <$> AT.decimal <* (endOfLine <|> void (AT.string ";\""))
<|>
TagCommand <$> parseExCommand
parseKindField :: Parser CTagKind
parseKindField =
charToTagKind <$>
(AT.string "kind:" *> AT.satisfy notTabOrNewLine)
parseFields :: Parser CTagFields
parseFields = TagFields <$> AT.sepBy parseField separator
parseField :: Parser TagField
parseField =
TagField
<$> AT.takeWhile (\x -> x /= ':' && notTabOrNewLine x)
<* AT.char ':'
<*> AT.takeWhile notTabOrNewLine
parseTags :: Parser [Either Header CTag]
parseTags = many parseTagLine
parseTagLine :: Parser (Either Header CTag)
parseTagLine =
AT.eitherP
(parseHeader <?> "failed parsing tag")
(parseTag <?> "failed parsing header")
parseHeader :: Parser Header
parseHeader = do
e <- AT.string "!_TAG_" $> False
<|>
AT.string "!_" $> True
case e of
True ->
flip parsePseudoTagArgs (AT.takeWhile notTabOrNewLine)
. PseudoTag
=<< AT.takeWhile (\x -> notTabOrNewLine x && x /= '!')
False -> do
headerType <-
AT.string "FILE_ENCODING" $> SomeHeaderType FileEncoding
<|> AT.string "FILE_FORMAT" $> SomeHeaderType FileFormat
<|> AT.string "FILE_SORTED" $> SomeHeaderType FileSorted
<|> AT.string "OUTPUT_MODE" $> SomeHeaderType OutputMode
<|> AT.string "KIND_DESCRIPTION" $> SomeHeaderType KindDescription
<|> AT.string "KIND_SEPARATOR" $> SomeHeaderType KindSeparator
<|> AT.string "PROGRAM_AUTHOR" $> SomeHeaderType ProgramAuthor
<|> AT.string "PROGRAM_NAME" $> SomeHeaderType ProgramName
<|> AT.string "PROGRAM_URL" $> SomeHeaderType ProgramUrl
<|> AT.string "PROGRAM_VERSION" $> SomeHeaderType ProgramVersion
<|> AT.string "EXTRA_DESCRIPTION" $> SomeHeaderType ExtraDescription
<|> AT.string "FIELD_DESCRIPTION" $> SomeHeaderType FieldDescription
case headerType of
SomeHeaderType ht@FileEncoding ->
parsePseudoTagArgs ht (AT.takeWhile notTabOrNewLine)
SomeHeaderType ht@FileFormat ->
parsePseudoTagArgs ht AT.decimal
SomeHeaderType ht@FileSorted ->
parsePseudoTagArgs ht AT.decimal
SomeHeaderType ht@OutputMode ->
parsePseudoTagArgs ht (AT.takeWhile notTabOrNewLine)
SomeHeaderType ht@KindDescription ->
parsePseudoTagArgs ht (AT.takeWhile notTabOrNewLine)
SomeHeaderType ht@KindSeparator ->
parsePseudoTagArgs ht (AT.takeWhile notTabOrNewLine)
SomeHeaderType ht@ProgramAuthor ->
parsePseudoTagArgs ht (AT.takeWhile notTabOrNewLine)
SomeHeaderType ht@ProgramName ->
parsePseudoTagArgs ht (AT.takeWhile notTabOrNewLine)
SomeHeaderType ht@ProgramUrl ->
parsePseudoTagArgs ht (AT.takeWhile notTabOrNewLine)
SomeHeaderType ht@ProgramVersion ->
parsePseudoTagArgs ht (AT.takeWhile notTabOrNewLine)
SomeHeaderType ht@ExtraDescription ->
parsePseudoTagArgs ht (AT.takeWhile notTabOrNewLine)
SomeHeaderType ht@FieldDescription ->
parsePseudoTagArgs ht (AT.takeWhile notTabOrNewLine)
SomeHeaderType PseudoTag {} ->
error "parseHeader: impossible happened"
where
parsePseudoTagArgs :: Show ty
=> HeaderType ty
-> Parser ty
-> Parser Header
parsePseudoTagArgs ht parseArg =
Header ht
<$> ( (Just <$> (AT.char '!' *> AT.takeWhile notTabOrNewLine))
<|> pure Nothing
)
<*> (AT.char '\t' *> parseArg)
<*> (AT.char '\t' *> parseComment)
parseComment :: Parser Text
parseComment =
AT.char '/'
*> (Text.init <$> AT.takeWhile notNewLine)
<* endOfLine
parseTagsFile :: Text
-> IO (Either String [Either Header CTag])
parseTagsFile =
fmap AT.eitherResult
. AT.parseWith (pure mempty) parseTags
endOfLine :: Parser ()
endOfLine = AT.string "\r\n" $> ()
<|> AT.char '\r' $> ()
<|> AT.char '\n' $> ()
notTabOrNewLine :: Char -> Bool
notTabOrNewLine = \x -> x /= '\t' && notNewLine x
notNewLine :: Char -> Bool
notNewLine = \x -> x /= '\n' && x /= '\r'