------------------------------------------------------------------------------
-- |
-- Module      : LiterateX.Parser
-- Description : source parser
-- Copyright   : Copyright (c) 2021-2025 Travis Cardwell
-- License     : MIT
--
-- This module implements the source parser.
------------------------------------------------------------------------------

{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}

module LiterateX.Parser
  ( -- * API
    parse
  ) where

-- https://hackage.haskell.org/package/conduit
import qualified Data.Conduit as C
import Data.Conduit (ConduitT)

-- https://hackage.haskell.org/package/text
import qualified Data.Text as T
import Data.Text (Text)

-- (literatex)
import LiterateX.Types (SourceFormat, SourceLine)
import qualified LiterateX.Types.SourceFormat as SourceFormat
import qualified LiterateX.Types.SourceLine as SourceLine

------------------------------------------------------------------------------
-- $API

-- | Create a "Conduit" transformer that parses the specified source format
--
-- The transformer consumes lines of the input and produces a 'SourceLine' for
-- each line of input.
--
-- @since 0.0.1.0
parse
  :: Monad m
  => SourceFormat
  -> ConduitT Text SourceLine m ()
parse :: forall (m :: * -> *).
Monad m =>
SourceFormat -> ConduitT Text SourceLine m ()
parse SourceFormat
sourceFormat = do
    let parseLine' :: Text -> SourceLine
parseLine' = SourceFormat -> Text -> SourceLine
parseLine SourceFormat
sourceFormat
    Maybe Text
mLine <- ConduitT Text SourceLine m (Maybe Text)
forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
C.await
    case Maybe Text
mLine of
      Just Text
line -> do
        SourceLine -> ConduitT Text SourceLine m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
C.yield (SourceLine -> ConduitT Text SourceLine m ())
-> SourceLine -> ConduitT Text SourceLine m ()
forall a b. (a -> b) -> a -> b
$ if Text
"#!" Text -> Text -> Bool
`T.isPrefixOf` Text
line
          then Text -> SourceLine
SourceLine.Shebang Text
line
          else Text -> SourceLine
parseLine' Text
line
        (Text -> ConduitT Text SourceLine m ())
-> ConduitT Text SourceLine m ()
forall (m :: * -> *) i o r.
Monad m =>
(i -> ConduitT i o m r) -> ConduitT i o m ()
C.awaitForever ((Text -> ConduitT Text SourceLine m ())
 -> ConduitT Text SourceLine m ())
-> (Text -> ConduitT Text SourceLine m ())
-> ConduitT Text SourceLine m ()
forall a b. (a -> b) -> a -> b
$ SourceLine -> ConduitT Text SourceLine m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
C.yield (SourceLine -> ConduitT Text SourceLine m ())
-> (Text -> SourceLine) -> Text -> ConduitT Text SourceLine m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> SourceLine
parseLine'
      Maybe Text
Nothing -> () -> ConduitT Text SourceLine m ()
forall a. a -> ConduitT Text SourceLine m a
forall (m :: * -> *) a. Monad m => a -> m a
return ()

------------------------------------------------------------------------------
-- $Internal

-- | Parse a source line according to the source format
parseLine :: SourceFormat -> Text -> SourceLine
parseLine :: SourceFormat -> Text -> SourceLine
parseLine = \case
    SourceFormat
SourceFormat.DoubleDash      -> Char -> Int -> Text -> SourceLine
parseLineCommentLine Char
'-' Int
2
    SourceFormat
SourceFormat.DoubleSlash     -> Char -> Int -> Text -> SourceLine
parseLineCommentLine Char
'/' Int
2
    SourceFormat
SourceFormat.Hash            -> Char -> Int -> Text -> SourceLine
parseLineCommentLine Char
'#' Int
1
    SourceFormat
SourceFormat.LiterateHaskell -> Text -> SourceLine
parseLiterateHaskellLine
    SourceFormat
SourceFormat.Percent         -> Char -> Int -> Text -> SourceLine
parseLineCommentLine Char
'%' Int
1
    SourceFormat
SourceFormat.LispSemicolons  -> Text -> SourceLine
parseLispCommentLine

------------------------------------------------------------------------------

-- | Parse a source line using line-based comments
parseLineCommentLine
  :: Char  -- ^ comment character
  -> Int   -- ^ number of comment characters to create line comment
  -> Text  -- ^ source line
  -> SourceLine
parseLineCommentLine :: Char -> Int -> Text -> SourceLine
parseLineCommentLine Char
char Int
count Text
line
    | Text -> Bool
T.null Text
line = SourceLine
SourceLine.CodeBlank
    | Bool
otherwise = case Text -> Maybe (Char, Text)
T.uncons (Text -> Maybe (Char, Text))
-> (Text, Text) -> (Text, Maybe (Char, Text))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
char) Text
line of
        (Text
"", Maybe (Char, Text)
_) -> Text -> SourceLine
SourceLine.Code Text
line
        (Text
_, Maybe (Char, Text)
Nothing) -> case Text -> Int -> Ordering
T.compareLength Text
line Int
count of
          Ordering
EQ -> SourceLine
SourceLine.DocBlank
          Ordering
GT -> SourceLine
SourceLine.Rule
          Ordering
LT -> Text -> SourceLine
SourceLine.Code Text
line
        (Text
l, Just (Char
' ', Text
r)) | Text -> Int -> Ordering
T.compareLength Text
l Int
count Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
EQ -> Text -> SourceLine
SourceLine.Doc Text
r
        (Text, Maybe (Char, Text))
_otherwise -> Text -> SourceLine
SourceLine.Code Text
line

------------------------------------------------------------------------------

-- | Parse a source line using Lisp-style comments
--
-- Lisp-style comments begin with one or more semicolons.
parseLispCommentLine
  :: Text  -- ^ source line
  -> SourceLine
parseLispCommentLine :: Text -> SourceLine
parseLispCommentLine Text
line
    | Text -> Bool
T.null Text
line = SourceLine
SourceLine.CodeBlank
    | Bool
otherwise = case Text -> Maybe (Char, Text)
T.uncons (Text -> Maybe (Char, Text))
-> (Text, Text) -> (Text, Maybe (Char, Text))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
';') Text
line of
        (Text
"", Maybe (Char, Text)
_) -> Text -> SourceLine
SourceLine.Code Text
line
        (Text
_, Maybe (Char, Text)
Nothing)
          | Text -> Int -> Ordering
T.compareLength Text
line Int
4 Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
GT -> SourceLine
SourceLine.Rule
          | Bool
otherwise -> SourceLine
SourceLine.DocBlank
        (Text
_, Just (Char
' ', Text
r)) -> Text -> SourceLine
SourceLine.Doc Text
r
        (Text, Maybe (Char, Text))
_otherwise -> Text -> SourceLine
SourceLine.Code Text
line

------------------------------------------------------------------------------

-- | Parse a Literate Haskell source line
parseLiterateHaskellLine
  :: Text  -- ^ source line
  -> SourceLine
parseLiterateHaskellLine :: Text -> SourceLine
parseLiterateHaskellLine Text
line = case Text -> Maybe (Char, Text)
T.uncons Text
line of
    Maybe (Char, Text)
Nothing -> SourceLine
SourceLine.DocBlank
    Just (Char
'>', Text
r1) -> case Text -> Maybe (Char, Text)
T.uncons Text
r1 of
      Maybe (Char, Text)
Nothing -> SourceLine
SourceLine.CodeBlank
      Just (Char
' ', Text
r2) -> Text -> SourceLine
SourceLine.Code Text
r2
      Maybe (Char, Text)
_otherwise -> Text -> SourceLine
SourceLine.Doc Text
line
    Maybe (Char, Text)
_otherwise -> Text -> SourceLine
SourceLine.Doc Text
line