module Text.PCLT.Parser.AdvancedSepBy where
import Control.Monad.State.Strict
import qualified Data.ByteString.Lazy.UTF8.Unified as Lazy (ByteString)
import qualified Data.ByteString.Lazy.UTF8.Unified as B hiding (ByteString)
import Data.MyHelpers
import Data.Int
import Data.List
import qualified Data.Map as M
import Data.Map (Map, (!))
import System.IO.Unsafe
import Text.PCLT.Parser.ParserInternals
type SeparationMarkerIdx = Int
data SeparatedSectorMarker =
Beginning_SSM
| EOF_SSM
| InnerMarker_SSM SeparationMarkerIdx
| Error_SSM StandartMarkingStrategyError
deriving (Eq, Ord)
type MarkedChunkLength = Int64
manyTill_EOForEitherOf :: Parser Char -> [Parser Lazy.ByteString] -> Parser (SeparatedSectorMarker, Lazy.ByteString, MarkedChunkLength)
manyTill_EOForEitherOf !p !ends_list = Parser $! manyTill_EOForEitherOf' B.empty 0
where
alternatives_set = map (\ (end_p, i) -> end_p >> return (InnerMarker_SSM i) ) (zip ends_list [1..])
manyTill_EOForEitherOf' :: Lazy.ByteString -> MarkedChunkLength -> State ParserState (ParserResult (SeparatedSectorMarker, Lazy.ByteString, MarkedChunkLength))
manyTill_EOForEitherOf' !soFar !len = do
saved <- get
result <- runParser $ choice alternatives_set
let returnResult marker = return $ Success (marker, B.pack $ B.unpack $ B.reverse soFar, len)
if_bad = do
put saved
ps_c <- runParser p
case ps_c of
Success c -> manyTill_EOForEitherOf' (B.cons c soFar) (len + 1)
IllegalInput -> returnResult $ Error_SSM $ UnallowedCharacter_SMSE (B.head $ pstInput saved)
ReachedEOF -> returnResult EOF_SSM
case result of
Success a -> returnResult a
IllegalInput -> if_bad
ReachedEOF -> if_bad
type New_Active_Sep_SSM = SeparatedSectorMarker
type Previous_Active_Sep_SSM = SeparatedSectorMarker
type Current_Sep_SSM = SeparatedSectorMarker
type Current_Chunk_SSM = SeparatedSectorMarker
type SectorMarkingStrategy = (Previous_Active_Sep_SSM, Current_Sep_SSM) -> (Current_Chunk_SSM, New_Active_Sep_SSM)
sepBySome :: Parser Char -> SectorMarkingStrategy -> [Parser Lazy.ByteString] -> Parser [(SeparatedSectorMarker, Lazy.ByteString, MarkedChunkLength)]
sepBySome !p !sectorMarkingStrategy !sep_str_list = _getMarkedChunks1 [] Beginning_SSM
where
_getMarkedChunks1 :: [(SeparatedSectorMarker, Lazy.ByteString, MarkedChunkLength)] -> SeparatedSectorMarker -> Parser [(SeparatedSectorMarker, Lazy.ByteString, MarkedChunkLength)]
_getMarkedChunks1 chunks_so_far prev_active_sep_marker =
do (cur_sep_marker, x, len) <- manyTill_EOForEitherOf p sep_str_list
let (cur_chunk_marker, new_active_sep_marker) = sectorMarkingStrategy (prev_active_sep_marker, cur_sep_marker)
new_chunks_set = (cur_chunk_marker, x, len) : chunks_so_far
case new_active_sep_marker of
EOF_SSM -> return $ reverse new_chunks_set
_ -> _getMarkedChunks1 new_chunks_set new_active_sep_marker
data StandartMarkingStrategyError =
InputAfterEOF_SMSE
| BeginningMNotInTheBeginning_SMSE
| OverlappingMarkedChunks_SMSE
| UnsupportedMarkersSequence_SMSE SeparatedSectorMarker SeparatedSectorMarker
| OpenMarkerAtEOF_SMSE SeparationMarkerIdx
| UnallowedCharacter_SMSE Char
deriving (Eq, Ord)
standardMarkingStrategy :: SectorMarkingStrategy
standardMarkingStrategy (prev_active_sep_marker, cur_sep_marker) =
case (prev_active_sep_marker, cur_sep_marker) of
( Beginning_SSM, EOF_SSM) -> (InnerMarker_SSM 0, EOF_SSM)
( EOF_SSM, _) -> (Error_SSM InputAfterEOF_SMSE, EOF_SSM)
( Beginning_SSM, InnerMarker_SSM i) -> (InnerMarker_SSM 0, InnerMarker_SSM i)
(InnerMarker_SSM i, EOF_SSM) -> case i == 0 of
True -> (InnerMarker_SSM 0, EOF_SSM)
False -> (Error_SSM $ OpenMarkerAtEOF_SMSE i, EOF_SSM)
(InnerMarker_SSM i, Beginning_SSM) -> (InnerMarker_SSM i, Error_SSM BeginningMNotInTheBeginning_SMSE)
(InnerMarker_SSM i, InnerMarker_SSM j) ->
case i == j of
True -> (InnerMarker_SSM i, InnerMarker_SSM 0)
False -> case i == 0 of
True -> (InnerMarker_SSM i, InnerMarker_SSM j)
False -> (Error_SSM OverlappingMarkedChunks_SMSE, InnerMarker_SSM j)
_ -> let err_m = Error_SSM $ UnsupportedMarkersSequence_SMSE prev_active_sep_marker cur_sep_marker
in (err_m, err_m)
standardMarkingStrategyFix_StripEmptyChunks :: [(SeparatedSectorMarker, Lazy.ByteString, MarkedChunkLength)] -> [(SeparatedSectorMarker, Lazy.ByteString, MarkedChunkLength)]
standardMarkingStrategyFix_StripEmptyChunks = foldr foldr_f []
where
foldr_f marked_chunk accum =
case marked_chunk == (InnerMarker_SSM 0, B.empty, 0) of
True -> accum
False -> marked_chunk : accum
type ChunkIndexInList_ = Int
retrieveNonPlainMarkingsMap :: [(SeparatedSectorMarker, Lazy.ByteString, MarkedChunkLength)] -> M.Map SeparatedSectorMarker [(Lazy.ByteString, ChunkIndexInList_)]
retrieveNonPlainMarkingsMap marked_seq = fst $ foldl foldr_f (M.empty, 0) marked_seq
where
foldr_f (accum, ind) marked_chunk =
case marked_chunk of
(InnerMarker_SSM i, str, _) ->
case i == 0 of
False -> ( M.unionWith
(++)
accum
( M.singleton (InnerMarker_SSM i) [(str, ind)])
, ind + 1
)
True -> (accum, ind + 1)
_ -> (accum, ind + 1)
getListOfMarkings :: M.Map SeparatedSectorMarker [(Lazy.ByteString, ChunkIndexInList_)] -> Int -> [(Lazy.ByteString, ChunkIndexInList_)]
getListOfMarkings m i = case M.lookup (InnerMarker_SSM i) m of
Just l -> l
Nothing -> []
retrieveErrorsMarkingsList :: [(SeparatedSectorMarker, Lazy.ByteString, MarkedChunkLength)] -> [(SeparatedSectorMarker, Lazy.ByteString, ChunkIndexInList_)]
retrieveErrorsMarkingsList marked_seq = fst $ foldl foldr_f ([], 0) marked_seq
where
foldr_f (accum, ind) marked_chunk =
case marked_chunk of
(Error_SSM err_msg, str, _) -> ((Error_SSM err_msg, str, ind): accum, ind + 1)
_ -> (accum, ind + 1)
insertInsteadOf_inLBS :: (Lazy.ByteString, Lazy.ByteString) -> Lazy.ByteString -> Lazy.ByteString
insertInsteadOf_inLBS (old_sep, new_sep) lbs =
case parse (sepBySome anyChar standardMarkingStrategy [stringLBS old_sep]) lbs of
( IllegalInput , _ ) -> error "This should have never happened! Unexpected error in 'insertInsteadOf_inLBS': parse returned unexpected result!"
( ReachedEOF , _ ) -> error "This should have never happened! Unexpected error in 'insertInsteadOf_inLBS': parse returned unexpected result!"
( Success chunks_list, _ ) ->
B.concat $ intersperse new_sep $ map snd3 chunks_list
instance Show SeparatedSectorMarker where
show sms = _prefix ++ _body ++ "."
where
_prefix = "Sector separation marker: "
_body = case sms of
Beginning_SSM -> "input beginning"
EOF_SSM -> "input ending"
InnerMarker_SSM sm_idx -> "marker #" ++ show sm_idx
Error_SSM smse -> "error '" ++ show smse ++ "'"
instance Show StandartMarkingStrategyError where
show smse = _prefix ++ _body
where
_prefix = "An error occured when parsing a marked text. Marking failed: "
_body =
case smse of
InputAfterEOF_SMSE -> "input is not allowed after EOF."
BeginningMNotInTheBeginning_SMSE -> "beginning is allowed to occur only as the first input marker."
OverlappingMarkedChunks_SMSE -> "the marking strategy doesn't allow overlapping marked text chunks."
UnsupportedMarkersSequence_SMSE prev_active_sm cur_sm -> "the marking strategy doesn't support markers sequence (active previous separator marker -> current separator marker): " ++ show prev_active_sm ++ " -> " ++ show cur_sm ++ " ."
OpenMarkerAtEOF_SMSE sm_idx -> "text ends with unclosed chunk of nonplain (marker index: " ++ show sm_idx ++ ") text."