-- |
-- Module      :  Parsec
-- Copyright   :  (c) Vitaliy Rukavishnikov
-- License     :  BSD-style (see the file LICENSE)
-- 
-- Maintainer  :  virukav@gmail.com
-- Stability   :  experimental
-- Portability :  non-portable
--
-- Sed commands parser. See "The Open Group Base Specifications Issue 7" for
-- parsing requirements. The current version of the Haskell Sed doesn't supports
-- the back-references in the RE.

module Hsed.Parsec where

import Prelude hiding (readFile, writeFile)
import Text.ParserCombinators.Parsec hiding (label)
import qualified Data.ByteString.Char8 as B
import Hsed.Ast
import Hsed.SedRegex

-- | If an RE is empty last RE used in the last command applied 
data ParserState = ParserState {
    lastRE :: Pattern
} 

emptyState = ParserState { 
    lastRE = B.pack ""   
}

type SedParser = GenParser Char ParserState
type Stream = String

eol = oneOf "\n\r" >> return ()
eoleof = choice [eol, eof]
slash = char '/'
comma = char ','
semi = char ';'
backslash = char '\\'
number = many1 digit >>= \n -> return $ read n
invert = (spaces >> char '!' >> return True) <|> return False

parseSed :: SedParser a -> Stream -> Either ParseError a
parseSed p = runParser p emptyState ""

parseRE :: String -> SedParser Pattern
parseRE pat = do
    let patB = B.pack pat
    updateState (\(ParserState _)  -> ParserState patB)
    return patB

pattern open close val = do
    pat <- between open close val
    if null pat then do
        s <- getState
        return $ lastRE s
     else parseRE (unesc pat)

addr = 
    fmap (Just . LineNumber) number <|>
    (char '$' >> return (Just LastLine)) <|>
    (pattern slash slash val >>= \pat -> return $ Just (Pat pat))
    where val = many (noneOf "/")

addr1 = do
    a1 <- addr
    spaces
    b <- invert
    return $ Address a1 Nothing b

addr2 = do
    a1 <- addr
    comma <?> ","
    a2 <- addr <?> "bad address" 
    spaces
    b <- invert
    return $ Address a1 a2 b

address :: SedParser Address
address = 
    try addr2 <|> try addr1 <|> 
    (invert >>= \b -> return $ Address Nothing Nothing b) 

sedCmds :: SedParser [SedCmd]
sedCmds = 
    many1 $ try (space >> return emptyCmd) <|>
            (do { x <- sedCmd; endCmd; return x })
    where
    endCmd = choice [eol, eof, semiend, comm, spaces >> return ()]
       where
       semiend = try (spaces >> semi >> spaces >> return ())
       comm = lookAhead (char '#') >> return ()

sedCmd :: SedParser SedCmd
sedCmd = do
    a <- address
    fun <- sedFun
    return $ SedCmd a fun

sedFun :: SedParser SedFun
sedFun = choice functions >>= \f -> return f

functions = 
    [substitute, group, append, change, insert, lineNum, delete, deletePat, replacePat, 
     appendPat, replaceHold, appendHold, list, next, appendLinePat, printPat,
     writeUpPat, quit, exchange, comment, branch, test, readFile, writeFile,
     label, transform
    ]

append        = textFun 'a' Append
change        = textFun 'c' Change 
insert        = textFun 'i' Insert

readFile      = fileFun 'r' ReadFile
writeFile     = fileFun 'w' WriteFile
label         = argFun ':' Label

lineNum       = bareFun '=' LineNum
delete        = bareFun 'd' DeleteLine
deletePat     = bareFun 'D' DeletePat
replacePat    = bareFun 'g' ReplacePat
appendPat     = bareFun 'G' AppendPat
replaceHold   = bareFun 'h' ReplaceHold
appendHold    = bareFun 'H' AppendHold
list          = bareFun 'l' List
next          = bareFun 'n' NextLine
appendLinePat = bareFun 'N' AppendLinePat
printPat      = bareFun 'p' PrintPat
writeUpPat    = bareFun 'P' WriteUpPat
quit          = bareFun 'q' Quit
exchange      = bareFun 'x' Exchange

branch        = gotoFun 'b' Branch
test          = gotoFun 't' Test

bareFun :: Char -> SedFun -> SedParser SedFun
bareFun c f = char c >> return f

textFun :: Char -> (Text -> SedFun) -> SedParser SedFun
textFun c f = do
    char c
    backslash <?> "backslash"
    eol <?> "end of line"
    parts <- lines
    return $ f (B.pack (init $ unlines parts))
    where
       lines = do {x <- line; try eoleof; return x}
       line = sepBy part (backslash >> eol)
       part = many (noneOf "\\\n")

fileFun :: Char -> (FilePath -> SedFun) -> SedParser SedFun
fileFun c f = 
    char c >> spaces >> 
    manyTill anyChar (lookAhead eoleof) >>= \l -> return $ f l

argFun :: Char -> (B.ByteString -> SedFun) -> SedParser SedFun
argFun c f = 
    char c >> spaces >> 
    manyTill anyChar (lookAhead eoleof) >>= \l -> return $ f (B.pack l)

gotoFun :: Char -> (Maybe Label -> SedFun) -> SedParser SedFun
gotoFun c f = do
    char c
    many $ choice[char ' ', char '\t']
    label <- manyTill anyChar (lookAhead eoleof)
    if null label then return $ f Nothing
      else return $ f (Just $ B.pack label)

group = do
    char '{'
    cmds <- sedCmds
    spaces  
    char '}' <?> "}"
    return $ Group cmds

comment = do
    char '#'
    manyTill anyChar (lookAhead eoleof)
    return Comment

transform = do
    char 'y'
    slash <?> "/"
    str1 <- manyTill anyChar slash
    str2 <- manyTill anyChar slash
    return $ Transform (B.pack str1) (B.pack str2)

substitute = do
    char 's'
    delim <- lookAhead anyChar
    let val = many $ noneOf [delim]
    pat <- pattern (char delim) (char delim) val
    repl <- rhs delim
    fs <- flags 
    return $ Substitute (B.pack $ unesc (B.unpack pat)) (B.pack $ esc repl) fs
    where
      esc [] = []
      esc [x] | x == '&' = "\\0"
              | otherwise = [x]
      esc (x:y:ys) | [x,y] == "\\n" = '\n':esc ys
                   | [x,y] == "\\\n" = esc (y:ys)
                   | [x,y] == "\\&" = '&':esc ys
                   | x     == '&' = "\\0" ++ esc (y:ys)
                   | otherwise = x:esc(y:ys)           

      rhs delim = manyTill anyChar (char delim)

flags = do
    op <- occur
    out <- outFile
    return $ Flags op out
    where
      occur = occurPrint <|> return Nothing
      outFile = 
          (char 'w' >> spaces >> 
          manyTill anyChar (lookAhead eoleof) >>= \f -> return $ Just f) <|>
          return Nothing
      occurPrint = 
          occurrence >>= \o -> prn >>= \p ->
          return $ Just $ OccurrencePrint o p
      occurrence = 
          (char 'g' >> return (Just ReplaceAll)) <|>
          (number >>= \n -> return $ Just $ Replace n) <|>
          return Nothing
      prn = 
          (char 'p' >> return True) <|>
          return False

unesc [] = []
unesc [x] = [x]
unesc (x:y:xs) | x:[y] == "\\t" = '\t':unesc xs
               | x:[y] == "\\n" = '\n':unesc xs
               | otherwise = x : unesc (y:xs)

emptyCmd = SedCmd (Address Nothing Nothing False) EmptyCmd