module Language.Docker.Parser
  ( parseText,
    parseFile,
    parseStdin,
    Parser,
    Error,
    DockerfileError (..),
  )
where

import qualified Data.ByteString as B
import qualified Data.Text as T
import qualified Data.Text.Encoding as E
import qualified Data.Text.Encoding.Error as E
import Language.Docker.Parser.Instruction (parseInstruction, parseComment)
import Language.Docker.Parser.Prelude
import Language.Docker.Syntax

-- | Runs 'onlyWhitespaces' first (discarding its result), then the given
-- parser, and finally requires the end of the input. The result of the
-- wrapped parser is returned.
contents :: Parser a -> Parser a
contents p = do
  void onlyWhitespaces
  r <- p
  eof
  return r

-- | Parses zero or more instructions. Each parsed instruction is wrapped in an
-- 'InstructionPos' together with the source name and the line number at which
-- the instruction started.
--
-- The implicit parameter @?esc@ carries the escape character in effect.
dockerfile :: (?esc :: Char) => Parser Dockerfile
dockerfile =
  many $ do
    pos <- getSourcePos
    i <- parseInstruction
    -- Every instruction has to be followed by a line break or the end of the
    -- input; '<?>' attaches a readable label to that expectation so it shows
    -- up in parse errors.
    (eol <|> eof) <?> "a new line followed by the next instruction"
    return $ InstructionPos i (T.pack . sourceName $ pos) (unPos . sourceLine $ pos)

-- | Parses a Dockerfile given as 'Text', using an empty source name.
--
-- CRLF line endings are converted with 'dos2unix' before parsing, and the
-- escape character is determined by 'findEscapePragma' from the lines of the
-- converted text.
parseText :: Text -> Either Error Dockerfile
parseText txt =
  let ?esc = findEscapePragma (T.lines src)
   in parse (contents dockerfile) "" src
  where
    -- Normalise line endings once and reuse the result for both the pragma
    -- lookup and the actual parse.
    src = dos2unix txt

-- | Reads the given file as raw bytes and parses the contents as a Dockerfile,
-- using the file path as the source name. Decoding is left to 'doParse'.
parseFile :: FilePath -> IO (Either Error Dockerfile)
parseFile file = doParse file <$> B.readFile file

-- | Reads the standard input until the end and parses the contents as a Dockerfile
parseStdin :: IO (Either Error Dockerfile)
parseStdin = doParse "/dev/stdin" <$> B.getContents

-- | Parses a list of lines from a dockerfile one by one until either the escape
-- pragma has been found, or pragmas are no longer expected.
-- Pragmas can occur only until a comment, an empty line or another
-- instruction occurs (i.e. they have to be the first lines of a Dockerfile).
findEscapePragma :: [Text] -> Char
findEscapePragma [] = defaultEsc
findEscapePragma (firstLine : rest) =
  -- The comment parser itself needs an escape character; while looking for
  -- the pragma the default one is used.
  let ?esc = defaultEsc
   in case parse (contents parseComment) "" firstLine of
        -- An escape pragma was found: its character wins.
        Right (Pragma (Escape (EscapeChar escChar))) -> escChar
        -- Some other pragma: keep looking on the following lines.
        Right (Pragma _) -> findEscapePragma rest
        -- A parse failure or any non-pragma result ends the search.
        _ -> defaultEsc

-- | Decodes raw bytes to text, normalises CRLF line endings and parses the
-- result as a Dockerfile, reporting errors against the given source path.
--
-- The decoder is chosen from the leading bytes of the input: a recognised
-- UTF-32, UTF-16 or UTF-8 byte-order mark is dropped and selects the matching
-- decoder; without one the input is decoded as UTF-8. Decoding is lenient in
-- every case ('E.lenientDecode').
doParse :: FilePath -> B.ByteString -> Either Error Dockerfile
doParse path txt =
  let ?esc = findEscapePragma (T.lines src)
   in parse (contents dockerfile) path src
  where
    src = dos2unix decoded

    -- Drop the given number of leading bytes and decode the rest leniently.
    lenient decoder markLength = decoder E.lenientDecode (B.drop markLength txt)

    -- The four-byte marks are tested before the two-byte ones because the
    -- UTF-32LE mark starts with the same two bytes as the UTF-16LE mark.
    decoded
      | "\255\254\NUL\NUL" `B.isPrefixOf` txt = lenient E.decodeUtf32LEWith 4
      | "\NUL\NUL\254\255" `B.isPrefixOf` txt = lenient E.decodeUtf32BEWith 4
      | "\255\254" `B.isPrefixOf` txt = lenient E.decodeUtf16LEWith 2
      | "\254\255" `B.isPrefixOf` txt = lenient E.decodeUtf16BEWith 2
      | "\239\187\191" `B.isPrefixOf` txt = lenient E.decodeUtf8With 3
      | otherwise = lenient E.decodeUtf8With 0

-- | Replaces every CRLF (@\\r\\n@) sequence with a single LF (@\\n@).
dos2unix :: T.Text -> T.Text
dos2unix text = T.replace "\r\n" "\n" text