-- | Parser definitions
--
--   Defines parsers used by the I2P SAM protocol
--
--   __Warning__: This module is used internally by "Network.Anonymous.I2P"
--                and using these functions directly is unsupported. The
--                interface of these functions might change at any time without
--                prior notice.
--
module Network.Anonymous.I2P.Protocol.Parser where

import           Control.Applicative                       ((*>), (<$>), (<*),
                                                            (<|>))

import qualified Data.Attoparsec.ByteString                as Atto
import qualified Data.Attoparsec.ByteString.Char8          as Atto8
import qualified Data.ByteString                           as BS
import qualified Data.ByteString.Char8                     as BS8
import           Data.Word                                 (Word8)

import qualified Network.Anonymous.I2P.Protocol.Parser.Ast as A

-- | ASCII code of a double quote (@"@).
doubleQuote :: Word8
doubleQuote = 34

-- | ASCII code of a single quote (@'@).
singleQuote :: Word8
singleQuote = 39

-- | ASCII code of a backslash (@\\@).
backslash :: Word8
backslash = 92

-- | ASCII code of an equality sign (@=@).
equals :: Word8
equals = 61

-- | Parses a single- or double-quoted value, and returns all bytes within the
--   value, without the surrounding quotes. Escape sequences are returned
--   verbatim: the unescaping is beyond the scope of this function (since
--   different unescaping mechanisms might be desired).
--
--   Looking at the SAMv3 code on github, it appears as if the protocol is kind
--   of hacked together at the moment: no character escaping is performed at
--   all, and no formal tokens / AST is used.
--
--   So this function already goes way beyond what is required, but it cannot
--   hurt to do so.
quotedValue :: Atto.Parser BS.ByteString
quotedValue =
  quoted doubleQuote <|> quoted singleQuote
  where
    -- The value between a pair of @q@ quote bytes, without the quotes
    -- themselves.
    quoted :: Word8 -> Atto.Parser BS.ByteString
    quoted q = Atto.word8 q *> escaped q <* Atto.word8 q

    -- The raw (still escaped) bytes up to, but not including, the first
    -- unescaped @q@.
    escaped :: Word8 -> Atto.Parser BS.ByteString
    escaped q = BS8.concat <$> Atto8.many' (chunk q)

    -- One piece of the quoted content. The alternatives are tried in order.
    chunk :: Word8 -> Atto.Parser BS.ByteString
    chunk q =
          -- Fast path: a run of bytes that are neither the quote nor a
          -- backslash can never take part in an escape sequence, so it is
          -- taken in one go instead of one byte at a time.
          Atto.takeWhile1 (\w -> w /= q && w /= backslash)

          -- Make sure that we eat pairs of backslashes; this will make sure
          -- that a string such as "\\\\" is interpreted correctly, and the
          -- ending quote will not be interpreted as escaped.
      <|> Atto8.string (BS8.pack "\\\\")

          -- This eats all escaped quotes and leaves them intact; the
          -- unescaping is beyond the scope of this function.
      <|> Atto8.string (BS.pack [backslash, q])

          -- And for the rest (e.g. a lone backslash): eat any single byte
          -- that is not the quote.
      <|> (BS.singleton <$> Atto.satisfy (/= q))

-- | An unquoted value is "everything until a whitespace or newline is reached".
--   This is pretty broad, but the SAM implementation in I2P just uses a strtok,
--   and is quite hackish. At least one byte is required.
unquotedValue :: Atto.Parser BS.ByteString
unquotedValue = Atto8.takeWhile1 (not . Atto8.isSpace)

-- | Parses either a quoted value or an unquoted value; the quoted form is
--   tried first.
value :: Atto.Parser BS.ByteString
value = quotedValue <|> unquotedValue

-- | Parses a key, an equality sign and a value, and returns them as a token
--   that carries the value.
keyValue :: Atto.Parser A.Token
keyValue = do
  A.Token k _ <- key
  v           <- Atto.word8 equals *> value
  return (A.Token k (Just v))

-- | Parses a key, which, after studying the SAMv3 code, is anything until
--   either a space has been reached, or an '=' is reached. The resulting token
--   carries no value, and at least one byte is required.
key :: Atto.Parser A.Token
key = (`A.Token` Nothing) <$> Atto8.takeWhile1 isKeyChar
  where
    -- A byte belongs to a key as long as it is neither '=' nor whitespace.
    isKeyChar :: Char -> Bool
    isKeyChar c = c /= '=' && not (Atto8.isSpace c)

-- | A Token is either a Key or a Key/Value combination. Any horizontal
--   whitespace in front of the token is skipped; the key/value form is tried
--   before the bare key.
token :: Atto.Parser A.Token
token = Atto.skipWhile Atto8.isHorizontalSpace *> (keyValue <|> key)

-- | Parser that reads zero or more key or key/value tokens.
tokens :: Atto.Parser [A.Token]
tokens = Atto.many' token

-- | A generic parser that reads a whole line of key/values and ends in a
--   newline.
--
--   Horizontal whitespace between the last token and the newline is skipped
--   explicitly: 'token' only skips whitespace in front of a token that
--   actually follows, so without this step a line with trailing blanks would
--   be rejected.
line :: Atto.Parser A.Line
line = tokens <* Atto.skipWhile Atto8.isHorizontalSpace <* Atto8.endOfLine