{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} module Df1.Parse ( parse ) where import Control.Applicative ((<|>), many, empty) import Data.Bits (shiftL) import qualified Data.Sequence as Seq import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import Data.Function (fix) import Data.Functor (($>)) import qualified Data.Attoparsec.ByteString as AB import qualified Data.Attoparsec.ByteString.Lazy as ABL import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL import qualified Data.Time as Time import qualified Data.Time.Clock.System as Time import Data.Word (Word8, Word16, Word32) import Df1.Types (Log(Log, log_time, log_level, log_path, log_message), Level(Debug, Info, Notice, Warning, Error, Critical, Alert, Emergency), Path(Attr, Push), Segment, segment, Key, key, Value, value, Message, message) -------------------------------------------------------------------------------- -- | If sucessful, parsing will stop after the first CR or LF newline marker if -- any, otherwise it will consume all input. parse :: AB.Parser Log {-# INLINABLE parse #-} parse = (AB. "parse") $ do t <- AB.skipWhile (== 32) *> pIso8601 -- :space: p <- AB.skipWhile (== 32) *> pPath l <- AB.skipWhile (== 32) *> pLevel m <- AB.skip (== 32) *> pMessage pure (Log { log_time = Time.utcToSystemTime t , log_level = l, log_path = p, log_message = m }) pIso8601 :: AB.Parser Time.UTCTime {-# INLINABLE pIso8601 #-} pIso8601 = (AB. "pIso8601") $ do year <- (pNum4Digits AB. "year") <* (AB.skip (== 45) AB. "-") month <- (pNum2Digits AB. "month") <* (AB.skip (== 45) AB. "-") day <- (pNum2Digits AB. "day") <* (AB.skip (== 84) AB. "T") Just tday <- pure (Time.fromGregorianValid (fromIntegral year) (fromIntegral month) (fromIntegral day)) hour <- (pNum2Digits AB. "hour") <* (AB.skip (== 58) AB. ":") min' <- (pNum2Digits AB. "minute") <* (AB.skip (== 58) AB. ":") sec <- (pNum2Digits AB. "second") <* (AB.skip (== 46) AB. ".") nsec <- (pNum9Digits AB. "nanosecond") <* (AB.skip (== 90) AB. "Z") Just ttod <- pure (Time.makeTimeOfDayValid (fromIntegral hour) (fromIntegral min') (fromIntegral sec + (fromIntegral nsec / 1000000000))) pure (Time.UTCTime tday (Time.timeOfDayToTime ttod)) pNum1Digit :: AB.Parser Word8 {-# INLINE pNum1Digit #-} pNum1Digit = AB.satisfyWith (subtract 48) (< 10) AB. "pNum1Digit" pNum2Digits :: AB.Parser Word8 {-# INLINE pNum2Digits #-} pNum2Digits = (AB. "pNum2Digits") $ do (+) <$> fmap (* 10) pNum1Digit <*> pNum1Digit pNum4Digits :: AB.Parser Word16 {-# INLINE pNum4Digits #-} pNum4Digits = (AB. "pNum4Digits") $ do (\a b c d -> a + b + c + d) <$> fmap ((* 1000) . fromIntegral) pNum1Digit <*> fmap ((* 100) . fromIntegral) pNum1Digit <*> fmap ((* 10) . fromIntegral) pNum1Digit <*> fmap fromIntegral pNum1Digit pNum9Digits :: AB.Parser Word32 {-# INLINE pNum9Digits #-} pNum9Digits = (AB. "pNum9Digits") $ do (\a b c d e f g h i -> a + b + c + d + e + f + g + h + i) <$> fmap ((* 100000000) . fromIntegral) pNum1Digit <*> fmap ((* 10000000) . fromIntegral) pNum1Digit <*> fmap ((* 1000000) . fromIntegral) pNum1Digit <*> fmap ((* 100000) . fromIntegral) pNum1Digit <*> fmap ((* 10000) . fromIntegral) pNum1Digit <*> fmap ((* 1000) . fromIntegral) pNum1Digit <*> fmap ((* 100) . fromIntegral) pNum1Digit <*> fmap ((* 10) . fromIntegral) pNum1Digit <*> fmap fromIntegral pNum1Digit pLevel :: AB.Parser Level {-# INLINE pLevel #-} pLevel = (AB. "pLevel") -- In decreasing frequency we expect logs to happen. -- We expect 'Debug' to mostly be muted, so 'Info' is prefered. (AB.string "INFO" $> Info) <|> (AB.string "DEBUG" $> Debug) <|> (AB.string "NOTICE" $> Notice) <|> (AB.string "WARNING" $> Warning) <|> (AB.string "ERROR" $> Error) <|> (AB.string "CRITICAL" $> Critical) <|> (AB.string "ALERT" $> Alert) <|> (AB.string "EMERGENCY" $> Emergency) pPath :: AB.Parser (Seq.Seq Path) {-# INLINABLE pPath #-} pPath = (AB. "pPath") $ do fix (\k ps -> ((pPush <|> pAttr) >>= \p -> k (ps Seq.|> p)) <|> pure ps) mempty where {-# INLINE pPush #-} pPush :: AB.Parser Path pPush = (AB. "pPush") $ do seg <- pSegment <* AB.skipWhile (== 32) pure (Push seg) {-# INLINE pAttr #-} pAttr :: AB.Parser Path pAttr = do k <- pKey <* AB.skip (== 61) v <- pValue <* AB.skipWhile (== 32) pure (Attr k v) pSegment :: AB.Parser Segment pSegment = (AB. "pSegment") $ do AB.skip (== 47) AB. "/" bl <- pUtf8LtoL =<< pDecodePercents =<< AB.takeWhile (/= 32) -- :space: pure (segment bl) pKey :: AB.Parser Key pKey = (AB. "pKey") $ do bl <- pUtf8LtoL =<< pDecodePercents =<< AB.takeWhile (\w -> w /= 61 && w /= 32) -- '=' or :space: pure (key bl) pValue :: AB.Parser Value pValue = (AB. "pValue") $ do bl <- pUtf8LtoL =<< pDecodePercents =<< AB.takeWhile (/= 32) -- :space: pure (value bl) pMessage :: AB.Parser Message {-# INLINE pMessage #-} pMessage = (AB. "pMessage") $ do b <- AB.takeWhile (\w -> w /= 10 && w /= 13) -- CR and LF tl <- pUtf8LtoL =<< pDecodePercents b pure (message tl) pUtf8LtoL :: BL.ByteString -> AB.Parser TL.Text {-# INLINE pUtf8LtoL #-} pUtf8LtoL = \bl -> case TL.decodeUtf8' bl of Right x -> pure x Left e -> fail (show e) AB. "pUtf8LtoL" -- | Parse @\"%FF\"@. Always consumes 3 bytes from the input, if successful. pNumPercent :: AB.Parser Word8 {-# INLINE pNumPercent #-} pNumPercent = (AB. "pNum2Nibbles") $ do AB.skip (== 37) -- percent wh <- pHexDigit wl <- pHexDigit pure (shiftL wh 4 + wl) pHexDigit :: AB.Parser Word8 {-# INLINE pHexDigit #-} pHexDigit = AB.satisfyWith (\case w | w >= 48 && w <= 57 -> w - 48 | w >= 65 && w <= 70 -> w - 55 | w >= 97 && w <= 102 -> w - 87 | otherwise -> 99) (\w -> w /= 99) -- | Like 'pDecodePercentsL' but takes strict bytes. pDecodePercents :: B.ByteString -> AB.Parser BL.ByteString {-# INLINE pDecodePercents #-} pDecodePercents = pDecodePercentsL . BL.fromStrict -- | Decodes all 'pNumPercent' occurences from the given input. -- -- TODO: Make faster and more space efficient. pDecodePercentsL :: BL.ByteString -> AB.Parser BL.ByteString {-# INLINABLE pDecodePercentsL #-} pDecodePercentsL = \bl -> either fail pure (ABL.eitherResult (ABL.parse p bl)) where p :: AB.Parser BL.ByteString p = AB.atEnd >>= \case True -> pure mempty False -> fix $ \k -> do b <- AB.peekWord8 >>= \case Nothing -> empty Just 37 -> fmap B.singleton pNumPercent Just _ -> AB.takeWhile1 (\w -> w /= 37) bls <- many k <* AB.endOfInput pure (mconcat (BL.fromStrict b : bls))