{- LogFormat.hs Copyright (C) 2012 Harold Lee This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} {-# LANGUAGE NoMonomorphismRestriction #-} {- | LogFormat is a Haskell module that makes it trivial to parse access log records. -} module Text.LogFormat where import Data.Map import Text.Parsec -- | Parser a is a Parsec parser for Strings that parses an 'a'. type Parser a = Parsec String () a -- | A LogFormat string is made up of literal strings (which must match -- exactly) and % directives that match a certain pattern and can have -- an optional modifier string. data Rule = Literal String | Keyword Char (Maybe String) deriving Show logFormatParser :: String -> Either ParseError (Parser (Map String String)) logFormatParser logFormat = parse rulesParser parserName logFormat where rulesParser = do rules <- logFormatSpecParser return $ buildLogRecordParser rules parserName = "Parsing LogFormat [" ++ logFormat ++ "]" {- Tokenize the LogFormat string that specifies the grammar for log records into a list of Rules. -} logFormatSpecParser = do rules <- many1 (rule <|> literal) return $ combineLiterals rules combineLiterals [] = [] combineLiterals (Literal l1 : Literal l2 : rs) = combineLiterals $ Literal (l1 ++ l2) : rs combineLiterals (r:rs) = r : combineLiterals rs -- Parser for a single % rule in the LogFormat string, including %%. rule = try simpleRule <|> try literalRule <|> try sRule <|> iRule simpleRule = do char '%' format <- oneOf "aABbCDefhHlmnopPqrtTuUvVXIO" return $ Keyword format Nothing literalRule = do string "%%" return $ Literal "%" sRule = do char '%' mod <- optionMaybe $ string ">" char 's' return $ Keyword 's' mod iRule = do char '%' mod <- optionMaybe $ between (char '{') (char '}') (many $ alphaNum <|> char '-') char 'i' return $ Keyword 'i' mod literal = do str <- many1 $ noneOf "%" return $ Literal str buildLogRecordParser :: [Rule] -> Parser (Map String String) buildLogRecordParser rules = Prelude.foldr combiner eolParser rules where eolParser = do newline return empty combiner (Keyword 'i' mod) followingParser = headerParser mod followingParser combiner rule followingParser = do m1 <- parserFor rule m2 <- followingParser return $ union m1 m2 headerParser mod followingParser = do value <- manyTill anyChar (lookAhead (try followingParser)) rest <- followingParser return $ insert key value rest where key = case mod of Nothing -> "header" Just m -> "header:" ++ m -- | Take a parser and convert it to parse a Map instead of just a value. keyValueParser :: a -> Parser b -> Parser (Map a b) keyValueParser key parser = do value <- parser return $ singleton key value concatParser :: String -> Parser (String -> String -> String) concatParser sepStr = do value <- string sepStr return (\a b -> a ++ sepStr ++ b) -- | Parser for IP addresses ipParser :: Parser String ipParser = chainl1 (many1 digit) (concatParser ".") -- | Parser for hostnames hostnameParser :: Parser String hostnameParser = chainl1 (many1 alphaNum) (concatParser ".") digits = many1 digit -- | Build a parser for a 'Rule'. -- -- For 'Keyword' 'Rule's: -- -- Take a character that is used to define a field in the LogFormat -- specification and return a 'Parser' that will parse out a key-value -- for that field from the input. For example, %U in a LogFormat means -- the URL path, so a URL path parser is available as -- -- @ -- parserFor (Keyword \'U\' Nothing) -- @ parserFor :: Rule -> Parser (Map String String) -- Build a parser that matches an exact string literal and returns Nothing. parserFor (Literal lit) = do string lit return empty -- The URL path requested, not including any query string. parserFor (Keyword 'U' Nothing) = keyValueParser "path" (many1 $ alphaNum <|> char '/') -- The request method parserFor (Keyword 'm' Nothing) = keyValueParser "method" $ (many1 $ oneOf ['A'..'Z']) -- The process ID or thread id of the child that serviced the request. parserFor (Keyword 'P' Nothing) = keyValueParser "processId" digits -- The time taken to serve the request, in seconds. parserFor (Keyword 'T' Nothing) = keyValueParser "timeTakenSeconds" digits -- The time taken to serve the request, in microseconds. parserFor (Keyword 'D' Nothing) = keyValueParser "timeTakenMicroseconds" digits -- Size of response in bytes, excluding HTTP headers. parserFor (Keyword 'B' Nothing) = keyValueParser "bytes" $ digits -- Size of response in bytes, excluding HTTP headers. -- In CLF format, i.e. a '-' rather than a 0 when no bytes are sent. parserFor (Keyword 'b' Nothing) = keyValueParser "bytesCLF" valueParser where valueParser = digits <|> (string "-") -- Remote IP-address parserFor (Keyword 'a' Nothing) = keyValueParser "remoteIP" ipParser -- Local IP-address parserFor (Keyword 'A' Nothing) = keyValueParser "localIP" ipParser -- The query string (prepended with a ? if a query string exists, otherwise an empty string) parserFor (Keyword 'q' Nothing) = do value <- (string "") <|> queryStringParser return $ singleton "queryString" value where queryStringParser = do char '?' qs <- many1 $ alphaNum <|> char '&' <|> char '=' return $ "?" ++ qs -- Status. -- For requests that got internally redirected, this is the status of the *original* request, -- %...>s for the last. parserFor (Keyword 's' mod) = keyValueParser (format mod) (many1 alphaNum) where format Nothing = "statusOriginal" format (Just ">") = "statusLast" -- Remote host parserFor (Keyword 'h' Nothing) = keyValueParser "remoteHost" hostnameParser -- The canonical ServerName of the server serving the request. parserFor (Keyword 'v' Nothing) = keyValueParser "canonicalServerName" hostnameParser -- The server name according to the UseCanonicalName setting. parserFor (Keyword 'V' Nothing) = keyValueParser "serverName" hostnameParser -- Next up: l t r