From ba56bc72ece4b8b57a31196ad182c9a61a0998a9 Mon Sep 17 00:00:00 2001 From: Yuanle Song Date: Mon, 7 Sep 2015 19:44:35 +0800 Subject: [PATCH] now a2p can parse files specified in command line arguments --- apache2-log-parser.cabal | 2 ++ src/Main.hs | 62 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/apache2-log-parser.cabal b/apache2-log-parser.cabal index 1556532..9a6ef87 100644 --- a/apache2-log-parser.cabal +++ b/apache2-log-parser.cabal @@ -20,3 +20,5 @@ executable a2p ,attoparsec ,MissingH ,bytestring + ,optparse-applicative + ,time diff --git a/src/Main.hs b/src/Main.hs index 0f6ae39..8358dd8 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -6,9 +6,14 @@ module Main where -- represented using an 8-bit character set, e.g. ASCII or ISO-8859-15. import Data.Attoparsec.ByteString.Char8 import Data.Word -import Data.ByteString +import qualified Data.ByteString as B +import Data.ByteString (ByteString) -import Data.Bits.Utils (c2w8) +import Options.Applicative hiding (Parser) +import qualified Options.Applicative as O + +import Data.Time.Clock (getCurrentTime, diffUTCTime) +import Text.Printf (printf) data AccessLog = AccessLog { clientIp :: ByteString , datetime :: ByteString @@ -18,6 +23,10 @@ data AccessLog = AccessLog { clientIp :: ByteString , browser :: ByteString , responseTime :: Int} deriving (Show) +--------------- +-- log parsing +--------------- + tillChar :: Char -> Parser ByteString tillChar c = takeTill (== c) @@ -34,7 +43,7 @@ parseDate = do char ' ' timezone <- tillChar ']' char ']' - return $ Data.ByteString.concat [date, " ", timezone] + return $ B.concat [date, " ", timezone] parseRequest :: Parser (ByteString, ByteString) parseRequest = do @@ -76,6 +85,49 @@ parseLine = do ,browser=browser ,responseTime=responseTime} +logParser :: Parser [AccessLog] +logParser = many $ parseLine <* endOfLine + +---------------------------------- +-- command line argument handling 
+----------------------------------
+
+-- | Command line options; 'inputFile' is the path of the access log to parse.
+data AppArguments = AppArguments { inputFile :: String }
+
+-- | Option parser for @-i@ / @--input ACCESS_LOG_FILE@.
+argumentParser :: O.Parser AppArguments
+argumentParser = AppArguments <$> strOption
+  (  long "input"
+  <> short 'i'
+  <> metavar "ACCESS_LOG_FILE"
+  <> help "the access filename to parse")
+
+-- | Parse the command line with 'argumentParser'; 'helper' adds the help option.
+parseArguments :: IO AppArguments
+parseArguments = execParser $ info (helper <*> argumentParser)
+  (  fullDesc
+  <> progDesc "parse apache2 access log"
+  <> header "a2p - parse apache2 access log")
+
+-- | Parse the file named by 'inputFile' and print the line count and elapsed time.
+parseFile :: AppArguments -> IO ()
+parseFile args = do
+  let filename = inputFile args
+  putStrLn $ "parsing file " ++ filename
+  startTime <- getCurrentTime
+  bytes <- B.readFile filename
+  case parseOnly logParser bytes of
+    Left msg -> putStrLn $ "parse failed: " ++ msg
+    Right logs -> do
+      let linesParsed = length logs
+      finishTime <- getCurrentTime
+      -- whole seconds elapsed (diffUTCTime a b = a - b); <= 0 also covers a clock step back
+      let duration = round (diffUTCTime finishTime startTime)
+      if duration <= 0
+        then printf "parsed %d lines instantly.\n" linesParsed
+        else printf "parsed %d lines in %d seconds. (%d line/s)\n"
+               linesParsed duration (linesParsed `div` duration)
+
 main :: IO ()
-main = do
-  print $ parseOnly parseLine "10.21.176.7 - - [06/Sep/2015:06:26:02 +0800] \"GET /v3/auth/tokens HTTP/1.1\" 200 3997 \"-\" \"-\" 1011068"
+main = parseArguments >>= parseFile
-- 
GitLab