diff --git a/apache2-log-parser.cabal b/apache2-log-parser.cabal
index 15565322038420c1681b028066b083610b42912b..9a6ef87fc039f109b3e7f3a7cb0ccf73284e3364 100644
--- a/apache2-log-parser.cabal
+++ b/apache2-log-parser.cabal
@@ -20,3 +20,5 @@ executable a2p
                        ,attoparsec
                        ,MissingH
                        ,bytestring
+                       ,optparse-applicative
+                       ,time
diff --git a/src/Main.hs b/src/Main.hs
index 0f6ae39190663b26051670c83f4db7c328de5eb2..8358dd87e6e9a702e7ef6ba145a5c7c467f3cfd8 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -6,9 +6,14 @@ module Main where
 -- represented using an 8-bit character set, e.g. ASCII or ISO-8859-15.
 import Data.Attoparsec.ByteString.Char8
 import Data.Word
-import Data.ByteString
+import qualified Data.ByteString as B
+import Data.ByteString (ByteString)
 
-import Data.Bits.Utils (c2w8)
+import Options.Applicative hiding (Parser)
+import qualified Options.Applicative as O
+
+import Data.Time.Clock (getCurrentTime, diffUTCTime)
+import Text.Printf (printf)
 
 data AccessLog = AccessLog { clientIp :: ByteString
                            , datetime :: ByteString
@@ -18,6 +23,10 @@ data AccessLog = AccessLog { clientIp :: ByteString
                            , browser :: ByteString
                            , responseTime :: Int} deriving (Show)
 
+---------------
+-- log parsing
+---------------
+
 tillChar :: Char -> Parser ByteString
 tillChar c = takeTill (== c)
 
@@ -34,7 +43,7 @@ parseDate = do
   char ' '
   timezone <- tillChar ']'
   char ']'
-  return $ Data.ByteString.concat [date, " ", timezone]
+  return $ B.concat [date, " ", timezone]
 
 parseRequest :: Parser (ByteString, ByteString)
 parseRequest = do
@@ -76,6 +85,57 @@ parseLine = do
                      ,browser=browser
                      ,responseTime=responseTime}
 
+-- | Parse a whole access log: zero or more entries, each terminated by a
+-- line ending.
+logParser :: Parser [AccessLog]
+logParser = many $ parseLine <* endOfLine
+
+----------------------------------
+-- command line argument handling
+----------------------------------
+
+-- | Options accepted on the command line.
+data AppArguments = AppArguments { inputFile :: String }
+
+-- | Option parser for 'AppArguments' (@--input@ / @-i@).
+argumentParser :: O.Parser AppArguments
+argumentParser = AppArguments
+  <$> strOption
+      (long "input"
+      <> short 'i'
+      <> metavar "ACCESS_LOG_FILE"
+      <> help "the access filename to parse")
+
+-- | Run 'argumentParser' against the process arguments.
+parseArguments :: IO AppArguments
+parseArguments = execParser opts
+  where
+    opts = info (helper <*> argumentParser)
+      ( fullDesc
+      <> progDesc "parse apache2 access log"
+      <> header "a2p - parse apache2 access log")
+
+-- | Read the file named by 'inputFile', parse it with 'logParser', and
+-- print the number of entries parsed together with the elapsed time.
+parseFile :: AppArguments -> IO ()
+parseFile args = do
+  let filename = inputFile args
+  putStrLn $ "parsing file " ++ filename
+  startTime <- getCurrentTime
+  bytes <- B.readFile filename
+  case parseOnly logParser bytes of
+    Left msg -> putStrLn $ "parse failed: " ++ msg
+    Right logs -> do
+      let linesParsed = length logs
+      finishTime <- getCurrentTime
+      -- Elapsed whole seconds. diffUTCTime a b is a - b, so the later
+      -- timestamp must come first to get a non-negative duration.
+      let duration = (round . toRational) (diffUTCTime finishTime startTime)
+      if duration == 0 then
+        printf "parsed %d lines instantly.\n" linesParsed
+      else
+        printf "parsed %d lines in %d seconds. (%d line/s)\n"
+          linesParsed duration (linesParsed `div` duration)
+
 main :: IO ()
-main = do
-  print $ parseOnly parseLine "10.21.176.7 - - [06/Sep/2015:06:26:02 +0800] \"GET /v3/auth/tokens HTTP/1.1\" 200 3997 \"-\" \"-\" 1011068"
+main = parseArguments >>= parseFile