From 148e5fba019afd0a36f47a4e0d0f83a41e10bc11 Mon Sep 17 00:00:00 2001
From: Yuanle Song
Date: Mon, 7 Sep 2015 22:16:26 +0800
Subject: [PATCH] support print top N most frequently accessed request

---
Review notes (this section is ignored by git-am):
* The --top filter now keeps requests with count >= minRequestCount, so a
  request seen exactly minRequestCount times is listed, as the help text and
  the constant's documentation both promise.
* countRequests is built with Map.fromListWith instead of a hand-written
  lookup/insert/adjust loop; minRequestCount gained a type signature.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Verify context-line whitespace against the target
  files, or apply with --ignore-whitespace. Index hashes are those of the
  original commit.

 apache2-log-parser.cabal |  1 +
 src/Main.hs              | 43 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/apache2-log-parser.cabal b/apache2-log-parser.cabal
index 9a6ef87..8341330 100644
--- a/apache2-log-parser.cabal
+++ b/apache2-log-parser.cabal
@@ -22,3 +22,4 @@ executable a2p
                      ,bytestring
                      ,optparse-applicative
                      ,time
+                     ,containers
diff --git a/src/Main.hs b/src/Main.hs
index 258a3c1..df2cbf1 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -4,7 +4,7 @@ module Main where
 
 -- This attoparsec module is intended for parsing text that is
 -- represented using an 8-bit character set, e.g. ASCII or ISO-8859-15.
-import Data.Attoparsec.ByteString.Char8
+import Data.Attoparsec.ByteString.Char8 hiding (option, take)
 import Data.Word
 import qualified Data.ByteString as B
 import Data.ByteString (ByteString)
@@ -17,6 +17,11 @@ import Text.Printf (printf)
 
 import Data.ByteString.Char8 (pack, unpack)
 
+import qualified Data.Map.Strict as Map
+import Data.Map.Strict (Map)
+import Data.List (sortOn)
+import Data.Ord (Down(..))
+
 data AccessLog = AccessLog { clientIp :: ByteString
                            , datetime :: ByteString
                            , requestVerb :: ByteString
@@ -98,7 +103,12 @@ logParser = many $ parseLine <* endOfLine
 
 ----------------------------------
 data AppArguments = AppArguments { inputFile :: String
-                                 , slowRequestDuration :: Maybe String}
+                                 , slowRequestDuration :: Maybe String
+                                 , topNRequest :: Maybe Int}
+
+-- | Minimum number of hits a request needs to be listed in the --top report.
+minRequestCount :: Int
+minRequestCount = 5
 
 argumentParser :: O.Parser AppArguments
 argumentParser = AppArguments
@@ -112,6 +122,11 @@ argumentParser = AppArguments
                      <> short 'g'
                      <> metavar "DURATION"
                      <> help "Print requests that take longer than DURATION, duration could have a suffix of us, ms, s. Default suffix is s."))
+                 <*> (optional $ option auto
+                     (long "top"
+                     <> short 't'
+                     <> metavar "N"
+                     <> help ("Print top N URL that is viewed most frequently. Omit URL that is accessed less than " ++ show minRequestCount ++ " times.")))
 
 parseArguments :: IO AppArguments
 parseArguments = execParser opts
@@ -144,6 +159,26 @@ toMicroseconds duration =
       unitValue "s" = 1000000
       unitValue "" = 1000000
 
+type RequestPair = (ByteString, ByteString)
+
+-- | Count how many log entries share each (request verb, request path) pair.
+-- Every entry contributes 1 to its pair; 'Map.fromListWith' sums duplicates.
+countRequests :: [AccessLog] -> Map RequestPair Int
+countRequests logs =
+  Map.fromListWith (+) [(requestKey entry, 1) | entry <- logs]
+  where
+    requestKey :: AccessLog -> RequestPair
+    requestKey entry = (requestVerb entry, requestPath entry)
+
+-- | Print one line per request: percentage of totalRequestCount, hit count,
+-- verb and path. Rows are printed in list order; callers pass count DESC.
+prettyPrintFrequentRequests :: Int -> [(RequestPair, Int)] -> IO ()
+prettyPrintFrequentRequests totalRequestCount m = mapM_ ppLine m where
+  ppLine :: (RequestPair, Int) -> IO ()
+  ppLine ((verb, path), count) = do
+    let percentage = (fromIntegral count :: Double) * 100.0 / (fromIntegral totalRequestCount :: Double)
+    printf "%.1f%%\t%d\t%s %s\n" percentage count (unpack verb) (unpack path)
+
 parseFile :: AppArguments -> IO ()
 parseFile args = do
   let filename = inputFile args
@@ -171,6 +206,10 @@ parseFile args = do
                   durationMicroseconds
          mapM_ print (filter (\log -> responseTime log >= durationMicroseconds)
                              logs)
+  -- With -t/--top N: print the N most frequent requests, keeping only those seen at least minRequestCount times.
+  case topNRequest args of
+    Nothing -> return ()
+    Just n -> prettyPrintFrequentRequests linesParsed (take n (sortOn (Down . snd) (Map.toList (Map.filter (>= minRequestCount) (countRequests logs)))))
 
 main :: IO ()
 main = parseArguments >>= parseFile
-- 
GitLab