diff --git a/apache2-log-parser.cabal b/apache2-log-parser.cabal index 64d053746faeb6dfcde8dc307a5944a9be050246..ef5c6686ac5a34d13349305063417e243ea5ac18 100644 --- a/apache2-log-parser.cabal +++ b/apache2-log-parser.cabal @@ -1,5 +1,5 @@ name: apache2-log-parser -version: 1.0.1 +version: 1.1.0 synopsis: Simple project template from stack description: Please see README.md homepage: http://github.com/githubuser/apache2-log-parser#readme diff --git a/operational b/operational new file mode 100644 index 0000000000000000000000000000000000000000..c64a9fe826dc3882db4304ee5c5f830c37811dfe --- /dev/null +++ b/operational @@ -0,0 +1,16 @@ +* COMMENT -*- mode: org -*- +#+Date: 2015-09-08 +Time-stamp: <2015-09-08> +#+STARTUP: content +* later :entry: +* current :entry: +** +** 2015-09-08 support reading input from stdin +* done :entry: +** DONE 2015-09-08 it doesn't handle parse failures. +Should warn about lines that failed to parse, ignore that line and continue +parsing the rest. + +Return value should be changed to failed lines [ByteString] and successfully +parsed lines [AccessLog]. Parser should have an alternative failure case. + diff --git a/src/Main.hs b/src/Main.hs index 902c1a70bd57bf07dbe36ceea761b16a534ee8c5..e61531320f7fc80c3c1fdd8f6768e00133ec8f09 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -8,10 +8,14 @@ import Data.Attoparsec.ByteString.Char8 hiding (option, take) import Data.Word import qualified Data.ByteString as B import Data.ByteString (ByteString) +import qualified Data.ByteString.Char8 as C8 import Options.Applicative hiding (Parser) import qualified Options.Applicative as O +import Data.Either (rights, lefts) +import Control.Monad (when) + import Data.Time.Clock (getCurrentTime, diffUTCTime) import Text.Printf (printf) @@ -95,8 +99,13 @@ parseLine = do ,browser=browser ,responseTime=responseTime} -logParser :: Parser [AccessLog] -logParser = many $ parseLine <* endOfLine +-- | if parsing fails, return the whole line as Left. 
otherwise, return parsed +-- AccessLog as Right. +parseLineSafe :: Parser (Either ByteString AccessLog) +parseLineSafe = (Right <$> (parseLine <* endOfLine)) <|> (Left <$> (tillChar '\n' <* endOfLine)) + +logParser :: Parser [Either ByteString AccessLog] +logParser = many $ parseLineSafe ---------------------------------- -- command line argument handling @@ -174,7 +183,7 @@ countRequests logs = go Map.empty logs where -- list m is ordered by view count DESC. prettyPrintFrequentRequests :: Int -> [(RequestPair, Int)] -> IO () prettyPrintFrequentRequests totalRequestCount m = do - putStrLn "== most frequently accessed URLs ==" + printf "== most frequently accessed URLs (top %d) ==\n" (length m) mapM_ ppLine m where ppLine :: (RequestPair, Int) -> IO () ppLine ((verb, path), count) = do @@ -199,6 +208,13 @@ parseFile args = do else printf "parsed %d lines in %d seconds. (%d line/s)\n" linesParsed duration (linesParsed `div` duration) + -- warn about failed lines + let failures = lefts logs + when (not . null $ failures) (do + printf "== failed lines (%d) ==\n" (length failures) + mapM_ C8.putStrLn failures) + -- now show statistics on Right lines + let rlogs = rights logs -- print slow requests if -g is used. case slowRequestDuration args of Nothing -> return () @@ -207,11 +223,12 @@ parseFile args = do printf "== requests that take longer than %dus ==\n" durationMicroseconds mapM_ print - (filter (\log -> responseTime log >= durationMicroseconds) logs) + (filter (\log -> responseTime log >= durationMicroseconds) + rlogs) -- print top N most frequently viewed URL if -t is used. case topNRequest args of Nothing -> return () - Just n -> prettyPrintFrequentRequests linesParsed (take n (sortOn (Down . snd) (Map.toList (Map.filter (> minRequestCount) (countRequests logs))))) + Just n -> prettyPrintFrequentRequests linesParsed (take n (sortOn (Down . snd) (Map.toList (Map.filter (> minRequestCount) (countRequests rlogs))))) main :: IO () main = parseArguments >>= parseFile