From 9f4151aa839c6f4752bf87dc1801514ab41e979a Mon Sep 17 00:00:00 2001
From: Yuanle Song <sylecn@gmail.com>
Date: Tue, 8 Sep 2015 11:49:07 +0800
Subject: [PATCH] v1.1.0 support ignoring lines that can't be parsed.

- small tweak on output guidelines
---
 apache2-log-parser.cabal |  2 +-
 operational              | 16 ++++++++++++++++
 src/Main.hs              | 27 ++++++++++++++++++++++-----
 3 files changed, 39 insertions(+), 6 deletions(-)
 create mode 100644 operational

diff --git a/apache2-log-parser.cabal b/apache2-log-parser.cabal
index 64d0537..ef5c668 100644
--- a/apache2-log-parser.cabal
+++ b/apache2-log-parser.cabal
@@ -1,5 +1,5 @@
 name:                apache2-log-parser
-version:             1.0.1
+version:             1.1.0
 synopsis:            Simple project template from stack
 description:         Please see README.md
 homepage:            http://github.com/githubuser/apache2-log-parser#readme
diff --git a/operational b/operational
new file mode 100644
index 0000000..c64a9fe
--- /dev/null
+++ b/operational
@@ -0,0 +1,16 @@
+* COMMENT -*- mode: org -*-
+#+Date: 2015-09-08
+Time-stamp: <2015-09-08>
+#+STARTUP: content
+* later								      :entry:
+* current							      :entry:
+** 
+** 2015-09-08 support reading input from stdin
+* done								      :entry:
+** DONE 2015-09-08 it doesn't handle parse failures.
+Should warn about lines that failed to parse, ignore those lines and continue
+parsing the rest.
+
+Return value should be changed to failed lines [ByteString] and successfully
+parsed lines [AccessLog]. Parser should have an alternative failure case.
+
diff --git a/src/Main.hs b/src/Main.hs
index 902c1a7..e615313 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -8,10 +8,14 @@ import Data.Attoparsec.ByteString.Char8 hiding (option, take)
 import Data.Word
 import qualified Data.ByteString as B
 import Data.ByteString (ByteString)
+import qualified Data.ByteString.Char8 as C8
 
 import Options.Applicative hiding (Parser)
 import qualified Options.Applicative as O
 
+import Data.Either (rights, lefts)
+import Control.Monad (when)
+
 import Data.Time.Clock (getCurrentTime, diffUTCTime)
 import Text.Printf (printf)
 
@@ -95,8 +99,13 @@ parseLine = do
                      ,browser=browser
                      ,responseTime=responseTime}
 
-logParser :: Parser [AccessLog]
-logParser = many $ parseLine <* endOfLine
+-- | Parse one line: Right AccessLog on success, else Left the whole line.
+parseLineSafe :: Parser (Either ByteString AccessLog)
+parseLineSafe = (Right <$> parseLine <* endOfLine)
+            <|> (Left <$> tillChar '\n' <* endOfLine)
+
+logParser :: Parser [Either ByteString AccessLog]
+logParser = many parseLineSafe
 
 ----------------------------------
 -- command line argument handling
@@ -174,7 +183,7 @@ countRequests logs = go Map.empty logs where
 -- list m is ordered by view count DESC.
 prettyPrintFrequentRequests :: Int -> [(RequestPair, Int)] -> IO ()
 prettyPrintFrequentRequests totalRequestCount m = do
-  putStrLn "== most frequently accessed URLs =="
+  printf "== most frequently accessed URLs (top %d) ==\n" (length m)
   mapM_ ppLine m where
     ppLine :: (RequestPair, Int) -> IO ()
     ppLine ((verb, path), count) = do
@@ -199,6 +208,13 @@ parseFile args = do
       else
           printf "parsed %d lines in %d seconds. (%d line/s)\n"
                  linesParsed duration (linesParsed `div` duration)
+      -- warn about failed lines
+      let failures = lefts logs
+      when (not . null $ failures) (do
+        printf "== failed lines (%d) ==\n" (length failures)
+        mapM_ C8.putStrLn failures)
+      -- now show statistics on Right lines
+      let rlogs = rights logs
       -- print slow requests if -g is used.
       case slowRequestDuration args of
         Nothing -> return ()
@@ -207,11 +223,12 @@ parseFile args = do
           printf "== requests that take longer than %dus ==\n"
                  durationMicroseconds
           mapM_ print
-                (filter (\log -> responseTime log >= durationMicroseconds) logs)
+                (filter (\log -> responseTime log >= durationMicroseconds)
+                        rlogs)
       -- print top N most frequently viewed URL if -t is used.
       case topNRequest args of
         Nothing -> return ()
-        Just n -> prettyPrintFrequentRequests linesParsed (take n (sortOn (Down . snd) (Map.toList (Map.filter (> minRequestCount) (countRequests logs)))))
+        Just n -> prettyPrintFrequentRequests linesParsed (take n (sortOn (Down . snd) (Map.toList (Map.filter (> minRequestCount) (countRequests rlogs)))))
 
 main :: IO ()
 main = parseArguments >>= parseFile
-- 
GitLab