{-# LANGUAGE OverloadedStrings #-}

module Main where

-- This attoparsec module is intended for parsing text that is
-- represented using an 8-bit character set, e.g. ASCII or ISO-8859-15.
import Data.Attoparsec.ByteString.Char8
import Data.Word
import qualified Data.ByteString as B
import Data.ByteString (ByteString)

import Options.Applicative hiding (Parser)
import qualified Options.Applicative as O

import Data.Time.Clock (getCurrentTime, diffUTCTime)
import Text.Printf (printf)

-- | One parsed line of the access log.  All textual fields are kept as the
-- raw bytes found in the input; nothing is decoded or validated.
data AccessLog = AccessLog
  { clientIp     :: ByteString -- ^ first field of the line
  , datetime     :: ByteString -- ^ bracketed timestamp, brackets removed
  , requestVerb  :: ByteString -- ^ first word of the quoted request
  , requestPath  :: ByteString -- ^ second word of the quoted request
  , statusCode   :: Int        -- ^ number that follows the quoted request
  , browser      :: ByteString -- ^ contents of the last quoted field
  , responseTime :: Int        -- ^ trailing number on the line (unit is not
                               --   visible in this file)
  }
  deriving (Show)

---------------
-- log parsing
---------------

-- | Consume input up to, but not including, the first occurrence of the
-- given character.  Because it is built on 'takeTill' it never fails: the
-- result is empty when the delimiter is the very next character.
tillChar :: Char -> Parser ByteString
tillChar = takeTill . (==)

-- | Parse one space-delimited token: a run of one or more characters that
-- are not a space.  The terminating space itself is left unconsumed.
--
-- 'takeWhile1' is used rather than 'takeTill' so that the parser fails when
-- the token would be empty, which is what the name promises.
nonEmpty :: Parser ByteString
nonEmpty = takeWhile1 (/= ' ')

-- | Parser for the client address at the start of a log line.
-- It is simply 'nonEmpty': the text is not checked to be a valid IP address.
parseIP :: Parser ByteString
parseIP = nonEmpty

-- | Parse a timestamp of the form @[date timezone]@ and return it without
-- the surrounding brackets, as @date@, a single space, then @timezone@.
parseDate :: Parser ByteString
parseDate = joinParts <$> datePart <*> zonePart
  where
    -- opening bracket followed by everything up to the first space
    datePart = char '[' *> nonEmpty
    -- separating space, then everything up to the closing bracket
    zonePart = char ' ' *> tillChar ']' <* char ']'
    joinParts day zone = B.concat [day, " ", zone]

-- | Parse the quoted request and return its first two words as
-- @(verb, path)@.
--
-- A third space-delimited token is consumed and thrown away.  The closing
-- double quote is never matched explicitly: it is swallowed as part of that
-- third token, which runs up to the next space.
parseRequest :: Parser (ByteString, ByteString)
parseRequest =
  (,) <$> (char '"' *> nonEmpty <* char ' ')
      <*> (nonEmpty <* char ' ' <* nonEmpty)

-- | Parse a double-quoted field and return the text between the quotes.
-- The text ends at the first @"@ encountered; escaped quotes are not
-- recognised.
parseBrowserInfo :: Parser ByteString
parseBrowserInfo = char '"' *> tillChar '"' <* char '"'

-- | Parse a single access-log line (without its line terminator).
--
-- Expected layout, fields separated by single spaces:
--
-- > IP - - [date zone] "VERB PATH PROTO" STATUS SIZE "REFERER" "BROWSER" TIME
--
-- The response size and the referer are recognised but not stored.  The
-- size may be a number or a lone @-@, and the referer may be any quoted
-- string, so lines with a real referer or an empty body are accepted too.
parseLine :: Parser AccessLog
parseLine = do
  ip <- parseIP
  _ <- string " - - "
  date <- parseDate
  _ <- char ' '
  (verb, path) <- parseRequest
  _ <- char ' '
  status <- decimal
  _ <- char ' '
  _ <- responseSize
  _ <- char ' '
  _ <- referer
  _ <- char ' '
  agent <- parseBrowserInfo
  _ <- char ' '
  elapsed <- decimal
  return AccessLog { clientIp = ip
                   , datetime = date
                   , requestVerb = verb
                   , requestPath = path
                   , statusCode = status
                   , browser = agent
                   , responseTime = elapsed
                   }
  where
    -- Apache's %b directive prints "-" instead of 0 when no body was sent.
    responseSize :: Parser Int
    responseSize = choice [decimal, char '-' >> return 0]

    -- Any quoted string; the previously required "-" is just one such value.
    referer :: Parser ByteString
    referer = char '"' *> tillChar '"' <* char '"'

-- | Parse zero or more newline-terminated log lines.
--
-- Parsing stops, successfully, at the first line that does not match
-- 'parseLine' or is not followed by a line ending; whatever input remains
-- after that point is left unconsumed.
logParser :: Parser [AccessLog]
logParser = many terminatedLine
  where
    terminatedLine = do
      entry <- parseLine
      endOfLine
      return entry

----------------------------------
-- command line argument handling
----------------------------------

-- | Options collected from the command line.
data AppArguments = AppArguments
  { inputFile :: String -- ^ path of the access log to read
  }

-- | Command-line parser: a single string option, @--input@ / @-i@, holding
-- the name of the access log file.
argumentParser :: O.Parser AppArguments
argumentParser = fmap AppArguments inputOption
  where
    inputOption :: O.Parser String
    inputOption = strOption $ mconcat
      [ long "input"
      , short 'i'
      , metavar "ACCESS_LOG_FILE"
      , help "the access filename to parse"
      ]

-- | Run 'argumentParser' (plus the standard @--help@ handling supplied by
-- 'helper') against the process arguments.
parseArguments :: IO AppArguments
parseArguments = execParser (info (helper <*> argumentParser) description)
  where
    description = mconcat
      [ fullDesc
      , progDesc "parse apache2 access log"
      , header "a2p - parse apache2 access log"
      ]

-- | Read the file named in the arguments, parse it with 'logParser' and
-- print how many lines were parsed and how long that took.
--
-- The clock is started before the file is read, so the reported duration
-- covers both reading and parsing.  If the parser reports an error, its
-- message is printed instead of the statistics.
parseFile :: AppArguments -> IO ()
parseFile args = do
  let filename = inputFile args
  putStrLn $ "parsing file " ++ filename
  startTime <- getCurrentTime
  bytes <- B.readFile filename
  case parseOnly logParser bytes of
    Left msg -> putStrLn $ "parse failed: " ++ msg
    Right logs -> do
      let linesParsed = length logs
      finishTime <- getCurrentTime
      -- Duration in whole seconds.  diffUTCTime a b computes a - b, so the
      -- later timestamp must come first to get a non-negative value.
      let duration = round (diffUTCTime finishTime startTime) :: Int
      -- Rounding can yield 0; that case is reported separately so the rate
      -- below never divides by zero.
      if duration == 0 then
          printf "parsed %d lines instantly.\n" linesParsed
      else
          printf "parsed %d lines in %d seconds. (%d line/s)\n"
                 linesParsed duration (linesParsed `div` duration)

-- | Entry point: read the command-line options, then process the file they
-- name.
main :: IO ()
main = do
  args <- parseArguments
  parseFile args
