{-# LANGUAGE OverloadedStrings #-}

module Main where

-- This attoparsec module is intended for parsing text that is
-- represented using an 8-bit character set, e.g. ASCII or ISO-8859-15.
import Data.Attoparsec.ByteString.Char8 hiding (option, take)
import Data.Word
import qualified Data.ByteString as B
import Data.ByteString (ByteString)

import Options.Applicative hiding (Parser)
import qualified Options.Applicative as O

import Data.Time.Clock (getCurrentTime, diffUTCTime)
import Text.Printf (printf)

import Data.ByteString.Char8 (pack, unpack)

import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)
import Data.List (sortOn)
import Data.Ord (Down(..))

data AccessLog = AccessLog { clientIp :: ByteString
                           , datetime :: ByteString
                           , requestVerb :: ByteString
                           , requestPath :: ByteString
                           , statusCode :: Int
                           , browser :: ByteString
                           , responseTime :: Int}

instance Show AccessLog where
    show (AccessLog clientIp datetime requestVerb requestPath statusCode browser responseTime) = printf "%s [%s] \"%s %s\" %s %s %s" (unpack clientIp) (unpack datetime) (unpack requestVerb) (unpack requestPath) (show statusCode) (unpack browser) (show responseTime)

---------------
-- log parsing
---------------

tillChar :: Char -> Parser ByteString
tillChar c = takeTill (== c)

nonEmpty :: Parser ByteString
nonEmpty = tillChar ' '

parseIP :: Parser ByteString
parseIP = nonEmpty

parseDate :: Parser ByteString
parseDate = do
  char '['
  date <- nonEmpty
  char ' '
  timezone <- tillChar ']'
  char ']'
  return $ B.concat [date, " ", timezone]

parseRequest :: Parser (ByteString, ByteString)
parseRequest = do
  char '"'
  verb <- nonEmpty
  char ' '
  path <- nonEmpty
  char ' '
  nonEmpty
  return (verb, path)

parseBrowserInfo :: Parser ByteString
parseBrowserInfo = do
  char '"'
  result <- tillChar '"'
  char '"'
  return result

parseLine :: Parser AccessLog
parseLine = do
  clientIp <- parseIP
  string " - - "
  date <- parseDate
  char ' '
  (requestVerb, requestPath) <- parseRequest
  char ' '
  statusCode <- decimal
  char ' '
  responseSize <- decimal
  string " \"-\" "
  browser <- parseBrowserInfo
  char ' '
  responseTime <- decimal
  return $ AccessLog {clientIp=clientIp
                     ,datetime=date
                     ,requestVerb=requestVerb
                     ,requestPath=requestPath
                     ,statusCode=statusCode
                     ,browser=browser
                     ,responseTime=responseTime}

logParser :: Parser [AccessLog]
logParser = many $ parseLine <* endOfLine

----------------------------------
-- command line argument handling
----------------------------------

data AppArguments = AppArguments { inputFile :: String
                                 , slowRequestDuration :: Maybe String
                                 , topNRequest :: Maybe Int}

-- | minimum request count a request must have to be included in --top
-- results.
minRequestCount = 5

argumentParser :: O.Parser AppArguments
argumentParser = AppArguments
                 <$> strOption
                     (long "input"
                      <> short 'i'
                      <> metavar "ACCESS_LOG_FILE"
                      <> help "The access log filename to parse")
                 <*> (optional $ strOption
                      (long "gt"
                       <> short 'g'
                       <> metavar "DURATION"
                       <> help "Print requests that take longer than DURATION, duration could have a suffix of us, ms, s. Default suffix is s."))
                 <*> (optional $ option auto
                      (long "top"
                       <> short 't'
                       <> metavar "N"
                       <> help ("Print top N URL that is viewed most frequently. Omit URL that is accessed less than " ++ show minRequestCount ++ " times.")))

parseArguments :: IO AppArguments
parseArguments = execParser opts
    where
      opts = info (helper <*> argumentParser)
             ( fullDesc
               <> progDesc "parse apache2 access log"
               <> header "a2p - parse apache2 access log")

durationParser :: Parser (Int, String)
durationParser = do
  number <- decimal
  suffix <- nonEmpty
  return (number, unpack suffix)

parseDuration :: ByteString -> Either String (Int, String)
parseDuration = parseOnly durationParser

-- | convert a duration string to microseconds.
-- if there is no suffix, default is seconds.
-- supported suffix are us (microseconds), ms (milliseconds) and s (seconds)
toMicroseconds :: String -> Int
toMicroseconds duration =
    case parseDuration (pack duration) of
      Left msg -> error ("Invalid duration: " ++ duration)
      Right (number, unit) -> number * unitValue unit where
                                -- convert it to microseconds
                                unitValue "us" = 1
                                unitValue "ms" = 1000
                                unitValue "s" = 1000000
                                unitValue "" = 1000000

type RequestPair = (ByteString, ByteString)

countRequests :: [AccessLog] -> Map RequestPair Int
countRequests logs = go Map.empty logs where
    go :: Map RequestPair Int -> [AccessLog] -> Map RequestPair Int
    go m (x:xs) = let key = (requestVerb x, requestPath x) in
                  case Map.lookup key m of
                    Nothing -> go (Map.insert key 1 m) xs
                    Just v -> go (Map.adjust (+1) key m) xs
    go m [] = m

-- | print a top URLs report for user.
-- list m is ordered by view count DESC.
prettyPrintFrequentRequests :: Int -> [(RequestPair, Int)] -> IO ()
prettyPrintFrequentRequests totalRequestCount m = do
  putStrLn "== most frequently accessed URLs =="
  mapM_ ppLine m where
    ppLine :: (RequestPair, Int) -> IO ()
    ppLine ((verb, path), count) = do
      let percentage = (fromIntegral count :: Double) * 100.0 / (fromIntegral totalRequestCount :: Double)
      printf "%.1f%%\t%d\t%s %s\n" percentage count (unpack verb) (unpack path)

parseFile :: AppArguments -> IO ()
parseFile args = do
  let filename = inputFile args
  putStrLn $ "parsing file " ++ filename
  startTime <- getCurrentTime
  bytes <- B.readFile filename
  case (parseOnly logParser) bytes of
    Left msg -> putStrLn "parse failed"
    Right logs -> do
      let linesParsed = length logs
      finishTime <- getCurrentTime
      -- duration in seconds
      let duration = (round . toRational) (diffUTCTime finishTime startTime)
      if duration == 0 then
          printf "parsed %d lines instantly.\n" linesParsed
      else
          printf "parsed %d lines in %d seconds. (%d line/s)\n"
                 linesParsed duration (linesParsed `div` duration)
      -- print slow requests if -g is used.
      case slowRequestDuration args of
        Nothing -> return ()
        Just str -> do
          let durationMicroseconds = toMicroseconds str
          printf "== requests that take longer than %dus ==\n"
                 durationMicroseconds
          mapM_ print
                (filter (\log -> responseTime log >= durationMicroseconds) logs)
      -- print top N most frequently viewed URL if -t is used.
      case topNRequest args of
        Nothing -> return ()
        Just n -> prettyPrintFrequentRequests linesParsed (take n (sortOn (Down . snd) (Map.toList (Map.filter (> minRequestCount) (countRequests logs)))))

main :: IO ()
main = parseArguments >>= parseFile
