Skip to content
Commits on Source (3)
......@@ -12,13 +12,13 @@ Here is the command help:
$ rd-api --help
rd-api - reliable download server
Usage: rd-api [-h|--host HOST] [-p|--port PORT] [--redis-host REDIS_HOST]
[--redis-port REDIS_PORT] [-d|--web-root DIR] [-w|--worker INT]
Usage: rd-api [-h|--host HOST] [-p|--port PORT] [--redis-host REDIS_HOST]
[--redis-port REDIS_PORT] [-d|--web-root DIR] [-w|--worker INT]
[-v|--verbose] [-V|--version]
rd-api is an HTTP file server that provides static file hosting and reliable
download api for rd client.
rd-api serves files under web-root. You can use it like ```python3 -m http.server```
rd-api serves files under web-root. You can use it like python3 -m http.server
In addition, if rd command line tool is used to do the download, it will
download in a reliable way by downloading in 2MiB blocks and verify checksum
......@@ -57,7 +57,6 @@ Available options:
-v,--verbose show more debug message
-V,--version show program version and exit
-h,--help Show this help text
```
```
......@@ -66,7 +65,8 @@ rd - reliable download client
Usage: rd [-r|--block-max-retry INT] [-k|--keep] [-l|--rolling-combine]
[-d|--temp-dir TEMP_DIR] [-o|--output-dir OUTPUT_DIR]
[-w|--worker INT] [-f|--force] [-v|--verbose] [-V|--version] [URL...]
[-w|--worker INT] [-f|--force] [-i|--progress-interval N]
[-v|--verbose] [-V|--version] [URL...]
Download large files across slow and unstable network reliably. Requires using
rd-api on server side. For more information, see rd-api --help
......@@ -76,13 +76,15 @@ Available options:
combined
-l,--rolling-combine delete each block data right after combine, conflict
with --keep
-d,--temp-dir TEMP_DIR the dir to keep block download
data (default: ".blocks")
-d,--temp-dir TEMP_DIR the dir to keep block download data
(default: ".blocks")
-o,--output-dir OUTPUT_DIR
the dir to keep the final combined
file (default: ".")
the dir to keep the final combined file
(default: ".")
-w,--worker INT concurrent HTTP download worker (default: 5)
-f,--force overwrite exiting target file in OUTPUT_DIR
-i,--progress-interval N how often to show download progress, in seconds
(default: 10)
-v,--verbose show more debug message
-V,--version show version number and exit
-h,--help Show this help text
......
module CliVersion where
cliVersion :: String
cliVersion = "1.2.0.0"
module RD.CliVersion (cliVersion) where
cliVersion :: String
cliVersion = "1.3.0.0"
-- | Lib contains functions that is useful for current project.
module Lib
module RD.Lib
( sha1sum
, sha1sumOnBytes
, guessFilename
......@@ -15,7 +15,7 @@ import qualified Data.ByteString.Lazy as LB
import Crypto.Hash (digestToHexByteString, hashlazy, Digest, SHA1)
import Formatting hiding (bytes)
import Type
import RD.Types
-- | convert byte number to MiB. small number will become 0.
humanReadableSize :: Integer -> T.Text
......
module Type where
module RD.Types where
import qualified Data.ByteString.Char8 as Char8
import qualified Data.ByteString as B
......
-- | Utils contains general functions that is useful for all haskell projects.
module Utils where
module RD.Utils where
import qualified Data.Text as T
......
* COMMENT -*- mode: org -*-
#+Date: 2018-05-04
Time-stamp: <2022-03-14>
Time-stamp: <2022-03-15>
#+STARTUP: content
* notes :entry:
** 2022-03-14 project dir structure
- lib shared code for server and client, the reliable-download library
- api rd-api cli Main
- api/lib rd-api cli logic
- client rd cli Main
- cli-version cli-version string shared by rd-api and rd cli Main
- misc/ learning tools and temp codes
- test/ tests
- lib shared code for server and client, the reliable-download library
- rd-api rd-api cli tool, server side
- rd rd cli tool, client side
- misc/ learning tools and temp codes
- test/ tests
- package.yaml stack project description
- pypi/ for release on pypi, see pypi/Makefile
** 2018-05-09 how to release latest code on PyPI? how to make a release?
- update version number in package.yaml, src/CliVersion.hs
......@@ -34,21 +34,26 @@ Time-stamp: <2022-03-14>
on ryzen5,
cd ~/projects/reliable-download/
scp .stack-work/install/x86_64-linux-nopie/lts-10.3/8.2.2/bin/rd-api de01:d/
FN=`stack exec which rd-api`
gzip -k "$FN"
scp "$FN.gz" de03:d/
on de01,
on de03,
cd ~/d/
gunzip rd-api.gz
chmod +x rd-api
env WEB_ROOT=$PWD ./rd-api
curl -v http://138.201.95.248:8082/rd/
curl -I http://138.201.95.248:8082/gitlab-ce_10.3.5-ce.0_amd64_xenial.deb
curl -v http://de03.dev.emacsos.com:8082/rd/
curl -I http://de03.dev.emacsos.com:8082/virtio-win-0.1.215.iso
okay.
378M gitlab-ce_10.3.5-ce.0_amd64_xenial.deb
516M virtio-win-0.1.215.iso
on ryzen5,
tmake stack exec rd -- -d ~/d/.blocks -o ~/d/ http://138.201.95.248:8082/gitlab-ce_10.3.5-ce.0_amd64_xenial.deb
tmake stack exec rd -- -d ~/d/.blocks -o ~/d/ http://de03.dev.emacsos.com:8082/virtio-win-0.1.215.iso
tmake ~/d/rd -d ~/d/.blocks -o ~/d/ http://de03.dev.emacsos.com:8082/virtio-win-0.1.215.iso
below is a run log from old rd version.
#+BEGIN_SRC sh
sylecn@ryzen5:~/projects/reliable-download$ tmake stack exec rd -- -d ~/d/.blocks -o ~/d/ http://138.201.95.248:8082/gitlab-ce_10.3.5-ce.0_amd64_xenial.deb
Tue May 8 00:45:31 CST 2018
......@@ -347,20 +352,6 @@ https://www.stackage.org/lts-10.3/package/optparse-applicative-0.14.0.0
$HOME/.cache/reliable-downloader/rd-api.db
-
** 2018-05-07 loopUntilAllBlocksReady, how to track progress?
use a thread pool to download blocks, print overall progress when some parts
done or some time elapsed.
- how to track progress?
I used mapM to fetch block.
results <- mapM (fetchBlockAsync opts rc url rdResp) newReadyBlocks
how to show some progress info?
I need a supervisor thread. and I need a shared data structure.
a mapM is not enough to do this.
-
** 2018-05-05 allow config app at runtime.
via env var and command line parameter.
......@@ -551,6 +542,23 @@ only first character is in path key.
* current :entry:
**
** 2022-03-15 rd client, is there a built-in repeat/loop function?
IO () -> IO ()
I should not need to write showProgressLoop explicitly.
** 2022-03-15 TODO stack test should not rely on
/home/sylecn/persist/cache/ideaIC-2018.1.tar.gz
try use a smaller file within git tree.
** 2022-03-14 build on debian 9. push a new release to pypi.
- binary built on ryzen5 won't work because of high libc version.
#+BEGIN_SRC sh
root@de03:~/d# ./rd-api --version
./rd-api: /lib/x86_64-linux-gnu/libm.so.6: version `GLIBC_2.27' not found (required by ./rd-api)
./rd-api: /lib/x86_64-linux-gnu/libm.so.6: version `GLIBC_2.29' not found (required by ./rd-api)
#+END_SRC
** 2019-02-28 bug: rd-api -d java/
option -d: cannot parse value `java/'
......@@ -581,6 +589,210 @@ policy in ovs can do it.
see stretch01 daylog.
* done :entry:
** 2018-05-07 loopUntilAllBlocksReady, how to track progress?
use a thread pool to download blocks, print overall progress when some parts
done or some time elapsed.
- how to track progress?
I used mapM to fetch block.
results <- mapM (fetchBlockAsync opts rc url rdResp) newReadyBlocks
how to show some progress info?
I need a supervisor thread. and I need a shared data structure.
a mapM is not enough to do this.
-
** 2022-03-15 client log, don't show each block fetch. show progress instead.
- log overall progress every 30s
- xx/xx blocks fetched, xx%
percentage show integer.
when percentage is 100% but still a few blocks to DL, show "almost there"
could also just use floor float, which would show 99%.
that's better.
- design
- a ShowProgress thread, show a msg every 30s.
or a configurable amount of time via --progress-interval N
use a MVar or TVar to store state.
totalFileCount
downloadedFileCount
totalBlockCount
downloadedBlockCount
track block count is easy.
how to track file count?
well, it's similar.
create a MVar or TVar in main thread, pass it to ShowProgress thread.
when add URL, increment totalFileCount.
when file combined, increment downloadedFileCount.
when add block, increment totalBlockCount.
when DL block, increment downloadedBlockCount.
when downloadedFileCount == totalFileCount, ShowProgress thread exits.
I will use MVar.
Control.Concurrent.MVar
https://hackage.haskell.org/package/base-4.16.0.0/docs/Control-Concurrent-MVar.html
- when main thread modify progress and know DL finished, immediately show a
progress msg and terminate ShowProgress thread.
- implementation
add MVar progress in rc.
only master thread will run putMVar, other thread only run readMVar.
resultsMaybe <- mapM (runMaybeT . downloadFile rc) (urls opts)
DONE update url/file count and block count when they happen.
DONE only show progress if it has some progress. otherwise, it will be false
positive. 5/123 blocks to 5/123 blocks is not a progress.
// code didn't work. why? I see, I didn't update lastDownloadedBlockCount
- test it locally
use a shorter progress interval
stack exec rd -- -f -i 1 -d ~/d/.blocks -o ~/d/ http://localhost:8082/ideaIC-2018.1.tar.gz http://localhost:8082/dropbear-2019.78.tar.bz2
stack exec rd -- -f -i 1 -d ~/d/.blocks -o ~/d/ http://localhost:8082/guile-3.0.0.tar.xz http://localhost:8082/dropbear-2019.78.tar.bz2
- build a version to test in de03 prod.
make sure client side log is good.
on ryzen5,
rsync -n -air lib rd rd-api misc test package.yaml s02:projects/reliable-download/
rsync -air lib rd rd-api misc test package.yaml s02:projects/reliable-download/
- problems
- incrementDownloadedBlockCount rc 1
this is not run. dl block count is always 0.
is it because of lazy evaluation? nope. it's inside IO(), it will
eventually run when result is used.
I see. fetchBlock will reuse existing file on disk. DL happens in
fetchBlockFromHttp. I didn't increment dl block count there.
just
incrementDownloadedBlockCount rc 1
when fetchBlock return IO True.
who calls fetchBlock?
loopUntilAllBlocksReady
addTasks downloadTask $ map (fetchBlock rc url rdResp) newReadyBlocks
I need to wrap fetchBlock here. do book keeping on dl block progress.
#+BEGIN_SRC sh
addTasks downloadTask $
map (\b -> do
isSuccess <- fetchBlock rc url rdResp b
when isSuccess $ incrementDownloadedBlockCount rc 1
return isSuccess
) newReadyBlocks
#+END_SRC
haskell is good at function and monad composition.
I don't even need the do notation.
I will leave it to another time. may not be easy to read.
- DONE current progress is overall block percentage.
that's not for current file.
I don't think that's meaningful.
overall DL block percentage may as well go from 90% to 30%, when it switch
files.
keep track of blocks for current file instead?
is there a current file concept? does rd finish one file before starting
to fetch block for next file?
resultsMaybe <- mapM (runMaybeT . downloadFile rc) (urls opts)
mapM, it's sequential map.
so there is a current file concept.
add two variable
, piCurrentFileBlockCount :: Int
, piCurrentFileDownloadedBlockCount :: Int
also include current DL file name.
show it in progress log.
- DONE how to rewrite this more concisely?
showProgress :: RDClientRuntimeConfig -> IO ()
showProgress rc = do
p <- readMVar (rdProgress rc)
showProgress1 rc p
(a -> IO b) -> IO a -> IO b
this is (reverse) bind.
=<<
readMVar (rdProgress rc) >>= showProgress1 rc
- DONE always show a 100% progress when all blocks for a file fetched.
** 2022-03-15 client log.
#+BEGIN_SRC sh
sylecn@ryzen5:~/projects/reliable-download$ tmake ~/d/rd -d ~/d/.blocks -o ~/d/ http://de03.dev.emacsos.com:8082/virtio-win-0.1.215.iso
Tue 15 Mar 2022 11:08:14 AM CST
running command: /home/sylecn/d/rd -d /home/sylecn/d/.blocks -o /home/sylecn/d/ http://de03.dev.emacsos.com:8082/virtio-win-0.1.215.iso
2022-03-15T03:08:15 I GET /rd/ api ok
2022-03-15T03:08:15 I Downloading file: virtio-win-0.1.215.iso, 515.9 MiB, 258 blocks
2022-03-15T03:08:15 I 0 new block(s) ready on server side
2022-03-15T03:08:15 I No new block ready on server side, waiting 1s
2022-03-15T03:08:17 I 92 new block(s) ready on server side
2022-03-15T03:08:18 I 44 new block(s) ready on server side
2022-03-15T03:08:19 I 95 new block(s) ready on server side
2022-03-15T03:08:19 I 27 new block(s) ready on server side
2022-03-15T03:08:22 I block 0 fetched
2022-03-15T03:08:22 I block 4 fetched
2022-03-15T03:08:22 I block 2 fetched
2022-03-15T03:08:23 I block 5 fetched
#+END_SRC
- DONE "xx new blocks ready on server side" is confusing.
is it total blocks or new blocks?
this should be debug log.
print one info log when all blocks ready on server side.
- server side log.
DONE when fetch block, there is no need to log "user request xxx".
#+BEGIN_SRC sh
root@de03:~/d# env WEB_ROOT=$PWD ./rd-api
2022-03-15T03:06:18 I creating 2 file worker(s)
2022-03-15T03:06:18 I fileWorker is waiting for jobs...
2022-03-15T03:06:18 I fileWorker is waiting for jobs...
2022-03-15T03:06:18 I webRoot is /root/d
2022-03-15T03:06:18 I will listen on 0.0.0.0:8082
2022-03-15T03:08:15 I user request "/root/d/virtio-win-0.1.215.iso"
2022-03-15T03:08:15 I "/root/d/virtio-win-0.1.215.iso" is a new file, sending task to worker
2022-03-15T03:08:15 I fileWorker working on "/root/d/virtio-win-0.1.215.iso"
2022-03-15T03:08:16 I user request "/root/d/virtio-win-0.1.215.iso"
2022-03-15T03:08:16 I file is being processed by worker
2022-03-15T03:08:17 I user request "/root/d/virtio-win-0.1.215.iso"
2022-03-15T03:08:17 I file is being processed by worker
2022-03-15T03:08:18 I user request "/root/d/virtio-win-0.1.215.iso"
2022-03-15T03:08:18 I file is being processed by worker
2022-03-15T03:08:18 I fileWorker done for "/root/d/virtio-win-0.1.215.iso"
2022-03-15T03:08:18 I fileWorker is waiting for jobs...
2022-03-15T03:08:19 I user request "/root/d/virtio-win-0.1.215.iso"
2022-03-15T03:08:19 I file was processed before
#+END_SRC
fileWorker: when all blocks done, show how many blocks created, block size,
total file size.
** 2021-10-07 When combine blocks to final file, do not use more disk space than original file size.
- Note taken on [2022-03-14 Mon 21:44] \\
in 1.2.0.0 when -l is given, it will do rolling combine. won't take 2x
......
......@@ -56,51 +56,44 @@ library:
executables:
rd-api:
source-dirs:
- api
- api/lib
- cli-version
main: Main.hs
source-dirs: rd-api
main: RD.Server.Cli.Main
dependencies:
- reliable-download
ghc-options:
- -threaded
- -O2
rd:
source-dirs:
- client
- cli-version
main: Main.hs
source-dirs: rd
main: RD.Client.Main
dependencies:
- reliable-download # I only need the Type module
- reliable-download
- io-thread-pool
- http-conduit
- http-client
- http-types
- retry
- socket
- io-thread-pool
ghc-options:
- -threaded
- -O2
maybet:
source-dirs: misc/maybet
main: Main.hs
dependencies:
- reliable-download
main: Main
ghc-options:
- -threaded
logtest:
source-dirs: misc/logtest
main: Main.hs
main: Main
ghc-options:
- -threaded
tests:
all-tests:
main: Main.hs
main: Main
source-dirs:
- test
- api/lib
- rd-api
dependencies:
- reliable-download
- hspec
......
......@@ -51,6 +51,9 @@ see rd-api_.
ChangeLog
---------
* v1.3.0.0 2022-03-15
- feature: add download progress logging
* v1.2.0.0 2022-03-14
- feature: add --rolling-combine option. allow combine big file when disk space is low.
......
module App (mkApp, mkWaiApp) where
module RD.Server.App (mkApp, mkWaiApp) where
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Trans.Class (lift)
......@@ -20,11 +20,12 @@ import Control.Error
import qualified Database.Redis as R
import qualified Data.HashMap.Strict as M
import Type
import Config
import Lib (sha1sum, genBlocks)
import Utils
import qualified DB
import RD.CliVersion (cliVersion)
import RD.Types
import RD.Server.Config
import RD.Lib (sha1sum, genBlocks)
import RD.Utils
import qualified RD.Server.DB as DB
-- | fill block sha1sum, if sha1sum is not ready yet, put "pending" there.
fillSha1sum :: RDRuntimeConfig -> FillBlockParam -> IO [BlockWithChecksum]
......@@ -66,12 +67,12 @@ processNewFileAsyncMaybe rc fbp = do
case fmap fsFromBytes oldStatus of
Just FileStatusError -> do
setResultE <- liftIO $ do
infol rc $ showt strKey <> " was in " <> showt FileStatusError <> " status"
infol rc $ "file status was " <> showt FileStatusError <> ", retry now"
DB.set rc strKey $ fsBytes FileStatusWorking
throwOnLeftMsg setResultE $ "set file status to " <> showt FileStatusWorking <> " failed"
liftIO $ writeChan (rcFileChan rc) fbp
Just FileStatusDone -> liftIO $ infol rc "file was processed before"
Just FileStatusWorking -> liftIO $ infol rc "file is being processed by worker"
Just FileStatusDone -> liftIO $ debugl rc $ "file status is " <> showt FileStatusDone
Just FileStatusWorking -> liftIO $ debugl rc $ "file status is " <> showt FileStatusWorking
_ -> liftIO $ errorl rc "Unexpected file status"
-- | GET /rd/.* handler
......@@ -83,7 +84,7 @@ getRdHandler rc = do
let filepath = webRoot (rcConfig rc) </> T.unpack path
fileStatusE <- lift $ do
liftIO $ infol rc $ "user request " <> showt filepath
liftIO $ infol rc $ "user request rd metadata for " <> showt filepath
liftIO $ catchIOError
(fmap Right (getFileStatus filepath))
(\e -> do
......@@ -125,7 +126,8 @@ mkApp :: RDRuntimeConfig -> ScottyM ()
mkApp rc = do
get (literal "/rd/") $ json $
object ["ok" .= True
,"app" .= ("reliable-download api" :: T.Text)]
,"app" .= ("reliable-download api" :: T.Text)
,"version" .= T.pack cliVersion]
get (regex "^/rd/(.*)") $ do
result <- runExceptT $ getRdHandler rc
......
module Main (main) where
module RD.Server.Cli.Main (main) where
import Data.String (fromString)
import System.Environment (getEnvironment)
......@@ -17,12 +17,12 @@ import System.Exit (die)
import qualified Database.Redis as R
import qualified Text.PrettyPrint.ANSI.Leijen as D
import Config
import CliVersion (cliVersion)
import Opts (argParser)
import OptsDoc (rdApiDescription)
import App (mkWaiApp)
import Worker (startWorkers)
import RD.Server.Config
import RD.CliVersion (cliVersion)
import RD.Server.Cli.Opts (argParser)
import RD.Server.Cli.OptsDoc (rdApiDescription)
import RD.Server.App (mkWaiApp)
import RD.Server.Worker (startWorkers)
-- | parse int env var, if it exists and is an int, return Right (Just i).
-- if it exists and doesn't parse, return Left msg with key and value info.
......@@ -97,6 +97,7 @@ runApiServer rdConfig = do
startWorkers rc
else
warnl rc $ sformat "No redis, not starting workers"
infol rc $ sformat ("rd-api " % string) cliVersion
infol rc $ sformat ("webRoot is " % string) (webRoot config)
infol rc $ sformat ("will listen on " % string % ":" % int) (host config) (port config)
let warpSettings = ( setFdCacheDuration 10
......
module Opts (argParser) where
module RD.Server.Cli.Opts (argParser) where
import Options.Applicative
import Config
import RD.Server.Config
argParser :: Parser RDConfig
argParser = RDConfig
......
module OptsDoc where
module RD.Server.Cli.OptsDoc where
rdApiDescription :: String
rdApiDescription = "rd-api is an HTTP file server that provides static file hosting and reliable\n\
......
module Config where
module RD.Server.Config where
import qualified Database.Redis as R
import Control.Concurrent.Chan
......@@ -9,7 +9,7 @@ import Control.Monad.IO.Class
import qualified System.Logger as L
import Type
import RD.Types
-- | rd-api configuration, supports cli arguments or env variable.
data RDConfig = RDConfig {
......
module DB where
module RD.Server.DB where
import qualified Data.Text as T
import qualified Data.ByteString as B
import qualified Database.Redis as R
import Config
import Utils
import RD.Utils
import RD.Server.Config
-- | insert a key value to db, if key does not exist in db.
insertIfNotExist :: RDRuntimeConfig -> B.ByteString -> B.ByteString -> IO (Either T.Text Bool)
......
module Worker (startWorkers, sha1sumFileRange, fileRange) where
module RD.Server.Worker (startWorkers, sha1sumFileRange, fileRange) where
import Control.Concurrent.Chan
import System.IO (IOMode(ReadMode), withBinaryFile)
......@@ -10,11 +10,12 @@ import qualified Data.ByteString.Lazy as LB
import qualified Data.Text as T
import qualified Database.Redis as R
import Formatting
import Type
import Config
import Lib (sha1sumOnBytes)
import Utils
import RD.Lib (sha1sumOnBytes, humanReadableSize)
import RD.Utils
import RD.Types
import RD.Server.Config
-- | read file range data as LB.ByteString. handle must be a handle to an
-- opened file.
......@@ -55,7 +56,9 @@ fileWorker rc = forever $ do
errorl rc $ "Set file status failed: " <> showt reply
Right _ -> do
debugl rc $ "Set file status to " <> showt resultStatus <> " for " <> showt filepath
infol rc $ "fileWorker done for " <> showt filepath
infol rc $ sformat
("fileWorker done for " % string % ", " % stext % ", " % int % " blocks")
filepath (humanReadableSize (fbpFileSize fbp)) (length (fbpBlocks fbp))
return ()
where
-- | calculate sha1 for a single block. return IO True on success.
......
module RD.Client.Logging
( debugl
, infol
, warnl
, errorl
) where
import qualified Data.Text as T
import qualified System.Logger as L
import RD.Client.Types
-- | log a msg using given log level
clientLogl :: L.Level -> RDClientRuntimeConfig -> T.Text -> IO ()
clientLogl level rc msg = do
let logger = rdLogger rc
L.log logger level $ L.msg msg
L.flush logger
-- | log a debug msg
debugl :: RDClientRuntimeConfig -> T.Text -> IO ()
debugl = clientLogl L.Debug
-- | log an info msg
infol :: RDClientRuntimeConfig -> T.Text -> IO ()
infol = clientLogl L.Info
-- | log an warn msg
warnl :: RDClientRuntimeConfig -> T.Text -> IO ()
warnl = clientLogl L.Warn
-- | log an error msg
errorl :: RDClientRuntimeConfig -> T.Text -> IO ()
errorl = clientLogl L.Error
module Main (main) where
module RD.Client.Main (main) where
import Options.Applicative
import Data.Maybe (isJust, fromMaybe)
......@@ -12,7 +12,8 @@ import System.IO.Error
import System.Exit
import System.FilePath ((</>))
import Data.Text.Encoding (decodeUtf8)
import Control.Concurrent (threadDelay)
import Control.Concurrent.MVar
import Control.Concurrent (threadDelay, forkIO)
import Control.Monad.Trans.Maybe
import Control.Monad.IO.Class (liftIO)
import System.Socket (SocketException)
......@@ -27,36 +28,16 @@ import Network.HTTP.Client (path, responseStatus)
import Formatting
import Control.Retry (retrying, constantDelay, limitRetries, rsIterNumber)
import qualified System.Logger as L
import Lib (sha1sumOnBytes, guessFilename, humanReadableSize)
import CliVersion (cliVersion)
import Utils
import Type
import Opts
import Task
-- | log a msg using given log level
clientLogl :: L.Level -> RDClientRuntimeConfig -> T.Text -> IO ()
clientLogl level rc msg = do
let logger = rdLogger rc
L.log logger level $ L.msg msg
L.flush logger
-- | log a debug msg
debugl :: RDClientRuntimeConfig -> T.Text -> IO ()
debugl = clientLogl L.Debug
-- | log an info msg
infol :: RDClientRuntimeConfig -> T.Text -> IO ()
infol = clientLogl L.Info
-- | log an warn msg
warnl :: RDClientRuntimeConfig -> T.Text -> IO ()
warnl = clientLogl L.Warn
-- | log an error msg
errorl :: RDClientRuntimeConfig -> T.Text -> IO ()
errorl = clientLogl L.Error
import RD.Types
import RD.Lib (sha1sumOnBytes, guessFilename, humanReadableSize)
import RD.CliVersion (cliVersion)
import RD.Utils
import RD.Client.Types
import RD.Client.Logging
import RD.Client.Opts
import RD.Client.Progress
-- | best padding for this many blocks
bestPadding :: Integer -> Int
......@@ -113,7 +94,7 @@ fetchBlockFromHttp rc fbp = do
let blockTargetFile = fbpBlockTargetFile fbp
debugl rc $ "writing block data to " <> showt blockTargetFile
LB.writeFile blockTargetFile bodyLBS
infol rc $ "block " <> showt blockId <> " fetched"
debugl rc $ "block " <> showt blockId <> " fetched"
return True
else do
errorl rc $ "sha1sum verification failed for " <> showt filename <> " block " <> showt blockId <> ", expect " <> showt sha1sum
......@@ -186,8 +167,8 @@ combineBlocks rc rdResp = do
forM_ (getBlockTargetFilenames opts rdResp) $ \blockFilename -> do
debugl rc $ "appending block file " <> showt blockFilename
content <- LB.readFile blockFilename
LB.appendFile targetFilename content -- TODO how to handle error here?
-- let it crash?
LB.appendFile targetFilename content -- how to handle error here?
-- let it crash.
when (rollingCombine (rdOptions rc)) $ do
debugl rc $ "delete block file " <> showt blockFilename
catchIOError (removeFile blockFilename)
......@@ -229,8 +210,8 @@ downloadFile rc url = do
unless (respOk rdResp) $ do
liftIO $ errorl rc $ "GET /rd/ api failed: " <> showt (respMsg rdResp)
mzero
liftIO $ infol rc "GET /rd/ api ok"
let (_filename, targetFilename) = getTargetFilename opts rdResp
let (baseFilename, targetFilename) = getTargetFilename opts rdResp
liftIO $ infol rc $ "GET /rd/ api ok for " <> showt baseFilename
fileExist <- liftIO $ doesFileExist targetFilename
when (fileExist && not (forceOverwrite opts)) $ do
liftIO $ warnl rc $ "Warning: skip already existing file " <> showt targetFilename <> ", use -f to force overwrite"
......@@ -239,10 +220,17 @@ downloadFile rc url = do
infol rc $ "Downloading file: " <> respPath rdResp <> ", "
<> humanReadableSize (respFileSize rdResp)
<> ", " <> showt (respBlockCount rdResp) <> " blocks"
incrementTotalBlockCount rc (fromIntegral (respBlockCount rdResp))
setCurrentFileName rc (respPath rdResp)
setCurrentFileTotalBlockCount rc (fromIntegral (respBlockCount rdResp))
rdResp2 <- loopUntilAllBlocksReady rc url rdResp [] downloadTask
results <- getTaskResults downloadTask
if and results then do
showProgress rc
resultMaybe <- runMaybeT $ combineBlocks rc rdResp2
when (isJust resultMaybe) $ do
incrementDLFileCount rc 1
return $ isJust resultMaybe
else do
errorl rc $ (showt . length . filter id) results <> " blocks failed."
......@@ -263,10 +251,16 @@ loopUntilAllBlocksReady rc url rdResp oldReadyBlocks downloadTask = do
readyBlocks = filter blockIsReady blocks
newReadyBlocks = filter ((`notElem` oldReadyBlocks) . getBlockId) readyBlocks
allBlocksReady = all blockIsReady blocks
infol rc $ (showt . length) newReadyBlocks <> " new block(s) ready on server side"
addTasks downloadTask $ map (fetchBlock rc url rdResp) newReadyBlocks
if allBlocksReady then
debugl rc $ (showt . length) newReadyBlocks <> " new block(s) ready on server side"
addTasks downloadTask $
map (\b -> do
isSuccess <- fetchBlock rc url rdResp b
when isSuccess $ incrementDownloadedBlockCount rc 1
return isSuccess
) newReadyBlocks
if allBlocksReady then do
-- loop finished
infol rc $ "all " <> (showt . length) blocks <> " block(s) ready on server side"
return rdResp
else do
when (null newReadyBlocks) $ do
......@@ -287,8 +281,12 @@ cliApp opts = do
L.setDelimiter " ")
L.defSettings
logger <- L.new logSettings
progress <- newMVar emptyProgress {
piTotalFileCount=length (urls opts)
}
let rc = RDClientRuntimeConfig { rdOptions=opts
, rdLogger=logger}
, rdLogger=logger
, rdProgress=progress }
debugl rc $ "command line options: " <> showt opts
let dir = tempDir opts
catchIOError (createDirectoryIfMissing True dir)
......@@ -296,11 +294,13 @@ cliApp opts = do
errorl rc $ "Create temp dir " <> showt dir <> " failed: " <> showt e
exitFailure)
debugl rc $ "using temp dir: " <> showt dir
_progressTid <- forkIO $ showProgressLoop rc 0
-- resultsMaybe :: [Maybe Bool]
resultsMaybe <- mapM (runMaybeT . downloadFile rc) (urls opts)
let results = map (fromMaybe False) resultsMaybe
if and results then
infol rc "All urls downloaded."
if and results then do
showProgressAllDone rc
exitSuccess -- will auto terminate _progressTid thread.
else do
errorl rc $ (showt . length . filter not) results <> " urls failed/skipped."
exitFailure
......@@ -311,15 +311,18 @@ main = do
if showVersion opts then
putStrLn $ "rd " <> cliVersion
else
if null $ urls opts then do
putStrLn "No URLs given, nothing to do. See rd --help"
exitFailure
else
if keepBlockData opts && rollingCombine opts then do
putStrLn "Error: option --keep and --rolling-combine can not be used at the same time.\nSee rd --help"
-- check options are valid, logically.
if null $ urls opts then
do
putStrLn "No URLs given, nothing to do. See rd --help"
exitFailure
else
cliApp opts
else
if keepBlockData opts && rollingCombine opts then
do
putStrLn "Error: option --keep and --rolling-combine can not be used at the same time.\nSee rd --help"
exitFailure
else
cliApp opts
where
parserInfo = info (argParser <**> helper)
( fullDesc
......
module Opts (RDOptions(..), argParser, RDClientRuntimeConfig(..)) where
import qualified Data.Text as T
module RD.Client.Opts
( RDOptions(..)
, argParser
, RDClientRuntimeConfig(..)) where
import Options.Applicative
import qualified System.Logger as L
data RDOptions = RDOptions
{ blockMaxRetry :: Int
, keepBlockData :: Bool
, rollingCombine :: Bool
, tempDir :: FilePath
, outputDir :: FilePath
, workerCount :: Int
, forceOverwrite :: Bool
, verbose :: Bool
, showVersion :: Bool
, urls :: [T.Text] } deriving (Show)
data RDClientRuntimeConfig = RDClientRuntimeConfig
{ rdOptions :: RDOptions
, rdLogger :: L.Logger }
import RD.Client.Types
argParser :: Parser RDOptions
argParser = RDOptions
......@@ -66,6 +52,13 @@ argParser = RDOptions
<> short 'f'
<> help "overwrite exiting target file in OUTPUT_DIR"
<> showDefault )
<*> option auto
( long "progress-interval"
<> short 'i'
<> help "how often to show download progress, in seconds"
<> showDefault
<> value 10
<> metavar "N" )
<*> switch
( long "verbose"
<> short 'v'
......
module RD.Client.Progress
( emptyProgress
, incrementDLFileCount
, incrementTotalBlockCount
, incrementDownloadedBlockCount
, setCurrentFileName
, setCurrentFileTotalBlockCount
, showProgress
, showProgressAllDone
, showProgressLoop) where
import Control.Concurrent.MVar
import qualified Data.Text as T
import Control.Concurrent (threadDelay)
import Formatting
import RD.Client.Types
import RD.Client.Logging
emptyProgress :: Progress
emptyProgress = Progress {
piTotalFileCount=0
, piDownloadedFileCount=0
, piTotalBlockCount=0
, piDownloadedBlockCount=0
, piCurrentFileName=""
, piCurrentFileTotalBlockCount=0
, piCurrentFileDownloadedBlockCount=0}
-- | increment piDownloadedFileCount by n
incrementDLFileCount :: RDClientRuntimeConfig -> Int -> IO ()
incrementDLFileCount rc n =
modifyMVar_ (rdProgress rc) (\p -> return p {piDownloadedFileCount=piDownloadedFileCount p + n})
-- | increment piTotalBlockCount by n
incrementTotalBlockCount :: RDClientRuntimeConfig -> Int -> IO ()
incrementTotalBlockCount rc n =
modifyMVar_ (rdProgress rc) (\p -> return p {piTotalBlockCount=piTotalBlockCount p + n})
-- | increment piDownloadedBlockCount and piCurrentFileDownloadedBlockCount by n
incrementDownloadedBlockCount :: RDClientRuntimeConfig -> Int -> IO ()
incrementDownloadedBlockCount rc n =
modifyMVar_ (rdProgress rc)
(\p -> return p
{ piDownloadedBlockCount=piDownloadedBlockCount p + n
, piCurrentFileDownloadedBlockCount=piCurrentFileDownloadedBlockCount p + n})
-- | set piCurrentFileName
setCurrentFileName :: RDClientRuntimeConfig -> T.Text -> IO ()
setCurrentFileName rc fnFromUrl =
modifyMVar_ (rdProgress rc) (\p -> return p {piCurrentFileName=fnFromUrl})
-- | set piCurrentFileTotalBlockCount to n, reset piCurrentFileDownloadedBlockCount to 0
setCurrentFileTotalBlockCount :: RDClientRuntimeConfig -> Int -> IO ()
setCurrentFileTotalBlockCount rc n = do
modifyMVar_ (rdProgress rc)
(\p -> return p { piCurrentFileTotalBlockCount=n
, piCurrentFileDownloadedBlockCount=0 })
-- | show download progress in console. it will not touch progress MVar.
showProgress1 :: RDClientRuntimeConfig -> Progress -> IO ()
showProgress1 rc p = do
let dlblockc = piCurrentFileDownloadedBlockCount p
totalblockc = piCurrentFileTotalBlockCount p
infol rc $ sformat
("progress: [" % int % "%] " % int % "/" % int % " blocks, " % stext)
((dlblockc * 100) `div` totalblockc) dlblockc totalblockc
(piCurrentFileName p)
-- -- | show download progress in console. used to force show 100% progress msg when a file is fully downloaded.
showProgress :: RDClientRuntimeConfig -> IO ()
showProgress rc = showProgress1 rc =<< readMVar (rdProgress rc)
-- | show a final progress when all DL completed.
showProgressAllDone :: RDClientRuntimeConfig -> IO ()
showProgressAllDone rc = do
progress <- readMVar (rdProgress rc)
let totalblockc = piTotalBlockCount progress
totalfilec = piTotalFileCount progress
infol rc $ sformat
("All urls downloaded. " % int % " files, " % int % " blocks.")
totalfilec totalblockc
-- | show progress if at least one new block is fetched since last time. Otherwise, give a hint there may be a DL hang.
showProgressMaybe :: RDClientRuntimeConfig -> Int -> IO Int
showProgressMaybe rc lastDownloadedBlockCount = do
p <- readMVar (rdProgress rc)
let newDLBC = piDownloadedBlockCount p
if newDLBC > lastDownloadedBlockCount then do
showProgress1 rc p
return newDLBC
else do
warnl rc $ sformat ("No block fetched in last " % int % " seconds")
(progressInterval (rdOptions rc))
return lastDownloadedBlockCount
-- | show download progress in console. designed to run in a thread.
showProgressLoop :: RDClientRuntimeConfig -> Int -> IO ()
showProgressLoop rc lastDownloadedBlockCount = do
threadDelay (progressInterval (rdOptions rc) * 1000000)
newCount <- showProgressMaybe rc lastDownloadedBlockCount
showProgressLoop rc newCount