diff --git a/README.rst b/README.rst
index e1eda6e6..63499e60 100644
--- a/README.rst
+++ b/README.rst
@@ -115,24 +115,14 @@ Performance
 To run the benchmarks, execute the included bench/bench_zfec.py script with
 optional --k= and --m= arguments.
 
-On my Athlon 64 2.4 GHz workstation (running Linux), the "zfec" command-line
-tool encoded a 160 MB file with m=100, k=94 (about 6% redundancy) in 3.9
-seconds, where the "par2" tool encoded the file with about 6% redundancy in
-27 seconds. zfec encoded the same file with m=12, k=6 (100% redundancy) in
-4.1 seconds, where par2 encoded it with about 100% redundancy in 7 minutes
-and 56 seconds.
-
-The underlying C library in benchmark mode encoded from a file at about 4.9
-million bytes per second and decoded at about 5.8 million bytes per second.
-
-On Peter's fancy Intel Mac laptop (2.16 GHz Core Duo), it encoded from a file
-at about 6.2 million bytes per second.
-
-On my even fancier Intel Mac laptop (2.33 GHz Core Duo), it encoded from a
-file at about 6.8 million bytes per second.
-
-On my old PowerPC G4 867 MHz Mac laptop, it encoded from a file at about 1.3
-million bytes per second.
+Here are the results on an Intel i7-12700K::
+
+    measuring encoding of data with K=3, M=10, encoding 1000000 bytes 1000 times in a row...
+    Average MB/s: 364
+    measuring decoding of primary-only data with K=3, M=10, 1000 times in a row...
+    Average MB/s: 1894750
+    measuring decoding of secondary-only data with K=3, M=10, 1000 times in a row...
+    Average MB/s: 3298
 
 Here is a paper analyzing the performance of various erasure codes and
 their implementations, including zfec:
diff --git a/bench/bench_zfec.py b/bench/bench_zfec.py
index 9a9b8e2f..7e9fbe79 100644
--- a/bench/bench_zfec.py
+++ b/bench/bench_zfec.py
@@ -1,9 +1,8 @@
-from zfec import easyfec, Encoder, filefec
+from zfec import easyfec, Encoder, filefec, Decoder
 from pyutil import mathutil
 import os, sys
-
-from pyutil import benchutil
+from time import time
 
 FNAME="benchrandom.data"
 
 def donothing(results, reslenthing):
@@ -16,7 +15,7 @@ def donothing(results, reslenthing):
 
 K=3
 M=10
-d = ""
+d = b""
 ds = []
 easyfecenc = None
 fecenc = None
@@ -30,12 +29,12 @@ def _make_new_rand_data(size, k, m):
     blocksize = mathutil.div_ceil(size, k)
     for i in range(k):
         ds[i] = d[i*blocksize:(i+1)*blocksize]
-    ds[-1] = ds[-1] + "\x00" * (len(ds[-2]) - len(ds[-1]))
+    ds[-1] = ds[-1] + b"\x00" * (len(ds[-2]) - len(ds[-1]))
     easyfecenc = easyfec.Encoder(k, m)
     fecenc = Encoder(k, m)
 
-import sha
-hashers = [ sha.new() for i in range(M) ]
+from hashlib import sha256
+hashers = [ sha256() for i in range(M) ]
 def hashem(results, reslenthing):
     for i, result in enumerate(results):
         hashers[i].update(result)
@@ -77,26 +76,46 @@ def _encode_data_fec(N):
 def bench(k, m):
     SIZE = 10**6
-    MAXREPS = 64
+    MAXREPS = 1000
 #    for f in [_encode_file_stringy_easyfec, _encode_file_stringy, _encode_file, _encode_file_not_really,]:
 #    for f in [_encode_file,]:
 #    for f in [_encode_file_not_really, _encode_file_not_really_and_hash, _encode_file, _encode_file_and_hash,]:
 #    for f in [_encode_data_not_really, _encode_data_easyfec, _encode_data_fec,]:
-    print "measuring encoding of data with K=%d, M=%d, reporting results in nanoseconds per byte after encoding %d bytes %d times in a row..." % (k, m, SIZE, MAXREPS)
+    print("measuring encoding of data with K=%d, M=%d, encoding %d bytes %d times in a row..." % (k, m, SIZE, MAXREPS))
 #    for f in [_encode_data_fec, _encode_data_not_really]:
     for f in [_encode_data_fec]:
         def _init_func(size):
             return _make_new_rand_data(size, k, m)
         for BSIZE in [SIZE]:
-            results = benchutil.rep_bench(f, n=BSIZE, initfunc=_init_func, MAXREPS=MAXREPS, MAXTIME=None, UNITS_PER_SECOND=1000000000)
-            print "and now represented in MB/s..."
-            print
-            best = results['best']
-            mean = results['mean']
-            worst = results['worst']
-            print "best: % 4.3f MB/sec" % (10**3 / best)
-            print "mean: % 4.3f MB/sec" % (10**3 / mean)
-            print "worst: % 4.3f MB/sec" % (10**3 / worst)
+            _init_func(BSIZE)
+            start = time()
+            for _ in range(MAXREPS):
+                f(BSIZE)
+            elapsed = (time() - start) / MAXREPS
+            print("Average MB/s:", (BSIZE / (1024 * 1024)) / elapsed)
+
+    print("measuring decoding of primary-only data with K=%d, M=%d, %d times in a row..." % (k, m, MAXREPS))
+    blocks = fecenc.encode(ds)
+    sharenums = list(range(len(blocks)))
+    decer = Decoder(k, m)
+    # Decoding only the first k shares is nearly free: they are the primary
+    # shares, which carry the original data verbatim.
+    start = time()
+    for _ in range(MAXREPS):
+        decer.decode(blocks[:k], sharenums[:k])
+    elapsed = (time() - start) / MAXREPS
+    # Verify correctness outside the timed loop.
+    assert b"".join(decer.decode(blocks[:k], sharenums[:k]))[:SIZE] == b"".join(ds)[:SIZE]
+    print("Average MB/s:", (sum(len(b) for b in blocks) / (1024 * 1024)) / elapsed)
+
+    print("measuring decoding of secondary-only data with K=%d, M=%d, %d times in a row..." % (k, m, MAXREPS))
+    # Decoding k secondary (parity) shares exercises the real decode path.
+    start = time()
+    for _ in range(MAXREPS):
+        decer.decode(blocks[k:k+k], sharenums[k:k+k])
+    elapsed = (time() - start) / MAXREPS
+    assert b"".join(decer.decode(blocks[k:k+k], sharenums[k:k+k]))[:SIZE] == b"".join(ds)[:SIZE]
+    print("Average MB/s:", (sum(len(b) for b in blocks) / (1024 * 1024)) / elapsed)
 
 
 k = K
 m = M
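For readers who want to sanity-check the numbers, the benchmark drives zfec's
block-level API: the caller splits the input into k equal-length primary
blocks, Encoder.encode returns m shares (the first k of which are the primary
blocks themselves, which is why the "primary-only" decode above is nearly
free), and Decoder.decode rebuilds the primary blocks from any k shares plus
their share numbers. Here is a minimal round-trip sketch along those lines;
the variable names and the zero-padding scheme are illustrative, not part of
zfec::

    from zfec import Encoder, Decoder

    k, m = 3, 10
    data = b"hello, erasure coding!" * 1000

    # Split into k equal-length primary blocks, zero-padding the tail
    # (bench_zfec.py does the same via mathutil.div_ceil).
    blocksize = -(-len(data) // k)  # ceiling division
    padded = data + b"\x00" * (k * blocksize - len(data))
    blocks = [padded[i * blocksize:(i + 1) * blocksize] for i in range(k)]

    shares = Encoder(k, m).encode(blocks)  # m shares; shares[:k] == blocks

    # Recover from k parity shares only, as the "secondary-only" benchmark does.
    recovered = Decoder(k, m).decode(shares[k:2 * k], list(range(k, 2 * k)))
    assert b"".join(recovered) == padded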