From 118c9ef8bee4ae00923172cfa0fead0150808311 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 20 Oct 2023 10:59:36 -0400 Subject: [PATCH 1/4] Basic benchmarking works now --- bench/bench_zfec.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/bench/bench_zfec.py b/bench/bench_zfec.py index 9a9b8e2f..229c1345 100644 --- a/bench/bench_zfec.py +++ b/bench/bench_zfec.py @@ -2,8 +2,7 @@ from pyutil import mathutil import os, sys - -from pyutil import benchutil +from time import time FNAME="benchrandom.data" @@ -16,7 +15,7 @@ def donothing(results, reslenthing): K=3 M=10 -d = "" +d = b"" ds = [] easyfecenc = None fecenc = None @@ -30,12 +29,12 @@ def _make_new_rand_data(size, k, m): blocksize = mathutil.div_ceil(size, k) for i in range(k): ds[i] = d[i*blocksize:(i+1)*blocksize] - ds[-1] = ds[-1] + "\x00" * (len(ds[-2]) - len(ds[-1])) + ds[-1] = ds[-1] + b"\x00" * (len(ds[-2]) - len(ds[-1])) easyfecenc = easyfec.Encoder(k, m) fecenc = Encoder(k, m) -import sha -hashers = [ sha.new() for i in range(M) ] +from hashlib import sha256 +hashers = [ sha256() for i in range(M) ] def hashem(results, reslenthing): for i, result in enumerate(results): hashers[i].update(result) @@ -77,26 +76,23 @@ def _encode_data_fec(N): def bench(k, m): SIZE = 10**6 - MAXREPS = 64 + MAXREPS = 1000 # for f in [_encode_file_stringy_easyfec, _encode_file_stringy, _encode_file, _encode_file_not_really,]: # for f in [_encode_file,]: # for f in [_encode_file_not_really, _encode_file_not_really_and_hash, _encode_file, _encode_file_and_hash,]: # for f in [_encode_data_not_really, _encode_data_easyfec, _encode_data_fec,]: - print "measuring encoding of data with K=%d, M=%d, reporting results in nanoseconds per byte after encoding %d bytes %d times in a row..." % (k, m, SIZE, MAXREPS) + print("measuring encoding of data with K=%d, M=%d, encoding %d bytes %d times in a row..." 
% (k, m, SIZE, MAXREPS)) # for f in [_encode_data_fec, _encode_data_not_really]: for f in [_encode_data_fec]: def _init_func(size): return _make_new_rand_data(size, k, m) for BSIZE in [SIZE]: - results = benchutil.rep_bench(f, n=BSIZE, initfunc=_init_func, MAXREPS=MAXREPS, MAXTIME=None, UNITS_PER_SECOND=1000000000) - print "and now represented in MB/s..." - print - best = results['best'] - mean = results['mean'] - worst = results['worst'] - print "best: % 4.3f MB/sec" % (10**3 / best) - print "mean: % 4.3f MB/sec" % (10**3 / mean) - print "worst: % 4.3f MB/sec" % (10**3 / worst) + start = time() + _init_func(BSIZE) + for _ in range(MAXREPS): + f(BSIZE) + elapsed = (time() - start) / MAXREPS + print("Average MB/s:", (BSIZE / (1024 * 1024)) / elapsed) k = K m = M From 62054dca02555832badb1ea45f4237c92e018f84 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 20 Oct 2023 11:52:42 -0400 Subject: [PATCH 2/4] Add decoding, increase accuracy of encoding --- bench/bench_zfec.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/bench/bench_zfec.py b/bench/bench_zfec.py index 229c1345..d22f23bf 100644 --- a/bench/bench_zfec.py +++ b/bench/bench_zfec.py @@ -1,4 +1,4 @@ -from zfec import easyfec, Encoder, filefec +from zfec import easyfec, Encoder, filefec, Decoder from pyutil import mathutil import os, sys @@ -87,13 +87,25 @@ def bench(k, m): def _init_func(size): return _make_new_rand_data(size, k, m) for BSIZE in [SIZE]: - start = time() _init_func(BSIZE) + start = time() for _ in range(MAXREPS): f(BSIZE) elapsed = (time() - start) / MAXREPS print("Average MB/s:", (BSIZE / (1024 * 1024)) / elapsed) + print("measuring decoding of data with K=%d, M=%d, %d times in a row..." 
% (k, m, MAXREPS)) + blocks = fecenc.encode(ds) + sharenums = list(range(len(blocks))) + decer = Decoder(k, m) + start = time() + for _ in range(MAXREPS): + decer.decode(blocks[:k], sharenums[:k]) + assert b"".join(decer.decode(blocks[:k], sharenums[:k]))[:SIZE] == b"".join(ds)[:SIZE] + elapsed = (time() - start) / MAXREPS + print("Average MB/s:", (sum(len(b) for b in blocks) / (1024 * 1024)) / elapsed) + + k = K m = M for arg in sys.argv: From 8c207e01db1e647891079a0a64ac5884dc07bc29 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 23 Oct 2023 14:19:29 -0400 Subject: [PATCH 3/4] Add secondary-only benchmark too --- bench/bench_zfec.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/bench/bench_zfec.py b/bench/bench_zfec.py index d22f23bf..7e9fbe79 100644 --- a/bench/bench_zfec.py +++ b/bench/bench_zfec.py @@ -94,7 +94,7 @@ def _init_func(size): elapsed = (time() - start) / MAXREPS print("Average MB/s:", (BSIZE / (1024 * 1024)) / elapsed) - print("measuring decoding of data with K=%d, M=%d, %d times in a row..." % (k, m, MAXREPS)) + print("measuring decoding of primary-only data with K=%d, M=%d, %d times in a row..." % (k, m, MAXREPS)) blocks = fecenc.encode(ds) sharenums = list(range(len(blocks))) decer = Decoder(k, m) @@ -105,6 +105,17 @@ def _init_func(size): elapsed = (time() - start) / MAXREPS print("Average MB/s:", (sum(len(b) for b in blocks) / (1024 * 1024)) / elapsed) + print("measuring decoding of secondary-only data with K=%d, M=%d, %d times in a row..." 
% (k, m, MAXREPS)) + blocks = fecenc.encode(ds) + sharenums = list(range(len(blocks))) + decer = Decoder(k, m) + start = time() + for _ in range(MAXREPS): + decer.decode(blocks[k:k+k], sharenums[k:k+k]) + assert b"".join(decer.decode(blocks[k:k+k], sharenums[k:k+k]))[:SIZE] == b"".join(ds)[:SIZE] + elapsed = (time() - start) / MAXREPS + print("Average MB/s:", (sum(len(b) for b in blocks) / (1024 * 1024)) / elapsed) + k = K m = M From 6b2fa0c422ef32109bd7f4bd20921b9abd26853d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 23 Oct 2023 14:19:46 -0400 Subject: [PATCH 4/4] Update numbers to be less ancient --- README.rst | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index e1eda6e6..63499e60 100644 --- a/README.rst +++ b/README.rst @@ -115,24 +115,16 @@ Performance To run the benchmarks, execute the included bench/bench_zfec.py script with optional --k= and --m= arguments. -On my Athlon 64 2.4 GHz workstation (running Linux), the "zfec" command-line -tool encoded a 160 MB file with m=100, k=94 (about 6% redundancy) in 3.9 -seconds, where the "par2" tool encoded the file with about 6% redundancy in -27 seconds. zfec encoded the same file with m=12, k=6 (100% redundancy) in -4.1 seconds, where par2 encoded it with about 100% redundancy in 7 minutes -and 56 seconds. - -The underlying C library in benchmark mode encoded from a file at about 4.9 -million bytes per second and decoded at about 5.8 million bytes per second. - -On Peter's fancy Intel Mac laptop (2.16 GHz Core Duo), it encoded from a file -at about 6.2 million bytes per second. - -On my even fancier Intel Mac laptop (2.33 GHz Core Duo), it encoded from a -file at about 6.8 million bytes per second. - -On my old PowerPC G4 867 MHz Mac laptop, it encoded from a file at about 1.3 -million bytes per second. 
+Here are the results for an i7-12700K: + +``` +measuring encoding of data with K=3, M=10, encoding 1000000 bytes 1000 times in a row... +Average MB/s: 364 +measuring decoding of primary-only data with K=3, M=10, 1000 times in a row... +Average MB/s: 1894750 +measuring decoding of secondary-only data with K=3, M=10, 1000 times in a row... +Average MB/s: 3298 +``` Here is a paper analyzing the performance of various erasure codes and their implementations, including zfec: