Merge pull request #4088 from Kelimion/benchmark

Improve benchmarks.
odin-lang · Aug 16, 2024 · 14e2070
2 parents 40b8150 + 6c46c9e
commit 14e2070
Show file tree

Hide file tree

Showing 3 changed files with 52 additions and 54 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -35,7 +35,6 @@ jobs:
           ./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
           ./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
           ./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
-          ./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
           (cd tests/issues; ./run.sh)
   build_freebsd:
     name: FreeBSD Build, Check, and Test
@@ -64,7 +63,6 @@ jobs:
           ./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
           ./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
           ./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
-          ./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
           (cd tests/issues; ./run.sh)
   ci:
     strategy:
@@ -125,8 +123,6 @@ jobs:
         run: ./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
       - name: Internals tests
         run: ./odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
-      - name: Core library benchmarks
-        run: ./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
       - name: GitHub Issue tests
         run: |
           cd tests/issues
@@ -196,11 +192,6 @@ jobs:
         run: |
           call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
           odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
-      - name: Core library benchmarks
-        shell: cmd
-        run: |
-          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
       - name: Vendor library tests
         shell: cmd
         run: |

diff --git a/tests/benchmark/bytes/benchmark_bytes.odin b/tests/benchmark/bytes/benchmark_bytes.odin
@@ -4,8 +4,21 @@ import "core:bytes"
 import "core:fmt"
 import "core:log"
 import "core:testing"
+import "core:strings"
+import "core:text/table"
 import "core:time"
 
+RUNS_PER_SIZE :: 2500
+
+sizes := [?]int {
+	15, 16, 17,
+	31, 32, 33,
+	256,
+	512,
+	1024,
+	1024 * 1024,
+	// 1024 * 1024 * 1024,
+}
 
 // These are the normal, unoptimized algorithms.
 
@@ -27,17 +40,7 @@ plain_last_index_byte :: proc(s: []u8, c: byte) -> (res: int) #no_bounds_check {
 	return -1
 }
 
-sizes := [?]int {
-	15, 16, 17,
-	31, 32, 33,
-	256,
-	512,
-	1024,
-	1024 * 1024,
-	1024 * 1024 * 1024,
-}
-
-run_trial_size :: proc(p: proc([]u8, byte) -> int, size: int, idx: int, warmup: int, runs: int) -> (timing: time.Duration) {
+run_trial_size :: proc(p: proc([]u8, byte) -> int, size: int, idx: int, runs: int) -> (timing: time.Duration) {
 	data := make([]u8, size)
 	defer delete(data)
 
@@ -48,10 +51,6 @@ run_trial_size :: proc(p: proc([]u8, byte) -> int, size: int, idx: int, warmup:
 
 	accumulator: int
 
-	for _ in 0..<warmup {
-		accumulator += p(data, 'z')
-	}
-
 	for _ in 0..<runs {
 		start := time.now()
 		accumulator += p(data, 'z')
@@ -65,44 +64,51 @@ run_trial_size :: proc(p: proc([]u8, byte) -> int, size: int, idx: int, warmup:
 	return
 }
 
-HOT :: 3
+bench_table :: proc(algo_name: string, forward: bool, plain: proc([]u8, byte) -> int, simd: proc([]u8, byte) -> int) {
+	string_buffer := strings.builder_make()
+	defer strings.builder_destroy(&string_buffer)
 
-@test
-benchmark_plain_index_cold :: proc(t: ^testing.T) {
-	report: string
-	for size in sizes {
-		timing := run_trial_size(plain_index_byte, size, size - 1, 0, 1)
-		report = fmt.tprintf("%s\n        +++ % 8M | %v", report, size, timing)
-		timing = run_trial_size(plain_last_index_byte, size, 0, 0, 1)
-		report = fmt.tprintf("%s\n (last) +++ % 8M | %v", report, size, timing)
-	}
-	log.info(report)
-}
+	tbl: table.Table
+	table.init(&tbl)
+	defer table.destroy(&tbl)
+
+	// table.caption(&tbl, "index_byte benchmark")
+	table.aligned_header_of_values(&tbl, .Right, "Algorithm", "Size", "Iterations", "Scalar", "SIMD", "SIMD Relative (%)", "SIMD Relative (x)")
 
-@test
-benchmark_plain_index_hot :: proc(t: ^testing.T) {
-	report: string
 	for size in sizes {
-		timing := run_trial_size(plain_index_byte, size, size - 1, HOT, HOT)
-		report = fmt.tprintf("%s\n        +++ % 8M | %v", report, size, timing)
-		timing = run_trial_size(plain_last_index_byte, size, 0, HOT, HOT)
-		report = fmt.tprintf("%s\n (last) +++ % 8M | %v", report, size, timing)
+		needle_index := size - 1 if forward else 0
+
+		plain_timing := run_trial_size(plain, size, needle_index, RUNS_PER_SIZE)
+		simd_timing  := run_trial_size(simd,  size, needle_index, RUNS_PER_SIZE)
+
+		_plain := fmt.tprintf("%8M",  plain_timing)
+		_simd  := fmt.tprintf("%8M",  simd_timing)
+		_relp  := fmt.tprintf("%.3f %%", f64(simd_timing) / f64(plain_timing) * 100.0)
+		_relx  := fmt.tprintf("%.3f x",  1 / (f64(simd_timing) / f64(plain_timing)))
+
+		table.aligned_row_of_values(
+			&tbl,
+			.Right,
+			algo_name,
+			size, RUNS_PER_SIZE, _plain, _simd, _relp, _relx)
 	}
-	log.info(report)
+
+	builder_writer := strings.to_writer(&string_buffer)
+
+	fmt.sbprintln(&string_buffer)
+	table.write_plain_table(builder_writer, &tbl)
+
+	my_table_string := strings.to_string(string_buffer)
+	log.info(my_table_string)
 }
 
 @test
-benchmark_simd_index_cold :: proc(t: ^testing.T) {
-	report: string
-	for size in sizes {
-		timing := run_trial_size(bytes.index_byte, size, size - 1, 0, 1)
-		report = fmt.tprintf("%s\n        +++ % 8M | %v", report, size, timing)
-		timing = run_trial_size(bytes.last_index_byte, size, 0, 0, 1)
-		report = fmt.tprintf("%s\n (last) +++ % 8M | %v", report, size, timing)
-	}
-	log.info(report)
+benchmark_index_byte :: proc(t: ^testing.T) {
+	bench_table("index_byte",      true,  plain_index_byte,      bytes.index_byte)
+	// bench_table("last_index_byte", false, plain_last_index_byte, bytes.last_index_byte)
 }
 
+/*
 @test
 benchmark_simd_index_hot :: proc(t: ^testing.T) {
 	report: string
@@ -114,3 +120,4 @@ benchmark_simd_index_hot :: proc(t: ^testing.T) {
 	}
 	log.info(report)
 }
+*/
diff --git a/tests/benchmark/crypto/benchmark_crypto.odin b/tests/benchmark/crypto/benchmark_crypto.odin
@@ -392,7 +392,7 @@ _benchmark_aes256_gcm :: proc(
 	iv: [aes.GCM_IV_SIZE]byte
 	tag: [aes.GCM_TAG_SIZE]byte = ---
 
-	ctx := transmute(^aes.Context_GCM)context.user_ptr
+	ctx := (^aes.Context_GCM)(context.user_ptr)
 
 	for _ in 0 ..= options.rounds {
 		aes.seal_gcm(ctx, buf, tag[:], iv[:], nil, buf)