From a238941215c45637abfd9b4ff6b632e39ab45eaa Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Mon, 9 Sep 2024 10:36:37 -0700 Subject: [PATCH 1/2] Cleanup some fixmes in the jiterpreter --- .../runtime/jiterpreter-trace-generator.ts | 17 ++++++++--------- src/mono/browser/runtime/jiterpreter.ts | 13 ------------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/src/mono/browser/runtime/jiterpreter-trace-generator.ts b/src/mono/browser/runtime/jiterpreter-trace-generator.ts index 8d88b2ff98e42..6b05d6f41b011 100644 --- a/src/mono/browser/runtime/jiterpreter-trace-generator.ts +++ b/src/mono/browser/runtime/jiterpreter-trace-generator.ts @@ -449,10 +449,9 @@ export function generateWasmBody ( if (pruneOpcodes) { // We emit an unreachable opcode so that if execution somehow reaches a pruned opcode, we will abort // This should be impossible anyway but it's also useful to have pruning visible in the wasm - // FIXME: Ideally we would stop generating opcodes after the first unreachable, but that causes v8 to hang if (!hasEmittedUnreachable) builder.appendU8(WasmOpcode.unreachable); - // Each unreachable opcode could generate a bunch of native code in a bad wasm jit so generate nops after it + // Don't generate multiple unreachable opcodes in a row hasEmittedUnreachable = true; } break; @@ -875,9 +874,8 @@ export function generateWasmBody ( } case MintOpcode.MINT_LD_DELEGATE_METHOD_PTR: { - // FIXME: ldloca invalidation size - append_ldloca(builder, getArgU16(ip, 1), 8); - append_ldloca(builder, getArgU16(ip, 2), 8); + append_ldloca(builder, getArgU16(ip, 1), 4); + append_ldloca(builder, getArgU16(ip, 2), 4); builder.callImport("ld_del_ptr"); break; } @@ -1383,7 +1381,9 @@ export function generateWasmBody ( (builder.callHandlerReturnAddresses.length <= maxCallHandlerReturnAddresses) ) { // mono_log_info(`endfinally @0x${(ip).toString(16)}. return addresses:`, builder.callHandlerReturnAddresses.map(ra => (ra).toString(16))); - // FIXME: Clean this codegen up + // FIXME: Replace this with a chain of selects to more efficiently map from RA -> index, then + // a single jump table at the end to jump to the right place. This will generate much smaller + // code and be able to handle a larger number of return addresses. // Load ret_ip const clauseIndex = getArgU16(ip, 1), clauseDataOffset = get_imethod_clause_data_offset(frame, clauseIndex); @@ -2476,7 +2476,8 @@ function emit_sfieldop ( builder.ptr_const(pStaticData); // src append_ldloca(builder, localOffset, 0); - // FIXME: Use mono_gc_wbarrier_set_field_internal + // We don't need to use gc_wbarrier_set_field_internal here because there's no object + // reference, interp does a raw write builder.callImport("copy_ptr"); return true; case MintOpcode.MINT_LDSFLD_VT: { @@ -2903,7 +2904,6 @@ function emit_branch ( ); cwraps.mono_jiterp_boost_back_branch_target(destination); - // FIXME: Should there be a safepoint here? append_bailout(builder, destination, BailoutReason.BackwardBranch); modifyCounter(JiterpCounter.BackBranchesNotEmitted, 1); return true; @@ -4036,7 +4036,6 @@ function emit_atomics ( if (!builder.options.enableAtomics) return false; - // FIXME: memory barrier might be worthwhile to implement // FIXME: We could probably unify most of the xchg/cmpxchg implementation into one implementation const xchg = xchgTable[opcode]; diff --git a/src/mono/browser/runtime/jiterpreter.ts b/src/mono/browser/runtime/jiterpreter.ts index 7aab49e6bf266..996e7ed34f9d1 100644 --- a/src/mono/browser/runtime/jiterpreter.ts +++ b/src/mono/browser/runtime/jiterpreter.ts @@ -851,18 +851,6 @@ function generate_wasm ( // Get the exported trace function const fn = traceInstance.exports[traceName]; - // FIXME: Before threading can be supported, we will need to ensure that - // once we assign a function pointer index to a given trace, the trace is - // broadcast to all the JS workers and compiled + installed at the appropriate - // index in every worker's function pointer table. This also means that we - // would need to fill empty slots with a dummy function when growing the table - // so that any erroneous ENTERs will skip the opcode instead of crashing due - // to calling a null function pointer. - // Table grow operations will need to be synchronized between workers somehow, - // probably by storing the table size in a volatile global or something so that - // we know the range of indexes available to us and can ensure that threads - // independently jitting traces will not stomp on each other and all threads - // have a globally consistent view of which function pointer maps to each trace. rejected = false; let idx: number; @@ -995,7 +983,6 @@ export function mono_interp_tier_prepare_jiterpreter ( if (!mostRecentOptions) mostRecentOptions = getOptions(); - // FIXME: We shouldn't need this check if (!mostRecentOptions.enableTraces) return JITERPRETER_NOT_JITTED; else if (mostRecentOptions.wasmBytesLimit <= getCounter(JiterpCounter.BytesGenerated)) From 250b793b698e8d321bf58a410d513c77f578600d Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Mon, 9 Sep 2024 11:04:10 -0700 Subject: [PATCH 2/2] Flow through size of the var in MINT_LDLOCA_S so jiterpreter can do accurate invalidation --- .../browser/runtime/jiterpreter-trace-generator.ts | 13 ++++++------- src/mono/mono/mini/interp/interp.c | 3 ++- src/mono/mono/mini/interp/mintops.def | 8 ++++++-- src/mono/mono/mini/interp/transform.c | 6 ++++++ 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/mono/browser/runtime/jiterpreter-trace-generator.ts b/src/mono/browser/runtime/jiterpreter-trace-generator.ts index 6b05d6f41b011..fb36b03e2cdfe 100644 --- a/src/mono/browser/runtime/jiterpreter-trace-generator.ts +++ b/src/mono/browser/runtime/jiterpreter-trace-generator.ts @@ -466,7 +466,7 @@ export function generateWasmBody ( } case MintOpcode.MINT_LOCALLOC: { // dest - append_ldloca(builder, getArgU16(ip, 1)); + append_ldloca(builder, getArgU16(ip, 1), 0); // len append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load); // frame @@ -647,10 +647,12 @@ export function generateWasmBody ( // locals[ip[1]] = &locals[ip[2]] const offset = getArgU16(ip, 2), flag = isAddressTaken(builder, offset), - destOffset = getArgU16(ip, 1); + destOffset = getArgU16(ip, 1), + // Size value stored for us by emit_compacted_instruction so we can do invalidation + size = getArgU16(ip, 3); if (!flag) mono_log_error(`${traceName}: Expected local ${offset} to have address taken flag`); - append_ldloca(builder, offset); + append_ldloca(builder, offset, size); append_stloc_tail(builder, destOffset, WasmOpcode.i32_store); // Record this ldloca as a known constant so that later uses of it turn into a lea, // and the wasm runtime can constant fold them with other constants. It's not uncommon @@ -1972,10 +1974,7 @@ function append_stloc_tail (builder: WasmBuilder, offset: number, opcodeOrPrefix // Pass bytesInvalidated=0 if you are reading from the local and the address will never be // used for writes -function append_ldloca (builder: WasmBuilder, localOffset: number, bytesInvalidated?: number) { - if (typeof (bytesInvalidated) !== "number") - bytesInvalidated = 512; - // FIXME: We need to know how big this variable is so we can invalidate the whole space it occupies +function append_ldloca (builder: WasmBuilder, localOffset: number, bytesInvalidated: number) { if (bytesInvalidated > 0) invalidate_local_range(localOffset, bytesInvalidated); builder.lea("pLocals", localOffset); diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c index 12f2cdd50350e..1dda6c2164891 100644 --- a/src/mono/mono/mini/interp/interp.c +++ b/src/mono/mono/mini/interp/interp.c @@ -7657,7 +7657,8 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK; MINT_IN_CASE(MINT_LDLOCA_S) LOCAL_VAR (ip [1], gpointer) = locals + ip [2]; - ip += 3; + // ip[3] reserved for size data for jiterpreter + ip += 4; MINT_IN_BREAK; #define MOV(argtype1,argtype2) \ diff --git a/src/mono/mono/mini/interp/mintops.def b/src/mono/mono/mini/interp/mintops.def index 11e65fb97ade6..5e2c41d60c47f 100644 --- a/src/mono/mono/mini/interp/mintops.def +++ b/src/mono/mono/mini/interp/mintops.def @@ -128,7 +128,11 @@ OPDEF(MINT_MOV_8_2, "mov.8.2", 5, 0, 0, MintOpPair2) OPDEF(MINT_MOV_8_3, "mov.8.3", 7, 0, 0, MintOpPair3) OPDEF(MINT_MOV_8_4, "mov.8.4", 9, 0, 0, MintOpPair4) -OPDEF(MINT_LDLOCA_S, "ldloca.s", 3, 1, 0, MintOpUShortInt) +// NOTE: We reserve an extra ushort at the end of this specifically to communicate information +// to the jiterpreter about how large the local is so that invalidation can be correct. +// FIXME: genmintops.py is way too simple to handle this having a different size on different targets, +// so it's got a size of 4 everywhere. +OPDEF(MINT_LDLOCA_S, "ldloca.s", 4, 1, 0, MintOpUShortInt) OPDEF(MINT_LDIND_I1, "ldind.i1", 3, 1, 1, MintOpNoArgs) OPDEF(MINT_LDIND_U1, "ldind.u1", 3, 1, 1, MintOpNoArgs) @@ -838,7 +842,7 @@ OPDEF(MINT_INTRINS_WIDEN_ASCII_TO_UTF16, "intrins_widen_ascii_to_utf16", 5, 1, 3 OPDEF(MINT_METADATA_UPDATE_LDFLDA, "metadata_update.ldflda", 5, 1, 1, MintOpTwoShorts) // This ifdef is fine because genmintops.py is generating output for HOST_BROWSER -#if HOST_BROWSER +#ifdef HOST_BROWSER OPDEF(MINT_TIER_PREPARE_JITERPRETER, "tier_prepare_jiterpreter", 4, 0, 0, MintOpShortAndInt) OPDEF(MINT_TIER_NOP_JITERPRETER, "tier_nop_jiterpreter", 4, 0, 0, MintOpShortAndInt) OPDEF(MINT_TIER_ENTER_JITERPRETER, "tier_enter_jiterpreter", 4, 0, 0, MintOpShortAndInt) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 4b1865b8310b5..22c5518a82c04 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -9241,6 +9241,12 @@ emit_compacted_instruction (TransformData *td, guint16* start_ip, InterpInst *in } else if (opcode == MINT_LDLOCA_S) { // This opcode receives a local but it is not viewed as a sreg since we don't load the value *ip++ = GINT_TO_UINT16 (get_var_offset (td, ins->sregs [0])); + +#if HOST_BROWSER + // We reserve an extra 2 bytes at the end of ldloca_s so the jiterpreter knows how large + // the var is when taking its address so that it can invalidate a properly sized range. + *ip++ = GINT_TO_UINT16 (td->vars [ins->sregs [0]].size); +#endif } int left = interp_get_ins_length (ins) - GPTRDIFF_TO_INT(ip - start_ip);