Skip to content

Commit

Permalink
[mono][interp] Add vectorization for Vector4 (#87822)
Browse files Browse the repository at this point in the history
* [mono][interp] Reuse vectorization for Vector4

Which is pretty much Vector<float>

* [mono][interp] Implement Vector.One for float gparam

* [mono][interp] Extract code to be reused later

* [mono][interp] Intrinsify Vector<T> ctor and Vector4 ctor

The constructors receive each element as an argument. This is the same as `Vector128.Create` so we reuse that code. This ends up generating a MINT_SIMD_V128_I*_CREATE opcode which receives each element value as an argument.

* [mono][interp] Track r4 var value during interpreter optimizations

Since it is common to instantiate Vector4 with constant values, also add support for tracking R4 values during optimizations. If all elements are constant, we instead generate a single MINT_SIMD_V128_LDC, which has the entire vector value embedded in the instruction stream.

* [mono][interp] Fix Vector ctor intrinsic when called explicitly

* [mono][interp] Replace ldloca + stobj.vt pair with mov.vt

* [mono][interp] Return early if vector_klass is not simd type

It is not marked as a SIMD type if the generic param is not valid (e.g. not a primitive type). Later checks do not expect this scenario and can potentially crash.
  • Loading branch information
BrzVlad committed Jul 5, 2023
1 parent 443b3bb commit 6d74424
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 30 deletions.
2 changes: 1 addition & 1 deletion src/mono/mono/mini/interp/mintops.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ typedef enum {
#define MINT_IS_SIMD_CREATE(op) ((op) >= MINT_SIMD_V128_I1_CREATE && (op) <= MINT_SIMD_V128_I8_CREATE)

// TODO Add more
#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_LDPTR || op == MINT_BOX)
#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_LDC_R4 || op == MINT_LDC_R8 || op == MINT_LDPTR || op == MINT_BOX)

#define MINT_CALL_ARGS 2
#define MINT_CALL_ARGS_SREG -2
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/mini/interp/simd-methods.def
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
SIMD_METHOD2(".ctor", ctor)
SIMD_METHOD(get_Count)
SIMD_METHOD(get_AllBitsSet)
SIMD_METHOD(get_IsHardwareAccelerated)
Expand Down
128 changes: 104 additions & 24 deletions src/mono/mono/mini/interp/transform-simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,21 @@
#define MSGSTRFIELD1(line) str##line
// String table holding the names of all intrinsified SIMD methods.
//
// simd-methods.def is included twice with different definitions of the
// SIMD_METHOD / SIMD_METHOD2 macros:
//  - first pass: declares one char array field per entry, sized to hold the
//    method name including its NUL terminator. The field identifier is built
//    from __LINE__ (presumably `str<line>` via MSGSTRFIELD/MSGSTRFIELD1), so
//    each entry of the .def file gets a distinct field.
//  - second pass: emits the matching string initializer for each field.
//
// SIMD_METHOD(name) uses the identifier itself as the method name, while
// SIMD_METHOD2(str,name) supplies an explicit string for names that are not
// valid C identifiers (e.g. ".ctor").
static const struct msgstr_t {
#define SIMD_METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)];
#define SIMD_METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)];
#include "simd-methods.def"
#undef SIMD_METHOD
#undef SIMD_METHOD2
} method_names = {
#define SIMD_METHOD(name) #name,
#define SIMD_METHOD2(str,name) str,
#include "simd-methods.def"
#undef SIMD_METHOD
#undef SIMD_METHOD2
};

// Method identifiers for the SIMD intrinsics.
//
// Each SN_<name> constant is the byte offset of the corresponding name field
// inside struct msgstr_t, obtained by including simd-methods.def once more
// with SIMD_METHOD / SIMD_METHOD2 expanding to an enumerator. For
// SIMD_METHOD2 the second argument provides the identifier suffix, since the
// string form (first argument) may not be a valid C identifier.
enum {
#define SIMD_METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
#define SIMD_METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)),
#include "simd-methods.def"
};

Expand Down Expand Up @@ -91,6 +96,7 @@ static guint16 sri_vector128_t_methods [] = {
};

static guint16 sn_vector_t_methods [] = {
SN_ctor,
SN_get_AllBitsSet,
SN_get_Count,
SN_get_One,
Expand Down Expand Up @@ -157,6 +163,12 @@ emit_common_simd_operations (TransformData *td, int id, int atype, int vector_si
for (int i = 0; i < vector_size / arg_size; i++)
data [i] = 1;
return TRUE;
} else if (atype == MONO_TYPE_R4) {
interp_add_ins (td, MINT_SIMD_V128_LDC);
float *data = (float*)&td->last_ins->data [0];
for (int i = 0; i < vector_size / arg_size; i++)
data [i] = 1.0f;
return TRUE;
}
break;
case SN_get_Zero:
Expand Down Expand Up @@ -310,6 +322,31 @@ emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMetho
td->ip += 5;
}

/*
 * emit_vector_create:
 *
 *   Emit a MINT_SIMD_V128_I*_CREATE instruction that builds a vector from the
 * arguments currently on the evaluation stack.
 *
 *   td:           transform state; the arguments are popped from td->sp and
 *                 the resulting vector is pushed back
 *   csignature:   signature of the intrinsified method; its param_count
 *                 selects the opcode (16 -> I1, 8 -> I2, 4 -> I4, 2 -> I8),
 *                 any other count trips an assertion
 *   vector_klass: class of the value type pushed as the result
 *   vector_size:  size passed along when pushing the result
 *
 *   This function does not advance td->ip.
 */
static void
emit_vector_create (TransformData *td, MonoMethodSignature *csignature, MonoClass *vector_klass, int vector_size)
{
	int num_args = csignature->param_count;

	// The element width of the create opcode follows from the number of arguments
	switch (num_args) {
	case 16:
		interp_add_ins (td, MINT_SIMD_V128_I1_CREATE);
		break;
	case 8:
		interp_add_ins (td, MINT_SIMD_V128_I2_CREATE);
		break;
	case 4:
		interp_add_ins (td, MINT_SIMD_V128_I4_CREATE);
		break;
	case 2:
		interp_add_ins (td, MINT_SIMD_V128_I8_CREATE);
		break;
	default:
		g_assert_not_reached ();
	}

	// There are too many args for regular sregs, so they are passed through
	// the call args machinery instead
	interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG);

	// Pop the arguments and record their locals in a -1 terminated array
	td->sp -= num_args;
	int *call_args = (int*)mono_mempool_alloc (td->mempool, (num_args + 1) * sizeof (int));
	int idx = 0;
	while (idx < num_args) {
		call_args [idx] = td->sp [idx].local;
		idx++;
	}
	call_args [idx] = -1;

	init_last_ins_call (td);
	td->last_ins->info.call_info->call_args = call_args;
	if (!td->optimized)
		td->last_ins->info.call_info->call_offset = get_tos_offset (td);

	// The created vector becomes the new top of stack and the instruction's destination
	push_type_vt (td, vector_klass, vector_size);
	interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
}

static gboolean
emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature)
{
Expand Down Expand Up @@ -352,26 +389,7 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
else if (arg_size == 4) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I4_CREATE;
else if (arg_size == 8) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I8_CREATE;
} else if (csignature->param_count == vector_size / arg_size && atype == csignature->params [0]->type) {
int num_args = csignature->param_count;
if (num_args == 16) interp_add_ins (td, MINT_SIMD_V128_I1_CREATE);
else if (num_args == 8) interp_add_ins (td, MINT_SIMD_V128_I2_CREATE);
else if (num_args == 4) interp_add_ins (td, MINT_SIMD_V128_I4_CREATE);
else if (num_args == 2) interp_add_ins (td, MINT_SIMD_V128_I8_CREATE);
else g_assert_not_reached ();

// We use call args machinery since we have too many args
interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG);
int *call_args = (int*)mono_mempool_alloc (td->mempool, (num_args + 1) * sizeof (int));
td->sp -= csignature->param_count;
for (int i = 0; i < num_args; i++)
call_args [i] = td->sp [i].local;
call_args [num_args] = -1;
init_last_ins_call (td);
td->last_ins->info.call_info->call_args = call_args;
if (!td->optimized)
td->last_ins->info.call_info->call_offset = get_tos_offset (td);
push_type_vt (td, vector_klass, vector_size);
interp_ins_set_dreg (td->last_ins, td->sp [-1].local);
emit_vector_create (td, csignature, vector_klass, vector_size);
td->ip += 5;
return TRUE;
}
Expand Down Expand Up @@ -507,7 +525,7 @@ emit_sri_vector128_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignatur
}

static gboolean
emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature)
emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj)
{
int id = lookup_intrins (sn_vector_t_methods, sizeof (sn_vector_t_methods), cmethod);
if (id == -1)
Expand All @@ -518,14 +536,74 @@ emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *c

// First argument is always vector
MonoClass *vector_klass = cmethod->klass;
if (!m_class_is_simd_type (vector_klass))
return FALSE;

MonoTypeEnum atype;
int vector_size, arg_size, scalar_arg;
if (!get_common_simd_info (vector_klass, csignature, &atype, &vector_size, &arg_size, &scalar_arg))
return FALSE;

if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins))
if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) {
goto opcode_added;
} else if (id == SN_ctor) {
if (csignature->param_count == vector_size / arg_size && atype == csignature->params [0]->type) {
emit_vector_create (td, csignature, vector_klass, vector_size);
if (!newobj) {
// If the ctor is called explicitly, then we need to store to the passed `this`
interp_emit_stobj (td, vector_klass, FALSE);
td->ip += 5;
}
return TRUE;
}
}

if (simd_opcode == -1 || simd_intrins == -1)
return FALSE;

interp_add_ins (td, simd_opcode);
td->last_ins->data [0] = simd_intrins;

opcode_added:
emit_common_simd_epilogue (td, vector_klass, csignature, vector_size, FALSE);
return TRUE;
}

static gboolean
emit_sn_vector4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj)
{
int id = lookup_intrins (sn_vector_t_methods, sizeof (sn_vector_t_methods), cmethod);
if (id == -1)
return FALSE;

gint16 simd_opcode = -1;
gint16 simd_intrins = -1;

// First argument is always vector
MonoClass *vector_klass = cmethod->klass;

MonoTypeEnum atype = MONO_TYPE_R4;
int vector_size = SIZEOF_V128;
int arg_size = sizeof (float);
int scalar_arg = -1;
for (int i = 0; i < csignature->param_count; i++) {
if (csignature->params [i]->type != MONO_TYPE_GENERICINST)
scalar_arg = i;
}

if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) {
goto opcode_added;
} else if (id == SN_ctor) {
if (csignature->param_count == vector_size / arg_size && atype == csignature->params [0]->type) {
emit_vector_create (td, csignature, vector_klass, vector_size);
if (!newobj) {
// If the ctor is called explicitly, then we need to store to the passed `this`
interp_emit_stobj (td, vector_klass, FALSE);
td->ip += 5;
}
return TRUE;
}
}

if (simd_opcode == -1 || simd_intrins == -1)
return FALSE;
Expand Down Expand Up @@ -805,7 +883,7 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature
}

static gboolean
interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature)
interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj)
{
const char *class_name;
const char *class_ns;
Expand All @@ -824,7 +902,9 @@ interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodS
return emit_sri_vector128_t (td, cmethod, csignature);
} else if (!strcmp (class_ns, "System.Numerics")) {
if (!strcmp (class_name, "Vector`1"))
return emit_sn_vector_t (td, cmethod, csignature);
return emit_sn_vector_t (td, cmethod, csignature, newobj);
else if (!strcmp (class_name, "Vector4"))
return emit_sn_vector4 (td, cmethod, csignature, newobj);
} else if (!strcmp (class_ns, "System.Runtime.Intrinsics.Wasm")) {
if (!strcmp (class_name, "PackedSimd"))
return emit_sri_packedsimd (td, cmethod, csignature);
Expand Down
35 changes: 32 additions & 3 deletions src/mono/mono/mini/interp/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -1981,7 +1981,7 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas
const char *klass_name = m_class_get_name (target_method->klass);

#ifdef INTERP_ENABLE_SIMD
if ((mono_interp_opt & INTERP_OPT_SIMD) && interp_emit_simd_intrinsics (td, target_method, csignature))
if ((mono_interp_opt & INTERP_OPT_SIMD) && interp_emit_simd_intrinsics (td, target_method, csignature, FALSE))
return TRUE;
#endif

Expand Down Expand Up @@ -6289,6 +6289,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
init_last_ins_call (td);
td->last_ins->info.call_info->call_offset = call_offset;
} else {
#ifdef INTERP_ENABLE_SIMD
if ((mono_interp_opt & INTERP_OPT_SIMD) && interp_emit_simd_intrinsics (td, m, csignature, TRUE))
break;
#endif
td->sp -= csignature->param_count;

// Move params types in temporary buffer
Expand Down Expand Up @@ -9362,7 +9366,7 @@ write_v128_element (gpointer v128_addr, LocalValue *val, int index, int el_size)
switch (el_size) {
case 1: *(gint8*)el_addr = (gint8)val->i; break;
case 2: *(gint16*)el_addr = (gint16)val->i; break;
case 4: *(gint32*)el_addr = val->i; break;
case 4: *(gint32*)el_addr = val->i; break; // this also handles r4
case 8: *(gint64*)el_addr = val->l; break;
default:
g_assert_not_reached ();
Expand All @@ -9379,7 +9383,7 @@ interp_fold_simd_create (TransformData *td, InterpBasicBlock *cbb, LocalValue *l
int var = args [index];
while (var != -1) {
LocalValue *val = &local_defs [var];
if (val->type != LOCAL_VALUE_I4 && val->type != LOCAL_VALUE_I8)
if (val->type != LOCAL_VALUE_I4 && val->type != LOCAL_VALUE_I8 && val->type != LOCAL_VALUE_R4)
return ins;
index++;
var = args [index];
Expand Down Expand Up @@ -9654,6 +9658,11 @@ interp_cprop (TransformData *td)
} else if (MINT_IS_LDC_I8 (opcode)) {
local_defs [dreg].type = LOCAL_VALUE_I8;
local_defs [dreg].l = interp_get_const_from_ldc_i8 (ins);
} else if (opcode == MINT_LDC_R4) {
guint32 val_u = READ32 (&ins->data [0]);
float f = *(float*)(&val_u);
local_defs [dreg].type = LOCAL_VALUE_R4;
local_defs [dreg].f = f;
} else if (ins->opcode == MINT_LDPTR) {
#if SIZEOF_VOID_P == 8
local_defs [dreg].type = LOCAL_VALUE_I8;
Expand Down Expand Up @@ -9824,6 +9833,26 @@ interp_cprop (TransformData *td)
dump_interp_inst (ins, td->data_items);
}
}
} else if (opcode == MINT_STOBJ_VT || opcode == MINT_STOBJ_VT_NOREF) {
InterpInst *ldloca = local_defs [sregs [0]].ins;
if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
int stsize = ins->data [0];
int local = ldloca->sregs [0];

if (stsize == td->locals [local].size) {
// Replace LDLOCA + STOBJ_VT with MOV_VT
local_ref_count [sregs [0]]--;
ins->opcode = MINT_MOV_VT;
sregs [0] = sregs [1];
ins->dreg = local;
needs_retry = TRUE;

if (td->verbose_level) {
g_print ("Replace ldloca/stobj_vt pair :\n\t");
dump_interp_inst (ins, td->data_items);
}
}
}
} else if (MINT_IS_STIND (opcode)) {
InterpInst *ldloca = local_defs [sregs [0]].ins;
if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) {
Expand Down
6 changes: 4 additions & 2 deletions src/mono/mono/mini/interp/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ typedef struct
#define LOCAL_VALUE_LOCAL 1
#define LOCAL_VALUE_I4 2
#define LOCAL_VALUE_I8 3
#define LOCAL_VALUE_NON_NULL 4
#define LOCAL_VALUE_R4 4
#define LOCAL_VALUE_NON_NULL 5

// LocalValue contains data to construct an InterpInst that is equivalent with the contents
// of the stack slot / local / argument.
Expand All @@ -62,6 +63,7 @@ typedef struct {
int local;
gint32 i;
gint64 l;
float f;
};
// The instruction that writes this local.
InterpInst *ins;
Expand Down Expand Up @@ -381,6 +383,6 @@ mono_interp_print_td_code (TransformData *td);

/* Forward definitions for simd methods */
static gboolean
interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature);
interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj);

#endif /* __MONO_MINI_INTERP_TRANSFORM_H__ */

0 comments on commit 6d74424

Please sign in to comment.