From e5d75cd6079d26f306690e1164faf22df7fa8163 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Tue, 23 Apr 2024 17:07:53 -0700 Subject: [PATCH 1/4] Optimize bundled_resources key creation, hashing, and comparison --- .../metadata/bundled-resources-internals.h | 1 + src/mono/mono/metadata/bundled-resources.c | 116 +++++++++++------- 2 files changed, 76 insertions(+), 41 deletions(-) diff --git a/src/mono/mono/metadata/bundled-resources-internals.h b/src/mono/mono/metadata/bundled-resources-internals.h index 1a8c03f4fb4e7..34e911ba7a1eb 100644 --- a/src/mono/mono/metadata/bundled-resources-internals.h +++ b/src/mono/mono/metadata/bundled-resources-internals.h @@ -17,6 +17,7 @@ typedef enum { typedef void (*free_bundled_resource_func)(void *, void*); +// WARNING: The layout of these structs cannot change because EmitBundleBase.cs depends on it! typedef struct _MonoBundledResource { MonoBundledResourceType type; const char *id; diff --git a/src/mono/mono/metadata/bundled-resources.c b/src/mono/mono/metadata/bundled-resources.c index 9eae4f5db8fa8..ef824db6d766c 100644 --- a/src/mono/mono/metadata/bundled-resources.c +++ b/src/mono/mono/metadata/bundled-resources.c @@ -8,8 +8,11 @@ #include #include #include +#include "../native/containers/dn-simdhash-specializations.h" +#include "../native/containers/dn-simdhash-utils.h" -static GHashTable *bundled_resources = NULL; +static dn_simdhash_ght_t *bundled_resources = NULL; +static dn_simdhash_ptr_ptr_t *bundled_resource_key_lookup_table = NULL; static bool bundled_resources_contains_assemblies = false; static bool bundled_resources_contains_satellite_assemblies = false; @@ -31,8 +34,10 @@ mono_bundled_resources_free (void) { g_assert (mono_runtime_is_shutting_down ()); - g_hash_table_destroy (bundled_resources); + dn_simdhash_free (bundled_resources); + dn_simdhash_free (bundled_resource_key_lookup_table); bundled_resources = NULL; + bundled_resource_key_lookup_table = NULL; bundled_resources_contains_assemblies = false; bundled_resources_contains_satellite_assemblies = false; @@ -50,6 +55,12 @@ bundled_resources_value_destroy_func (void *resource) MonoBundledResource *value = (MonoBundledResource *)resource; if (value->free_func) value->free_func (resource, value->free_data); + + char *key; + if (dn_simdhash_ptr_ptr_try_get_value (bundled_resource_key_lookup_table, (void *)value->id, (void **)&key)) { + dn_simdhash_ptr_ptr_try_remove (bundled_resource_key_lookup_table, (void *)value->id); + g_free (key); + } } static bool @@ -62,48 +73,58 @@ bundled_resources_is_known_assembly_extension (const char *ext) #endif } -static gboolean -bundled_resources_resource_id_equal (const char *id_one, const char *id_two) +// strrchr calls strlen, so we need to do a search with known length instead +// for some reason memrchr is defined in a header but the build fails when we try to use it +static const char * +g_memrchr (const char *s, char c, size_t n) { - const char *extension_one = strrchr (id_one, '.'); - const char *extension_two = strrchr (id_two, '.'); - if (extension_one && extension_two && bundled_resources_is_known_assembly_extension (extension_one) && bundled_resources_is_known_assembly_extension (extension_two)) { - size_t len_one = extension_one - id_one; - size_t len_two = extension_two - id_two; - return (len_one == len_two) && !strncmp (id_one, id_two, len_one); - } - - return !strcmp (id_one, id_two); + while (n--) + if (s[n] == c) + return (void *)(s + n); + return NULL; } -static guint -bundled_resources_resource_id_hash (const char *id) +// If a bundled resource has a known assembly extension, we strip the extension from its name +// This ensures that lookups for foo.dll will work even if the assembly is in a webcil container +static char * +key_from_id (const char *id, char *buffer, guint buffer_len) { - const char *current = id; - const char *extension = NULL; - guint previous_hash = 0; - guint hash = 0; - - while (*current) { - hash = (hash << 5) - (hash + *current); - if (*current == '.') { - extension = current; - previous_hash = hash; - } - current++; + size_t id_length = strlen (id), + extension_offset = -1; + const char *extension = g_memrchr (id, '.', id_length); + if (extension) + extension_offset = extension - id; + if (!buffer) { + buffer_len = (guint)(id_length + 1); + buffer = g_malloc (buffer_len); } + buffer[0] = 0; - // alias all extensions to .dll - if (extension && bundled_resources_is_known_assembly_extension (extension)) { - hash = previous_hash; - hash = (hash << 5) - (hash + 'd'); - hash = (hash << 5) - (hash + 'l'); - hash = (hash << 5) - (hash + 'l'); - } + if (extension_offset && bundled_resources_is_known_assembly_extension (extension)) + g_strlcpy(buffer, id, MIN(buffer_len, extension_offset + 1)); + else + g_strlcpy(buffer, id, MIN(buffer_len, id_length + 1)); + + return buffer; +} + +static gboolean +bundled_resources_resource_id_equal (const char *key_one, const char *key_two) +{ + return strcmp (key_one, key_two) == 0; +} - return hash; +static guint32 +bundled_resources_resource_id_hash (const char *key) +{ + // FIXME: Seed + // FIXME: We should cache the hash code so rehashes are cheaper + return MurmurHash3_32_streaming ((const uint8_t *)key, 0); } +static MonoBundledResource * +bundled_resources_get (const char *id); + //--------------------------------------------------------------------------------------- // // mono_bundled_resources_add handles bundling of many types of resources to circumvent @@ -130,7 +151,11 @@ mono_bundled_resources_add (MonoBundledResource **resources_to_bundle, uint32_t g_assert (!domain); if (!bundled_resources) - bundled_resources = g_hash_table_new_full ((GHashFunc)bundled_resources_resource_id_hash, (GEqualFunc)bundled_resources_resource_id_equal, NULL, bundled_resources_value_destroy_func); + // FIXME: Choose a good initial capacity to avoid rehashes during startup. I picked one at random + bundled_resources = dn_simdhash_ght_new_full ((GHashFunc)bundled_resources_resource_id_hash, (GEqualFunc)bundled_resources_resource_id_equal, NULL, bundled_resources_value_destroy_func, 2048, NULL); + + if (!bundled_resource_key_lookup_table) + bundled_resource_key_lookup_table = dn_simdhash_ptr_ptr_new (2048, NULL); bool assemblyAdded = false; bool satelliteAssemblyAdded = false; @@ -143,7 +168,13 @@ mono_bundled_resources_add (MonoBundledResource **resources_to_bundle, uint32_t if (resource_to_bundle->type == MONO_BUNDLED_SATELLITE_ASSEMBLY) satelliteAssemblyAdded = true; - g_hash_table_insert (bundled_resources, (gpointer) resource_to_bundle->id, resource_to_bundle); + // Generate the hash key for the id (strip certain extensions) and store it + // so that we can free it later when freeing the bundled data + char *key = key_from_id (resource_to_bundle->id, NULL, 0); + dn_simdhash_ptr_ptr_try_add (bundled_resource_key_lookup_table, (void *)resource_to_bundle->id, key); + + g_assert (dn_simdhash_ght_try_add (bundled_resources, (gpointer) key, resource_to_bundle)); + // g_assert (bundled_resources_get (resource_to_bundle->id) == resource_to_bundle); } if (assemblyAdded) @@ -172,7 +203,12 @@ bundled_resources_get (const char *id) if (!bundled_resources) return NULL; - return g_hash_table_lookup (bundled_resources, id); + char key_buffer[1024]; + key_from_id(id, key_buffer, 1024); + + MonoBundledResource *result = NULL; + dn_simdhash_ght_try_get_value (bundled_resources, key_buffer, (void **)&result); + return result; } //--------------------------------------------------------------------------------------- @@ -364,9 +400,7 @@ bool mono_bundled_resources_get_data_resource_values (const char *id, const uint8_t **data_out, uint32_t *size_out) { MonoBundledDataResource *bundled_data_resource = bundled_resources_get_data_resource (id); - if (!bundled_data_resource || - !bundled_data_resource->data.data || - bundled_data_resource->data.size == 0) + if (!bundled_data_resource || !bundled_data_resource->data.data) return false; if (data_out) From ae6371d2524f3e359c10ebce6b80b361959fd30a Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Wed, 24 Apr 2024 14:57:35 -0700 Subject: [PATCH 2/4] Try different resource key normalization because I'm stumped --- src/mono/mono/metadata/bundled-resources.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/mono/mono/metadata/bundled-resources.c b/src/mono/mono/metadata/bundled-resources.c index ef824db6d766c..6d4d287b1971b 100644 --- a/src/mono/mono/metadata/bundled-resources.c +++ b/src/mono/mono/metadata/bundled-resources.c @@ -95,15 +95,21 @@ key_from_id (const char *id, char *buffer, guint buffer_len) if (extension) extension_offset = extension - id; if (!buffer) { - buffer_len = (guint)(id_length + 1); + // Add space for .dll and null terminator + buffer_len = (guint)(id_length + 6); buffer = g_malloc (buffer_len); } buffer[0] = 0; - if (extension_offset && bundled_resources_is_known_assembly_extension (extension)) - g_strlcpy(buffer, id, MIN(buffer_len, extension_offset + 1)); - else - g_strlcpy(buffer, id, MIN(buffer_len, id_length + 1)); + if (extension_offset && bundled_resources_is_known_assembly_extension (extension)) { + // Subtract from buffer_len to make sure we have space for .dll + g_strlcpy (buffer, id, MIN(buffer_len - 4, extension_offset + 2)); + strcat (buffer, "dll"); + } else { + g_strlcpy (buffer, id, MIN(buffer_len, id_length + 1)); + } + + g_print ("key_from_id('%s') == '%s'\n", id, buffer); return buffer; } From 3f6a0a61bd7018b56bc91017b8494f284c1ffdb8 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Wed, 24 Apr 2024 21:42:58 -0700 Subject: [PATCH 3/4] Remove debug printf --- src/mono/mono/metadata/bundled-resources.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mono/mono/metadata/bundled-resources.c b/src/mono/mono/metadata/bundled-resources.c index 6d4d287b1971b..fd9bd495194d8 100644 --- a/src/mono/mono/metadata/bundled-resources.c +++ b/src/mono/mono/metadata/bundled-resources.c @@ -109,8 +109,6 @@ key_from_id (const char *id, char *buffer, guint buffer_len) g_strlcpy (buffer, id, MIN(buffer_len, id_length + 1)); } - g_print ("key_from_id('%s') == '%s'\n", id, buffer); - return buffer; } From a78577a603b47988bca1fed3c12118a779de63f5 Mon Sep 17 00:00:00 2001 From: Katelyn Gadd Date: Mon, 13 May 2024 09:30:32 -0700 Subject: [PATCH 4/4] Address PR feedback --- src/mono/mono/eglib/eglib-remap.h | 1 + src/mono/mono/eglib/glib.h | 1 + src/mono/mono/eglib/gstr.c | 9 +++++++++ src/mono/mono/metadata/bundled-resources.c | 13 +------------ 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/mono/mono/eglib/eglib-remap.h b/src/mono/mono/eglib/eglib-remap.h index 5fc770dfa9732..3f42bd8308afc 100644 --- a/src/mono/mono/eglib/eglib-remap.h +++ b/src/mono/mono/eglib/eglib-remap.h @@ -98,6 +98,7 @@ #define g_log_set_fatal_mask monoeg_g_log_set_fatal_mask #define g_logv monoeg_g_logv #define g_memdup monoeg_g_memdup +#define g_memrchr monoeg_g_memrchr #define g_mem_set_vtable monoeg_g_mem_set_vtable #define g_mem_get_vtable monoeg_g_mem_get_vtable #define g_mkdtemp monoeg_g_mkdtemp diff --git a/src/mono/mono/eglib/glib.h b/src/mono/mono/eglib/glib.h index 7957b6949c130..ae0dd994db235 100644 --- a/src/mono/mono/eglib/glib.h +++ b/src/mono/mono/eglib/glib.h @@ -375,6 +375,7 @@ gchar *g_strchug (gchar *str); gchar *g_strchomp (gchar *str); gchar *g_strnfill (gsize length, gchar fill_char); gsize g_strnlen (const char*, gsize); +const gchar *g_memrchr (const char *s, char c, size_t n); void g_strdelimit (char *string, char delimiter, char new_delimiter); diff --git a/src/mono/mono/eglib/gstr.c b/src/mono/mono/eglib/gstr.c index 6ae0eb6b668ba..557977cab0d69 100644 --- a/src/mono/mono/eglib/gstr.c +++ b/src/mono/mono/eglib/gstr.c @@ -783,3 +783,12 @@ g_strnlen (const char* s, gsize n) for (i = 0; i < n && s [i]; ++i) ; return i; } + +const gchar * +g_memrchr (const char *s, char c, size_t n) +{ + while (n--) + if (s[n] == c) + return (void *)(s + n); + return NULL; +} diff --git a/src/mono/mono/metadata/bundled-resources.c b/src/mono/mono/metadata/bundled-resources.c index fd9bd495194d8..7b2f80ef3edfd 100644 --- a/src/mono/mono/metadata/bundled-resources.c +++ b/src/mono/mono/metadata/bundled-resources.c @@ -73,17 +73,6 @@ bundled_resources_is_known_assembly_extension (const char *ext) #endif } -// strrchr calls strlen, so we need to do a search with known length instead -// for some reason memrchr is defined in a header but the build fails when we try to use it -static const char * -g_memrchr (const char *s, char c, size_t n) -{ - while (n--) - if (s[n] == c) - return (void *)(s + n); - return NULL; -} - // If a bundled resource has a known assembly extension, we strip the extension from its name // This ensures that lookups for foo.dll will work even if the assembly is in a webcil container static char * @@ -208,7 +197,7 @@ bundled_resources_get (const char *id) return NULL; char key_buffer[1024]; - key_from_id(id, key_buffer, 1024); + key_from_id(id, key_buffer, sizeof(key_buffer)); MonoBundledResource *result = NULL; dn_simdhash_ght_try_get_value (bundled_resources, key_buffer, (void **)&result);