Skip to content

Commit

Permalink
Optimize bundled_resources key creation, hashing, and comparison
Browse files Browse the repository at this point in the history
  • Loading branch information
kg committed Apr 24, 2024
1 parent b5ee986 commit 0c58523
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 42 deletions.
4 changes: 3 additions & 1 deletion src/mono/mono/metadata/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ set(imported_native_sources
../../../native/containers/dn-simdhash.c
../../../native/containers/dn-simdhash-string-ptr.c
../../../native/containers/dn-simdhash-u32-ptr.c
../../../native/containers/dn-simdhash-ptrpair-ptr.c)
../../../native/containers/dn-simdhash-ptrpair-ptr.c
../../../native/containers/dn-simdhash-ptr-ptr.c
../../../native/containers/dn-simdhash-ght-compatible.c)

set(metadata_common_sources
appdomain.c
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/metadata/bundled-resources-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ typedef enum {

typedef void (*free_bundled_resource_func)(void *, void*);

// WARNING: The layout of these structs cannot change because EmitBundleBase.cs depends on it!
typedef struct _MonoBundledResource {
MonoBundledResourceType type;
const char *id;
Expand Down
116 changes: 75 additions & 41 deletions src/mono/mono/metadata/bundled-resources.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
#include <mono/metadata/appdomain.h>
#include <mono/metadata/bundled-resources-internals.h>
#include <mono/metadata/webcil-loader.h>
#include "../native/containers/dn-simdhash-specializations.h"
#include "../native/containers/dn-simdhash-utils.h"

static GHashTable *bundled_resources = NULL;
static dn_simdhash_ght_t *bundled_resources = NULL;
static dn_simdhash_ptr_ptr_t *bundled_resource_key_lookup_table = NULL;
static bool bundled_resources_contains_assemblies = false;
static bool bundled_resources_contains_satellite_assemblies = false;

Expand All @@ -31,8 +34,10 @@ mono_bundled_resources_free (void)
{
g_assert (mono_runtime_is_shutting_down ());

g_hash_table_destroy (bundled_resources);
dn_simdhash_free (bundled_resources);
dn_simdhash_free (bundled_resource_key_lookup_table);
bundled_resources = NULL;
bundled_resource_key_lookup_table = NULL;

bundled_resources_contains_assemblies = false;
bundled_resources_contains_satellite_assemblies = false;
Expand All @@ -50,6 +55,12 @@ bundled_resources_value_destroy_func (void *resource)
MonoBundledResource *value = (MonoBundledResource *)resource;
if (value->free_func)
value->free_func (resource, value->free_data);

char *key;
if (dn_simdhash_ptr_ptr_try_get_value (bundled_resource_key_lookup_table, (void *)value->id, (void **)&key)) {
dn_simdhash_ptr_ptr_try_remove (bundled_resource_key_lookup_table, (void *)value->id);
g_free (key);
}
}

static bool
Expand All @@ -62,48 +73,58 @@ bundled_resources_is_known_assembly_extension (const char *ext)
#endif
}

// Returns a pointer to the last occurrence of c within the first n bytes of s,
// or NULL if c does not occur in that range (including when n is 0).
//
// strrchr calls strlen, so callers that already know the length do a search
// with the known length instead. memrchr is declared in a header, but the build
// fails when we try to use it, hence this local implementation.
static const char *
g_memrchr (const char *s, char c, size_t n)
{
	// Walk backwards from the last byte; n is decremented before each probe,
	// so the indices visited are n-1 down to 0.
	while (n--) {
		if (s[n] == c)
			return s + n;
	}
	return NULL;
}

static guint
bundled_resources_resource_id_hash (const char *id)
// If a bundled resource has a known assembly extension, we strip the extension from its name
// This ensures that lookups for foo.dll will work even if the assembly is in a webcil container
static char *
key_from_id (const char *id, char *buffer, guint buffer_len)
{
const char *current = id;
const char *extension = NULL;
guint previous_hash = 0;
guint hash = 0;

while (*current) {
hash = (hash << 5) - (hash + *current);
if (*current == '.') {
extension = current;
previous_hash = hash;
}
current++;
size_t id_length = strlen (id),
extension_offset = -1;
const char *extension = g_memrchr (id, '.', id_length);
if (extension)
extension_offset = extension - id;
if (!buffer) {
buffer_len = (guint)(id_length + 1);
buffer = g_malloc (buffer_len);
}
buffer[0] = 0;

// alias all extensions to .dll
if (extension && bundled_resources_is_known_assembly_extension (extension)) {
hash = previous_hash;
hash = (hash << 5) - (hash + 'd');
hash = (hash << 5) - (hash + 'l');
hash = (hash << 5) - (hash + 'l');
}
if (extension_offset && bundled_resources_is_known_assembly_extension (extension))
g_strlcpy(buffer, id, MIN(buffer_len, extension_offset + 1));
else
g_strlcpy(buffer, id, MIN(buffer_len, id_length + 1));

return buffer;
}

static gboolean
bundled_resources_resource_id_equal (const char *key_one, const char *key_two)
{
return strcmp (key_one, key_two) == 0;
}

return hash;
static guint32
bundled_resources_resource_id_hash (const char *key)
{
// FIXME: Seed
// FIXME: We should cache the hash code so rehashes are cheaper
return MurmurHash3_32_streaming ((const uint8_t *)key, 0);
}

static MonoBundledResource *
bundled_resources_get (const char *id);

//---------------------------------------------------------------------------------------
//
// mono_bundled_resources_add handles bundling of many types of resources to circumvent
Expand All @@ -130,7 +151,11 @@ mono_bundled_resources_add (MonoBundledResource **resources_to_bundle, uint32_t
g_assert (!domain);

if (!bundled_resources)
bundled_resources = g_hash_table_new_full ((GHashFunc)bundled_resources_resource_id_hash, (GEqualFunc)bundled_resources_resource_id_equal, NULL, bundled_resources_value_destroy_func);
// FIXME: Choose a good initial capacity to avoid rehashes during startup. I picked one at random
bundled_resources = dn_simdhash_ght_new_full ((GHashFunc)bundled_resources_resource_id_hash, (GEqualFunc)bundled_resources_resource_id_equal, NULL, bundled_resources_value_destroy_func, 2048, NULL);

if (!bundled_resource_key_lookup_table)
bundled_resource_key_lookup_table = dn_simdhash_ptr_ptr_new (2048, NULL);

bool assemblyAdded = false;
bool satelliteAssemblyAdded = false;
Expand All @@ -143,7 +168,13 @@ mono_bundled_resources_add (MonoBundledResource **resources_to_bundle, uint32_t
if (resource_to_bundle->type == MONO_BUNDLED_SATELLITE_ASSEMBLY)
satelliteAssemblyAdded = true;

g_hash_table_insert (bundled_resources, (gpointer) resource_to_bundle->id, resource_to_bundle);
// Generate the hash key for the id (strip certain extensions) and store it
// so that we can free it later when freeing the bundled data
char *key = key_from_id (resource_to_bundle->id, NULL, 0);
dn_simdhash_ptr_ptr_try_add (bundled_resource_key_lookup_table, (void *)resource_to_bundle->id, key);

g_assert (dn_simdhash_ght_try_add (bundled_resources, (gpointer) key, resource_to_bundle));
// g_assert (bundled_resources_get (resource_to_bundle->id) == resource_to_bundle);
}

if (assemblyAdded)
Expand Down Expand Up @@ -172,7 +203,12 @@ bundled_resources_get (const char *id)
if (!bundled_resources)
return NULL;

return g_hash_table_lookup (bundled_resources, id);
char key_buffer[1024];
key_from_id(id, key_buffer, 1024);

MonoBundledResource *result = NULL;
dn_simdhash_ght_try_get_value (bundled_resources, key_buffer, (void **)&result);
return result;
}

//---------------------------------------------------------------------------------------
Expand Down Expand Up @@ -364,9 +400,7 @@ bool
mono_bundled_resources_get_data_resource_values (const char *id, const uint8_t **data_out, uint32_t *size_out)
{
MonoBundledDataResource *bundled_data_resource = bundled_resources_get_data_resource (id);
if (!bundled_data_resource ||
!bundled_data_resource->data.data ||
bundled_data_resource->data.size == 0)
if (!bundled_data_resource || !bundled_data_resource->data.data)
return false;

if (data_out)
Expand Down

0 comments on commit 0c58523

Please sign in to comment.