Skip to content

Commit

Permalink
Rename Emscripten EHSjLj functions in wasm backend
Browse files Browse the repository at this point in the history
Now that we rename invoke wrappers and `emscripten_longjmp_jmpbuf` in
the wasm backend, this deletes all related renaming routines and the
relevant tests. We still need to generate dynCalls for invokes, so this
adds dynCall generation for invokes to the `GenerateDynCalls` pass and
moves the related functions from wasm-emscripten.cpp to
GenerateDynCalls.cpp, given that they are now only used there.

Addresses: WebAssembly#3043 and WebAssembly#3081
Companions:
  • Loading branch information
aheejin committed Oct 1, 2020
1 parent 7549fa4 commit 9eb7cf8
Show file tree
Hide file tree
Showing 14 changed files with 181 additions and 304 deletions.
88 changes: 83 additions & 5 deletions src/passes/GenerateDynCalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include "ir/import-utils.h"
#include "pass.h"
#include "support/debug.h"
#include "wasm-emscripten.h"
#include "wasm-builder.h"

#define DEBUG_TYPE "generate-dyncalls"

Expand All @@ -36,21 +36,99 @@ namespace wasm {
// Pass that adds exported "dynCall_<sig>" thunks to the module: one for the
// signature of each function listed in the first table segment, and one for
// the signature of each imported function whose base name starts with
// "invoke_". When onlyI64 is set, generateDynCallThunk skips signatures that
// contain no i64.
// NOTE(review): this listing looks like a rendered diff in which removed and
// added lines are interleaved (see visitTable) — confirm against the real
// file before relying on it.
struct GenerateDynCalls : public WalkerPass<PostWalker<GenerateDynCalls>> {
GenerateDynCalls(bool onlyI64) : onlyI64(onlyI64) {}

// Runs the regular module walk first, then emits one thunk per signature
// accumulated in invokeSigs (which visitFunction fills in).
void doWalkModule(Module* wasm) {
PostWalker<GenerateDynCalls>::doWalkModule(wasm);
for (auto& sig : invokeSigs) {
generateDynCallThunk(sig);
}
}

// Emits a thunk for the signature of every function named in the first table
// segment. Only segments[0] is examined.
void visitTable(Table* table) {
// Generate dynCalls for functions in the table
if (table->segments.size() > 0) {
// NOTE(review): the EmscriptenGlueGenerator-based lines below and the direct
// generateDynCallThunk call at the end of the loop appear to be the
// before/after sides of one change — verify which of them is live.
EmscriptenGlueGenerator generator(*getModule());
generator.onlyI64DynCalls = onlyI64;
// Declared but not used anywhere in this function.
std::vector<Name> tableSegmentData;
for (const auto& indirectFunc : table->segments[0].data) {
generator.generateDynCallThunk(
getModule()->getFunction(indirectFunc)->sig);
generateDynCallThunk(getModule()->getFunction(indirectFunc)->sig);
}
}
}

// Records the signature of each imported function whose base name starts with
// "invoke_"; the thunks themselves are created later in doWalkModule.
void visitFunction(Function* func) {
// Generate dynCalls for invokes
if (func->imported() && func->base.startsWith("invoke_")) {
invokeSigs.insert(func->sig);
}
}

// Creates and exports the "dynCall_<sig>" thunk for |sig| unless it is
// filtered out or already present (defined out of line).
void generateDynCallThunk(Signature sig);

// When true, thunks are only generated for signatures that contain an i64.
bool onlyI64;
// The set of all invokes' signatures
std::unordered_set<Signature> invokeSigs;
};

// Tells whether |sig| mentions i64 in its results or its parameters.
//
// dynCall thunks are only needed for such signatures: a function whose
// signature has no i64 can be called straight from JavaScript through the
// wasm table.
static bool hasI64(Signature sig) {
  auto containsI64 = [](Type types) {
    for (auto single : types) {
      if (single.getID() == Type::i64) {
        return true;
      }
    }
    return false;
  };
  // Results are inspected before parameters, as before.
  return containsI64(sig.results) || containsI64(sig.params);
}

// Exports the function called |name| under that same name.
//
// Does nothing when the module has no such function (asserting that the
// caller did not pass must_export) or when an export called |name| already
// exists.
static void exportFunction(Module& wasm, Name name, bool must_export) {
  const bool functionExists = wasm.getFunctionOrNull(name) != nullptr;
  if (!functionExists) {
    assert(!must_export);
    return;
  }

  const bool alreadyExported = wasm.getExportOrNull(name) != nullptr;
  if (alreadyExported) {
    return;
  }

  // The export name and the function it refers to are both |name|.
  auto* entry = new Export;
  entry->value = name;
  entry->name = name;
  entry->kind = ExternalKind::Function;
  wasm.addExport(entry);
}

// Adds an exported function "dynCall_<sig>" that takes a table index followed
// by the parameters of |sig| and forwards them to a call_indirect of that
// signature.
//
// Nothing is generated when the pass runs in i64-only mode and |sig| has no
// i64, or when the module already has a function or export of that name.
void GenerateDynCalls::generateDynCallThunk(Signature sig) {
  const bool filteredOut = onlyI64 && !hasI64(sig);
  if (filteredOut) {
    return;
  }

  Module* module = getModule();
  Builder builder(*module);
  Name thunkName = std::string("dynCall_") + getSig(sig.results, sig.params);
  if (module->getFunctionOrNull(thunkName)) {
    return; // a function with this name is already defined
  }
  if (module->getExportOrNull(thunkName)) {
    return; // an export with this name is already present
  }

  // Local 0 is the function pointer ("fptr"); the callee's own parameters
  // follow it and are named "0", "1", ... in order.
  std::vector<NameType> thunkParams;
  thunkParams.emplace_back("fptr", Type::i32);
  int paramOrdinal = 0;
  for (const auto& paramType : sig.params) {
    thunkParams.emplace_back(std::to_string(paramOrdinal), paramType);
    ++paramOrdinal;
  }
  Function* thunk =
    builder.makeFunction(thunkName, std::move(thunkParams), sig.results, {});

  // Forward locals 1..N as the arguments of the indirect call.
  Expression* target = builder.makeLocalGet(0, Type::i32);
  std::vector<Expression*> forwarded;
  Index localIndex = 1;
  for (const auto& paramType : sig.params) {
    forwarded.push_back(builder.makeLocalGet(localIndex, paramType));
    ++localIndex;
  }
  thunk->body = builder.makeCallIndirect(target, forwarded, sig);

  module->addFunction(thunk);
  exportFunction(*module, thunk->name, true);
}

// Factory for the pass variant that does not restrict thunks to i64
// signatures.
Pass* createGenerateDynCallsPass() {
  const bool onlyI64 = false;
  return new GenerateDynCalls(onlyI64);
}
// Factory for the pass variant that only generates thunks for signatures
// containing an i64.
Pass* createGenerateI64DynCallsPass() {
  const bool onlyI64 = true;
  return new GenerateDynCalls(onlyI64);
}

Expand Down
18 changes: 0 additions & 18 deletions src/passes/PostEmscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,31 +78,13 @@ struct OptimizeCalls : public WalkerPass<PostWalker<OptimizeCalls>> {

struct PostEmscripten : public Pass {
// Entry point of the pass. Performs three steps in this order: import
// optimization, call optimization (via OptimizeCalls), and exception
// optimization.
// NOTE(review): this listing looks like a rendered diff; the optimizeImports
// step may be the removed side of the change — confirm against the real file.
void run(PassRunner* runner, Module* module) override {
// Optimize imports
optimizeImports(runner, module);

// Optimize calls
OptimizeCalls().run(runner, module);

// Optimize exceptions
optimizeExceptions(runner, module);
}

void optimizeImports(PassRunner* runner, Module* module) {
  // emscripten_longjmp_jmpbuf is interchangeable with emscripten_longjmp, so
  // when the module imports both, point the former at the latter's base name
  // and end up with a single imported symbol.
  const Name longjmpBase("emscripten_longjmp");
  const Name longjmpJmpbufBase("emscripten_longjmp_jmpbuf");

  ImportInfo imports(*module);
  auto* longjmpImport = imports.getImportedFunction(ENV, longjmpBase);
  auto* jmpbufImport = imports.getImportedFunction(ENV, longjmpJmpbufBase);

  // Only worth doing when both imports are present.
  if (!longjmpImport || !jmpbufImport) {
    return;
  }
  jmpbufImport->base = longjmpBase;
}

// Optimize exceptions (and setjmp) by removing unnecessary invoke* calls.
// An invoke is a call to JS with a function pointer; JS does a try-catch
// and calls the pointer, catching and reporting any error. If we know no
Expand Down
2 changes: 0 additions & 2 deletions src/tools/wasm-emscripten-finalize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,6 @@ int main(int argc, const char* argv[]) {
generator.onlyI64DynCalls = onlyI64DynCalls;
generator.noDynCalls = noDynCalls;

generator.fixInvokeFunctionNames();

std::vector<Name> initializerFunctions;

// The wasm backend emits "__indirect_function_table" as the import name for
Expand Down
5 changes: 0 additions & 5 deletions src/wasm-emscripten.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ class EmscriptenGlueGenerator {
// the file).
void separateDataSegments(Output* outfile, Address base);

void generateDynCallThunk(Signature sig);

bool standalone = false;
bool sideModule = false;
bool minimizeWasmChanges = false;
Expand All @@ -71,9 +69,6 @@ class EmscriptenGlueGenerator {
Builder builder;
Address stackPointerOffset;
bool useStackPointerGlobal;
// Used by generateDynCallThunk to track all the dynCall functions created
// so far.
std::unordered_set<Signature> sigs;
};

} // namespace wasm
Expand Down
194 changes: 0 additions & 194 deletions src/wasm/wasm-emscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,68 +116,6 @@ void EmscriptenGlueGenerator::generatePostInstantiateFunction() {
wasm.addExport(ex);
}

// Makes sure the function |name| is exported under its own name.
//
// If the module does not define |name|, nothing is exported and must_export
// is asserted to be false. If an export called |name| is already there, it is
// left untouched.
inline void exportFunction(Module& wasm, Name name, bool must_export) {
  if (wasm.getFunctionOrNull(name) == nullptr) {
    assert(!must_export);
    return;
  }

  if (wasm.getExportOrNull(name) == nullptr) {
    // Not exported yet: create an export whose name and target are |name|.
    auto* created = new Export;
    created->value = name;
    created->name = name;
    created->kind = ExternalKind::Function;
    wasm.addExport(created);
  }
}

// Returns true when an i64 appears among the results or parameters of |sig|.
//
// Only those signatures get a dynCall function; anything else can be called
// directly from JavaScript using the wasm table.
static bool hasI64(Signature sig) {
  bool found = false;
  // Results are scanned first, then parameters; stop at the first i64.
  for (auto resultType : sig.results) {
    if (resultType.getID() == Type::i64) {
      found = true;
      break;
    }
  }
  if (!found) {
    for (auto paramType : sig.params) {
      if (paramType.getID() == Type::i64) {
        found = true;
        break;
      }
    }
  }
  return found;
}

// Creates and exports "dynCall_<sig>": a function taking a table index plus
// the parameters of |sig| and forwarding them to a call_indirect of that
// signature.
//
// Skipped when dynCalls are disabled, when only i64 dynCalls are wanted and
// |sig| has no i64, when |sig| was already handled by this generator, or when
// the module already has a function or export of that name.
void EmscriptenGlueGenerator::generateDynCallThunk(Signature sig) {
  if (noDynCalls) {
    return;
  }
  if (onlyI64DynCalls && !hasI64(sig)) {
    return;
  }

  // Remember the signature; a failed insertion means it was seen before.
  const bool firstTimeSeen = sigs.insert(sig).second;
  if (!firstTimeSeen) {
    return;
  }

  Name thunkName = std::string("dynCall_") + getSig(sig.results, sig.params);
  if (wasm.getFunctionOrNull(thunkName)) {
    return; // a function with this name is already defined
  }
  if (wasm.getExportOrNull(thunkName)) {
    return; // an export with this name is already present
  }

  // Local 0 is the function pointer ("fptr"); the callee's own parameters
  // follow it and are named "0", "1", ... in order.
  std::vector<NameType> thunkParams;
  thunkParams.emplace_back("fptr", Type::i32);
  int paramOrdinal = 0;
  for (const auto& paramType : sig.params) {
    thunkParams.emplace_back(std::to_string(paramOrdinal), paramType);
    ++paramOrdinal;
  }
  Function* thunk =
    builder.makeFunction(thunkName, std::move(thunkParams), sig.results, {});

  // Forward locals 1..N as the arguments of the indirect call.
  Expression* target = builder.makeLocalGet(0, Type::i32);
  std::vector<Expression*> forwarded;
  Index localIndex = 1;
  for (const auto& paramType : sig.params) {
    forwarded.push_back(builder.makeLocalGet(localIndex, paramType));
    ++localIndex;
  }
  thunk->body = builder.makeCallIndirect(target, forwarded, sig);

  wasm.addFunction(thunk);
  exportFunction(wasm, thunk->name, true);
}

// lld can sometimes produce a build with an imported mutable __stack_pointer
// (i.e. when linking with -fpie). This method internalizes the
// __stack_pointer and initializes it from an immutable global instead.
Expand Down Expand Up @@ -549,138 +487,6 @@ EmJsWalker fixEmJsFuncsAndReturnWalker(Module& wasm) {
return walker;
}

// Fixes function name hacks caused by LLVM exception & setjmp/longjmp
// handling pass for wasm.
// This does two things:
// 1. Change emscripten_longjmp_jmpbuf to emscripten_longjmp.
// In setjmp/longjmp handling pass in wasm backend, what we want to do is
// to change all function calls to longjmp to calls to emscripten_longjmp.
// Because we replace all calls to longjmp to emscripten_longjmp, the
// signature of that function should be the same as longjmp:
// emscripten_longjmp(jmp_buf, int)
// But after calling a function that might longjmp, while we test whether
// a longjmp occurred, we have to load an int address value and call
// emscripten_longjmp again with that address as the first argument. (Refer
// to lib/Target/WebAssembly/WebAssemblyEmscriptenEHSjLj.cpp in LLVM for
// details.)
// In this case we need the signature of emscripten_longjmp to be (int,
// int). So we need two different kinds of emscripten_longjmp signatures in
// LLVM IR. Both signatures will be lowered to (int, int) eventually, but
// in LLVM IR, types are not lowered yet.
// So we declare two functions in LLVM:
// emscripten_longjmp_jmpbuf(jmp_buf, int)
// emscripten_longjmp(int, int)
// And we change the name of emscripten_longjmp_jmpbuf to
// emscripten_longjmp here.
// 2. Converts invoke wrapper names.
// Refer to the comments in fixEmExceptionInvoke below.
// Walker that renames imported "__invoke_*" wrappers to "invoke_<sig>".
// visitFunction decides the renames and records them; visitModule then applies
// them (removing redundant imports, renaming function uses, and retargeting
// matching GOT.func globals). The signatures of all renamed invokes are
// collected in invokeSigs.
struct FixInvokeFunctionNamesWalker
: public PostWalker<FixInvokeFunctionNamesWalker> {
Module& wasm;
// Internal names of imports made redundant by an already-existing import of
// the new name; removed in visitModule.
std::vector<Name> toRemove;
// Import base-name renames, used in visitModule to update GOT.func globals.
std::map<Name, Name> importRenames;
// Internal function-name renames, applied in visitModule.
std::map<Name, Name> functionRenames;
// Signatures of the renamed invokes, without the leading function-pointer
// parameter.
std::set<Signature> invokeSigs;
ImportInfo imports;

FixInvokeFunctionNamesWalker(Module& _wasm) : wasm(_wasm), imports(wasm) {}

// Converts invoke wrapper names generated by LLVM backend to real invoke
// wrapper names that are expected by JavaScript glue code.
// This is required to support wasm exception handling (asm.js style).
//
// LLVM backend lowers
// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad
// into
// ... (some code)
// call @invoke_SIG(func, arg1, arg2)
// ... (some code)
// SIG is a mangled string generated based on the LLVM IR-level function
// signature. In LLVM IR, types are not lowered yet, so this mangling scheme
// simply takes LLVM's string representation of parameter types and
// concatenates them with '_'. For example, the name of an invoke wrapper for
// function
// void foo(struct mystruct*, int) will be
// "__invoke_void_%struct.mystruct*_int".
// This function converts the names of invoke wrappers based on their lowered
// argument types and a return type. In the example above, the resulting new
// wrapper name becomes "invoke_vii".
//
// Names that do not start with "__invoke_" (after stripping one pair of
// surrounding double quotes, if present) are returned unchanged. For names
// that do, the signature minus its first parameter is also added to
// invokeSigs.
Name fixEmExceptionInvoke(const Name& name, Signature sig) {
std::string nameStr = name.c_str();
// NOTE(review): front()/back() on an empty string is undefined behavior;
// presumably import base names are never empty here — confirm.
if (nameStr.front() == '"' && nameStr.back() == '"') {
nameStr = nameStr.substr(1, nameStr.size() - 2);
}
if (nameStr.find("__invoke_") != 0) {
return name;
}

// Drop the first parameter (the function being invoked) so that the mangled
// name reflects only the callee's own signature.
std::vector<Type> newParams(sig.params.begin() + 1, sig.params.end());
Signature sigWoOrigFunc = Signature(Type(newParams), sig.results);
invokeSigs.insert(sigWoOrigFunc);
return Name("invoke_" +
getSig(sigWoOrigFunc.results, sigWoOrigFunc.params));
}

// Handles one function: only imports whose base name changes under
// fixEmExceptionInvoke are processed.
void visitFunction(Function* curr) {
if (!curr->imported()) {
return;
}

Name newname = fixEmExceptionInvoke(curr->base, curr->sig);
if (newname == curr->base) {
return;
}

BYN_TRACE("renaming import: " << curr->module << "." << curr->base << " ("
<< curr->name << ") -> " << newname << "\n");

// If an import with the new base name already exists, this one is redundant
// and is scheduled for removal; otherwise its base name is changed in place.
if (auto* f = imports.getImportedFunction(curr->module, newname)) {
BYN_TRACE("remove redundant import: " << curr->base << "\n");
toRemove.push_back(curr->name);
// Make sure the existing import has the correct internal name.
if (f->name != newname) {
functionRenames[f->name] = newname;
}
} else {
BYN_TRACE("rename import: " << curr->base << "\n");
curr->base = newname;
}

functionRenames[curr->name] = newname;

// Record the base-name rename so that visitModule can retarget a matching
// GOT.func import.
// NOTE(review): when the else branch above ran, curr->base already equals
// newname at this point, so the key recorded here is the new name rather than
// the old one — confirm this is intended.
importRenames[curr->base] = newname;
}

// Applies everything recorded by visitFunction.
void visitModule(Module* curr) {
for (auto name : toRemove) {
wasm.removeFunction(name);
}

// Rename all uses of the old function to the new import name
ModuleUtils::renameFunctions(wasm, functionRenames);

// For imports that were renamed, update any associated GOT.func imports.
for (auto& pair : importRenames) {
BYN_TRACE("looking for: GOT.func." << pair.first << "\n");
if (auto g = imports.getImportedGlobal("GOT.func", pair.first)) {
BYN_TRACE("renaming corresponding GOT entry: " << g->base << " -> "
<< pair.second << "\n");
g->base = pair.second;
}
}
}
};

void EmscriptenGlueGenerator::fixInvokeFunctionNames() {
BYN_TRACE("fixInvokeFunctionNames\n");
FixInvokeFunctionNamesWalker walker(wasm);
walker.walkModule(&wasm);
BYN_TRACE("generating dyncall thunks\n");
for (auto sig : walker.invokeSigs) {
generateDynCallThunk(sig);
}
}

void printSignatures(std::ostream& o, const std::set<Signature>& c) {
o << "[";
bool first = true;
Expand Down
Loading

0 comments on commit 9eb7cf8

Please sign in to comment.