Auto merge of #56487 - nikic:discard-modules-earlier, r=alexcrichton
Discard LLVM modules earlier when performing ThinLTO

Currently ThinLTO is performed by first compiling all modules (and keeping them in memory), and then serializing them into ThinLTO buffers in a separate, synchronized step. Modules are later read back from ThinLTO buffers when running the ThinLTO optimization pipeline.
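As a rough illustration of that ordering (using made-up stand-in types, not the actual rustc/LLVM ones), the serialization only happens in a loop at the start of the LTO step, while every module is still held in memory:

    // Pre-PR ordering (hypothetical stand-in types): all in-memory modules
    // are still alive when the LTO step begins, and only this loop turns
    // them into ThinLTO buffers.
    struct InMemoryModule { name: String, bitcode: Vec<u8> }
    struct ThinBuffer { data: Vec<u8> }

    fn start_thin_lto(modules: Vec<InMemoryModule>) -> Vec<(String, ThinBuffer)> {
        modules
            .into_iter()
            .map(|module| {
                // Stand-in for serializing bitcode plus the ThinLTO summary.
                let buffer = ThinBuffer { data: module.bitcode.clone() };
                (module.name, buffer)
            })
            .collect()
    }

    fn main() {
        let modules = vec![
            InMemoryModule { name: "cgu.0".to_string(), bitcode: vec![1, 2, 3] },
            InMemoryModule { name: "cgu.1".to_string(), bitcode: vec![4, 5] },
        ];
        let buffers = start_thin_lto(modules);
        println!("serialized {} modules at LTO start", buffers.len());
    }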

We can also find the following comment in `lto.rs`:

        // FIXME: right now, like with fat LTO, we serialize all in-memory
        //        modules before working with them and ThinLTO. We really
        //        shouldn't do this, however, and instead figure out how to
        //        extract a summary from an in-memory module and then merge that
        //        into the global index. It turns out that this loop is by far
        //        the most expensive portion of this small bit of global
        //        analysis!

I don't think what the comment suggests is the right approach: one of the primary benefits of ThinLTO over ordinary (fat) LTO is that it is not necessary to keep all the modules (merged or not) in memory for the duration of the linking step.

However, we currently don't take advantage of this (at least for crate-local ThinLTO), because all modules are kept in memory until the start of the LTO step. This PR changes the implementation to perform the serialization into ThinLTO buffers directly after the initial optimization step instead.
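Sketching the new ordering with the same made-up stand-in types as above (again hypothetical, not the actual rustc internals): serialization happens right after a module's initial optimization, so only the compact buffer survives until the LTO step.

    // New ordering (hypothetical stand-in types): serialize into a ThinLTO
    // buffer right after per-module optimization and drop the in-memory
    // module immediately.
    struct InMemoryModule { name: String, bitcode: Vec<u8> }
    struct ThinBuffer { data: Vec<u8> }

    /// Called once per module, directly after its initial optimization.
    fn prepare_thin_sketch(module: InMemoryModule) -> (String, ThinBuffer) {
        // Stand-in for asking LLVM to emit bitcode plus a ThinLTO summary.
        let buffer = ThinBuffer { data: module.bitcode.clone() };
        (module.name, buffer)
        // The rest of `module` is dropped here; only `buffer` is kept around
        // until the global ThinLTO analysis runs over all buffers.
    }

    fn main() {
        let module = InMemoryModule { name: "cgu.0".to_string(), bitcode: vec![1, 2, 3] };
        let (name, buffer) = prepare_thin_sketch(module);
        println!("kept {} bytes for {}", buffer.data.len(), name);
    }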

Most of the changes here are plumbing to separate fat and thin LTO handling in `write.rs`, as the two now use different intermediate artifacts: in-memory modules for fat LTO, and ThinLTO buffers for thin LTO.
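Conceptually, the coordinator now has to route two different kinds of results. The enum and names below are my own illustrative sketch, not necessarily how `write.rs` structures it:

    // Illustrative sketch only: names and shapes are assumptions, not the
    // actual types used in write.rs.

    /// What a finished codegen work item hands back when LTO is enabled.
    enum PendingLto<Module, Buffer> {
        /// Fat LTO still needs the full in-memory LLVM module, because all
        /// modules get merged into a single one before optimization.
        Fat(Module),
        /// Thin LTO only needs the serialized ThinLTO buffer (bitcode plus
        /// summary), so the in-memory module can be discarded right away.
        Thin { name: String, buffer: Buffer },
    }

    /// Route a result into the collection consumed by the matching LTO path.
    fn route<M, B>(
        item: PendingLto<M, B>,
        needs_fat_lto: &mut Vec<M>,
        needs_thin_lto: &mut Vec<(String, B)>,
    ) {
        match item {
            PendingLto::Fat(module) => needs_fat_lto.push(module),
            PendingLto::Thin { name, buffer } => needs_thin_lto.push((name, buffer)),
        }
    }

    fn main() {
        let mut needs_fat: Vec<Vec<u8>> = Vec::new();
        let mut needs_thin: Vec<(String, Vec<u8>)> = Vec::new();
        route(
            PendingLto::Thin { name: "cgu.0".to_string(), buffer: vec![1, 2, 3] },
            &mut needs_fat,
            &mut needs_thin,
        );
        println!("fat: {}, thin: {}", needs_fat.len(), needs_thin.len());
    }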

r? @alexcrichton
bors committed Dec 7, 2018
2 parents fc84f5f + 8128d0d commit f504d3f
Showing 4 changed files with 231 additions and 166 deletions.
src/librustc_codegen_llvm/back/lto.rs (147 changes: 77 additions & 70 deletions)
@@ -48,18 +48,11 @@ pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
}
}

/// Performs LTO, which in the case of full LTO means merging all modules into
/// a single one and returning it for further optimizing. For ThinLTO, it will
/// do the global analysis necessary and return two lists, one of the modules
/// the need optimization and another for modules that can simply be copied over
/// from the incr. comp. cache.
pub(crate) fn run(cgcx: &CodegenContext<LlvmCodegenBackend>,
modules: Vec<ModuleCodegen<ModuleLlvm>>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
timeline: &mut Timeline)
-> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
timeline: &mut Timeline,
diag_handler: &Handler)
-> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError>
{
let diag_handler = cgcx.create_diag_handler();
let export_threshold = match cgcx.lto {
// We're just doing LTO for our one crate
Lto::ThinLocal => SymbolExportLevel::Rust,
@@ -144,36 +137,74 @@ pub(crate) fn run(cgcx: &CodegenContext<LlvmCodegenBackend>,
}
}

Ok((symbol_white_list, upstream_modules))
}

/// Performs fat LTO by merging all modules into a single one and returning it
/// for further optimization.
pub(crate) fn run_fat(cgcx: &CodegenContext<LlvmCodegenBackend>,
modules: Vec<ModuleCodegen<ModuleLlvm>>,
timeline: &mut Timeline)
-> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
{
let diag_handler = cgcx.create_diag_handler();
let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, timeline, &diag_handler)?;
let symbol_white_list = symbol_white_list.iter()
.map(|c| c.as_ptr())
.collect::<Vec<_>>();
match cgcx.lto {
Lto::Fat => {
assert!(cached_modules.is_empty());
let opt_jobs = fat_lto(cgcx,
&diag_handler,
modules,
upstream_modules,
&symbol_white_list,
timeline);
opt_jobs.map(|opt_jobs| (opt_jobs, vec![]))
}
Lto::Thin |
Lto::ThinLocal => {
if cgcx.opts.debugging_opts.cross_lang_lto.enabled() {
unreachable!("We should never reach this case if the LTO step \
is deferred to the linker");
}
thin_lto(cgcx,
&diag_handler,
modules,
upstream_modules,
cached_modules,
&symbol_white_list,
timeline)
fat_lto(cgcx, &diag_handler, modules, upstream_modules, &symbol_white_list, timeline)
}

/// Performs thin LTO by performing necessary global analysis and returning two
/// lists, one of the modules that need optimization and another for modules that
/// can simply be copied over from the incr. comp. cache.
pub(crate) fn run_thin(cgcx: &CodegenContext<LlvmCodegenBackend>,
modules: Vec<(String, ThinBuffer)>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
timeline: &mut Timeline)
-> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
{
let diag_handler = cgcx.create_diag_handler();
let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, timeline, &diag_handler)?;
let symbol_white_list = symbol_white_list.iter()
.map(|c| c.as_ptr())
.collect::<Vec<_>>();
if cgcx.opts.debugging_opts.cross_lang_lto.enabled() {
unreachable!("We should never reach this case if the LTO step \
is deferred to the linker");
}
thin_lto(cgcx,
&diag_handler,
modules,
upstream_modules,
cached_modules,
&symbol_white_list,
timeline)
}

pub(crate) fn prepare_thin(
cgcx: &CodegenContext<LlvmCodegenBackend>,
module: ModuleCodegen<ModuleLlvm>
) -> (String, ThinBuffer) {
let name = module.name.clone();
let buffer = ThinBuffer::new(module.module_llvm.llmod());

// We emit the module after having serialized it into a ThinBuffer
// because only then it will contain the ThinLTO module summary.
if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
if cgcx.config(module.kind).emit_pre_thin_lto_bc {
let path = incr_comp_session_dir
.join(pre_lto_bitcode_filename(&name));

fs::write(&path, buffer.data()).unwrap_or_else(|e| {
panic!("Error writing pre-lto-bitcode file `{}`: {}",
path.display(),
e);
});
}
Lto::No => unreachable!(),
}

(name, buffer)
}

fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
@@ -182,7 +213,7 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
symbol_white_list: &[*const libc::c_char],
timeline: &mut Timeline)
-> Result<Vec<LtoModuleCodegen<LlvmCodegenBackend>>, FatalError>
-> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
{
info!("going for a fat lto");

@@ -271,10 +302,10 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
timeline.record("passes");
}

Ok(vec![LtoModuleCodegen::Fat {
Ok(LtoModuleCodegen::Fat {
module: Some(module),
_serialized_bitcode: serialized_bitcode,
}])
})
}

struct Linker<'a>(&'a mut llvm::Linker<'a>);
@@ -335,7 +366,7 @@ impl Drop for Linker<'a> {
/// they all go out of scope.
fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
diag_handler: &Handler,
modules: Vec<ModuleCodegen<ModuleLlvm>>,
modules: Vec<(String, ThinBuffer)>,
serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
symbol_white_list: &[*const libc::c_char],
@@ -355,41 +386,17 @@ fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
let mut module_names = Vec::with_capacity(full_scope_len);
let mut thin_modules = Vec::with_capacity(full_scope_len);

// FIXME: right now, like with fat LTO, we serialize all in-memory
// modules before working with them and ThinLTO. We really
// shouldn't do this, however, and instead figure out how to
// extract a summary from an in-memory module and then merge that
// into the global index. It turns out that this loop is by far
// the most expensive portion of this small bit of global
// analysis!
for (i, module) in modules.into_iter().enumerate() {
info!("local module: {} - {}", i, module.name);
let name = CString::new(module.name.clone()).unwrap();
let buffer = ThinBuffer::new(module.module_llvm.llmod());

// We emit the module after having serialized it into a ThinBuffer
// because only then it will contain the ThinLTO module summary.
if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
if cgcx.config(module.kind).emit_pre_thin_lto_bc {
let path = incr_comp_session_dir
.join(pre_lto_bitcode_filename(&module.name));

fs::write(&path, buffer.data()).unwrap_or_else(|e| {
panic!("Error writing pre-lto-bitcode file `{}`: {}",
path.display(),
e);
});
}
}

for (i, (name, buffer)) in modules.into_iter().enumerate() {
info!("local module: {} - {}", i, name);
let cname = CString::new(name.clone()).unwrap();
thin_modules.push(llvm::ThinLTOModule {
identifier: name.as_ptr(),
identifier: cname.as_ptr(),
data: buffer.data().as_ptr(),
len: buffer.data().len(),
});
thin_buffers.push(buffer);
module_names.push(name);
timeline.record(&module.name);
module_names.push(cname);
timeline.record(&name);
}

// FIXME: All upstream crates are deserialized internally in the
src/librustc_codegen_llvm/lib.rs (17 changes: 15 additions & 2 deletions)
@@ -176,13 +176,20 @@ impl WriteBackendMethods for LlvmCodegenBackend {
fn print_pass_timings(&self) {
unsafe { llvm::LLVMRustPrintPassTimings(); }
}
fn run_lto(
fn run_fat_lto(
cgcx: &CodegenContext<Self>,
modules: Vec<ModuleCodegen<Self::Module>>,
timeline: &mut Timeline
) -> Result<LtoModuleCodegen<Self>, FatalError> {
back::lto::run_fat(cgcx, modules, timeline)
}
fn run_thin_lto(
cgcx: &CodegenContext<Self>,
modules: Vec<(String, Self::ThinBuffer)>,
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
timeline: &mut Timeline
) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
back::lto::run(cgcx, modules, cached_modules, timeline)
back::lto::run_thin(cgcx, modules, cached_modules, timeline)
}
unsafe fn optimize(
cgcx: &CodegenContext<Self>,
@@ -209,6 +216,12 @@ impl WriteBackendMethods for LlvmCodegenBackend {
) -> Result<CompiledModule, FatalError> {
back::write::codegen(cgcx, diag_handler, module, config, timeline)
}
fn prepare_thin(
cgcx: &CodegenContext<Self>,
module: ModuleCodegen<Self::Module>
) -> (String, Self::ThinBuffer) {
back::lto::prepare_thin(cgcx, module)
}
fn run_lto_pass_manager(
cgcx: &CodegenContext<Self>,
module: &ModuleCodegen<Self::Module>,
