Auto merge of #56487 - nikic:discard-modules-earlier, r=alexcrichton
Discard LLVM modules earlier when performing ThinLTO

Currently ThinLTO is performed by first compiling all modules (and keeping them in memory), and then serializing them into ThinLTO buffers in a separate, synchronized step. Modules are later read back from ThinLTO buffers when running the ThinLTO optimization pipeline.
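As a rough illustration of that ordering (using made-up stand-in types, not the actual rustc/LLVM ones), the serialization only happens in a loop at the start of the LTO step, while every module is still held in memory:

    // Pre-PR ordering (hypothetical stand-in types): all in-memory modules
    // are still alive when the LTO step begins, and only this loop turns
    // them into ThinLTO buffers.
    struct InMemoryModule { name: String, bitcode: Vec<u8> }
    struct ThinBuffer { data: Vec<u8> }

    fn start_thin_lto(modules: Vec<InMemoryModule>) -> Vec<(String, ThinBuffer)> {
        modules
            .into_iter()
            .map(|module| {
                // Stand-in for serializing bitcode plus the ThinLTO summary.
                let buffer = ThinBuffer { data: module.bitcode.clone() };
                (module.name, buffer)
            })
            .collect()
    }

    fn main() {
        let modules = vec![
            InMemoryModule { name: "cgu.0".to_string(), bitcode: vec![1, 2, 3] },
            InMemoryModule { name: "cgu.1".to_string(), bitcode: vec![4, 5] },
        ];
        let buffers = start_thin_lto(modules);
        println!("serialized {} modules at LTO start", buffers.len());
    }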

We can also find the following comment in `lto.rs`:

        // FIXME: right now, like with fat LTO, we serialize all in-memory
        //        modules before working with them and ThinLTO. We really
        //        shouldn't do this, however, and instead figure out how to
        //        extract a summary from an in-memory module and then merge that
        //        into the global index. It turns out that this loop is by far
        //        the most expensive portion of this small bit of global
        //        analysis!

I don't think what the comment suggests is the right approach: one of the primary benefits of ThinLTO over ordinary (fat) LTO is that it is not necessary to keep all the modules (merged or not) in memory for the duration of the linking step.

However, we currently don't take advantage of this (at least for crate-local ThinLTO), because all modules are kept in memory until the start of the LTO step. This PR changes the implementation to perform the serialization into ThinLTO buffers directly after the initial optimization step instead.
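Sketching the new ordering with the same made-up stand-in types as above (again hypothetical, not the actual rustc internals): serialization happens right after a module's initial optimization, so only the compact buffer survives until the LTO step.

    // New ordering (hypothetical stand-in types): serialize into a ThinLTO
    // buffer right after per-module optimization and drop the in-memory
    // module immediately.
    struct InMemoryModule { name: String, bitcode: Vec<u8> }
    struct ThinBuffer { data: Vec<u8> }

    /// Called once per module, directly after its initial optimization.
    fn prepare_thin_sketch(module: InMemoryModule) -> (String, ThinBuffer) {
        // Stand-in for asking LLVM to emit bitcode plus a ThinLTO summary.
        let buffer = ThinBuffer { data: module.bitcode.clone() };
        (module.name, buffer)
        // The rest of `module` is dropped here; only `buffer` is kept around
        // until the global ThinLTO analysis runs over all buffers.
    }

    fn main() {
        let module = InMemoryModule { name: "cgu.0".to_string(), bitcode: vec![1, 2, 3] };
        let (name, buffer) = prepare_thin_sketch(module);
        println!("kept {} bytes for {}", buffer.data.len(), name);
    }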

Most of the changes here are plumbing to separate fat and thin LTO handling in `write.rs`, as the two now use different intermediate artifacts: in-memory modules for fat LTO, and ThinLTO buffers for thin LTO.
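Conceptually, the coordinator now has to route two different kinds of results. The enum and names below are my own illustrative sketch, not necessarily how `write.rs` structures it:

    // Illustrative sketch only: names and shapes are assumptions, not the
    // actual types used in write.rs.

    /// What a finished codegen work item hands back when LTO is enabled.
    enum PendingLto<Module, Buffer> {
        /// Fat LTO still needs the full in-memory LLVM module, because all
        /// modules get merged into a single one before optimization.
        Fat(Module),
        /// Thin LTO only needs the serialized ThinLTO buffer (bitcode plus
        /// summary), so the in-memory module can be discarded right away.
        Thin { name: String, buffer: Buffer },
    }

    /// Route a result into the collection consumed by the matching LTO path.
    fn route<M, B>(
        item: PendingLto<M, B>,
        needs_fat_lto: &mut Vec<M>,
        needs_thin_lto: &mut Vec<(String, B)>,
    ) {
        match item {
            PendingLto::Fat(module) => needs_fat_lto.push(module),
            PendingLto::Thin { name, buffer } => needs_thin_lto.push((name, buffer)),
        }
    }

    fn main() {
        let mut needs_fat: Vec<Vec<u8>> = Vec::new();
        let mut needs_thin: Vec<(String, Vec<u8>)> = Vec::new();
        route(
            PendingLto::Thin { name: "cgu.0".to_string(), buffer: vec![1, 2, 3] },
            &mut needs_fat,
            &mut needs_thin,
        );
        println!("fat: {}, thin: {}", needs_fat.len(), needs_thin.len());
    }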

r? @alexcrichton
bors committed Dec 7, 2018
2 parents fc84f5f + 8128d0d commit f504d3f
Showing 4 changed files with 231 additions and 166 deletions.
src/librustc_codegen_llvm/back/lto.rs (147 changes: 77 additions & 70 deletions)
@@ -48,18 +48,11 @@ pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
}
}

/// Performs LTO, which in the case of full LTO means merging all modules into
/// a single one and returning it for further optimizing. For ThinLTO, it will
/// do the global analysis necessary and return two lists, one of the modules
/// the need optimization and another for modules that can simply be copied over
/// from the incr. comp. cache.
pub(crate) fn run(cgcx: &CodegenContext<LlvmCodegenBackend>,
modules: Vec<ModuleCodegen<ModuleLlvm>>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
timeline: &mut Timeline)
-> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
timeline: &mut Timeline,
diag_handler: &Handler)
-> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError>
{
let diag_handler = cgcx.create_diag_handler();
let export_threshold = match cgcx.lto {
// We're just doing LTO for our one crate
Lto::ThinLocal => SymbolExportLevel::Rust,
@@ -144,36 +137,74 @@ pub(crate) fn run(cgcx: &CodegenContext<LlvmCodegenBackend>,
}
}

Ok((symbol_white_list, upstream_modules))
}

/// Performs fat LTO by merging all modules into a single one and returning it
/// for further optimization.
pub(crate) fn run_fat(cgcx: &CodegenContext<LlvmCodegenBackend>,
modules: Vec<ModuleCodegen<ModuleLlvm>>,
timeline: &mut Timeline)
-> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
{
let diag_handler = cgcx.create_diag_handler();
let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, timeline, &diag_handler)?;
let symbol_white_list = symbol_white_list.iter()
.map(|c| c.as_ptr())
.collect::<Vec<_>>();
match cgcx.lto {
Lto::Fat => {
assert!(cached_modules.is_empty());
let opt_jobs = fat_lto(cgcx,
&diag_handler,
modules,
upstream_modules,
&symbol_white_list,
timeline);
opt_jobs.map(|opt_jobs| (opt_jobs, vec![]))
}
Lto::Thin |
Lto::ThinLocal => {
if cgcx.opts.debugging_opts.cross_lang_lto.enabled() {
unreachable!("We should never reach this case if the LTO step \
is deferred to the linker");
}
thin_lto(cgcx,
&diag_handler,
modules,
upstream_modules,
cached_modules,
&symbol_white_list,
timeline)
fat_lto(cgcx, &diag_handler, modules, upstream_modules, &symbol_white_list, timeline)
}

/// Performs thin LTO by performing necessary global analysis and returning two
/// lists, one of the modules that need optimization and another for modules that
/// can simply be copied over from the incr. comp. cache.
pub(crate) fn run_thin(cgcx: &CodegenContext<LlvmCodegenBackend>,
modules: Vec<(String, ThinBuffer)>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
timeline: &mut Timeline)
-> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
{
let diag_handler = cgcx.create_diag_handler();
let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, timeline, &diag_handler)?;
let symbol_white_list = symbol_white_list.iter()
.map(|c| c.as_ptr())
.collect::<Vec<_>>();
if cgcx.opts.debugging_opts.cross_lang_lto.enabled() {
unreachable!("We should never reach this case if the LTO step \
is deferred to the linker");
}
thin_lto(cgcx,
&diag_handler,
modules,
upstream_modules,
cached_modules,
&symbol_white_list,
timeline)
}

pub(crate) fn prepare_thin(
cgcx: &CodegenContext<LlvmCodegenBackend>,
module: ModuleCodegen<ModuleLlvm>
) -> (String, ThinBuffer) {
let name = module.name.clone();
let buffer = ThinBuffer::new(module.module_llvm.llmod());

// We emit the module after having serialized it into a ThinBuffer
// because only then it will contain the ThinLTO module summary.
if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
if cgcx.config(module.kind).emit_pre_thin_lto_bc {
let path = incr_comp_session_dir
.join(pre_lto_bitcode_filename(&name));

fs::write(&path, buffer.data()).unwrap_or_else(|e| {
panic!("Error writing pre-lto-bitcode file `{}`: {}",
path.display(),
e);
});
}
Lto::No => unreachable!(),
}

(name, buffer)
}

fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
@@ -182,7 +213,7 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
symbol_white_list: &[*const libc::c_char],
timeline: &mut Timeline)
-> Result<Vec<LtoModuleCodegen<LlvmCodegenBackend>>, FatalError>
-> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
{
info!("going for a fat lto");

@@ -271,10 +302,10 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
timeline.record("passes");
}

Ok(vec![LtoModuleCodegen::Fat {
Ok(LtoModuleCodegen::Fat {
module: Some(module),
_serialized_bitcode: serialized_bitcode,
}])
})
}

struct Linker<'a>(&'a mut llvm::Linker<'a>);
@@ -335,7 +366,7 @@ impl Drop for Linker<'a> {
/// they all go out of scope.
fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
diag_handler: &Handler,
modules: Vec<ModuleCodegen<ModuleLlvm>>,
modules: Vec<(String, ThinBuffer)>,
serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
symbol_white_list: &[*const libc::c_char],
@@ -355,41 +386,17 @@ fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
let mut module_names = Vec::with_capacity(full_scope_len);
let mut thin_modules = Vec::with_capacity(full_scope_len);

// FIXME: right now, like with fat LTO, we serialize all in-memory
// modules before working with them and ThinLTO. We really
// shouldn't do this, however, and instead figure out how to
// extract a summary from an in-memory module and then merge that
// into the global index. It turns out that this loop is by far
// the most expensive portion of this small bit of global
// analysis!
for (i, module) in modules.into_iter().enumerate() {
info!("local module: {} - {}", i, module.name);
let name = CString::new(module.name.clone()).unwrap();
let buffer = ThinBuffer::new(module.module_llvm.llmod());

// We emit the module after having serialized it into a ThinBuffer
// because only then it will contain the ThinLTO module summary.
if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
if cgcx.config(module.kind).emit_pre_thin_lto_bc {
let path = incr_comp_session_dir
.join(pre_lto_bitcode_filename(&module.name));

fs::write(&path, buffer.data()).unwrap_or_else(|e| {
panic!("Error writing pre-lto-bitcode file `{}`: {}",
path.display(),
e);
});
}
}

for (i, (name, buffer)) in modules.into_iter().enumerate() {
info!("local module: {} - {}", i, name);
let cname = CString::new(name.clone()).unwrap();
thin_modules.push(llvm::ThinLTOModule {
identifier: name.as_ptr(),
identifier: cname.as_ptr(),
data: buffer.data().as_ptr(),
len: buffer.data().len(),
});
thin_buffers.push(buffer);
module_names.push(name);
timeline.record(&module.name);
module_names.push(cname);
timeline.record(&name);
}

// FIXME: All upstream crates are deserialized internally in the
src/librustc_codegen_llvm/lib.rs (17 changes: 15 additions & 2 deletions)
@@ -176,13 +176,20 @@ impl WriteBackendMethods for LlvmCodegenBackend {
fn print_pass_timings(&self) {
unsafe { llvm::LLVMRustPrintPassTimings(); }
}
fn run_lto(
fn run_fat_lto(
cgcx: &CodegenContext<Self>,
modules: Vec<ModuleCodegen<Self::Module>>,
timeline: &mut Timeline
) -> Result<LtoModuleCodegen<Self>, FatalError> {
back::lto::run_fat(cgcx, modules, timeline)
}
fn run_thin_lto(
cgcx: &CodegenContext<Self>,
modules: Vec<(String, Self::ThinBuffer)>,
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
timeline: &mut Timeline
) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
back::lto::run(cgcx, modules, cached_modules, timeline)
back::lto::run_thin(cgcx, modules, cached_modules, timeline)
}
unsafe fn optimize(
cgcx: &CodegenContext<Self>,
@@ -209,6 +216,12 @@ impl WriteBackendMethods for LlvmCodegenBackend {
) -> Result<CompiledModule, FatalError> {
back::write::codegen(cgcx, diag_handler, module, config, timeline)
}
fn prepare_thin(
cgcx: &CodegenContext<Self>,
module: ModuleCodegen<Self::Module>
) -> (String, Self::ThinBuffer) {
back::lto::prepare_thin(cgcx, module)
}
fn run_lto_pass_manager(
cgcx: &CodegenContext<Self>,
module: &ModuleCodegen<Self::Module>,
