diff --git a/src/librustc_codegen_llvm/back/lto.rs b/src/librustc_codegen_llvm/back/lto.rs index 99828e5b7fbbe..bddb45da10b00 100644 --- a/src/librustc_codegen_llvm/back/lto.rs +++ b/src/librustc_codegen_llvm/back/lto.rs @@ -48,18 +48,11 @@ pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool { } } -/// Performs LTO, which in the case of full LTO means merging all modules into -/// a single one and returning it for further optimizing. For ThinLTO, it will -/// do the global analysis necessary and return two lists, one of the modules -/// the need optimization and another for modules that can simply be copied over -/// from the incr. comp. cache. -pub(crate) fn run(cgcx: &CodegenContext, - modules: Vec>, - cached_modules: Vec<(SerializedModule, WorkProduct)>, - timeline: &mut Timeline) - -> Result<(Vec>, Vec), FatalError> +fn prepare_lto(cgcx: &CodegenContext, + timeline: &mut Timeline, + diag_handler: &Handler) + -> Result<(Vec, Vec<(SerializedModule, CString)>), FatalError> { - let diag_handler = cgcx.create_diag_handler(); let export_threshold = match cgcx.lto { // We're just doing LTO for our one crate Lto::ThinLocal => SymbolExportLevel::Rust, @@ -144,36 +137,74 @@ pub(crate) fn run(cgcx: &CodegenContext, } } + Ok((symbol_white_list, upstream_modules)) +} + +/// Performs fat LTO by merging all modules into a single one and returning it +/// for further optimization. 
+pub(crate) fn run_fat(cgcx: &CodegenContext, + modules: Vec>, + timeline: &mut Timeline) + -> Result, FatalError> +{ + let diag_handler = cgcx.create_diag_handler(); + let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, timeline, &diag_handler)?; let symbol_white_list = symbol_white_list.iter() .map(|c| c.as_ptr()) .collect::>(); - match cgcx.lto { - Lto::Fat => { - assert!(cached_modules.is_empty()); - let opt_jobs = fat_lto(cgcx, - &diag_handler, - modules, - upstream_modules, - &symbol_white_list, - timeline); - opt_jobs.map(|opt_jobs| (opt_jobs, vec![])) - } - Lto::Thin | - Lto::ThinLocal => { - if cgcx.opts.debugging_opts.cross_lang_lto.enabled() { - unreachable!("We should never reach this case if the LTO step \ - is deferred to the linker"); - } - thin_lto(cgcx, - &diag_handler, - modules, - upstream_modules, - cached_modules, - &symbol_white_list, - timeline) + fat_lto(cgcx, &diag_handler, modules, upstream_modules, &symbol_white_list, timeline) +} + +/// Performs thin LTO by performing necessary global analysis and returning two +/// lists, one of the modules that need optimization and another for modules that +/// can simply be copied over from the incr. comp. cache. 
+pub(crate) fn run_thin(cgcx: &CodegenContext, + modules: Vec<(String, ThinBuffer)>, + cached_modules: Vec<(SerializedModule, WorkProduct)>, + timeline: &mut Timeline) + -> Result<(Vec>, Vec), FatalError> +{ + let diag_handler = cgcx.create_diag_handler(); + let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, timeline, &diag_handler)?; + let symbol_white_list = symbol_white_list.iter() + .map(|c| c.as_ptr()) + .collect::>(); + if cgcx.opts.debugging_opts.cross_lang_lto.enabled() { + unreachable!("We should never reach this case if the LTO step \ + is deferred to the linker"); + } + thin_lto(cgcx, + &diag_handler, + modules, + upstream_modules, + cached_modules, + &symbol_white_list, + timeline) +} + +pub(crate) fn prepare_thin( + cgcx: &CodegenContext, + module: ModuleCodegen +) -> (String, ThinBuffer) { + let name = module.name.clone(); + let buffer = ThinBuffer::new(module.module_llvm.llmod()); + + // We emit the module after having serialized it into a ThinBuffer + // because only then it will contain the ThinLTO module summary. 
+ if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir { + if cgcx.config(module.kind).emit_pre_thin_lto_bc { + let path = incr_comp_session_dir + .join(pre_lto_bitcode_filename(&name)); + + fs::write(&path, buffer.data()).unwrap_or_else(|e| { + panic!("Error writing pre-lto-bitcode file `{}`: {}", + path.display(), + e); + }); } - Lto::No => unreachable!(), } + + (name, buffer) } fn fat_lto(cgcx: &CodegenContext, @@ -182,7 +213,7 @@ fn fat_lto(cgcx: &CodegenContext, mut serialized_modules: Vec<(SerializedModule, CString)>, symbol_white_list: &[*const libc::c_char], timeline: &mut Timeline) - -> Result>, FatalError> + -> Result, FatalError> { info!("going for a fat lto"); @@ -271,10 +302,10 @@ fn fat_lto(cgcx: &CodegenContext, timeline.record("passes"); } - Ok(vec![LtoModuleCodegen::Fat { + Ok(LtoModuleCodegen::Fat { module: Some(module), _serialized_bitcode: serialized_bitcode, - }]) + }) } struct Linker<'a>(&'a mut llvm::Linker<'a>); @@ -335,7 +366,7 @@ impl Drop for Linker<'a> { /// they all go out of scope. fn thin_lto(cgcx: &CodegenContext, diag_handler: &Handler, - modules: Vec>, + modules: Vec<(String, ThinBuffer)>, serialized_modules: Vec<(SerializedModule, CString)>, cached_modules: Vec<(SerializedModule, WorkProduct)>, symbol_white_list: &[*const libc::c_char], @@ -355,41 +386,17 @@ fn thin_lto(cgcx: &CodegenContext, let mut module_names = Vec::with_capacity(full_scope_len); let mut thin_modules = Vec::with_capacity(full_scope_len); - // FIXME: right now, like with fat LTO, we serialize all in-memory - // modules before working with them and ThinLTO. We really - // shouldn't do this, however, and instead figure out how to - // extract a summary from an in-memory module and then merge that - // into the global index. It turns out that this loop is by far - // the most expensive portion of this small bit of global - // analysis! 
- for (i, module) in modules.into_iter().enumerate() { - info!("local module: {} - {}", i, module.name); - let name = CString::new(module.name.clone()).unwrap(); - let buffer = ThinBuffer::new(module.module_llvm.llmod()); - - // We emit the module after having serialized it into a ThinBuffer - // because only then it will contain the ThinLTO module summary. - if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir { - if cgcx.config(module.kind).emit_pre_thin_lto_bc { - let path = incr_comp_session_dir - .join(pre_lto_bitcode_filename(&module.name)); - - fs::write(&path, buffer.data()).unwrap_or_else(|e| { - panic!("Error writing pre-lto-bitcode file `{}`: {}", - path.display(), - e); - }); - } - } - + for (i, (name, buffer)) in modules.into_iter().enumerate() { + info!("local module: {} - {}", i, name); + let cname = CString::new(name.clone()).unwrap(); thin_modules.push(llvm::ThinLTOModule { - identifier: name.as_ptr(), + identifier: cname.as_ptr(), data: buffer.data().as_ptr(), len: buffer.data().len(), }); thin_buffers.push(buffer); - module_names.push(name); - timeline.record(&module.name); + module_names.push(cname); + timeline.record(&name); } // FIXME: All upstream crates are deserialized internally in the diff --git a/src/librustc_codegen_llvm/lib.rs b/src/librustc_codegen_llvm/lib.rs index 4f90cb793b6d2..ff06d3759bd94 100644 --- a/src/librustc_codegen_llvm/lib.rs +++ b/src/librustc_codegen_llvm/lib.rs @@ -176,13 +176,20 @@ impl WriteBackendMethods for LlvmCodegenBackend { fn print_pass_timings(&self) { unsafe { llvm::LLVMRustPrintPassTimings(); } } - fn run_lto( + fn run_fat_lto( cgcx: &CodegenContext, modules: Vec>, + timeline: &mut Timeline + ) -> Result, FatalError> { + back::lto::run_fat(cgcx, modules, timeline) + } + fn run_thin_lto( + cgcx: &CodegenContext, + modules: Vec<(String, Self::ThinBuffer)>, cached_modules: Vec<(SerializedModule, WorkProduct)>, timeline: &mut Timeline ) -> Result<(Vec>, Vec), FatalError> { - back::lto::run(cgcx, 
modules, cached_modules, timeline) + back::lto::run_thin(cgcx, modules, cached_modules, timeline) } unsafe fn optimize( cgcx: &CodegenContext, @@ -209,6 +216,12 @@ impl WriteBackendMethods for LlvmCodegenBackend { ) -> Result { back::write::codegen(cgcx, diag_handler, module, config, timeline) } + fn prepare_thin( + cgcx: &CodegenContext, + module: ModuleCodegen + ) -> (String, Self::ThinBuffer) { + back::lto::prepare_thin(cgcx, module) + } fn run_lto_pass_manager( cgcx: &CodegenContext, module: &ModuleCodegen, diff --git a/src/librustc_codegen_ssa/back/write.rs b/src/librustc_codegen_ssa/back/write.rs index 46aee5339ba9e..59955ce77cde4 100644 --- a/src/librustc_codegen_ssa/back/write.rs +++ b/src/librustc_codegen_ssa/back/write.rs @@ -252,7 +252,8 @@ impl CodegenContext { fn generate_lto_work( cgcx: &CodegenContext, - modules: Vec>, + needs_fat_lto: Vec>, + needs_thin_lto: Vec<(String, B::ThinBuffer)>, import_only_modules: Vec<(SerializedModule, WorkProduct)> ) -> Vec<(WorkItem, u64)> { let mut timeline = cgcx.time_graph.as_ref().map(|tg| { @@ -260,22 +261,28 @@ fn generate_lto_work( CODEGEN_WORK_PACKAGE_KIND, "generate lto") }).unwrap_or(Timeline::noop()); - let (lto_modules, copy_jobs) = B::run_lto(cgcx, modules, import_only_modules, &mut timeline) - .unwrap_or_else(|e| e.raise()); - let lto_modules = lto_modules.into_iter().map(|module| { + let (lto_modules, copy_jobs) = if !needs_fat_lto.is_empty() { + assert!(needs_thin_lto.is_empty()); + assert!(import_only_modules.is_empty()); + let lto_module = B::run_fat_lto(cgcx, needs_fat_lto, &mut timeline) + .unwrap_or_else(|e| e.raise()); + (vec![lto_module], vec![]) + } else { + assert!(needs_fat_lto.is_empty()); + B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules, &mut timeline) + .unwrap_or_else(|e| e.raise()) + }; + + lto_modules.into_iter().map(|module| { let cost = module.cost(); (WorkItem::LTO(module), cost) - }); - - let copy_jobs = copy_jobs.into_iter().map(|wp| { + 
}).chain(copy_jobs.into_iter().map(|wp| { (WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen { name: wp.cgu_name.clone(), source: wp, }), 0) - }); - - lto_modules.chain(copy_jobs).collect() + })).collect() } pub struct CompiledModules { @@ -671,16 +678,17 @@ impl WorkItem { } } -enum WorkItemResult { +enum WorkItemResult { Compiled(CompiledModule), - NeedsLTO(ModuleCodegen), + NeedsFatLTO(ModuleCodegen), + NeedsThinLTO(String, B::ThinBuffer), } fn execute_work_item( cgcx: &CodegenContext, work_item: WorkItem, timeline: &mut Timeline -) -> Result, FatalError> { +) -> Result, FatalError> { let module_config = cgcx.config(work_item.module_kind()); match work_item { @@ -696,67 +704,80 @@ fn execute_work_item( } } +// Actual LTO type we end up choosing based on multiple factors. +enum ComputedLtoType { + No, + Thin, + Fat, +} + fn execute_optimize_work_item( cgcx: &CodegenContext, module: ModuleCodegen, module_config: &ModuleConfig, timeline: &mut Timeline -) -> Result, FatalError> { +) -> Result, FatalError> { let diag_handler = cgcx.create_diag_handler(); unsafe { B::optimize(cgcx, &diag_handler, &module, module_config, timeline)?; } - let linker_does_lto = cgcx.opts.debugging_opts.cross_lang_lto.enabled(); - // After we've done the initial round of optimizations we need to // decide whether to synchronously codegen this module or ship it // back to the coordinator thread for further LTO processing (which // has to wait for all the initial modules to be optimized). - // - // Here we dispatch based on the `cgcx.lto` and kind of module we're - // codegenning... - let needs_lto = match cgcx.lto { - Lto::No => false, - // If the linker does LTO, we don't have to do it. Note that we - // keep doing full LTO, if it is requested, as not to break the - // assumption that the output will be a single module. - Lto::Thin | Lto::ThinLocal if linker_does_lto => false, + // If the linker does LTO, we don't have to do it. 
Note that we + // keep doing full LTO, if it is requested, as not to break the + // assumption that the output will be a single module. + let linker_does_lto = cgcx.opts.debugging_opts.cross_lang_lto.enabled(); - // Here we've got a full crate graph LTO requested. We ignore - // this, however, if the crate type is only an rlib as there's - // no full crate graph to process, that'll happen later. - // - // This use case currently comes up primarily for targets that - // require LTO so the request for LTO is always unconditionally - // passed down to the backend, but we don't actually want to do - // anything about it yet until we've got a final product. - Lto::Fat | Lto::Thin => { - cgcx.crate_types.len() != 1 || - cgcx.crate_types[0] != config::CrateType::Rlib - } + // When we're automatically doing ThinLTO for multi-codegen-unit + // builds we don't actually want to LTO the allocator modules if + // it shows up. This is due to various linker shenanigans that + // we'll encounter later. + let is_allocator = module.kind == ModuleKind::Allocator; - // When we're automatically doing ThinLTO for multi-codegen-unit + // builds we don't actually want to LTO the allocator modules if - // it shows up. This is due to various linker shenanigans that - // we'll encounter later. - Lto::ThinLocal => { - module.kind != ModuleKind::Allocator - } - }; + // We ignore a request for full crate graph LTO if the crate type + // is only an rlib, as there is no full crate graph to process, + // that'll happen later. + // + // This use case currently comes up primarily for targets that + // require LTO so the request for LTO is always unconditionally + // passed down to the backend, but we don't actually want to do + // anything about it yet until we've got a final product. + let is_rlib = cgcx.crate_types.len() == 1 + && cgcx.crate_types[0] == config::CrateType::Rlib; // Metadata modules never participate in LTO regardless of the lto // settings. 
- let needs_lto = needs_lto && module.kind != ModuleKind::Metadata; - - if needs_lto { - Ok(WorkItemResult::NeedsLTO(module)) + let lto_type = if module.kind == ModuleKind::Metadata { + ComputedLtoType::No } else { - let module = unsafe { B::codegen(cgcx, &diag_handler, module, module_config, timeline)? }; - Ok(WorkItemResult::Compiled(module)) - } + match cgcx.lto { + Lto::ThinLocal if !linker_does_lto && !is_allocator + => ComputedLtoType::Thin, + Lto::Thin if !linker_does_lto && !is_rlib + => ComputedLtoType::Thin, + Lto::Fat if !is_rlib => ComputedLtoType::Fat, + _ => ComputedLtoType::No, + } + }; + + Ok(match lto_type { + ComputedLtoType::No => { + let module = unsafe { + B::codegen(cgcx, &diag_handler, module, module_config, timeline)? + }; + WorkItemResult::Compiled(module) + } + ComputedLtoType::Thin => { + let (name, thin_buffer) = B::prepare_thin(cgcx, module); + WorkItemResult::NeedsThinLTO(name, thin_buffer) + } + ComputedLtoType::Fat => WorkItemResult::NeedsFatLTO(module), + }) } fn execute_copy_from_cache_work_item( @@ -764,7 +785,7 @@ fn execute_copy_from_cache_work_item( module: CachedModuleCodegen, module_config: &ModuleConfig, _: &mut Timeline -) -> Result, FatalError> { +) -> Result, FatalError> { let incr_comp_session_dir = cgcx.incr_comp_session_dir .as_ref() .unwrap(); @@ -826,7 +847,7 @@ fn execute_lto_work_item( mut module: lto::LtoModuleCodegen, module_config: &ModuleConfig, timeline: &mut Timeline -) -> Result, FatalError> { +) -> Result, FatalError> { let diag_handler = cgcx.create_diag_handler(); unsafe { @@ -838,10 +859,15 @@ fn execute_lto_work_item( pub enum Message { Token(io::Result), - NeedsLTO { + NeedsFatLTO { result: ModuleCodegen, worker_id: usize, }, + NeedsThinLTO { + name: String, + thin_buffer: B::ThinBuffer, + worker_id: usize, + }, Done { result: Result, worker_id: usize, @@ -1137,7 +1163,8 @@ fn start_executing_work( let mut compiled_modules = vec![]; let mut compiled_metadata_module = None; let mut 
compiled_allocator_module = None; - let mut needs_lto = Vec::new(); + let mut needs_fat_lto = Vec::new(); + let mut needs_thin_lto = Vec::new(); let mut lto_import_only_modules = Vec::new(); let mut started_lto = false; let mut codegen_aborted = false; @@ -1166,7 +1193,8 @@ fn start_executing_work( running > 0 || (!codegen_aborted && ( work_items.len() > 0 || - needs_lto.len() > 0 || + needs_fat_lto.len() > 0 || + needs_thin_lto.len() > 0 || lto_import_only_modules.len() > 0 || main_thread_worker_state != MainThreadWorkerState::Idle )) @@ -1212,12 +1240,17 @@ fn start_executing_work( running == 0 && main_thread_worker_state == MainThreadWorkerState::Idle { assert!(!started_lto); - assert!(needs_lto.len() + lto_import_only_modules.len() > 0); started_lto = true; - let modules = mem::replace(&mut needs_lto, Vec::new()); + + let needs_fat_lto = + mem::replace(&mut needs_fat_lto, Vec::new()); + let needs_thin_lto = + mem::replace(&mut needs_thin_lto, Vec::new()); let import_only_modules = mem::replace(&mut lto_import_only_modules, Vec::new()); - for (work, cost) in generate_lto_work(&cgcx, modules, import_only_modules) { + + for (work, cost) in generate_lto_work(&cgcx, needs_fat_lto, + needs_thin_lto, import_only_modules) { let insertion_index = work_items .binary_search_by_key(&cost, |&(_, cost)| cost) .unwrap_or_else(|e| e); @@ -1284,6 +1317,21 @@ fn start_executing_work( // Relinquish accidentally acquired extra tokens tokens.truncate(running); + // If a thread exits successfully then we drop a token associated + // with that worker and update our `running` count. We may later + // re-acquire a token to continue running more work. 
We may also not + // actually drop a token here if the worker was running with an + // "ephemeral token" + let mut free_worker = |worker_id| { + if main_thread_worker_state == MainThreadWorkerState::LLVMing { + main_thread_worker_state = MainThreadWorkerState::Idle; + } else { + running -= 1; + } + + free_worker_ids.push(worker_id); + }; + let msg = coordinator_receive.recv().unwrap(); match *msg.downcast::>().ok().unwrap() { // Save the token locally and the next turn of the loop will use @@ -1358,24 +1406,8 @@ fn start_executing_work( assert_eq!(main_thread_worker_state, MainThreadWorkerState::Codegenning); } - - // If a thread exits successfully then we drop a token associated - // with that worker and update our `running` count. We may later - // re-acquire a token to continue running more work. We may also not - // actually drop a token here if the worker was running with an - // "ephemeral token" - // - // Note that if the thread failed that means it panicked, so we - // abort immediately. 
Message::Done { result: Ok(compiled_module), worker_id } => { - if main_thread_worker_state == MainThreadWorkerState::LLVMing { - main_thread_worker_state = MainThreadWorkerState::Idle; - } else { - running -= 1; - } - - free_worker_ids.push(worker_id); - + free_worker(worker_id); match compiled_module.kind { ModuleKind::Regular => { compiled_modules.push(compiled_module); @@ -1390,15 +1422,15 @@ fn start_executing_work( } } } - Message::NeedsLTO { result, worker_id } => { + Message::NeedsFatLTO { result, worker_id } => { assert!(!started_lto); - if main_thread_worker_state == MainThreadWorkerState::LLVMing { - main_thread_worker_state = MainThreadWorkerState::Idle; - } else { - running -= 1; - } - free_worker_ids.push(worker_id); - needs_lto.push(result); + free_worker(worker_id); + needs_fat_lto.push(result); + } + Message::NeedsThinLTO { name, thin_buffer, worker_id } => { + assert!(!started_lto); + free_worker(worker_id); + needs_thin_lto.push((name, thin_buffer)); } Message::AddImportOnlyModule { module_data, work_product } => { assert!(!started_lto); @@ -1408,6 +1440,7 @@ fn start_executing_work( lto_import_only_modules.push((module_data, work_product)); main_thread_worker_state = MainThreadWorkerState::Idle; } + // If the thread failed that means it panicked, so we abort immediately. Message::Done { result: Err(()), worker_id: _ } => { bug!("worker thread panicked"); } @@ -1485,7 +1518,7 @@ fn spawn_work( // we exit. 
struct Bomb { coordinator_send: Sender>, - result: Option>, + result: Option>, worker_id: usize, } impl Drop for Bomb { @@ -1495,8 +1528,11 @@ fn spawn_work( Some(WorkItemResult::Compiled(m)) => { Message::Done:: { result: Ok(m), worker_id } } - Some(WorkItemResult::NeedsLTO(m)) => { - Message::NeedsLTO:: { result: m, worker_id } + Some(WorkItemResult::NeedsFatLTO(m)) => { + Message::NeedsFatLTO:: { result: m, worker_id } + } + Some(WorkItemResult::NeedsThinLTO(name, thin_buffer)) => { + Message::NeedsThinLTO:: { name, thin_buffer, worker_id } } None => Message::Done:: { result: Err(()), worker_id } }; diff --git a/src/librustc_codegen_ssa/traits/write.rs b/src/librustc_codegen_ssa/traits/write.rs index 72522e19af210..edc5c2717bc4f 100644 --- a/src/librustc_codegen_ssa/traits/write.rs +++ b/src/librustc_codegen_ssa/traits/write.rs @@ -24,14 +24,19 @@ pub trait WriteBackendMethods: 'static + Sized + Clone { type ThinData: Send + Sync; type ThinBuffer: ThinBufferMethods; - /// Performs LTO, which in the case of full LTO means merging all modules into - /// a single one and returning it for further optimizing. For ThinLTO, it will - /// do the global analysis necessary and return two lists, one of the modules - /// the need optimization and another for modules that can simply be copied over - /// from the incr. comp. cache. - fn run_lto( + /// Performs fat LTO by merging all modules into a single one and returning it + /// for further optimization. + fn run_fat_lto( cgcx: &CodegenContext, modules: Vec>, + timeline: &mut Timeline, + ) -> Result, FatalError>; + /// Performs thin LTO by performing necessary global analysis and returning two + /// lists, one of the modules that need optimization and another for modules that + /// can simply be copied over from the incr. comp. cache. 
+ fn run_thin_lto( + cgcx: &CodegenContext, + modules: Vec<(String, Self::ThinBuffer)>, cached_modules: Vec<(SerializedModule, WorkProduct)>, timeline: &mut Timeline, ) -> Result<(Vec>, Vec), FatalError>; @@ -55,6 +60,10 @@ pub trait WriteBackendMethods: 'static + Sized + Clone { config: &ModuleConfig, timeline: &mut Timeline, ) -> Result; + fn prepare_thin( + cgcx: &CodegenContext, + module: ModuleCodegen + ) -> (String, Self::ThinBuffer); fn run_lto_pass_manager( cgcx: &CodegenContext, llmod: &ModuleCodegen,