Skip to content

Commit

Permalink
Move lev_distance to rustc_ast, make non-generic
Browse files Browse the repository at this point in the history
rustc_ast currently has a few dependencies on rustc_lexer. Ideally, an AST
would not have any dependency its lexer, for minimizing unnecessarily
design-time dependencies. Breaking this dependency would also have practical
benefits, since modifying rustc_lexer would not trigger a rebuild of rustc_ast.

This commit does not remove the rustc_ast --> rustc_lexer dependency,
but it does remove one of the sources of this dependency, which is the
code that handles fuzzy matching between symbol names for making suggestions
in diagnostics. Since that code depends only on Symbol, it is easy to move
it to rustc_span. It might even be best to move it to a separate crate,
since other tools such as Cargo use the same algorithm, and have simply
contain a duplicate of the code.

This changes the signature of find_best_match_for_name so that it is no
longer generic over its input. I checked the optimized binaries, and this
function was duplicated at nearly every call site, because most call sites
used short-lived iterator chains, generic over Map and such. But there's
no good reason for a function like this to be generic, since all it does
is immediately convert the generic input (the Iterator impl) to a concrete
Vec<Symbol>. This has all of the costs of generics (duplicated method bodies)
with no benefit.

Changing find_best_match_for_name to be non-generic removed about 10KB of
code from the optimized binary. I know it's a drop in the bucket, but we have
to start reducing binary size, and beginning to tame over-use of generics
is part of that.
  • Loading branch information
Arlie Davis committed Nov 25, 2020
1 parent 1c389ff commit 5481c1b
Show file tree
Hide file tree
Showing 16 changed files with 96 additions and 84 deletions.
1 change: 0 additions & 1 deletion compiler/rustc_ast/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ macro_rules! unwrap_or {
pub mod util {
pub mod classify;
pub mod comments;
pub mod lev_distance;
pub mod literal;
pub mod parser;
}
Expand Down
9 changes: 6 additions & 3 deletions compiler/rustc_interface/src/util.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use rustc_ast::mut_visit::{visit_clobber, MutVisitor, *};
use rustc_ast::ptr::P;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_ast::{self as ast, AttrVec, BlockCheckMode};
use rustc_codegen_ssa::traits::CodegenBackend;
use rustc_data_structures::fingerprint::Fingerprint;
Expand All @@ -20,6 +19,7 @@ use rustc_session::parse::CrateConfig;
use rustc_session::CrateDisambiguator;
use rustc_session::{early_error, filesearch, output, DiagnosticOutput, Session};
use rustc_span::edition::Edition;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::source_map::FileLoader;
use rustc_span::symbol::{sym, Symbol};
use smallvec::SmallVec;
Expand Down Expand Up @@ -512,8 +512,11 @@ pub(crate) fn check_attr_crate_type(

if let ast::MetaItemKind::NameValue(spanned) = a.meta().unwrap().kind {
let span = spanned.span;
let lev_candidate =
find_best_match_for_name(CRATE_TYPES.iter().map(|(k, _)| k), n, None);
let lev_candidate = find_best_match_for_name(
&CRATE_TYPES.iter().map(|(k, _)| *k).collect::<Vec<_>>(),
n,
None,
);
if let Some(candidate) = lev_candidate {
lint_buffer.buffer_lint_with_diagnostic(
lint::builtin::UNKNOWN_CRATE_TYPES,
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_lint/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ use self::TargetLint::*;
use crate::levels::LintLevelsBuilder;
use crate::passes::{EarlyLintPassObject, LateLintPassObject};
use rustc_ast as ast;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync;
use rustc_errors::{add_elided_lifetime_in_path_suggestion, struct_span_err, Applicability};
Expand All @@ -37,6 +36,7 @@ use rustc_session::lint::BuiltinLintDiagnostics;
use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintId};
use rustc_session::Session;
use rustc_session::SessionLintStore;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::{symbol::Symbol, MultiSpan, Span, DUMMY_SP};
use rustc_target::abi::LayoutOf;

Expand Down Expand Up @@ -411,7 +411,7 @@ impl LintStore {
self.by_name.keys().map(|name| Symbol::intern(&name)).collect::<Vec<_>>();

let suggestion = find_best_match_for_name(
symbols.iter(),
&symbols,
Symbol::intern(&lint_name.to_lowercase()),
None,
);
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_resolve/src/diagnostics.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::cmp::Reverse;
use std::ptr;

use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_ast::{self as ast, Path};
use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashSet;
Expand All @@ -14,6 +13,7 @@ use rustc_middle::bug;
use rustc_middle::ty::{self, DefIdTree};
use rustc_session::Session;
use rustc_span::hygiene::MacroKind;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::source_map::SourceMap;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{BytePos, MultiSpan, Span};
Expand Down Expand Up @@ -716,7 +716,7 @@ impl<'a> Resolver<'a> {
suggestions.sort_by_cached_key(|suggestion| suggestion.candidate.as_str());

match find_best_match_for_name(
suggestions.iter().map(|suggestion| &suggestion.candidate),
&suggestions.iter().map(|suggestion| suggestion.candidate).collect::<Vec<Symbol>>(),
ident.name,
None,
) {
Expand Down
48 changes: 26 additions & 22 deletions compiler/rustc_resolve/src/imports.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use crate::{CrateLint, Module, ModuleOrUniformRoot, ParentScope, PerNS, ScopeSet
use crate::{NameBinding, NameBindingKind, PathResult, PrivacyError, ToNameBinding};

use rustc_ast::unwrap_or;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_ast::NodeId;
use rustc_ast_lowering::ResolverAstLowering;
use rustc_data_structures::fx::FxHashSet;
Expand All @@ -25,6 +24,7 @@ use rustc_session::lint::builtin::{PUB_USE_OF_PRIVATE_EXTERN_CRATE, UNUSED_IMPOR
use rustc_session::lint::BuiltinLintDiagnostics;
use rustc_session::DiagnosticMessageId;
use rustc_span::hygiene::ExpnId;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::symbol::{kw, Ident, Symbol};
use rustc_span::{MultiSpan, Span};

Expand Down Expand Up @@ -1096,33 +1096,37 @@ impl<'a, 'b> ImportResolver<'a, 'b> {
_ => None,
};
let resolutions = resolutions.as_ref().into_iter().flat_map(|r| r.iter());
let names = resolutions.filter_map(|(BindingKey { ident: i, .. }, resolution)| {
if *i == ident {
return None;
} // Never suggest the same name
match *resolution.borrow() {
NameResolution { binding: Some(name_binding), .. } => {
match name_binding.kind {
NameBindingKind::Import { binding, .. } => {
match binding.kind {
// Never suggest the name that has binding error
// i.e., the name that cannot be previously resolved
NameBindingKind::Res(Res::Err, _) => None,
_ => Some(&i.name),
let names = resolutions
.filter_map(|(BindingKey { ident: i, .. }, resolution)| {
if *i == ident {
return None;
} // Never suggest the same name
match *resolution.borrow() {
NameResolution { binding: Some(name_binding), .. } => {
match name_binding.kind {
NameBindingKind::Import { binding, .. } => {
match binding.kind {
// Never suggest the name that has binding error
// i.e., the name that cannot be previously resolved
NameBindingKind::Res(Res::Err, _) => None,
_ => Some(i.name),
}
}
_ => Some(i.name),
}
_ => Some(&i.name),
}
NameResolution { ref single_imports, .. }
if single_imports.is_empty() =>
{
None
}
_ => Some(i.name),
}
NameResolution { ref single_imports, .. } if single_imports.is_empty() => {
None
}
_ => Some(&i.name),
}
});
})
.collect::<Vec<Symbol>>();

let lev_suggestion =
find_best_match_for_name(names, ident.name, None).map(|suggestion| {
find_best_match_for_name(&names, ident.name, None).map(|suggestion| {
(
vec![(ident.span, suggestion.to_string())],
String::from("a similar name exists in the module"),
Expand Down
9 changes: 5 additions & 4 deletions compiler/rustc_resolve/src/late/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use crate::path_names_to_string;
use crate::{CrateLint, Module, ModuleKind, ModuleOrUniformRoot};
use crate::{PathResult, PathSource, Segment};

use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_ast::visit::FnKind;
use rustc_ast::{self as ast, Expr, ExprKind, Item, ItemKind, NodeId, Path, Ty, TyKind};
use rustc_ast_pretty::pprust::path_segment_to_string;
Expand All @@ -18,6 +17,7 @@ use rustc_hir::def_id::{DefId, CRATE_DEF_INDEX, LOCAL_CRATE};
use rustc_hir::PrimTy;
use rustc_session::parse::feature_err;
use rustc_span::hygiene::MacroKind;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{BytePos, MultiSpan, Span, DUMMY_SP};

Expand Down Expand Up @@ -1206,7 +1206,7 @@ impl<'a: 'ast, 'ast> LateResolutionVisitor<'a, '_, 'ast> {
names.sort_by_cached_key(|suggestion| suggestion.candidate.as_str());

match find_best_match_for_name(
names.iter().map(|suggestion| &suggestion.candidate),
&names.iter().map(|suggestion| suggestion.candidate).collect::<Vec<Symbol>>(),
name,
None,
) {
Expand Down Expand Up @@ -1592,9 +1592,10 @@ impl<'a: 'ast, 'ast> LateResolutionVisitor<'a, '_, 'ast> {
.bindings
.iter()
.filter(|(id, _)| id.span.ctxt() == label.span.ctxt())
.map(|(id, _)| &id.name);
.map(|(id, _)| id.name)
.collect::<Vec<Symbol>>();

find_best_match_for_name(names, label.name, None).map(|symbol| {
find_best_match_for_name(&names, label.name, None).map(|symbol| {
// Upon finding a similar name, get the ident that it was from - the span
// contained within helps make a useful diagnostic. In addition, determine
// whether this candidate is within scope.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
// FIXME(Centril): Move to rustc_span?

use rustc_span::symbol::Symbol;
use crate::symbol::Symbol;
use std::cmp;

#[cfg(test)]
Expand Down Expand Up @@ -45,17 +43,14 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
///
/// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with
/// a lower(upper)case letters mismatch.
pub fn find_best_match_for_name<'a, T>(
iter_names: T,
#[cold]
pub fn find_best_match_for_name(
name_vec: &[Symbol],
lookup: Symbol,
dist: Option<usize>,
) -> Option<Symbol>
where
T: Iterator<Item = &'a Symbol>,
{
) -> Option<Symbol> {
let lookup = &lookup.as_str();
let max_dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
let name_vec: Vec<&Symbol> = iter_names.collect();

let (case_insensitive_match, levenshtein_match) = name_vec
.iter()
Expand Down Expand Up @@ -83,18 +78,18 @@ where
// 2. Levenshtein distance match
// 3. Sorted word match
if let Some(candidate) = case_insensitive_match {
Some(*candidate)
Some(candidate)
} else if levenshtein_match.is_some() {
levenshtein_match.map(|(candidate, _)| *candidate)
levenshtein_match.map(|(candidate, _)| candidate)
} else {
find_match_by_sorted_words(name_vec, lookup)
}
}

fn find_match_by_sorted_words<'a>(iter_names: Vec<&'a Symbol>, lookup: &str) -> Option<Symbol> {
fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> {
iter_names.iter().fold(None, |result, candidate| {
if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) {
Some(**candidate)
Some(*candidate)
} else {
result
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,38 +21,35 @@ fn test_lev_distance() {

#[test]
fn test_find_best_match_for_name() {
use rustc_span::with_default_session_globals;
use crate::with_default_session_globals;
with_default_session_globals(|| {
let input = vec![Symbol::intern("aaab"), Symbol::intern("aaabc")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), None),
find_best_match_for_name(&input, Symbol::intern("aaaa"), None),
Some(Symbol::intern("aaab"))
);

assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("1111111111"), None),
None
);
assert_eq!(find_best_match_for_name(&input, Symbol::intern("1111111111"), None), None);

let input = vec![Symbol::intern("aAAA")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("AAAA"), None),
find_best_match_for_name(&input, Symbol::intern("AAAA"), None),
Some(Symbol::intern("aAAA"))
);

let input = vec![Symbol::intern("AAAA")];
// Returns None because `lev_distance > max_dist / 3`
assert_eq!(find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), None), None);
assert_eq!(find_best_match_for_name(&input, Symbol::intern("aaaa"), None), None);

let input = vec![Symbol::intern("AAAA")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), Some(4)),
find_best_match_for_name(&input, Symbol::intern("aaaa"), Some(4)),
Some(Symbol::intern("AAAA"))
);

let input = vec![Symbol::intern("a_longer_variable_name")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("a_variable_longer_name"), None),
find_best_match_for_name(&input, Symbol::intern("a_variable_longer_name"), None),
Some(Symbol::intern("a_longer_variable_name"))
);
})
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use hygiene::Transparency;
pub use hygiene::{DesugaringKind, ExpnData, ExpnId, ExpnKind, ForLoopLoc, MacroKind};
pub mod def_id;
use def_id::{CrateNum, DefId, LOCAL_CRATE};
pub mod lev_distance;
mod span_encoding;
pub use span_encoding::{Span, DUMMY_SP};

Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_typeck/src/astconv/errors.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use crate::astconv::AstConv;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_data_structures::fx::FxHashMap;
use rustc_errors::{pluralize, struct_span_err, Applicability};
use rustc_hir as hir;
use rustc_hir::def_id::DefId;
use rustc_middle::ty;
use rustc_session::parse::feature_err;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::symbol::{sym, Ident};
use rustc_span::{Span, DUMMY_SP};

Expand Down Expand Up @@ -180,7 +180,7 @@ impl<'o, 'tcx> dyn AstConv<'tcx> + 'o {
.collect();

if let (Some(suggested_name), true) = (
find_best_match_for_name(all_candidate_names.iter(), assoc_name.name, None),
find_best_match_for_name(&all_candidate_names, assoc_name.name, None),
assoc_name.span != DUMMY_SP,
) {
err.span_suggestion(
Expand Down
8 changes: 6 additions & 2 deletions compiler/rustc_typeck/src/astconv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use crate::errors::{
};
use crate::middle::resolve_lifetime as rl;
use crate::require_c_abi_if_c_variadic;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_errors::{struct_span_err, Applicability, ErrorReported, FatalError};
use rustc_hir as hir;
Expand All @@ -26,6 +25,7 @@ use rustc_middle::ty::subst::{self, InternalSubsts, Subst, SubstsRef};
use rustc_middle::ty::GenericParamDefKind;
use rustc_middle::ty::{self, Const, DefIdTree, Ty, TyCtxt, TypeFoldable};
use rustc_session::lint::builtin::AMBIGUOUS_ASSOCIATED_ITEMS;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::symbol::{Ident, Symbol};
use rustc_span::{Span, DUMMY_SP};
use rustc_target::spec::abi;
Expand Down Expand Up @@ -1579,7 +1579,11 @@ impl<'o, 'tcx> dyn AstConv<'tcx> + 'o {

let adt_def = qself_ty.ty_adt_def().expect("enum is not an ADT");
if let Some(suggested_name) = find_best_match_for_name(
adt_def.variants.iter().map(|variant| &variant.ident.name),
&adt_def
.variants
.iter()
.map(|variant| variant.ident.name)
.collect::<Vec<Symbol>>(),
assoc_ident.name,
None,
) {
Expand Down
Loading

0 comments on commit 5481c1b

Please sign in to comment.