Skip to content

Commit

Permalink
Fix to #22049 - Query: consider updating select expression identifier…
Browse files Browse the repository at this point in the history
…s when projecting a subset of column and adding distinct

As fix to #15873 we started blocking some scenarios that used to work (by accident) - when we have a subquery using Distinct or GroupBy that doesn't happen to have any duplicates.

Fix is to enable those scenarios (and others) by modifying identifier columns in case of distinct and group by, if the original identifiers are not already present. In case of distinct, the entire projection becomes unique identifier, as distinct guarantees it to be unique.
In case of groupby, the grouping key becomes the identifier - since we only support grouping key or group aggregate in the projection, we are also guaranteed to have 1 row per unique grouping key.

Also fix to #24288 - Query: add collection join tries to convert correlated collection from APPLY to JOIN for subqueries with Distinct and GroupBy, which is incorrect

we would always try to convert subquery with groupby and distinct from apply to join, however we can only do this if the projection already contains the join key. Otherwise, adding the join key to the projection would change the meaning of operation in case of distinct and create invalid query in case of group by (projecting column that is not part of grouping key or aggregate).

Fixes #22049
Fixes #24288
  • Loading branch information
maumar committed Mar 2, 2021
1 parent 01adefd commit 8fc1e26
Show file tree
Hide file tree
Showing 25 changed files with 1,253 additions and 111 deletions.
20 changes: 17 additions & 3 deletions src/EFCore.Relational/Properties/RelationalStrings.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions src/EFCore.Relational/Properties/RelationalStrings.resx
Original file line number Diff line number Diff line change
Expand Up @@ -621,8 +621,14 @@
<data name="MissingConcurrencyColumn" xml:space="preserve">
<value>Entity type '{entityType}' doesn't contain a property mapped to the store-generated concurrency token column '{missingColumn}' which is used by another entity type sharing the table '{table}'. Add a store-generated property to '{entityType}' which is mapped to the same column; it may be in shadow state.</value>
</data>
<data name="MissingIdentifyingProjectionInDistinctGroupBySubquery" xml:space="preserve">
<value>Unable to translate a collection subquery in a projection since it uses 'Distinct' or 'Group By' operations and doesn't project key columns of all tables required to generate results on the client side. Missing column: {column}. Either add column(s) to the projection or rewrite the query to not use the 'GroupBy'/'Distinct' operation.</value>
<data name="UnableToTranslateSubqueryWithDistinct" xml:space="preserve">
<value>Subquery with 'Distinct' can only be translated if projection consists only of entities and their properties, or it contains keys of all entities required to generate results on the client side. Either add '{column}' to the projection, remove complex elements of the projection, or rewrite the query to not use the 'Distinct' operation.</value>
</data>
<data name="UnableToTranslateSubqueryWithGroupBy" xml:space="preserve">
<value>Subquery with 'GroupBy' can only be translated if grouping key consists only of entities and their properties, or it contains keys of all entities required to generate results on the client side. Either add '{column}' to the grouping key, remove complex elements of the grouping key, or rewrite the query to not use the 'GroupBy' operation.</value>
</data>
<data name="DistinctOnCollectionNotSupported" xml:space="preserve">
<value>Using 'Distinct' operation on a projection containing a collection is not supported.</value>
</data>
<data name="MissingOrderingInSelectExpression" xml:space="preserve">
<value>'Reverse' could not be translated to the server because there is no ordering on the server side.</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,11 +387,33 @@ private static ShapedQueryExpression CreateShapedQueryExpression(IEntityType ent
{
Check.NotNull(source, nameof(source));

var collectionShaperExpressionFinder = new CollectionShaperExpressionFinder();
collectionShaperExpressionFinder.Visit(source.ShaperExpression);
if (collectionShaperExpressionFinder.Found)
{
throw new NotSupportedException(RelationalStrings.DistinctOnCollectionNotSupported);
}

((SelectExpression)source.QueryExpression).ApplyDistinct();

return source;
}

private class CollectionShaperExpressionFinder : ExpressionVisitor
{
public bool Found { get; private set; }

protected override Expression VisitExtension(Expression extensionExpression)
{
if (extensionExpression is CollectionShaperExpression)
{
Found = true;
}

return base.VisitExtension(extensionExpression);
}
}

/// <inheritdoc />
protected override ShapedQueryExpression? TranslateElementAtOrDefault(
ShapedQueryExpression source,
Expand Down
128 changes: 116 additions & 12 deletions src/EFCore.Relational/Query/SqlExpressions/SelectExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1119,6 +1119,20 @@ public IDictionary<SqlExpression, ColumnExpression> PushdownIntoSubquery()
// if we can't propagate any identifier - clear them all instead
// when adding collection join we detect this and throw appropriate exception
_identifier.Clear();

if (IsDistinct
&& projectionMap.Select(p => p.Value).All(x => x is ColumnExpression))
{
// for distinct try to use entire projection as identifiers
_identifier.AddRange(projectionMap.Select(p => p.Value).Select(x => ((ColumnExpression)x, x.TypeMapping!.Comparer)));
}
else if (GroupBy.Count > 0
&& GroupBy.All(x => x is ColumnExpression))
{
// for group by try to use grouping key as identifiers
_identifier.AddRange(GroupBy.Select(x => ((ColumnExpression)x, x.TypeMapping!.Comparer)));
}

break;
}
}
Expand Down Expand Up @@ -1409,7 +1423,7 @@ public CollectionShaperExpression AddCollectionProjection(
var parentIdentifier = GetIdentifierAccessor(identifierFromParent).Item1;
innerSelectExpression.ApplyProjection();

ValidateIdentifyingProjection(innerSelectExpression);
RemapIdentifiers(innerSelectExpression);

for (var i = 0; i < identifierFromParent.Count; i++)
{
Expand Down Expand Up @@ -1511,7 +1525,7 @@ public CollectionShaperExpression AddCollectionProjection(
var innerClientEval = innerSelectExpression.Projection.Count > 0;
innerSelectExpression.ApplyProjection();

ValidateIdentifyingProjection(innerSelectExpression);
RemapIdentifiers(innerSelectExpression);

if (collectionIndex == 0)
{
Expand Down Expand Up @@ -1635,24 +1649,72 @@ public CollectionShaperExpression AddCollectionProjection(
return result;
}

static void ValidateIdentifyingProjection(SelectExpression selectExpression)
static void RemapIdentifiers(SelectExpression innerSelectExpression)
{
if (selectExpression.IsDistinct
|| selectExpression.GroupBy.Count > 0)
if (innerSelectExpression.IsDistinct
|| innerSelectExpression.GroupBy.Count > 0)
{
var innerSelectProjectionExpressions = selectExpression._projection.Select(p => p.Expression).ToList();
foreach (var innerSelectIdentifier in selectExpression._identifier)
if (!IdentifiersInProjection(innerSelectExpression, out var missingIdentifier))
{
if (!innerSelectProjectionExpressions.Contains(innerSelectIdentifier.Column)
&& (selectExpression.GroupBy.Count == 0
|| !selectExpression.GroupBy.Contains(innerSelectIdentifier.Column)))
if (innerSelectExpression.IsDistinct)
{
throw new InvalidOperationException(RelationalStrings.MissingIdentifyingProjectionInDistinctGroupBySubquery(
innerSelectIdentifier.Column.Table.Alias + "." + innerSelectIdentifier.Column.Name));
if (innerSelectExpression._projection.All(p => p.Expression is ColumnExpression))
{
innerSelectExpression._identifier.Clear();
foreach (var projection in innerSelectExpression._projection)
{
innerSelectExpression._identifier.Add(((ColumnExpression)projection.Expression, projection.Expression.TypeMapping!.Comparer));
}
}
else
{
throw new InvalidOperationException(
RelationalStrings.UnableToTranslateSubqueryWithDistinct(
missingIdentifier.Table.Alias + "." + missingIdentifier.Name));
}
}
else
{
if (innerSelectExpression.GroupBy.All(g => g is ColumnExpression))
{
innerSelectExpression._identifier.Clear();
foreach (var grouping in innerSelectExpression.GroupBy)
{
innerSelectExpression._identifier.Add(((ColumnExpression)grouping, grouping.TypeMapping!.Comparer));
}
}
else
{
throw new InvalidOperationException(
RelationalStrings.UnableToTranslateSubqueryWithGroupBy(
missingIdentifier.Table.Alias + "." + missingIdentifier.Name));
}
}
}
}
}

static bool IdentifiersInProjection(
SelectExpression selectExpression,
[CA.NotNullWhen(false)] out ColumnExpression? missingIdentifier)
{
var innerSelectProjectionExpressions = selectExpression._projection.Select(p => p.Expression).ToList();
foreach (var innerSelectIdentifier in selectExpression._identifier)
{
if (!innerSelectProjectionExpressions.Contains(innerSelectIdentifier.Column)
&& (selectExpression.GroupBy.Count == 0
|| !selectExpression.GroupBy.Contains(innerSelectIdentifier.Column)))
{
missingIdentifier = innerSelectIdentifier.Column;

return false;
}
}

missingIdentifier = null;

return true;
}
}

private sealed class EntityShaperNullableMarkingExpressionVisitor : ExpressionVisitor
Expand Down Expand Up @@ -1709,12 +1771,54 @@ private static SqlExpression MakeNullable(SqlExpression sqlExpression)
joinPredicate = RemoveRedundantNullChecks(joinPredicate, columnExpressions);
}

// we can't convert apply to join in case of distinct and groupby, if the projection doesn't already contain the join keys
// since we can't add the missing keys to the projection - only convert to join if all the keys are already there
if (joinPredicate != null
&& (selectExpression.IsDistinct
|| selectExpression.GroupBy.Count > 0))
{
// if select expression has only one table and projecting an entity
// we are guaranteed that the join key will be in the projection
// even if currently the projection is not populated
if (selectExpression.Tables.Count != 1
|| selectExpression._projectionMapping.Count != 1
|| selectExpression._projectionMapping.First().Value is not EntityProjectionExpression)
{
var innerKeyColumns = InnerKeyColumns(selectExpression.Tables, joinPredicate);
foreach (var innerColumn in innerKeyColumns)
{
if (!selectExpression.Projection.Select(p => p.Expression).Contains(innerColumn))
{
return null;
}
}
}
}

selectExpression.Predicate = predicate;

return joinPredicate;
}

return null;

static List<ColumnExpression> InnerKeyColumns(IEnumerable<TableExpressionBase> tables, SqlExpression joinPredicate)
{
if (joinPredicate is SqlBinaryExpression sqlBinaryExpression)
{
var leftColumns = InnerKeyColumns(tables, sqlBinaryExpression.Left);
var rightColumns = InnerKeyColumns(tables, sqlBinaryExpression.Right);

return leftColumns.Concat(rightColumns).ToList();
}
else if (joinPredicate is ColumnExpression columnExpression
&& tables.Contains(columnExpression.Table))
{
return new List<ColumnExpression> { columnExpression };
}

return new List<ColumnExpression>();
}
}

private SqlExpression? TryExtractJoinKey(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4222,6 +4222,12 @@ public override Task Using_string_Equals_with_StringComparison_throws_informativ
CoreStrings.QueryUnableToTranslateStringEqualsWithStringComparison);
}

[ConditionalFact(Skip = "Cross collection join Issue#17246")]
public override Task Select_nested_collection_with_distinct(bool async)
{
return base.Select_nested_collection_with_distinct(async);
}

private void AssertSql(params string[] expected)
=> Fixture.TestSqlLoggerFactory.AssertBaseline(expected);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1144,9 +1144,9 @@ public override Task Projecting_multiple_collection_with_same_constant_works(boo
}

[ConditionalTheory(Skip = "Issue#17246")]
public override async Task Projecting_after_navigation_and_distinct_throws(bool isAsync)
public override async Task Projecting_after_navigation_and_distinct(bool isAsync)
{
await base.Projecting_after_navigation_and_distinct_throws(isAsync);
await base.Projecting_after_navigation_and_distinct(isAsync);

AssertSql(" ");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ public override void Client_code_using_instance_in_static_method()
public override void Client_code_using_instance_method_throws()
=> base.Client_code_using_instance_method_throws();

[ConditionalTheory(Skip = "Issue#24291")]
public override Task Select_nested_collection_with_distinct(bool async)
=> base.Select_nested_collection_with_distinct(async);

public override async Task Max_on_empty_sequence_throws(bool async)
{
using var context = CreateContext();
Expand Down
Loading

0 comments on commit 8fc1e26

Please sign in to comment.