apache · cryptoe · Mar 7, 2024 · Feb 27, 2024 · Feb 29, 2024 · Mar 1, 2024
diff --git a/processing/src/main/java/org/apache/druid/frame/allocation/AppendableMemory.java b/processing/src/main/java/org/apache/druid/frame/allocation/AppendableMemory.java
@@ -135,8 +135,13 @@ public boolean reserveAdditional(final int bytes)
     if (idx < 0 || bytes + limits.getInt(idx) > blockHolders.get(idx).get().getCapacity()) {
       // Allocation needed.
       // Math.max(allocationSize, bytes) in case "bytes" is greater than SOFT_MAXIMUM_ALLOCATION_SIZE.
+      // However, cap the allocation request at the bytes available in the allocator. This covers the case where the
+      // requested bytes are less than what is available in the allocator, but SOFT_MAXIMUM_ALLOCATION_SIZE is greater
+      // than what is available, i.e. bytes < available < SOFT_MAXIMUM_ALLOCATION_SIZE. In that case we want to
+      // allocate all of the memory available in the allocator. In the other case, where available is the greatest of
+      // the three, we want to allocate the max of bytes and SOFT_MAXIMUM_ALLOCATION_SIZE.
       final Optional<ResourceHolder<WritableMemory>> newMemory =
-          allocator.allocate(Math.max(nextAllocationSize, bytes));
+          allocator.allocate(Math.min(allocator.available(), Math.max(nextAllocationSize, bytes)));
 
       if (!newMemory.isPresent()) {
         return false;

diff --git a/processing/src/main/java/org/apache/druid/frame/allocation/HeapMemoryAllocator.java b/processing/src/main/java/org/apache/druid/frame/allocation/HeapMemoryAllocator.java
@@ -19,6 +19,7 @@
 
 package org.apache.druid.frame.allocation;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.primitives.Ints;
 import org.apache.datasketches.memory.WritableMemory;
 import org.apache.druid.collections.ResourceHolder;
@@ -37,7 +38,8 @@ public class HeapMemoryAllocator implements MemoryAllocator
 
   private long bytesAllocated = 0;
 
-  private HeapMemoryAllocator(final long capacity)
+  @VisibleForTesting
+  HeapMemoryAllocator(final long capacity)
   {
     this.capacity = capacity;
   }
@@ -53,7 +55,7 @@ public static HeapMemoryAllocator unlimited()
   @Override
   public Optional<ResourceHolder<WritableMemory>> allocate(final long size)
   {
-    if (bytesAllocated < capacity - size) {
+    if (size <= capacity - bytesAllocated) {
       bytesAllocated += size;
 
       return Optional.of(

diff --git a/processing/src/main/java/org/apache/druid/frame/segment/FrameCursorUtils.java b/processing/src/main/java/org/apache/druid/frame/segment/FrameCursorUtils.java
@@ -26,6 +26,7 @@
 import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.java.util.common.guava.Sequence;
 import org.apache.druid.java.util.common.guava.Sequences;
+import org.apache.druid.query.QueryContexts;
 import org.apache.druid.query.filter.BoundDimFilter;
 import org.apache.druid.query.filter.Filter;
 import org.apache.druid.query.ordering.StringComparators;
@@ -42,6 +43,25 @@
 
 public class FrameCursorUtils
 {
+
+  /**
+   * Exception to be thrown when the subquery's rows are too wide to fit in a single frame. In such a case, byte-based
+   * limiting should be disabled or the user should modify the query.
+   * <p>
+   * NOTE: This error message is not appropriate when a similar exception is hit in MSQ, since this workaround
+   * is not applicable in that scenario.
+   */
+  public static final DruidException SUBQUERY_ROW_TOO_LARGE_EXCEPTION =
+      DruidException
+          .forPersona(DruidException.Persona.OPERATOR)
+          .ofCategory(DruidException.Category.CAPACITY_EXCEEDED)
+          .build(
+              "Subquery's row size exceeds the frame size and therefore cannot write the subquery's "
+              + "row to the frame. Either modify the subqueries to materialize smaller rows by removing wide columns, "
+              + "or disable byte based limiting by setting '%s' to 'disabled'",
+              QueryContexts.MAX_SUBQUERY_BYTES_KEY
+          );
+
   private FrameCursorUtils()
   {
     // No instantiation.
@@ -79,60 +99,66 @@ public static Filter buildFilter(@Nullable Filter filter, Interval interval)
 
   /**
    * Writes a {@link Cursor} to a sequence of {@link Frame}. This method iterates over the rows of the cursor,
-   * and writes the columns to the frames
-   *
-   * @param cursor                 Cursor to write to the frame
-   * @param frameWriterFactory     Frame writer factory to write to the frame.
-   *                               Determines the signature of the rows that are written to the frames
+   * and writes the columns to the frames. The returned iterable is lazy: it only advances the cursor as far as is
+   * needed to produce the frames that are requested
    */
-  public static Sequence<Frame> cursorToFrames(
-      Cursor cursor,
-      FrameWriterFactory frameWriterFactory
+  public static Iterable<Frame> cursorToFramesIterable(
+      final Cursor cursor,
+      final FrameWriterFactory frameWriterFactory
   )
   {
+    return () -> new Iterator<Frame>()
+    {
+      @Override
+      public boolean hasNext()
+      {
+        return !cursor.isDone();
+      }
 
-    return Sequences.simple(
-        () -> new Iterator<Frame>()
-        {
-          @Override
-          public boolean hasNext()
-          {
-            return !cursor.isDone();
-          }
-
-          @Override
-          public Frame next()
-          {
-            // Makes sure that cursor contains some elements prior. This ensures if no row is written, then the row size
-            // is larger than the MemoryAllocators returned by the provided factory
-            if (!hasNext()) {
-              throw new NoSuchElementException();
+      @Override
+      public Frame next()
+      {
+        // Make sure that the cursor has rows remaining. This ensures that if no row is written below, then the row
+        // size is larger than the MemoryAllocators returned by the provided factory
+        if (!hasNext()) {
+          throw new NoSuchElementException();
+        }
+        boolean firstRowWritten = false;
+        Frame frame;
+        try (final FrameWriter frameWriter = frameWriterFactory.newFrameWriter(cursor.getColumnSelectorFactory())) {
+          while (!cursor.isDone()) {
+            if (!frameWriter.addSelection()) {
+              break;
             }
-            boolean firstRowWritten = false;
-            Frame frame;
-            try (final FrameWriter frameWriter = frameWriterFactory.newFrameWriter(cursor.getColumnSelectorFactory())) {
-              while (!cursor.isDone()) {
-                if (!frameWriter.addSelection()) {
-                  break;
-                }
-                firstRowWritten = true;
-                cursor.advance();
-              }
-
-              if (!firstRowWritten) {
-                throw DruidException
-                    .forPersona(DruidException.Persona.DEVELOPER)
-                    .ofCategory(DruidException.Category.CAPACITY_EXCEEDED)
-                    .build("Subquery's row size exceeds the frame size and therefore cannot write the subquery's "
-                           + "row to the frame. This is a non-configurable static limit that can only be modified by the "
-                           + "developer.");
-              }
+            firstRowWritten = true;
+            cursor.advance();
+          }
 
-              frame = Frame.wrap(frameWriter.toByteArray());
-            }
-            return frame;
+          if (!firstRowWritten) {
+            throw SUBQUERY_ROW_TOO_LARGE_EXCEPTION;
           }
+
+          frame = Frame.wrap(frameWriter.toByteArray());
         }
-    );
+        return frame;
+      }
+    };
+  }
+
+  /**
+   * Writes a {@link Cursor} to a sequence of {@link Frame}. This method iterates over the rows of the cursor,
+   * and writes the columns to the frames
+   *
+   * @param cursor             Cursor to write to the frame
+   * @param frameWriterFactory Frame writer factory to write to the frame.
+   *                           It also determines the signature of the rows that are written to the frames
+   */
+  public static Sequence<Frame> cursorToFramesSequence(
+      final Cursor cursor,
+      final FrameWriterFactory frameWriterFactory
+  )
+  {
+
+    return Sequences.simple(cursorToFramesIterable(cursor, frameWriterFactory));
   }
 }
diff --git a/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/groupby/GroupByQueryQueryToolChest.java
@@ -736,7 +736,7 @@ public Optional<Sequence<FrameSignaturePair>> resultsAsFrames(
     Cursor cursor = cursorAndCloseable.lhs;
     Closeable closeble = cursorAndCloseable.rhs;
 
-    Sequence<Frame> frames = FrameCursorUtils.cursorToFrames(cursor, frameWriterFactory).withBaggage(closeble);
+    Sequence<Frame> frames = FrameCursorUtils.cursorToFramesSequence(cursor, frameWriterFactory).withBaggage(closeble);
 
     return Optional.of(frames.map(frame -> new FrameSignaturePair(frame, modifiedRowSignature)));
   }