Skip to content

Commit

Permalink
Recycle byte[] buffer in ControllableInputStream.readToByteBuffer
Browse files Browse the repository at this point in the history
Also, eliminates a redundant buffer by not creating a ByteArrayOutputStream.

Because the ByteBuffer returned may have an array with capacity beyond its limit, uses of that bytebuffer now use the .limit() size, vs assuming it was the exact size.
  • Loading branch information
jhy committed Aug 7, 2024
1 parent 2e64811 commit 10470f3
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 37 deletions.
20 changes: 17 additions & 3 deletions src/main/java/org/jsoup/helper/HttpConnection.java
Original file line number Diff line number Diff line change
Expand Up @@ -969,7 +969,7 @@ private InputStream prepareParse() {
Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response");
InputStream stream = bodyStream;
if (byteData != null) { // bytes have been read in to the buffer, parse that
stream = new ByteArrayInputStream(byteData.array());
stream = new ByteArrayInputStream(byteData.array(), 0, byteData.limit());
inputStreamRead = false; // ok to reparse if in bytes
}
Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read.");
Expand Down Expand Up @@ -1035,7 +1035,19 @@ public String body() {
public byte[] bodyAsBytes() {
prepareByteData();
Validate.notNull(byteData);
return byteData.array();
Validate.isTrue(byteData.hasArray()); // we made it, so it should

byte[] array = byteData.array();
int offset = byteData.arrayOffset();
int length = byteData.limit();

if (offset == 0 && length == array.length) { // exact, just return it
return array;
} else { // trim to size
byte[] exactArray = new byte[length];
System.arraycopy(array, offset, exactArray, 0, length);
return exactArray;
}
}

@Override
Expand All @@ -1050,7 +1062,9 @@ public BufferedInputStream bodyStream() {

// if we have read to bytes (via buffer up), return those as a stream.
if (byteData != null) {
return new BufferedInputStream(new ByteArrayInputStream(byteData.array()), SharedConstants.DefaultBufferSize);
return new BufferedInputStream(
new ByteArrayInputStream(byteData.array(), 0, byteData.limit()),
SharedConstants.DefaultBufferSize);
}

Validate.isFalse(inputStreamRead, "Request has already been read");
Expand Down
41 changes: 24 additions & 17 deletions src/main/java/org/jsoup/internal/ControllableInputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,26 +105,33 @@ public int read(byte[] b, int off, int len) throws IOException {
public static ByteBuffer readToByteBuffer(InputStream in, int max) throws IOException {
Validate.isTrue(max >= 0, "maxSize must be 0 (unlimited) or larger");
Validate.notNull(in);
final boolean localCapped = max > 0; // still possibly capped in total stream
final int bufferSize = localCapped && max < DefaultBufferSize ? max : DefaultBufferSize;
final byte[] readBuffer = new byte[bufferSize];
final ByteArrayOutputStream outStream = new ByteArrayOutputStream(bufferSize);

int read;
int remaining = max;
while (true) {
read = in.read(readBuffer, 0, localCapped ? Math.min(remaining, bufferSize) : bufferSize);
if (read == -1) break;
if (localCapped) { // this local byteBuffer cap may be smaller than the overall maxSize (like when reading first bytes)
if (read >= remaining) {
outStream.write(readBuffer, 0, remaining);
break;
final boolean capped = max > 0;
final byte[] readBuf = SimpleBufferedInput.BufferPool.borrow(); // Share the same byte[] pool as SBI
final int outSize = capped ? Math.min(max, DefaultBufferSize) : DefaultBufferSize;
ByteBuffer outBuf = ByteBuffer.allocate(outSize);

try {
int remaining = max;
int read;
while ((read = in.read(readBuf, 0, capped ? Math.min(remaining, DefaultBufferSize) : DefaultBufferSize)) != -1) {
if (outBuf.remaining() < read) { // needs to grow
int newCapacity = (int) Math.max(outBuf.capacity() * 1.5, outBuf.capacity() + read);
ByteBuffer newBuffer = ByteBuffer.allocate(newCapacity);
outBuf.flip();
newBuffer.put(outBuf);
outBuf = newBuffer;
}
outBuf.put(readBuf, 0, read);
if (capped) {
remaining -= read;
if (remaining <= 0) break;
}
remaining -= read;
}
outStream.write(readBuffer, 0, read);
outBuf.flip(); // Prepare the buffer for reading
return outBuf;
} finally {
SimpleBufferedInput.BufferPool.release(readBuf);
}
return ByteBuffer.wrap(outStream.toByteArray());
}

@SuppressWarnings("NonSynchronizedMethodOverridesSynchronizedMethod") // not synchronized in later JDKs
Expand Down
2 changes: 1 addition & 1 deletion src/test/java/org/jsoup/helper/DataUtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ void handlesUnlimitedRead() throws IOException {
VaryingReadInputStream stream = new VaryingReadInputStream(ParseTest.inputStreamFrom(input));

ByteBuffer byteBuffer = DataUtil.readToByteBuffer(stream, 0);
String read = new String(byteBuffer.array());
String read = new String(byteBuffer.array(), 0, byteBuffer.limit(), StandardCharsets.UTF_8);

assertEquals(input, read);
}
Expand Down
24 changes: 13 additions & 11 deletions src/test/java/org/jsoup/integration/ConnectIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ public void infiniteReadSupported() throws IOException {
assertTrue(caught);
}

private static final int LargeHtmlSize = 280735;

@Test
public void remainingAfterFirstRead() throws IOException {
int bufferSize = 5 * 1024;
Expand Down Expand Up @@ -214,33 +216,34 @@ public void remainingAfterFirstRead() throws IOException {
// bodyStream is not capped to body size - only for jsoup consumed stream
assertTrue(fullArray.length > capSize);

assertEquals(280735, fullArray.length);
String fullText = new String(fullArray, StandardCharsets.UTF_8);
assertEquals(LargeHtmlSize, fullRead.limit());
String fullText = new String(fullRead.array(), 0, fullRead.limit(), StandardCharsets.UTF_8);
assertTrue(fullText.startsWith(firstText));
assertEquals(LargeHtmlSize, fullText.length());
}
}

@Test
public void noLimitAfterFirstRead() throws IOException {
int bufferSize = 5 * 1024;
int firstMaxRead = 5 * 1024;

String url = FileServlet.urlTo("/htmltests/large.html"); // 280 K
try (BufferedInputStream stream = Jsoup.connect(url).execute().bodyStream()) {
// simulates parse which does a limited read first
stream.mark(bufferSize);
ByteBuffer firstBytes = DataUtil.readToByteBuffer(stream, bufferSize);
stream.mark(firstMaxRead);
ByteBuffer firstBytes = DataUtil.readToByteBuffer(stream, firstMaxRead);
byte[] array = firstBytes.array();
String firstText = new String(array, StandardCharsets.UTF_8);
assertTrue(firstText.startsWith("<html><head><title>Large"));
assertEquals(bufferSize, array.length);
assertEquals(firstMaxRead, array.length);

// reset and read fully
stream.reset();
ByteBuffer fullRead = DataUtil.readToByteBuffer(stream, 0);
byte[] fullArray = fullRead.array();
assertEquals(280735, fullArray.length);
String fullText = new String(fullArray, StandardCharsets.UTF_8);
assertEquals(LargeHtmlSize, fullRead.limit());
String fullText = new String(fullRead.array(), 0, fullRead.limit(), StandardCharsets.UTF_8);
assertTrue(fullText.startsWith(firstText));
assertEquals(LargeHtmlSize, fullText.length());
}
}

Expand All @@ -255,8 +258,7 @@ public void noLimitAfterFirstRead() throws IOException {
.bodyStream()) {

ByteBuffer cappedRead = DataUtil.readToByteBuffer(stream, 0);
byte[] cappedArray = cappedRead.array();
assertEquals(cap, cappedArray.length);
assertEquals(cap, cappedRead.limit());
}
}
}
16 changes: 13 additions & 3 deletions src/test/java/org/jsoup/integration/ConnectTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.net.URL;
import java.net.URLDecoder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
Expand Down Expand Up @@ -603,13 +604,22 @@ public void testBinaryContentTypeThrowsException() throws IOException {

@Test
public void canFetchBinaryAsBytes() throws IOException {
Connection.Response res = Jsoup.connect(FileServlet.urlTo("/htmltests/thumb.jpg"))
String path = "/htmltests/thumb.jpg";
int actualSize = 1052;

Connection.Response res = Jsoup.connect(FileServlet.urlTo(path))
.data(FileServlet.ContentTypeParam, "image/jpeg")
.ignoreContentType(true)
.execute();

byte[] bytes = res.bodyAsBytes();
assertEquals(1052, bytes.length);
byte[] resBytes = res.bodyAsBytes();
assertEquals(actualSize, resBytes.length);

// compare the content of the file and the bytes:
Path filePath = ParseTest.getPath(path);
byte[] fileBytes = Files.readAllBytes(filePath);
assertEquals(actualSize, fileBytes.length);
assertArrayEquals(fileBytes, resBytes);
}

@Test
Expand Down
3 changes: 2 additions & 1 deletion src/test/java/org/jsoup/integration/ParseTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,8 @@ public static String getFileAsString(File file) throws IOException {
if (file.getName().endsWith(".gz")) {
InputStream stream = new GZIPInputStream(new FileInputStream(file));
ByteBuffer byteBuffer = DataUtil.readToByteBuffer(stream, 0);
bytes = byteBuffer.array();
bytes = new byte[byteBuffer.limit()];
System.arraycopy(byteBuffer.array(), 0, bytes, 0, byteBuffer.limit());
} else {
bytes = Files.readAllBytes(file.toPath());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOEx

// post body
ByteBuffer byteBuffer = DataUtil.readToByteBuffer(req.getInputStream(), 0);
String postData = new String(byteBuffer.array(), StandardCharsets.UTF_8);
String postData = new String(byteBuffer.array(), byteBuffer.arrayOffset(), byteBuffer.limit(), StandardCharsets.UTF_8);
if (!StringUtil.isBlank(postData)) {
write(w, "Post Data", postData);
}
Expand Down

0 comments on commit 10470f3

Please sign in to comment.