Skip to content

Commit

Permalink
Load optional UTF-32 charsets lazily #121
Browse files Browse the repository at this point in the history
  • Loading branch information
danielkec authored and lukasj committed Jan 23, 2024
1 parent 981cd2f commit c64269c
Showing 1 changed file with 22 additions and 9 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2021 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2024 Oracle and/or its affiliates. All rights reserved.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License v. 2.0, which is available at
Expand Down Expand Up @@ -30,17 +30,16 @@
* @author Jitendra Kotamraju
*/
class UnicodeDetectingInputStream extends FilterInputStream {

private static final Charset UTF_32LE = Charset.forName("UTF-32LE");
private static final Charset UTF_32BE = Charset.forName("UTF-32BE");

private static final byte FF = (byte)0xFF;
private static final byte FE = (byte)0xFE;
private static final byte EF = (byte)0xEF;
private static final byte BB = (byte)0xBB;
private static final byte BF = (byte)0xBF;
private static final byte NUL = (byte)0x00;

private static Charset utf32Le;
private static Charset utf32Be;

private final byte[] buf = new byte[4];
private int bufLen;
private int curIndex;
Expand Down Expand Up @@ -108,10 +107,10 @@ private Charset detectEncoding() {
// Use BOM to detect encoding
if (buf[0] == NUL && buf[1] == NUL && buf[2] == FE && buf[3] == FF) {
curIndex = 4;
return UTF_32BE;
return getUtf32be();
} else if (buf[0] == FF && buf[1] == FE && buf[2] == NUL && buf[3] == NUL) {
curIndex = 4;
return UTF_32LE;
return getUtf32le();
} else if (buf[0] == FE && buf[1] == FF) {
curIndex = 2;
return StandardCharsets.UTF_16BE;
Expand All @@ -124,18 +123,32 @@ private Charset detectEncoding() {
}
// No BOM, just use JSON RFC's encoding algo to auto-detect
if (buf[0] == NUL && buf[1] == NUL && buf[2] == NUL) {
return UTF_32BE;
return getUtf32be();
} else if (buf[0] == NUL && buf[2] == NUL) {
return StandardCharsets.UTF_16BE;
} else if (buf[1] == NUL && buf[2] == NUL && buf[3] == NUL) {
return UTF_32LE;
return getUtf32le();
} else if (buf[1] == NUL && buf[3] == NUL) {
return StandardCharsets.UTF_16LE;
}
}
return StandardCharsets.UTF_8;
}

/**
 * Returns the UTF-32BE charset, resolving it on first use.
 *
 * The charset is looked up with {@link Charset#forName(String)} the first
 * time this method is called and then kept in the static {@code utf32Be}
 * field, so later calls return the cached instance without another lookup.
 *
 * @return the cached {@code UTF-32BE} charset
 * @throws java.nio.charset.UnsupportedCharsetException if the running JVM
 *         does not provide the UTF-32BE charset
 */
private static Charset getUtf32be() {
    Charset cached = utf32Be;
    if (cached != null) {
        return cached;
    }
    // First request: perform the lookup and remember the result.
    cached = Charset.forName("UTF-32BE");
    utf32Be = cached;
    return cached;
}

/**
 * Returns the UTF-32LE charset, resolving it on first use.
 *
 * The charset is looked up with {@link Charset#forName(String)} the first
 * time this method is called and then kept in the static {@code utf32Le}
 * field, so later calls return the cached instance without another lookup.
 *
 * @return the cached {@code UTF-32LE} charset
 * @throws java.nio.charset.UnsupportedCharsetException if the running JVM
 *         does not provide the UTF-32LE charset
 */
private static Charset getUtf32le() {
    Charset cached = utf32Le;
    if (cached != null) {
        return cached;
    }
    // First request: perform the lookup and remember the result.
    cached = Charset.forName("UTF-32LE");
    utf32Le = cached;
    return cached;
}

@Override
public int read() throws IOException {
if (curIndex < bufLen) {
Expand Down

0 comments on commit c64269c

Please sign in to comment.