Hardened String Block Parser (#3239)

* fix: add headerSize to stringBlock to detect larger headers * fix: handle app with style offset, but 0 styles * refactor: split counting stream into CountingDataInput * fix: read strings till end of string pool chunk * fix: support out of bound string reading * fix: don't read string/style offset out of bounds * refactor: cleanup comments for string parser * style: comment on 4 byte alignment * fix: only warn if utf16 string
iBotPeaches · Aug 1, 2023 · 7c2cb5b · 7c2cb5b
1 parent 9f8c1b3
commit 7c2cb5b
Show file tree

Hide file tree

Showing 6 changed files with 142 additions and 59 deletions.
diff --git a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/data/arsc/ARSCHeader.java b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/data/arsc/ARSCHeader.java
@@ -16,8 +16,8 @@
  */
 package brut.androlib.res.data.arsc;
 
+import brut.util.ExtCountingDataInput;
 import brut.util.ExtDataInput;
-import org.apache.commons.io.input.CountingInputStream;
 
 import java.io.EOFException;
 import java.io.IOException;
@@ -39,23 +39,23 @@ public ARSCHeader(short type, int headerSize, int chunkSize, int headerStart) {
         this.endPosition = headerStart + chunkSize;
     }
 
-    public static ARSCHeader read(ExtDataInput in, CountingInputStream countIn) throws IOException {
+    public static ARSCHeader read(ExtCountingDataInput in) throws IOException {
         short type;
-        int start = countIn.getCount();
+        int start = in.position();
         try {
             type = in.readShort();
         } catch (EOFException ex) {
-            return new ARSCHeader(RES_NONE_TYPE, 0, 0, countIn.getCount());
+            return new ARSCHeader(RES_NONE_TYPE, 0, 0, in.position());
         }
         return new ARSCHeader(type, in.readShort(), in.readInt(), start);
     }
 
-    public void checkForUnreadHeader(ExtDataInput in, CountingInputStream countIn) throws IOException {
+    public void checkForUnreadHeader(ExtCountingDataInput in) throws IOException {
         // Some applications lie about the reported size of their chunk header. Trusting the chunkSize is misleading
         // So compare to what we actually read in the header vs reported and skip the rest.
         // However, this runs after each chunk and not every chunk reading has a specific distinction between the
         // header and the body.
-        int actualHeaderSize = countIn.getCount() - this.startPosition;
+        int actualHeaderSize = in.position() - this.startPosition;
         int exceedingSize = this.headerSize - actualHeaderSize;
         if (exceedingSize > 0) {
             byte[] buf = new byte[exceedingSize];

diff --git a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/ARSCDecoder.java b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/ARSCDecoder.java
@@ -22,9 +22,8 @@
 import brut.androlib.res.data.arsc.*;
 import brut.androlib.res.data.value.*;
 import brut.util.Duo;
-import brut.util.ExtDataInput;
+import brut.util.ExtCountingDataInput;
 import com.google.common.io.LittleEndianDataInputStream;
-import org.apache.commons.io.input.CountingInputStream;
 
 import java.io.*;
 import java.math.BigInteger;
@@ -52,16 +51,12 @@ public static ARSCData decode(InputStream arscStream, boolean findFlagsOffsets,
     }
 
     private ARSCDecoder(InputStream arscStream, ResTable resTable, boolean storeFlagsOffsets, boolean keepBroken) {
-        arscStream = mCountIn = new CountingInputStream(arscStream);
         if (storeFlagsOffsets) {
             mFlagsOffsets = new ArrayList<>();
         } else {
             mFlagsOffsets = null;
         }
-        // We need to explicitly cast to DataInput as otherwise the constructor is ambiguous.
-        // We choose DataInput instead of InputStream as ExtDataInput wraps an InputStream in
-        // a DataInputStream which is big-endian and ignores the little-endian behavior.
-        mIn = new ExtDataInput((DataInput) new LittleEndianDataInputStream(arscStream));
+        mIn = new ExtCountingDataInput(new LittleEndianDataInputStream(arscStream));
         mResTable = resTable;
         mKeepBroken = keepBroken;
     }
@@ -126,20 +121,20 @@ private ResPackage[] readResourceTable() throws IOException, AndrolibException {
 
     private void readStringPoolChunk() throws IOException, AndrolibException {
         checkChunkType(ARSCHeader.RES_STRING_POOL_TYPE);
-        mTableStrings = StringBlock.readWithoutChunk(mIn, mHeader.chunkSize);
+        mTableStrings = StringBlock.readWithoutChunk(mIn, mHeader.startPosition, mHeader.headerSize, mHeader.chunkSize);
     }
 
     private void readTableChunk() throws IOException, AndrolibException {
         checkChunkType(ARSCHeader.RES_TABLE_TYPE);
         mIn.skipInt(); // packageCount
 
-        mHeader.checkForUnreadHeader(mIn, mCountIn);
+        mHeader.checkForUnreadHeader(mIn);
     }
 
     private void readUnknownChunk() throws IOException, AndrolibException {
         checkChunkType(ARSCHeader.RES_NULL_TYPE);
 
-        mHeader.checkForUnreadHeader(mIn, mCountIn);
+        mHeader.checkForUnreadHeader(mIn);
 
         LOGGER.warning("Skipping unknown chunk data of size " + mHeader.chunkSize);
         mHeader.skipChunk(mIn);
@@ -177,7 +172,7 @@ private ResPackage readTablePackage() throws IOException, AndrolibException {
             LOGGER.warning("Please report this application to Apktool for a fix: https://github.com/iBotPeaches/Apktool/issues/1728");
         }
 
-        mHeader.checkForUnreadHeader(mIn, mCountIn);
+        mHeader.checkForUnreadHeader(mIn);
 
         mTypeNames = StringBlock.readWithChunk(mIn);
         mSpecNames = StringBlock.readWithChunk(mIn);
@@ -195,7 +190,7 @@ private void readLibraryType() throws AndrolibException, IOException {
         int packageId;
         String packageName;
 
-        mHeader.checkForUnreadHeader(mIn, mCountIn);
+        mHeader.checkForUnreadHeader(mIn);
 
         for (int i = 0; i < libraryCount; i++) {
             packageId = mIn.readInt();
@@ -207,7 +202,7 @@ private void readLibraryType() throws AndrolibException, IOException {
     private void readStagedAliasSpec() throws IOException {
         int count = mIn.readInt();
 
-        mHeader.checkForUnreadHeader(mIn, mCountIn);
+        mHeader.checkForUnreadHeader(mIn);
 
         for (int i = 0; i < count; i++) {
             LOGGER.fine(String.format("Skipping staged alias stagedId (%h) finalId: %h", mIn.readInt(), mIn.readInt()));
@@ -219,7 +214,7 @@ private void readOverlaySpec() throws AndrolibException, IOException {
         String name = mIn.readNullEndedString(256, true);
         String actor = mIn.readNullEndedString(256, true);
 
-        mHeader.checkForUnreadHeader(mIn, mCountIn);
+        mHeader.checkForUnreadHeader(mIn);
 
         LOGGER.fine(String.format("Overlay name: \"%s\", actor: \"%s\")", name, actor));
     }
@@ -229,7 +224,7 @@ private void readOverlayPolicySpec() throws AndrolibException, IOException {
         mIn.skipInt(); // policyFlags
         int count = mIn.readInt();
 
-        mHeader.checkForUnreadHeader(mIn, mCountIn);
+        mHeader.checkForUnreadHeader(mIn);
 
         for (int i = 0; i < count; i++) {
             LOGGER.fine(String.format("Skipping overlay (%h)", mIn.readInt()));
@@ -244,10 +239,10 @@ private ResTypeSpec readTableSpecType() throws AndrolibException, IOException {
         int entryCount = mIn.readInt();
 
         if (mFlagsOffsets != null) {
-            mFlagsOffsets.add(new FlagsOffset(mCountIn.getCount(), entryCount));
+            mFlagsOffsets.add(new FlagsOffset(mIn.position(), entryCount));
         }
 
-        mHeader.checkForUnreadHeader(mIn, mCountIn);
+        mHeader.checkForUnreadHeader(mIn);
 
         mIn.skipBytes(entryCount * 4); // flags
         mTypeSpec = new ResTypeSpec(mTypeNames.getString(id - 1), mResTable, mPkg, id, entryCount);
@@ -272,7 +267,7 @@ private ResType readTableType() throws IOException, AndrolibException {
         mMissingResSpecMap = new LinkedHashMap<>();
         ResConfigFlags flags = readConfigFlags();
 
-        mHeader.checkForUnreadHeader(mIn, mCountIn);
+        mHeader.checkForUnreadHeader(mIn);
 
         if ((typeFlags & 0x01) != 0) {
             LOGGER.fine("Sparse type flags detected: " + mTypeSpec.getName());
@@ -310,7 +305,7 @@ private ResType readTableType() throws IOException, AndrolibException {
             mResId = (mResId & 0xffff0000) | i;
 
             // As seen in some recent APKs - there are more entries reported than can fit in the chunk.
-            if (mCountIn.getCount() == mHeader.endPosition) {
+            if (mIn.position() == mHeader.endPosition) {
                 int remainingEntries = entryCount - i;
                 LOGGER.warning(String.format("End of chunk hit. Skipping remaining entries (%d) in type: %s",
                     remainingEntries, mTypeSpec.getName()
@@ -325,8 +320,8 @@ private ResType readTableType() throws IOException, AndrolibException {
         }
 
         // skip "TYPE 8 chunks" and/or padding data at the end of this chunk
-        if (mCountIn.getCount() < mHeader.endPosition) {
-            long bytesSkipped = mCountIn.skip(mHeader.endPosition - mCountIn.getCount());
+        if (mIn.position() < mHeader.endPosition) {
+            long bytesSkipped = mIn.skip(mHeader.endPosition - mIn.position());
             LOGGER.warning("Unknown data detected. Skipping: " + bytesSkipped + " byte(s)");
         }
 
@@ -630,7 +625,7 @@ private void removeResSpec(ResResSpec spec) {
     }
 
     private ARSCHeader nextChunk() throws IOException {
-        return mHeader = ARSCHeader.read(mIn, mCountIn);
+        return mHeader = ARSCHeader.read(mIn);
     }
 
     private void checkChunkType(int expectedType) throws AndrolibException {
@@ -640,9 +635,8 @@ private void checkChunkType(int expectedType) throws AndrolibException {
         }
     }
 
-    private final ExtDataInput mIn;
+    private final ExtCountingDataInput mIn;
     private final ResTable mResTable;
-    private final CountingInputStream mCountIn;
     private final List<FlagsOffset> mFlagsOffsets;
     private final boolean mKeepBroken;
 

diff --git a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/AXmlResourceParser.java b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/AXmlResourceParser.java
@@ -24,9 +24,8 @@
 import brut.androlib.res.data.arsc.ARSCHeader;
 import brut.androlib.res.data.axml.NamespaceStack;
 import brut.androlib.res.xml.ResXmlEncoders;
-import brut.util.ExtDataInput;
+import brut.util.ExtCountingDataInput;
 import com.google.common.io.LittleEndianDataInputStream;
-import org.apache.commons.io.input.CountingInputStream;
 import org.xmlpull.v1.XmlPullParserException;
 
 import java.io.*;
@@ -64,11 +63,7 @@ public void setAttrDecoder(ResAttrDecoder attrDecoder) {
     public void open(InputStream stream) {
         close();
         if (stream != null) {
-            stream = mCountIn = new CountingInputStream(stream);
-            // We need to explicitly cast to DataInput as otherwise the constructor is ambiguous.
-            // We choose DataInput instead of InputStream as ExtDataInput wraps an InputStream in
-            // a DataInputStream which is big-endian and ignores the little-endian behavior.
-            mIn = new ExtDataInput((DataInput) new LittleEndianDataInputStream(stream));
+            mIn = new ExtCountingDataInput(new LittleEndianDataInputStream(stream));
         }
     }
 
@@ -79,7 +74,6 @@ public void close() {
         }
         isOperational = false;
         mIn = null;
-        mCountIn = null;
         mStringBlock = null;
         mResourceIds = null;
         mNamespaces.reset();
@@ -687,8 +681,8 @@ private void doNext() throws IOException {
             }
 
             // #2070 - Some applications have 2 start namespaces, but only 1 end namespace.
-            if (mCountIn.available() == 0) {
-                LOGGER.warning(String.format("AXML hit unexpected end of file at byte: 0x%X", mCountIn.getCount()));
+            if (mIn.remaining() == 0) {
+                LOGGER.warning(String.format("AXML hit unexpected end of file at byte: 0x%X", mIn.position()));
                 mEvent = END_DOCUMENT;
                 break;
             }
@@ -715,7 +709,7 @@ private void doNext() throws IOException {
             if (chunkType < ARSCHeader.RES_XML_FIRST_CHUNK_TYPE || chunkType > ARSCHeader.RES_XML_LAST_CHUNK_TYPE) {
                 int chunkSize = mIn.readInt();
                 mIn.skipBytes(chunkSize - 8);
-                LOGGER.warning(String.format("Unknown chunk type at: (0x%08x) skipping...", mCountIn.getCount()));
+                LOGGER.warning(String.format("Unknown chunk type at: (0x%08x) skipping...", mIn.position()));
                 break;
             }
 
@@ -806,8 +800,7 @@ private void setFirstError(AndrolibException error) {
         }
     }
 
-    private ExtDataInput mIn;
-    private CountingInputStream mCountIn;
+    private ExtCountingDataInput mIn;
     private ResAttrDecoder mAttrDecoder;
     private AndrolibException mFirstError;
 

diff --git a/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java b/brut.apktool/apktool-lib/src/main/java/brut/androlib/res/decoder/StringBlock.java
@@ -18,7 +18,7 @@
 
 import brut.androlib.res.data.arsc.ARSCHeader;
 import brut.androlib.res.xml.ResXmlEncoders;
-import brut.util.ExtDataInput;
+import brut.util.ExtCountingDataInput;
 import com.google.common.annotations.VisibleForTesting;
 
 import java.io.IOException;
@@ -30,44 +30,61 @@
 import java.util.logging.Logger;
 
 public class StringBlock {
-    public static StringBlock readWithChunk(ExtDataInput reader) throws IOException {
+    public static StringBlock readWithChunk(ExtCountingDataInput reader) throws IOException {
+        int startPosition = reader.position();
         reader.skipCheckShort(ARSCHeader.RES_STRING_POOL_TYPE);
-        reader.skipShort(); // headerSize
+        int headerSize = reader.readShort();
         int chunkSize = reader.readInt();
 
-        return readWithoutChunk(reader, chunkSize);
+        return readWithoutChunk(reader, startPosition, headerSize, chunkSize);
     }
 
-    public static StringBlock readWithoutChunk(ExtDataInput reader, int chunkSize) throws IOException {
+    public static StringBlock readWithoutChunk(ExtCountingDataInput reader, int startPosition, int headerSize,
+                                               int chunkSize) throws IOException
+    {
         // ResStringPool_header
         int stringCount = reader.readInt();
         int styleCount = reader.readInt();
         int flags = reader.readInt();
         int stringsOffset = reader.readInt();
         int stylesOffset = reader.readInt();
 
+        // For some applications they pack the StringBlock header with more unused data at end.
+        if (headerSize > STRING_BLOCK_HEADER_SIZE) {
+            reader.skipBytes(headerSize - STRING_BLOCK_HEADER_SIZE);
+        }
+
         StringBlock block = new StringBlock();
         block.m_isUTF8 = (flags & UTF8_FLAG) != 0;
-        block.m_stringOffsets = reader.readIntArray(stringCount);
+        block.m_stringOffsets = reader.readSafeIntArray(stringCount, startPosition + stringsOffset);
 
         if (styleCount != 0) {
-            block.m_styleOffsets = reader.readIntArray(styleCount);
+            block.m_styleOffsets = reader.readSafeIntArray(styleCount, startPosition + stylesOffset);
+        }
+
+        // #3236 - Some applications give a style offset, but have 0 styles. Make this check more robust.
+        boolean hasStyles = stylesOffset != 0 && styleCount != 0;
+        int size = chunkSize - stringsOffset;
+
+        // If we have both strings and even just a lying style offset - lets calculate the size of the strings without
+        // accidentally parsing all the styles.
+        if (styleCount > 0) {
+            size = stylesOffset - stringsOffset;
         }
 
-        int size = ((stylesOffset == 0) ? chunkSize : stylesOffset) - stringsOffset;
         block.m_strings = new byte[size];
         reader.readFully(block.m_strings);
 
-        if (stylesOffset != 0) {
-            size = (chunkSize - stylesOffset);
+        if (hasStyles) {
+            size = chunkSize - stylesOffset;
             block.m_styles = reader.readIntArray(size / 4);
+        }
 
-            // read remaining bytes
-            int remaining = size % 4;
-            if (remaining >= 1) {
-                while (remaining-- > 0) {
-                    reader.readByte();
-                }
+        // In case we aren't 4 byte aligned we need to skip the remaining bytes.
+        int remaining = size % 4;
+        if (remaining >= 1) {
+            while (remaining-- > 0) {
+                reader.readByte();
             }
         }
 
@@ -211,6 +228,11 @@ String decodeString(int offset, int length) {
                 LOGGER.warning("Failed to decode a string at offset " + offset + " of length " + length);
                 return null;
             }
+        } catch (IndexOutOfBoundsException ex) {
+            if (!m_isUTF8) {
+                LOGGER.warning("String extends outside of pool at  " + offset + " of length " + length);
+                return null;
+            }
         }
 
         try {
@@ -275,4 +297,5 @@ private static int[] getUtf16(byte[] array, int offset) {
     private static final Logger LOGGER = Logger.getLogger(StringBlock.class.getName());
 
     private static final int UTF8_FLAG = 0x00000100;
+    private static final int STRING_BLOCK_HEADER_SIZE = 28;
 }
diff --git a/brut.j.util/build.gradle b/brut.j.util/build.gradle
@@ -17,4 +17,5 @@
 dependencies {
   implementation project(':brut.j.common')
   implementation depends.commons_io
+  implementation depends.guava
 }