Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to disallow repeated keys in CBOR #2681

Open
wants to merge 4 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions core/api/kotlinx-serialization-core.api
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ public abstract interface class kotlinx/serialization/DeserializationStrategy {
public abstract fun getDescriptor ()Lkotlinx/serialization/descriptors/SerialDescriptor;
}

public final class kotlinx/serialization/DuplicateMapKeyException : kotlinx/serialization/SerializationException {
public fun <init> (Ljava/lang/Object;)V
}

public abstract interface annotation class kotlinx/serialization/EncodeDefault : java/lang/annotation/Annotation {
public abstract fun mode ()Lkotlinx/serialization/EncodeDefault$Mode;
}
Expand Down Expand Up @@ -384,6 +388,7 @@ public abstract class kotlinx/serialization/encoding/AbstractDecoder : kotlinx/s
public final fun decodeStringElement (Lkotlinx/serialization/descriptors/SerialDescriptor;I)Ljava/lang/String;
public fun decodeValue ()Ljava/lang/Object;
public fun endStructure (Lkotlinx/serialization/descriptors/SerialDescriptor;)V
public fun visitKey (Ljava/lang/Object;)V
}

public abstract class kotlinx/serialization/encoding/AbstractEncoder : kotlinx/serialization/encoding/CompositeEncoder, kotlinx/serialization/encoding/Encoder {
Expand Down Expand Up @@ -448,6 +453,7 @@ public abstract interface class kotlinx/serialization/encoding/CompositeDecoder
public abstract fun decodeStringElement (Lkotlinx/serialization/descriptors/SerialDescriptor;I)Ljava/lang/String;
public abstract fun endStructure (Lkotlinx/serialization/descriptors/SerialDescriptor;)V
public abstract fun getSerializersModule ()Lkotlinx/serialization/modules/SerializersModule;
public abstract fun visitKey (Ljava/lang/Object;)V
}

public final class kotlinx/serialization/encoding/CompositeDecoder$Companion {
Expand All @@ -460,6 +466,7 @@ public final class kotlinx/serialization/encoding/CompositeDecoder$DefaultImpls
public static synthetic fun decodeNullableSerializableElement$default (Lkotlinx/serialization/encoding/CompositeDecoder;Lkotlinx/serialization/descriptors/SerialDescriptor;ILkotlinx/serialization/DeserializationStrategy;Ljava/lang/Object;ILjava/lang/Object;)Ljava/lang/Object;
public static fun decodeSequentially (Lkotlinx/serialization/encoding/CompositeDecoder;)Z
public static synthetic fun decodeSerializableElement$default (Lkotlinx/serialization/encoding/CompositeDecoder;Lkotlinx/serialization/descriptors/SerialDescriptor;ILkotlinx/serialization/DeserializationStrategy;Ljava/lang/Object;ILjava/lang/Object;)Ljava/lang/Object;
public static fun visitKey (Lkotlinx/serialization/encoding/CompositeDecoder;Ljava/lang/Object;)V
}

public abstract interface class kotlinx/serialization/encoding/CompositeEncoder {
Expand Down Expand Up @@ -1119,6 +1126,7 @@ public abstract class kotlinx/serialization/internal/TaggedDecoder : kotlinx/ser
protected abstract fun getTag (Lkotlinx/serialization/descriptors/SerialDescriptor;I)Ljava/lang/Object;
protected final fun popTag ()Ljava/lang/Object;
protected final fun pushTag (Ljava/lang/Object;)V
public fun visitKey (Ljava/lang/Object;)V
}

public abstract class kotlinx/serialization/internal/TaggedEncoder : kotlinx/serialization/encoding/CompositeEncoder, kotlinx/serialization/encoding/Encoder {
Expand Down
4 changes: 4 additions & 0 deletions core/api/kotlinx-serialization-core.klib.api
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ abstract interface kotlinx.serialization.encoding/CompositeDecoder { // kotlinx.
}
open fun decodeCollectionSize(kotlinx.serialization.descriptors/SerialDescriptor): kotlin/Int // kotlinx.serialization.encoding/CompositeDecoder.decodeCollectionSize|decodeCollectionSize(kotlinx.serialization.descriptors.SerialDescriptor){}[0]
open fun decodeSequentially(): kotlin/Boolean // kotlinx.serialization.encoding/CompositeDecoder.decodeSequentially|decodeSequentially(){}[0]
open fun visitKey(kotlin/Any?) // kotlinx.serialization.encoding/CompositeDecoder.visitKey|visitKey(kotlin.Any?){}[0]
}
abstract interface kotlinx.serialization.encoding/CompositeEncoder { // kotlinx.serialization.encoding/CompositeEncoder|null[0]
abstract fun <#A1: kotlin/Any> encodeNullableSerializableElement(kotlinx.serialization.descriptors/SerialDescriptor, kotlin/Int, kotlinx.serialization/SerializationStrategy<#A1>, #A1?) // kotlinx.serialization.encoding/CompositeEncoder.encodeNullableSerializableElement|encodeNullableSerializableElement(kotlinx.serialization.descriptors.SerialDescriptor;kotlin.Int;kotlinx.serialization.SerializationStrategy<0:0>;0:0?){0§<kotlin.Any>}[0]
Expand Down Expand Up @@ -522,6 +523,9 @@ final class kotlinx.serialization.modules/SerializersModuleBuilder : kotlinx.ser
final fun build(): kotlinx.serialization.modules/SerializersModule // kotlinx.serialization.modules/SerializersModuleBuilder.build|build(){}[0]
final fun include(kotlinx.serialization.modules/SerializersModule) // kotlinx.serialization.modules/SerializersModuleBuilder.include|include(kotlinx.serialization.modules.SerializersModule){}[0]
}
final class kotlinx.serialization/DuplicateMapKeyException : kotlinx.serialization/SerializationException { // kotlinx.serialization/DuplicateMapKeyException|null[0]
constructor <init>(kotlin/Any?) // kotlinx.serialization/DuplicateMapKeyException.<init>|<init>(kotlin.Any?){}[0]
}
final class kotlinx.serialization/MissingFieldException : kotlinx.serialization/SerializationException { // kotlinx.serialization/MissingFieldException|null[0]
constructor <init>(kotlin.collections/List<kotlin/String>, kotlin/String) // kotlinx.serialization/MissingFieldException.<init>|<init>(kotlin.collections.List<kotlin.String>;kotlin.String){}[0]
constructor <init>(kotlin.collections/List<kotlin/String>, kotlin/String?, kotlin/Throwable?) // kotlinx.serialization/MissingFieldException.<init>|<init>(kotlin.collections.List<kotlin.String>;kotlin.String?;kotlin.Throwable?){}[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,10 @@ internal constructor(message: String?) : SerializationException(message) {
// This constructor is used by the generated serializers
constructor(index: Int) : this("An unknown field for index $index")
}

/**
* Thrown when a map deserializer encounters a repeated map key (and configuration disallows this.)
*/
@ExperimentalSerializationApi
public class DuplicateMapKeyException(key: Any?) :
SerializationException("Duplicate keys not allowed in maps. Key appeared twice: $key")
timmc marked this conversation as resolved.
Show resolved Hide resolved
14 changes: 13 additions & 1 deletion core/commonMain/src/kotlinx/serialization/encoding/Decoding.kt
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ internal inline fun <T : Any> Decoder.decodeIfNullable(deserializer: Deserializa
* [CompositeDecoder] is a part of decoding process that is bound to a particular structured part of
* the serialized form, described by the serial descriptor passed to [Decoder.beginStructure].
*
* Typically, for unordered data, [CompositeDecoder] is used by a serializer withing a [decodeElementIndex]-based
* Typically, for unordered data, [CompositeDecoder] is used by a serializer within a [decodeElementIndex]-based
* loop that decodes all the required data one-by-one in any order and then terminates by calling [endStructure].
* Please refer to [decodeElementIndex] for example of such loop.
*
Expand Down Expand Up @@ -558,6 +558,18 @@ public interface CompositeDecoder {
deserializer: DeserializationStrategy<T?>,
previousValue: T? = null
): T?

/**
* Called after a key has been read.
*
* This could be a map or set key, or anything otherwise intended to be
* distinct within the collection under normal circumstances.
*
* Implementations might use this as a hook for throwing an exception when
* duplicate keys are encountered.
*/
@ExperimentalSerializationApi
public fun visitKey(key: Any?) { }
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not happy with this name but couldn't think of anything better -- suggestions very welcome. :-)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checking duplication of keys should be more like a kind of common logics, and should be provided via a well-implemented utility class, rather than a function in top-level interface for each format to implement. IMO, most of them will be just some copy-paste.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@xiaozhikang0916 The challenge is that the behaviour needs to be configurable. There is no central configuration mechanism in the library, only formats and SerialDescriptors. There is also the need to do this in an ABI/API compatible way. Formats already need to do various things to play nice/support the various library features. This would just be another one. It also makes sense as various formats have their own duplication semantics.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I initially tried plumbing the information through on the deserialization layer and it went very badly. This is only the start of what would be required: dev...timmc:timmc/all-strict (although in practice it would be a config object, not a single boolean -- this was just a failed proof of concept.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean the duplication check in other formats ( if they need ), would share some common logic, like holding a set and putting keys in it. These common logic can be extracted to a utility class.


On the other hand, I am against adding this function in Decoding interface. It is implemented by each format with different behaviours, and only for internal using, which means not public, and users writing custom serializers should never call this function.

IMO, you may probably leave some instructions for format developers about how to achieve duplication check, rather than a public default funtion in this base interface, and make it open for users.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be possible to circumvent the MapSerializer if you have a map specific decoder. This decoder could intercept the decoding of the key, and then check there (before the result is returned to the serializer).

}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ public sealed class MapLikeSerializer<Key, Value, Collection, Builder : MutableM

final override fun readElement(decoder: CompositeDecoder, index: Int, builder: Builder, checkIndex: Boolean) {
val key: Key = decoder.decodeSerializableElement(descriptor, index, keySerializer)
decoder.visitKey(key)
val vIndex = if (checkIndex) {
decoder.decodeElementIndex(descriptor).also {
require(it == index + 1) { "Value must follow key in a map, index for key: $index, returned index for value: $it" }
Expand Down
4 changes: 3 additions & 1 deletion formats/cbor/api/kotlinx-serialization-cbor.api
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ public synthetic class kotlinx/serialization/cbor/ByteString$Impl : kotlinx/seri

public abstract class kotlinx/serialization/cbor/Cbor : kotlinx/serialization/BinaryFormat {
public static final field Default Lkotlinx/serialization/cbor/Cbor$Default;
public synthetic fun <init> (ZZLkotlinx/serialization/modules/SerializersModule;Lkotlin/jvm/internal/DefaultConstructorMarker;)V
public synthetic fun <init> (ZZZLkotlinx/serialization/modules/SerializersModule;Lkotlin/jvm/internal/DefaultConstructorMarker;)V
public fun decodeFromByteArray (Lkotlinx/serialization/DeserializationStrategy;[B)Ljava/lang/Object;
public fun encodeToByteArray (Lkotlinx/serialization/SerializationStrategy;Ljava/lang/Object;)[B
public fun getSerializersModule ()Lkotlinx/serialization/modules/SerializersModule;
Expand All @@ -17,9 +17,11 @@ public final class kotlinx/serialization/cbor/Cbor$Default : kotlinx/serializati
}

public final class kotlinx/serialization/cbor/CborBuilder {
public final fun getAllowDuplicateKeys ()Z
public final fun getEncodeDefaults ()Z
public final fun getIgnoreUnknownKeys ()Z
public final fun getSerializersModule ()Lkotlinx/serialization/modules/SerializersModule;
public final fun setAllowDuplicateKeys (Z)V
public final fun setEncodeDefaults (Z)V
public final fun setIgnoreUnknownKeys (Z)V
public final fun setSerializersModule (Lkotlinx/serialization/modules/SerializersModule;)V
Expand Down
5 changes: 4 additions & 1 deletion formats/cbor/api/kotlinx-serialization-cbor.klib.api
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

// Library unique name: <org.jetbrains.kotlinx:kotlinx-serialization-cbor>
final class kotlinx.serialization.cbor/CborBuilder { // kotlinx.serialization.cbor/CborBuilder|null[0]
final var allowDuplicateKeys // kotlinx.serialization.cbor/CborBuilder.allowDuplicateKeys|<get-allowDuplicateKeys>(){}[0]
final fun <get-allowDuplicateKeys>(): kotlin/Boolean // kotlinx.serialization.cbor/CborBuilder.allowDuplicateKeys.<get-allowDuplicateKeys>|<get-allowDuplicateKeys>(){}[0]
final fun <set-allowDuplicateKeys>(kotlin/Boolean) // kotlinx.serialization.cbor/CborBuilder.allowDuplicateKeys.<set-allowDuplicateKeys>|<set-allowDuplicateKeys>(kotlin.Boolean){}[0]
final var encodeDefaults // kotlinx.serialization.cbor/CborBuilder.encodeDefaults|<get-encodeDefaults>(){}[0]
final fun <get-encodeDefaults>(): kotlin/Boolean // kotlinx.serialization.cbor/CborBuilder.encodeDefaults.<get-encodeDefaults>|<get-encodeDefaults>(){}[0]
final fun <set-encodeDefaults>(kotlin/Boolean) // kotlinx.serialization.cbor/CborBuilder.encodeDefaults.<set-encodeDefaults>|<set-encodeDefaults>(kotlin.Boolean){}[0]
Expand All @@ -22,7 +25,7 @@ open annotation class kotlinx.serialization.cbor/ByteString : kotlin/Annotation
constructor <init>() // kotlinx.serialization.cbor/ByteString.<init>|<init>(){}[0]
}
sealed class kotlinx.serialization.cbor/Cbor : kotlinx.serialization/BinaryFormat { // kotlinx.serialization.cbor/Cbor|null[0]
constructor <init>(kotlin/Boolean, kotlin/Boolean, kotlinx.serialization.modules/SerializersModule) // kotlinx.serialization.cbor/Cbor.<init>|<init>(kotlin.Boolean;kotlin.Boolean;kotlinx.serialization.modules.SerializersModule){}[0]
constructor <init>(kotlin/Boolean, kotlin/Boolean, kotlin/Boolean, kotlinx.serialization.modules/SerializersModule) // kotlinx.serialization.cbor/Cbor.<init>|<init>(kotlin.Boolean;kotlin.Boolean;kotlin.Boolean;kotlinx.serialization.modules.SerializersModule){}[0]
final object Default : kotlinx.serialization.cbor/Cbor // kotlinx.serialization.cbor/Cbor.Default|null[0]
open fun <#A1: kotlin/Any?> decodeFromByteArray(kotlinx.serialization/DeserializationStrategy<#A1>, kotlin/ByteArray): #A1 // kotlinx.serialization.cbor/Cbor.decodeFromByteArray|decodeFromByteArray(kotlinx.serialization.DeserializationStrategy<0:0>;kotlin.ByteArray){0§<kotlin.Any?>}[0]
open fun <#A1: kotlin/Any?> encodeToByteArray(kotlinx.serialization/SerializationStrategy<#A1>, #A1): kotlin/ByteArray // kotlinx.serialization.cbor/Cbor.encodeToByteArray|encodeToByteArray(kotlinx.serialization.SerializationStrategy<0:0>;0:0){0§<kotlin.Any?>}[0]
Expand Down
20 changes: 16 additions & 4 deletions formats/cbor/commonMain/src/kotlinx/serialization/cbor/Cbor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,14 @@ import kotlinx.serialization.modules.*
public sealed class Cbor(
internal val encodeDefaults: Boolean,
internal val ignoreUnknownKeys: Boolean,
internal val allowDuplicateKeys: Boolean,
override val serializersModule: SerializersModule
) : BinaryFormat {

/**
* The default instance of [Cbor]
*/
public companion object Default : Cbor(false, false, EmptySerializersModule())
timmc marked this conversation as resolved.
Show resolved Hide resolved
public companion object Default : Cbor(false, false, true, EmptySerializersModule())

override fun <T> encodeToByteArray(serializer: SerializationStrategy<T>, value: T): ByteArray {
val output = ByteArrayOutput()
Expand All @@ -55,8 +56,11 @@ public sealed class Cbor(
}

@OptIn(ExperimentalSerializationApi::class)
private class CborImpl(encodeDefaults: Boolean, ignoreUnknownKeys: Boolean, serializersModule: SerializersModule) :
Cbor(encodeDefaults, ignoreUnknownKeys, serializersModule)
private class CborImpl(
encodeDefaults: Boolean, ignoreUnknownKeys: Boolean, allowDuplicateKeys: Boolean,
serializersModule: SerializersModule,
) :
Cbor(encodeDefaults, ignoreUnknownKeys, allowDuplicateKeys, serializersModule)

/**
* Creates an instance of [Cbor] configured from the optionally given [Cbor instance][from]
Expand All @@ -66,7 +70,7 @@ private class CborImpl(encodeDefaults: Boolean, ignoreUnknownKeys: Boolean, seri
public fun Cbor(from: Cbor = Cbor, builderAction: CborBuilder.() -> Unit): Cbor {
val builder = CborBuilder(from)
builder.builderAction()
return CborImpl(builder.encodeDefaults, builder.ignoreUnknownKeys, builder.serializersModule)
return CborImpl(builder.encodeDefaults, builder.ignoreUnknownKeys, builder.allowDuplicateKeys, builder.serializersModule)
}

/**
Expand All @@ -87,6 +91,14 @@ public class CborBuilder internal constructor(cbor: Cbor) {
*/
public var ignoreUnknownKeys: Boolean = cbor.ignoreUnknownKeys

/**
* Specifies whether it is an error to read a map with duplicate keys.
*
* If this is set to false, decoding a map with two keys that compare as equal
* will cause a [DuplicateMapKeyException] error to be thrown.
*/
public var allowDuplicateKeys: Boolean = cbor.allowDuplicateKeys

/**
* Module with contextual and polymorphic serializers to be used in the resulting [Cbor] instance.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,21 @@ internal class CborEncoder(private val output: ByteArrayOutput) {
}
}

private class CborMapReader(cbor: Cbor, decoder: CborDecoder) : CborListReader(cbor, decoder) {
private class CborMapReader(val cbor: Cbor, decoder: CborDecoder) : CborListReader(cbor, decoder) {
/** Keys that have been seen so far while reading this map. */
private val seenKeys = mutableSetOf<Any?>()

override fun skipBeginToken() = setSize(decoder.startMap() * 2)

override fun visitKey(key: Any?) {
if (cbor.allowDuplicateKeys)
return

val added = seenKeys.add(key)
if (!added) {
throw DuplicateMapKeyException(key)
}
}
}

private open class CborListReader(cbor: Cbor, decoder: CborDecoder) : CborReader(cbor, decoder) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package kotlinx.serialization.cbor

import kotlinx.serialization.assertFailsWithMessage
import kotlinx.serialization.decodeFromByteArray
import kotlinx.serialization.HexConverter
import kotlinx.serialization.DuplicateMapKeyException
import kotlin.test.Test

class CborStrictModeTest {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Add a (passing) test for duplicate key detection when deserializing a data class. I have a test locally that fails -- I think I need to add visitKey call to some inlined code somewhere, but I haven't found the appropriate callsite yet.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I need to modify the serialization plugin's code generator in order to do this! Where does that code even live?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@timmc The plugin code lives in the main Kotlin repository, but you don't need to go there.

I suppose you mean to detect multiple keys in object serializiation (like this Json { "a": "something", "a": "something else" }). To support that your format will need to plug in to decodeElementIndex and mark/record seen indices. Note that for some formats collections can be validly expressed as repeated key/value pairs (protobuf does this), so depending on how CBOR works you may need to do a slightly more extensive check.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. I had previously tried modifying decodeElementIndex and wasn't getting the results I expected. Maybe I'll take another crack at it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh! I see, the decodeElementIndex in CborReader. Got it.

private val strict = Cbor { allowDuplicateKeys = false }

/** Duplicate keys are rejected in generic maps. */
@Test
fun testDuplicateKeysInMap() {
val duplicateKeys = HexConverter.parseHexBinary("A2617805617806")
assertFailsWithMessage<DuplicateMapKeyException>("Duplicate keys not allowed in maps. Key appeared twice: x") {
strict.decodeFromByteArray<Map<String, Long>>(duplicateKeys)
}
}
}
1 change: 1 addition & 0 deletions formats/json/api/kotlinx-serialization-json.api
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ public final class kotlinx/serialization/json/JsonDecoder$DefaultImpls {
public static fun decodeNullableSerializableValue (Lkotlinx/serialization/json/JsonDecoder;Lkotlinx/serialization/DeserializationStrategy;)Ljava/lang/Object;
public static fun decodeSequentially (Lkotlinx/serialization/json/JsonDecoder;)Z
public static fun decodeSerializableValue (Lkotlinx/serialization/json/JsonDecoder;Lkotlinx/serialization/DeserializationStrategy;)Ljava/lang/Object;
public static fun visitKey (Lkotlinx/serialization/json/JsonDecoder;Ljava/lang/Object;)V
}

public abstract class kotlinx/serialization/json/JsonElement {
Expand Down