From b0e44bdf6195714465c651e57b11747d68ad405c Mon Sep 17 00:00:00 2001
From: Christophe Le Saec <51320496+clesaec@users.noreply.github.com>
Date: Fri, 2 Feb 2024 12:19:13 +0100
Subject: [PATCH] AVRO-3918: Add UUID with `fixed[16]` (#2652)

* AVRO-3918: add uuid with bytes and fixed

* AVRO-3918: add licence

* AVRO-3918: change spec

* AVRO-3918: force big endian mode for long value

* AVRO-3918: remove inefficient uuid bytes storage

* AVRO-3918: enforce network byte order

As stated in RFC 4122 section 4.1.2, UUIDs are in network byte order.
Also added a test for string based UUID conversion.

* Use buffer instead

---------

Co-authored-by: Oscar Westra van Holthe - Kind
Co-authored-by: Fokko Driesprong
---
Reviewer note (not part of the commit message): in TestUuidConversions the
`string` schema is now created with Schema.create(Schema.Type.STRING). The
earlier revision built it with Schema.createFixed(...), a copy of the `fixed`
field, so the string-based conversion test never used a string schema.

 .../java/org/apache/avro/Conversions.java     | 16 +++++
 .../java/org/apache/avro/LogicalTypes.java    | 10 ++-
 .../java/org/apache/avro/TestLogicalType.java |  2 +-
 .../org/apache/avro/TestUuidConversions.java  | 72 +++++++++++++++++++
 4 files changed, 97 insertions(+), 3 deletions(-)
 create mode 100644 lang/java/avro/src/test/java/org/apache/avro/TestUuidConversions.java

diff --git a/lang/java/avro/src/main/java/org/apache/avro/Conversions.java b/lang/java/avro/src/main/java/org/apache/avro/Conversions.java
index 043ddfa0725..99ad500647e 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/Conversions.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/Conversions.java
@@ -68,6 +68,22 @@ public UUID fromCharSequence(CharSequence value, Schema schema, LogicalType type
     public CharSequence toCharSequence(UUID value, Schema schema, LogicalType type) {
       return value.toString();
     }
+
+    @Override
+    public UUID fromFixed(final GenericFixed value, final Schema schema, final LogicalType type) {
+      ByteBuffer buffer = ByteBuffer.wrap(value.bytes());
+      long mostSigBits = buffer.getLong();
+      long leastSigBits = buffer.getLong();
+      return new UUID(mostSigBits, leastSigBits);
+    }
+
+    @Override
+    public GenericFixed toFixed(final UUID value, final Schema schema, final LogicalType type) {
+      ByteBuffer buffer = ByteBuffer.allocate(2 * Long.BYTES);
+      buffer.putLong(value.getMostSignificantBits());
+      buffer.putLong(value.getLeastSignificantBits());
+      return new GenericData.Fixed(schema, buffer.array());
+    }
   }
 
   public static class DecimalConversion extends Conversion<BigDecimal> {
diff --git a/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java b/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java
index 643760af98c..6a894f05104 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java
@@ -289,6 +289,9 @@ public static LocalTimestampNanos localTimestampNanos() {
 
   /** Uuid represents a uuid without a time */
   public static class Uuid extends LogicalType {
+
+    private static final int UUID_BYTES = 2 * Long.BYTES;
+
     private Uuid() {
       super(UUID);
     }
@@ -296,8 +299,11 @@ private Uuid() {
     @Override
     public void validate(Schema schema) {
       super.validate(schema);
-      if (schema.getType() != Schema.Type.STRING) {
-        throw new IllegalArgumentException("Uuid can only be used with an underlying string type");
+      if (schema.getType() != Schema.Type.STRING && schema.getType() != Schema.Type.FIXED) {
+        throw new IllegalArgumentException("Uuid can only be used with an underlying string or fixed type");
+      }
+      if (schema.getType() == Schema.Type.FIXED && schema.getFixedSize() != UUID_BYTES) {
+        throw new IllegalArgumentException("Uuid with fixed type must have a size of " + UUID_BYTES + " bytes");
       }
     }
   }
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java b/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java
index 4476ac7db2c..733997db28f 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java
@@ -208,7 +208,7 @@ void uuidExtendsString() {
     assertEquals(LogicalTypes.uuid(), uuidSchema.getLogicalType());
 
     assertThrows("UUID requires a string", IllegalArgumentException.class,
-        "Uuid can only be used with an underlying string type",
+        "Uuid can only be used with an underlying string or fixed type",
         () -> LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.INT)));
   }
 
diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestUuidConversions.java b/lang/java/avro/src/test/java/org/apache/avro/TestUuidConversions.java
new file mode 100644
index 00000000000..640bf1a2bb5
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/TestUuidConversions.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro;
+
+import org.apache.avro.generic.GenericFixed;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.math.BigInteger;
+import java.util.UUID;
+import java.util.stream.Stream;
+
+public class TestUuidConversions {
+
+  private Conversions.UUIDConversion uuidConversion = new Conversions.UUIDConversion();
+
+  private Schema fixed = Schema.createFixed("fixed", "doc", "", Long.BYTES * 2);
+  private Schema fixedUuid = LogicalTypes.uuid().addToSchema(fixed);
+
+  private Schema string = Schema.create(Schema.Type.STRING);
+  private Schema stringUuid = LogicalTypes.uuid().addToSchema(string);
+
+  @ParameterizedTest
+  @MethodSource("uuidData")
+  void uuidFixed(UUID uuid) {
+    GenericFixed value = uuidConversion.toFixed(uuid, fixedUuid, LogicalTypes.uuid());
+
+    byte[] b = new byte[Long.BYTES];
+    System.arraycopy(value.bytes(), 0, b, 0, b.length);
+    Assertions.assertEquals(uuid.getMostSignificantBits(), new BigInteger(b).longValue());
+    System.arraycopy(value.bytes(), Long.BYTES, b, 0, b.length);
+    Assertions.assertEquals(uuid.getLeastSignificantBits(), new BigInteger(b).longValue());
+
+    UUID uuid1 = uuidConversion.fromFixed(value, fixedUuid, LogicalTypes.uuid());
+    Assertions.assertEquals(uuid, uuid1);
+  }
+
+  @ParameterizedTest
+  @MethodSource("uuidData")
+  void uuidCharSequence(UUID uuid) {
+    CharSequence value = uuidConversion.toCharSequence(uuid, stringUuid, LogicalTypes.uuid());
+
+    Assertions.assertEquals(uuid.toString(), value.toString());
+
+    UUID uuid1 = uuidConversion.fromCharSequence(value, stringUuid, LogicalTypes.uuid());
+    Assertions.assertEquals(uuid, uuid1);
+  }
+
+  public static Stream<Arguments> uuidData() {
+    return Stream.of(Arguments.of(new UUID(Long.MIN_VALUE, Long.MAX_VALUE)), Arguments.of(new UUID(-1, 0)),
+        Arguments.of(UUID.randomUUID()), Arguments.of(UUID.randomUUID()));
+  }
+
+}