Prefer use of java.util.zip.CRC32C for Framed format (#269)

Co-authored-by: BO8979 <BO8979@W1971362.northamerica.cerner.net>
This commit is contained in:
Brett Okken 2021-01-20 13:30:19 -06:00 committed by GitHub
parent 110727ed69
commit 822513dbe6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 1426 additions and 1379 deletions

View File

@ -48,8 +48,7 @@ public class PureJavaCrc32C
/** {@inheritDoc} */ /** {@inheritDoc} */
public long getValue() public long getValue()
{ {
long ret = crc; return (~crc) & 0xffffffffL;
return (~ret) & 0xffffffffL;
} }
/** {@inheritDoc} */ /** {@inheritDoc} */

View File

@ -1,125 +1,165 @@
/* /*
* Created: Apr 12, 2013 * Created: Apr 12, 2013
*/ */
package org.xerial.snappy; package org.xerial.snappy;
import java.io.IOException; import java.io.IOException;
import java.nio.ByteBuffer; import java.lang.invoke.MethodHandle;
import java.nio.channels.ReadableByteChannel; import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
/** import java.nio.ByteBuffer;
* Constants and utilities for implementing x-snappy-framed. import java.nio.channels.ReadableByteChannel;
* import java.util.function.Supplier;
* @author Brett Okken import java.util.logging.Level;
* @since 1.1.0 import java.util.logging.Logger;
*/ import java.util.zip.Checksum;
final class SnappyFramed
{ /**
public static final int COMPRESSED_DATA_FLAG = 0x00; * Constants and utilities for implementing x-snappy-framed.
*
public static final int UNCOMPRESSED_DATA_FLAG = 0x01; * @author Brett Okken
* @since 1.1.0
public static final int STREAM_IDENTIFIER_FLAG = 0xff; */
final class SnappyFramed
private static final int MASK_DELTA = 0xa282ead8; {
public static final int COMPRESSED_DATA_FLAG = 0x00;
/**
* The header consists of the stream identifier flag, 3 bytes indicating a public static final int UNCOMPRESSED_DATA_FLAG = 0x01;
* length of 6, and "sNaPpY" in ASCII.
*/ public static final int STREAM_IDENTIFIER_FLAG = 0xff;
public static final byte[] HEADER_BYTES = new byte[] {
(byte) STREAM_IDENTIFIER_FLAG, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, private static final int MASK_DELTA = 0xa282ead8;
0x50, 0x70, 0x59};
private static final Supplier<Checksum> CHECKSUM_SUPPLIER;
public static int maskedCrc32c(byte[] data)
{ static
return maskedCrc32c(data, 0, data.length); {
} Supplier<Checksum> supplier = null;
try
public static int maskedCrc32c(byte[] data, int offset, int length) {
{ final Class crc32cClazz = Class.forName("java.util.zip.CRC32C");
final PureJavaCrc32C crc32c = new PureJavaCrc32C(); final MethodHandles.Lookup lookup = MethodHandles.publicLookup();
crc32c.update(data, offset, length);
return mask(crc32c.getIntegerValue()); final MethodHandle conHandle = lookup.findConstructor(crc32cClazz, MethodType.methodType(void.class))
} .asType(MethodType.methodType(Checksum.class));
supplier = () -> {
/** try
* Checksums are not stored directly, but masked, as checksumming data and {
* then its own checksum can be problematic. The masking is the same as used return (Checksum) conHandle.invokeExact();
* in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant }
* 0xa282ead8 (using wraparound as normal for unsigned integers). This is catch (Throwable e)
* equivalent to the following C code: {
* <p/> throw new IllegalStateException(e);
* <pre> }
* uint32_t mask_checksum(uint32_t x) { };
* return ((x >> 15) | (x << 17)) + 0xa282ead8; }
* } catch(Throwable t)
* </pre> {
*/ Logger.getLogger(SnappyFramed.class.getName())
public static int mask(int crc) .log(Level.FINE, "java.util.zip.CRC32C not loaded, using PureJavaCrc32C", t);
{ supplier = null;
// Rotate right by 15 bits and add a constant. }
return ((crc >>> 15) | (crc << 17)) + MASK_DELTA;
} CHECKSUM_SUPPLIER = supplier != null ? supplier : PureJavaCrc32C::new;
}
static final int readBytes(ReadableByteChannel source, ByteBuffer dest)
throws IOException /**
{ * The header consists of the stream identifier flag, 3 bytes indicating a
// tells how many bytes to read. * length of 6, and "sNaPpY" in ASCII.
final int expectedLength = dest.remaining(); */
public static final byte[] HEADER_BYTES = new byte[] {
int totalRead = 0; (byte) STREAM_IDENTIFIER_FLAG, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61,
0x50, 0x70, 0x59};
// how many bytes were read.
int lastRead = source.read(dest); public static Checksum getCRC32C()
{
totalRead = lastRead; return CHECKSUM_SUPPLIER.get();
}
// if we did not read as many bytes as we had hoped, try reading again.
if (lastRead < expectedLength) { public static int maskedCrc32c(Checksum crc32c, byte[] data, int offset, int length)
// as long the buffer is not full (remaining() == 0) and we have not reached EOF (lastRead == -1) keep reading. {
while (dest.remaining() != 0 && lastRead != -1) { crc32c.reset();
lastRead = source.read(dest); crc32c.update(data, offset, length);
return mask((int) crc32c.getValue());
// if we got EOF, do not add to total read. }
if (lastRead != -1) {
totalRead += lastRead; /**
} * Checksums are not stored directly, but masked, as checksumming data and
} * then its own checksum can be problematic. The masking is the same as used
} * in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant
* 0xa282ead8 (using wraparound as normal for unsigned integers). This is
if (totalRead > 0) { * equivalent to the following C code:
dest.limit(dest.position()); * <p/>
} * <pre>
else { * uint32_t mask_checksum(uint32_t x) {
dest.position(dest.limit()); * return ((x >> 15) | (x << 17)) + 0xa282ead8;
} * }
* </pre>
return totalRead; */
} public static int mask(int crc)
{
static int skip(final ReadableByteChannel source, final int skip, final ByteBuffer buffer) // Rotate right by 15 bits and add a constant.
throws IOException return ((crc >>> 15) | (crc << 17)) + MASK_DELTA;
{ }
if (skip <= 0) {
return 0; static final int readBytes(ReadableByteChannel source, ByteBuffer dest)
} throws IOException
{
int toSkip = skip; // tells how many bytes to read.
int skipped = 0; final int expectedLength = dest.remaining();
while (toSkip > 0 && skipped != -1) {
buffer.clear(); int totalRead = 0;
if (toSkip < buffer.capacity()) {
buffer.limit(toSkip); // how many bytes were read.
} int lastRead = source.read(dest);
skipped = source.read(buffer); totalRead = lastRead;
if (skipped > 0) {
toSkip -= skipped; // if we did not read as many bytes as we had hoped, try reading again.
} if (lastRead < expectedLength) {
} // as long the buffer is not full (remaining() == 0) and we have not reached EOF (lastRead == -1) keep reading.
while (dest.remaining() != 0 && lastRead != -1) {
buffer.clear(); lastRead = source.read(dest);
return skip - toSkip;
} // if we got EOF, do not add to total read.
} if (lastRead != -1) {
totalRead += lastRead;
}
}
}
if (totalRead > 0) {
dest.limit(dest.position());
}
else {
dest.position(dest.limit());
}
return totalRead;
}
/**
 * Consumes up to {@code skip} bytes from {@code source}, using {@code buffer}
 * purely as scratch space for the discarded data.
 *
 * @param source the channel to read (and discard) bytes from
 * @param skip the number of bytes to discard; values {@code <= 0} are a no-op
 * @param buffer scratch buffer; it is cleared before every read and once more
 *               before returning whenever {@code skip > 0}
 * @return the number of bytes actually discarded, which is less than
 *         {@code skip} if {@code source.read} reported {@code -1} first
 * @throws IOException if reading from {@code source} fails
 */
static int skip(final ReadableByteChannel source, final int skip, final ByteBuffer buffer)
        throws IOException
{
    // Nothing requested: leave both the channel and the buffer untouched.
    if (skip <= 0) {
        return 0;
    }

    int remaining = skip;
    int lastRead;
    do {
        // Reset the scratch buffer and cap the read so we never consume
        // more than what is still left to discard.
        buffer.clear();
        buffer.limit(Math.min(remaining, buffer.capacity()));

        lastRead = source.read(buffer);
        if (lastRead > 0) {
            remaining -= lastRead;
        }
    }
    while (remaining > 0 && lastRead != -1);

    // Hand the buffer back in a clean state.
    buffer.clear();
    return skip - remaining;
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,6 @@ import static org.junit.Assert.fail;
import static org.xerial.snappy.SnappyFramed.COMPRESSED_DATA_FLAG; import static org.xerial.snappy.SnappyFramed.COMPRESSED_DATA_FLAG;
import static org.xerial.snappy.SnappyFramed.HEADER_BYTES; import static org.xerial.snappy.SnappyFramed.HEADER_BYTES;
import static org.xerial.snappy.SnappyFramed.UNCOMPRESSED_DATA_FLAG; import static org.xerial.snappy.SnappyFramed.UNCOMPRESSED_DATA_FLAG;
import static org.xerial.snappy.SnappyFramed.maskedCrc32c;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
@ -418,4 +417,9 @@ public class SnappyFramedStreamTest
assertEquals(random.length, length); assertEquals(random.length, length);
return random; return random;
} }
/**
 * Test helper: computes the masked CRC-32C of the entire {@code data} array
 * by delegating to {@link SnappyFramed#maskedCrc32c} with a fresh
 * {@link PureJavaCrc32C} instance.
 *
 * @param data the bytes to checksum, from index 0 through {@code data.length}
 * @return the masked checksum as returned by {@code SnappyFramed.maskedCrc32c}
 */
public static int maskedCrc32c(byte[] data)
{
    final PureJavaCrc32C crc32c = new PureJavaCrc32C();
    return SnappyFramed.maskedCrc32c(crc32c, data, 0, data.length);
}
} }