Prefer use of java.util.zip.CRC32C for Framed format (#269)

Co-authored-by: BO8979 <BO8979@W1971362.northamerica.cerner.net>
This commit is contained in:
Brett Okken 2021-01-20 13:30:19 -06:00 committed by GitHub
parent 110727ed69
commit 822513dbe6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 1426 additions and 1379 deletions

View File

@ -48,8 +48,7 @@ public class PureJavaCrc32C
/** {@inheritDoc} */
public long getValue()
{
long ret = crc;
return (~ret) & 0xffffffffL;
return (~crc) & 0xffffffffL;
}
/** {@inheritDoc} */

View File

@ -1,125 +1,165 @@
/*
* Created: Apr 12, 2013
*/
package org.xerial.snappy;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;
/**
* Constants and utilities for implementing x-snappy-framed.
*
* @author Brett Okken
* @since 1.1.0
*/
final class SnappyFramed
{
public static final int COMPRESSED_DATA_FLAG = 0x00;
public static final int UNCOMPRESSED_DATA_FLAG = 0x01;
public static final int STREAM_IDENTIFIER_FLAG = 0xff;
private static final int MASK_DELTA = 0xa282ead8;
/**
* The header consists of the stream identifier flag, 3 bytes indicating a
* length of 6, and "sNaPpY" in ASCII.
*/
public static final byte[] HEADER_BYTES = new byte[] {
(byte) STREAM_IDENTIFIER_FLAG, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61,
0x50, 0x70, 0x59};
public static int maskedCrc32c(byte[] data)
{
return maskedCrc32c(data, 0, data.length);
}
public static int maskedCrc32c(byte[] data, int offset, int length)
{
final PureJavaCrc32C crc32c = new PureJavaCrc32C();
crc32c.update(data, offset, length);
return mask(crc32c.getIntegerValue());
}
/**
* Checksums are not stored directly, but masked, as checksumming data and
* then its own checksum can be problematic. The masking is the same as used
* in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant
* 0xa282ead8 (using wraparound as normal for unsigned integers). This is
* equivalent to the following C code:
* <p/>
* <pre>
* uint32_t mask_checksum(uint32_t x) {
* return ((x >> 15) | (x << 17)) + 0xa282ead8;
* }
* </pre>
*/
public static int mask(int crc)
{
// Rotate right by 15 bits and add a constant.
return ((crc >>> 15) | (crc << 17)) + MASK_DELTA;
}
static final int readBytes(ReadableByteChannel source, ByteBuffer dest)
throws IOException
{
// tells how many bytes to read.
final int expectedLength = dest.remaining();
int totalRead = 0;
// how many bytes were read.
int lastRead = source.read(dest);
totalRead = lastRead;
// if we did not read as many bytes as we had hoped, try reading again.
if (lastRead < expectedLength) {
// as long the buffer is not full (remaining() == 0) and we have not reached EOF (lastRead == -1) keep reading.
while (dest.remaining() != 0 && lastRead != -1) {
lastRead = source.read(dest);
// if we got EOF, do not add to total read.
if (lastRead != -1) {
totalRead += lastRead;
}
}
}
if (totalRead > 0) {
dest.limit(dest.position());
}
else {
dest.position(dest.limit());
}
return totalRead;
}
static int skip(final ReadableByteChannel source, final int skip, final ByteBuffer buffer)
throws IOException
{
if (skip <= 0) {
return 0;
}
int toSkip = skip;
int skipped = 0;
while (toSkip > 0 && skipped != -1) {
buffer.clear();
if (toSkip < buffer.capacity()) {
buffer.limit(toSkip);
}
skipped = source.read(buffer);
if (skipped > 0) {
toSkip -= skipped;
}
}
buffer.clear();
return skip - toSkip;
}
}
/*
* Created: Apr 12, 2013
*/
package org.xerial.snappy;
import java.io.IOException;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.Checksum;
/**
* Constants and utilities for implementing x-snappy-framed.
*
* @author Brett Okken
* @since 1.1.0
*/
final class SnappyFramed
{
public static final int COMPRESSED_DATA_FLAG = 0x00;
public static final int UNCOMPRESSED_DATA_FLAG = 0x01;
public static final int STREAM_IDENTIFIER_FLAG = 0xff;
private static final int MASK_DELTA = 0xa282ead8;
private static final Supplier<Checksum> CHECKSUM_SUPPLIER;
static
{
Supplier<Checksum> supplier = null;
try
{
final Class crc32cClazz = Class.forName("java.util.zip.CRC32C");
final MethodHandles.Lookup lookup = MethodHandles.publicLookup();
final MethodHandle conHandle = lookup.findConstructor(crc32cClazz, MethodType.methodType(void.class))
.asType(MethodType.methodType(Checksum.class));
supplier = () -> {
try
{
return (Checksum) conHandle.invokeExact();
}
catch (Throwable e)
{
throw new IllegalStateException(e);
}
};
}
catch(Throwable t)
{
Logger.getLogger(SnappyFramed.class.getName())
.log(Level.FINE, "java.util.zip.CRC32C not loaded, using PureJavaCrc32C", t);
supplier = null;
}
CHECKSUM_SUPPLIER = supplier != null ? supplier : PureJavaCrc32C::new;
}
/**
* The header consists of the stream identifier flag, 3 bytes indicating a
* length of 6, and "sNaPpY" in ASCII.
*/
public static final byte[] HEADER_BYTES = new byte[] {
(byte) STREAM_IDENTIFIER_FLAG, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61,
0x50, 0x70, 0x59};
public static Checksum getCRC32C()
{
return CHECKSUM_SUPPLIER.get();
}
public static int maskedCrc32c(Checksum crc32c, byte[] data, int offset, int length)
{
crc32c.reset();
crc32c.update(data, offset, length);
return mask((int) crc32c.getValue());
}
/**
* Checksums are not stored directly, but masked, as checksumming data and
* then its own checksum can be problematic. The masking is the same as used
* in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant
* 0xa282ead8 (using wraparound as normal for unsigned integers). This is
* equivalent to the following C code:
* <p/>
* <pre>
* uint32_t mask_checksum(uint32_t x) {
* return ((x >> 15) | (x << 17)) + 0xa282ead8;
* }
* </pre>
*/
public static int mask(int crc)
{
// Rotate right by 15 bits and add a constant.
return ((crc >>> 15) | (crc << 17)) + MASK_DELTA;
}
static final int readBytes(ReadableByteChannel source, ByteBuffer dest)
throws IOException
{
// tells how many bytes to read.
final int expectedLength = dest.remaining();
int totalRead = 0;
// how many bytes were read.
int lastRead = source.read(dest);
totalRead = lastRead;
// if we did not read as many bytes as we had hoped, try reading again.
if (lastRead < expectedLength) {
// as long the buffer is not full (remaining() == 0) and we have not reached EOF (lastRead == -1) keep reading.
while (dest.remaining() != 0 && lastRead != -1) {
lastRead = source.read(dest);
// if we got EOF, do not add to total read.
if (lastRead != -1) {
totalRead += lastRead;
}
}
}
if (totalRead > 0) {
dest.limit(dest.position());
}
else {
dest.position(dest.limit());
}
return totalRead;
}
static int skip(final ReadableByteChannel source, final int skip, final ByteBuffer buffer)
throws IOException
{
if (skip <= 0) {
return 0;
}
int toSkip = skip;
int skipped = 0;
while (toSkip > 0 && skipped != -1) {
buffer.clear();
if (toSkip < buffer.capacity()) {
buffer.limit(toSkip);
}
skipped = source.read(buffer);
if (skipped > 0) {
toSkip -= skipped;
}
}
buffer.clear();
return skip - toSkip;
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,6 @@ import static org.junit.Assert.fail;
import static org.xerial.snappy.SnappyFramed.COMPRESSED_DATA_FLAG;
import static org.xerial.snappy.SnappyFramed.HEADER_BYTES;
import static org.xerial.snappy.SnappyFramed.UNCOMPRESSED_DATA_FLAG;
import static org.xerial.snappy.SnappyFramed.maskedCrc32c;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@ -418,4 +417,9 @@ public class SnappyFramedStreamTest
assertEquals(random.length, length);
return random;
}
public static int maskedCrc32c(byte[] data)
{
return SnappyFramed.maskedCrc32c(new PureJavaCrc32C(), data, 0, data.length);
}
}