Prefer use of java.util.zip.CRC32C for Framed format (#269)
Co-authored-by: BO8979 <BO8979@W1971362.northamerica.cerner.net>
This commit is contained in:
parent
110727ed69
commit
822513dbe6
|
@ -48,8 +48,7 @@ public class PureJavaCrc32C
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
public long getValue()
|
public long getValue()
|
||||||
{
|
{
|
||||||
long ret = crc;
|
return (~crc) & 0xffffffffL;
|
||||||
return (~ret) & 0xffffffffL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
|
|
|
@ -1,125 +1,165 @@
|
||||||
/*
|
/*
|
||||||
* Created: Apr 12, 2013
|
* Created: Apr 12, 2013
|
||||||
*/
|
*/
|
||||||
package org.xerial.snappy;
|
package org.xerial.snappy;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.ByteBuffer;
|
import java.lang.invoke.MethodHandle;
|
||||||
import java.nio.channels.ReadableByteChannel;
|
import java.lang.invoke.MethodHandles;
|
||||||
|
import java.lang.invoke.MethodType;
|
||||||
/**
|
import java.nio.ByteBuffer;
|
||||||
* Constants and utilities for implementing x-snappy-framed.
|
import java.nio.channels.ReadableByteChannel;
|
||||||
*
|
import java.util.function.Supplier;
|
||||||
* @author Brett Okken
|
import java.util.logging.Level;
|
||||||
* @since 1.1.0
|
import java.util.logging.Logger;
|
||||||
*/
|
import java.util.zip.Checksum;
|
||||||
final class SnappyFramed
|
|
||||||
{
|
/**
|
||||||
public static final int COMPRESSED_DATA_FLAG = 0x00;
|
* Constants and utilities for implementing x-snappy-framed.
|
||||||
|
*
|
||||||
public static final int UNCOMPRESSED_DATA_FLAG = 0x01;
|
* @author Brett Okken
|
||||||
|
* @since 1.1.0
|
||||||
public static final int STREAM_IDENTIFIER_FLAG = 0xff;
|
*/
|
||||||
|
final class SnappyFramed
|
||||||
private static final int MASK_DELTA = 0xa282ead8;
|
{
|
||||||
|
public static final int COMPRESSED_DATA_FLAG = 0x00;
|
||||||
/**
|
|
||||||
* The header consists of the stream identifier flag, 3 bytes indicating a
|
public static final int UNCOMPRESSED_DATA_FLAG = 0x01;
|
||||||
* length of 6, and "sNaPpY" in ASCII.
|
|
||||||
*/
|
public static final int STREAM_IDENTIFIER_FLAG = 0xff;
|
||||||
public static final byte[] HEADER_BYTES = new byte[] {
|
|
||||||
(byte) STREAM_IDENTIFIER_FLAG, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61,
|
private static final int MASK_DELTA = 0xa282ead8;
|
||||||
0x50, 0x70, 0x59};
|
|
||||||
|
private static final Supplier<Checksum> CHECKSUM_SUPPLIER;
|
||||||
public static int maskedCrc32c(byte[] data)
|
|
||||||
{
|
static
|
||||||
return maskedCrc32c(data, 0, data.length);
|
{
|
||||||
}
|
Supplier<Checksum> supplier = null;
|
||||||
|
try
|
||||||
public static int maskedCrc32c(byte[] data, int offset, int length)
|
{
|
||||||
{
|
final Class crc32cClazz = Class.forName("java.util.zip.CRC32C");
|
||||||
final PureJavaCrc32C crc32c = new PureJavaCrc32C();
|
final MethodHandles.Lookup lookup = MethodHandles.publicLookup();
|
||||||
crc32c.update(data, offset, length);
|
|
||||||
return mask(crc32c.getIntegerValue());
|
final MethodHandle conHandle = lookup.findConstructor(crc32cClazz, MethodType.methodType(void.class))
|
||||||
}
|
.asType(MethodType.methodType(Checksum.class));
|
||||||
|
supplier = () -> {
|
||||||
/**
|
try
|
||||||
* Checksums are not stored directly, but masked, as checksumming data and
|
{
|
||||||
* then its own checksum can be problematic. The masking is the same as used
|
return (Checksum) conHandle.invokeExact();
|
||||||
* in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant
|
}
|
||||||
* 0xa282ead8 (using wraparound as normal for unsigned integers). This is
|
catch (Throwable e)
|
||||||
* equivalent to the following C code:
|
{
|
||||||
* <p/>
|
throw new IllegalStateException(e);
|
||||||
* <pre>
|
}
|
||||||
* uint32_t mask_checksum(uint32_t x) {
|
};
|
||||||
* return ((x >> 15) | (x << 17)) + 0xa282ead8;
|
}
|
||||||
* }
|
catch(Throwable t)
|
||||||
* </pre>
|
{
|
||||||
*/
|
Logger.getLogger(SnappyFramed.class.getName())
|
||||||
public static int mask(int crc)
|
.log(Level.FINE, "java.util.zip.CRC32C not loaded, using PureJavaCrc32C", t);
|
||||||
{
|
supplier = null;
|
||||||
// Rotate right by 15 bits and add a constant.
|
}
|
||||||
return ((crc >>> 15) | (crc << 17)) + MASK_DELTA;
|
|
||||||
}
|
CHECKSUM_SUPPLIER = supplier != null ? supplier : PureJavaCrc32C::new;
|
||||||
|
}
|
||||||
static final int readBytes(ReadableByteChannel source, ByteBuffer dest)
|
|
||||||
throws IOException
|
/**
|
||||||
{
|
* The header consists of the stream identifier flag, 3 bytes indicating a
|
||||||
// tells how many bytes to read.
|
* length of 6, and "sNaPpY" in ASCII.
|
||||||
final int expectedLength = dest.remaining();
|
*/
|
||||||
|
public static final byte[] HEADER_BYTES = new byte[] {
|
||||||
int totalRead = 0;
|
(byte) STREAM_IDENTIFIER_FLAG, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61,
|
||||||
|
0x50, 0x70, 0x59};
|
||||||
// how many bytes were read.
|
|
||||||
int lastRead = source.read(dest);
|
public static Checksum getCRC32C()
|
||||||
|
{
|
||||||
totalRead = lastRead;
|
return CHECKSUM_SUPPLIER.get();
|
||||||
|
}
|
||||||
// if we did not read as many bytes as we had hoped, try reading again.
|
|
||||||
if (lastRead < expectedLength) {
|
public static int maskedCrc32c(Checksum crc32c, byte[] data, int offset, int length)
|
||||||
// as long as the buffer is not full (remaining() == 0) and we have not reached EOF (lastRead == -1), keep reading.
|
{
|
||||||
while (dest.remaining() != 0 && lastRead != -1) {
|
crc32c.reset();
|
||||||
lastRead = source.read(dest);
|
crc32c.update(data, offset, length);
|
||||||
|
return mask((int) crc32c.getValue());
|
||||||
// if we got EOF, do not add to total read.
|
}
|
||||||
if (lastRead != -1) {
|
|
||||||
totalRead += lastRead;
|
/**
|
||||||
}
|
* Checksums are not stored directly, but masked, as checksumming data and
|
||||||
}
|
* then its own checksum can be problematic. The masking is the same as used
|
||||||
}
|
* in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant
|
||||||
|
* 0xa282ead8 (using wraparound as normal for unsigned integers). This is
|
||||||
if (totalRead > 0) {
|
* equivalent to the following C code:
|
||||||
dest.limit(dest.position());
|
* <p/>
|
||||||
}
|
* <pre>
|
||||||
else {
|
* uint32_t mask_checksum(uint32_t x) {
|
||||||
dest.position(dest.limit());
|
* return ((x >> 15) | (x << 17)) + 0xa282ead8;
|
||||||
}
|
* }
|
||||||
|
* </pre>
|
||||||
return totalRead;
|
*/
|
||||||
}
|
public static int mask(int crc)
|
||||||
|
{
|
||||||
static int skip(final ReadableByteChannel source, final int skip, final ByteBuffer buffer)
|
// Rotate right by 15 bits and add a constant.
|
||||||
throws IOException
|
return ((crc >>> 15) | (crc << 17)) + MASK_DELTA;
|
||||||
{
|
}
|
||||||
if (skip <= 0) {
|
|
||||||
return 0;
|
static final int readBytes(ReadableByteChannel source, ByteBuffer dest)
|
||||||
}
|
throws IOException
|
||||||
|
{
|
||||||
int toSkip = skip;
|
// tells how many bytes to read.
|
||||||
int skipped = 0;
|
final int expectedLength = dest.remaining();
|
||||||
while (toSkip > 0 && skipped != -1) {
|
|
||||||
buffer.clear();
|
int totalRead = 0;
|
||||||
if (toSkip < buffer.capacity()) {
|
|
||||||
buffer.limit(toSkip);
|
// how many bytes were read.
|
||||||
}
|
int lastRead = source.read(dest);
|
||||||
|
|
||||||
skipped = source.read(buffer);
|
totalRead = lastRead;
|
||||||
if (skipped > 0) {
|
|
||||||
toSkip -= skipped;
|
// if we did not read as many bytes as we had hoped, try reading again.
|
||||||
}
|
if (lastRead < expectedLength) {
|
||||||
}
|
// as long as the buffer is not full (remaining() == 0) and we have not reached EOF (lastRead == -1), keep reading.
|
||||||
|
while (dest.remaining() != 0 && lastRead != -1) {
|
||||||
buffer.clear();
|
lastRead = source.read(dest);
|
||||||
return skip - toSkip;
|
|
||||||
}
|
// if we got EOF, do not add to total read.
|
||||||
}
|
if (lastRead != -1) {
|
||||||
|
totalRead += lastRead;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (totalRead > 0) {
|
||||||
|
dest.limit(dest.position());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
dest.position(dest.limit());
|
||||||
|
}
|
||||||
|
|
||||||
|
return totalRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int skip(final ReadableByteChannel source, final int skip, final ByteBuffer buffer)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
if (skip <= 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int toSkip = skip;
|
||||||
|
int skipped = 0;
|
||||||
|
while (toSkip > 0 && skipped != -1) {
|
||||||
|
buffer.clear();
|
||||||
|
if (toSkip < buffer.capacity()) {
|
||||||
|
buffer.limit(toSkip);
|
||||||
|
}
|
||||||
|
|
||||||
|
skipped = source.read(buffer);
|
||||||
|
if (skipped > 0) {
|
||||||
|
toSkip -= skipped;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer.clear();
|
||||||
|
return skip - toSkip;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -9,7 +9,6 @@ import static org.junit.Assert.fail;
|
||||||
import static org.xerial.snappy.SnappyFramed.COMPRESSED_DATA_FLAG;
|
import static org.xerial.snappy.SnappyFramed.COMPRESSED_DATA_FLAG;
|
||||||
import static org.xerial.snappy.SnappyFramed.HEADER_BYTES;
|
import static org.xerial.snappy.SnappyFramed.HEADER_BYTES;
|
||||||
import static org.xerial.snappy.SnappyFramed.UNCOMPRESSED_DATA_FLAG;
|
import static org.xerial.snappy.SnappyFramed.UNCOMPRESSED_DATA_FLAG;
|
||||||
import static org.xerial.snappy.SnappyFramed.maskedCrc32c;
|
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
|
@ -418,4 +417,9 @@ public class SnappyFramedStreamTest
|
||||||
assertEquals(random.length, length);
|
assertEquals(random.length, length);
|
||||||
return random;
|
return random;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static int maskedCrc32c(byte[] data)
|
||||||
|
{
|
||||||
|
return SnappyFramed.maskedCrc32c(new PureJavaCrc32C(), data, 0, data.length);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue