Add pure-java Snappy implementation (#244)
* Add PureJavaSnappy * Add more tests for pure-java snappy * Add note on enabling pure-java mode * Add org.xerial.snappy.pure to osgi bundle
This commit is contained in:
parent
edc4ec28bd
commit
594ca5fddb
|
@ -169,6 +169,12 @@ When building on Solaris, use `gmake`:
|
|||
A file `target/snappy-java-$(version).jar` is the product additionally containing the native library built for your platform.
|
||||
|
||||
## Miscellaneous Notes
|
||||
|
||||
### Using pure-java Snappy implementation
|
||||
|
||||
snappy-java can optionally use a pure-java implementation of Snappy based on [aircompressor](https://github.com/airlift/aircompressor/tree/master/src/main/java/io/airlift/compress/snappy). This implementation is selected when no native Snappy library for your platform is found. You can also force using this pure-java implementation by setting a JVM property `org.xerial.snappy.purejava=true` before loading any class of Snappy (e.g., using `-Dorg.xerial.snappy.purejava=true` option when launching JVM).
|
||||
|
||||
|
||||
### Using snappy-java with Tomcat 6 (or higher) Web Server
|
||||
|
||||
Simply put the snappy-java's jar to WEB-INF/lib folder of your web application. Usual JNI-library specific problem no longer exists since snappy-java version 1.0.3 or higher can be loaded by multiple class loaders.
|
||||
|
|
|
@ -111,7 +111,7 @@ enablePlugins(SbtOsgi)
|
|||
|
||||
osgiSettings
|
||||
|
||||
OsgiKeys.exportPackage := Seq("org.xerial.snappy", "org.xerial.snappy.buffer", "org.xerial.snappy.pool")
|
||||
OsgiKeys.exportPackage := Seq("org.xerial.snappy", "org.xerial.snappy.buffer", "org.xerial.snappy.pool", "org.xerial.snappy.pure")
|
||||
OsgiKeys.bundleSymbolicName := "org.xerial.snappy.snappy-java"
|
||||
OsgiKeys.bundleActivator := Option("org.xerial.snappy.SnappyBundleActivator")
|
||||
OsgiKeys.importPackage := Seq("""org.osgi.framework;version="[1.5,2)"""")
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
//--------------------------------------
|
||||
package org.xerial.snappy;
|
||||
|
||||
import org.xerial.snappy.pure.PureJavaSnappy;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
|
@ -43,18 +45,13 @@ import java.util.Properties;
|
|||
public class Snappy
|
||||
{
|
||||
static {
|
||||
try {
|
||||
impl = SnappyLoader.loadSnappyApi();
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new ExceptionInInitializerError(e);
|
||||
}
|
||||
init();
|
||||
}
|
||||
|
||||
/**
|
||||
* An instance of SnappyNative
|
||||
*/
|
||||
private static SnappyNative impl;
|
||||
private static SnappyApi impl;
|
||||
|
||||
/**
|
||||
* Clean up a temporary file (native lib) generated by snappy-java.
|
||||
|
@ -69,6 +66,15 @@ public class Snappy
|
|||
SnappyLoader.setSnappyApi(null);
|
||||
}
|
||||
|
||||
static void init() {
|
||||
try {
|
||||
impl = SnappyLoader.loadSnappyApi();
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new ExceptionInInitializerError(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy bytes from source to destination
|
||||
*
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
package org.xerial.snappy;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/**
|
||||
* Snappy compressor/decompressor interface. The implementation can be JNI binding or pure-java Snappy implementation.
|
||||
*/
|
||||
public interface SnappyApi
|
||||
{
|
||||
// ------------------------------------------------------------------------
|
||||
// Generic compression/decompression routines.
|
||||
// ------------------------------------------------------------------------
|
||||
long rawCompress(long inputAddr, long inputSize, long destAddr)
|
||||
throws IOException;
|
||||
|
||||
long rawUncompress(long inputAddr, long inputSize, long destAddr)
|
||||
throws IOException;
|
||||
|
||||
int rawCompress(ByteBuffer input, int inputOffset, int inputLength, ByteBuffer compressed,
|
||||
int outputOffset)
|
||||
throws IOException;
|
||||
|
||||
int rawCompress(Object input, int inputOffset, int inputByteLength, Object output, int outputOffset)
|
||||
throws IOException;
|
||||
|
||||
int rawUncompress(ByteBuffer compressed, int inputOffset, int inputLength, ByteBuffer uncompressed,
|
||||
int outputOffset)
|
||||
throws IOException;
|
||||
|
||||
int rawUncompress(Object input, int inputOffset, int inputLength, Object output, int outputOffset)
|
||||
throws IOException;
|
||||
|
||||
// Returns the maximal size of the compressed representation of
|
||||
// input data that is "source_bytes" bytes in length;
|
||||
int maxCompressedLength(int source_bytes);
|
||||
|
||||
// This operation takes O(1) time.
|
||||
int uncompressedLength(ByteBuffer compressed, int offset, int len)
|
||||
throws IOException;
|
||||
|
||||
int uncompressedLength(Object input, int offset, int len)
|
||||
throws IOException;
|
||||
|
||||
long uncompressedLength(long inputAddr, long len)
|
||||
throws IOException;
|
||||
|
||||
boolean isValidCompressedBuffer(ByteBuffer compressed, int offset, int len)
|
||||
throws IOException;
|
||||
|
||||
boolean isValidCompressedBuffer(Object input, int offset, int len)
|
||||
throws IOException;
|
||||
|
||||
boolean isValidCompressedBuffer(long inputAddr, long offset, long len)
|
||||
throws IOException;
|
||||
|
||||
void arrayCopy(Object src, int offset, int byteLength, Object dest, int dOffset)
|
||||
throws IOException;
|
||||
|
||||
}
|
|
@ -41,7 +41,8 @@ public enum SnappyErrorCode
|
|||
FAILED_TO_UNCOMPRESS(5),
|
||||
EMPTY_INPUT(6),
|
||||
INCOMPATIBLE_VERSION(7),
|
||||
INVALID_CHUNK_SIZE(8);
|
||||
INVALID_CHUNK_SIZE(8),
|
||||
UNSUPPORTED_PLATFORM(9);
|
||||
|
||||
public final int id;
|
||||
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
//--------------------------------------
|
||||
package org.xerial.snappy;
|
||||
|
||||
import org.xerial.snappy.pure.PureJavaSnappy;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.URL;
|
||||
import java.util.Enumeration;
|
||||
|
@ -75,13 +77,14 @@ public class SnappyLoader
|
|||
public static final String SNAPPY_SYSTEM_PROPERTIES_FILE = "org-xerial-snappy.properties";
|
||||
public static final String KEY_SNAPPY_LIB_PATH = "org.xerial.snappy.lib.path";
|
||||
public static final String KEY_SNAPPY_LIB_NAME = "org.xerial.snappy.lib.name";
|
||||
public static final String KEY_SNAPPY_PUREJAVA = "org.xerial.snappy.purejava";
|
||||
public static final String KEY_SNAPPY_TEMPDIR = "org.xerial.snappy.tempdir";
|
||||
public static final String KEY_SNAPPY_USE_SYSTEMLIB = "org.xerial.snappy.use.systemlib";
|
||||
public static final String KEY_SNAPPY_DISABLE_BUNDLED_LIBS = "org.xerial.snappy.disable.bundled.libs"; // Depreciated, but preserved for backward compatibility
|
||||
|
||||
private static boolean isLoaded = false;
|
||||
|
||||
private static volatile SnappyNative snappyApi = null;
|
||||
private static volatile SnappyApi snappyApi = null;
|
||||
private static volatile BitShuffleNative bitshuffleApi = null;
|
||||
|
||||
private static File nativeLibFile = null;
|
||||
|
@ -101,11 +104,11 @@ public class SnappyLoader
|
|||
/**
|
||||
* Set the `snappyApi` instance.
|
||||
*
|
||||
* @param nativeCode
|
||||
* @param apiImpl
|
||||
*/
|
||||
static synchronized void setSnappyApi(SnappyNative nativeCode)
|
||||
static synchronized void setSnappyApi(SnappyApi apiImpl)
|
||||
{
|
||||
snappyApi = nativeCode;
|
||||
snappyApi = apiImpl;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -146,13 +149,29 @@ public class SnappyLoader
|
|||
loadSnappySystemProperties();
|
||||
}
|
||||
|
||||
static synchronized SnappyNative loadSnappyApi()
|
||||
static synchronized boolean isPureJava() {
|
||||
return snappyApi != null && PureJavaSnappy.class.isAssignableFrom(snappyApi.getClass());
|
||||
}
|
||||
|
||||
static synchronized SnappyApi loadSnappyApi()
|
||||
{
|
||||
if (snappyApi != null) {
|
||||
return snappyApi;
|
||||
}
|
||||
loadNativeLibrary();
|
||||
setSnappyApi(new SnappyNative());
|
||||
try {
|
||||
if(Boolean.parseBoolean(System.getProperty(KEY_SNAPPY_PUREJAVA, "false"))) {
|
||||
// Use pure-java Snappy implementation
|
||||
setSnappyApi(new PureJavaSnappy());
|
||||
}
|
||||
else {
|
||||
loadNativeLibrary();
|
||||
setSnappyApi(new SnappyNative());
|
||||
}
|
||||
}
|
||||
catch(Exception e) {
|
||||
// Fall-back to pure-java Snappy implementation
|
||||
setSnappyApi(new PureJavaSnappy());
|
||||
}
|
||||
return snappyApi;
|
||||
}
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ import java.nio.ByteBuffer;
|
|||
*
|
||||
* @author leo
|
||||
*/
|
||||
public class SnappyNative
|
||||
public class SnappyNative implements SnappyApi
|
||||
{
|
||||
|
||||
public native String nativeLibraryVersion();
|
||||
|
@ -46,49 +46,63 @@ public class SnappyNative
|
|||
// ------------------------------------------------------------------------
|
||||
// Generic compression/decompression routines.
|
||||
// ------------------------------------------------------------------------
|
||||
@Override
|
||||
public native long rawCompress(long inputAddr, long inputSize, long destAddr)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public native long rawUncompress(long inputAddr, long inputSize, long destAddr)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public native int rawCompress(ByteBuffer input, int inputOffset, int inputLength, ByteBuffer compressed,
|
||||
int outputOffset)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public native int rawCompress(Object input, int inputOffset, int inputByteLength, Object output, int outputOffset)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public native int rawUncompress(ByteBuffer compressed, int inputOffset, int inputLength, ByteBuffer uncompressed,
|
||||
int outputOffset)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public native int rawUncompress(Object input, int inputOffset, int inputLength, Object output, int outputOffset)
|
||||
throws IOException;
|
||||
|
||||
// Returns the maximal size of the compressed representation of
|
||||
// input data that is "source_bytes" bytes in length;
|
||||
@Override
|
||||
public native int maxCompressedLength(int source_bytes);
|
||||
|
||||
// This operation takes O(1) time.
|
||||
@Override
|
||||
public native int uncompressedLength(ByteBuffer compressed, int offset, int len)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public native int uncompressedLength(Object input, int offset, int len)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public native long uncompressedLength(long inputAddr, long len)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public native boolean isValidCompressedBuffer(ByteBuffer compressed, int offset, int len)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public native boolean isValidCompressedBuffer(Object input, int offset, int len)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public native boolean isValidCompressedBuffer(long inputAddr, long offset, long len)
|
||||
throws IOException;
|
||||
|
||||
@Override
|
||||
public native void arrayCopy(Object src, int offset, int byteLength, Object dest, int dOffset)
|
||||
throws IOException;
|
||||
|
||||
|
|
|
@ -0,0 +1,244 @@
|
|||
package org.xerial.snappy.pure;
|
||||
|
||||
import org.xerial.snappy.SnappyApi;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import static org.xerial.snappy.pure.UnsafeUtil.getAddress;
|
||||
import static sun.misc.Unsafe.ARRAY_BYTE_BASE_OFFSET;
|
||||
|
||||
/**
|
||||
* A pure-java Snappy implementation using https://github.com/airlift/aircompressor
|
||||
*/
|
||||
public class PureJavaSnappy implements SnappyApi
|
||||
{
|
||||
private final short[] table = new short[SnappyRawCompressor.MAX_HASH_TABLE_SIZE];
|
||||
private final static int MAX_OUTPUT_LENGTH = Integer.MAX_VALUE;
|
||||
|
||||
@Override
|
||||
public long rawCompress(long inputAddr, long inputSize, long destAddr)
|
||||
throws IOException
|
||||
{
|
||||
return SnappyRawCompressor.compress(null, inputAddr, inputSize, null, destAddr, MAX_OUTPUT_LENGTH, table);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long rawUncompress(long inputAddr, long inputSize, long destAddr)
|
||||
throws IOException
|
||||
{
|
||||
return SnappyRawDecompressor.decompress(null, inputAddr, inputSize, null, destAddr, MAX_OUTPUT_LENGTH);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int rawCompress(ByteBuffer input, int inputOffset, int inputLength, ByteBuffer compressed, int outputOffset)
|
||||
throws IOException
|
||||
{
|
||||
Object inputBase;
|
||||
long inputAddress;
|
||||
long inputLimit;
|
||||
if (input.isDirect()) {
|
||||
inputBase = null;
|
||||
long address = getAddress(input);
|
||||
inputAddress = address + input.position();
|
||||
inputLimit = address + input.limit();
|
||||
}
|
||||
else if (input.hasArray()) {
|
||||
inputBase = input.array();
|
||||
inputAddress = ARRAY_BYTE_BASE_OFFSET + input.arrayOffset() + input.position();
|
||||
inputLimit = ARRAY_BYTE_BASE_OFFSET + input.arrayOffset() + input.limit();
|
||||
}
|
||||
else {
|
||||
throw new IllegalArgumentException("Unsupported input ByteBuffer implementation " + input.getClass().getName());
|
||||
}
|
||||
|
||||
Object outputBase;
|
||||
long outputAddress;
|
||||
long outputLimit;
|
||||
if (compressed.isDirect()) {
|
||||
outputBase = null;
|
||||
long address = getAddress(compressed);
|
||||
outputAddress = address + compressed.position();
|
||||
outputLimit = address + compressed.limit();
|
||||
}
|
||||
else if (compressed.hasArray()) {
|
||||
outputBase = compressed.array();
|
||||
outputAddress = ARRAY_BYTE_BASE_OFFSET + compressed.arrayOffset() + compressed.position();
|
||||
outputLimit = ARRAY_BYTE_BASE_OFFSET + compressed.arrayOffset() + compressed.limit();
|
||||
}
|
||||
else {
|
||||
throw new IllegalArgumentException("Unsupported output ByteBuffer implementation " + compressed.getClass().getName());
|
||||
}
|
||||
|
||||
// HACK: Assure JVM does not collect Slice wrappers while compressing, since the
|
||||
// collection may trigger freeing of the underlying memory resulting in a segfault
|
||||
// There is no other known way to signal to the JVM that an object should not be
|
||||
// collected in a block, and technically, the JVM is allowed to eliminate these locks.
|
||||
synchronized (input) {
|
||||
synchronized (compressed) {
|
||||
int written = SnappyRawCompressor.compress(
|
||||
inputBase,
|
||||
inputAddress,
|
||||
inputLimit,
|
||||
outputBase,
|
||||
outputAddress,
|
||||
outputLimit,
|
||||
table);
|
||||
compressed.position(compressed.position() + written);
|
||||
return written;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int rawCompress(Object input, int inputOffset, int inputByteLength, Object output, int outputOffset)
|
||||
throws IOException
|
||||
{
|
||||
long inputAddress = ARRAY_BYTE_BASE_OFFSET + inputOffset;
|
||||
long inputLimit = inputAddress + inputByteLength;
|
||||
long outputAddress = ARRAY_BYTE_BASE_OFFSET + outputOffset;
|
||||
long outputLimit = outputAddress + MAX_OUTPUT_LENGTH;
|
||||
|
||||
return SnappyRawCompressor.compress(input, inputAddress, inputLimit, output, outputAddress, outputLimit, table);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int rawUncompress(ByteBuffer compressed, int inputOffset, int inputLength, ByteBuffer uncompressed, int outputOffset)
|
||||
throws IOException
|
||||
{
|
||||
Object inputBase;
|
||||
long inputAddress;
|
||||
long inputLimit;
|
||||
if (compressed.isDirect()) {
|
||||
inputBase = null;
|
||||
long address = getAddress(compressed);
|
||||
inputAddress = address + compressed.position();
|
||||
inputLimit = address + compressed.limit();
|
||||
}
|
||||
else if (compressed.hasArray()) {
|
||||
inputBase = compressed.array();
|
||||
inputAddress = ARRAY_BYTE_BASE_OFFSET + compressed.arrayOffset() + compressed.position();
|
||||
inputLimit = ARRAY_BYTE_BASE_OFFSET + compressed.arrayOffset() + compressed.limit();
|
||||
}
|
||||
else {
|
||||
throw new IllegalArgumentException("Unsupported input ByteBuffer implementation " + compressed.getClass().getName());
|
||||
}
|
||||
|
||||
Object outputBase;
|
||||
long outputAddress;
|
||||
long outputLimit;
|
||||
if (uncompressed.isDirect()) {
|
||||
outputBase = null;
|
||||
long address = getAddress(uncompressed);
|
||||
outputAddress = address + uncompressed.position();
|
||||
outputLimit = address + uncompressed.limit();
|
||||
}
|
||||
else if (uncompressed.hasArray()) {
|
||||
outputBase = uncompressed.array();
|
||||
outputAddress = ARRAY_BYTE_BASE_OFFSET + uncompressed.arrayOffset() + uncompressed.position();
|
||||
outputLimit = ARRAY_BYTE_BASE_OFFSET + uncompressed.arrayOffset() + uncompressed.limit();
|
||||
}
|
||||
else {
|
||||
throw new IllegalArgumentException("Unsupported output ByteBuffer implementation " + uncompressed.getClass().getName());
|
||||
}
|
||||
|
||||
// HACK: Assure JVM does not collect Slice wrappers while decompressing, since the
|
||||
// collection may trigger freeing of the underlying memory resulting in a segfault
|
||||
// There is no other known way to signal to the JVM that an object should not be
|
||||
// collected in a block, and technically, the JVM is allowed to eliminate these locks.
|
||||
synchronized (compressed) {
|
||||
synchronized (uncompressed) {
|
||||
int written = SnappyRawDecompressor.decompress(inputBase, inputAddress, inputLimit, outputBase, outputAddress, outputLimit);
|
||||
uncompressed.position(uncompressed.position() + written);
|
||||
return written;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int rawUncompress(Object input, int inputOffset, int inputLength, Object output, int outputOffset)
|
||||
throws IOException
|
||||
{
|
||||
long inputAddress = ARRAY_BYTE_BASE_OFFSET + inputOffset;
|
||||
long inputLimit = inputAddress + inputLength;
|
||||
long outputAddress = ARRAY_BYTE_BASE_OFFSET + outputOffset;
|
||||
long outputLimit = outputAddress + MAX_OUTPUT_LENGTH;
|
||||
|
||||
return SnappyRawDecompressor.decompress(input, inputAddress, inputLimit, output, outputAddress, outputLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maxCompressedLength(int source_bytes)
|
||||
{
|
||||
return SnappyRawCompressor.maxCompressedLength(source_bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int uncompressedLength(ByteBuffer compressed, int offset, int len)
|
||||
throws IOException
|
||||
{
|
||||
Object inputBase;
|
||||
long inputAddress;
|
||||
long inputLimit;
|
||||
if(compressed.isDirect()) {
|
||||
inputBase = null;
|
||||
long address = getAddress(compressed);
|
||||
inputAddress = address + compressed.position();
|
||||
inputLimit = address + compressed.limit();
|
||||
}
|
||||
else if (compressed.hasArray()){
|
||||
inputBase = compressed.array();
|
||||
inputAddress = ARRAY_BYTE_BASE_OFFSET + offset;
|
||||
inputLimit = ARRAY_BYTE_BASE_OFFSET + len;
|
||||
}
|
||||
else {
|
||||
throw new IllegalArgumentException("Unsupported input ByteBuffer implementation: " + compressed.getClass().getName());
|
||||
}
|
||||
return SnappyRawDecompressor.getUncompressedLength(inputBase, inputAddress, inputLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int uncompressedLength(Object input, int offset, int len)
|
||||
throws IOException
|
||||
{
|
||||
long compressedAddress = ARRAY_BYTE_BASE_OFFSET + offset;
|
||||
long compressedLimit = ARRAY_BYTE_BASE_OFFSET + len;
|
||||
|
||||
return SnappyRawDecompressor.getUncompressedLength(input, compressedAddress, compressedLimit);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long uncompressedLength(long inputAddr, long len)
|
||||
throws IOException
|
||||
{
|
||||
return SnappyRawDecompressor.getUncompressedLength(null, inputAddr, inputAddr + len);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isValidCompressedBuffer(ByteBuffer compressed, int offset, int len)
|
||||
throws IOException
|
||||
{
|
||||
throw new UnsupportedOperationException("isValidCompressedBuffer is not supported in pure-java mode");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isValidCompressedBuffer(Object input, int offset, int len)
|
||||
throws IOException
|
||||
{
|
||||
throw new UnsupportedOperationException("isValidCompressedBuffer is not supported in pure-java mode");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isValidCompressedBuffer(long inputAddr, long offset, long len)
|
||||
throws IOException
|
||||
{
|
||||
throw new UnsupportedOperationException("isValidCompressedBuffer is not supported in pure-java mode");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void arrayCopy(Object src, int offset, int byteLength, Object dest, int dOffset)
|
||||
throws IOException
|
||||
{
|
||||
System.arraycopy(src, offset, dest, dOffset, byteLength);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.xerial.snappy.pure;
|
||||
|
||||
final class SnappyConstants
|
||||
{
|
||||
static final int SIZE_OF_SHORT = 2;
|
||||
static final int SIZE_OF_INT = 4;
|
||||
static final int SIZE_OF_LONG = 8;
|
||||
|
||||
static final int LITERAL = 0;
|
||||
static final int COPY_1_BYTE_OFFSET = 1; // 3 bit length + 3 bits of offset in opcode
|
||||
static final int COPY_2_BYTE_OFFSET = 2;
|
||||
|
||||
private SnappyConstants()
|
||||
{
|
||||
}
|
||||
}
|
|
@ -0,0 +1,409 @@
|
|||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.xerial.snappy.pure;
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.xerial.snappy.pure.SnappyConstants.COPY_1_BYTE_OFFSET;
|
||||
import static org.xerial.snappy.pure.SnappyConstants.COPY_2_BYTE_OFFSET;
|
||||
import static org.xerial.snappy.pure.SnappyConstants.SIZE_OF_INT;
|
||||
import static org.xerial.snappy.pure.SnappyConstants.SIZE_OF_LONG;
|
||||
import static org.xerial.snappy.pure.SnappyConstants.SIZE_OF_SHORT;
|
||||
import static org.xerial.snappy.pure.UnsafeUtil.UNSAFE;
|
||||
|
||||
public final class SnappyRawCompressor
|
||||
{
|
||||
// The size of a compression block. Note that many parts of the compression
|
||||
// code assumes that BLOCK_SIZE <= 65536; in particular, the hash table
|
||||
// can only store 16-bit offsets, and EmitCopy() also assumes the offset
|
||||
// is 65535 bytes or less. Note also that if you change this, it will
|
||||
// affect the framing format (see framing_format.txt).
|
||||
//
|
||||
// Note that there might be older data around that is compressed with larger
|
||||
// block sizes, so the decompression code should not rely on the
|
||||
// non-existence of long back-references.
|
||||
private static final int BLOCK_LOG = 16;
|
||||
private static final int BLOCK_SIZE = 1 << BLOCK_LOG;
|
||||
|
||||
private static final int INPUT_MARGIN_BYTES = 15;
|
||||
|
||||
private static final int MAX_HASH_TABLE_BITS = 14;
|
||||
public static final int MAX_HASH_TABLE_SIZE = 1 << MAX_HASH_TABLE_BITS;
|
||||
|
||||
private SnappyRawCompressor() {}
|
||||
|
||||
public static int maxCompressedLength(int sourceLength)
|
||||
{
|
||||
// Compressed data can be defined as:
|
||||
// compressed := item* literal*
|
||||
// item := literal* copy
|
||||
//
|
||||
// The trailing literal sequence has a space blowup of at most 62/60
|
||||
// since a literal of length 60 needs one tag byte + one extra byte
|
||||
// for length information.
|
||||
//
|
||||
// Item blowup is trickier to measure. Suppose the "copy" op copies
|
||||
// 4 bytes of data. Because of a special check in the encoding code,
|
||||
// we produce a 4-byte copy only if the offset is < 65536. Therefore
|
||||
// the copy op takes 3 bytes to encode, and this type of item leads
|
||||
// to at most the 62/60 blowup for representing literals.
|
||||
//
|
||||
// Suppose the "copy" op copies 5 bytes of data. If the offset is big
|
||||
// enough, it will take 5 bytes to encode the copy op. Therefore the
|
||||
// worst case here is a one-byte literal followed by a five-byte copy.
|
||||
// I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
|
||||
//
|
||||
// This last factor dominates the blowup, so the final estimate is:
|
||||
return 32 + sourceLength + sourceLength / 6;
|
||||
}
|
||||
|
||||
// suppress warnings is required to use assert
|
||||
@SuppressWarnings("IllegalToken")
|
||||
public static int compress(
|
||||
final Object inputBase,
|
||||
final long inputAddress,
|
||||
final long inputLimit,
|
||||
final Object outputBase,
|
||||
final long outputAddress,
|
||||
final long outputLimit,
|
||||
final short[] table)
|
||||
{
|
||||
// The compression code assumes output is larger than the max compression size (with 32 bytes of
|
||||
// extra padding), and does not check bounds for writing to output.
|
||||
int maxCompressedLength = maxCompressedLength((int) (inputLimit - inputAddress));
|
||||
if (outputLimit - outputAddress < maxCompressedLength) {
|
||||
throw new IllegalArgumentException("Output buffer must be at least " + maxCompressedLength + " bytes");
|
||||
}
|
||||
|
||||
// First write the uncompressed size to the output as a variable length int
|
||||
long output = writeUncompressedLength(outputBase, outputAddress, (int) (inputLimit - inputAddress));
|
||||
|
||||
for (long blockAddress = inputAddress; blockAddress < inputLimit; blockAddress += BLOCK_SIZE) {
|
||||
final long blockLimit = Math.min(inputLimit, blockAddress + BLOCK_SIZE);
|
||||
long input = blockAddress;
|
||||
assert blockLimit - blockAddress <= BLOCK_SIZE;
|
||||
|
||||
int blockHashTableSize = getHashTableSize((int) (blockLimit - blockAddress));
|
||||
Arrays.fill(table, 0, blockHashTableSize, (short) 0);
|
||||
|
||||
// todo given that hashTableSize is required to be a power of 2, this is overly complex
|
||||
final int shift = 32 - log2Floor(blockHashTableSize);
|
||||
assert (blockHashTableSize & (blockHashTableSize - 1)) == 0 : "table must be power of two";
|
||||
assert 0xFFFFFFFF >>> shift == blockHashTableSize - 1;
|
||||
|
||||
// Bytes in [nextEmitAddress, input) will be emitted as literal bytes. Or
|
||||
// [nextEmitAddress, inputLimit) after the main loop.
|
||||
long nextEmitAddress = input;
|
||||
|
||||
final long fastInputLimit = blockLimit - INPUT_MARGIN_BYTES;
|
||||
while (input <= fastInputLimit) {
|
||||
assert nextEmitAddress <= input;
|
||||
|
||||
// The body of this loop emits a literal once and then emits a copy one
|
||||
// or more times. (The exception is that when we're close to exhausting
|
||||
// the input we exit and emit a literal.)
|
||||
//
|
||||
// In the first iteration of this loop we're just starting, so
|
||||
// there's nothing to copy, so we must emit a literal once. And we
|
||||
// only start a new iteration when the current iteration has determined
|
||||
// that a literal will precede the next copy (if any).
|
||||
//
|
||||
// Step 1: Scan forward in the input looking for a 4-byte-long match.
|
||||
// If we get close to exhausting the input exit and emit a final literal.
|
||||
//
|
||||
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||
// found, start looking only at every other byte. If 32 more bytes are
|
||||
// scanned, look at every third byte, etc.. When a match is found,
|
||||
// immediately go back to looking at every byte. This is a small loss
|
||||
// (~5% performance, ~0.1% density) for compressible data due to more
|
||||
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
|
||||
// win since the compressor quickly "realizes" the data is incompressible
|
||||
// and doesn't bother looking for matches everywhere.
|
||||
//
|
||||
// The "skip" variable keeps track of how many bytes there are since the
|
||||
// last match; dividing it by 32 (ie. right-shifting by five) gives the
|
||||
// number of bytes to move ahead for each iteration.
|
||||
int skip = 32;
|
||||
|
||||
long candidateIndex = 0;
|
||||
for (input += 1; input + (skip >>> 5) <= fastInputLimit; input += ((skip++) >>> 5)) {
|
||||
// hash the 4 bytes starting at the input pointer
|
||||
int currentInt = UNSAFE.getInt(inputBase, input);
|
||||
int hash = hashBytes(currentInt, shift);
|
||||
|
||||
// get the position of a 4 bytes sequence with the same hash
|
||||
candidateIndex = blockAddress + (table[hash] & 0xFFFF);
|
||||
assert candidateIndex >= 0;
|
||||
assert candidateIndex < input;
|
||||
|
||||
// update the hash to point to the current position
|
||||
table[hash] = (short) (input - blockAddress);
|
||||
|
||||
// if the 4 byte sequence a the candidate index matches the sequence at the
|
||||
// current position, proceed to the next phase
|
||||
if (currentInt == UNSAFE.getInt(inputBase, candidateIndex)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (input + (skip >>> 5) > fastInputLimit) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Step 2: A 4-byte match has been found. We'll later see if more
|
||||
// than 4 bytes match. But, prior to the match, input
|
||||
// bytes [nextEmit, ip) are unmatched. Emit them as "literal bytes."
|
||||
assert nextEmitAddress + 16 <= blockLimit;
|
||||
|
||||
int literalLength = (int) (input - nextEmitAddress);
|
||||
output = emitLiteralLength(outputBase, output, literalLength);
|
||||
|
||||
// Fast copy can use 8 extra bytes of input and output, which is safe because:
|
||||
// - The input will always have INPUT_MARGIN_BYTES = 15 extra available bytes
|
||||
// - The output will always have 32 spare bytes (see MaxCompressedLength).
|
||||
output = fastCopy(inputBase, nextEmitAddress, outputBase, output, literalLength);
|
||||
|
||||
// Step 3: Call EmitCopy, and then see if another EmitCopy could
|
||||
// be our next move. Repeat until we find no match for the
|
||||
// input immediately after what was consumed by the last EmitCopy call.
|
||||
//
|
||||
// If we exit this loop normally then we need to call EmitLiteral next,
|
||||
// though we don't yet know how big the literal will be. We handle that
|
||||
// by proceeding to the next iteration of the main loop. We also can exit
|
||||
// this loop via goto if we get close to exhausting the input.
|
||||
int inputBytes;
|
||||
do {
|
||||
// We have a 4-byte match at input, and no need to emit any
|
||||
// "literal bytes" prior to input.
|
||||
assert (blockLimit >= input + SIZE_OF_INT);
|
||||
|
||||
// determine match length
|
||||
int matched = count(inputBase, input + SIZE_OF_INT, candidateIndex + SIZE_OF_INT, blockLimit);
|
||||
matched += SIZE_OF_INT;
|
||||
|
||||
// Emit the copy operation for this chunk
|
||||
output = emitCopy(outputBase, output, input, candidateIndex, matched);
|
||||
input += matched;
|
||||
|
||||
// are we done?
|
||||
if (input >= fastInputLimit) {
|
||||
break;
|
||||
}
|
||||
|
||||
// We could immediately start working at input now, but to improve
|
||||
// compression we first update table[Hash(ip - 1, ...)].
|
||||
long longValue = UNSAFE.getLong(inputBase, input - 1);
|
||||
int prevInt = (int) longValue;
|
||||
inputBytes = (int) (longValue >>> 8);
|
||||
|
||||
// add hash starting with previous byte
|
||||
int prevHash = hashBytes(prevInt, shift);
|
||||
table[prevHash] = (short) (input - blockAddress - 1);
|
||||
|
||||
// update hash of current byte
|
||||
int curHash = hashBytes(inputBytes, shift);
|
||||
|
||||
candidateIndex = blockAddress + (table[curHash] & 0xFFFF);
|
||||
table[curHash] = (short) (input - blockAddress);
|
||||
} while (inputBytes == UNSAFE.getInt(inputBase, candidateIndex));
|
||||
nextEmitAddress = input;
|
||||
}
|
||||
|
||||
// Emit the remaining bytes as a literal
|
||||
if (nextEmitAddress < blockLimit) {
|
||||
int literalLength = (int) (blockLimit - nextEmitAddress);
|
||||
output = emitLiteralLength(outputBase, output, literalLength);
|
||||
UNSAFE.copyMemory(inputBase, nextEmitAddress, outputBase, output, literalLength);
|
||||
output += literalLength;
|
||||
}
|
||||
}
|
||||
|
||||
return (int) (output - outputAddress);
|
||||
}
|
||||
|
||||
private static int count(Object inputBase, final long start, long matchStart, long matchLimit)
|
||||
{
|
||||
long current = start;
|
||||
|
||||
// first, compare long at a time
|
||||
while (current < matchLimit - (SIZE_OF_LONG - 1)) {
|
||||
long diff = UNSAFE.getLong(inputBase, matchStart) ^ UNSAFE.getLong(inputBase, current);
|
||||
if (diff != 0) {
|
||||
current += Long.numberOfTrailingZeros(diff) >> 3;
|
||||
return (int) (current - start);
|
||||
}
|
||||
|
||||
current += SIZE_OF_LONG;
|
||||
matchStart += SIZE_OF_LONG;
|
||||
}
|
||||
|
||||
if (current < matchLimit - (SIZE_OF_INT - 1) && UNSAFE.getInt(inputBase, matchStart) == UNSAFE.getInt(inputBase, current)) {
|
||||
current += SIZE_OF_INT;
|
||||
matchStart += SIZE_OF_INT;
|
||||
}
|
||||
|
||||
if (current < matchLimit - (SIZE_OF_SHORT - 1) && UNSAFE.getShort(inputBase, matchStart) == UNSAFE.getShort(inputBase, current)) {
|
||||
current += SIZE_OF_SHORT;
|
||||
matchStart += SIZE_OF_SHORT;
|
||||
}
|
||||
|
||||
if (current < matchLimit && UNSAFE.getByte(inputBase, matchStart) == UNSAFE.getByte(inputBase, current)) {
|
||||
++current;
|
||||
}
|
||||
|
||||
return (int) (current - start);
|
||||
}
|
||||
|
||||
private static long emitLiteralLength(Object outputBase, long output, int literalLength)
|
||||
{
|
||||
int n = literalLength - 1; // Zero-length literals are disallowed
|
||||
if (n < 60) {
|
||||
// Size fits in tag byte
|
||||
UNSAFE.putByte(outputBase, output++, (byte) (n << 2));
|
||||
}
|
||||
else {
|
||||
int bytes;
|
||||
if (n < (1 << 8)) {
|
||||
UNSAFE.putByte(outputBase, output++, (byte) (59 + 1 << 2));
|
||||
bytes = 1;
|
||||
}
|
||||
else if (n < (1 << 16)) {
|
||||
UNSAFE.putByte(outputBase, output++, (byte) (59 + 2 << 2));
|
||||
bytes = 2;
|
||||
}
|
||||
else if (n < (1 << 24)) {
|
||||
UNSAFE.putByte(outputBase, output++, (byte) (59 + 3 << 2));
|
||||
bytes = 3;
|
||||
}
|
||||
else {
|
||||
UNSAFE.putByte(outputBase, output++, (byte) (59 + 4 << 2));
|
||||
bytes = 4;
|
||||
}
|
||||
// System is assumed to be little endian, so low bytes will be zero for the smaller numbers
|
||||
UNSAFE.putInt(outputBase, output, n);
|
||||
output += bytes;
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
private static long fastCopy(final Object inputBase, long input, final Object outputBase, long output, final int literalLength)
|
||||
{
|
||||
final long outputLimit = output + literalLength;
|
||||
do {
|
||||
UNSAFE.putLong(outputBase, output, UNSAFE.getLong(inputBase, input));
|
||||
input += SIZE_OF_LONG;
|
||||
output += SIZE_OF_LONG;
|
||||
}
|
||||
while (output < outputLimit);
|
||||
return outputLimit;
|
||||
}
|
||||
|
||||
private static long emitCopy(Object outputBase, long output, long input, long matchIndex, int matchLength)
|
||||
{
|
||||
long offset = input - matchIndex;
|
||||
|
||||
// Emit 64 byte copies but make sure to keep at least four bytes reserved
|
||||
while (matchLength >= 68) {
|
||||
UNSAFE.putByte(outputBase, output++, (byte) (COPY_2_BYTE_OFFSET + ((64 - 1) << 2)));
|
||||
UNSAFE.putShort(outputBase, output, (short) offset);
|
||||
output += SIZE_OF_SHORT;
|
||||
matchLength -= 64;
|
||||
}
|
||||
|
||||
// Emit an extra 60 byte copy if have too much data to fit in one copy
|
||||
// length < 68
|
||||
if (matchLength > 64) {
|
||||
UNSAFE.putByte(outputBase, output++, (byte) (COPY_2_BYTE_OFFSET + ((60 - 1) << 2)));
|
||||
UNSAFE.putShort(outputBase, output, (short) offset);
|
||||
output += SIZE_OF_SHORT;
|
||||
matchLength -= 60;
|
||||
}
|
||||
|
||||
// Emit remainder
|
||||
if ((matchLength < 12) && (offset < 2048)) {
|
||||
int lenMinus4 = matchLength - 4;
|
||||
UNSAFE.putByte(outputBase, output++, (byte) (COPY_1_BYTE_OFFSET + ((lenMinus4) << 2) + ((offset >>> 8) << 5)));
|
||||
UNSAFE.putByte(outputBase, output++, (byte) (offset));
|
||||
}
|
||||
else {
|
||||
UNSAFE.putByte(outputBase, output++, (byte) (COPY_2_BYTE_OFFSET + ((matchLength - 1) << 2)));
|
||||
UNSAFE.putShort(outputBase, output, (short) offset);
|
||||
output += SIZE_OF_SHORT;
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
@SuppressWarnings("IllegalToken")
|
||||
private static int getHashTableSize(int inputSize)
|
||||
{
|
||||
// Use smaller hash table when input.size() is smaller, since we
|
||||
// fill the table, incurring O(hash table size) overhead for
|
||||
// compression, and if the input is short, we won't need that
|
||||
// many hash table entries anyway.
|
||||
assert (MAX_HASH_TABLE_SIZE >= 256);
|
||||
|
||||
// smallest power of 2 larger than inputSize
|
||||
int target = Integer.highestOneBit(inputSize - 1) << 1;
|
||||
|
||||
// keep it between MIN_TABLE_SIZE and MAX_TABLE_SIZE
|
||||
return Math.max(Math.min(target, MAX_HASH_TABLE_SIZE), 256);
|
||||
}
|
||||
|
||||
// Any hash function will produce a valid compressed stream, but a good
|
||||
// hash function reduces the number of collisions and thus yields better
|
||||
// compression for compressible input, and more speed for incompressible
|
||||
// input. Of course, it doesn't hurt if the hash function is reasonably fast
|
||||
// either, as it gets called a lot.
|
||||
private static int hashBytes(int value, int shift)
|
||||
{
|
||||
return (value * 0x1e35a7bd) >>> shift;
|
||||
}
|
||||
|
||||
private static int log2Floor(int n)
|
||||
{
|
||||
return n == 0 ? -1 : 31 ^ Integer.numberOfLeadingZeros(n);
|
||||
}
|
||||
|
||||
private static final int HIGH_BIT_MASK = 0x80;
|
||||
/**
|
||||
* Writes the uncompressed length as variable length integer.
|
||||
*/
|
||||
private static long writeUncompressedLength(Object outputBase, long outputAddress, int uncompressedLength)
|
||||
{
|
||||
if (uncompressedLength < (1 << 7) && uncompressedLength >= 0) {
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength));
|
||||
}
|
||||
else if (uncompressedLength < (1 << 14) && uncompressedLength > 0) {
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK));
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 7));
|
||||
}
|
||||
else if (uncompressedLength < (1 << 21) && uncompressedLength > 0) {
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK));
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 7) | HIGH_BIT_MASK));
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 14));
|
||||
}
|
||||
else if (uncompressedLength < (1 << 28) && uncompressedLength > 0) {
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK));
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 7) | HIGH_BIT_MASK));
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 14) | HIGH_BIT_MASK));
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 21));
|
||||
}
|
||||
else {
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK));
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 7) | HIGH_BIT_MASK));
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 14) | HIGH_BIT_MASK));
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 21) | HIGH_BIT_MASK));
|
||||
UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 28));
|
||||
}
|
||||
return outputAddress;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,316 @@
|
|||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.xerial.snappy.pure;
|
||||
|
||||
import org.xerial.snappy.SnappyError;
|
||||
import org.xerial.snappy.SnappyErrorCode;
|
||||
|
||||
import static org.xerial.snappy.pure.SnappyConstants.LITERAL;
|
||||
import static org.xerial.snappy.pure.SnappyConstants.SIZE_OF_INT;
|
||||
import static org.xerial.snappy.pure.SnappyConstants.SIZE_OF_LONG;
|
||||
import static org.xerial.snappy.pure.UnsafeUtil.UNSAFE;
|
||||
|
||||
public final class SnappyRawDecompressor
|
||||
{
|
||||
private static final int[] DEC_32_TABLE = {4, 1, 2, 1, 4, 4, 4, 4};
|
||||
private static final int[] DEC_64_TABLE = {0, 0, 0, -1, 0, 1, 2, 3};
|
||||
|
||||
private SnappyRawDecompressor() {}
|
||||
|
||||
public static int getUncompressedLength(Object compressed, long compressedAddress, long compressedLimit)
|
||||
{
|
||||
return readUncompressedLength(compressed, compressedAddress, compressedLimit)[0];
|
||||
}
|
||||
|
||||
public static int decompress(
|
||||
final Object inputBase,
|
||||
final long inputAddress,
|
||||
final long inputLimit,
|
||||
final Object outputBase,
|
||||
final long outputAddress,
|
||||
final long outputLimit)
|
||||
{
|
||||
// Read the uncompressed length from the front of the input
|
||||
long input = inputAddress;
|
||||
int[] varInt = readUncompressedLength(inputBase, input, inputLimit);
|
||||
int expectedLength = varInt[0];
|
||||
input += varInt[1];
|
||||
|
||||
if(!(expectedLength <= (outputLimit - outputAddress))) {
|
||||
throw new SnappyError(SnappyErrorCode.INVALID_CHUNK_SIZE, String.format("Uncompressed length %s must be less than %s", expectedLength, (outputLimit - outputAddress)));
|
||||
}
|
||||
|
||||
// Process the entire input
|
||||
int uncompressedSize = uncompressAll(
|
||||
inputBase,
|
||||
input,
|
||||
inputLimit,
|
||||
outputBase,
|
||||
outputAddress,
|
||||
outputLimit);
|
||||
|
||||
if (!(expectedLength == uncompressedSize)) {
|
||||
throw new SnappyError(SnappyErrorCode.INVALID_CHUNK_SIZE, String.format("Recorded length is %s bytes but actual length after decompression is %s bytes ",
|
||||
expectedLength,
|
||||
uncompressedSize));
|
||||
}
|
||||
|
||||
return expectedLength;
|
||||
}
|
||||
|
||||
private static int uncompressAll(
|
||||
final Object inputBase,
|
||||
final long inputAddress,
|
||||
final long inputLimit,
|
||||
final Object outputBase,
|
||||
final long outputAddress,
|
||||
final long outputLimit)
|
||||
{
|
||||
final long fastOutputLimit = outputLimit - SIZE_OF_LONG; // maximum offset in output buffer to which it's safe to write long-at-a-time
|
||||
|
||||
long output = outputAddress;
|
||||
long input = inputAddress;
|
||||
|
||||
while (input < inputLimit) {
|
||||
int opCode = UNSAFE.getByte(inputBase, input++) & 0xFF;
|
||||
int entry = opLookupTable[opCode] & 0xFFFF;
|
||||
|
||||
int trailerBytes = entry >>> 11;
|
||||
int trailer = 0;
|
||||
if (input + SIZE_OF_INT < inputLimit) {
|
||||
trailer = UNSAFE.getInt(inputBase, input) & wordmask[trailerBytes];
|
||||
}
|
||||
else {
|
||||
if (input + trailerBytes > inputLimit) {
|
||||
throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d", input - inputAddress));
|
||||
}
|
||||
switch (trailerBytes) {
|
||||
case 4:
|
||||
trailer = (UNSAFE.getByte(inputBase, input + 3) & 0xff) << 24;
|
||||
case 3:
|
||||
trailer |= (UNSAFE.getByte(inputBase, input + 2) & 0xff) << 16;
|
||||
case 2:
|
||||
trailer |= (UNSAFE.getByte(inputBase, input + 1) & 0xff) << 8;
|
||||
case 1:
|
||||
trailer |= (UNSAFE.getByte(inputBase, input) & 0xff);
|
||||
}
|
||||
}
|
||||
if (trailer < 0) {
|
||||
throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d", input - inputAddress));
|
||||
}
|
||||
input += trailerBytes;
|
||||
|
||||
int length = entry & 0xff;
|
||||
if (length == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((opCode & 0x3) == LITERAL) {
|
||||
int literalLength = length + trailer;
|
||||
|
||||
// copy literal
|
||||
long literalOutputLimit = output + literalLength;
|
||||
if (literalOutputLimit > fastOutputLimit || input + literalLength > inputLimit - SIZE_OF_LONG) {
|
||||
if (literalOutputLimit > outputLimit) {
|
||||
throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d", input - inputAddress));
|
||||
}
|
||||
|
||||
// slow, precise copy
|
||||
UNSAFE.copyMemory(inputBase, input, outputBase, output, literalLength);
|
||||
input += literalLength;
|
||||
output += literalLength;
|
||||
}
|
||||
else {
|
||||
// fast copy. We may over-copy but there's enough room in input and output to not overrun them
|
||||
do {
|
||||
UNSAFE.putLong(outputBase, output, UNSAFE.getLong(inputBase, input));
|
||||
input += SIZE_OF_LONG;
|
||||
output += SIZE_OF_LONG;
|
||||
}
|
||||
while (output < literalOutputLimit);
|
||||
input -= (output - literalOutputLimit); // adjust index if we over-copied
|
||||
output = literalOutputLimit;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// matchOffset/256 is encoded in bits 8..10. By just fetching
|
||||
// those bits, we get matchOffset (since the bit-field starts at
|
||||
// bit 8).
|
||||
int matchOffset = entry & 0x700;
|
||||
matchOffset += trailer;
|
||||
|
||||
long matchAddress = output - matchOffset;
|
||||
if (matchAddress < outputAddress || output + length > outputLimit) {
|
||||
throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d", input - inputAddress));
|
||||
}
|
||||
long matchOutputLimit = output + length;
|
||||
|
||||
if (output > fastOutputLimit) {
|
||||
// slow match copy
|
||||
while (output < matchOutputLimit) {
|
||||
UNSAFE.putByte(outputBase, output++, UNSAFE.getByte(outputBase, matchAddress++));
|
||||
}
|
||||
}
|
||||
else {
|
||||
// copy repeated sequence
|
||||
if (matchOffset < SIZE_OF_LONG) {
|
||||
// 8 bytes apart so that we can copy long-at-a-time below
|
||||
int increment32 = DEC_32_TABLE[matchOffset];
|
||||
int decrement64 = DEC_64_TABLE[matchOffset];
|
||||
|
||||
UNSAFE.putByte(outputBase, output, UNSAFE.getByte(outputBase, matchAddress));
|
||||
UNSAFE.putByte(outputBase, output + 1, UNSAFE.getByte(outputBase, matchAddress + 1));
|
||||
UNSAFE.putByte(outputBase, output + 2, UNSAFE.getByte(outputBase, matchAddress + 2));
|
||||
UNSAFE.putByte(outputBase, output + 3, UNSAFE.getByte(outputBase, matchAddress + 3));
|
||||
output += SIZE_OF_INT;
|
||||
matchAddress += increment32;
|
||||
|
||||
UNSAFE.putInt(outputBase, output, UNSAFE.getInt(outputBase, matchAddress));
|
||||
output += SIZE_OF_INT;
|
||||
matchAddress -= decrement64;
|
||||
}
|
||||
else {
|
||||
UNSAFE.putLong(outputBase, output, UNSAFE.getLong(outputBase, matchAddress));
|
||||
matchAddress += SIZE_OF_LONG;
|
||||
output += SIZE_OF_LONG;
|
||||
}
|
||||
|
||||
if (matchOutputLimit > fastOutputLimit) {
|
||||
if (matchOutputLimit > outputLimit) {
|
||||
throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d", input - inputAddress));
|
||||
}
|
||||
|
||||
while (output < fastOutputLimit) {
|
||||
UNSAFE.putLong(outputBase, output, UNSAFE.getLong(outputBase, matchAddress));
|
||||
matchAddress += SIZE_OF_LONG;
|
||||
output += SIZE_OF_LONG;
|
||||
}
|
||||
|
||||
while (output < matchOutputLimit) {
|
||||
UNSAFE.putByte(outputBase, output++, UNSAFE.getByte(outputBase, matchAddress++));
|
||||
}
|
||||
}
|
||||
else {
|
||||
while (output < matchOutputLimit) {
|
||||
UNSAFE.putLong(outputBase, output, UNSAFE.getLong(outputBase, matchAddress));
|
||||
matchAddress += SIZE_OF_LONG;
|
||||
output += SIZE_OF_LONG;
|
||||
}
|
||||
}
|
||||
}
|
||||
output = matchOutputLimit; // correction in case we over-copied
|
||||
}
|
||||
}
|
||||
|
||||
return (int) (output - outputAddress);
|
||||
}
|
||||
|
||||
// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
|
||||
private static final int[] wordmask = new int[] {
|
||||
0, 0xff, 0xffff, 0xffffff, 0xffffffff
|
||||
};
|
||||
|
||||
// Data stored per entry in lookup table:
|
||||
// Range Bits-used Description
|
||||
// ------------------------------------
|
||||
// 1..64 0..7 Literal/copy length encoded in opcode byte
|
||||
// 0..7 8..10 Copy offset encoded in opcode byte / 256
|
||||
// 0..4 11..13 Extra bytes after opcode
|
||||
//
|
||||
// We use eight bits for the length even though 7 would have sufficed
|
||||
// because of efficiency reasons:
|
||||
// (1) Extracting a byte is faster than a bit-field
|
||||
// (2) It properly aligns copy offset so we do not need a <<8
|
||||
private static final short[] opLookupTable = new short[] {
|
||||
0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
|
||||
0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
|
||||
0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
|
||||
0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
|
||||
0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
|
||||
0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
|
||||
0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
|
||||
0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
|
||||
0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
|
||||
0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
|
||||
0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
|
||||
0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
|
||||
0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
|
||||
0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
|
||||
0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
|
||||
0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
|
||||
0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
|
||||
0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
|
||||
0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
|
||||
0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
|
||||
0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
|
||||
0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
|
||||
0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
|
||||
0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
|
||||
0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
|
||||
0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
|
||||
0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
|
||||
0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
|
||||
0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
|
||||
0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
|
||||
0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
|
||||
0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
|
||||
};
|
||||
|
||||
/**
|
||||
* Reads the variable length integer encoded a the specified offset, and
|
||||
* returns this length with the number of bytes read.
|
||||
*/
|
||||
static int[] readUncompressedLength(Object compressed, long compressedAddress, long compressedLimit)
|
||||
{
|
||||
int result;
|
||||
int bytesRead = 0;
|
||||
{
|
||||
int b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit);
|
||||
bytesRead++;
|
||||
result = b & 0x7f;
|
||||
if ((b & 0x80) != 0) {
|
||||
b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit);
|
||||
bytesRead++;
|
||||
result |= (b & 0x7f) << 7;
|
||||
if ((b & 0x80) != 0) {
|
||||
b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit);
|
||||
bytesRead++;
|
||||
result |= (b & 0x7f) << 14;
|
||||
if ((b & 0x80) != 0) {
|
||||
b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit);
|
||||
bytesRead++;
|
||||
result |= (b & 0x7f) << 21;
|
||||
if ((b & 0x80) != 0) {
|
||||
b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit);
|
||||
bytesRead++;
|
||||
result |= (b & 0x7f) << 28;
|
||||
if ((b & 0x80) != 0) {
|
||||
throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d, error: %s", compressedAddress + bytesRead, "last byte of compressed length int has high bit set"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return new int[] {result, bytesRead};
|
||||
}
|
||||
|
||||
private static int getUnsignedByteSafe(Object base, long address, long limit)
|
||||
{
|
||||
if (address >= limit) {
|
||||
throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d, error: %s", limit - address, "Input is truncated"));
|
||||
}
|
||||
return UNSAFE.getByte(base, address) & 0xFF;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.xerial.snappy.pure;
|
||||
|
||||
import org.xerial.snappy.SnappyError;
|
||||
import org.xerial.snappy.SnappyErrorCode;
|
||||
import sun.misc.Unsafe;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.nio.Buffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
final class UnsafeUtil
|
||||
{
|
||||
public static final Unsafe UNSAFE;
|
||||
private static final Field ADDRESS_ACCESSOR;
|
||||
|
||||
private UnsafeUtil()
|
||||
{
|
||||
}
|
||||
|
||||
static {
|
||||
ByteOrder order = ByteOrder.nativeOrder();
|
||||
if (!order.equals(ByteOrder.LITTLE_ENDIAN)) {
|
||||
throw new SnappyError(SnappyErrorCode.UNSUPPORTED_PLATFORM, format("pure-java snappy requires a little endian platform (found %s)", order));
|
||||
}
|
||||
|
||||
try {
|
||||
Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
|
||||
theUnsafe.setAccessible(true);
|
||||
UNSAFE = (Unsafe) theUnsafe.get(null);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new SnappyError(SnappyErrorCode.UNSUPPORTED_PLATFORM, "pure-java snappy requires access to sun.misc.Unsafe");
|
||||
}
|
||||
|
||||
try {
|
||||
Field field = Buffer.class.getDeclaredField("address");
|
||||
field.setAccessible(true);
|
||||
ADDRESS_ACCESSOR = field;
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new SnappyError(SnappyErrorCode.UNSUPPORTED_PLATFORM, "pure-java snappy requires access to java.nio.Buffer raw address field");
|
||||
}
|
||||
}
|
||||
|
||||
public static long getAddress(Buffer buffer)
|
||||
{
|
||||
try {
|
||||
return (long) ADDRESS_ACCESSOR.get(buffer);
|
||||
}
|
||||
catch (IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package org.xerial.snappy;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class PureJavaSnappyInputStreamTest
|
||||
extends SnappyInputStreamTest
|
||||
{
|
||||
@BeforeClass
|
||||
public static void setUp()
|
||||
throws Exception
|
||||
{
|
||||
PureJavaSnappyTestUtil.setUp();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDown()
|
||||
throws Exception
|
||||
{
|
||||
PureJavaSnappyTestUtil.tearDown();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package org.xerial.snappy;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class PureJavaSnappyOutputStreamTest
|
||||
extends SnappyOutputStreamTest
|
||||
{
|
||||
@BeforeClass
|
||||
public static void setUp()
|
||||
throws Exception
|
||||
{
|
||||
PureJavaSnappyTestUtil.setUp();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDown()
|
||||
throws Exception
|
||||
{
|
||||
PureJavaSnappyTestUtil.tearDown();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
package org.xerial.snappy;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class PureJavaSnappyTest
|
||||
extends SnappyTest
|
||||
{
|
||||
@BeforeClass
|
||||
public static void setUp()
|
||||
throws Exception
|
||||
{
|
||||
PureJavaSnappyTestUtil.setUp();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDown()
|
||||
throws Exception
|
||||
{
|
||||
PureJavaSnappyTestUtil.tearDown();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
package org.xerial.snappy;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.xerial.util.log.Logger;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public abstract class PureJavaSnappyTestUtil
|
||||
{
|
||||
private static Logger _logger = Logger.getLogger(PureJavaSnappyTestUtil.class);
|
||||
|
||||
@BeforeClass
|
||||
public static void setUp()
|
||||
throws Exception
|
||||
{
|
||||
_logger.info("Loading pure-java Snappy");
|
||||
Snappy.cleanUp();
|
||||
System.setProperty(SnappyLoader.KEY_SNAPPY_PUREJAVA, "true");
|
||||
SnappyLoader.loadSnappyApi();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDown()
|
||||
throws Exception
|
||||
{
|
||||
System.setProperty(SnappyLoader.KEY_SNAPPY_PUREJAVA, "false");
|
||||
Snappy.cleanUp();
|
||||
_logger.info("Unloading pure-java Snappy");
|
||||
// Initialize Snappy again for the other tests
|
||||
Snappy.init();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue