diff --git a/README.md b/README.md index e98b8eb..c205655 100755 --- a/README.md +++ b/README.md @@ -169,6 +169,12 @@ When building on Solaris, use `gmake`: A file `target/snappy-java-$(version).jar` is the product additionally containing the native library built for your platform. ## Miscellaneous Notes + +### Using pure-java Snappy implementation + +snappy-java can optionally use a pure-java implementation of Snappy based on [aircompressor](https://github.com/airlift/aircompressor/tree/master/src/main/java/io/airlift/compress/snappy). This implementation is selected when no native Snappy library for your platform is found. You can also force using this pure-java implementation by setting a JVM property `org.xerial.snappy.purejava=true` before loading any class of Snappy (e.g., using `-Dorg.xerial.snappy.purejava=true` option when launching JVM). + + ### Using snappy-java with Tomcat 6 (or higher) Web Server Simply put the snappy-java's jar to WEB-INF/lib folder of your web application. Usual JNI-library specific problem no longer exists since snappy-java version 1.0.3 or higher can be loaded by multiple class loaders. diff --git a/build.sbt b/build.sbt index 75af171..7ae9886 100644 --- a/build.sbt +++ b/build.sbt @@ -111,7 +111,7 @@ enablePlugins(SbtOsgi) osgiSettings -OsgiKeys.exportPackage := Seq("org.xerial.snappy", "org.xerial.snappy.buffer", "org.xerial.snappy.pool") +OsgiKeys.exportPackage := Seq("org.xerial.snappy", "org.xerial.snappy.buffer", "org.xerial.snappy.pool", "org.xerial.snappy.pure") OsgiKeys.bundleSymbolicName := "org.xerial.snappy.snappy-java" OsgiKeys.bundleActivator := Option("org.xerial.snappy.SnappyBundleActivator") OsgiKeys.importPackage := Seq("""org.osgi.framework;version="[1.5,2)"""") diff --git a/src/main/java/org/xerial/snappy/Snappy.java b/src/main/java/org/xerial/snappy/Snappy.java index 31d0d2e..c7d7ce8 100755 --- a/src/main/java/org/xerial/snappy/Snappy.java +++ b/src/main/java/org/xerial/snappy/Snappy.java @@ -19,11 +19,13 @@ // Snappy.java // Since: 2011/03/29 // -// $URL$ +// $URL$ // $Author$ //-------------------------------------- package org.xerial.snappy; +import org.xerial.snappy.pure.PureJavaSnappy; + import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; @@ -43,18 +45,13 @@ import java.util.Properties; public class Snappy { static { - try { - impl = SnappyLoader.loadSnappyApi(); - } - catch (Exception e) { - throw new ExceptionInInitializerError(e); - } + init(); } /** * An instance of SnappyNative */ - private static SnappyNative impl; + private static SnappyApi impl; /** * Clean up a temporary file (native lib) generated by snappy-java. @@ -69,6 +66,15 @@ public class Snappy SnappyLoader.setSnappyApi(null); } + static void init() { + try { + impl = SnappyLoader.loadSnappyApi(); + } + catch (Exception e) { + throw new ExceptionInInitializerError(e); + } + } + /** * Copy bytes from source to destination * diff --git a/src/main/java/org/xerial/snappy/SnappyApi.java b/src/main/java/org/xerial/snappy/SnappyApi.java new file mode 100644 index 0000000..ab7f28a --- /dev/null +++ b/src/main/java/org/xerial/snappy/SnappyApi.java @@ -0,0 +1,60 @@ +package org.xerial.snappy; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Snappy compressor/decompressor interface. The implementation can be JNI binding or pure-java Snappy implementation. + */ +public interface SnappyApi +{ + // ------------------------------------------------------------------------ + // Generic compression/decompression routines. + // ------------------------------------------------------------------------ + long rawCompress(long inputAddr, long inputSize, long destAddr) + throws IOException; + + long rawUncompress(long inputAddr, long inputSize, long destAddr) + throws IOException; + + int rawCompress(ByteBuffer input, int inputOffset, int inputLength, ByteBuffer compressed, + int outputOffset) + throws IOException; + + int rawCompress(Object input, int inputOffset, int inputByteLength, Object output, int outputOffset) + throws IOException; + + int rawUncompress(ByteBuffer compressed, int inputOffset, int inputLength, ByteBuffer uncompressed, + int outputOffset) + throws IOException; + + int rawUncompress(Object input, int inputOffset, int inputLength, Object output, int outputOffset) + throws IOException; + + // Returns the maximal size of the compressed representation of + // input data that is "source_bytes" bytes in length; + int maxCompressedLength(int source_bytes); + + // This operation takes O(1) time. + int uncompressedLength(ByteBuffer compressed, int offset, int len) + throws IOException; + + int uncompressedLength(Object input, int offset, int len) + throws IOException; + + long uncompressedLength(long inputAddr, long len) + throws IOException; + + boolean isValidCompressedBuffer(ByteBuffer compressed, int offset, int len) + throws IOException; + + boolean isValidCompressedBuffer(Object input, int offset, int len) + throws IOException; + + boolean isValidCompressedBuffer(long inputAddr, long offset, long len) + throws IOException; + + void arrayCopy(Object src, int offset, int byteLength, Object dest, int dOffset) + throws IOException; + +} diff --git a/src/main/java/org/xerial/snappy/SnappyErrorCode.java b/src/main/java/org/xerial/snappy/SnappyErrorCode.java index 4325b02..24d5b18 100755 --- a/src/main/java/org/xerial/snappy/SnappyErrorCode.java +++ b/src/main/java/org/xerial/snappy/SnappyErrorCode.java @@ -41,7 +41,8 @@ public enum SnappyErrorCode FAILED_TO_UNCOMPRESS(5), EMPTY_INPUT(6), INCOMPATIBLE_VERSION(7), - INVALID_CHUNK_SIZE(8); + INVALID_CHUNK_SIZE(8), + UNSUPPORTED_PLATFORM(9); public final int id; diff --git a/src/main/java/org/xerial/snappy/SnappyLoader.java b/src/main/java/org/xerial/snappy/SnappyLoader.java index 27855af..f1d0665 100755 --- a/src/main/java/org/xerial/snappy/SnappyLoader.java +++ b/src/main/java/org/xerial/snappy/SnappyLoader.java @@ -19,11 +19,13 @@ // SnappyLoader.java // Since: 2011/03/29 // -// $URL$ +// $URL$ // $Author$ //-------------------------------------- package org.xerial.snappy; +import org.xerial.snappy.pure.PureJavaSnappy; + import java.io.*; import java.net.URL; import java.util.Enumeration; @@ -75,13 +77,14 @@ public class SnappyLoader public static final String SNAPPY_SYSTEM_PROPERTIES_FILE = "org-xerial-snappy.properties"; public static final String KEY_SNAPPY_LIB_PATH = "org.xerial.snappy.lib.path"; public static final String KEY_SNAPPY_LIB_NAME = "org.xerial.snappy.lib.name"; + public static final String KEY_SNAPPY_PUREJAVA = "org.xerial.snappy.purejava"; public static final String KEY_SNAPPY_TEMPDIR = "org.xerial.snappy.tempdir"; public static final String KEY_SNAPPY_USE_SYSTEMLIB = "org.xerial.snappy.use.systemlib"; public static final String KEY_SNAPPY_DISABLE_BUNDLED_LIBS = "org.xerial.snappy.disable.bundled.libs"; // Depreciated, but preserved for backward compatibility private static boolean isLoaded = false; - private static volatile SnappyNative snappyApi = null; + private static volatile SnappyApi snappyApi = null; private static volatile BitShuffleNative bitshuffleApi = null; private static File nativeLibFile = null; @@ -101,11 +104,11 @@ public class SnappyLoader /** * Set the `snappyApi` instance. * - * @param nativeCode + * @param apiImpl */ - static synchronized void setSnappyApi(SnappyNative nativeCode) + static synchronized void setSnappyApi(SnappyApi apiImpl) { - snappyApi = nativeCode; + snappyApi = apiImpl; } /** @@ -146,13 +149,29 @@ public class SnappyLoader loadSnappySystemProperties(); } - static synchronized SnappyNative loadSnappyApi() + static synchronized boolean isPureJava() { + return snappyApi != null && PureJavaSnappy.class.isAssignableFrom(snappyApi.getClass()); + } + + static synchronized SnappyApi loadSnappyApi() { if (snappyApi != null) { return snappyApi; } - loadNativeLibrary(); - setSnappyApi(new SnappyNative()); + try { + if(Boolean.parseBoolean(System.getProperty(KEY_SNAPPY_PUREJAVA, "false"))) { + // Use pure-java Snappy implementation + setSnappyApi(new PureJavaSnappy()); + } + else { + loadNativeLibrary(); + setSnappyApi(new SnappyNative()); + } + } + catch(Exception e) { + // Fall-back to pure-java Snappy implementation + setSnappyApi(new PureJavaSnappy()); + } return snappyApi; } @@ -311,7 +330,7 @@ public class SnappyLoader String snappyNativeLibraryPath = System.getProperty(KEY_SNAPPY_LIB_PATH); String snappyNativeLibraryName = System.getProperty(KEY_SNAPPY_LIB_NAME); - // Resolve the library file name with a suffix (e.g., dll, .so, etc.) + // Resolve the library file name with a suffix (e.g., dll, .so, etc.) if (snappyNativeLibraryName == null) { snappyNativeLibraryName = System.mapLibraryName("snappyjava"); } diff --git a/src/main/java/org/xerial/snappy/SnappyNative.java b/src/main/java/org/xerial/snappy/SnappyNative.java index 95a6f41..98ae1d9 100755 --- a/src/main/java/org/xerial/snappy/SnappyNative.java +++ b/src/main/java/org/xerial/snappy/SnappyNative.java @@ -19,7 +19,7 @@ // SnappyNative.java // Since: 2011/03/30 // -// $URL$ +// $URL$ // $Author$ //-------------------------------------- package org.xerial.snappy; @@ -38,7 +38,7 @@ import java.nio.ByteBuffer; * * @author leo */ -public class SnappyNative +public class SnappyNative implements SnappyApi { public native String nativeLibraryVersion(); @@ -46,49 +46,63 @@ public class SnappyNative // ------------------------------------------------------------------------ // Generic compression/decompression routines. // ------------------------------------------------------------------------ + @Override public native long rawCompress(long inputAddr, long inputSize, long destAddr) throws IOException; + @Override public native long rawUncompress(long inputAddr, long inputSize, long destAddr) throws IOException; + @Override public native int rawCompress(ByteBuffer input, int inputOffset, int inputLength, ByteBuffer compressed, int outputOffset) throws IOException; + @Override public native int rawCompress(Object input, int inputOffset, int inputByteLength, Object output, int outputOffset) throws IOException; + @Override public native int rawUncompress(ByteBuffer compressed, int inputOffset, int inputLength, ByteBuffer uncompressed, int outputOffset) throws IOException; + @Override public native int rawUncompress(Object input, int inputOffset, int inputLength, Object output, int outputOffset) throws IOException; // Returns the maximal size of the compressed representation of // input data that is "source_bytes" bytes in length; + @Override public native int maxCompressedLength(int source_bytes); // This operation takes O(1) time. + @Override public native int uncompressedLength(ByteBuffer compressed, int offset, int len) throws IOException; + @Override public native int uncompressedLength(Object input, int offset, int len) throws IOException; + @Override public native long uncompressedLength(long inputAddr, long len) throws IOException; + @Override public native boolean isValidCompressedBuffer(ByteBuffer compressed, int offset, int len) throws IOException; + @Override public native boolean isValidCompressedBuffer(Object input, int offset, int len) throws IOException; + @Override public native boolean isValidCompressedBuffer(long inputAddr, long offset, long len) throws IOException; + @Override public native void arrayCopy(Object src, int offset, int byteLength, Object dest, int dOffset) throws IOException; diff --git a/src/main/java/org/xerial/snappy/pure/PureJavaSnappy.java b/src/main/java/org/xerial/snappy/pure/PureJavaSnappy.java new file mode 100644 index 0000000..faf54ef --- /dev/null +++ b/src/main/java/org/xerial/snappy/pure/PureJavaSnappy.java @@ -0,0 +1,244 @@ +package org.xerial.snappy.pure; + +import org.xerial.snappy.SnappyApi; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import static org.xerial.snappy.pure.UnsafeUtil.getAddress; +import static sun.misc.Unsafe.ARRAY_BYTE_BASE_OFFSET; + +/** + * A pure-java Snappy implementation using https://github.com/airlift/aircompressor + */ +public class PureJavaSnappy implements SnappyApi +{ + private final short[] table = new short[SnappyRawCompressor.MAX_HASH_TABLE_SIZE]; + private final static int MAX_OUTPUT_LENGTH = Integer.MAX_VALUE; + + @Override + public long rawCompress(long inputAddr, long inputSize, long destAddr) + throws IOException + { + return SnappyRawCompressor.compress(null, inputAddr, inputSize, null, destAddr, MAX_OUTPUT_LENGTH, table); + } + + @Override + public long rawUncompress(long inputAddr, long inputSize, long destAddr) + throws IOException + { + return SnappyRawDecompressor.decompress(null, inputAddr, inputSize, null, destAddr, MAX_OUTPUT_LENGTH); + } + + @Override + public int rawCompress(ByteBuffer input, int inputOffset, int inputLength, ByteBuffer compressed, int outputOffset) + throws IOException + { + Object inputBase; + long inputAddress; + long inputLimit; + if (input.isDirect()) { + inputBase = null; + long address = getAddress(input); + inputAddress = address + input.position(); + inputLimit = address + input.limit(); + } + else if (input.hasArray()) { + inputBase = input.array(); + inputAddress = ARRAY_BYTE_BASE_OFFSET + input.arrayOffset() + input.position(); + inputLimit = ARRAY_BYTE_BASE_OFFSET + input.arrayOffset() + input.limit(); + } + else { + throw new IllegalArgumentException("Unsupported input ByteBuffer implementation " + input.getClass().getName()); + } + + Object outputBase; + long outputAddress; + long outputLimit; + if (compressed.isDirect()) { + outputBase = null; + long address = getAddress(compressed); + outputAddress = address + compressed.position(); + outputLimit = address + compressed.limit(); + } + else if (compressed.hasArray()) { + outputBase = compressed.array(); + outputAddress = ARRAY_BYTE_BASE_OFFSET + compressed.arrayOffset() + compressed.position(); + outputLimit = ARRAY_BYTE_BASE_OFFSET + compressed.arrayOffset() + compressed.limit(); + } + else { + throw new IllegalArgumentException("Unsupported output ByteBuffer implementation " + compressed.getClass().getName()); + } + + // HACK: Assure JVM does not collect Slice wrappers while compressing, since the + // collection may trigger freeing of the underlying memory resulting in a segfault + // There is no other known way to signal to the JVM that an object should not be + // collected in a block, and technically, the JVM is allowed to eliminate these locks. + synchronized (input) { + synchronized (compressed) { + int written = SnappyRawCompressor.compress( + inputBase, + inputAddress, + inputLimit, + outputBase, + outputAddress, + outputLimit, + table); + compressed.position(compressed.position() + written); + return written; + } + } + } + + @Override + public int rawCompress(Object input, int inputOffset, int inputByteLength, Object output, int outputOffset) + throws IOException + { + long inputAddress = ARRAY_BYTE_BASE_OFFSET + inputOffset; + long inputLimit = inputAddress + inputByteLength; + long outputAddress = ARRAY_BYTE_BASE_OFFSET + outputOffset; + long outputLimit = outputAddress + MAX_OUTPUT_LENGTH; + + return SnappyRawCompressor.compress(input, inputAddress, inputLimit, output, outputAddress, outputLimit, table); + } + + @Override + public int rawUncompress(ByteBuffer compressed, int inputOffset, int inputLength, ByteBuffer uncompressed, int outputOffset) + throws IOException + { + Object inputBase; + long inputAddress; + long inputLimit; + if (compressed.isDirect()) { + inputBase = null; + long address = getAddress(compressed); + inputAddress = address + compressed.position(); + inputLimit = address + compressed.limit(); + } + else if (compressed.hasArray()) { + inputBase = compressed.array(); + inputAddress = ARRAY_BYTE_BASE_OFFSET + compressed.arrayOffset() + compressed.position(); + inputLimit = ARRAY_BYTE_BASE_OFFSET + compressed.arrayOffset() + compressed.limit(); + } + else { + throw new IllegalArgumentException("Unsupported input ByteBuffer implementation " + compressed.getClass().getName()); + } + + Object outputBase; + long outputAddress; + long outputLimit; + if (uncompressed.isDirect()) { + outputBase = null; + long address = getAddress(uncompressed); + outputAddress = address + uncompressed.position(); + outputLimit = address + uncompressed.limit(); + } + else if (uncompressed.hasArray()) { + outputBase = uncompressed.array(); + outputAddress = ARRAY_BYTE_BASE_OFFSET + uncompressed.arrayOffset() + uncompressed.position(); + outputLimit = ARRAY_BYTE_BASE_OFFSET + uncompressed.arrayOffset() + uncompressed.limit(); + } + else { + throw new IllegalArgumentException("Unsupported output ByteBuffer implementation " + uncompressed.getClass().getName()); + } + + // HACK: Assure JVM does not collect Slice wrappers while decompressing, since the + // collection may trigger freeing of the underlying memory resulting in a segfault + // There is no other known way to signal to the JVM that an object should not be + // collected in a block, and technically, the JVM is allowed to eliminate these locks. + synchronized (compressed) { + synchronized (uncompressed) { + int written = SnappyRawDecompressor.decompress(inputBase, inputAddress, inputLimit, outputBase, outputAddress, outputLimit); + uncompressed.position(uncompressed.position() + written); + return written; + } + } + } + + @Override + public int rawUncompress(Object input, int inputOffset, int inputLength, Object output, int outputOffset) + throws IOException + { + long inputAddress = ARRAY_BYTE_BASE_OFFSET + inputOffset; + long inputLimit = inputAddress + inputLength; + long outputAddress = ARRAY_BYTE_BASE_OFFSET + outputOffset; + long outputLimit = outputAddress + MAX_OUTPUT_LENGTH; + + return SnappyRawDecompressor.decompress(input, inputAddress, inputLimit, output, outputAddress, outputLimit); + } + + @Override + public int maxCompressedLength(int source_bytes) + { + return SnappyRawCompressor.maxCompressedLength(source_bytes); + } + + @Override + public int uncompressedLength(ByteBuffer compressed, int offset, int len) + throws IOException + { + Object inputBase; + long inputAddress; + long inputLimit; + if(compressed.isDirect()) { + inputBase = null; + long address = getAddress(compressed); + inputAddress = address + compressed.position(); + inputLimit = address + compressed.limit(); + } + else if (compressed.hasArray()){ + inputBase = compressed.array(); + inputAddress = ARRAY_BYTE_BASE_OFFSET + offset; + inputLimit = ARRAY_BYTE_BASE_OFFSET + len; + } + else { + throw new IllegalArgumentException("Unsupported input ByteBuffer implementation: " + compressed.getClass().getName()); + } + return SnappyRawDecompressor.getUncompressedLength(inputBase, inputAddress, inputLimit); + } + + @Override + public int uncompressedLength(Object input, int offset, int len) + throws IOException + { + long compressedAddress = ARRAY_BYTE_BASE_OFFSET + offset; + long compressedLimit = ARRAY_BYTE_BASE_OFFSET + len; + + return SnappyRawDecompressor.getUncompressedLength(input, compressedAddress, compressedLimit); + } + + @Override + public long uncompressedLength(long inputAddr, long len) + throws IOException + { + return SnappyRawDecompressor.getUncompressedLength(null, inputAddr, inputAddr + len); + } + + @Override + public boolean isValidCompressedBuffer(ByteBuffer compressed, int offset, int len) + throws IOException + { + throw new UnsupportedOperationException("isValidCompressedBuffer is not supported in pure-java mode"); + } + + @Override + public boolean isValidCompressedBuffer(Object input, int offset, int len) + throws IOException + { + throw new UnsupportedOperationException("isValidCompressedBuffer is not supported in pure-java mode"); + } + + @Override + public boolean isValidCompressedBuffer(long inputAddr, long offset, long len) + throws IOException + { + throw new UnsupportedOperationException("isValidCompressedBuffer is not supported in pure-java mode"); + } + + @Override + public void arrayCopy(Object src, int offset, int byteLength, Object dest, int dOffset) + throws IOException + { + System.arraycopy(src, offset, dest, dOffset, byteLength); + } +} diff --git a/src/main/java/org/xerial/snappy/pure/SnappyConstants.java b/src/main/java/org/xerial/snappy/pure/SnappyConstants.java new file mode 100644 index 0000000..c2110a8 --- /dev/null +++ b/src/main/java/org/xerial/snappy/pure/SnappyConstants.java @@ -0,0 +1,29 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.xerial.snappy.pure; + +final class SnappyConstants +{ + static final int SIZE_OF_SHORT = 2; + static final int SIZE_OF_INT = 4; + static final int SIZE_OF_LONG = 8; + + static final int LITERAL = 0; + static final int COPY_1_BYTE_OFFSET = 1; // 3 bit length + 3 bits of offset in opcode + static final int COPY_2_BYTE_OFFSET = 2; + + private SnappyConstants() + { + } +} diff --git a/src/main/java/org/xerial/snappy/pure/SnappyRawCompressor.java b/src/main/java/org/xerial/snappy/pure/SnappyRawCompressor.java new file mode 100644 index 0000000..e58b46e --- /dev/null +++ b/src/main/java/org/xerial/snappy/pure/SnappyRawCompressor.java @@ -0,0 +1,409 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.xerial.snappy.pure; +import java.util.Arrays; + +import static org.xerial.snappy.pure.SnappyConstants.COPY_1_BYTE_OFFSET; +import static org.xerial.snappy.pure.SnappyConstants.COPY_2_BYTE_OFFSET; +import static org.xerial.snappy.pure.SnappyConstants.SIZE_OF_INT; +import static org.xerial.snappy.pure.SnappyConstants.SIZE_OF_LONG; +import static org.xerial.snappy.pure.SnappyConstants.SIZE_OF_SHORT; +import static org.xerial.snappy.pure.UnsafeUtil.UNSAFE; + +public final class SnappyRawCompressor +{ + // The size of a compression block. Note that many parts of the compression + // code assumes that BLOCK_SIZE <= 65536; in particular, the hash table + // can only store 16-bit offsets, and EmitCopy() also assumes the offset + // is 65535 bytes or less. Note also that if you change this, it will + // affect the framing format (see framing_format.txt). + // + // Note that there might be older data around that is compressed with larger + // block sizes, so the decompression code should not rely on the + // non-existence of long back-references. + private static final int BLOCK_LOG = 16; + private static final int BLOCK_SIZE = 1 << BLOCK_LOG; + + private static final int INPUT_MARGIN_BYTES = 15; + + private static final int MAX_HASH_TABLE_BITS = 14; + public static final int MAX_HASH_TABLE_SIZE = 1 << MAX_HASH_TABLE_BITS; + + private SnappyRawCompressor() {} + + public static int maxCompressedLength(int sourceLength) + { + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // I.e., 6 bytes of input turn into 7 bytes of "compressed" data. + // + // This last factor dominates the blowup, so the final estimate is: + return 32 + sourceLength + sourceLength / 6; + } + + // suppress warnings is required to use assert + @SuppressWarnings("IllegalToken") + public static int compress( + final Object inputBase, + final long inputAddress, + final long inputLimit, + final Object outputBase, + final long outputAddress, + final long outputLimit, + final short[] table) + { + // The compression code assumes output is larger than the max compression size (with 32 bytes of + // extra padding), and does not check bounds for writing to output. + int maxCompressedLength = maxCompressedLength((int) (inputLimit - inputAddress)); + if (outputLimit - outputAddress < maxCompressedLength) { + throw new IllegalArgumentException("Output buffer must be at least " + maxCompressedLength + " bytes"); + } + + // First write the uncompressed size to the output as a variable length int + long output = writeUncompressedLength(outputBase, outputAddress, (int) (inputLimit - inputAddress)); + + for (long blockAddress = inputAddress; blockAddress < inputLimit; blockAddress += BLOCK_SIZE) { + final long blockLimit = Math.min(inputLimit, blockAddress + BLOCK_SIZE); + long input = blockAddress; + assert blockLimit - blockAddress <= BLOCK_SIZE; + + int blockHashTableSize = getHashTableSize((int) (blockLimit - blockAddress)); + Arrays.fill(table, 0, blockHashTableSize, (short) 0); + + // todo given that hashTableSize is required to be a power of 2, this is overly complex + final int shift = 32 - log2Floor(blockHashTableSize); + assert (blockHashTableSize & (blockHashTableSize - 1)) == 0 : "table must be power of two"; + assert 0xFFFFFFFF >>> shift == blockHashTableSize - 1; + + // Bytes in [nextEmitAddress, input) will be emitted as literal bytes. Or + // [nextEmitAddress, inputLimit) after the main loop. + long nextEmitAddress = input; + + final long fastInputLimit = blockLimit - INPUT_MARGIN_BYTES; + while (input <= fastInputLimit) { + assert nextEmitAddress <= input; + + // The body of this loop emits a literal once and then emits a copy one + // or more times. (The exception is that when we're close to exhausting + // the input we exit and emit a literal.) + // + // In the first iteration of this loop we're just starting, so + // there's nothing to copy, so we must emit a literal once. And we + // only start a new iteration when the current iteration has determined + // that a literal will precede the next copy (if any). + // + // Step 1: Scan forward in the input looking for a 4-byte-long match. + // If we get close to exhausting the input exit and emit a final literal. + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned, look at every third byte, etc.. When a match is found, + // immediately go back to looking at every byte. This is a small loss + // (~5% performance, ~0.1% density) for compressible data due to more + // bookkeeping, but for non-compressible data (such as JPEG) it's a huge + // win since the compressor quickly "realizes" the data is incompressible + // and doesn't bother looking for matches everywhere. + // + // The "skip" variable keeps track of how many bytes there are since the + // last match; dividing it by 32 (ie. right-shifting by five) gives the + // number of bytes to move ahead for each iteration. + int skip = 32; + + long candidateIndex = 0; + for (input += 1; input + (skip >>> 5) <= fastInputLimit; input += ((skip++) >>> 5)) { + // hash the 4 bytes starting at the input pointer + int currentInt = UNSAFE.getInt(inputBase, input); + int hash = hashBytes(currentInt, shift); + + // get the position of a 4 bytes sequence with the same hash + candidateIndex = blockAddress + (table[hash] & 0xFFFF); + assert candidateIndex >= 0; + assert candidateIndex < input; + + // update the hash to point to the current position + table[hash] = (short) (input - blockAddress); + + // if the 4 byte sequence a the candidate index matches the sequence at the + // current position, proceed to the next phase + if (currentInt == UNSAFE.getInt(inputBase, candidateIndex)) { + break; + } + } + if (input + (skip >>> 5) > fastInputLimit) { + break; + } + + // Step 2: A 4-byte match has been found. We'll later see if more + // than 4 bytes match. But, prior to the match, input + // bytes [nextEmit, ip) are unmatched. Emit them as "literal bytes." + assert nextEmitAddress + 16 <= blockLimit; + + int literalLength = (int) (input - nextEmitAddress); + output = emitLiteralLength(outputBase, output, literalLength); + + // Fast copy can use 8 extra bytes of input and output, which is safe because: + // - The input will always have INPUT_MARGIN_BYTES = 15 extra available bytes + // - The output will always have 32 spare bytes (see MaxCompressedLength). + output = fastCopy(inputBase, nextEmitAddress, outputBase, output, literalLength); + + // Step 3: Call EmitCopy, and then see if another EmitCopy could + // be our next move. Repeat until we find no match for the + // input immediately after what was consumed by the last EmitCopy call. + // + // If we exit this loop normally then we need to call EmitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can exit + // this loop via goto if we get close to exhausting the input. + int inputBytes; + do { + // We have a 4-byte match at input, and no need to emit any + // "literal bytes" prior to input. + assert (blockLimit >= input + SIZE_OF_INT); + + // determine match length + int matched = count(inputBase, input + SIZE_OF_INT, candidateIndex + SIZE_OF_INT, blockLimit); + matched += SIZE_OF_INT; + + // Emit the copy operation for this chunk + output = emitCopy(outputBase, output, input, candidateIndex, matched); + input += matched; + + // are we done? + if (input >= fastInputLimit) { + break; + } + + // We could immediately start working at input now, but to improve + // compression we first update table[Hash(ip - 1, ...)]. + long longValue = UNSAFE.getLong(inputBase, input - 1); + int prevInt = (int) longValue; + inputBytes = (int) (longValue >>> 8); + + // add hash starting with previous byte + int prevHash = hashBytes(prevInt, shift); + table[prevHash] = (short) (input - blockAddress - 1); + + // update hash of current byte + int curHash = hashBytes(inputBytes, shift); + + candidateIndex = blockAddress + (table[curHash] & 0xFFFF); + table[curHash] = (short) (input - blockAddress); + } while (inputBytes == UNSAFE.getInt(inputBase, candidateIndex)); + nextEmitAddress = input; + } + + // Emit the remaining bytes as a literal + if (nextEmitAddress < blockLimit) { + int literalLength = (int) (blockLimit - nextEmitAddress); + output = emitLiteralLength(outputBase, output, literalLength); + UNSAFE.copyMemory(inputBase, nextEmitAddress, outputBase, output, literalLength); + output += literalLength; + } + } + + return (int) (output - outputAddress); + } + + private static int count(Object inputBase, final long start, long matchStart, long matchLimit) + { + long current = start; + + // first, compare long at a time + while (current < matchLimit - (SIZE_OF_LONG - 1)) { + long diff = UNSAFE.getLong(inputBase, matchStart) ^ UNSAFE.getLong(inputBase, current); + if (diff != 0) { + current += Long.numberOfTrailingZeros(diff) >> 3; + return (int) (current - start); + } + + current += SIZE_OF_LONG; + matchStart += SIZE_OF_LONG; + } + + if (current < matchLimit - (SIZE_OF_INT - 1) && UNSAFE.getInt(inputBase, matchStart) == UNSAFE.getInt(inputBase, current)) { + current += SIZE_OF_INT; + matchStart += SIZE_OF_INT; + } + + if (current < matchLimit - (SIZE_OF_SHORT - 1) && UNSAFE.getShort(inputBase, matchStart) == UNSAFE.getShort(inputBase, current)) { + current += SIZE_OF_SHORT; + matchStart += SIZE_OF_SHORT; + } + + if (current < matchLimit && UNSAFE.getByte(inputBase, matchStart) == UNSAFE.getByte(inputBase, current)) { + ++current; + } + + return (int) (current - start); + } + + private static long emitLiteralLength(Object outputBase, long output, int literalLength) + { + int n = literalLength - 1; // Zero-length literals are disallowed + if (n < 60) { + // Size fits in tag byte + UNSAFE.putByte(outputBase, output++, (byte) (n << 2)); + } + else { + int bytes; + if (n < (1 << 8)) { + UNSAFE.putByte(outputBase, output++, (byte) (59 + 1 << 2)); + bytes = 1; + } + else if (n < (1 << 16)) { + UNSAFE.putByte(outputBase, output++, (byte) (59 + 2 << 2)); + bytes = 2; + } + else if (n < (1 << 24)) { + UNSAFE.putByte(outputBase, output++, (byte) (59 + 3 << 2)); + bytes = 3; + } + else { + UNSAFE.putByte(outputBase, output++, (byte) (59 + 4 << 2)); + bytes = 4; + } + // System is assumed to be little endian, so low bytes will be zero for the smaller numbers + UNSAFE.putInt(outputBase, output, n); + output += bytes; + } + return output; + } + + private static long fastCopy(final Object inputBase, long input, final Object outputBase, long output, final int literalLength) + { + final long outputLimit = output + literalLength; + do { + UNSAFE.putLong(outputBase, output, UNSAFE.getLong(inputBase, input)); + input += SIZE_OF_LONG; + output += SIZE_OF_LONG; + } + while (output < outputLimit); + return outputLimit; + } + + private static long emitCopy(Object outputBase, long output, long input, long matchIndex, int matchLength) + { + long offset = input - matchIndex; + + // Emit 64 byte copies but make sure to keep at least four bytes reserved + while (matchLength >= 68) { + UNSAFE.putByte(outputBase, output++, (byte) (COPY_2_BYTE_OFFSET + ((64 - 1) << 2))); + UNSAFE.putShort(outputBase, output, (short) offset); + output += SIZE_OF_SHORT; + matchLength -= 64; + } + + // Emit an extra 60 byte copy if have too much data to fit in one copy + // length < 68 + if (matchLength > 64) { + UNSAFE.putByte(outputBase, output++, (byte) (COPY_2_BYTE_OFFSET + ((60 - 1) << 2))); + UNSAFE.putShort(outputBase, output, (short) offset); + output += SIZE_OF_SHORT; + matchLength -= 60; + } + + // Emit remainder + if ((matchLength < 12) && (offset < 2048)) { + int lenMinus4 = matchLength - 4; + UNSAFE.putByte(outputBase, output++, (byte) (COPY_1_BYTE_OFFSET + ((lenMinus4) << 2) + ((offset >>> 8) << 5))); + UNSAFE.putByte(outputBase, output++, (byte) (offset)); + } + else { + UNSAFE.putByte(outputBase, output++, (byte) (COPY_2_BYTE_OFFSET + ((matchLength - 1) << 2))); + UNSAFE.putShort(outputBase, output, (short) offset); + output += SIZE_OF_SHORT; + } + return output; + } + + @SuppressWarnings("IllegalToken") + private static int getHashTableSize(int inputSize) + { + // Use smaller hash table when input.size() is smaller, since we + // fill the table, incurring O(hash table size) overhead for + // compression, and if the input is short, we won't need that + // many hash table entries anyway. + assert (MAX_HASH_TABLE_SIZE >= 256); + + // smallest power of 2 larger than inputSize + int target = Integer.highestOneBit(inputSize - 1) << 1; + + // keep it between MIN_TABLE_SIZE and MAX_TABLE_SIZE + return Math.max(Math.min(target, MAX_HASH_TABLE_SIZE), 256); + } + + // Any hash function will produce a valid compressed stream, but a good + // hash function reduces the number of collisions and thus yields better + // compression for compressible input, and more speed for incompressible + // input. Of course, it doesn't hurt if the hash function is reasonably fast + // either, as it gets called a lot. + private static int hashBytes(int value, int shift) + { + return (value * 0x1e35a7bd) >>> shift; + } + + private static int log2Floor(int n) + { + return n == 0 ? -1 : 31 ^ Integer.numberOfLeadingZeros(n); + } + + private static final int HIGH_BIT_MASK = 0x80; + /** + * Writes the uncompressed length as variable length integer. + */ + private static long writeUncompressedLength(Object outputBase, long outputAddress, int uncompressedLength) + { + if (uncompressedLength < (1 << 7) && uncompressedLength >= 0) { + UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength)); + } + else if (uncompressedLength < (1 << 14) && uncompressedLength > 0) { + UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK)); + UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 7)); + } + else if (uncompressedLength < (1 << 21) && uncompressedLength > 0) { + UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK)); + UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 7) | HIGH_BIT_MASK)); + UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 14)); + } + else if (uncompressedLength < (1 << 28) && uncompressedLength > 0) { + UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK)); + UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 7) | HIGH_BIT_MASK)); + UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 14) | HIGH_BIT_MASK)); + UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 21)); + } + else { + UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength | HIGH_BIT_MASK)); + UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 7) | HIGH_BIT_MASK)); + UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 14) | HIGH_BIT_MASK)); + UNSAFE.putByte(outputBase, outputAddress++, (byte) ((uncompressedLength >>> 21) | HIGH_BIT_MASK)); + UNSAFE.putByte(outputBase, outputAddress++, (byte) (uncompressedLength >>> 28)); + } + return outputAddress; + } +} diff --git a/src/main/java/org/xerial/snappy/pure/SnappyRawDecompressor.java b/src/main/java/org/xerial/snappy/pure/SnappyRawDecompressor.java new file mode 100644 index 0000000..688cdd3 --- /dev/null +++ b/src/main/java/org/xerial/snappy/pure/SnappyRawDecompressor.java @@ -0,0 +1,316 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.xerial.snappy.pure; + +import org.xerial.snappy.SnappyError; +import org.xerial.snappy.SnappyErrorCode; + +import static org.xerial.snappy.pure.SnappyConstants.LITERAL; +import static org.xerial.snappy.pure.SnappyConstants.SIZE_OF_INT; +import static org.xerial.snappy.pure.SnappyConstants.SIZE_OF_LONG; +import static org.xerial.snappy.pure.UnsafeUtil.UNSAFE; + +public final class SnappyRawDecompressor +{ + private static final int[] DEC_32_TABLE = {4, 1, 2, 1, 4, 4, 4, 4}; + private static final int[] DEC_64_TABLE = {0, 0, 0, -1, 0, 1, 2, 3}; + + private SnappyRawDecompressor() {} + + public static int getUncompressedLength(Object compressed, long compressedAddress, long compressedLimit) + { + return readUncompressedLength(compressed, compressedAddress, compressedLimit)[0]; + } + + public static int decompress( + final Object inputBase, + final long inputAddress, + final long inputLimit, + final Object outputBase, + final long outputAddress, + final long outputLimit) + { + // Read the uncompressed length from the front of the input + long input = inputAddress; + int[] varInt = readUncompressedLength(inputBase, input, inputLimit); + int expectedLength = varInt[0]; + input += varInt[1]; + + if(!(expectedLength <= (outputLimit - outputAddress))) { + throw new SnappyError(SnappyErrorCode.INVALID_CHUNK_SIZE, String.format("Uncompressed length %s must be less than %s", expectedLength, (outputLimit - outputAddress))); + } + + // Process the entire input + int uncompressedSize = uncompressAll( + inputBase, + input, + inputLimit, + outputBase, + outputAddress, + outputLimit); + + if (!(expectedLength == uncompressedSize)) { + throw new SnappyError(SnappyErrorCode.INVALID_CHUNK_SIZE, String.format("Recorded length is %s bytes but actual length after decompression is %s bytes ", + expectedLength, + uncompressedSize)); + } + + return expectedLength; + } + + private static int uncompressAll( + final Object inputBase, + final long inputAddress, + final long inputLimit, + final Object outputBase, + final long outputAddress, + final long outputLimit) + { + final long fastOutputLimit = outputLimit - SIZE_OF_LONG; // maximum offset in output buffer to which it's safe to write long-at-a-time + + long output = outputAddress; + long input = inputAddress; + + while (input < inputLimit) { + int opCode = UNSAFE.getByte(inputBase, input++) & 0xFF; + int entry = opLookupTable[opCode] & 0xFFFF; + + int trailerBytes = entry >>> 11; + int trailer = 0; + if (input + SIZE_OF_INT < inputLimit) { + trailer = UNSAFE.getInt(inputBase, input) & wordmask[trailerBytes]; + } + else { + if (input + trailerBytes > inputLimit) { + throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d", input - inputAddress)); + } + switch (trailerBytes) { + case 4: + trailer = (UNSAFE.getByte(inputBase, input + 3) & 0xff) << 24; + case 3: + trailer |= (UNSAFE.getByte(inputBase, input + 2) & 0xff) << 16; + case 2: + trailer |= (UNSAFE.getByte(inputBase, input + 1) & 0xff) << 8; + case 1: + trailer |= (UNSAFE.getByte(inputBase, input) & 0xff); + } + } + if (trailer < 0) { + throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d", input - inputAddress)); + } + input += trailerBytes; + + int length = entry & 0xff; + if (length == 0) { + continue; + } + + if ((opCode & 0x3) == LITERAL) { + int literalLength = length + trailer; + + // copy literal + long literalOutputLimit = output + literalLength; + if (literalOutputLimit > fastOutputLimit || input + literalLength > inputLimit - SIZE_OF_LONG) { + if (literalOutputLimit > outputLimit) { + throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d", input - inputAddress)); + } + + // slow, precise copy + UNSAFE.copyMemory(inputBase, input, outputBase, output, literalLength); + input += literalLength; + output += literalLength; + } + else { + // fast copy. We may over-copy but there's enough room in input and output to not overrun them + do { + UNSAFE.putLong(outputBase, output, UNSAFE.getLong(inputBase, input)); + input += SIZE_OF_LONG; + output += SIZE_OF_LONG; + } + while (output < literalOutputLimit); + input -= (output - literalOutputLimit); // adjust index if we over-copied + output = literalOutputLimit; + } + } + else { + // matchOffset/256 is encoded in bits 8..10. By just fetching + // those bits, we get matchOffset (since the bit-field starts at + // bit 8). + int matchOffset = entry & 0x700; + matchOffset += trailer; + + long matchAddress = output - matchOffset; + if (matchAddress < outputAddress || output + length > outputLimit) { + throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d", input - inputAddress)); + } + long matchOutputLimit = output + length; + + if (output > fastOutputLimit) { + // slow match copy + while (output < matchOutputLimit) { + UNSAFE.putByte(outputBase, output++, UNSAFE.getByte(outputBase, matchAddress++)); + } + } + else { + // copy repeated sequence + if (matchOffset < SIZE_OF_LONG) { + // 8 bytes apart so that we can copy long-at-a-time below + int increment32 = DEC_32_TABLE[matchOffset]; + int decrement64 = DEC_64_TABLE[matchOffset]; + + UNSAFE.putByte(outputBase, output, UNSAFE.getByte(outputBase, matchAddress)); + UNSAFE.putByte(outputBase, output + 1, UNSAFE.getByte(outputBase, matchAddress + 1)); + UNSAFE.putByte(outputBase, output + 2, UNSAFE.getByte(outputBase, matchAddress + 2)); + UNSAFE.putByte(outputBase, output + 3, UNSAFE.getByte(outputBase, matchAddress + 3)); + output += SIZE_OF_INT; + matchAddress += increment32; + + UNSAFE.putInt(outputBase, output, UNSAFE.getInt(outputBase, matchAddress)); + output += SIZE_OF_INT; + matchAddress -= decrement64; + } + else { + UNSAFE.putLong(outputBase, output, UNSAFE.getLong(outputBase, matchAddress)); + matchAddress += SIZE_OF_LONG; + output += SIZE_OF_LONG; + } + + if (matchOutputLimit > fastOutputLimit) { + if (matchOutputLimit > outputLimit) { + throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d", input - inputAddress)); + } + + while (output < fastOutputLimit) { + UNSAFE.putLong(outputBase, output, UNSAFE.getLong(outputBase, matchAddress)); + matchAddress += SIZE_OF_LONG; + output += SIZE_OF_LONG; + } + + while (output < matchOutputLimit) { + UNSAFE.putByte(outputBase, output++, UNSAFE.getByte(outputBase, matchAddress++)); + } + } + else { + while (output < matchOutputLimit) { + UNSAFE.putLong(outputBase, output, UNSAFE.getLong(outputBase, matchAddress)); + matchAddress += SIZE_OF_LONG; + output += SIZE_OF_LONG; + } + } + } + output = matchOutputLimit; // correction in case we over-copied + } + } + + return (int) (output - outputAddress); + } + + // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits + private static final int[] wordmask = new int[] { + 0, 0xff, 0xffff, 0xffffff, 0xffffffff + }; + + // Data stored per entry in lookup table: + // Range Bits-used Description + // ------------------------------------ + // 1..64 0..7 Literal/copy length encoded in opcode byte + // 0..7 8..10 Copy offset encoded in opcode byte / 256 + // 0..4 11..13 Extra bytes after opcode + // + // We use eight bits for the length even though 7 would have sufficed + // because of efficiency reasons: + // (1) Extracting a byte is faster than a bit-field + // (2) It properly aligns copy offset so we do not need a <<8 + private static final short[] opLookupTable = new short[] { + 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, + 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, + 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, + 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, + 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, + 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, + 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, + 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, + 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, + 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, + 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, + 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, + 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, + 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, + 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, + 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, + 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, + 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, + 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, + 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, + 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, + 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, + 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, + 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, + 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, + 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, + 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, + 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, + 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, + 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, + 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, + 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 + }; + + /** + * Reads the variable length integer encoded a the specified offset, and + * returns this length with the number of bytes read. + */ + static int[] readUncompressedLength(Object compressed, long compressedAddress, long compressedLimit) + { + int result; + int bytesRead = 0; + { + int b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit); + bytesRead++; + result = b & 0x7f; + if ((b & 0x80) != 0) { + b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit); + bytesRead++; + result |= (b & 0x7f) << 7; + if ((b & 0x80) != 0) { + b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit); + bytesRead++; + result |= (b & 0x7f) << 14; + if ((b & 0x80) != 0) { + b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit); + bytesRead++; + result |= (b & 0x7f) << 21; + if ((b & 0x80) != 0) { + b = getUnsignedByteSafe(compressed, compressedAddress + bytesRead, compressedLimit); + bytesRead++; + result |= (b & 0x7f) << 28; + if ((b & 0x80) != 0) { + throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d, error: %s", compressedAddress + bytesRead, "last byte of compressed length int has high bit set")); + } + } + } + } + } + } + return new int[] {result, bytesRead}; + } + + private static int getUnsignedByteSafe(Object base, long address, long limit) + { + if (address >= limit) { + throw new SnappyError(SnappyErrorCode.PARSING_ERROR, String.format("position: %d, error: %s", limit - address, "Input is truncated")); + } + return UNSAFE.getByte(base, address) & 0xFF; + } +} diff --git a/src/main/java/org/xerial/snappy/pure/UnsafeUtil.java b/src/main/java/org/xerial/snappy/pure/UnsafeUtil.java new file mode 100644 index 0000000..97677d6 --- /dev/null +++ b/src/main/java/org/xerial/snappy/pure/UnsafeUtil.java @@ -0,0 +1,69 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.xerial.snappy.pure; + +import org.xerial.snappy.SnappyError; +import org.xerial.snappy.SnappyErrorCode; +import sun.misc.Unsafe; + +import java.lang.reflect.Field; +import java.nio.Buffer; +import java.nio.ByteOrder; + +import static java.lang.String.format; + +final class UnsafeUtil +{ + public static final Unsafe UNSAFE; + private static final Field ADDRESS_ACCESSOR; + + private UnsafeUtil() + { + } + + static { + ByteOrder order = ByteOrder.nativeOrder(); + if (!order.equals(ByteOrder.LITTLE_ENDIAN)) { + throw new SnappyError(SnappyErrorCode.UNSUPPORTED_PLATFORM, format("pure-java snappy requires a little endian platform (found %s)", order)); + } + + try { + Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafe.setAccessible(true); + UNSAFE = (Unsafe) theUnsafe.get(null); + } + catch (Exception e) { + throw new SnappyError(SnappyErrorCode.UNSUPPORTED_PLATFORM, "pure-java snappy requires access to sun.misc.Unsafe"); + } + + try { + Field field = Buffer.class.getDeclaredField("address"); + field.setAccessible(true); + ADDRESS_ACCESSOR = field; + } + catch (Exception e) { + throw new SnappyError(SnappyErrorCode.UNSUPPORTED_PLATFORM, "pure-java snappy requires access to java.nio.Buffer raw address field"); + } + } + + public static long getAddress(Buffer buffer) + { + try { + return (long) ADDRESS_ACCESSOR.get(buffer); + } + catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + } +} diff --git a/src/test/java/org/xerial/snappy/PureJavaSnappyInputStreamTest.java b/src/test/java/org/xerial/snappy/PureJavaSnappyInputStreamTest.java new file mode 100644 index 0000000..d7f8612 --- /dev/null +++ b/src/test/java/org/xerial/snappy/PureJavaSnappyInputStreamTest.java @@ -0,0 +1,25 @@ +package org.xerial.snappy; + +import org.junit.AfterClass; +import org.junit.BeforeClass; + +/** + * + */ +public class PureJavaSnappyInputStreamTest + extends SnappyInputStreamTest +{ + @BeforeClass + public static void setUp() + throws Exception + { + PureJavaSnappyTestUtil.setUp(); + } + + @AfterClass + public static void tearDown() + throws Exception + { + PureJavaSnappyTestUtil.tearDown(); + } +} diff --git a/src/test/java/org/xerial/snappy/PureJavaSnappyOutputStreamTest.java b/src/test/java/org/xerial/snappy/PureJavaSnappyOutputStreamTest.java new file mode 100644 index 0000000..7bed062 --- /dev/null +++ b/src/test/java/org/xerial/snappy/PureJavaSnappyOutputStreamTest.java @@ -0,0 +1,25 @@ +package org.xerial.snappy; + +import org.junit.AfterClass; +import org.junit.BeforeClass; + +/** + * + */ +public class PureJavaSnappyOutputStreamTest + extends SnappyOutputStreamTest +{ + @BeforeClass + public static void setUp() + throws Exception + { + PureJavaSnappyTestUtil.setUp(); + } + + @AfterClass + public static void tearDown() + throws Exception + { + PureJavaSnappyTestUtil.tearDown(); + } +} diff --git a/src/test/java/org/xerial/snappy/PureJavaSnappyTest.java b/src/test/java/org/xerial/snappy/PureJavaSnappyTest.java new file mode 100644 index 0000000..99c261c --- /dev/null +++ b/src/test/java/org/xerial/snappy/PureJavaSnappyTest.java @@ -0,0 +1,25 @@ +package org.xerial.snappy; + +import org.junit.AfterClass; +import org.junit.BeforeClass; + +/** + * + */ +public class PureJavaSnappyTest + extends SnappyTest +{ + @BeforeClass + public static void setUp() + throws Exception + { + PureJavaSnappyTestUtil.setUp(); + } + + @AfterClass + public static void tearDown() + throws Exception + { + PureJavaSnappyTestUtil.tearDown(); + } +} diff --git a/src/test/java/org/xerial/snappy/PureJavaSnappyTestUtil.java b/src/test/java/org/xerial/snappy/PureJavaSnappyTestUtil.java new file mode 100644 index 0000000..bbbf79f --- /dev/null +++ b/src/test/java/org/xerial/snappy/PureJavaSnappyTestUtil.java @@ -0,0 +1,34 @@ +package org.xerial.snappy; + +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.xerial.util.log.Logger; + +/** + * + */ +public abstract class PureJavaSnappyTestUtil +{ + private static Logger _logger = Logger.getLogger(PureJavaSnappyTestUtil.class); + + @BeforeClass + public static void setUp() + throws Exception + { + _logger.info("Loading pure-java Snappy"); + Snappy.cleanUp(); + System.setProperty(SnappyLoader.KEY_SNAPPY_PUREJAVA, "true"); + SnappyLoader.loadSnappyApi(); + } + + @AfterClass + public static void tearDown() + throws Exception + { + System.setProperty(SnappyLoader.KEY_SNAPPY_PUREJAVA, "false"); + Snappy.cleanUp(); + _logger.info("Unloading pure-java Snappy"); + // Initialize Snappy again for the other tests + Snappy.init(); + } +}