diff --git a/Makefile b/Makefile
index 364c95c..0759503 100644
--- a/Makefile
+++ b/Makefile
@@ -7,17 +7,57 @@ SBT:=./sbt
 all: snappy
 
 SNAPPY_OUT:=$(TARGET)/$(snappy)-$(os_arch)
-SNAPPY_ARCHIVE:=$(TARGET)/snappy-$(VERSION).tar.gz
+SNAPPY_ARCHIVE:=$(TARGET)/snappy-$(VERSION).tar.gz
 SNAPPY_CC:=snappy-sinksource.cc snappy-stubs-internal.cc snappy.cc
 SNAPPY_SRC_DIR:=$(TARGET)/snappy-$(VERSION)
 SNAPPY_SRC:=$(addprefix $(SNAPPY_SRC_DIR)/,$(SNAPPY_CC))
-SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) SnappyNative.o)
-
-SNAPPY_GIT_UNPACKED:=$(TARGET)/snappy-git-extracted.log
 SNAPPY_GIT_REPO_URL:=https://github.com/google/snappy
 SNAPPY_GIT_REV:=2b9152d9c5bed71dffb7f7f6c7a3ec48b058ff2d # 1.1.3 with autogen.sh fix
+SNAPPY_UNPACKED:=$(TARGET)/snappy-extracted.log
+SNAPPY_GIT_UNPACKED:=$(TARGET)/snappy-git-extracted.log
 
-CXXFLAGS:=$(CXXFLAGS) -I$(SNAPPY_SRC_DIR)
+ifdef ENABLE_BITSHUFFLE
+  # TODO: Upgrade to a stable release
+  BITSHUFFLE_VERSION:=55f9b4caec73fa21d13947cacea1295926781440
+  BITSHUFFLE_ARCHIVE:=$(TARGET)/bitshuffle-$(BITSHUFFLE_VERSION).tar.gz
+  BITSHUFFLE_C:=bitshuffle_core.c iochain.c
+  BITSHUFFLE_SRC_DIR:=$(TARGET)/bitshuffle-$(BITSHUFFLE_VERSION)/src
+  BITSHUFFLE_SRC:=$(addprefix $(BITSHUFFLE_SRC_DIR)/,$(BITSHUFFLE_C))
+  BITSHUFFLE_UNPACKED:=$(TARGET)/bitshuffle-extracted.log
+endif
+
+ifdef USE_GIT
+  ifndef GIT_REPO_URL
+    $(warning GIT_REPO_URL is not set when using git)
+  endif
+  ifndef GIT_SNAPPY_BRANCH
+    $(warning GIT_SNAPPY_BRANCH is not set when using git)
+  endif
+endif
+
+ifdef ENABLE_BITSHUFFLE
+  $(BITSHUFFLE_ARCHIVE):
+	@mkdir -p $(@D)
+	curl -L -o$@ https://github.com/kiyo-masui/bitshuffle/archive/$(BITSHUFFLE_VERSION).tar.gz
+
+  $(BITSHUFFLE_UNPACKED): $(BITSHUFFLE_ARCHIVE)
+	$(TAR) xvfz $< -C $(TARGET)
+	touch $@
+
+  $(BITSHUFFLE_SRC): $(BITSHUFFLE_UNPACKED)
+
+  $(SNAPPY_OUT)/%.o : $(BITSHUFFLE_SRC_DIR)/%.c
+	@mkdir -p $(@D)
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+  SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) $(patsubst %.c,%.o,$(BITSHUFFLE_C)) SnappyNative.o)
+
+  CXXFLAGS:=$(CXXFLAGS) -DSNAPPY_BITSHUFFLE_ENABLED -I$(SNAPPY_SRC_DIR) -I$(BITSHUFFLE_SRC_DIR)
+else
+  SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) SnappyNative.o)
+
+  CXXFLAGS:=$(CXXFLAGS) -I$(SNAPPY_SRC_DIR)
+endif
 
 ifeq ($(OS_NAME),SunOS)
 	TAR:= gtar
@@ -29,6 +69,11 @@ $(SNAPPY_ARCHIVE):
 	@mkdir -p $(@D)
 	curl -L -o$@ https://github.com/google/snappy/releases/download/$(VERSION)/snappy-$(VERSION).tar.gz
 
+$(SNAPPY_UNPACKED): $(SNAPPY_ARCHIVE)
+	$(TAR) xvfz $< -C $(TARGET)
+	cd $(SNAPPY_SRC_DIR) && ./configure
+	touch $@
+
 $(SNAPPY_GIT_UNPACKED):
 	rm -rf $(SNAPPY_SRC_DIR)
 	@mkdir -p $(SNAPPY_SRC_DIR)
@@ -50,18 +95,18 @@ $(SNAPPY_SRC): $(SNAPPY_GIT_UNPACKED)
 
 $(SNAPPY_OUT)/%.o : $(SNAPPY_SRC_DIR)/%.cc
 	@mkdir -p $(@D)
-	$(CXX) $(CXXFLAGS) -c $< -o $@
+	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-$(SNAPPY_OUT)/SnappyNative.o : $(SRC)/org/xerial/snappy/SnappyNative.cpp $(SRC)/org/xerial/snappy/SnappyNative.h
+$(SNAPPY_OUT)/SnappyNative.o : $(SRC)/org/xerial/snappy/SnappyNative.cpp $(SRC)/org/xerial/snappy/SnappyNative.h
 	@mkdir -p $(@D)
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 
 $(SNAPPY_OUT)/$(LIBNAME): $(SNAPPY_OBJ)
-	$(CXX) $(CXXFLAGS) -o $@ $+ $(LINKFLAGS)
+	$(CXX) $(CXXFLAGS) -o $@ $+ $(LINKFLAGS)
 	$(STRIP) $@
 
-clean-native:
+clean-native:
 	rm -rf $(SNAPPY_OUT)
 
 clean:
@@ -76,7 +121,7 @@ snappy-jar-version:=snappy-java-$(shell perl -npe "s/version in ThisBuild\s+:=\s
 native: $(SNAPPY_GIT_UNPACKED) $(NATIVE_DLL)
 snappy: native $(TARGET)/$(snappy-jar-version).jar
 
-$(NATIVE_DLL): $(SNAPPY_OUT)/$(LIBNAME)
+$(NATIVE_DLL): $(SNAPPY_OUT)/$(LIBNAME)
 	@mkdir -p $(@D)
 	cp $< $@
 	@mkdir -p $(NATIVE_TARGET_DIR)
@@ -85,20 +130,20 @@ $(NATIVE_DLL): $(SNAPPY_OUT)/$(LIBNAME)
 
 package: $(TARGET)/$(snappy-jar-version).jar
 
-$(TARGET)/$(snappy-jar-version).jar:
-	$(SBT) package
+$(TARGET)/$(snappy-jar-version).jar:
+	$(SBT) package
 
 test: $(NATIVE_DLL)
 	$(SBT) test
 
-win32:
+win32:
 	$(MAKE) native CROSS_PREFIX=i686-w64-mingw32- OS_NAME=Windows OS_ARCH=x86
 
 # for cross-compilation on Ubuntu, install the g++-mingw-w64-x86-64 package
 win64:
 	$(MAKE) native CROSS_PREFIX=x86_64-w64-mingw32- OS_NAME=Windows OS_ARCH=x86_64
 
-mac32:
+mac32:
 	$(MAKE) native OS_NAME=Mac OS_ARCH=x86
 
 linux32:
@@ -139,11 +184,10 @@ googlecode-src-upload: $(TARGET)/snappy-java-$(VERSION)-src.upload
 GOOGLECODE_USER:=leo@xerial.org
 
 $(TARGET)/snappy-java-$(VERSION)-lib.upload:
-	./googlecode_upload.py -s "library for all platforms" -p snappy-java -l "Type-Executable,Featured,OpSys-All" -u "$(GOOGLECODE_USER)" target/snappy-java-$(VERSION).jar
+	./googlecode_upload.py -s "library for all platforms" -p snappy-java -l "Type-Executable,Featured,OpSys-All" -u "$(GOOGLECODE_USER)" target/snappy-java-$(VERSION).jar
 	touch $@
 
 $(TARGET)/snappy-java-$(VERSION)-src.upload:
-	./googlecode_upload.py -s "source code archive" -p snappy-java -l "Type-Source,OpSys-All" -u "$(GOOGLECODE_USER)" target/snappy-java-$(VERSION).tar.gz
+	./googlecode_upload.py -s "source code archive" -p snappy-java -l "Type-Source,OpSys-All" -u "$(GOOGLECODE_USER)" target/snappy-java-$(VERSION).tar.gz
 	touch $@
 
-
diff --git a/src/main/java/org/xerial/snappy/Snappy.java b/src/main/java/org/xerial/snappy/Snappy.java
index dc81f7c..53d6b23 100755
--- a/src/main/java/org/xerial/snappy/Snappy.java
+++ b/src/main/java/org/xerial/snappy/Snappy.java
@@ -903,4 +903,189 @@ public class Snappy
         byte[] uncompressed = uncompress(input);
         return new String(uncompressed, encoding);
     }
+
+    /**
+     * Apply a bit-shuffling filter to the input short array.
+     *
+     * @param input the short array to shuffle
+     * @return bit-shuffled byte array of input.length * 2 bytes
+     * @throws IOException
+     */
+    public static byte[] bitShuffle(short[] input) throws IOException {
+        byte[] output = new byte[input.length * 2];
+        if (impl.supportBitSuffle()) {
+            impl.bitShuffle(input, 0, 2, input.length * 2, output, 0);
+        } else {
+            arrayCopy(input, 0, input.length * 2, output, 0);
+        }
+        return output;
+    }
+
+    /**
+     * Apply a bit-shuffling filter to the input int array.
+     *
+     * @param input the int array to shuffle
+     * @return bit-shuffled byte array of input.length * 4 bytes
+     * @throws IOException
+     */
+    public static byte[] bitShuffle(int[] input) throws IOException {
+        byte[] output = new byte[input.length * 4];
+        if (impl.supportBitSuffle()) {
+            impl.bitShuffle(input, 0, 4, input.length * 4, output, 0);
+        } else {
+            arrayCopy(input, 0, input.length * 4, output, 0);
+        }
+        return output;
+    }
+
+    /**
+     * Apply a bit-shuffling filter to the input long array.
+     *
+     * @param input the long array to shuffle
+     * @return bit-shuffled byte array of input.length * 8 bytes
+     * @throws IOException
+     */
+    public static byte[] bitShuffle(long[] input) throws IOException {
+        byte[] output = new byte[input.length * 8];
+        if (impl.supportBitSuffle()) {
+            impl.bitShuffle(input, 0, 8, input.length * 8, output, 0);
+        } else {
+            arrayCopy(input, 0, input.length * 8, output, 0);
+        }
+        return output;
+    }
+
+    /**
+     * Apply a bit-shuffling filter to the input float array.
+     *
+     * @param input the float array to shuffle
+     * @return bit-shuffled byte array of input.length * 4 bytes
+     * @throws IOException
+     */
+    public static byte[] bitShuffle(float[] input) throws IOException {
+        byte[] output = new byte[input.length * 4];
+        if (impl.supportBitSuffle()) {
+            impl.bitShuffle(input, 0, 4, input.length * 4, output, 0);
+        } else {
+            arrayCopy(input, 0, input.length * 4, output, 0);
+        }
+        return output;
+    }
+
+    /**
+     * Apply a bit-shuffling filter to the input double array.
+     *
+     * @param input the double array to shuffle
+     * @return bit-shuffled byte array of input.length * 8 bytes
+     * @throws IOException
+     */
+    public static byte[] bitShuffle(double[] input) throws IOException {
+        byte[] output = new byte[input.length * 8];
+        if (impl.supportBitSuffle()) {
+            impl.bitShuffle(input, 0, 8, input.length * 8, output, 0);
+        } else {
+            arrayCopy(input, 0, input.length * 8, output, 0);
+        }
+        return output;
+    }
+
+    /**
+     * Convert the input bit-shuffled byte array back into the original short array.
+     *
+     * @param input bit-shuffled bytes; the length must be a multiple of 2
+     * @return a short array of input.length / 2 elements
+     * @throws IOException if input.length is not a multiple of 2
+     */
+    public static short[] bitUnShuffleShortArray(byte[] input) throws IOException {
+        if (input.length % 2 != 0) {
+            throw new IOException("input length " + input.length + " is not a multiple of 2");
+        }
+        short[] output = new short[input.length / 2];
+        if (impl.supportBitSuffle()) {
+            impl.bitUnShuffle(input, 0, 2, input.length, output, 0);
+        } else {
+            arrayCopy(input, 0, input.length, output, 0);
+        }
+        return output;
+    }
+
+    /**
+     * Convert the input bit-shuffled byte array back into the original int array.
+     *
+     * @param input bit-shuffled bytes; the length must be a multiple of 4
+     * @return an int array of input.length / 4 elements
+     * @throws IOException if input.length is not a multiple of 4
+     */
+    public static int[] bitUnShuffleIntArray(byte[] input) throws IOException {
+        if (input.length % 4 != 0) {
+            throw new IOException("input length " + input.length + " is not a multiple of 4");
+        }
+        int[] output = new int[input.length / 4];
+        if (impl.supportBitSuffle()) {
+            impl.bitUnShuffle(input, 0, 4, input.length, output, 0);
+        } else {
+            arrayCopy(input, 0, input.length, output, 0);
+        }
+        return output;
+    }
+
+    /**
+     * Convert the input bit-shuffled byte array back into the original long array.
+     *
+     * @param input bit-shuffled bytes; the length must be a multiple of 8
+     * @return a long array of input.length / 8 elements
+     * @throws IOException if input.length is not a multiple of 8
+     */
+    public static long[] bitUnShuffleLongArray(byte[] input) throws IOException {
+        if (input.length % 8 != 0) {
+            throw new IOException("input length " + input.length + " is not a multiple of 8");
+        }
+        long[] output = new long[input.length / 8];
+        if (impl.supportBitSuffle()) {
+            impl.bitUnShuffle(input, 0, 8, input.length, output, 0);
+        } else {
+            arrayCopy(input, 0, input.length, output, 0);
+        }
+        return output;
+    }
+
+    /**
+     * Convert the input bit-shuffled byte array back into the original float array.
+     *
+     * @param input bit-shuffled bytes; the length must be a multiple of 4
+     * @return a float array of input.length / 4 elements
+     * @throws IOException if input.length is not a multiple of 4
+     */
+    public static float[] bitUnShuffleFloatArray(byte[] input) throws IOException {
+        if (input.length % 4 != 0) {
+            throw new IOException("input length " + input.length + " is not a multiple of 4");
+        }
+        float[] output = new float[input.length / 4];
+        if (impl.supportBitSuffle()) {
+            impl.bitUnShuffle(input, 0, 4, input.length, output, 0);
+        } else {
+            arrayCopy(input, 0, input.length, output, 0);
+        }
+        return output;
+    }
+
+    /**
+     * Convert the input bit-shuffled byte array back into the original double array.
+     *
+     * @param input bit-shuffled bytes; the length must be a multiple of 8
+     * @return a double array of input.length / 8 elements
+     * @throws IOException if input.length is not a multiple of 8
+     */
+    public static double[] bitUnShuffleDoubleArray(byte[] input) throws IOException {
+        if (input.length % 8 != 0) {
+            throw new IOException("input length " + input.length + " is not a multiple of 8");
+        }
+        double[] output = new double[input.length / 8];
+        if (impl.supportBitSuffle()) {
+            impl.bitUnShuffle(input, 0, 8, input.length, output, 0);
+        } else {
+            arrayCopy(input, 0, input.length, output, 0);
+        }
+        return output;
+    }
 }
diff --git a/src/main/java/org/xerial/snappy/SnappyNative.cpp b/src/main/java/org/xerial/snappy/SnappyNative.cpp
index d635880..1abeab7 100755
--- a/src/main/java/org/xerial/snappy/SnappyNative.cpp
+++ b/src/main/java/org/xerial/snappy/SnappyNative.cpp
@@ -18,6 +18,11 @@
 #include <snappy.h>
 #include "SnappyNative.h"
 
+#ifdef SNAPPY_BITSHUFFLE_ENABLED
+#include <stdint.h>
+#include <bitshuffle.h>
+#endif
+
 void throw_exception(JNIEnv *env, jobject self, int errorCode)
 {
     jclass c = env->FindClass("org/xerial/snappy/SnappyNative");
@@ -170,6 +175,114 @@ JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_
 
 
 
+/*
+ * Class:     org_xerial_snappy_SnappyNative
+ * Method:    supportBitSuffle
+ * Signature: ()Z
+ */
+JNIEXPORT jboolean JNICALL Java_org_xerial_snappy_SnappyNative_supportBitSuffle
+  (JNIEnv *, jobject) {
+#ifdef SNAPPY_BITSHUFFLE_ENABLED
+  return (jboolean) true;
+#else
+  return (jboolean) false;
+#endif
+}
+
+
+
+/*
+ * Class:     org_xerial_snappy_SnappyNative
+ * Method:    bitShuffle
+ * Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
+ */
+JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitShuffle
+  (JNIEnv * env, jobject self, jobject input, jint inputOffset, jint typeSize, jint length, jobject output, jint outputOffset)
+{
+#ifdef SNAPPY_BITSHUFFLE_ENABLED
+  char* in = (char*) env->GetPrimitiveArrayCritical((jarray) input, 0);
+  char* out = (char*) env->GetPrimitiveArrayCritical((jarray) output, 0);
+  if(in == 0 || out == 0) {
+    // out of memory
+    if(in != 0) {
+      env->ReleasePrimitiveArrayCritical((jarray) input, in, 0);
+    }
+    if(out != 0) {
+      env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
+    }
+    throw_exception(env, self, 4);
+    return 0;
+  }
+
+  int64_t processedBytes = bshuf_bitshuffle(
+    in + inputOffset, out + outputOffset, (size_t) (length / typeSize), (size_t) typeSize, 0);
+
+  env->ReleasePrimitiveArrayCritical((jarray) input, in, 0);
+  env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
+
+  if(processedBytes < 0) {
+    // bitshuffle signals failure with a negative error code; surface it as an exception.
+    // NOTE(review): code 1 mirrors the #else branch below -- confirm the right error code.
+    throw_exception(env, self, 1);
+    return 0;
+  }
+
+  return (jint) processedBytes;
+#else
+  // Returns an error code for unsupported operations
+  throw_exception(env, self, 1);
+  return (jint) 0;
+#endif
+}
+
+
+
+/*
+ * Class:     org_xerial_snappy_SnappyNative
+ * Method:    bitUnShuffle
+ * Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
+ */
+JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitUnShuffle
+  (JNIEnv * env, jobject self, jobject input, jint inputOffset, jint typeSize, jint length, jobject output, jint outputOffset)
+{
+#ifdef SNAPPY_BITSHUFFLE_ENABLED
+  char* in = (char*) env->GetPrimitiveArrayCritical((jarray) input, 0);
+  char* out = (char*) env->GetPrimitiveArrayCritical((jarray) output, 0);
+  if(in == 0 || out == 0) {
+    // out of memory
+    if(in != 0) {
+      env->ReleasePrimitiveArrayCritical((jarray) input, in, 0);
+    }
+    if(out != 0) {
+      env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
+    }
+    throw_exception(env, self, 4);
+    return 0;
+  }
+
+  int64_t processedBytes = bshuf_bitunshuffle(
+    in + inputOffset, out + outputOffset, (size_t) (length / typeSize), (size_t) typeSize, 0);
+
+  env->ReleasePrimitiveArrayCritical((jarray) input, in, 0);
+  env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
+
+  if(processedBytes < 0) {
+    // bitshuffle signals failure with a negative error code; surface it as an exception.
+    // NOTE(review): code 1 mirrors the #else branch below -- confirm the right error code.
+    throw_exception(env, self, 1);
+    return 0;
+  }
+
+  return (jint) processedBytes;
+#else
+  // Returns an error code for unsupported operations
+  throw_exception(env, self, 1);
+  return (jint) 0;
+#endif
+}
+
+
+
 /*
  * Class:     org_xerial_snappy_Snappy
  * Method:    maxCompressedLength
diff --git a/src/main/java/org/xerial/snappy/SnappyNative.h b/src/main/java/org/xerial/snappy/SnappyNative.h
old mode 100755
new mode 100644
index d1f3e58..451c37b
--- a/src/main/java/org/xerial/snappy/SnappyNative.h
+++ b/src/main/java/org/xerial/snappy/SnappyNative.h
@@ -63,6 +63,30 @@ JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_
 JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_lang_Object_2IILjava_lang_Object_2I
   (JNIEnv *, jobject, jobject, jint, jint, jobject, jint);
 
+/*
+ * Class:     org_xerial_snappy_SnappyNative
+ * Method:    supportBitSuffle
+ * Signature: ()Z
+ */
+JNIEXPORT jboolean JNICALL Java_org_xerial_snappy_SnappyNative_supportBitSuffle
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_xerial_snappy_SnappyNative
+ * Method:    bitShuffle
+ * Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
+ */
+JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitShuffle
+  (JNIEnv *, jobject, jobject, jint, jint, jint, jobject, jint);
+
+/*
+ * Class:     org_xerial_snappy_SnappyNative
+ * Method:    bitUnShuffle
+ * Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
+ */
+JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitUnShuffle
+  (JNIEnv *, jobject, jobject, jint, jint, jint, jobject, jint);
+
 /*
  * Class:     org_xerial_snappy_SnappyNative
  * Method:    maxCompressedLength
diff --git a/src/main/java/org/xerial/snappy/SnappyNative.java b/src/main/java/org/xerial/snappy/SnappyNative.java
index 95a6f41..3ef8abe 100755
--- a/src/main/java/org/xerial/snappy/SnappyNative.java
+++ b/src/main/java/org/xerial/snappy/SnappyNative.java
@@ -66,6 +66,22 @@ public class SnappyNative
     public native int rawUncompress(Object input, int inputOffset, int inputLength, Object output, int outputOffset)
             throws IOException;
 
+    // ------------------------------------------------------------------------
+    // Bit-shuffling routines to improve compression of typed binary data.
+    // A quick benchmark result can be found in the gist below:
+    // https://gist.github.com/maropu/01103215df34b317a7a7
+    // ------------------------------------------------------------------------
+
+    // Returns true iff the native library loaded in this JVM can run bit-shuffling.
+    // Bit-shuffling is executable only in x86 environments that support SSE/AVX instructions.
+    public native boolean supportBitSuffle();
+
+    public native int bitShuffle(Object input, int inputOffset, int typeSize, int byteLength, Object output, int outputOffset)
+            throws IOException;
+
+    public native int bitUnShuffle(Object input, int inputOffset, int typeSize, int byteLength, Object output, int outputOffset)
+            throws IOException;
+
     // Returns the maximal size of the compressed representation of
     // input data that is "source_bytes" bytes in length;
     public native int maxCompressedLength(int source_bytes);
diff --git a/src/main/resources/org/xerial/snappy/native/Mac/x86_64/libsnappyjava.jnilib b/src/main/resources/org/xerial/snappy/native/Mac/x86_64/libsnappyjava.jnilib
index f56b8b4..ad93f95 100755
Binary files a/src/main/resources/org/xerial/snappy/native/Mac/x86_64/libsnappyjava.jnilib and b/src/main/resources/org/xerial/snappy/native/Mac/x86_64/libsnappyjava.jnilib differ
diff --git a/src/test/java/org/xerial/snappy/SnappyTest.java b/src/test/java/org/xerial/snappy/SnappyTest.java
index 18b39e9..f450b09 100755
--- a/src/test/java/org/xerial/snappy/SnappyTest.java
+++ b/src/test/java/org/xerial/snappy/SnappyTest.java
@@ -329,4 +329,34 @@ public class SnappyTest
             _logger.debug(e);
         }
     }
+
+    @Test
+    public void bitShuffleLongArray()
+            throws Exception
+    {
+        long[] data = new long[] {2, 3, 15, 4234, 43251531412342342L, 23423422342L};
+        byte[] shuffledData = Snappy.bitShuffle(data);
+        long[] result = Snappy.bitUnShuffleLongArray(shuffledData);
+        assertArrayEquals(data, result);
+    }
+
+    @Test
+    public void bitShuffleShortArray()
+            throws Exception
+    {
+        short[] data = new short[] {432, -32267, 1, 3, 34, 43, 34, Short.MAX_VALUE, -1};
+        byte[] shuffledData = Snappy.bitShuffle(data);
+        short[] result = Snappy.bitUnShuffleShortArray(shuffledData);
+        assertArrayEquals(data, result);
+    }
+
+    @Test
+    public void bitShuffleIntArray()
+            throws Exception
+    {
+        int[] data = new int[] {432, -32267, 1, 3, 34, 43, 34, Short.MAX_VALUE, -1, Integer.MAX_VALUE, 3424, 43};
+        byte[] shuffledData = Snappy.bitShuffle(data);
+        int[] result = Snappy.bitUnShuffleIntArray(shuffledData);
+        assertArrayEquals(data, result);
+    }
 }