Implement bit-shuffling interfaces to improve LZ performance

This commit is contained in:
Takeshi YAMAMURO 2016-03-29 23:14:33 +09:00
parent 8c04209b55
commit 99b297cfcd
7 changed files with 400 additions and 17 deletions

View File

@ -7,17 +7,57 @@ SBT:=./sbt
all: snappy
SNAPPY_OUT:=$(TARGET)/$(snappy)-$(os_arch)
SNAPPY_ARCHIVE:=$(TARGET)/snappy-$(VERSION).tar.gz
SNAPPY_ARCHIVE:=$(TARGET)/snappy-$(VERSION).tar.gz
SNAPPY_CC:=snappy-sinksource.cc snappy-stubs-internal.cc snappy.cc
SNAPPY_SRC_DIR:=$(TARGET)/snappy-$(VERSION)
SNAPPY_SRC:=$(addprefix $(SNAPPY_SRC_DIR)/,$(SNAPPY_CC))
SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) SnappyNative.o)
SNAPPY_GIT_UNPACKED:=$(TARGET)/snappy-git-extracted.log
SNAPPY_GIT_REPO_URL:=https://github.com/google/snappy
SNAPPY_GIT_REV:=2b9152d9c5bed71dffb7f7f6c7a3ec48b058ff2d # 1.1.3 with autogen.sh fix
SNAPPY_UNPACKED:=$(TARGET)/snappy-extracted.log
SNAPPY_GIT_UNPACKED:=$(TARGET)/snappy-git-extracted.log
CXXFLAGS:=$(CXXFLAGS) -I$(SNAPPY_SRC_DIR)
# Optional bitshuffle support: these variables are only defined when
# ENABLE_BITSHUFFLE is set (e.g. on the make command line or in the environment).
ifdef ENABLE_BITSHUFFLE
# TODO: Upgrade to a stable release
# The "version" is a commit hash; it is used both in the download URL and in
# the name of the directory the archive is expected to unpack into.
BITSHUFFLE_VERSION:=55f9b4caec73fa21d13947cacea1295926781440
# Local path of the downloaded source tarball.
BITSHUFFLE_ARCHIVE:=$(TARGET)/bitshuffle-$(BITSHUFFLE_VERSION).tar.gz
# Bitshuffle C sources that get compiled into the native library.
BITSHUFFLE_C:=bitshuffle_core.c iochain.c
# Directory (inside the unpacked tree) that holds those sources and headers.
BITSHUFFLE_SRC_DIR:=$(TARGET)/bitshuffle-$(BITSHUFFLE_VERSION)/src
# Full paths of the sources listed in BITSHUFFLE_C.
BITSHUFFLE_SRC:=$(addprefix $(BITSHUFFLE_SRC_DIR)/,$(BITSHUFFLE_C))
# Stamp file touched after the archive has been extracted.
BITSHUFFLE_UNPACKED:=$(TARGET)/bitshuffle-extracted.log
endif
# Sanity check for git-based builds: when USE_GIT is defined, print a warning
# for each of GIT_REPO_URL and GIT_SNAPPY_BRANCH that is not set.
# $(warning ...) only prints a message; make keeps running afterwards.
ifdef USE_GIT
ifndef GIT_REPO_URL
$(warning GIT_REPO_URL is not set when using git)
endif
ifndef GIT_SNAPPY_BRANCH
$(warning GIT_SNAPPY_BRANCH is not set when using git)
endif
endif
# Build rules and flags that differ depending on whether bitshuffle support
# was requested through ENABLE_BITSHUFFLE.
ifdef ENABLE_BITSHUFFLE
# Download the bitshuffle tarball for the pinned revision from GitHub.
$(BITSHUFFLE_ARCHIVE):
@mkdir -p $(@D)
curl -L -o$@ https://github.com/kiyo-masui/bitshuffle/archive/$(BITSHUFFLE_VERSION).tar.gz
# Extract the tarball under $(TARGET) and touch the stamp file so the
# extraction is not repeated on the next run.
$(BITSHUFFLE_UNPACKED): $(BITSHUFFLE_ARCHIVE)
$(TAR) xvfz $< -C $(TARGET)
touch $@
# The bitshuffle sources only exist once the archive has been unpacked.
$(BITSHUFFLE_SRC): $(BITSHUFFLE_UNPACKED)
# Compile each bitshuffle .c file into $(SNAPPY_OUT); note that the C sources
# are built with $(CXX) and $(CXXFLAGS), the same as the snappy C++ sources.
$(SNAPPY_OUT)/%.o : $(BITSHUFFLE_SRC_DIR)/%.c
@mkdir -p $(@D)
$(CXX) $(CXXFLAGS) -c $< -o $@
# Objects to link: snappy objects, bitshuffle objects and the JNI glue.
SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) $(patsubst %.c,%.o,$(BITSHUFFLE_C)) SnappyNative.o)
# -DSNAPPY_BITSHUFFLE_ENABLED switches on the bitshuffle code paths guarded by
# that macro in SnappyNative.cpp; the include paths cover both source trees.
CXXFLAGS:=$(CXXFLAGS) -DSNAPPY_BITSHUFFLE_ENABLED -I$(SNAPPY_SRC_DIR) -I$(BITSHUFFLE_SRC_DIR)
else
# Without bitshuffle: only the snappy objects and the JNI glue are linked,
# and only the snappy source directory is added to the include path.
SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) SnappyNative.o)
CXXFLAGS:=$(CXXFLAGS) -I$(SNAPPY_SRC_DIR)
endif
ifeq ($(OS_NAME),SunOS)
TAR:= gtar
@ -29,6 +69,11 @@ $(SNAPPY_ARCHIVE):
@mkdir -p $(@D)
curl -L -o$@ https://github.com/google/snappy/releases/download/$(VERSION)/snappy-$(VERSION).tar.gz
$(SNAPPY_UNPACKED): $(SNAPPY_ARCHIVE)
$(TAR) xvfz $< -C $(TARGET)
touch $@
cd $(SNAPPY_SRC_DIR) && ./configure
$(SNAPPY_GIT_UNPACKED):
rm -rf $(SNAPPY_SRC_DIR)
@mkdir -p $(SNAPPY_SRC_DIR)
@ -50,18 +95,18 @@ $(SNAPPY_SRC): $(SNAPPY_GIT_UNPACKED)
$(SNAPPY_OUT)/%.o : $(SNAPPY_SRC_DIR)/%.cc
@mkdir -p $(@D)
$(CXX) $(CXXFLAGS) -c $< -o $@
$(CXX) $(CXXFLAGS) -c $< -o $@
$(SNAPPY_OUT)/SnappyNative.o : $(SRC)/org/xerial/snappy/SnappyNative.cpp $(SRC)/org/xerial/snappy/SnappyNative.h
$(SNAPPY_OUT)/SnappyNative.o : $(SRC)/org/xerial/snappy/SnappyNative.cpp $(SRC)/org/xerial/snappy/SnappyNative.h
@mkdir -p $(@D)
$(CXX) $(CXXFLAGS) -c $< -o $@
$(SNAPPY_OUT)/$(LIBNAME): $(SNAPPY_OBJ)
$(CXX) $(CXXFLAGS) -o $@ $+ $(LINKFLAGS)
$(CXX) $(CXXFLAGS) -o $@ $+ $(LINKFLAGS)
$(STRIP) $@
clean-native:
clean-native:
rm -rf $(SNAPPY_OUT)
clean:
@ -76,7 +121,7 @@ snappy-jar-version:=snappy-java-$(shell perl -npe "s/version in ThisBuild\s+:=\s
native: $(SNAPPY_GIT_UNPACKED) $(NATIVE_DLL)
snappy: native $(TARGET)/$(snappy-jar-version).jar
$(NATIVE_DLL): $(SNAPPY_OUT)/$(LIBNAME)
$(NATIVE_DLL): $(SNAPPY_OUT)/$(LIBNAME)
@mkdir -p $(@D)
cp $< $@
@mkdir -p $(NATIVE_TARGET_DIR)
@ -85,20 +130,20 @@ $(NATIVE_DLL): $(SNAPPY_OUT)/$(LIBNAME)
package: $(TARGET)/$(snappy-jar-version).jar
$(TARGET)/$(snappy-jar-version).jar:
$(SBT) package
$(TARGET)/$(snappy-jar-version).jar:
$(SBT) package
test: $(NATIVE_DLL)
$(SBT) test
win32:
win32:
$(MAKE) native CROSS_PREFIX=i686-w64-mingw32- OS_NAME=Windows OS_ARCH=x86
# for cross-compilation on Ubuntu, install the g++-mingw-w64-x86-64 package
win64:
$(MAKE) native CROSS_PREFIX=x86_64-w64-mingw32- OS_NAME=Windows OS_ARCH=x86_64
mac32:
mac32:
$(MAKE) native OS_NAME=Mac OS_ARCH=x86
linux32:
@ -139,11 +184,10 @@ googlecode-src-upload: $(TARGET)/snappy-java-$(VERSION)-src.upload
GOOGLECODE_USER:=leo@xerial.org
$(TARGET)/snappy-java-$(VERSION)-lib.upload:
./googlecode_upload.py -s "library for all platforms" -p snappy-java -l "Type-Executable,Featured,OpSys-All" -u "$(GOOGLECODE_USER)" target/snappy-java-$(VERSION).jar
./googlecode_upload.py -s "library for all platforms" -p snappy-java -l "Type-Executable,Featured,OpSys-All" -u "$(GOOGLECODE_USER)" target/snappy-java-$(VERSION).jar
touch $@
$(TARGET)/snappy-java-$(VERSION)-src.upload:
./googlecode_upload.py -s "source code archive" -p snappy-java -l "Type-Source,OpSys-All" -u "$(GOOGLECODE_USER)" target/snappy-java-$(VERSION).tar.gz
./googlecode_upload.py -s "source code archive" -p snappy-java -l "Type-Source,OpSys-All" -u "$(GOOGLECODE_USER)" target/snappy-java-$(VERSION).tar.gz
touch $@

View File

@ -903,4 +903,174 @@ public class Snappy
byte[] uncompressed = uncompress(input);
return new String(uncompressed, encoding);
}
/**
 * Apply a bit-shuffling filter to the input short array.
 * <p>
 * When the loaded native library reports bit-shuffling support, the native
 * routine is used; otherwise the input is passed to {@code arrayCopy}
 * instead (presumably yielding the raw, unshuffled bytes).
 *
 * @param input the short values to shuffle
 * @return a byte array of length {@code input.length * 2} holding the bit-shuffled data
 * @throws IOException if the native bit-shuffling routine reports an error
 */
public static byte[] bitShuffle(short[] input) throws IOException {
    final int byteLength = input.length * 2;
    byte[] output = new byte[byteLength];
    if (impl.supportBitSuffle()) {
        // The native call returns the number of processed bytes, or a negative
        // error code from the bitshuffle library; never ignore the latter.
        int processed = impl.bitShuffle(input, 0, 2, byteLength, output, 0);
        if (processed < 0) {
            throw new IOException("bit-shuffling failed (native error code " + processed + ")");
        }
    } else {
        arrayCopy(input, 0, byteLength, output, 0);
    }
    return output;
}
/**
 * Apply a bit-shuffling filter to the input int array.
 * <p>
 * When the loaded native library reports bit-shuffling support, the native
 * routine is used; otherwise the input is passed to {@code arrayCopy}
 * instead (presumably yielding the raw, unshuffled bytes).
 *
 * @param input the int values to shuffle
 * @return a byte array of length {@code input.length * 4} holding the bit-shuffled data
 * @throws IOException if the native bit-shuffling routine reports an error
 */
public static byte[] bitShuffle(int[] input) throws IOException {
    final int byteLength = input.length * 4;
    byte[] output = new byte[byteLength];
    if (impl.supportBitSuffle()) {
        // The native call returns the number of processed bytes, or a negative
        // error code from the bitshuffle library; never ignore the latter.
        int processed = impl.bitShuffle(input, 0, 4, byteLength, output, 0);
        if (processed < 0) {
            throw new IOException("bit-shuffling failed (native error code " + processed + ")");
        }
    } else {
        arrayCopy(input, 0, byteLength, output, 0);
    }
    return output;
}
/**
 * Apply a bit-shuffling filter to the input long array.
 * <p>
 * When the loaded native library reports bit-shuffling support, the native
 * routine is used; otherwise the input is passed to {@code arrayCopy}
 * instead (presumably yielding the raw, unshuffled bytes).
 *
 * @param input the long values to shuffle
 * @return a byte array of length {@code input.length * 8} holding the bit-shuffled data
 * @throws IOException if the native bit-shuffling routine reports an error
 */
public static byte[] bitShuffle(long[] input) throws IOException {
    final int byteLength = input.length * 8;
    byte[] output = new byte[byteLength];
    if (impl.supportBitSuffle()) {
        // The native call returns the number of processed bytes, or a negative
        // error code from the bitshuffle library; never ignore the latter.
        int processed = impl.bitShuffle(input, 0, 8, byteLength, output, 0);
        if (processed < 0) {
            throw new IOException("bit-shuffling failed (native error code " + processed + ")");
        }
    } else {
        arrayCopy(input, 0, byteLength, output, 0);
    }
    return output;
}
/**
 * Apply a bit-shuffling filter to the input float array.
 * <p>
 * When the loaded native library reports bit-shuffling support, the native
 * routine is used; otherwise the input is passed to {@code arrayCopy}
 * instead (presumably yielding the raw, unshuffled bytes).
 *
 * @param input the float values to shuffle
 * @return a byte array of length {@code input.length * 4} holding the bit-shuffled data
 * @throws IOException if the native bit-shuffling routine reports an error
 */
public static byte[] bitShuffle(float[] input) throws IOException {
    final int byteLength = input.length * 4;
    byte[] output = new byte[byteLength];
    if (impl.supportBitSuffle()) {
        // The native call returns the number of processed bytes, or a negative
        // error code from the bitshuffle library; never ignore the latter.
        int processed = impl.bitShuffle(input, 0, 4, byteLength, output, 0);
        if (processed < 0) {
            throw new IOException("bit-shuffling failed (native error code " + processed + ")");
        }
    } else {
        arrayCopy(input, 0, byteLength, output, 0);
    }
    return output;
}
/**
 * Apply a bit-shuffling filter to the input double array.
 * <p>
 * When the loaded native library reports bit-shuffling support, the native
 * routine is used; otherwise the input is passed to {@code arrayCopy}
 * instead (presumably yielding the raw, unshuffled bytes).
 *
 * @param input the double values to shuffle
 * @return a byte array of length {@code input.length * 8} holding the bit-shuffled data
 * @throws IOException if the native bit-shuffling routine reports an error
 */
public static byte[] bitShuffle(double[] input) throws IOException {
    final int byteLength = input.length * 8;
    byte[] output = new byte[byteLength];
    if (impl.supportBitSuffle()) {
        // The native call returns the number of processed bytes, or a negative
        // error code from the bitshuffle library; never ignore the latter.
        int processed = impl.bitShuffle(input, 0, 8, byteLength, output, 0);
        if (processed < 0) {
            throw new IOException("bit-shuffling failed (native error code " + processed + ")");
        }
    } else {
        arrayCopy(input, 0, byteLength, output, 0);
    }
    return output;
}
/**
 * Convert the input bit-shuffled byte array back into the original short array.
 * <p>
 * When the loaded native library reports bit-shuffling support, the native
 * un-shuffling routine is used; otherwise the bytes are passed to
 * {@code arrayCopy} instead.
 *
 * @param input bit-shuffled bytes; the length must be a multiple of 2
 * @return a short array with {@code input.length / 2} elements
 * @throws IOException if the input length is not a multiple of 2, or if the
 *         native un-shuffling routine reports an error
 */
public static short[] bitUnShuffleShortArray(byte[] input) throws IOException {
    // A trailing partial element cannot be represented in the output array;
    // reject it instead of silently dropping (or over-copying) those bytes.
    if (input.length % 2 != 0) {
        throw new IOException("length of bit-shuffled input (" + input.length + ") is not a multiple of 2");
    }
    short[] output = new short[input.length / 2];
    if (impl.supportBitSuffle()) {
        // Negative return values are error codes from the bitshuffle library.
        int processed = impl.bitUnShuffle(input, 0, 2, input.length, output, 0);
        if (processed < 0) {
            throw new IOException("bit-unshuffling failed (native error code " + processed + ")");
        }
    } else {
        arrayCopy(input, 0, input.length, output, 0);
    }
    return output;
}
/**
 * Convert the input bit-shuffled byte array back into the original int array.
 * <p>
 * When the loaded native library reports bit-shuffling support, the native
 * un-shuffling routine is used; otherwise the bytes are passed to
 * {@code arrayCopy} instead.
 *
 * @param input bit-shuffled bytes; the length must be a multiple of 4
 * @return an int array with {@code input.length / 4} elements
 * @throws IOException if the input length is not a multiple of 4, or if the
 *         native un-shuffling routine reports an error
 */
public static int[] bitUnShuffleIntArray(byte[] input) throws IOException {
    // A trailing partial element cannot be represented in the output array;
    // reject it instead of silently dropping (or over-copying) those bytes.
    if (input.length % 4 != 0) {
        throw new IOException("length of bit-shuffled input (" + input.length + ") is not a multiple of 4");
    }
    int[] output = new int[input.length / 4];
    if (impl.supportBitSuffle()) {
        // Negative return values are error codes from the bitshuffle library.
        int processed = impl.bitUnShuffle(input, 0, 4, input.length, output, 0);
        if (processed < 0) {
            throw new IOException("bit-unshuffling failed (native error code " + processed + ")");
        }
    } else {
        arrayCopy(input, 0, input.length, output, 0);
    }
    return output;
}
/**
 * Convert the input bit-shuffled byte array back into the original long array.
 * <p>
 * When the loaded native library reports bit-shuffling support, the native
 * un-shuffling routine is used; otherwise the bytes are passed to
 * {@code arrayCopy} instead.
 *
 * @param input bit-shuffled bytes; the length must be a multiple of 8
 * @return a long array with {@code input.length / 8} elements
 * @throws IOException if the input length is not a multiple of 8, or if the
 *         native un-shuffling routine reports an error
 */
public static long[] bitUnShuffleLongArray(byte[] input) throws IOException {
    // A trailing partial element cannot be represented in the output array;
    // reject it instead of silently dropping (or over-copying) those bytes.
    if (input.length % 8 != 0) {
        throw new IOException("length of bit-shuffled input (" + input.length + ") is not a multiple of 8");
    }
    long[] output = new long[input.length / 8];
    if (impl.supportBitSuffle()) {
        // Negative return values are error codes from the bitshuffle library.
        int processed = impl.bitUnShuffle(input, 0, 8, input.length, output, 0);
        if (processed < 0) {
            throw new IOException("bit-unshuffling failed (native error code " + processed + ")");
        }
    } else {
        arrayCopy(input, 0, input.length, output, 0);
    }
    return output;
}
/**
 * Convert the input bit-shuffled byte array back into the original float array.
 * <p>
 * When the loaded native library reports bit-shuffling support, the native
 * un-shuffling routine is used; otherwise the bytes are passed to
 * {@code arrayCopy} instead.
 *
 * @param input bit-shuffled bytes; the length must be a multiple of 4
 * @return a float array with {@code input.length / 4} elements
 * @throws IOException if the input length is not a multiple of 4, or if the
 *         native un-shuffling routine reports an error
 */
public static float[] bitUnShuffleFloatArray(byte[] input) throws IOException {
    // A trailing partial element cannot be represented in the output array;
    // reject it instead of silently dropping (or over-copying) those bytes.
    if (input.length % 4 != 0) {
        throw new IOException("length of bit-shuffled input (" + input.length + ") is not a multiple of 4");
    }
    float[] output = new float[input.length / 4];
    if (impl.supportBitSuffle()) {
        // Negative return values are error codes from the bitshuffle library.
        int processed = impl.bitUnShuffle(input, 0, 4, input.length, output, 0);
        if (processed < 0) {
            throw new IOException("bit-unshuffling failed (native error code " + processed + ")");
        }
    } else {
        arrayCopy(input, 0, input.length, output, 0);
    }
    return output;
}
/**
 * Convert the input bit-shuffled byte array back into the original double array.
 * <p>
 * When the loaded native library reports bit-shuffling support, the native
 * un-shuffling routine is used; otherwise the bytes are passed to
 * {@code arrayCopy} instead.
 *
 * @param input bit-shuffled bytes; the length must be a multiple of 8
 * @return a double array with {@code input.length / 8} elements
 * @throws IOException if the input length is not a multiple of 8, or if the
 *         native un-shuffling routine reports an error
 */
public static double[] bitUnShuffleDoubleArray(byte[] input) throws IOException {
    // A trailing partial element cannot be represented in the output array;
    // reject it instead of silently dropping (or over-copying) those bytes.
    if (input.length % 8 != 0) {
        throw new IOException("length of bit-shuffled input (" + input.length + ") is not a multiple of 8");
    }
    double[] output = new double[input.length / 8];
    if (impl.supportBitSuffle()) {
        // Negative return values are error codes from the bitshuffle library.
        int processed = impl.bitUnShuffle(input, 0, 8, input.length, output, 0);
        if (processed < 0) {
            throw new IOException("bit-unshuffling failed (native error code " + processed + ")");
        }
    } else {
        arrayCopy(input, 0, input.length, output, 0);
    }
    return output;
}
}

View File

@ -18,6 +18,11 @@
#include <snappy.h>
#include "SnappyNative.h"
#ifdef SNAPPY_BITSHUFFLE_ENABLED
#include <bitshuffle.h>
#include <stdint.h>
#endif
void throw_exception(JNIEnv *env, jobject self, int errorCode)
{
jclass c = env->FindClass("org/xerial/snappy/SnappyNative");
@ -170,6 +175,100 @@ JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_
/*
 * Class:     org_xerial_snappy_SnappyNative
 * Method:    supportBitSuffle
 * Signature: ()Z
 *
 * Reports whether this library was compiled with SNAPPY_BITSHUFFLE_ENABLED
 * defined. The answer is fixed at compile time.
 */
JNIEXPORT jboolean JNICALL Java_org_xerial_snappy_SnappyNative_supportBitSuffle
(JNIEnv *, jobject) {
#ifdef SNAPPY_BITSHUFFLE_ENABLED
    const bool compiledWithBitshuffle = true;
#else
    const bool compiledWithBitshuffle = false;
#endif
    return static_cast<jboolean>(compiledWithBitshuffle);
}
/*
 * Class:     org_xerial_snappy_SnappyNative
 * Method:    bitShuffle
 * Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
 *
 * Bit-shuffles (length / typeSize) elements of typeSize bytes each, reading
 * from the primitive array `input` starting at byte offset inputOffset and
 * writing to the primitive array `output` starting at byte offset outputOffset.
 *
 * Returns the value reported by bshuf_bitshuffle (number of processed bytes).
 * On invalid arguments, on failure to pin the arrays, on a bitshuffle error, or
 * when the library was built without SNAPPY_BITSHUFFLE_ENABLED, a Java
 * exception is raised through throw_exception and 0 is returned.
 */
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitShuffle
(JNIEnv * env, jobject self, jobject input, jint inputOffset, jint typeSize, jint length, jobject output, jint outputOffset)
{
#ifdef SNAPPY_BITSHUFFLE_ENABLED
    // Validate before pinning anything: typeSize == 0 would divide by zero
    // below (crashing the JVM), and negative values would wrap to huge size_t
    // values or index before the start of the arrays.
    // Error code 1 is the generic code this function already uses in its
    // "unsupported" path. NOTE(review): confirm the intended error-code mapping.
    if(typeSize <= 0 || length < 0 || inputOffset < 0 || outputOffset < 0) {
        throw_exception(env, self, 1);
        return 0;
    }

    char* in = (char*) env->GetPrimitiveArrayCritical((jarray) input, 0);
    char* out = (char*) env->GetPrimitiveArrayCritical((jarray) output, 0);
    if(in == 0 || out == 0) {
        // out of memory
        if(in != 0) {
            env->ReleasePrimitiveArrayCritical((jarray) input, in, JNI_ABORT);
        }
        if(out != 0) {
            env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
        }
        throw_exception(env, self, 4);
        return 0;
    }

    int64_t processedBytes = bshuf_bitshuffle(
        in + inputOffset, out + outputOffset, (size_t) (length / typeSize), (size_t) typeSize, 0);

    // The input was only read, so JNI_ABORT avoids a pointless copy-back;
    // the output must be committed with mode 0.
    env->ReleasePrimitiveArrayCritical((jarray) input, in, JNI_ABORT);
    env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);

    // bitshuffle signals failure with a negative value; surface it as an
    // exception (after the arrays are released) instead of a bogus byte count.
    if(processedBytes < 0) {
        throw_exception(env, self, 1);
        return 0;
    }
    return (jint) processedBytes;
#else
    // Returns an error code for unsupported operations
    throw_exception(env, self, 1);
    return (jint) 0;
#endif
}
/*
 * Class:     org_xerial_snappy_SnappyNative
 * Method:    bitUnShuffle
 * Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
 *
 * Reverses a bit-shuffle: processes (length / typeSize) elements of typeSize
 * bytes each, reading from the primitive array `input` starting at byte offset
 * inputOffset and writing to the primitive array `output` starting at byte
 * offset outputOffset.
 *
 * Returns the value reported by bshuf_bitunshuffle (number of processed bytes).
 * On invalid arguments, on failure to pin the arrays, on a bitshuffle error, or
 * when the library was built without SNAPPY_BITSHUFFLE_ENABLED, a Java
 * exception is raised through throw_exception and 0 is returned.
 */
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitUnShuffle
(JNIEnv * env, jobject self, jobject input, jint inputOffset, jint typeSize, jint length, jobject output, jint outputOffset)
{
#ifdef SNAPPY_BITSHUFFLE_ENABLED
    // Validate before pinning anything: typeSize == 0 would divide by zero
    // below (crashing the JVM), and negative values would wrap to huge size_t
    // values or index before the start of the arrays.
    // Error code 1 is the generic code this function already uses in its
    // "unsupported" path. NOTE(review): confirm the intended error-code mapping.
    if(typeSize <= 0 || length < 0 || inputOffset < 0 || outputOffset < 0) {
        throw_exception(env, self, 1);
        return 0;
    }

    char* in = (char*) env->GetPrimitiveArrayCritical((jarray) input, 0);
    char* out = (char*) env->GetPrimitiveArrayCritical((jarray) output, 0);
    if(in == 0 || out == 0) {
        // out of memory
        if(in != 0) {
            env->ReleasePrimitiveArrayCritical((jarray) input, in, JNI_ABORT);
        }
        if(out != 0) {
            env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
        }
        throw_exception(env, self, 4);
        return 0;
    }

    int64_t processedBytes = bshuf_bitunshuffle(
        in + inputOffset, out + outputOffset, (size_t) (length / typeSize), (size_t) typeSize, 0);

    // The input was only read, so JNI_ABORT avoids a pointless copy-back;
    // the output must be committed with mode 0.
    env->ReleasePrimitiveArrayCritical((jarray) input, in, JNI_ABORT);
    env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);

    // bitshuffle signals failure with a negative value; surface it as an
    // exception (after the arrays are released) instead of a bogus byte count.
    if(processedBytes < 0) {
        throw_exception(env, self, 1);
        return 0;
    }
    return (jint) processedBytes;
#else
    // Returns an error code for unsupported operations
    throw_exception(env, self, 1);
    return (jint) 0;
#endif
}
/*
* Class: org_xerial_snappy_Snappy
* Method: maxCompressedLength

24
src/main/java/org/xerial/snappy/SnappyNative.h Executable file → Normal file
View File

@ -63,6 +63,30 @@ JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_lang_Object_2IILjava_lang_Object_2I
(JNIEnv *, jobject, jobject, jint, jint, jobject, jint);
/*
* Class: org_xerial_snappy_SnappyNative
* Method: supportBitSuffle
* Signature: ()Z
*/
JNIEXPORT jboolean JNICALL Java_org_xerial_snappy_SnappyNative_supportBitSuffle
(JNIEnv *, jobject);
/*
* Class: org_xerial_snappy_SnappyNative
* Method: bitShuffle
* Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
*/
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitShuffle
(JNIEnv *, jobject, jobject, jint, jint, jint, jobject, jint);
/*
* Class: org_xerial_snappy_SnappyNative
* Method: bitUnShuffle
* Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
*/
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitUnShuffle
(JNIEnv *, jobject, jobject, jint, jint, jint, jobject, jint);
/*
* Class: org_xerial_snappy_SnappyNative
* Method: maxCompressedLength

View File

@ -66,6 +66,22 @@ public class SnappyNative
public native int rawUncompress(Object input, int inputOffset, int inputLength, Object output, int outputOffset)
throws IOException;
// ------------------------------------------------------------------------
// Bit-shuffling routines to improve compression of typed binary data.
// A quick benchmark result can be found in the gist below:
// https://gist.github.com/maropu/01103215df34b317a7a7
// ------------------------------------------------------------------------
// Returns true iff the native library loaded in the JVM can run bit-shuffling,
// i.e. it was compiled with bit-shuffling enabled.
// Bit-shuffling is executable only in x86 environments that support SSE/AVX instructions.
public native boolean supportBitSuffle();
// Bit-shuffles (byteLength / typeSize) elements of typeSize bytes each.
//   input        - primitive array to read from
//   inputOffset  - byte offset into input at which reading starts
//   typeSize     - size in bytes of one element (e.g. 2 for short, 8 for long)
//   byteLength   - number of input bytes to process
//   output       - primitive array that receives the shuffled bytes
//   outputOffset - byte offset into output at which writing starts
// Returns the value reported by the native bitshuffle routine (the number of
// processed bytes). Raises an exception when the native library was built
// without bit-shuffling support.
public native int bitShuffle(Object input, int inputOffset, int typeSize, int byteLength, Object output, int outputOffset)
throws IOException;
// Inverse of bitShuffle: takes bit-shuffled bytes from input and writes the
// restored elements into output. The parameters and the return value have the
// same meaning as for bitShuffle.
public native int bitUnShuffle(Object input, int inputOffset, int typeSize, int byteLength, Object output, int outputOffset)
throws IOException;
// Returns the maximal size of the compressed representation of
// input data that is "source_bytes" bytes in length;
public native int maxCompressedLength(int source_bytes);

View File

@ -329,4 +329,34 @@ public class SnappyTest
_logger.debug(e);
}
}
@Test
public void bitShuffleLongArray()
        throws Exception
{
    // Round trip: shuffling and then un-shuffling must reproduce the longs.
    final long[] expected = {2, 3, 15, 4234, 43251531412342342L, 23423422342L};
    final long[] roundTripped = Snappy.bitUnShuffleLongArray(Snappy.bitShuffle(expected));
    assertArrayEquals(expected, roundTripped);
}
@Test
public void bitShuffleShortArray()
        throws Exception
{
    // Round trip: shuffling and then un-shuffling must reproduce the shorts.
    final short[] expected = {432, -32267, 1, 3, 34, 43, 34, Short.MAX_VALUE, -1};
    final short[] roundTripped = Snappy.bitUnShuffleShortArray(Snappy.bitShuffle(expected));
    assertArrayEquals(expected, roundTripped);
}
@Test
public void bitShuffleIntArray()
        throws Exception
{
    // Round trip: shuffling and then un-shuffling must reproduce the ints.
    final int[] expected = {432, -32267, 1, 3, 34, 43, 34, Short.MAX_VALUE, -1, Integer.MAX_VALUE, 3424, 43};
    final int[] roundTripped = Snappy.bitUnShuffleIntArray(Snappy.bitShuffle(expected));
    assertArrayEquals(expected, roundTripped);
}
}