Implement bit-shuffling interfaces to improve LZ performance

This commit is contained in:
Takeshi YAMAMURO 2016-03-29 23:14:33 +09:00
parent 8c04209b55
commit 99b297cfcd
7 changed files with 400 additions and 17 deletions

View File

@ -11,13 +11,53 @@ SNAPPY_ARCHIVE:=$(TARGET)/snappy-$(VERSION).tar.gz
SNAPPY_CC:=snappy-sinksource.cc snappy-stubs-internal.cc snappy.cc SNAPPY_CC:=snappy-sinksource.cc snappy-stubs-internal.cc snappy.cc
SNAPPY_SRC_DIR:=$(TARGET)/snappy-$(VERSION) SNAPPY_SRC_DIR:=$(TARGET)/snappy-$(VERSION)
SNAPPY_SRC:=$(addprefix $(SNAPPY_SRC_DIR)/,$(SNAPPY_CC)) SNAPPY_SRC:=$(addprefix $(SNAPPY_SRC_DIR)/,$(SNAPPY_CC))
SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) SnappyNative.o)
SNAPPY_GIT_UNPACKED:=$(TARGET)/snappy-git-extracted.log
SNAPPY_GIT_REPO_URL:=https://github.com/google/snappy SNAPPY_GIT_REPO_URL:=https://github.com/google/snappy
SNAPPY_GIT_REV:=2b9152d9c5bed71dffb7f7f6c7a3ec48b058ff2d # 1.1.3 with autogen.sh fix SNAPPY_GIT_REV:=2b9152d9c5bed71dffb7f7f6c7a3ec48b058ff2d # 1.1.3 with autogen.sh fix
SNAPPY_UNPACKED:=$(TARGET)/snappy-extracted.log
SNAPPY_GIT_UNPACKED:=$(TARGET)/snappy-git-extracted.log
# Locations of the bitshuffle sources; only defined when ENABLE_BITSHUFFLE is set.
ifdef ENABLE_BITSHUFFLE
# TODO: Upgrade to a stable release
# Revision identifier used both in the download URL and in the extracted directory name.
BITSHUFFLE_VERSION:=55f9b4caec73fa21d13947cacea1295926781440
BITSHUFFLE_ARCHIVE:=$(TARGET)/bitshuffle-$(BITSHUFFLE_VERSION).tar.gz
# C sources from bitshuffle that are compiled into the native library.
BITSHUFFLE_C:=bitshuffle_core.c iochain.c
BITSHUFFLE_SRC_DIR:=$(TARGET)/bitshuffle-$(BITSHUFFLE_VERSION)/src
BITSHUFFLE_SRC:=$(addprefix $(BITSHUFFLE_SRC_DIR)/,$(BITSHUFFLE_C))
# Stamp file recording that the archive has been extracted.
BITSHUFFLE_UNPACKED:=$(TARGET)/bitshuffle-extracted.log
endif
# When USE_GIT is set, emit a parse-time warning for each git setting that is
# missing. $(warning ...) only prints a message; it does not stop the build.
ifdef USE_GIT
ifndef GIT_REPO_URL
$(warning GIT_REPO_URL is not set when using git)
endif
ifndef GIT_SNAPPY_BRANCH
$(warning GIT_SNAPPY_BRANCH is not set when using git)
endif
endif
ifdef ENABLE_BITSHUFFLE
# Download the bitshuffle source archive for the pinned revision.
$(BITSHUFFLE_ARCHIVE):
@mkdir -p $(@D)
curl -L -o$@ https://github.com/kiyo-masui/bitshuffle/archive/$(BITSHUFFLE_VERSION).tar.gz
# Extract the archive under $(TARGET) and record completion in the stamp file.
$(BITSHUFFLE_UNPACKED): $(BITSHUFFLE_ARCHIVE)
$(TAR) xvfz $< -C $(TARGET)
touch $@
# The bitshuffle sources only exist once the archive has been extracted.
$(BITSHUFFLE_SRC): $(BITSHUFFLE_UNPACKED)
# Build bitshuffle's .c files into $(SNAPPY_OUT); note they are compiled with $(CXX) and $(CXXFLAGS).
$(SNAPPY_OUT)/%.o : $(BITSHUFFLE_SRC_DIR)/%.c
@mkdir -p $(@D)
$(CXX) $(CXXFLAGS) -c $< -o $@
# Object list: Snappy objects, the bitshuffle objects, and the JNI glue.
SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) $(patsubst %.c,%.o,$(BITSHUFFLE_C)) SnappyNative.o)
# SNAPPY_BITSHUFFLE_ENABLED switches on the bit-shuffling code paths in the native sources.
CXXFLAGS:=$(CXXFLAGS) -DSNAPPY_BITSHUFFLE_ENABLED -I$(SNAPPY_SRC_DIR) -I$(BITSHUFFLE_SRC_DIR)
else
SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) SnappyNative.o)
CXXFLAGS:=$(CXXFLAGS) -I$(SNAPPY_SRC_DIR) CXXFLAGS:=$(CXXFLAGS) -I$(SNAPPY_SRC_DIR)
endif
ifeq ($(OS_NAME),SunOS) ifeq ($(OS_NAME),SunOS)
TAR:= gtar TAR:= gtar
@ -29,6 +69,11 @@ $(SNAPPY_ARCHIVE):
@mkdir -p $(@D) @mkdir -p $(@D)
curl -L -o$@ https://github.com/google/snappy/releases/download/$(VERSION)/snappy-$(VERSION).tar.gz curl -L -o$@ https://github.com/google/snappy/releases/download/$(VERSION)/snappy-$(VERSION).tar.gz
# Extract the Snappy release archive and run ./configure inside the extracted tree.
# The stamp file is touched last: if ./configure fails, the stamp is not created,
# so the next make invocation retries instead of treating the step as done.
$(SNAPPY_UNPACKED): $(SNAPPY_ARCHIVE)
	$(TAR) xvfz $< -C $(TARGET)
	cd $(SNAPPY_SRC_DIR) && ./configure
	touch $@
$(SNAPPY_GIT_UNPACKED): $(SNAPPY_GIT_UNPACKED):
rm -rf $(SNAPPY_SRC_DIR) rm -rf $(SNAPPY_SRC_DIR)
@mkdir -p $(SNAPPY_SRC_DIR) @mkdir -p $(SNAPPY_SRC_DIR)
@ -146,4 +191,3 @@ $(TARGET)/snappy-java-$(VERSION)-src.upload:
./googlecode_upload.py -s "source code archive" -p snappy-java -l "Type-Source,OpSys-All" -u "$(GOOGLECODE_USER)" target/snappy-java-$(VERSION).tar.gz ./googlecode_upload.py -s "source code archive" -p snappy-java -l "Type-Source,OpSys-All" -u "$(GOOGLECODE_USER)" target/snappy-java-$(VERSION).tar.gz
touch $@ touch $@

View File

@ -903,4 +903,174 @@ public class Snappy
byte[] uncompressed = uncompress(input); byte[] uncompressed = uncompress(input);
return new String(uncompressed, encoding); return new String(uncompressed, encoding);
} }
/**
 * Apply a bit-shuffling filter to the input short array.
 * <p>
 * When the loaded native library does not support bit-shuffling, the input
 * is passed to {@code arrayCopy} instead of being shuffled.
 *
 * @param input the short values to shuffle
 * @return a byte array of {@code input.length * 2} bytes holding the result
 * @throws IOException if the native bit-shuffling routine reports an error
 *                     (a negative return value)
 */
public static byte[] bitShuffle(short[] input) throws IOException {
    final int byteLength = input.length * 2;
    byte[] output = new byte[byteLength];
    if (impl.supportBitSuffle()) {
        // The native call returns a negative error code on failure; do not hand back a bogus buffer.
        int processed = impl.bitShuffle(input, 0, 2, byteLength, output, 0);
        if (processed < 0) {
            throw new IOException("bit-shuffling failed with error code " + processed);
        }
    } else {
        arrayCopy(input, 0, byteLength, output, 0);
    }
    return output;
}
/**
 * Apply a bit-shuffling filter to the input int array.
 * <p>
 * When the loaded native library does not support bit-shuffling, the input
 * is passed to {@code arrayCopy} instead of being shuffled.
 *
 * @param input the int values to shuffle
 * @return a byte array of {@code input.length * 4} bytes holding the result
 * @throws IOException if the native bit-shuffling routine reports an error
 *                     (a negative return value)
 */
public static byte[] bitShuffle(int[] input) throws IOException {
    final int byteLength = input.length * 4;
    byte[] output = new byte[byteLength];
    if (impl.supportBitSuffle()) {
        // The native call returns a negative error code on failure; do not hand back a bogus buffer.
        int processed = impl.bitShuffle(input, 0, 4, byteLength, output, 0);
        if (processed < 0) {
            throw new IOException("bit-shuffling failed with error code " + processed);
        }
    } else {
        arrayCopy(input, 0, byteLength, output, 0);
    }
    return output;
}
/**
 * Apply a bit-shuffling filter to the input long array.
 * <p>
 * When the loaded native library does not support bit-shuffling, the input
 * is passed to {@code arrayCopy} instead of being shuffled.
 *
 * @param input the long values to shuffle
 * @return a byte array of {@code input.length * 8} bytes holding the result
 * @throws IOException if the native bit-shuffling routine reports an error
 *                     (a negative return value)
 */
public static byte[] bitShuffle(long[] input) throws IOException {
    final int byteLength = input.length * 8;
    byte[] output = new byte[byteLength];
    if (impl.supportBitSuffle()) {
        // The native call returns a negative error code on failure; do not hand back a bogus buffer.
        int processed = impl.bitShuffle(input, 0, 8, byteLength, output, 0);
        if (processed < 0) {
            throw new IOException("bit-shuffling failed with error code " + processed);
        }
    } else {
        arrayCopy(input, 0, byteLength, output, 0);
    }
    return output;
}
/**
 * Apply a bit-shuffling filter to the input float array.
 * <p>
 * When the loaded native library does not support bit-shuffling, the input
 * is passed to {@code arrayCopy} instead of being shuffled.
 *
 * @param input the float values to shuffle
 * @return a byte array of {@code input.length * 4} bytes holding the result
 * @throws IOException if the native bit-shuffling routine reports an error
 *                     (a negative return value)
 */
public static byte[] bitShuffle(float[] input) throws IOException {
    final int byteLength = input.length * 4;
    byte[] output = new byte[byteLength];
    if (impl.supportBitSuffle()) {
        // The native call returns a negative error code on failure; do not hand back a bogus buffer.
        int processed = impl.bitShuffle(input, 0, 4, byteLength, output, 0);
        if (processed < 0) {
            throw new IOException("bit-shuffling failed with error code " + processed);
        }
    } else {
        arrayCopy(input, 0, byteLength, output, 0);
    }
    return output;
}
/**
 * Apply a bit-shuffling filter to the input double array.
 * <p>
 * When the loaded native library does not support bit-shuffling, the input
 * is passed to {@code arrayCopy} instead of being shuffled.
 *
 * @param input the double values to shuffle
 * @return a byte array of {@code input.length * 8} bytes holding the result
 * @throws IOException if the native bit-shuffling routine reports an error
 *                     (a negative return value)
 */
public static byte[] bitShuffle(double[] input) throws IOException {
    final int byteLength = input.length * 8;
    byte[] output = new byte[byteLength];
    if (impl.supportBitSuffle()) {
        // The native call returns a negative error code on failure; do not hand back a bogus buffer.
        int processed = impl.bitShuffle(input, 0, 8, byteLength, output, 0);
        if (processed < 0) {
            throw new IOException("bit-shuffling failed with error code " + processed);
        }
    } else {
        arrayCopy(input, 0, byteLength, output, 0);
    }
    return output;
}
/**
 * Convert a bit-shuffled byte array back into the original short array.
 * <p>
 * When the loaded native library does not support bit-shuffling, the input
 * is passed to {@code arrayCopy} instead of being un-shuffled.
 *
 * @param input bytes previously produced by {@code bitShuffle(short[])}
 * @return a short array of {@code input.length / 2} elements
 * @throws IOException if the native bit-unshuffling routine reports an error
 *                     (a negative return value)
 */
public static short[] bitUnShuffleShortArray(byte[] input) throws IOException {
    short[] output = new short[input.length / 2];
    if (impl.supportBitSuffle()) {
        // The native call returns a negative error code on failure; do not hand back a bogus array.
        int processed = impl.bitUnShuffle(input, 0, 2, input.length, output, 0);
        if (processed < 0) {
            throw new IOException("bit-unshuffling failed with error code " + processed);
        }
    } else {
        arrayCopy(input, 0, input.length, output, 0);
    }
    return output;
}
/**
 * Convert a bit-shuffled byte array back into the original int array.
 * <p>
 * When the loaded native library does not support bit-shuffling, the input
 * is passed to {@code arrayCopy} instead of being un-shuffled.
 *
 * @param input bytes previously produced by {@code bitShuffle(int[])}
 * @return an int array of {@code input.length / 4} elements
 * @throws IOException if the native bit-unshuffling routine reports an error
 *                     (a negative return value)
 */
public static int[] bitUnShuffleIntArray(byte[] input) throws IOException {
    int[] output = new int[input.length / 4];
    if (impl.supportBitSuffle()) {
        // The native call returns a negative error code on failure; do not hand back a bogus array.
        int processed = impl.bitUnShuffle(input, 0, 4, input.length, output, 0);
        if (processed < 0) {
            throw new IOException("bit-unshuffling failed with error code " + processed);
        }
    } else {
        arrayCopy(input, 0, input.length, output, 0);
    }
    return output;
}
/**
 * Convert a bit-shuffled byte array back into the original long array.
 * <p>
 * When the loaded native library does not support bit-shuffling, the input
 * is passed to {@code arrayCopy} instead of being un-shuffled.
 *
 * @param input bytes previously produced by {@code bitShuffle(long[])}
 * @return a long array of {@code input.length / 8} elements
 * @throws IOException if the native bit-unshuffling routine reports an error
 *                     (a negative return value)
 */
public static long[] bitUnShuffleLongArray(byte[] input) throws IOException {
    long[] output = new long[input.length / 8];
    if (impl.supportBitSuffle()) {
        // The native call returns a negative error code on failure; do not hand back a bogus array.
        int processed = impl.bitUnShuffle(input, 0, 8, input.length, output, 0);
        if (processed < 0) {
            throw new IOException("bit-unshuffling failed with error code " + processed);
        }
    } else {
        arrayCopy(input, 0, input.length, output, 0);
    }
    return output;
}
/**
 * Convert a bit-shuffled byte array back into the original float array.
 * <p>
 * When the loaded native library does not support bit-shuffling, the input
 * is passed to {@code arrayCopy} instead of being un-shuffled.
 *
 * @param input bytes previously produced by {@code bitShuffle(float[])}
 * @return a float array of {@code input.length / 4} elements
 * @throws IOException if the native bit-unshuffling routine reports an error
 *                     (a negative return value)
 */
public static float[] bitUnShuffleFloatArray(byte[] input) throws IOException {
    float[] output = new float[input.length / 4];
    if (impl.supportBitSuffle()) {
        // The native call returns a negative error code on failure; do not hand back a bogus array.
        int processed = impl.bitUnShuffle(input, 0, 4, input.length, output, 0);
        if (processed < 0) {
            throw new IOException("bit-unshuffling failed with error code " + processed);
        }
    } else {
        arrayCopy(input, 0, input.length, output, 0);
    }
    return output;
}
/**
 * Convert a bit-shuffled byte array back into the original double array.
 * <p>
 * When the loaded native library does not support bit-shuffling, the input
 * is passed to {@code arrayCopy} instead of being un-shuffled.
 *
 * @param input bytes previously produced by {@code bitShuffle(double[])}
 * @return a double array of {@code input.length / 8} elements
 * @throws IOException if the native bit-unshuffling routine reports an error
 *                     (a negative return value)
 */
public static double[] bitUnShuffleDoubleArray(byte[] input) throws IOException {
    double[] output = new double[input.length / 8];
    if (impl.supportBitSuffle()) {
        // The native call returns a negative error code on failure; do not hand back a bogus array.
        int processed = impl.bitUnShuffle(input, 0, 8, input.length, output, 0);
        if (processed < 0) {
            throw new IOException("bit-unshuffling failed with error code " + processed);
        }
    } else {
        arrayCopy(input, 0, input.length, output, 0);
    }
    return output;
}
} }

View File

@ -18,6 +18,11 @@
#include <snappy.h> #include <snappy.h>
#include "SnappyNative.h" #include "SnappyNative.h"
#ifdef SNAPPY_BITSHUFFLE_ENABLED
#include <bitshuffle.h>
#include <stdint.h>
#endif
void throw_exception(JNIEnv *env, jobject self, int errorCode) void throw_exception(JNIEnv *env, jobject self, int errorCode)
{ {
jclass c = env->FindClass("org/xerial/snappy/SnappyNative"); jclass c = env->FindClass("org/xerial/snappy/SnappyNative");
@ -170,6 +175,100 @@ JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_
/*
 * Class:     org_xerial_snappy_SnappyNative
 * Method:    supportBitSuffle
 * Signature: ()Z
 *
 * Reports whether this library was compiled with SNAPPY_BITSHUFFLE_ENABLED.
 */
JNIEXPORT jboolean JNICALL Java_org_xerial_snappy_SnappyNative_supportBitSuffle
  (JNIEnv *, jobject) {
#ifdef SNAPPY_BITSHUFFLE_ENABLED
    const jboolean supported = JNI_TRUE;
#else
    const jboolean supported = JNI_FALSE;
#endif
    return supported;
}
/*
 * Class:     org_xerial_snappy_SnappyNative
 * Method:    bitShuffle
 * Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
 *
 * Bit-shuffles (length / typeSize) elements of typeSize bytes each, reading from
 * `input` at byte offset inputOffset and writing to `output` at byte offset
 * outputOffset. Both objects are accessed as primitive arrays.
 *
 * Returns the value reported by bshuf_bitshuffle, narrowed to jint. When the
 * library is built without SNAPPY_BITSHUFFLE_ENABLED, an exception is raised via
 * throw_exception(…, 1) and 0 is returned.
 */
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitShuffle
  (JNIEnv * env, jobject self, jobject input, jint inputOffset, jint typeSize, jint length, jobject output, jint outputOffset)
{
#ifdef SNAPPY_BITSHUFFLE_ENABLED
    // Validate before touching memory: typeSize == 0 would divide by zero below, and
    // negative values would turn into huge size_t counts or out-of-range pointers.
    if(typeSize <= 0 || length < 0 || inputOffset < 0 || outputOffset < 0) {
        jclass iae = env->FindClass("java/lang/IllegalArgumentException");
        if(iae != 0) {
            env->ThrowNew(iae, "invalid offset, length, or typeSize");
        }
        return 0;
    }

    char* in = (char*) env->GetPrimitiveArrayCritical((jarray) input, 0);
    char* out = (char*) env->GetPrimitiveArrayCritical((jarray) output, 0);
    if(in == 0 || out == 0) {
        // out of memory
        if(in != 0) {
            env->ReleasePrimitiveArrayCritical((jarray) input, in, JNI_ABORT);
        }
        if(out != 0) {
            env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
        }
        throw_exception(env, self, 4);
        return 0;
    }

    int64_t processedBytes = bshuf_bitshuffle(
        in + inputOffset, out + outputOffset, (size_t) (length / typeSize), (size_t) typeSize, 0);

    // The input is only read, so it is released with JNI_ABORT (no copy-back needed);
    // the output is released with mode 0 so the written bytes reach the Java array.
    env->ReleasePrimitiveArrayCritical((jarray) input, in, JNI_ABORT);
    env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
    return (jint) processedBytes;
#else
    // Returns an error code for unsupported operations
    throw_exception(env, self, 1);
    return (jint) 0;
#endif
}
/*
 * Class:     org_xerial_snappy_SnappyNative
 * Method:    bitUnShuffle
 * Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
 *
 * Reverses bit-shuffling for (length / typeSize) elements of typeSize bytes each,
 * reading from `input` at byte offset inputOffset and writing to `output` at byte
 * offset outputOffset. Both objects are accessed as primitive arrays.
 *
 * Returns the value reported by bshuf_bitunshuffle, narrowed to jint. When the
 * library is built without SNAPPY_BITSHUFFLE_ENABLED, an exception is raised via
 * throw_exception(…, 1) and 0 is returned.
 */
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitUnShuffle
  (JNIEnv * env, jobject self, jobject input, jint inputOffset, jint typeSize, jint length, jobject output, jint outputOffset)
{
#ifdef SNAPPY_BITSHUFFLE_ENABLED
    // Validate before touching memory: typeSize == 0 would divide by zero below, and
    // negative values would turn into huge size_t counts or out-of-range pointers.
    if(typeSize <= 0 || length < 0 || inputOffset < 0 || outputOffset < 0) {
        jclass iae = env->FindClass("java/lang/IllegalArgumentException");
        if(iae != 0) {
            env->ThrowNew(iae, "invalid offset, length, or typeSize");
        }
        return 0;
    }

    char* in = (char*) env->GetPrimitiveArrayCritical((jarray) input, 0);
    char* out = (char*) env->GetPrimitiveArrayCritical((jarray) output, 0);
    if(in == 0 || out == 0) {
        // out of memory
        if(in != 0) {
            env->ReleasePrimitiveArrayCritical((jarray) input, in, JNI_ABORT);
        }
        if(out != 0) {
            env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
        }
        throw_exception(env, self, 4);
        return 0;
    }

    int64_t processedBytes = bshuf_bitunshuffle(
        in + inputOffset, out + outputOffset, (size_t) (length / typeSize), (size_t) typeSize, 0);

    // The input is only read, so it is released with JNI_ABORT (no copy-back needed);
    // the output is released with mode 0 so the written bytes reach the Java array.
    env->ReleasePrimitiveArrayCritical((jarray) input, in, JNI_ABORT);
    env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
    return (jint) processedBytes;
#else
    // Returns an error code for unsupported operations
    throw_exception(env, self, 1);
    return (jint) 0;
#endif
}
/* /*
* Class: org_xerial_snappy_Snappy * Class: org_xerial_snappy_Snappy
* Method: maxCompressedLength * Method: maxCompressedLength

24
src/main/java/org/xerial/snappy/SnappyNative.h Executable file → Normal file
View File

@ -63,6 +63,30 @@ JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_lang_Object_2IILjava_lang_Object_2I JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_lang_Object_2IILjava_lang_Object_2I
(JNIEnv *, jobject, jobject, jint, jint, jobject, jint); (JNIEnv *, jobject, jobject, jint, jint, jobject, jint);
/*
* Class: org_xerial_snappy_SnappyNative
* Method: supportBitSuffle
* Signature: ()Z
*/
JNIEXPORT jboolean JNICALL Java_org_xerial_snappy_SnappyNative_supportBitSuffle
(JNIEnv *, jobject);
/*
* Class: org_xerial_snappy_SnappyNative
* Method: bitShuffle
* Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
*/
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitShuffle
(JNIEnv *, jobject, jobject, jint, jint, jint, jobject, jint);
/*
* Class: org_xerial_snappy_SnappyNative
* Method: bitUnShuffle
* Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
*/
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitUnShuffle
(JNIEnv *, jobject, jobject, jint, jint, jint, jobject, jint);
/* /*
* Class: org_xerial_snappy_SnappyNative * Class: org_xerial_snappy_SnappyNative
* Method: maxCompressedLength * Method: maxCompressedLength

View File

@ -66,6 +66,22 @@ public class SnappyNative
public native int rawUncompress(Object input, int inputOffset, int inputLength, Object output, int outputOffset) public native int rawUncompress(Object input, int inputOffset, int inputLength, Object output, int outputOffset)
throws IOException; throws IOException;
// ------------------------------------------------------------------------
// Bit-shuffling routines to improve compression of typed binary data.
// A quick benchmark result can be found in the gist below:
// https://gist.github.com/maropu/01103215df34b317a7a7
// ------------------------------------------------------------------------
// Returns true iff the native library loaded into the JVM can run bit-shuffling
// (the native side answers according to whether it was built with bit-shuffle support).
// Bit-shuffling is executable only in x86 environments that support SSE/AVX instructions.
public native boolean supportBitSuffle();
// Bit-shuffles byteLength bytes of `input`, interpreted as elements of typeSize bytes,
// into `output`. inputOffset and outputOffset are applied as byte offsets by the native code.
// Returns the value reported by the native bit-shuffle routine.
public native int bitShuffle(Object input, int inputOffset, int typeSize, int byteLength, Object output, int outputOffset)
throws IOException;
// Inverse of bitShuffle: restores byteLength bytes of bit-shuffled `input` into `output`,
// using the same parameter conventions. Returns the value reported by the native routine.
public native int bitUnShuffle(Object input, int inputOffset, int typeSize, int byteLength, Object output, int outputOffset)
throws IOException;
// Returns the maximal size of the compressed representation of // Returns the maximal size of the compressed representation of
// input data that is "source_bytes" bytes in length; // input data that is "source_bytes" bytes in length;
public native int maxCompressedLength(int source_bytes); public native int maxCompressedLength(int source_bytes);

View File

@ -329,4 +329,34 @@ public class SnappyTest
_logger.debug(e); _logger.debug(e);
} }
} }
@Test
public void bitShuffleLongArray()
        throws Exception
{
    // Round trip: shuffling and then un-shuffling must give back the original values.
    final long[] original = {2, 3, 15, 4234, 43251531412342342L, 23423422342L};
    final long[] restored = Snappy.bitUnShuffleLongArray(Snappy.bitShuffle(original));
    assertArrayEquals(original, restored);
}
@Test
public void bitShuffleShortArray()
        throws Exception
{
    // Round trip: shuffling and then un-shuffling must give back the original values.
    final short[] original = {432, -32267, 1, 3, 34, 43, 34, Short.MAX_VALUE, -1};
    final short[] restored = Snappy.bitUnShuffleShortArray(Snappy.bitShuffle(original));
    assertArrayEquals(original, restored);
}
@Test
public void bitShuffleIntArray()
        throws Exception
{
    // Round trip: shuffling and then un-shuffling must give back the original values.
    final int[] original = {432, -32267, 1, 3, 34, 43, 34, Short.MAX_VALUE, -1, Integer.MAX_VALUE, 3424, 43};
    final int[] restored = Snappy.bitUnShuffleIntArray(Snappy.bitShuffle(original));
    assertArrayEquals(original, restored);
}
} }