Implement bit-shuffling interfaces to improve LZ performance
This commit is contained in:
parent
8c04209b55
commit
99b297cfcd
52
Makefile
52
Makefile
|
@ -11,13 +11,53 @@ SNAPPY_ARCHIVE:=$(TARGET)/snappy-$(VERSION).tar.gz
|
|||
SNAPPY_CC:=snappy-sinksource.cc snappy-stubs-internal.cc snappy.cc
|
||||
SNAPPY_SRC_DIR:=$(TARGET)/snappy-$(VERSION)
|
||||
SNAPPY_SRC:=$(addprefix $(SNAPPY_SRC_DIR)/,$(SNAPPY_CC))
|
||||
SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) SnappyNative.o)
|
||||
|
||||
SNAPPY_GIT_UNPACKED:=$(TARGET)/snappy-git-extracted.log
|
||||
SNAPPY_GIT_REPO_URL:=https://github.com/google/snappy
|
||||
SNAPPY_GIT_REV:=2b9152d9c5bed71dffb7f7f6c7a3ec48b058ff2d # 1.1.3 with autogen.sh fix
|
||||
SNAPPY_UNPACKED:=$(TARGET)/snappy-extracted.log
|
||||
SNAPPY_GIT_UNPACKED:=$(TARGET)/snappy-git-extracted.log
|
||||
|
||||
# Optional bitshuffle support: the variables below are only defined when
# ENABLE_BITSHUFFLE is set on the make command line or in the environment.
ifdef ENABLE_BITSHUFFLE
|
||||
# TODO: Upgrade to a stable release
|
||||
# Upstream commit hash used to name the archive and the extracted directory.
BITSHUFFLE_VERSION:=55f9b4caec73fa21d13947cacea1295926781440
|
||||
# Local path of the downloaded source tarball.
BITSHUFFLE_ARCHIVE:=$(TARGET)/bitshuffle-$(BITSHUFFLE_VERSION).tar.gz
|
||||
# The bitshuffle C sources that are compiled into the native library.
BITSHUFFLE_C:=bitshuffle_core.c iochain.c
|
||||
# Directory inside the extracted tree that holds those sources.
BITSHUFFLE_SRC_DIR:=$(TARGET)/bitshuffle-$(BITSHUFFLE_VERSION)/src
|
||||
# Full paths of the sources listed in BITSHUFFLE_C.
BITSHUFFLE_SRC:=$(addprefix $(BITSHUFFLE_SRC_DIR)/,$(BITSHUFFLE_C))
|
||||
# Stamp file recording that the archive has been extracted.
BITSHUFFLE_UNPACKED:=$(TARGET)/bitshuffle-extracted.log
|
||||
endif
|
||||
|
||||
# Sanity checks for git-based builds. $(warning ...) only prints a diagnostic
# while the makefile is parsed; it does not stop the build.
ifdef USE_GIT
|
||||
# The repository URL is expected to be supplied by the caller.
ifndef GIT_REPO_URL
|
||||
$(warning GIT_REPO_URL is not set when using git)
|
||||
endif
|
||||
# So is the snappy branch to build from.
ifndef GIT_SNAPPY_BRANCH
|
||||
$(warning GIT_SNAPPY_BRANCH is not set when using git)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef ENABLE_BITSHUFFLE
# Download the pinned bitshuffle snapshot.
# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page, and the download goes to a temporary name first so that a failed or
# interrupted transfer never leaves a file that looks like a valid archive.
$(BITSHUFFLE_ARCHIVE):
	@mkdir -p $(@D)
	curl -f -L -o$@.tmp https://github.com/kiyo-masui/bitshuffle/archive/$(BITSHUFFLE_VERSION).tar.gz
	mv $@.tmp $@

# Extract the archive under $(TARGET) and record that with a stamp file.
$(BITSHUFFLE_UNPACKED): $(BITSHUFFLE_ARCHIVE)
	$(TAR) xvfz $< -C $(TARGET)
	touch $@

# The individual source files appear once the archive has been extracted.
$(BITSHUFFLE_SRC): $(BITSHUFFLE_UNPACKED)

# The bitshuffle C sources are compiled with $(CXX) and the shared $(CXXFLAGS),
# and their objects are placed next to the snappy objects.
$(SNAPPY_OUT)/%.o : $(BITSHUFFLE_SRC_DIR)/%.c
	@mkdir -p $(@D)
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Objects to link: snappy + bitshuffle + the JNI glue.
SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) $(patsubst %.c,%.o,$(BITSHUFFLE_C)) SnappyNative.o)

# SNAPPY_BITSHUFFLE_ENABLED switches on the bitshuffle code paths in the JNI glue.
CXXFLAGS:=$(CXXFLAGS) -DSNAPPY_BITSHUFFLE_ENABLED -I$(SNAPPY_SRC_DIR) -I$(BITSHUFFLE_SRC_DIR)
else
# Without bitshuffle only snappy and the JNI glue are built.
SNAPPY_OBJ:=$(addprefix $(SNAPPY_OUT)/,$(patsubst %.cc,%.o,$(SNAPPY_CC)) SnappyNative.o)

CXXFLAGS:=$(CXXFLAGS) -I$(SNAPPY_SRC_DIR)
endif
|
||||
|
||||
ifeq ($(OS_NAME),SunOS)
|
||||
TAR:= gtar
|
||||
|
@ -29,6 +69,11 @@ $(SNAPPY_ARCHIVE):
|
|||
@mkdir -p $(@D)
|
||||
curl -L -o$@ https://github.com/google/snappy/releases/download/$(VERSION)/snappy-$(VERSION).tar.gz
|
||||
|
||||
# Extract the snappy release archive and configure the source tree.
# The stamp file is written last so that a failed ./configure is retried on
# the next run instead of being treated as already done.
$(SNAPPY_UNPACKED): $(SNAPPY_ARCHIVE)
	$(TAR) xvfz $< -C $(TARGET)
	cd $(SNAPPY_SRC_DIR) && ./configure
	touch $@
|
||||
|
||||
$(SNAPPY_GIT_UNPACKED):
|
||||
rm -rf $(SNAPPY_SRC_DIR)
|
||||
@mkdir -p $(SNAPPY_SRC_DIR)
|
||||
|
@ -146,4 +191,3 @@ $(TARGET)/snappy-java-$(VERSION)-src.upload:
|
|||
./googlecode_upload.py -s "source code archive" -p snappy-java -l "Type-Source,OpSys-All" -u "$(GOOGLECODE_USER)" target/snappy-java-$(VERSION).tar.gz
|
||||
touch $@
|
||||
|
||||
|
||||
|
|
|
@ -903,4 +903,174 @@ public class Snappy
|
|||
byte[] uncompressed = uncompress(input);
|
||||
return new String(uncompressed, encoding);
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a bit-shuffling filter into the input short array.
|
||||
*
|
||||
* @param input
|
||||
* @return bit-shuffled byte array
|
||||
* @throws IOException
|
||||
*/
|
||||
public static byte[] bitShuffle(short[] input) throws IOException {
|
||||
byte[] output = new byte[input.length * 2];
|
||||
if (impl.supportBitSuffle()) {
|
||||
impl.bitShuffle(input, 0, 2, input.length * 2, output, 0);
|
||||
} else {
|
||||
arrayCopy(input, 0, input.length * 2, output, 0);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a bit-shuffling filter into the input int array.
|
||||
*
|
||||
* @param input
|
||||
* @return bit-shuffled byte array
|
||||
* @throws IOException
|
||||
*/
|
||||
public static byte[] bitShuffle(int[] input) throws IOException {
|
||||
byte[] output = new byte[input.length * 4];
|
||||
if (impl.supportBitSuffle()) {
|
||||
impl.bitShuffle(input, 0, 4, input.length * 4, output, 0);
|
||||
} else {
|
||||
arrayCopy(input, 0, input.length * 4, output, 0);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a bit-shuffling filter into the input long array.
|
||||
*
|
||||
* @param input
|
||||
* @return bit-shuffled byte array
|
||||
* @throws IOException
|
||||
*/
|
||||
public static byte[] bitShuffle(long[] input) throws IOException {
|
||||
byte[] output = new byte[input.length * 8];
|
||||
if (impl.supportBitSuffle()) {
|
||||
impl.bitShuffle(input, 0, 8, input.length * 8, output, 0);
|
||||
} else {
|
||||
arrayCopy(input, 0, input.length * 8, output, 0);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a bit-shuffling filter into the input float array.
|
||||
*
|
||||
* @param input
|
||||
* @return bit-shuffled byte array
|
||||
* @throws IOException
|
||||
*/
|
||||
public static byte[] bitShuffle(float[] input) throws IOException {
|
||||
byte[] output = new byte[input.length * 4];
|
||||
if (impl.supportBitSuffle()) {
|
||||
impl.bitShuffle(input, 0, 4, input.length * 4, output, 0);
|
||||
} else {
|
||||
arrayCopy(input, 0, input.length * 4, output, 0);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a bit-shuffling filter into the input double array.
|
||||
*
|
||||
* @param input
|
||||
* @return bit-shuffled byte array
|
||||
* @throws IOException
|
||||
*/
|
||||
public static byte[] bitShuffle(double[] input) throws IOException {
|
||||
byte[] output = new byte[input.length * 8];
|
||||
if (impl.supportBitSuffle()) {
|
||||
impl.bitShuffle(input, 0, 8, input.length * 8, output, 0);
|
||||
} else {
|
||||
arrayCopy(input, 0, input.length * 8, output, 0);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the input bit-shuffled byte array into an original short array.
|
||||
*
|
||||
* @param input
|
||||
* @return a short array
|
||||
* @throws IOException
|
||||
*/
|
||||
public static short[] bitUnShuffleShortArray(byte[] input) throws IOException {
|
||||
short[] output = new short[input.length / 2];
|
||||
if (impl.supportBitSuffle()) {
|
||||
impl.bitUnShuffle(input, 0, 2, input.length, output, 0);
|
||||
} else {
|
||||
arrayCopy(input, 0, input.length, output, 0);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the input bit-shuffled byte array into an original int array.
|
||||
*
|
||||
* @param input
|
||||
* @return an int array
|
||||
* @throws IOException
|
||||
*/
|
||||
public static int[] bitUnShuffleIntArray(byte[] input) throws IOException {
|
||||
int[] output = new int[input.length / 4];
|
||||
if (impl.supportBitSuffle()) {
|
||||
impl.bitUnShuffle(input, 0, 4, input.length, output, 0);
|
||||
} else {
|
||||
arrayCopy(input, 0, input.length, output, 0);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the input bit-shuffled byte array into an original long array.
|
||||
*
|
||||
* @param input
|
||||
* @return a long array
|
||||
* @throws IOException
|
||||
*/
|
||||
public static long[] bitUnShuffleLongArray(byte[] input) throws IOException {
|
||||
long[] output = new long[input.length / 8];
|
||||
if (impl.supportBitSuffle()) {
|
||||
impl.bitUnShuffle(input, 0, 8, input.length, output, 0);
|
||||
} else {
|
||||
arrayCopy(input, 0, input.length, output, 0);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the input bit-shuffled byte array into an original float array.
|
||||
*
|
||||
* @param input
|
||||
* @return an float array
|
||||
* @throws IOException
|
||||
*/
|
||||
public static float[] bitUnShuffleFloatArray(byte[] input) throws IOException {
|
||||
float[] output = new float[input.length / 4];
|
||||
if (impl.supportBitSuffle()) {
|
||||
impl.bitUnShuffle(input, 0, 4, input.length, output, 0);
|
||||
} else {
|
||||
arrayCopy(input, 0, input.length, output, 0);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the input bit-shuffled byte array into an original double array.
|
||||
*
|
||||
* @param input
|
||||
* @return a double array
|
||||
* @throws IOException
|
||||
*/
|
||||
public static double[] bitUnShuffleDoubleArray(byte[] input) throws IOException {
|
||||
double[] output = new double[input.length / 8];
|
||||
if (impl.supportBitSuffle()) {
|
||||
impl.bitUnShuffle(input, 0, 8, input.length, output, 0);
|
||||
} else {
|
||||
arrayCopy(input, 0, input.length, output, 0);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,11 @@
|
|||
#include <snappy.h>
|
||||
#include "SnappyNative.h"
|
||||
|
||||
#ifdef SNAPPY_BITSHUFFLE_ENABLED
|
||||
#include <bitshuffle.h>
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
void throw_exception(JNIEnv *env, jobject self, int errorCode)
|
||||
{
|
||||
jclass c = env->FindClass("org/xerial/snappy/SnappyNative");
|
||||
|
@ -170,6 +175,100 @@ JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_
|
|||
|
||||
|
||||
|
||||
/*
|
||||
* Class: org_xerial_snappy_SnappyNative
|
||||
* Method: supportBitSuffle
|
||||
* Signature: ()Z
|
||||
*/
|
||||
JNIEXPORT jboolean JNICALL Java_org_xerial_snappy_SnappyNative_supportBitSuffle
|
||||
(JNIEnv *, jobject) {
|
||||
#ifdef SNAPPY_BITSHUFFLE_ENABLED
|
||||
return (jboolean) true;
|
||||
#else
|
||||
return (jboolean) false;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Class: org_xerial_snappy_SnappyNative
|
||||
* Method: bitShuffle
|
||||
* Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitShuffle
|
||||
(JNIEnv * env, jobject self, jobject input, jint inputOffset, jint typeSize, jint length, jobject output, jint outputOffset)
|
||||
{
|
||||
#ifdef SNAPPY_BITSHUFFLE_ENABLED
|
||||
char* in = (char*) env->GetPrimitiveArrayCritical((jarray) input, 0);
|
||||
char* out = (char*) env->GetPrimitiveArrayCritical((jarray) output, 0);
|
||||
if(in == 0 || out == 0) {
|
||||
// out of memory
|
||||
if(in != 0) {
|
||||
env->ReleasePrimitiveArrayCritical((jarray) input, in, 0);
|
||||
}
|
||||
if(out != 0) {
|
||||
env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
|
||||
}
|
||||
throw_exception(env, self, 4);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t processedBytes = bshuf_bitshuffle(
|
||||
in + inputOffset, out + outputOffset, (size_t) (length / typeSize), (size_t) typeSize, 0);
|
||||
|
||||
env->ReleasePrimitiveArrayCritical((jarray) input, in, 0);
|
||||
env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
|
||||
|
||||
return (jint) processedBytes;
|
||||
#else
|
||||
// Returns an error code for unsupported operations
|
||||
throw_exception(env, self, 1);
|
||||
return (jint) 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Class: org_xerial_snappy_SnappyNative
|
||||
* Method: bitUnShuffle
|
||||
* Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitUnShuffle
|
||||
(JNIEnv * env, jobject self, jobject input, jint inputOffset, jint typeSize, jint length, jobject output, jint outputOffset)
|
||||
{
|
||||
#ifdef SNAPPY_BITSHUFFLE_ENABLED
|
||||
char* in = (char*) env->GetPrimitiveArrayCritical((jarray) input, 0);
|
||||
char* out = (char*) env->GetPrimitiveArrayCritical((jarray) output, 0);
|
||||
if(in == 0 || out == 0) {
|
||||
// out of memory
|
||||
if(in != 0) {
|
||||
env->ReleasePrimitiveArrayCritical((jarray) input, in, 0);
|
||||
}
|
||||
if(out != 0) {
|
||||
env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
|
||||
}
|
||||
throw_exception(env, self, 4);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int64_t processedBytes = bshuf_bitunshuffle(
|
||||
in + inputOffset, out + outputOffset, (size_t) (length / typeSize), (size_t) typeSize, 0);
|
||||
|
||||
env->ReleasePrimitiveArrayCritical((jarray) input, in, 0);
|
||||
env->ReleasePrimitiveArrayCritical((jarray) output, out, 0);
|
||||
|
||||
return (jint) processedBytes;
|
||||
#else
|
||||
// Returns an error code for unsupported operations
|
||||
throw_exception(env, self, 1);
|
||||
return (jint) 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Class: org_xerial_snappy_Snappy
|
||||
* Method: maxCompressedLength
|
||||
|
|
|
@ -63,6 +63,30 @@ JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_
|
|||
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_rawUncompress__Ljava_lang_Object_2IILjava_lang_Object_2I
|
||||
(JNIEnv *, jobject, jobject, jint, jint, jobject, jint);
|
||||
|
||||
/*
|
||||
* Class: org_xerial_snappy_SnappyNative
|
||||
* Method: supportBitSuffle
|
||||
* Signature: ()Z
|
||||
*/
|
||||
JNIEXPORT jboolean JNICALL Java_org_xerial_snappy_SnappyNative_supportBitSuffle
|
||||
(JNIEnv *, jobject);
|
||||
|
||||
/*
|
||||
* Class: org_xerial_snappy_SnappyNative
|
||||
* Method: bitShuffle
|
||||
* Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitShuffle
|
||||
(JNIEnv *, jobject, jobject, jint, jint, jint, jobject, jint);
|
||||
|
||||
/*
|
||||
* Class: org_xerial_snappy_SnappyNative
|
||||
* Method: bitUnShuffle
|
||||
* Signature: (Ljava/lang/Object;IIILjava/lang/Object;I)I
|
||||
*/
|
||||
JNIEXPORT jint JNICALL Java_org_xerial_snappy_SnappyNative_bitUnShuffle
|
||||
(JNIEnv *, jobject, jobject, jint, jint, jint, jobject, jint);
|
||||
|
||||
/*
|
||||
* Class: org_xerial_snappy_SnappyNative
|
||||
* Method: maxCompressedLength
|
||||
|
|
|
@ -66,6 +66,22 @@ public class SnappyNative
|
|||
public native int rawUncompress(Object input, int inputOffset, int inputLength, Object output, int outputOffset)
|
||||
throws IOException;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Bit-shuffling routines to improve compression of typed binary data.
|
||||
// A quick benchmark result can be found in a gist below;
|
||||
// https://gist.github.com/maropu/01103215df34b317a7a7
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
// Returns true iff the loaded native library was compiled with bit-shuffling
// support (the JNI side answers based on the SNAPPY_BITSHUFFLE_ENABLED macro).
|
||||
// Bit-shuffling is executable only in x86 environments that support SSE/AVX instructions.
|
||||
// NOTE: the name is spelled "Suffle"; it has to keep matching the JNI symbol name.
public native boolean supportBitSuffle();
|
||||
|
||||
// Bit-shuffles byteLength / typeSize elements of typeSize bytes each.
// input and output are primitive arrays; inputOffset and outputOffset are
// byte offsets into them. The return value is whatever the native bitshuffle
// call reports (presumably the number of bytes processed -- confirm).
public native int bitShuffle(Object input, int inputOffset, int typeSize, int byteLength, Object output, int outputOffset)
|
||||
throws IOException;
|
||||
|
||||
// Inverse of bitShuffle; the parameters have the same meaning.
public native int bitUnShuffle(Object input, int inputOffset, int typeSize, int byteLength, Object output, int outputOffset)
|
||||
throws IOException;
|
||||
|
||||
// Returns the maximal size of the compressed representation of
|
||||
// input data that is "source_bytes" bytes in length;
|
||||
public native int maxCompressedLength(int source_bytes);
|
||||
|
|
Binary file not shown.
|
@ -329,4 +329,34 @@ public class SnappyTest
|
|||
_logger.debug(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void bitShuffleLongArray()
|
||||
throws Exception
|
||||
{
|
||||
long[] data = new long[] {2, 3, 15, 4234, 43251531412342342L, 23423422342L};
|
||||
byte[] shuffledData = Snappy.bitShuffle(data);
|
||||
long[] result = Snappy.bitUnShuffleLongArray(shuffledData);
|
||||
assertArrayEquals(data, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void bitShuffleShortArray()
|
||||
throws Exception
|
||||
{
|
||||
short[] data = new short[] {432, -32267, 1, 3, 34, 43, 34, Short.MAX_VALUE, -1};
|
||||
byte[] shuffledData = Snappy.bitShuffle(data);
|
||||
short[] result = Snappy.bitUnShuffleShortArray(shuffledData);
|
||||
assertArrayEquals(data, result);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void bitShuffleIntArray()
|
||||
throws Exception
|
||||
{
|
||||
int[] data = new int[] {432, -32267, 1, 3, 34, 43, 34, Short.MAX_VALUE, -1, Integer.MAX_VALUE, 3424, 43};
|
||||
byte[] shuffledData = Snappy.bitShuffle(data);
|
||||
int[] result = Snappy.bitUnShuffleIntArray(shuffledData);
|
||||
assertArrayEquals(data, result);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue