#82: Improving SnappyOutputStream performance by reducing the number of data copies

This commit is contained in:
Taro L. Saito 2014-07-18 18:05:51 +09:00
parent 5b5a3a379d
commit f695e84cac
3 changed files with 125 additions and 111 deletions

View File

@ -50,7 +50,8 @@ import java.util.Arrays;
public class SnappyCodec
{
public static final byte[] MAGIC_HEADER = new byte[] { -126, 'S', 'N', 'A', 'P', 'P', 'Y', 0 };
public static final int MAGIC_LEN = 8;
public static final int MAGIC_LEN = MAGIC_HEADER.length;
public static final int HEADER_SIZE = MAGIC_LEN + 8;
public static final int DEFAULT_VERSION = 1;
public static final int MINIMUM_COMPATIBLE_VERSION = 1;
@ -58,11 +59,25 @@ public class SnappyCodec
public final byte[] magic;
public final int version;
public final int compatibleVersion;
private final byte[] headerArray;
/**
 * Creates a codec descriptor and serializes its header once, so that later
 * {@code writeHeader} calls only need to copy the cached bytes.
 * <p>
 * The cached header is the first {@code MAGIC_LEN} bytes of {@code magic}
 * followed by {@code version} and {@code compatibleVersion}, each written
 * with {@link DataOutputStream#writeInt(int)}.
 *
 * @param magic             magic bytes identifying the stream format
 * @param version           format version to record in the header
 * @param compatibleVersion compatible version to record in the header
 */
private SnappyCodec(byte[] magic, int version, int compatibleVersion) {
    this.magic = magic;
    this.version = version;
    this.compatibleVersion = compatibleVersion;

    ByteArrayOutputStream sink = new ByteArrayOutputStream(HEADER_SIZE);
    try {
        DataOutputStream encoder = new DataOutputStream(sink);
        encoder.write(magic, 0, MAGIC_LEN);
        encoder.writeInt(version);
        encoder.writeInt(compatibleVersion);
        encoder.close();
    }
    catch (IOException e) {
        // Rethrown unchecked so the constructor declares no checked exception.
        throw new RuntimeException(e);
    }
    this.headerArray = sink.toByteArray();
}
@Override
@ -71,17 +86,17 @@ public class SnappyCodec
}
public static int headerSize() {
return MAGIC_LEN + 4 * 2;
return HEADER_SIZE;
}
public void writeHeader(OutputStream out) throws IOException {
ByteArrayOutputStream header = new ByteArrayOutputStream();
DataOutputStream d = new DataOutputStream(header);
d.write(magic, 0, MAGIC_LEN);
d.writeInt(version);
d.writeInt(compatibleVersion);
d.close();
out.write(header.toByteArray(), 0, header.size());
public int writeHeader(byte[] dst, int dstOffset) {
System.arraycopy(headerArray, 0, dst, dstOffset, headerArray.length);
return headerArray.length;
}
public int writeHeader(OutputStream out) throws IOException {
out.write(headerArray, 0, headerArray.length);
return headerArray.length;
}
public boolean isValidMagicHeader() {
@ -97,8 +112,6 @@ public class SnappyCodec
return new SnappyCodec(magic, version, compatibleVersion);
}
public static SnappyCodec currentHeader() {
return new SnappyCodec(MAGIC_HEADER, DEFAULT_VERSION, MINIMUM_COMPATIBLE_VERSION);
}
public static SnappyCodec currentHeader = new SnappyCodec(MAGIC_HEADER, DEFAULT_VERSION, MINIMUM_COMPATIBLE_VERSION);
}

View File

@ -30,10 +30,10 @@ import java.io.OutputStream;
/**
* This class implements a stream filter for writing compressed data using
* Snappy.
*
* <p>
* The input data is split into blocks of 32 KB (by default), and each block is
* compressed and then passed to the given {@link OutputStream}.
*
* </p>
* The output data format is:
* <ol>
* <li>snappy codec header defined in {@link SnappyCodec} (16 bytes: an 8-byte magic followed by two 4-byte version fields)
@ -42,55 +42,43 @@ import java.io.OutputStream;
* <li>compressed block 2
* <li>...
* </ol>
*
* <p/>
* Note that the compressed data created by {@link SnappyOutputStream} cannot be
* uncompressed by {@link Snappy#uncompress(byte[])} since the output formats of
* {@link Snappy#compress(byte[])} and {@link SnappyOutputStream} are different.
* Use {@link SnappyInputStream} to uncompress the data generated by
* {@link SnappyOutputStream}.
*
*
* @author leo
*
*/
public class SnappyOutputStream extends OutputStream
{
static final int DEFAULT_BLOCK_SIZE = 32 * 1024; // Use 32kb for the default block size
public class SnappyOutputStream extends OutputStream {
static final int MIN_BLOCK_SIZE = 1 * 1024;
static final int DEFAULT_BLOCK_SIZE = 32 * 1024; // Use 32kb for the default block size
protected final OutputStream out;
private final int blockSize;
private int cursor = 0;
protected byte[] uncompressed;
protected byte[] compressed;
private final int blockSize;
private int inputCursor = 0;
protected byte[] uncompressed;
private int outputCursor = 0;
protected byte[] outputBuffer;
public SnappyOutputStream(OutputStream out) throws IOException {
public SnappyOutputStream(OutputStream out) {
this(out, DEFAULT_BLOCK_SIZE);
}
/**
 * Creates a stream that compresses its input block by block and writes the
 * result to {@code out}.
 * <p>
 * The codec header is staged at the start of the output buffer; this
 * constructor itself writes nothing to {@code out}.
 *
 * @param out       stream that receives the compressed data
 * @param blockSize requested byte size of the internal input buffer; values
 *                  below {@link #MIN_BLOCK_SIZE} are raised to
 *                  {@link #MIN_BLOCK_SIZE}
 */
public SnappyOutputStream(OutputStream out, int blockSize) {
    this.out = out;
    this.blockSize = Math.max(MIN_BLOCK_SIZE, blockSize);
    // Size both buffers from the clamped block size, not the raw argument:
    // rawWrite stages up to MIN_BLOCK_SIZE - 1 bytes in the input buffer and
    // compresses chunks of up to this.blockSize bytes into the output buffer,
    // so buffers sized from a smaller requested value could be overrun.
    uncompressed = new byte[this.blockSize];
    // Room for the codec header, one 4-byte length prefix and one worst-case block.
    outputBuffer = new byte[SnappyCodec.HEADER_SIZE + 4 + Snappy.maxCompressedLength(this.blockSize)];
    outputCursor = SnappyCodec.currentHeader.writeHeader(outputBuffer, 0);
}
protected void writeHeader() throws IOException {
SnappyCodec.currentHeader().writeHeader(out);
}
/**
* Writes len bytes from the specified byte array starting at offset off to
* this output stream. The general contract for write(b, off, len) is that
* some of the bytes in the array b are written to the output stream in
* order; element b[off] is the first byte written and b[off+len-1] is the
* last byte written by this operation.
*/
/* (non-Javadoc)
* @see java.io.OutputStream#write(byte[], int, int)
*/
@ -101,13 +89,10 @@ public class SnappyOutputStream extends OutputStream
/**
* Compress the input long array data
*
* @param d
* input array
* @param off
* offset in the array
* @param len
* the number of elements in the array to copy
*
* @param d input array
* @param off offset in the array
* @param len the number of elements in the array to copy
* @throws IOException
*/
public void write(long[] d, int off, int len) throws IOException {
@ -116,13 +101,10 @@ public class SnappyOutputStream extends OutputStream
/**
* Compress the input double array data
*
* @param f
* input array
* @param off
* offset in the array
* @param len
* the number of elements in the array to copy
*
* @param f input array
* @param off offset in the array
* @param len the number of elements in the array to copy
* @throws IOException
*/
public void write(double[] f, int off, int len) throws IOException {
@ -131,13 +113,10 @@ public class SnappyOutputStream extends OutputStream
/**
* Compress the input float array data
*
* @param f
* input array
* @param off
* offset in the array
* @param len
* the number of elements in the array to copy
*
* @param f input array
* @param off offset in the array
* @param len the number of elements in the array to copy
* @throws IOException
*/
public void write(float[] f, int off, int len) throws IOException {
@ -146,13 +125,10 @@ public class SnappyOutputStream extends OutputStream
/**
* Compress the input int array data
*
* @param f
* input array
* @param off
* offset in the array
* @param len
* the number of elements in the array to copy
*
* @param f input array
* @param off offset in the array
* @param len the number of elements in the array to copy
* @throws IOException
*/
public void write(int[] f, int off, int len) throws IOException {
@ -161,13 +137,10 @@ public class SnappyOutputStream extends OutputStream
/**
* Compress the input short array data
*
* @param f
* input array
* @param off
* offset in the array
* @param len
* the number of elements in the array to copy
*
* @param f input array
* @param off offset in the array
* @param len the number of elements in the array to copy
* @throws IOException
*/
public void write(short[] f, int off, int len) throws IOException {
@ -176,7 +149,7 @@ public class SnappyOutputStream extends OutputStream
/**
* Compress the input array data
*
*
* @param d
* @throws IOException
*/
@ -186,7 +159,7 @@ public class SnappyOutputStream extends OutputStream
/**
* Compress the input array data
*
*
* @param f
* @throws IOException
*/
@ -196,7 +169,7 @@ public class SnappyOutputStream extends OutputStream
/**
* Compress the input array data
*
*
* @param f
* @throws IOException
*/
@ -206,7 +179,7 @@ public class SnappyOutputStream extends OutputStream
/**
* Compress the input array data
*
*
* @param f
* @throws IOException
*/
@ -216,7 +189,7 @@ public class SnappyOutputStream extends OutputStream
/**
* Compress the input array data
*
*
* @param f
* @throws IOException
*/
@ -224,25 +197,39 @@ public class SnappyOutputStream extends OutputStream
write(f, 0, f.length);
}
/**
 * Tells whether a block of {@code inputSize} bytes can be compressed into
 * the unused tail of the output buffer without flushing it first.
 * <p>
 * Four bytes are set aside for the length prefix stored in front of every
 * compressed block. The comparison is strict, so an exact fit is reported
 * as insufficient.
 *
 * @param inputSize number of uncompressed bytes about to be compressed
 * @return {@code true} if the worst-case compressed size is strictly smaller
 *         than the space left after the length prefix
 */
private boolean hasSufficientOutputBufferFor(int inputSize) {
    final int worstCase = Snappy.maxCompressedLength(inputSize);
    final int available = outputBuffer.length - outputCursor - 4;
    return available > worstCase;
}
/**
* Compress the raw byte array data.
*
* @param array
* array data of any type (e.g., byte[], float[], long[], ...)
*
* @param array array data of any type (e.g., byte[], float[], long[], ...)
* @param byteOffset
* @param byteLength
* @throws IOException
*/
public void rawWrite(Object array, int byteOffset, int byteLength) throws IOException {
for (int readBytes = 0; readBytes < byteLength;) {
int copyLen = Math.min(uncompressed.length - cursor, byteLength - readBytes);
Snappy.arrayCopy(array, byteOffset + readBytes, copyLen, uncompressed, cursor);
readBytes += copyLen;
cursor += copyLen;
if (cursor >= uncompressed.length) {
dump();
if(inputCursor + byteLength < MIN_BLOCK_SIZE) {
// copy the input data to uncompressed buffer
Snappy.arrayCopy(array, byteOffset, byteLength, uncompressed, inputCursor);
inputCursor += byteLength;
return;
}
compressInput();
for(int readBytes = 0; readBytes < byteLength; ) {
int inputLen = Math.min(blockSize, byteLength - readBytes);
if(!hasSufficientOutputBufferFor(inputLen)) {
dumpOutput();
}
int compressedSize = Snappy.rawCompress(array, byteOffset + readBytes, inputLen, outputBuffer, outputCursor + 4);
writeInt(outputBuffer, outputCursor, compressedSize);
outputCursor += 4 + compressedSize;
readBytes += inputLen;
}
}
@ -257,10 +244,10 @@ public class SnappyOutputStream extends OutputStream
*/
@Override
public void write(int b) throws IOException {
if (cursor >= uncompressed.length) {
dump();
if(inputCursor >= uncompressed.length) {
compressInput();
}
uncompressed[cursor++] = (byte) b;
uncompressed[inputCursor++] = (byte) b;
}
/* (non-Javadoc)
@ -268,15 +255,17 @@ public class SnappyOutputStream extends OutputStream
*/
@Override
public void flush() throws IOException {
dump();
compressInput();
dumpOutput();
out.flush();
}
static void writeInt(OutputStream out, int value) throws IOException {
out.write((value >> 24) & 0xFF);
out.write((value >> 16) & 0xFF);
out.write((value >> 8) & 0xFF);
out.write((value >> 0) & 0xFF);
/**
 * Stores {@code v} in {@code dst} as four bytes, most significant byte
 * first, starting at index {@code offset}.
 *
 * @param dst    destination array; must have at least four bytes available
 *               from {@code offset}
 * @param offset index of the first byte to write
 * @param v      value to encode
 */
static void writeInt(byte[] dst, int offset, int v) {
    // The narrowing cast keeps only the low 8 bits, so no explicit mask is needed.
    dst[offset] = (byte) (v >>> 24);
    dst[offset + 1] = (byte) (v >>> 16);
    dst[offset + 2] = (byte) (v >>> 8);
    dst[offset + 3] = (byte) v;
}
static int readInt(byte[] buffer, int pos) {
@ -287,15 +276,27 @@ public class SnappyOutputStream extends OutputStream
return b1 | b2 | b3 | b4;
}
protected void dump() throws IOException {
if (cursor <= 0)
/**
 * Writes the bytes accumulated in the output buffer to the underlying
 * stream and resets the output cursor. Does nothing when the buffer is empty.
 *
 * @throws IOException if the underlying stream rejects the write
 */
protected void dumpOutput() throws IOException {
    if (outputCursor <= 0) {
        return; // nothing buffered
    }
    out.write(outputBuffer, 0, outputCursor);
    outputCursor = 0;
}
protected void compressInput() throws IOException {
if(inputCursor <= 0) {
return; // no need to dump
}
// Compress and dump the buffer content
int compressedSize = Snappy.compress(uncompressed, 0, cursor, compressed, 0);
writeInt(out, compressedSize);
out.write(compressed, 0, compressedSize);
cursor = 0;
if(!hasSufficientOutputBufferFor(inputCursor)) {
dumpOutput();
}
int compressedSize = Snappy.compress(uncompressed, 0, inputCursor, outputBuffer, outputCursor + 4);
// Write compressed data size
writeInt(outputBuffer, outputCursor, compressedSize);
outputCursor += 4 + compressedSize;
inputCursor = 0;
}
/**
@ -307,8 +308,6 @@ public class SnappyOutputStream extends OutputStream
@Override
public void close() throws IOException {
    try {
        // Compress any pending input and push all buffered output downstream.
        flush();
    }
    finally {
        // Release the underlying stream even when flush() fails; otherwise
        // a failed flush would leave it open.
        super.close();
        out.close();
    }
}

View File

@ -13,7 +13,7 @@ import scala.util.Random
class SnappyPerformanceTest extends SnappySpec {
lazy val data = {
val a = new Array[Byte](32 * 1024 * 1024)
val a = new Array[Byte](128 * 1024 * 1024)
for (i <- (0 until a.length).par) {
a(i) = Math.sin(i * 0.01).toByte
@ -29,10 +29,12 @@ class SnappyPerformanceTest extends SnappySpec {
val input = data
time("compression", repeat=100, logLevel = LogLevel.INFO) {
// 0.037 sec. => 0.026
block("default") {
val out = new ByteArrayOutputStream()
val sout = new SnappyOutputStream(out)
sout.write(input)
sout.close()
out.close()
}