Merge pull request #102 from xerial/fix-output-size

Stabilize compressed data size of SnappyOutputStream
Taro L. Saito 2015-04-14 15:58:22 +09:00
commit dc2dd27f85
2 changed files with 50 additions and 17 deletions

src/main/java/org/xerial/snappy/SnappyOutputStream.java

@@ -231,25 +231,19 @@ public class SnappyOutputStream extends OutputStream {
      * @throws IOException
      */
     public void rawWrite(Object array, int byteOffset, int byteLength) throws IOException {
-        if(inputCursor + byteLength < MIN_BLOCK_SIZE) {
+        int cursor = 0;
+        while(cursor < byteLength) {
+            int readLen = Math.min(byteLength - cursor, blockSize - inputCursor);
             // copy the input data to uncompressed buffer
-            Snappy.arrayCopy(array, byteOffset, byteLength, inputBuffer, inputCursor);
-            inputCursor += byteLength;
-            return;
-        }
-
-        compressInput();
-        for(int readBytes = 0; readBytes < byteLength; ) {
-            int inputLen = Math.min(blockSize, byteLength - readBytes);
-            if(!hasSufficientOutputBufferFor(inputLen)) {
-                dumpOutput();
+            if(readLen > 0) {
+                Snappy.arrayCopy(array, byteOffset + cursor, readLen, inputBuffer, inputCursor);
+                inputCursor += readLen;
             }
-            int compressedSize = Snappy.rawCompress(array, byteOffset + readBytes, inputLen, outputBuffer, outputCursor + 4);
-            writeInt(outputBuffer, outputCursor, compressedSize);
-            outputCursor += 4 + compressedSize;
-            readBytes += inputLen;
+            if(inputCursor < blockSize) {
+                return;
+            }
+            compressInput();
+            cursor += readLen;
         }
     }
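
The change is easier to see outside the diff. The old path compressed data straight out of the caller's array whenever a write overflowed the pending buffer, so block boundaries (and therefore the compressed bytes) depended on how writes were batched; the new loop always stages data through the fixed-size input buffer and compresses only when that buffer is exactly full. A minimal standalone sketch of that buffering rule (illustrative only: BlockBuffer, BLOCK_SIZE, and flushedBlockSizes are hypothetical names, not snappy-java's API):

import java.util.ArrayList;
import java.util.List;

// A toy model of the patched buffering loop. Each flush() stands in for
// compressing and emitting one block in the real stream.
class BlockBuffer {
    static final int BLOCK_SIZE = 32 * 1024; // snappy-java's default block size

    private final byte[] block = new byte[BLOCK_SIZE];
    private int cursor = 0; // bytes buffered so far
    final List<Integer> flushedBlockSizes = new ArrayList<Integer>();

    void write(byte[] src, int offset, int length) {
        int written = 0;
        while (written < length) {
            // Copy at most what fits into the current block.
            int readLen = Math.min(length - written, BLOCK_SIZE - cursor);
            System.arraycopy(src, offset + written, block, cursor, readLen);
            cursor += readLen;
            if (cursor < BLOCK_SIZE) {
                return; // block not full yet: wait for more input instead of compressing early
            }
            flush(); // a block boundary always falls at a multiple of BLOCK_SIZE
            written += readLen;
        }
    }

    void close() {
        if (cursor > 0) {
            flush(); // final, possibly partial block
        }
    }

    private void flush() {
        flushedBlockSizes.add(cursor); // the real stream compresses block[0..cursor) here
        cursor = 0;
    }
}

Feeding the same bytes through write() in chunks of 1, 100, or 10,000 bytes leaves flushedBlockSizes unchanged, which is exactly the invariant the new regression test below asserts on the actual compressed output.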

src/test/java/org/xerial/snappy/SnappyOutputStreamTest.java

@@ -118,6 +118,45 @@ public class SnappyOutputStreamTest
         assertArrayEquals(orig, decompressed);
     }
 
+    /**
+     * Compress the input array by passing it chunk-by-chunk to a SnappyOutputStream.
+     * @param orig the data to compress
+     * @param maxChunkSize the maximum chunk size, in bytes.
+     * @return the compressed bytes
+     */
+    private static byte[] compressAsChunks(byte[] orig, int maxChunkSize) throws Exception {
+        ByteArrayOutputStream b = new ByteArrayOutputStream();
+        SnappyOutputStream out = new SnappyOutputStream(b);
+        int remaining = orig.length;
+        for (int start = 0; start < orig.length; start += maxChunkSize) {
+            out.write(orig, start, remaining < maxChunkSize ? remaining : maxChunkSize);
+            remaining -= maxChunkSize;
+        }
+        out.close();
+        return b.toByteArray();
+    }
+
+    @Test
+    public void batchingOfWritesShouldNotAffectCompressedDataSize() throws Exception {
+        // Regression test for issue #100, a bug where the size of compressed data could be
+        // affected by the batching of writes to the SnappyOutputStream rather than the total
+        // amount of data written to the stream.
+        byte[] orig = CalgaryTest.readFile("alice29.txt");
+
+        // Compress the data once so that we know the expected size:
+        byte[] expectedCompressedData = compressAsChunks(orig, Integer.MAX_VALUE);
+        // Hardcoding an expected compressed size here will catch regressions that lower the
+        // compression quality:
+        assertEquals(91013, expectedCompressedData.length);
+
+        // The chunk size should not affect the size of the compressed output:
+        int[] chunkSizes = new int[] { 1, 100, 1023, 1024, 10000 };
+        for (int chunkSize : chunkSizes) {
+            byte[] compressedData = compressAsChunks(orig, chunkSize);
+            assertEquals(String.format("when chunk size = %,d", chunkSize), expectedCompressedData.length, compressedData.length);
+            assertArrayEquals(expectedCompressedData, compressedData);
+        }
+    }
+
     @Test
     public void longArrayCompress() throws Exception {
         long[] l = new long[10];
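
For context, the caller pattern that exposed issue #100 is an ordinary buffered copy like the sketch below (the file names and COPY_BUFFER_SIZE are illustrative, not from this commit). Before this fix, changing COPY_BUFFER_SIZE could change the size of out.snappy for the same input file; with the patched rawWrite, the compressed output is byte-for-byte identical regardless of the copy-buffer size.

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.xerial.snappy.SnappyOutputStream;

public class SnappyCopy {
    // An arbitrary copy-buffer size; the point of the fix is that this value
    // no longer influences the size of the compressed output.
    static final int COPY_BUFFER_SIZE = 8192;

    public static void main(String[] args) throws IOException {
        try (InputStream in = new FileInputStream("alice29.txt");
             OutputStream out = new SnappyOutputStream(new FileOutputStream("out.snappy"))) {
            byte[] buf = new byte[COPY_BUFFER_SIZE];
            int n;
            while ((n = in.read(buf)) != -1) {
                out.write(buf, 0, n); // batching of these writes no longer affects output size
            }
        }
    }
}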