From a6eb0a605d32459c856500a579473119a3f260f7 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Mon, 13 Apr 2015 12:09:37 -0700
Subject: [PATCH 1/3] Add failing regression test for #100.

---
 .../xerial/snappy/SnappyOutputStreamTest.java | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/src/test/java/org/xerial/snappy/SnappyOutputStreamTest.java b/src/test/java/org/xerial/snappy/SnappyOutputStreamTest.java
index de72125..5b5f0e1 100755
--- a/src/test/java/org/xerial/snappy/SnappyOutputStreamTest.java
+++ b/src/test/java/org/xerial/snappy/SnappyOutputStreamTest.java
@@ -118,6 +118,45 @@ public class SnappyOutputStreamTest
         assertArrayEquals(orig, decompressed);
     }
 
+    /**
+     * Compress the input array by passing it chunk-by-chunk to a SnappyOutputStream.
+     * @param orig the data to compress
+     * @param maxChunkSize the maximum chunk size, in bytes.
+     * @return the compressed bytes
+     */
+    private static byte[] compressAsChunks(byte[] orig, int maxChunkSize) throws Exception {
+        ByteArrayOutputStream b = new ByteArrayOutputStream();
+        SnappyOutputStream out = new SnappyOutputStream(b);
+
+        int remaining = orig.length;
+        for (int start = 0; start < orig.length; start += maxChunkSize) {
+            out.write(orig, start, remaining < maxChunkSize ? remaining : maxChunkSize);
+            remaining -= maxChunkSize;
+        }
+        out.close();
+        return b.toByteArray();
+    }
+
+    @Test
+    public void batchingOfWritesShouldNotAffectCompressedDataSize() throws Exception {
+        // Regression test for issue #100, a bug where the size of compressed data could be affected
+        // by the batching of writes to the SnappyOutputStream rather than the total amount of data
+        // written to the stream.
+        byte[] orig = CalgaryTest.readFile("alice29.txt");
+        // Compress the data once so that we know the expected size:
+        byte[] expectedCompressedData = compressAsChunks(orig, Integer.MAX_VALUE);
+        // Hardcoding an expected compressed size here will catch regressions that lower the
+        // compression quality:
+        assertEquals(91013, expectedCompressedData.length);
+        // The chunk size should not affect the size of the compressed output:
+        int[] chunkSizes = new int[] { 1, 100, 1023, 1024, 10000};
+        for (int chunkSize : chunkSizes) {
+            byte[] compressedData = compressAsChunks(orig, chunkSize);
+            assertEquals(expectedCompressedData.length, compressedData.length);
+            assertArrayEquals(expectedCompressedData, compressedData);
+        }
+    }
+
     @Test
     public void longArrayCompress() throws Exception {
         long[] l = new long[10];

From 3fe32512e41acfaf3a5b263c8241616da053e4c9 Mon Sep 17 00:00:00 2001
From: "Taro L. Saito"
Date: Tue, 14 Apr 2015 15:22:20 +0900
Subject: [PATCH 2/3] Use specified block size instead of MIN_BLOCK_SIZE

---
 src/main/java/org/xerial/snappy/SnappyOutputStream.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/org/xerial/snappy/SnappyOutputStream.java b/src/main/java/org/xerial/snappy/SnappyOutputStream.java
index 6041767..56c073a 100755
--- a/src/main/java/org/xerial/snappy/SnappyOutputStream.java
+++ b/src/main/java/org/xerial/snappy/SnappyOutputStream.java
@@ -232,7 +232,7 @@ public class SnappyOutputStream extends OutputStream {
      */
     public void rawWrite(Object array, int byteOffset, int byteLength) throws IOException {
 
-        if(inputCursor + byteLength < MIN_BLOCK_SIZE) {
+        if(inputCursor + byteLength < blockSize) {
             // copy the input data to uncompressed buffer
             Snappy.arrayCopy(array, byteOffset, byteLength, inputBuffer, inputCursor);
             inputCursor += byteLength;

From 6d9925ba364c89f1f01ed5043b5bc3dc63ed3941 Mon Sep 17 00:00:00 2001
From: "Taro L. Saito"
Date: Tue, 14 Apr 2015 15:43:50 +0900
Subject: [PATCH 3/3] Fixes for #100

---
 .../org/xerial/snappy/SnappyOutputStream.java | 28 ++++++++-----------
 .../xerial/snappy/SnappyOutputStreamTest.java |  2 +-
 2 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/src/main/java/org/xerial/snappy/SnappyOutputStream.java b/src/main/java/org/xerial/snappy/SnappyOutputStream.java
index 56c073a..c930a4b 100755
--- a/src/main/java/org/xerial/snappy/SnappyOutputStream.java
+++ b/src/main/java/org/xerial/snappy/SnappyOutputStream.java
@@ -231,25 +231,19 @@ public class SnappyOutputStream extends OutputStream {
      * @throws IOException
      */
     public void rawWrite(Object array, int byteOffset, int byteLength) throws IOException {
-
-        if(inputCursor + byteLength < blockSize) {
+        int cursor = 0;
+        while(cursor < byteLength) {
+            int readLen = Math.min(byteLength - cursor, blockSize - inputCursor);
             // copy the input data to uncompressed buffer
-            Snappy.arrayCopy(array, byteOffset, byteLength, inputBuffer, inputCursor);
-            inputCursor += byteLength;
-            return;
-        }
-
-        compressInput();
-
-        for(int readBytes = 0; readBytes < byteLength; ) {
-            int inputLen = Math.min(blockSize, byteLength - readBytes);
-            if(!hasSufficientOutputBufferFor(inputLen)) {
-                dumpOutput();
+            if(readLen > 0) {
+                Snappy.arrayCopy(array, byteOffset + cursor, readLen, inputBuffer, inputCursor);
+                inputCursor += readLen;
             }
-            int compressedSize = Snappy.rawCompress(array, byteOffset + readBytes, inputLen, outputBuffer, outputCursor + 4);
-            writeInt(outputBuffer, outputCursor, compressedSize);
-            outputCursor += 4 + compressedSize;
-            readBytes += inputLen;
+            if(inputCursor < blockSize)
+                return;
+
+            compressInput();
+            cursor += readLen;
         }
     }
 
diff --git a/src/test/java/org/xerial/snappy/SnappyOutputStreamTest.java b/src/test/java/org/xerial/snappy/SnappyOutputStreamTest.java
index 5b5f0e1..10850a8 100755
--- a/src/test/java/org/xerial/snappy/SnappyOutputStreamTest.java
+++ b/src/test/java/org/xerial/snappy/SnappyOutputStreamTest.java
@@ -152,7 +152,7 @@ public class SnappyOutputStreamTest
         int[] chunkSizes = new int[] { 1, 100, 1023, 1024, 10000};
         for (int chunkSize : chunkSizes) {
             byte[] compressedData = compressAsChunks(orig, chunkSize);
-            assertEquals(expectedCompressedData.length, compressedData.length);
+            assertEquals(String.format("when chunk size = %,d", chunkSize), expectedCompressedData.length, compressedData.length);
             assertArrayEquals(expectedCompressedData, compressedData);
         }
     }