Merge pull request #102 from xerial/fix-output-size

Stabilize compressed data size of SnappyOutputStream
2025-07-23 05:54:41 +02:00 · 2015-04-14 15:58:22 +09:00 · 2015-04-14 15:58:22 +09:00 · dc2dd27f85
commit dc2dd27f85
parent 0dd3f488f6 6d9925ba36
2 changed files with 50 additions and 17 deletions
--- a/src/main/java/org/xerial/snappy/SnappyOutputStream.java
+++ b/src/main/java/org/xerial/snappy/SnappyOutputStream.java
@ -231,25 +231,19 @@ public class SnappyOutputStream extends OutputStream {
     * @throws IOException
     */
    public void rawWrite(Object array, int byteOffset, int byteLength) throws IOException {
-
-        if(inputCursor + byteLength < MIN_BLOCK_SIZE) {
+        int cursor = 0;
+        while(cursor < byteLength) {
+            int readLen = Math.min(byteLength - cursor, blockSize - inputCursor);
            // copy the input data to uncompressed buffer
-            Snappy.arrayCopy(array, byteOffset, byteLength, inputBuffer, inputCursor);
-            inputCursor += byteLength;
-            return;
-        }
-
-        compressInput();
-
-        for(int readBytes = 0; readBytes < byteLength; ) {
-            int inputLen = Math.min(blockSize, byteLength - readBytes);
-            if(!hasSufficientOutputBufferFor(inputLen)) {
-                dumpOutput();
+            if(readLen > 0) {
+                Snappy.arrayCopy(array, byteOffset + cursor, readLen, inputBuffer, inputCursor);
+                inputCursor += readLen;
            }
-            int compressedSize = Snappy.rawCompress(array, byteOffset + readBytes, inputLen, outputBuffer, outputCursor + 4);
-            writeInt(outputBuffer, outputCursor, compressedSize);
-            outputCursor += 4 + compressedSize;
-            readBytes += inputLen;
+            if(inputCursor < blockSize)
+                return;
+
+            compressInput();
+            cursor += readLen;
        }
    }

--- a/src/test/java/org/xerial/snappy/SnappyOutputStreamTest.java
+++ b/src/test/java/org/xerial/snappy/SnappyOutputStreamTest.java
@ -118,6 +118,45 @@ public class SnappyOutputStreamTest
        assertArrayEquals(orig, decompressed);
    }

+    /**
+     * Compress the input array by passing it chunk-by-chunk to a SnappyOutputStream.
+     * @param orig the data to compress
+     * @param maxChunkSize the maximum chunk size, in bytes.
+     * @return the compressed bytes
+     */
+    private static byte[] compressAsChunks(byte[] orig, int maxChunkSize) throws Exception {
+        ByteArrayOutputStream b = new ByteArrayOutputStream();
+        SnappyOutputStream out = new SnappyOutputStream(b);
+
+        int remaining = orig.length;
+        for (int start = 0; start < orig.length; start += maxChunkSize) {
+            out.write(orig, start, remaining < maxChunkSize ? remaining : maxChunkSize);
+            remaining -= maxChunkSize;
+        }
+        out.close();
+        return b.toByteArray();
+    }
+
+    @Test
+    public void batchingOfWritesShouldNotAffectCompressedDataSize() throws Exception {
+        // Regression test for issue #100, a bug where the size of compressed data could be affected
+        // by the batching of writes to the SnappyOutputStream rather than the total amount of data
+        // written to the stream.
+        byte[] orig = CalgaryTest.readFile("alice29.txt");
+        // Compress the data once so that we know the expected size:
+        byte[] expectedCompressedData = compressAsChunks(orig, Integer.MAX_VALUE);
+        // Hardcoding an expected compressed size here will catch regressions that lower the
+        // compression quality:
+        assertEquals(91013, expectedCompressedData.length);
+        // The chunk size should not affect the size of the compressed output:
+        int[] chunkSizes = new int[] { 1, 100, 1023, 1024, 10000};
+        for (int chunkSize : chunkSizes) {
+            byte[] compressedData = compressAsChunks(orig, chunkSize);
+            assertEquals(String.format("when chunk size = %,d", chunkSize), expectedCompressedData.length, compressedData.length);
+            assertArrayEquals(expectedCompressedData, compressedData);
+        }
+    }
+
    @Test
    public void longArrayCompress() throws Exception {
        long[] l = new long[10];