Skip to content

Commit

Permalink
Fix #236
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed Jan 30, 2021
1 parent f1f9343 commit ab74693
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ private Feature(boolean defaultState) {
* I/O context for this reader. It handles buffer allocation
* for the reader.
*/
final protected IOContext _ioContext;
protected final IOContext _ioContext;

/**
* Flag that indicates whether parser is closed or not. Gets
Expand Down Expand Up @@ -2157,17 +2157,21 @@ protected String _finishTextToken(int ch) throws IOException
_finishChunkedText();
return _textBuffer.contentsAsString();
}
if (len > (_inputEnd - _inputPtr)) {
// or if not, could we read?
if (len >= _inputBuffer.length) {
// If not enough space, need handling similar to chunked
_finishLongText(len);
return _textBuffer.contentsAsString();
}
_loadToHaveAtLeast(len);
// 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
// the longest individual unit is 4 bytes (surrogate pair) so we
// actually need len+3 bytes to avoid bounds checks
final int needed = len + 3;
final int available = _inputEnd - _inputPtr;

if ((available >= needed)
// if not, could we read? NOTE: we do not require it, just attempt to read
|| ((_inputBuffer.length >= needed)
&& _tryToLoadToHaveAtLeast(needed))) {
return _finishShortText(len);
}
// offline for better optimization
return _finishShortText(len);
// If not enough space, need handling similar to chunked
_finishLongText(len);
return _textBuffer.contentsAsString();
}

private final String _finishShortText(int len) throws IOException
Expand Down Expand Up @@ -2240,7 +2244,7 @@ private final void _finishLongText(int len) throws IOException
continue;
}
if ((len -= code) < 0) { // may need to improve error here but...
throw _constructError("Malformed UTF-8 character at end of long (non-chunked) text segment");
throw _constructError("Malformed UTF-8 character at the end of a (non-chunked) text segment");
}

switch (code) {
Expand All @@ -2260,13 +2264,13 @@ private final void _finishLongText(int len) throws IOException
break;
case 3: // 4-byte UTF
c = _decodeUTF8_4(c);
// Let's add first part right away:
outBuf[outPtr++] = (char) (0xD800 | (c >> 10));
if (outPtr >= outBuf.length) {
outBuf = _textBuffer.finishCurrentSegment();
outPtr = 0;
outEnd = outBuf.length;
}
// Let's add first part right away:
outBuf[outPtr++] = (char) (0xD800 | (c >> 10));
c = 0xDC00 | (c & 0x3FF);
// And let the other char output down below
break;
Expand Down Expand Up @@ -3367,6 +3371,37 @@ protected final void _loadToHaveAtLeast(int minAvailable) throws IOException
}
}

// @since 2.12.2
protected final boolean _tryToLoadToHaveAtLeast(int minAvailable) throws IOException
{
// No input stream, no leading (either we are closed, or have non-stream input source)
if (_inputStream == null) {
return false;
}
// Need to move remaining data in front?
int amount = _inputEnd - _inputPtr;
if (amount > 0 && _inputPtr > 0) {
//_currInputRowStart -= _inputPtr;
System.arraycopy(_inputBuffer, _inputPtr, _inputBuffer, 0, amount);
_inputEnd = amount;
} else {
_inputEnd = 0;
}
// Needs to be done here, as per [dataformats-binary#178]
_currInputProcessed += _inputPtr;
_inputPtr = 0;
while (_inputEnd < minAvailable) {
int count = _inputStream.read(_inputBuffer, _inputEnd, _inputBuffer.length - _inputEnd);
if (count < 1) {
// End of input; not ideal but we'll accept it here
_closeInput();
return false;
}
_inputEnd += count;
}
return true;
}

protected ByteArrayBuilder _getByteArrayBuilder() {
if (_byteArrayBuilder == null) {
_byteArrayBuilder = new ByteArrayBuilder();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,57 @@
package com.fasterxml.jackson.dataformat.cbor.failing;

import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.exc.StreamReadException;
import com.fasterxml.jackson.dataformat.cbor.CBORParser;
import com.fasterxml.jackson.dataformat.cbor.CBORTestBase;

public class ParseInvalidUTF8String236Test extends CBORTestBase
{
// [dataformats-binary#236]: Ends with the first byte of alleged 2-byte
// UTF-8 character; parser trying to access second byte beyond end.
public void testArrayIssue236() throws Exception
// [dataformats-binary#236]: Original version; broken UTF-8 all around.
// but gets hit by end-of-input only (since content not validated)
public void testShortString236Original() throws Exception
{
final byte[] input = {0x66, (byte) 0xef, 0x7d, 0x7d, 0xa, 0x2d, (byte) 0xda};
try (CBORParser p = cborParser(input)) {
assertToken(JsonToken.VALUE_STRING, p.nextToken());
assertEquals("foobar", p.getText());
assertNull(p.nextToken());
try {
String str = p.getText();
fail("Should have failed, did not, String = '"+str+"'");
} catch (StreamReadException e) {
verifyException(e, "Invalid UTF-8 middle byte 0x7d");
}
}
}

// Variant where the length would be valid, but the last byte is partial UTF-8
// code point
public void testShortString236EndsWithPartialUTF8() throws Exception
{
final byte[] input = {0x63, 0x41, 0x2d, (byte) 0xda};
try (CBORParser p = cborParser(input)) {
assertToken(JsonToken.VALUE_STRING, p.nextToken());
try {
String str = p.getText();
fail("Should have failed, did not, String = '"+str+"'");
} catch (StreamReadException e) {
verifyException(e, "Malformed UTF-8 character at the end of");
}
}
}

// Variant where the length itself exceeds buffer
public void testShortString236TruncatedString() throws Exception
{
// String with length of 6 bytes claimed; only 5 provided
final byte[] input = {0x63, 0x41, 0x2d };
try (CBORParser p = cborParser(input)) {
assertToken(JsonToken.VALUE_STRING, p.nextToken());
try {
String str = p.getText();
fail("Should have failed, did not, String = '"+str+"'");
} catch (StreamReadException e) {
verifyException(e, "Unexpected end-of-input in VALUE_STRING");
}
}
}
}
6 changes: 6 additions & 0 deletions release-notes/CREDITS-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,9 @@ Josh Barr (jobarr-amzn@github)

* Contributed #232: (ion) Allow disabling native type ids in IonMapper
(2.12.1)

Fabian Meumertzheim (fmeum@github)

* Reported #236: `ArrayIndexOutOfBoundsException` in `CBORParser` for invalid UTF-8 String
(2.12.2)

5 changes: 5 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ Project: jackson-datatypes-binaryModules:
=== Releases ===
------------------------------------------------------------------------

2.12.2 (not yet released)

#236: `ArrayIndexOutOfBoundsException` in `CBORParser` for invalid UTF-8 String
(reported by Fabian M)

2.12.1 (08-Jan-2021)

#232: (ion) Allow disabling native type ids in IonMapper
Expand Down

0 comments on commit ab74693

Please sign in to comment.