mirror of
https://code.briarproject.org/briar/briar.git
synced 2026-02-20 22:59:54 +01:00
Reject invalid UTF-8 instead of ignoring it.
This commit is contained in:
@@ -14,6 +14,7 @@ import java.util.regex.Pattern;
|
|||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
import static java.nio.charset.CodingErrorAction.IGNORE;
|
import static java.nio.charset.CodingErrorAction.IGNORE;
|
||||||
|
import static java.nio.charset.CodingErrorAction.REPORT;
|
||||||
import static java.util.regex.Pattern.CASE_INSENSITIVE;
|
import static java.util.regex.Pattern.CASE_INSENSITIVE;
|
||||||
|
|
||||||
@SuppressWarnings("CharsetObjectCanBeUsed")
|
@SuppressWarnings("CharsetObjectCanBeUsed")
|
||||||
@@ -52,26 +53,38 @@ public class StringUtils {
|
|||||||
return s.getBytes(UTF_8);
|
return s.getBytes(UTF_8);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String fromUtf8(byte[] bytes) {
|
public static String fromUtf8(byte[] bytes) throws FormatException {
|
||||||
return fromUtf8(bytes, 0, bytes.length);
|
return fromUtf8(bytes, 0, bytes.length, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String fromUtf8(byte[] bytes, int off, int len) {
|
public static String fromUtf8(byte[] bytes, int off, int len)
|
||||||
|
throws FormatException {
|
||||||
|
return fromUtf8(bytes, off, len, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String fromUtf8(byte[] bytes, int off, int len,
|
||||||
|
boolean strict) throws FormatException {
|
||||||
CharsetDecoder decoder = UTF_8.newDecoder();
|
CharsetDecoder decoder = UTF_8.newDecoder();
|
||||||
decoder.onMalformedInput(IGNORE);
|
decoder.onMalformedInput(strict ? REPORT : IGNORE);
|
||||||
decoder.onUnmappableCharacter(IGNORE);
|
decoder.onUnmappableCharacter(strict ? REPORT : IGNORE);
|
||||||
ByteBuffer buffer = ByteBuffer.wrap(bytes, off, len);
|
ByteBuffer buffer = ByteBuffer.wrap(bytes, off, len);
|
||||||
try {
|
try {
|
||||||
return decoder.decode(buffer).toString();
|
return decoder.decode(buffer).toString();
|
||||||
} catch (CharacterCodingException e) {
|
} catch (CharacterCodingException e) {
|
||||||
throw new AssertionError(e);
|
throw new FormatException();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String truncateUtf8(String s, int maxUtf8Length) {
|
public static String truncateUtf8(String s, int maxUtf8Length) {
|
||||||
byte[] utf8 = toUtf8(s);
|
byte[] utf8 = toUtf8(s);
|
||||||
if (utf8.length <= maxUtf8Length) return s;
|
if (utf8.length <= maxUtf8Length) return s;
|
||||||
return fromUtf8(utf8, 0, maxUtf8Length);
|
// Don't be strict when converting back, so that if we truncate a
|
||||||
|
// multi-byte character the whole character gets dropped
|
||||||
|
try {
|
||||||
|
return fromUtf8(utf8, 0, maxUtf8Length, false);
|
||||||
|
} catch (FormatException e) {
|
||||||
|
throw new AssertionError(e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
package org.briarproject.bramble.data;
|
package org.briarproject.bramble.data;
|
||||||
|
|
||||||
|
import org.briarproject.bramble.api.FormatException;
|
||||||
import org.briarproject.bramble.test.BrambleTestCase;
|
import org.briarproject.bramble.test.BrambleTestCase;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
@@ -32,10 +33,13 @@ public class BdfReaderImplFuzzingTest extends BrambleTestCase {
|
|||||||
in.reset();
|
in.reset();
|
||||||
BdfReaderImpl r = new BdfReaderImpl(in, DEFAULT_NESTED_LIMIT,
|
BdfReaderImpl r = new BdfReaderImpl(in, DEFAULT_NESTED_LIMIT,
|
||||||
DEFAULT_MAX_BUFFER_SIZE);
|
DEFAULT_MAX_BUFFER_SIZE);
|
||||||
int length = r.readString().length();
|
try {
|
||||||
assertTrue(length >= 0);
|
int length = r.readString().length();
|
||||||
assertTrue(length <= 20);
|
assertTrue(length <= 20);
|
||||||
assertTrue(r.eof());
|
assertTrue(r.eof());
|
||||||
|
} catch (FormatException e) {
|
||||||
|
// Expected when bytes are not valid UTF-8
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -88,51 +88,58 @@ public class StringUtilsTest extends BrambleTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFromUtf8AcceptsNullCharacterUsingStandardUtf8() {
|
public void testFromUtf8AcceptsNullCharacterUsingStandardUtf8()
|
||||||
|
throws Exception {
|
||||||
// The UTF-8 encoding of the null character is valid
|
// The UTF-8 encoding of the null character is valid
|
||||||
assertEquals("\u0000", StringUtils.fromUtf8(new byte[1]));
|
byte[] utf8 = new byte[1];
|
||||||
|
String actual = StringUtils.fromUtf8(utf8);
|
||||||
|
assertEquals("\u0000", actual);
|
||||||
|
// When we convert back to UTF-8 we should get the original encoding
|
||||||
|
assertArrayEquals(utf8, StringUtils.toUtf8(actual));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test(expected = FormatException.class)
|
||||||
public void testFromUtf8RemovesNullCharacterUsingModifiedUtf8() {
|
public void testFromUtf8RejectsNullCharacterUsingModifiedUtf8()
|
||||||
|
throws Exception {
|
||||||
// The modified UTF-8 encoding of the null character is not valid
|
// The modified UTF-8 encoding of the null character is not valid
|
||||||
byte[] b = new byte[] {
|
byte[] b = new byte[] {
|
||||||
(byte) 0xC0, (byte) 0x80, // Null character as modified UTF-8
|
(byte) 0xC0, (byte) 0x80, // Null character as modified UTF-8
|
||||||
(byte) 0xC8, (byte) 0x85 // U+0205
|
(byte) 0xC8, (byte) 0x85 // U+0205
|
||||||
};
|
};
|
||||||
// Conversion should ignore the invalid character and return the rest
|
StringUtils.fromUtf8(b);
|
||||||
String expected = "\u0205";
|
|
||||||
assertEquals(expected, StringUtils.fromUtf8(b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFromUtf8AcceptsSupplementaryCharacterUsingStandardUtf8() {
|
public void testFromUtf8AcceptsSupplementaryCharacterUsingStandardUtf8()
|
||||||
|
throws Exception {
|
||||||
// The UTF-8 encoding of a supplementary character is valid and should
|
// The UTF-8 encoding of a supplementary character is valid and should
|
||||||
// be converted to a surrogate pair
|
// be converted to a surrogate pair
|
||||||
byte[] b = new byte[] {
|
byte[] utf8 = new byte[] {
|
||||||
(byte) 0xF0, (byte) 0x90, (byte) 0x90, (byte) 0x80, // U+10400
|
(byte) 0xF0, (byte) 0x90, (byte) 0x90, (byte) 0x80, // U+10400
|
||||||
(byte) 0xC8, (byte) 0x85 // U+0205
|
(byte) 0xC8, (byte) 0x85 // U+0205
|
||||||
};
|
};
|
||||||
String expected = "\uD801\uDC00\u0205"; // Surrogate pair
|
String expected = "\uD801\uDC00\u0205"; // Surrogate pair
|
||||||
assertEquals(expected, StringUtils.fromUtf8(b));
|
String actual = StringUtils.fromUtf8(utf8);
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
// When we convert back to UTF-8 we should get the original encoding
|
||||||
|
assertArrayEquals(utf8, StringUtils.toUtf8(actual));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test(expected = FormatException.class)
|
||||||
public void testFromUtf8RemovesSupplementaryCharacterUsingModifiedUtf8() {
|
public void testFromUtf8RejectsSupplementaryCharacterUsingModifiedUtf8()
|
||||||
|
throws Exception {
|
||||||
// The CESU-8 or modified UTF-8 encoding of a supplementary character
|
// The CESU-8 or modified UTF-8 encoding of a supplementary character
|
||||||
// is not valid
|
// is not valid
|
||||||
byte[] b = new byte[] {
|
byte[] utf8 = new byte[] {
|
||||||
(byte) 0xED, (byte) 0xA0, (byte) 0x81, // U+10400 as CESU-8
|
(byte) 0xED, (byte) 0xA0, (byte) 0x81, // U+10400 as CESU-8
|
||||||
(byte) 0xED, (byte) 0xB0, (byte) 0x80,
|
(byte) 0xED, (byte) 0xB0, (byte) 0x80,
|
||||||
(byte) 0xC8, (byte) 0x85 // U+0205
|
(byte) 0xC8, (byte) 0x85 // U+0205
|
||||||
};
|
};
|
||||||
// Conversion should ignore the invalid character and return the rest
|
StringUtils.fromUtf8(utf8);
|
||||||
String expected = "\u0205";
|
|
||||||
assertEquals(expected, StringUtils.fromUtf8(b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFromUtf8EmptyInput() {
|
public void testFromUtf8EmptyInput() throws Exception {
|
||||||
assertEquals("", StringUtils.fromUtf8(new byte[0]));
|
assertEquals("", StringUtils.fromUtf8(new byte[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user