diff --git a/bramble-api/src/main/java/org/briarproject/bramble/util/StringUtils.java b/bramble-api/src/main/java/org/briarproject/bramble/util/StringUtils.java index d9a19e70a..59beeeee2 100644 --- a/bramble-api/src/main/java/org/briarproject/bramble/util/StringUtils.java +++ b/bramble-api/src/main/java/org/briarproject/bramble/util/StringUtils.java @@ -14,6 +14,7 @@ import java.util.regex.Pattern; import javax.annotation.Nullable; import static java.nio.charset.CodingErrorAction.IGNORE; +import static java.nio.charset.CodingErrorAction.REPORT; import static java.util.regex.Pattern.CASE_INSENSITIVE; @SuppressWarnings("CharsetObjectCanBeUsed") @@ -52,26 +53,38 @@ public class StringUtils { return s.getBytes(UTF_8); } - public static String fromUtf8(byte[] bytes) { - return fromUtf8(bytes, 0, bytes.length); + public static String fromUtf8(byte[] bytes) throws FormatException { + return fromUtf8(bytes, 0, bytes.length, true); } - public static String fromUtf8(byte[] bytes, int off, int len) { + public static String fromUtf8(byte[] bytes, int off, int len) + throws FormatException { + return fromUtf8(bytes, off, len, true); + } + + private static String fromUtf8(byte[] bytes, int off, int len, + boolean strict) throws FormatException { CharsetDecoder decoder = UTF_8.newDecoder(); - decoder.onMalformedInput(IGNORE); - decoder.onUnmappableCharacter(IGNORE); + decoder.onMalformedInput(strict ? REPORT : IGNORE); + decoder.onUnmappableCharacter(strict ? REPORT : IGNORE); ByteBuffer buffer = ByteBuffer.wrap(bytes, off, len); try { return decoder.decode(buffer).toString(); } catch (CharacterCodingException e) { - throw new AssertionError(e); + throw new FormatException(); } } public static String truncateUtf8(String s, int maxUtf8Length) { byte[] utf8 = toUtf8(s); if (utf8.length <= maxUtf8Length) return s; - return fromUtf8(utf8, 0, maxUtf8Length); + // Don't be strict when converting back, so that if we truncate a + // multi-byte character the whole character gets dropped + try { + return fromUtf8(utf8, 0, maxUtf8Length, false); + } catch (FormatException e) { + throw new AssertionError(e); + } } /** diff --git a/bramble-core/src/test/java/org/briarproject/bramble/data/BdfReaderImplFuzzingTest.java b/bramble-core/src/test/java/org/briarproject/bramble/data/BdfReaderImplFuzzingTest.java index 852e66853..f83a6bc6f 100644 --- a/bramble-core/src/test/java/org/briarproject/bramble/data/BdfReaderImplFuzzingTest.java +++ b/bramble-core/src/test/java/org/briarproject/bramble/data/BdfReaderImplFuzzingTest.java @@ -1,5 +1,6 @@ package org.briarproject.bramble.data; +import org.briarproject.bramble.api.FormatException; import org.briarproject.bramble.test.BrambleTestCase; import org.junit.Before; import org.junit.Test; @@ -32,10 +33,13 @@ public class BdfReaderImplFuzzingTest extends BrambleTestCase { in.reset(); BdfReaderImpl r = new BdfReaderImpl(in, DEFAULT_NESTED_LIMIT, DEFAULT_MAX_BUFFER_SIZE); - int length = r.readString().length(); - assertTrue(length >= 0); - assertTrue(length <= 20); - assertTrue(r.eof()); + try { + int length = r.readString().length(); + assertTrue(length <= 20); + assertTrue(r.eof()); + } catch (FormatException e) { + // Expected when bytes are not valid UTF-8 + } } } } diff --git a/bramble-core/src/test/java/org/briarproject/bramble/util/StringUtilsTest.java b/bramble-core/src/test/java/org/briarproject/bramble/util/StringUtilsTest.java index 593640824..b3c13c27b 100644 --- a/bramble-core/src/test/java/org/briarproject/bramble/util/StringUtilsTest.java +++ b/bramble-core/src/test/java/org/briarproject/bramble/util/StringUtilsTest.java @@ -88,51 +88,58 @@ public class StringUtilsTest extends BrambleTestCase { } @Test - public void testFromUtf8AcceptsNullCharacterUsingStandardUtf8() { + public void testFromUtf8AcceptsNullCharacterUsingStandardUtf8() + throws Exception { // The UTF-8 encoding of the null character is valid - assertEquals("\u0000", StringUtils.fromUtf8(new byte[1])); + byte[] utf8 = new byte[1]; + String actual = StringUtils.fromUtf8(utf8); + assertEquals("\u0000", actual); + // When we convert back to UTF-8 we should get the original encoding + assertArrayEquals(utf8, StringUtils.toUtf8(actual)); } - @Test - public void testFromUtf8RemovesNullCharacterUsingModifiedUtf8() { + @Test(expected = FormatException.class) + public void testFromUtf8RejectsNullCharacterUsingModifiedUtf8() + throws Exception { // The modified UTF-8 encoding of the null character is not valid byte[] b = new byte[] { (byte) 0xC0, (byte) 0x80, // Null character as modified UTF-8 (byte) 0xC8, (byte) 0x85 // U+0205 }; - // Conversion should ignore the invalid character and return the rest - String expected = "\u0205"; - assertEquals(expected, StringUtils.fromUtf8(b)); + StringUtils.fromUtf8(b); } @Test - public void testFromUtf8AcceptsSupplementaryCharacterUsingStandardUtf8() { + public void testFromUtf8AcceptsSupplementaryCharacterUsingStandardUtf8() + throws Exception { // The UTF-8 encoding of a supplementary character is valid and should // be converted to a surrogate pair - byte[] b = new byte[] { + byte[] utf8 = new byte[] { (byte) 0xF0, (byte) 0x90, (byte) 0x90, (byte) 0x80, // U+10400 (byte) 0xC8, (byte) 0x85 // U+0205 }; String expected = "\uD801\uDC00\u0205"; // Surrogate pair - assertEquals(expected, StringUtils.fromUtf8(b)); + String actual = StringUtils.fromUtf8(utf8); + assertEquals(expected, actual); + // When we convert back to UTF-8 we should get the original encoding + assertArrayEquals(utf8, StringUtils.toUtf8(actual)); } - @Test - public void testFromUtf8RemovesSupplementaryCharacterUsingModifiedUtf8() { + @Test(expected = FormatException.class) + public void testFromUtf8RejectsSupplementaryCharacterUsingModifiedUtf8() + throws Exception { // The CESU-8 or modified UTF-8 encoding of a supplementary character // is not valid - byte[] b = new byte[] { + byte[] utf8 = new byte[] { (byte) 0xED, (byte) 0xA0, (byte) 0x81, // U+10400 as CSEU-8 (byte) 0xED, (byte) 0xB0, (byte) 0x80, (byte) 0xC8, (byte) 0x85 // U+0205 }; - // Conversion should ignore the invalid character and return the rest - String expected = "\u0205"; - assertEquals(expected, StringUtils.fromUtf8(b)); + StringUtils.fromUtf8(utf8); } @Test - public void testFromUtf8EmptyInput() { + public void testFromUtf8EmptyInput() throws Exception { assertEquals("", StringUtils.fromUtf8(new byte[0])); }