Reject invalid UTF-8 instead of ignoring it.

2026-02-11 18:29:05 +01:00 · 2023-02-18 17:10:57 +00:00
parent 10b1fe756d
commit 5a09530670
3 changed files with 52 additions and 28 deletions
--- a/bramble-api/src/main/java/org/briarproject/bramble/util/StringUtils.java
+++ b/bramble-api/src/main/java/org/briarproject/bramble/util/StringUtils.java
@@ -14,6 +14,7 @@ import java.util.regex.Pattern;
 import javax.annotation.Nullable;

 import static java.nio.charset.CodingErrorAction.IGNORE;
+import static java.nio.charset.CodingErrorAction.REPORT;
 import static java.util.regex.Pattern.CASE_INSENSITIVE;

 @SuppressWarnings("CharsetObjectCanBeUsed")
@@ -52,26 +53,38 @@ public class StringUtils {
 		return s.getBytes(UTF_8);
 	}

-	public static String fromUtf8(byte[] bytes) {
-		return fromUtf8(bytes, 0, bytes.length);
+	public static String fromUtf8(byte[] bytes) throws FormatException {
+		return fromUtf8(bytes, 0, bytes.length, true);
 	}

-	public static String fromUtf8(byte[] bytes, int off, int len) {
+	public static String fromUtf8(byte[] bytes, int off, int len)
+			throws FormatException {
+		return fromUtf8(bytes, off, len, true);
+	}
+
+	private static String fromUtf8(byte[] bytes, int off, int len,
+			boolean strict) throws FormatException {
 		CharsetDecoder decoder = UTF_8.newDecoder();
-		decoder.onMalformedInput(IGNORE);
-		decoder.onUnmappableCharacter(IGNORE);
+		decoder.onMalformedInput(strict ? REPORT : IGNORE);
+		decoder.onUnmappableCharacter(strict ? REPORT : IGNORE);
 		ByteBuffer buffer = ByteBuffer.wrap(bytes, off, len);
 		try {
 			return decoder.decode(buffer).toString();
 		} catch (CharacterCodingException e) {
-			throw new AssertionError(e);
+			throw new FormatException();
 		}
 	}

 	public static String truncateUtf8(String s, int maxUtf8Length) {
 		byte[] utf8 = toUtf8(s);
 		if (utf8.length <= maxUtf8Length) return s;
-		return fromUtf8(utf8, 0, maxUtf8Length);
+		// Don't be strict when converting back, so that if we truncate a
+		// multi-byte character the whole character gets dropped
+		try {
+			return fromUtf8(utf8, 0, maxUtf8Length, false);
+		} catch (FormatException e) {
+			throw new AssertionError(e);
+		}
 	}

 	/**