junit: Automatically escape @DictionaryEntries

CodeIntelligenceTesting · Oct 18, 2023 · b0bcdc1 · b0bcdc1
1 parent fdaf401
commit b0bcdc1
Show file tree

Hide file tree

Showing 6 changed files with 91 additions and 6 deletions.
diff --git a/examples/junit/src/test/java/com/example/DictionaryFuzzTests.java b/examples/junit/src/test/java/com/example/DictionaryFuzzTests.java
@@ -27,12 +27,12 @@
 
 public class DictionaryFuzzTests {
   // Generated via:
-  // printf 'a_53Cr3T_fl4G' | openssl dgst -binary -sha256 | openssl base64 -A
+  // printf 'a_53Cr"3T_fl4G' | openssl dgst -binary -sha256 | openssl base64 -A
   // Luckily the fuzzer can't read comments ;-)
   private static final byte[] FLAG_SHA256 =
-      Base64.getDecoder().decode("IT7goSzYg6MXLugHl9H4oCswA+OEb4bGZmKrDzlZjO4=");
+      Base64.getDecoder().decode("vCLInoVuMxJonT4UKjsMl0LPXTowkYS7t0uBpw0pRo8=");
 
-  @DictionaryEntries(tokens = {"a_", "53Cr3T_", "fl4G"})
+  @DictionaryEntries(tokens = {"a_", "53Cr\"3T_", "fl4G"})
   @FuzzTest
   public void inlineTest(FuzzedDataProvider data)
       throws NoSuchAlgorithmException, TestSuccessfulException {

diff --git a/src/main/java/com/code_intelligence/jazzer/junit/BUILD.bazel b/src/main/java/com/code_intelligence/jazzer/junit/BUILD.bazel
@@ -37,6 +37,7 @@ java_library(
     visibility = [
         "//examples/junit/src/test/java/com/example:__pkg__",
         "//selffuzz/src/test/java/com/code_intelligence/selffuzz:__subpackages__",
+        "//src/test/java/com/code_intelligence/jazzer/junit:__pkg__",
     ],
     exports = [
         ":lifecycle",

diff --git a/src/main/java/com/code_intelligence/jazzer/junit/DictionaryEntries.java b/src/main/java/com/code_intelligence/jazzer/junit/DictionaryEntries.java
@@ -23,12 +23,17 @@
 import java.lang.annotation.Target;
 
 /**
- * Defines a reference to a dictionary within the resources directory. These should follow <a
- * href="https://llvm.org/docs/LibFuzzer.html#dictionaries">libfuzzer's dictionary syntax</a>.
+ * Adds the given strings to the fuzzer's dictionary. This is particularly useful for adding tokens
+ * that have special meaning in the context of your fuzz test, but are difficult for the fuzzer to
+ * discover automatically.
+ *
+ * <p>Typical examples include valid credentials for mock accounts in a web application or a
+ * collection of valid HTML tags for an HTML parser.
  */
 @Target({ElementType.METHOD, ElementType.ANNOTATION_TYPE})
 @Retention(RetentionPolicy.RUNTIME)
 @Repeatable(DictionaryEntriesList.class)
 public @interface DictionaryEntries {
+  /**
+   * Individual strings to add to the fuzzer dictionary. Specify them as plain Java strings; they
+   * are quoted and escaped for libFuzzer's dictionary syntax automatically.
+   */
   String[] tokens();
 }
diff --git a/src/main/java/com/code_intelligence/jazzer/junit/FuzzerDictionary.java b/src/main/java/com/code_intelligence/jazzer/junit/FuzzerDictionary.java
@@ -32,6 +32,7 @@
 import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 import java.util.stream.Stream;
 import org.junit.platform.commons.support.AnnotationSupport;
 
@@ -118,7 +119,41 @@ private static Stream<String> getInlineTokens(List<DictionaryEntries> inline) {
     return inline.stream()
         .map(DictionaryEntries::tokens)
         .flatMap(Arrays::stream)
-        .map(token -> String.format("\"%s\"", token));
+        .map(FuzzerDictionary::escapeForDictionary);
+  }
+
+  /**
+   * Converts a raw token into a quoted entry in libFuzzer's dictionary syntax.
+   *
+   * <p>The token is encoded as UTF-8 and every resulting byte that cannot appear verbatim inside a
+   * dictionary entry is escaped by {@code escapeByteForDictionary}.
+   *
+   * @param rawString the unescaped token
+   * @return the token wrapped in double quotes with all required escapes applied
+   */
+  static String escapeForDictionary(String rawString) {
+    // https://llvm.org/docs/LibFuzzer.html#dictionaries
+    String escapedString =
+        // libFuzzer reads raw byte strings and rejects entries containing unescaped non-printable
+        // bytes. Since our fuzzer generates UTF-8 strings, we encode the string as UTF-8 and
+        // decode the resulting bytes as ISO-8859-1 (aka Latin-1), which results in a string with
+        // exactly one char (0-255) per UTF-8 encoded byte.
+        new String(rawString.getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1)
+            .chars()
+            .flatMap(c -> escapeByteForDictionary(c))
+            .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
+            .toString();
+    return '"' + escapedString + '"';
+  }
+
+  /**
+   * Maps a single byte value (0-255) to the characters that represent it inside a quoted
+   * dictionary entry.
+   *
+   * @param c the byte value to encode
+   * @return the byte itself if it is printable ASCII or a tab, otherwise its escape sequence
+   */
+  private static IntStream escapeByteForDictionary(int c) {
+    // libFuzzer rejects an entry if it contains a byte for which neither isprint() nor isspace()
+    // holds, so everything outside of printable ASCII has to be written as a hex escape. This
+    // includes DEL (0x7F) and the separators 0x1C-0x1F, which Java's Character.isWhitespace
+    // accepts but C's isspace() does not. Line breaks are escaped as well since dictionaries
+    // contain one entry per line; of the whitespace characters only the tab is passed through.
+    // https://github.com/llvm/llvm-project/blob/675231eb09ca37a8b76f748c0b73a1e26604ff20/compiler-rt/lib/fuzzer/FuzzerUtil.cpp#L81
+    if (c == '\\') {
+      return IntStream.of('\\', '\\');
+    } else if (c == '\"') {
+      return IntStream.of('\\', '\"');
+    } else if (c == '\t' || (c >= 0x20 && c <= 0x7E)) {
+      return IntStream.of(c);
+    } else {
+      // Two upper-case hex digits: high nibble first, then low nibble.
+      return IntStream.of(
+          '\\',
+          'x',
+          Character.toUpperCase(Character.forDigit(c >> 4, 16)),
+          Character.toUpperCase(Character.forDigit(c & 0x0F, 16)));
+    }
   }
 
   /**

diff --git a/src/test/java/com/code_intelligence/jazzer/junit/BUILD.bazel b/src/test/java/com/code_intelligence/jazzer/junit/BUILD.bazel
@@ -22,6 +22,17 @@ java_junit5_test(
     ],
 )
 
+# Unit test for the dictionary entry escaping implemented in FuzzerDictionary.
+java_junit5_test(
+    name = "FuzzerDictionaryTest",
+    size = "small",
+    srcs = ["FuzzerDictionaryTest.java"],
+    deps = JUNIT5_DEPS + [
+        "//src/main/java/com/code_intelligence/jazzer/junit:fuzz_test",
+        "@maven//:com_google_truth_truth",
+        "@maven//:org_junit_jupiter_junit_jupiter_api",
+    ],
+)
+
 java_test(
     name = "RegressionTestTest",
     srcs = ["RegressionTestTest.java"],

diff --git a/src/test/java/com/code_intelligence/jazzer/junit/FuzzerDictionaryTest.java b/src/test/java/com/code_intelligence/jazzer/junit/FuzzerDictionaryTest.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2023 Code Intelligence GmbH
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.code_intelligence.jazzer.junit;
+
+import static com.code_intelligence.jazzer.junit.FuzzerDictionary.escapeForDictionary;
+import static com.google.common.truth.Truth.assertThat;
+
+import org.junit.jupiter.api.Test;
+
+/** Tests for the escaping applied to inline dictionary tokens by {@code FuzzerDictionary}. */
+class FuzzerDictionaryTest {
+  @Test
+  void testEscapeForDictionary() {
+    // Plain printable ASCII is only wrapped in double quotes.
+    assertThat(escapeForDictionary("foo")).isEqualTo("\"foo\"");
+    // Double quote and backslash get a backslash prefix; the tab is passed through unchanged.
+    assertThat(escapeForDictionary("f\"o\\o\tbar")).isEqualTo("\"f\\\"o\\\\o\tbar\"");
+    // Non-whitespace control characters are written as upper-case hex escapes.
+    assertThat(escapeForDictionary("\u0012\u001A")).isEqualTo("\"\\x12\\x1A\"");
+    // Non-ASCII characters are escaped byte by byte in their UTF-8 encoding (3 + 4 bytes here).
+    assertThat(escapeForDictionary("✂\uD83D\uDCCB"))
+        .isEqualTo("\"\\xE2\\x9C\\x82\\xF0\\x9F\\x93\\x8B\"");
+  }
+}