Use the Mann-Whitney U test to determine steady state.

2026-02-13 03:09:04 +01:00 · 2017-12-08 12:13:22 +00:00
parent 596c140310
commit 1a912a29f8
3 changed files with 313 additions and 17 deletions
--- a/bramble-core/src/test/java/org/briarproject/bramble/db/JdbcDatabasePerformanceTest.java
+++ b/bramble-core/src/test/java/org/briarproject/bramble/db/JdbcDatabasePerformanceTest.java
@@ -15,6 +15,7 @@ import org.briarproject.bramble.api.system.Clock;
 import org.briarproject.bramble.system.SystemClock;
 import org.briarproject.bramble.test.BrambleTestCase;
 import org.briarproject.bramble.test.TestDatabaseConfig;
+import org.briarproject.bramble.test.UTest;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -41,6 +42,8 @@ import static org.briarproject.bramble.test.TestUtils.getMessage;
 import static org.briarproject.bramble.test.TestUtils.getRandomBytes;
 import static org.briarproject.bramble.test.TestUtils.getStandardDeviation;
 import static org.briarproject.bramble.test.TestUtils.getTestDirectory;
+import static org.briarproject.bramble.test.UTest.Result.INCONCLUSIVE;
+import static org.briarproject.bramble.test.UTest.Z_CRITICAL_0_1;
 import static org.briarproject.bramble.util.StringUtils.getRandomString;
 import static org.junit.Assert.assertTrue;

@@ -81,12 +84,6 @@ public abstract class JdbcDatabasePerformanceTest extends BrambleTestCase {
 	private static final int METADATA_KEY_LENGTH = 10;
 	private static final int METADATA_VALUE_LENGTH = 100;

-	/**
-	 * How many times to run each benchmark before measuring, to warm up the
-	 * JIT and DB indices.
-	 */
-	private static final int WARMUP_ITERATIONS = 1000;
-
 	/**
 	 * How many times to run each benchmark while measuring.
 	 */
@@ -238,22 +235,23 @@ public abstract class JdbcDatabasePerformanceTest extends BrambleTestCase {
 		populateDatabase(db);
 		db.close();
 		db = openDatabase();
-		// Measure the first run
+		// Measure the first iteration
 		long start = System.nanoTime();
 		task.run(db);
 		long firstDuration = System.nanoTime() - start;
-		// Warm up the JIT and DB indices
-		for (int i = 0; i < WARMUP_ITERATIONS; i++) task.run(db);
-		// Measure the next runs
-		List<Long> durations = new ArrayList<>(MEASUREMENT_ITERATIONS);
-		for (int i = 0; i < MEASUREMENT_ITERATIONS; i++) {
-			start = System.nanoTime();
-			task.run(db);
-			durations.add(System.nanoTime() - start);
+		// Measure blocks of iterations until we reach a steady state
+		List<Double> oldDurations = measureBlock(db, task);
+		List<Double> durations = measureBlock(db, task);
+		int blocks = 2;
+		while (UTest.test(oldDurations, durations, Z_CRITICAL_0_1)
+				!= INCONCLUSIVE) {
+			oldDurations = durations;
+			durations = measureBlock(db, task);
+			blocks++;
 		}
 		db.close();
-		String result = String.format("%s\t%,d\t%,d\t%,d\t%,d", name,
-				firstDuration, (long) getMean(durations),
+		String result = String.format("%s\t%d\t%,d\t%,d\t%,d\t%,d", name,
+				blocks, firstDuration, (long) getMean(durations),
 				(long) getMedian(durations),
 				(long) getStandardDeviation(durations));
 		System.out.println(result);
@@ -324,6 +322,17 @@ public abstract class JdbcDatabasePerformanceTest extends BrambleTestCase {
 		db.commitTransaction(txn);
 	}

+	private List<Double> measureBlock(Database<Connection> db,
+			BenchmarkTask<Database<Connection>> task) throws Exception {
+		List<Double> samples = new ArrayList<>(MEASUREMENT_ITERATIONS);
+		while (samples.size() < MEASUREMENT_ITERATIONS) { // one timed run each
+			long before = System.nanoTime();
+			task.run(db);
+			samples.add((double) (System.nanoTime() - before)); // nanoseconds
+		}
+		return samples;
+	}
+
 	private ClientId getClientId() {
 		return new ClientId(getRandomString(CLIENT_ID_LENGTH));
 	}
--- a/bramble-core/src/test/java/org/briarproject/bramble/test/UTest.java
+++ b/bramble-core/src/test/java/org/briarproject/bramble/test/UTest.java
@@ -0,0 +1,195 @@
+package org.briarproject.bramble.test;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import javax.annotation.Nonnull;
+
+import static org.briarproject.bramble.test.UTest.Result.INCONCLUSIVE;
+import static org.briarproject.bramble.test.UTest.Result.LARGER;
+import static org.briarproject.bramble.test.UTest.Result.SMALLER;
+
+public class UTest {
+
+	public enum Result {
+
+		/**
+		 * The first sample has significantly smaller values than the second.
+		 */
+		SMALLER,
+
+		/**
+		 * There is no significant difference between the samples.
+		 */
+		INCONCLUSIVE,
+
+		/**
+		 * The first sample has significantly larger values than the second.
+		 */
+		LARGER
+	}
+
+	/**
+	 * Critical z value for P = 0.01, two-tailed test.
+	 */
+	public static final double Z_CRITICAL_0_01 = 2.576;
+
+	/**
+	 * Critical z value for P = 0.05, two-tailed test.
+	 */
+	public static final double Z_CRITICAL_0_05 = 1.960;
+
+	/**
+	 * Critical z value for P = 0.1, two-tailed test.
+	 */
+	public static final double Z_CRITICAL_0_1 = 1.645;
+
+	/**
+	 * Performs a two-tailed Mann-Whitney U test on the given samples using
+	 * {@link #Z_CRITICAL_0_01}, the critical z value for P = 0.01.
+	 * <p>
+	 * See http://faculty.vassar.edu/lowry/ch11a.html for the method used.
+	 * @return how the values in {@code a} compare to those in {@code b}
+	 */
+	public static Result test(List<Double> a, List<Double> b) {
+		return test(a, b, Z_CRITICAL_0_01);
+	}
+
+	/**
+	 * Performs a two-tailed Mann-Whitney U test on the given samples using the
+	 * given critical z value. The method is explained at
+	 * http://faculty.vassar.edu/lowry/ch11a.html
+	 * <p>
+	 * Critical z values for two-tailed tests can be found at
+	 * http://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_hypothesistest-means-proportions/bs704_hypothesistest-means-proportions3.html
+	 *
+	 * @throws IllegalArgumentException if either sample has fewer than 5 values
+	 */
+	public static Result test(List<Double> a, List<Double> b,
+			double zCritical) {
+		int nA = a.size(), nB = b.size();
+		if (nA < 5 || nB < 5)
+			throw new IllegalArgumentException("Too few values for U test");
+
+		// Sort the values, keeping track of which sample they belong to
+		List<Value> sorted = new ArrayList<>(nA + nB);
+		for (Double d : a) sorted.add(new Value(d, true));
+		for (Double d : b) sorted.add(new Value(d, false));
+		Collections.sort(sorted);
+
+		// Assign ranks to the values
+		int i = 0, size = sorted.size();
+		while (i < size) {
+			double value = sorted.get(i).value;
+			int ties = 1;
+			while (i + ties < size && sorted.get(i + ties).value == value)
+				ties++;
+			int bottomRank = i + 1;
+			int topRank = i + ties;
+			double meanRank = (bottomRank + topRank) / 2.0;
+			for (int j = 0; j < ties; j++)
+				sorted.get(i + j).rank = meanRank;
+			i += ties;
+		}
+
+		// Calculate the total rank of each sample
+		double tA = 0, tB = 0;
+		for (Value v : sorted) {
+			if (v.a) tA += v.rank;
+			else tB += v.rank;
+		}
+
+		// Standard deviation of both total ranks (cast avoids int overflow)
+		double sigma = Math.sqrt((double) nA * nB * (nA + nB + 1.0) / 12.0);
+
+		// Means of the distributions of the total ranks
+		double muA = nA * (nA + nB + 1.0) / 2.0;
+		double muB = nB * (nA + nB + 1.0) / 2.0;
+
+		// Calculate z scores
+		double zA, zB;
+		if (tA > muA) zA = (tA - muA - 0.5) / sigma;
+		else zA = (tA - muA + 0.5) / sigma;
+		if (tB > muB) zB = (tB - muB - 0.5) / sigma;
+		else zB = (tB - muB + 0.5) / sigma;
+
+		// Compare z scores to critical value
+		if (zA > zCritical) return LARGER;
+		else if (zB > zCritical) return SMALLER;
+		else return INCONCLUSIVE;
+	}
+
+	public static void main(String[] args) {
+		if (args.length < 2 || args.length > 3)
+			die("usage: UTest <file1> <file2> [zCritical]");
+
+		List<Double> a = readFile(args[0]);
+		List<Double> b = readFile(args[1]);
+		int nA = a.size(), nB = b.size();
+		if (nA < 5 || nB < 5) die("Too few values for U test\n");
+
+		double zCritical;
+		if (args.length == 3) zCritical = Double.valueOf(args[2]);
+		else zCritical = Z_CRITICAL_0_01;
+
+		switch (test(a, b, zCritical)) {
+			case SMALLER:
+				System.out.println(args[0] + " is smaller");
+				break;
+			case INCONCLUSIVE:
+				System.out.println("No significant difference");
+				break;
+			case LARGER:
+				System.out.println(args[0] + " is larger");
+				break;
+		}
+	}
+
+	private static void die(String message) {
+		System.err.println(message);
+		System.exit(1);
+	}
+
+	// Reads one value per line; any failure is reported via die()
+	private static List<Double> readFile(String filename) {
+		List<Double> values = new ArrayList<>();
+		// try-with-resources closes the reader even if a line fails to parse
+		try (BufferedReader in =
+				new BufferedReader(new FileReader(filename))) {
+			String s;
+			while ((s = in.readLine()) != null) values.add(Double.valueOf(s));
+		} catch (FileNotFoundException fnf) {
+			die(filename + " not found");
+		} catch (IOException io) {
+			die("Error reading from " + filename);
+		} catch (NumberFormatException nf) {
+			die("Invalid data in " + filename);
+		}
+		return values;
+	}
+
+	// A sample value tagged with the sample it came from and its rank
+	private static class Value implements Comparable<Value> {
+
+		private final double value;
+		private final boolean a; // true if the value is from the first sample
+
+		private double rank; // assigned once the values have been sorted
+
+		private Value(double value, boolean a) {
+			this.value = value;
+			this.a = a;
+		}
+
+		@Override
+		public int compareTo(@Nonnull Value v) {
+			// Double.compare is a total order, so NaN values can't violate
+			// the Comparable contract while sorting
+			return Double.compare(value, v.value);
+		}
+	}
+}
--- a/bramble-core/src/test/java/org/briarproject/bramble/test/UTestTest.java
+++ b/bramble-core/src/test/java/org/briarproject/bramble/test/UTestTest.java
@@ -0,0 +1,92 @@
+package org.briarproject.bramble.test;
+
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import static org.briarproject.bramble.test.UTest.Result.INCONCLUSIVE;
+import static org.briarproject.bramble.test.UTest.Result.LARGER;
+import static org.briarproject.bramble.test.UTest.Result.SMALLER;
+import static org.junit.Assert.assertEquals;
+
+public class UTestTest extends BrambleTestCase {
+
+	private final Random random = new Random();
+
+	@Test
+	public void testSmallerLarger() {
+		// Create two samples, which may have different sizes
+		int aSize = random.nextInt(1000) + 1000;
+		int bSize = random.nextInt(1000) + 1000;
+		List<Double> a = new ArrayList<>(aSize);
+		List<Double> b = new ArrayList<>(bSize);
+		// Values in b are significantly larger
+		for (int i = 0; i < aSize; i++) a.add(random.nextDouble());
+		for (int i = 0; i < bSize; i++) b.add(random.nextDouble() + 0.1);
+		// The U test should detect that a is smaller than b
+		assertEquals(SMALLER, UTest.test(a, b));
+		assertEquals(LARGER, UTest.test(b, a));
+	}
+
+	@Test
+	public void testSmallerLargerWithTies() {
+		// Create two samples, which may have different sizes
+		int aSize = random.nextInt(1000) + 1000;
+		int bSize = random.nextInt(1000) + 1000;
+		List<Double> a = new ArrayList<>(aSize);
+		List<Double> b = new ArrayList<>(bSize);
+		// Put some tied values in both samples
+		addTiedValues(a, b);
+		// Values in b are significantly larger
+		for (int i = a.size(); i < aSize; i++) a.add(random.nextDouble());
+		for (int i = b.size(); i < bSize; i++) b.add(random.nextDouble() + 0.1);
+		// The U test should detect that a is smaller than b
+		assertEquals(SMALLER, UTest.test(a, b));
+		assertEquals(LARGER, UTest.test(b, a));
+	}
+
+	@Test
+	public void testInconclusive() {
+		// Create two samples of 1000 to 1999 values each
+		int aSize = random.nextInt(1000) + 1000;
+		int bSize = random.nextInt(1000) + 1000;
+		List<Double> a = new ArrayList<>(aSize);
+		List<Double> b = new ArrayList<>(bSize);
+		// Values in a and b are drawn from the same distribution
+		for (int i = 0; i < aSize; i++) a.add(random.nextDouble());
+		for (int i = 0; i < bSize; i++) b.add(random.nextDouble());
+		// Expect no difference; NOTE(review): may rarely fail by chance
+		assertEquals(INCONCLUSIVE, UTest.test(a, b));
+		assertEquals(INCONCLUSIVE, UTest.test(b, a));
+	}
+
+	@Test
+	public void testInconclusiveWithTies() {
+		// Create two samples of 1000 to 1999 values each
+		int aSize = random.nextInt(1000) + 1000;
+		int bSize = random.nextInt(1000) + 1000;
+		List<Double> a = new ArrayList<>(aSize);
+		List<Double> b = new ArrayList<>(bSize);
+		// Put some tied values in both samples
+		addTiedValues(a, b);
+		// Fill the rest of a and b from the same distribution
+		for (int i = a.size(); i < aSize; i++) a.add(random.nextDouble());
+		for (int i = b.size(); i < bSize; i++) b.add(random.nextDouble());
+		// Expect no difference; NOTE(review): may rarely fail by chance
+		assertEquals(INCONCLUSIVE, UTest.test(a, b));
+		assertEquals(INCONCLUSIVE, UTest.test(b, a));
+	}
+
+	private void addTiedValues(List<Double> a, List<Double> b) {
+		for (int i = 0; i < 10; i++) {
+			double tiedValue = random.nextDouble();
+			int numTies = random.nextInt(5) + 1;
+			for (int j = 0; j < numTies; j++) {
+				if (random.nextBoolean()) a.add(tiedValue);
+				else b.add(tiedValue);
+			}
+		}
+	}
+}