Use the Mann-Whitney U test to determine steady state.

This commit is contained in:
akwizgran
2017-12-08 12:13:22 +00:00
parent 596c140310
commit 1a912a29f8
3 changed files with 313 additions and 17 deletions

View File

@@ -15,6 +15,7 @@ import org.briarproject.bramble.api.system.Clock;
import org.briarproject.bramble.system.SystemClock;
import org.briarproject.bramble.test.BrambleTestCase;
import org.briarproject.bramble.test.TestDatabaseConfig;
import org.briarproject.bramble.test.UTest;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -41,6 +42,8 @@ import static org.briarproject.bramble.test.TestUtils.getMessage;
import static org.briarproject.bramble.test.TestUtils.getRandomBytes;
import static org.briarproject.bramble.test.TestUtils.getStandardDeviation;
import static org.briarproject.bramble.test.TestUtils.getTestDirectory;
import static org.briarproject.bramble.test.UTest.Result.INCONCLUSIVE;
import static org.briarproject.bramble.test.UTest.Z_CRITICAL_0_1;
import static org.briarproject.bramble.util.StringUtils.getRandomString;
import static org.junit.Assert.assertTrue;
@@ -81,12 +84,6 @@ public abstract class JdbcDatabasePerformanceTest extends BrambleTestCase {
private static final int METADATA_KEY_LENGTH = 10;
private static final int METADATA_VALUE_LENGTH = 100;
/**
* How many times to run each benchmark before measuring, to warm up the
* JIT and DB indices.
*/
private static final int WARMUP_ITERATIONS = 1000;
/**
* How many times to run each benchmark while measuring.
*/
@@ -238,22 +235,23 @@ public abstract class JdbcDatabasePerformanceTest extends BrambleTestCase {
populateDatabase(db);
db.close();
db = openDatabase();
// Measure the first run
// Measure the first iteration
long start = System.nanoTime();
task.run(db);
long firstDuration = System.nanoTime() - start;
// Warm up the JIT and DB indices
for (int i = 0; i < WARMUP_ITERATIONS; i++) task.run(db);
// Measure the next runs
List<Long> durations = new ArrayList<>(MEASUREMENT_ITERATIONS);
for (int i = 0; i < MEASUREMENT_ITERATIONS; i++) {
start = System.nanoTime();
task.run(db);
durations.add(System.nanoTime() - start);
// Measure blocks of iterations until we reach a steady state
List<Double> oldDurations = measureBlock(db, task);
List<Double> durations = measureBlock(db, task);
int blocks = 2;
while (UTest.test(oldDurations, durations, Z_CRITICAL_0_1)
!= INCONCLUSIVE) {
oldDurations = durations;
durations = measureBlock(db, task);
blocks++;
}
db.close();
String result = String.format("%s\t%,d\t%,d\t%,d\t%,d", name,
firstDuration, (long) getMean(durations),
String result = String.format("%s\t%d\t%,d\t%,d\t%,d\t%,d", name,
blocks, firstDuration, (long) getMean(durations),
(long) getMedian(durations),
(long) getStandardDeviation(durations));
System.out.println(result);
@@ -324,6 +322,17 @@ public abstract class JdbcDatabasePerformanceTest extends BrambleTestCase {
db.commitTransaction(txn);
}
/**
 * Runs the given task MEASUREMENT_ITERATIONS times against the database and
 * records how long each run took.
 *
 * @param db the database passed to every run of the task
 * @param task the benchmark task to time
 * @return the duration of each run in nanoseconds, in execution order
 * @throws Exception if the task throws
 */
private List<Double> measureBlock(Database<Connection> db,
		BenchmarkTask<Database<Connection>> task) throws Exception {
	List<Double> nanos = new ArrayList<>(MEASUREMENT_ITERATIONS);
	int remaining = MEASUREMENT_ITERATIONS;
	while (remaining-- > 0) {
		long before = System.nanoTime();
		task.run(db);
		long elapsed = System.nanoTime() - before;
		// Stored as doubles because UTest works on List<Double>
		nanos.add((double) elapsed);
	}
	return nanos;
}
/**
 * Creates a client ID from a random string of CLIENT_ID_LENGTH.
 */
private ClientId getClientId() {
	String id = getRandomString(CLIENT_ID_LENGTH);
	return new ClientId(id);
}

View File

@@ -0,0 +1,195 @@
package org.briarproject.bramble.test;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import javax.annotation.Nonnull;
import static org.briarproject.bramble.test.UTest.Result.INCONCLUSIVE;
import static org.briarproject.bramble.test.UTest.Result.LARGER;
import static org.briarproject.bramble.test.UTest.Result.SMALLER;
/**
 * Two-tailed Mann-Whitney U test using the normal approximation with a
 * continuity correction. Can be called from code via {@link #test} or run
 * from the command line on two files containing one number per line.
 */
public class UTest {

	public enum Result {

		/**
		 * The first sample has significantly smaller values than the second.
		 */
		SMALLER,

		/**
		 * There is no significant difference between the samples.
		 */
		INCONCLUSIVE,

		/**
		 * The first sample has significantly larger values than the second.
		 */
		LARGER
	}

	/**
	 * Critical z value for P = 0.01, two-tailed test.
	 */
	public static final double Z_CRITICAL_0_01 = 2.576;

	/**
	 * Critical z value for P = 0.05, two-tailed test.
	 */
	public static final double Z_CRITICAL_0_05 = 1.960;

	/**
	 * Critical z value for P = 0.1, two-tailed test.
	 */
	public static final double Z_CRITICAL_0_1 = 1.645;

	/**
	 * Performs a two-tailed Mann-Whitney U test on the given samples using the
	 * critical z value for P = 0.01.
	 * <p>
	 * The method used here is explained at
	 * http://faculty.vassar.edu/lowry/ch11a.html
	 *
	 * @param a the first sample, containing at least five values
	 * @param b the second sample, containing at least five values
	 * @return how the values of {@code a} compare to the values of {@code b}
	 * @throws IllegalArgumentException if either sample has fewer than five
	 * values
	 */
	public static Result test(List<Double> a, List<Double> b) {
		return test(a, b, Z_CRITICAL_0_01);
	}

	/**
	 * Performs a two-tailed Mann-Whitney U test on the given samples using the
	 * given critical z value.
	 * <p>
	 * The method used here is explained at
	 * http://faculty.vassar.edu/lowry/ch11a.html
	 * <p>
	 * Critical z values for two-tailed tests can be found at
	 * http://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_hypothesistest-means-proportions/bs704_hypothesistest-means-proportions3.html
	 * <p>
	 * Values are ordered as by {@link Double#compare(double, double)}, so any
	 * NaN values are ranked above all other values.
	 *
	 * @param a the first sample, containing at least five values
	 * @param b the second sample, containing at least five values
	 * @param zCritical the z score that must be exceeded for a difference to
	 * be considered significant
	 * @return how the values of {@code a} compare to the values of {@code b}
	 * @throws IllegalArgumentException if either sample has fewer than five
	 * values
	 */
	public static Result test(List<Double> a, List<Double> b,
			double zCritical) {
		int nA = a.size(), nB = b.size();
		if (nA < 5 || nB < 5)
			throw new IllegalArgumentException("Too few values for U test");
		// Sort the values, keeping track of which sample they belong to
		List<Value> sorted = new ArrayList<>(nA + nB);
		for (Double d : a) sorted.add(new Value(d, true));
		for (Double d : b) sorted.add(new Value(d, false));
		Collections.sort(sorted);
		assignRanks(sorted);
		// Calculate the total rank of each sample
		double tA = 0, tB = 0;
		for (Value v : sorted) {
			if (v.a) tA += v.rank;
			else tB += v.rank;
		}
		// The standard deviation of both total ranks is the same. Widen to
		// double before multiplying so large samples can't overflow an int
		double sigma = Math.sqrt((double) nA * nB * (nA + nB + 1.0) / 12.0);
		// Means of the distributions of the total ranks
		double muA = nA * (nA + nB + 1.0) / 2.0;
		double muB = nB * (nA + nB + 1.0) / 2.0;
		// Compare z scores to critical value
		if (zScore(tA, muA, sigma) > zCritical) return Result.LARGER;
		if (zScore(tB, muB, sigma) > zCritical) return Result.SMALLER;
		return Result.INCONCLUSIVE;
	}

	/**
	 * Assigns ranks starting from 1 to the given sorted values. Values that
	 * are numerically equal share the mean of the ranks they occupy.
	 */
	private static void assignRanks(List<Value> sorted) {
		int i = 0, size = sorted.size();
		while (i < size) {
			double value = sorted.get(i).value;
			int ties = 1;
			while (i + ties < size && sorted.get(i + ties).value == value)
				ties++;
			int bottomRank = i + 1;
			int topRank = i + ties;
			double meanRank = (bottomRank + topRank) / 2.0;
			for (int j = 0; j < ties; j++)
				sorted.get(i + j).rank = meanRank;
			i += ties;
		}
	}

	/**
	 * Returns the z score of the given total rank, applying a continuity
	 * correction of 0.5 towards the mean. The correction never moves the
	 * score past zero, so a total rank equal to the mean has a z score of
	 * zero.
	 */
	private static double zScore(double totalRank, double mean,
			double sigma) {
		double diff = totalRank - mean;
		if (diff > 0) return Math.max(diff - 0.5, 0) / sigma;
		if (diff < 0) return Math.min(diff + 0.5, 0) / sigma;
		return 0;
	}

	/**
	 * Command line entry point: compares the values in two files, optionally
	 * using a critical z value given as the third argument, and prints the
	 * result.
	 */
	public static void main(String[] args) {
		if (args.length < 2 || args.length > 3)
			die("usage: UTest <file1> <file2> [zCritical]");
		List<Double> a = readFile(args[0]);
		List<Double> b = readFile(args[1]);
		int nA = a.size(), nB = b.size();
		if (nA < 5 || nB < 5) die("Too few values for U test\n");
		double zCritical = Z_CRITICAL_0_01;
		if (args.length == 3) {
			try {
				zCritical = Double.parseDouble(args[2]);
			} catch (NumberFormatException nf) {
				// Report bad input the same way as other usage errors
				die("Invalid critical z value: " + args[2]);
			}
		}
		switch (test(a, b, zCritical)) {
			case SMALLER:
				System.out.println(args[0] + " is smaller");
				break;
			case INCONCLUSIVE:
				System.out.println("No significant difference");
				break;
			case LARGER:
				System.out.println(args[0] + " is larger");
				break;
		}
	}

	/**
	 * Prints the message to standard error and exits with status 1.
	 */
	private static void die(String message) {
		System.err.println(message);
		System.exit(1);
	}

	/**
	 * Reads one number per line from the given file. Exits the process with
	 * an error message if the file can't be read or contains invalid data.
	 */
	private static List<Double> readFile(String filename) {
		List<Double> values = new ArrayList<>();
		// try-with-resources closes the reader even if a line fails to parse
		try (BufferedReader in =
					 new BufferedReader(new FileReader(filename))) {
			String s;
			while ((s = in.readLine()) != null) values.add(Double.valueOf(s));
		} catch (FileNotFoundException fnf) {
			die(filename + " not found");
		} catch (IOException io) {
			die("Error reading from " + filename);
		} catch (NumberFormatException nf) {
			die("Invalid data in " + filename);
		}
		return values;
	}

	/**
	 * A value from one of the two samples, together with the rank assigned
	 * to it once all values have been sorted.
	 */
	private static class Value implements Comparable<Value> {

		private final double value;
		// True if the value came from the first sample
		private final boolean a;
		private double rank;

		private Value(double value, boolean a) {
			this.value = value;
			this.a = a;
		}

		@Override
		public int compareTo(Value v) {
			// Double.compare is a total order even if NaN is present, which
			// Collections.sort requires
			return Double.compare(value, v.value);
		}
	}
}

View File

@@ -0,0 +1,92 @@
package org.briarproject.bramble.test;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import static org.briarproject.bramble.test.UTest.Result.INCONCLUSIVE;
import static org.briarproject.bramble.test.UTest.Result.LARGER;
import static org.briarproject.bramble.test.UTest.Result.SMALLER;
import static org.junit.Assert.assertEquals;
/**
 * Tests for {@link UTest} using randomly generated samples.
 */
public class UTestTest extends BrambleTestCase {

	/**
	 * Critical z value used when asserting that two samples drawn from the
	 * same distribution can't be told apart. The samples are random, so a
	 * two-tailed test at P = 0.01 would report a difference in about 1% of
	 * runs and make the test fail spuriously. A z score above 5 occurs by
	 * chance in fewer than one in a million runs.
	 */
	private static final double Z_CRITICAL_SAME_DISTRIBUTION = 5.0;

	private final Random random = new Random();

	@Test
	public void testSmallerLarger() {
		// Create two samples, which may have different sizes
		int aSize = randomSize();
		int bSize = randomSize();
		List<Double> a = new ArrayList<>(aSize);
		List<Double> b = new ArrayList<>(bSize);
		// Values in b are significantly larger
		fill(a, aSize, 0);
		fill(b, bSize, 0.1);
		// The U test should detect that a is smaller than b
		assertEquals(SMALLER, UTest.test(a, b));
		assertEquals(LARGER, UTest.test(b, a));
	}

	@Test
	public void testSmallerLargerWithTies() {
		// Create two samples, which may have different sizes
		int aSize = randomSize();
		int bSize = randomSize();
		List<Double> a = new ArrayList<>(aSize);
		List<Double> b = new ArrayList<>(bSize);
		// Put some tied values in both samples
		addTiedValues(a, b);
		// Values in b are significantly larger
		fill(a, aSize, 0);
		fill(b, bSize, 0.1);
		// The U test should detect that a is smaller than b
		assertEquals(SMALLER, UTest.test(a, b));
		assertEquals(LARGER, UTest.test(b, a));
	}

	@Test
	public void testInconclusive() {
		// Create two samples, which may have different sizes
		int aSize = randomSize();
		int bSize = randomSize();
		List<Double> a = new ArrayList<>(aSize);
		List<Double> b = new ArrayList<>(bSize);
		// Values in a and b have the same distribution
		fill(a, aSize, 0);
		fill(b, bSize, 0);
		// The U test should not detect a difference between a and b
		assertEquals(INCONCLUSIVE,
				UTest.test(a, b, Z_CRITICAL_SAME_DISTRIBUTION));
		assertEquals(INCONCLUSIVE,
				UTest.test(b, a, Z_CRITICAL_SAME_DISTRIBUTION));
	}

	@Test
	public void testInconclusiveWithTies() {
		// Create two samples, which may have different sizes
		int aSize = randomSize();
		int bSize = randomSize();
		List<Double> a = new ArrayList<>(aSize);
		List<Double> b = new ArrayList<>(bSize);
		// Put some tied values in both samples
		addTiedValues(a, b);
		// Values in a and b have the same distribution
		fill(a, aSize, 0);
		fill(b, bSize, 0);
		// The U test should not detect a difference between a and b
		assertEquals(INCONCLUSIVE,
				UTest.test(a, b, Z_CRITICAL_SAME_DISTRIBUTION));
		assertEquals(INCONCLUSIVE,
				UTest.test(b, a, Z_CRITICAL_SAME_DISTRIBUTION));
	}

	/**
	 * Returns a random sample size between 1000 (inclusive) and 2000
	 * (exclusive).
	 */
	private int randomSize() {
		return random.nextInt(1000) + 1000;
	}

	/**
	 * Adds random values to the sample until it contains the given number of
	 * values. Each value is a random double between 0 (inclusive) and 1
	 * (exclusive) plus the given offset.
	 */
	private void fill(List<Double> sample, int size, double offset) {
		for (int i = sample.size(); i < size; i++)
			sample.add(random.nextDouble() + offset);
	}

	/**
	 * Adds ten groups of one to five identical values, distributing the
	 * members of each group randomly between the two samples.
	 */
	private void addTiedValues(List<Double> a, List<Double> b) {
		for (int i = 0; i < 10; i++) {
			double tiedValue = random.nextDouble();
			int numTies = random.nextInt(5) + 1;
			for (int j = 0; j < numTies; j++) {
				if (random.nextBoolean()) a.add(tiedValue);
				else b.add(tiedValue);
			}
		}
	}
}