Skip to content
Snippets Groups Projects
Verified Commit 93de9f04 authored by David Procházka
Browse files

ADD: performance measures, improved benchmarking setup

parent 74f6858c
No related branches found
No related tags found
No related merge requests found
package mhtree.benchmarking;
import messif.objects.LocalAbstractObject;
import messif.objects.impl.ObjectFloatVectorNeuralNetworkL2;
import messif.objects.util.AbstractObjectList;
import messif.objects.util.AbstractStreamObjectIterator;
import messif.objects.util.StreamGenericAbstractObjectIterator;
import java.io.IOException;
import java.util.List;
import java.util.function.Consumer;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Pairs a list of MH-Tree configurations with a list of benchmark functions and runs every
 * benchmark function against every configuration.
 *
 * NOTE(review): this listing interleaves two revisions of the class (a rendered diff whose +/- markers
 * were lost): the fields, the constructor and execute() each appear twice and the braces do not balance,
 * so the code does not compile as shown. Recover a single revision from version control before changing it.
 */
public class BenchmarkConfig {
// Variant that also keeps a list of dataset paths (non-final fields).
private List<MHTreeConfig> mhTreeConfigs;
private List<String> datasetPaths;
private List<Consumer<MHTreeConfig>> benchmarkFunctions;
// Variant without dataset paths (final fields).
private final List<MHTreeConfig> mhTreeConfigs;
private final List<Consumer<MHTreeConfig>> benchmarkFunctions;
// Two constructor signatures follow; they differ only in the datasetPaths parameter.
public BenchmarkConfig(List<MHTreeConfig> mhTreeConfigs, List<String> datasetPaths, List<Consumer<MHTreeConfig>> benchmarkFunctions) {
public BenchmarkConfig(List<MHTreeConfig> mhTreeConfigs, List<Consumer<MHTreeConfig>> benchmarkFunctions) {
this.mhTreeConfigs = mhTreeConfigs;
this.datasetPaths = datasetPaths;
this.benchmarkFunctions = benchmarkFunctions;
}
// Variant that iterates over datasetPaths and loads the objects itself.
public void execute() {
for (String datasetPath : datasetPaths) {
AbstractStreamObjectIterator<ObjectFloatVectorNeuralNetworkL2> objectIterator;
// Variant that receives the already loaded objects from the caller.
public void execute(List<LocalAbstractObject> objects) {
for (MHTreeConfig mhTreeConfig : mhTreeConfigs) {
mhTreeConfig.setObjects(objects);
try {
objectIterator = new StreamGenericAbstractObjectIterator<>(ObjectFloatVectorNeuralNetworkL2.class, datasetPath);
} catch (IOException e) {
// A dataset that cannot be opened is logged and ends the whole run (return, not continue).
Logger.getLogger("MH-Tree Benchmark").log(Level.SEVERE, null, e);
return;
}
List<LocalAbstractObject> objects = new AbstractObjectList<>(objectIterator);
for (MHTreeConfig mhTreeConfig : mhTreeConfigs) {
mhTreeConfig.setObjects(objects);
// Index-based loop: announces the configuration and function index before each run.
for (int i = 0; i < benchmarkFunctions.size(); i++) {
System.out.println("~Running" + mhTreeConfig + " on " + i + "-th function");
benchmarkFunctions.get(i).accept(mhTreeConfig);
}
// For-each loop: runs each function and prints an empty line after it.
for (Consumer<MHTreeConfig> benchmarkFunction : benchmarkFunctions) {
benchmarkFunction.accept(mhTreeConfig);
System.out.println();
}
}
}
......
......@@ -2,53 +2,33 @@ package mhtree.benchmarking;
import messif.objects.LocalAbstractObject;
import mhtree.InsertType;
import mhtree.ObjectToNodeDistance;
import java.util.List;
/**
 * Parameters of one MH-Tree benchmark run (leaf capacity, number of children, insert type, ...)
 * together with the objects the benchmark operates on.
 *
 * NOTE(review): this listing interleaves two revisions of the class (a rendered diff whose +/- markers
 * were lost): one with private fields, numberOfThreads and getters, one with public fields and
 * objectToNodeDistance. The fields and the constructor appear twice and the closing brace of the class
 * is not part of the visible span, so the code does not compile as shown.
 */
public class MHTreeConfig {
// Variant with private fields (read through the getters below).
private final int leafCapacity;
private final int numberOfChildren;
private final int numberOfThreads;
private final InsertType insertType;
private List<LocalAbstractObject> objects;
// Variant with public fields; adds objectToNodeDistance and has no numberOfThreads.
public final int leafCapacity;
public final int numberOfChildren;
public final InsertType insertType;
public final ObjectToNodeDistance objectToNodeDistance;
public List<LocalAbstractObject> objects;
// Two package-private constructor signatures follow, one per variant.
MHTreeConfig(int leafCapacity, int numberOfChildren, int numberOfThreads, InsertType insertType) {
MHTreeConfig(int leafCapacity, int numberOfChildren, InsertType insertType, ObjectToNodeDistance objectToNodeDistance) {
this.leafCapacity = leafCapacity;
this.numberOfChildren = numberOfChildren;
this.numberOfThreads = numberOfThreads;
this.insertType = insertType;
this.objectToNodeDistance = objectToNodeDistance;
}
// Stores the given list reference as is (no copy is made).
public void setObjects(List<LocalAbstractObject> objects) {
this.objects = objects;
}
public List<LocalAbstractObject> getObjects() {
return objects;
}
public int getLeafCapacity() {
return leafCapacity;
}
public int getNumberOfChildren() {
return numberOfChildren;
}
public int getNumberOfThreads() {
return numberOfThreads;
}
public InsertType getInsertType() {
return insertType;
}
// Lists leafCapacity, numberOfChildren, numberOfThreads and insertType; the objects are not included.
@Override
public String toString() {
return "MHTreeConfig{" +
"leafCapacity=" + leafCapacity +
", numberOfChildren=" + numberOfChildren +
", numberOfThreads=" + numberOfThreads +
", insertType=" + insertType +
'}';
}
......
package mhtree.benchmarking;
import messif.objects.util.DistanceRankedObject;
import messif.objects.util.RankedAbstractObject;
import messif.operations.query.ApproxKNNQueryOperation;
import messif.operations.query.KNNQueryOperation;
import messif.utility.reflection.NoSuchInstantiatorException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
 * Quality measures for the answer of an approximate kNN query. Every measure evaluates an exact
 * kNN query for the same query object on the given {@link Tree} and compares the two answers.
 */
public class PerformanceMeasures {

    /**
     * Computes the normalized error on the position of an approximate kNN answer.
     * <p>
     * An exact kNN query with {@code k = tree.getObjectCount()} provides a ranking; objects are
     * matched between the two answers by their locator URI. For every object of the approximate
     * answer the difference between its 1-based position in that ranking and its 1-based position
     * in the approximate answer is summed, and the sum is divided by
     * {@code approximate answer count * exact answer count}.
     *
     * @param approxKNNQueryOperation an already evaluated approximate kNN operation
     * @param tree                    the tree used to evaluate the exact query
     * @return the normalized error on the position; {@code 0} when the approximate answer is empty
     * @throws IllegalStateException if an object of the approximate answer is missing in the exact ranking
     */
    public static double measureErrorOnThePosition(ApproxKNNQueryOperation approxKNNQueryOperation, Tree tree) {
        // Nothing to compare for an empty answer; this also avoids a 0/0 (NaN) result below and
        // matches the guard in measureRecall.
        if (approxKNNQueryOperation.getAnswerCount() == 0) return 0d;

        KNNQueryOperation rankedObjects = new KNNQueryOperation(approxKNNQueryOperation.getQueryObject(), tree.getObjectCount());
        evaluateExactKNN(tree, rankedObjects);

        Map<String, Integer> idToPosition = new HashMap<>(rankedObjects.getAnswerCount());
        int exactPosition = 1;
        for (RankedAbstractObject object : rankedObjects) {
            idToPosition.put(object.getObject().getLocatorURI(), exactPosition);
            exactPosition++;
        }

        double sum = 0;
        int approxPosition = 1;
        for (RankedAbstractObject object : approxKNNQueryOperation) {
            String locatorURI = object.getObject().getLocatorURI();
            Integer position = idToPosition.get(locatorURI);
            if (position == null) {
                // Fail with context instead of a bare NullPointerException from auto-unboxing.
                throw new IllegalStateException("Object '" + locatorURI
                        + "' of the approximate answer is missing in the exact ranking");
            }
            sum += position - approxPosition;
            approxPosition++;
        }

        // Multiply in double: the product of two int counts can exceed Integer.MAX_VALUE.
        return sum / ((double) approxKNNQueryOperation.getAnswerCount() * rankedObjects.getAnswerCount());
    }

    /**
     * Computes the recall of an approximate kNN answer.
     * <p>
     * Answers are compared by distance, not by object identity: the method counts how many
     * distances of the exact kNN answer (same query object, same {@code k}) are also present in
     * the approximate answer. Every exact distance can be matched at most as many times as it
     * occurs in the exact answer.
     *
     * @param approxKNNQueryOperation an already evaluated approximate kNN operation
     * @param tree                    the tree used to evaluate the exact query
     * @return matched count divided by the exact answer count; {@code 0} when the approximate
     *         answer is empty; {@code NaN} when the exact answer is empty
     */
    public static double measureRecall(ApproxKNNQueryOperation approxKNNQueryOperation, Tree tree) {
        if (approxKNNQueryOperation.getAnswerCount() == 0) return 0d;

        KNNQueryOperation knnQueryOperation = new KNNQueryOperation(approxKNNQueryOperation.getQueryObject(), approxKNNQueryOperation.getK());
        evaluateExactKNN(tree, knnQueryOperation);

        // distance -> number of not yet matched occurrences in the exact answer
        Map<Float, Long> frequencyMap = new HashMap<>();
        for (RankedAbstractObject object : knnQueryOperation) {
            frequencyMap.merge(object.getDistance(), 1L, Long::sum);
        }

        long trueCount = 0;
        for (RankedAbstractObject approxObject : approxKNNQueryOperation) {
            Float distance = approxObject.getDistance();
            Long remaining = frequencyMap.get(distance);
            if (remaining == null) continue;

            // Consume one occurrence so that a distance is never matched more often than it exists.
            if (remaining == 1) {
                frequencyMap.remove(distance);
            } else {
                frequencyMap.put(distance, remaining - 1);
            }
            trueCount++;
        }

        return trueCount / (double) knnQueryOperation.getAnswerCount();
    }

    /**
     * Evaluates the exact kNN operation on the tree. A failure is reported on standard error and
     * otherwise ignored, so the caller continues with whatever answer the operation holds.
     */
    private static void evaluateExactKNN(Tree tree, KNNQueryOperation operation) {
        try {
            tree.kNN(operation);
        } catch (ClassNotFoundException | NoSuchInstantiatorException | InvocationTargetException e) {
            e.printStackTrace();
        }
    }
}
package mhtree.benchmarking;
import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation;
import messif.algorithms.AlgorithmMethodException;
import messif.buckets.BucketStorageException;
import messif.buckets.impl.MemoryStorageBucket;
import messif.objects.LocalAbstractObject;
import messif.operations.data.InsertOperation;
import messif.operations.query.ApproxKNNQueryOperation;
import messif.objects.impl.ObjectFloatVectorNeuralNetworkL2;
import messif.objects.util.AbstractObjectList;
import messif.objects.util.StreamGenericAbstractObjectIterator;
import messif.operations.data.BulkInsertOperation;
import mhtree.InsertType;
import mhtree.MHTree;
import mhtree.ObjectToNodeDistance;
import mtree.MTree;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.function.Supplier;
/**
 * Entry point of the benchmarks: loads a dataset, creates the MH-Tree configurations and runs the
 * benchmark functions on them through BenchmarkConfig.
 *
 * NOTE(review): this listing interleaves two revisions of the class (a rendered diff whose +/- markers
 * were lost). main() appears with two signatures, and the bodies of benchInsert/benchApproxKNN/benchBuildTree
 * are spliced with finalBench, the two measure() overloads and loadDataset. The braces do not balance and
 * the code does not compile as shown. Recover a single revision from version control before changing it.
 */
public class RunBenchmark {
// Variant with several default dataset paths; main replaces them with the command-line arguments if any are given.
private static List<String> datasets = Arrays.asList(
"./data/mix_m.txt",
"./data/intersection.txt",
"./data/middle_c.txt",
"./data/r1.txt"
);
// Variant with a single default dataset path; main uses args[0] instead when exactly one argument is given.
private static final String dataset = "./data/middle_c.txt";
public static void main(String[] args) {
if (args.length != 0) datasets = Arrays.asList(args.clone());
public static void main(String[] args) throws IOException {
List<LocalAbstractObject> objects = loadDataset(args.length == 1 ? args[0] : RunBenchmark.dataset);
AbstractRepresentation.PrecomputedDistances.COMPUTATION_THREADS = 16;
new BenchmarkConfig(
Arrays.asList(
new MHTreeConfig(500, 5, 16, InsertType.GREEDY),
new MHTreeConfig(500, 5, 16, InsertType.INCREMENTAL)
new MHTreeConfig(50, 10, InsertType.GREEDY, ObjectToNodeDistance.NEAREST)
),
datasets,
Arrays.asList(
RunBenchmark::benchBuildTree,
(MHTreeConfig config) -> benchApproxKNN(config, 30),
RunBenchmark::benchInsert
)).execute();
(MHTreeConfig config) -> finalBench(config, new int[]{10}, 2)
)).execute(objects);
}
// Builds a tree from the first half of the objects, then inserts the second half one by one
// while tracking the shortest and the longest single insert.
public static void benchInsert(MHTreeConfig config) {
try {
List<LocalAbstractObject> objects = config.getObjects();
List<LocalAbstractObject> initialObjects = objects.subList(0, objects.size() / 2);
List<LocalAbstractObject> restOfTheObjects = objects.subList(objects.size() / 2, objects.size());
long timeStart = System.currentTimeMillis();
MHTree tree = new MHTree(
initialObjects,
config.getLeafCapacity(),
config.getNumberOfChildren(),
config.getNumberOfThreads(),
config.getInsertType(),
MemoryStorageBucket.class,
null);
long treeBuilt = System.currentTimeMillis();
long tookMax = Long.MIN_VALUE;
long tookMin = Long.MAX_VALUE;
for (LocalAbstractObject object : restOfTheObjects) {
long insertStart = System.currentTimeMillis();
tree.insert(new InsertOperation(object));
long took = System.currentTimeMillis() - insertStart;
tookMax = Math.max(tookMax, took);
tookMin = Math.min(tookMin, took);
// Builds an MH-Tree and an M-Tree over the same objects, timing each build through measure(),
// and prints the statistics of both trees.
// NOTE(review): ks is not used by any visible line and buildInsertRatio only by the commented-out code below.
private static void finalBench(MHTreeConfig config, int[] ks, int buildInsertRatio) {
List<LocalAbstractObject> objects = config.objects;
// int buildObjectsCount = objects.size() - objects.size() / buildInsertRatio;
//
// List<LocalAbstractObject> buildObjects = objects.subList(0, buildObjectsCount);
// List<LocalAbstractObject> insertObjects = objects.subList(buildObjectsCount, objects.size());
//
// System.out.println(objects.size() + " objects, build on " + buildObjects.size() + ", inserted " + insertObjects.size());
MHTree mhtree = measure("MH-Tree build", (Supplier<MHTree>) () -> {
try {
return new MHTree.Builder(objects, config.leafCapacity, config.numberOfChildren)
.insertType(config.insertType)
.objectToNodeDistance(config.objectToNodeDistance)
.bucketDispatcher(MemoryStorageBucket.class, null)
.build();
} catch (BucketStorageException e) {
// NOTE(review): returning null here makes the printStatistics() call after measure() fail with a NullPointerException.
e.printStackTrace();
return null;
}
});
mhtree.printStatistics();
System.out.println();
// The bulk operation is created outside the supplier, so its construction is not part of the measured time.
BulkInsertOperation bulkInsertOperation = new BulkInsertOperation(objects);
MTree mtree = measure("M-Tree build", (Supplier<MTree>) () -> {
try {
MTree mTree = new MTree(config.numberOfChildren,
2L * config.leafCapacity,
MemoryStorageBucket.class,
null);
mTree.insert(bulkInsertOperation);
return mTree;
} catch (AlgorithmMethodException | InstantiationException e) {
e.printStackTrace();
return null;
}
});
long timeStop = System.currentTimeMillis();
System.out.println("~Building took: " + (treeBuilt - timeStart) + " ms");
System.out.println("~max: " + tookMax + " ms");
System.out.println("~min: " + tookMin + " ms");
System.out.println("~Took: " + (timeStop - timeStart) + " ms");
tree.printStatistics();
} catch (BucketStorageException ex) {
Logger.getLogger("BuildTree").log(Level.SEVERE, null, ex);
}
mtree.printStatistics();
}
// Builds a tree from all objects, then runs an approximate kNN query for every object
// while tracking the shortest and the longest single query.
public static void benchApproxKNN(MHTreeConfig config, int k) {
try {
MHTree tree = new MHTree(config.getObjects(),
config.getLeafCapacity(),
config.getNumberOfChildren(),
config.getNumberOfThreads(),
config.getInsertType(),
MemoryStorageBucket.class,
null);
long timeStart = System.currentTimeMillis();
long tookMax = Long.MIN_VALUE;
long tookMin = Long.MAX_VALUE;
for (LocalAbstractObject object : config.getObjects()) {
long nnStart = System.currentTimeMillis();
// measure(String, Supplier): times f.get() with System.currentTimeMillis(), prints
// "<what> took <elapsed> ms" and returns the value produced by the supplier.
private static <T extends Supplier<U>, U> U measure(String what, T f) {
U result;
tree.approxKNN(new ApproxKNNQueryOperation(object, k));
long timeStart = System.currentTimeMillis();
result = f.get();
long timeEnd = System.currentTimeMillis();
long took = System.currentTimeMillis() - nnStart;
System.out.println(what + " took " + (timeEnd - timeStart) + " ms");
tookMax = Math.max(tookMax, took);
tookMin = Math.min(tookMin, took);
}
long timeStop = System.currentTimeMillis();
System.out.println("~nn took: " + (timeStop - timeStart) + " ms");
System.out.println("~max: " + tookMax + " ms");
System.out.println("~min: " + tookMin + " ms");
return result;
}
System.out.println("~" + config + " k: " + k);
// measure(String, Runnable): same timing and output as the Supplier overload, without a result.
private static <T extends Runnable> void measure(String what, T f) {
long timeStart = System.currentTimeMillis();
f.run();
long timeEnd = System.currentTimeMillis();
tree.printStatistics();
} catch (BucketStorageException ex) {
Logger.getLogger("BuildTree").log(Level.SEVERE, null, ex);
}
System.out.println(what + " took " + (timeEnd - timeStart) + " ms");
}
// Times the construction of a tree from all objects and prints the tree statistics.
public static void benchBuildTree(MHTreeConfig config) {
try {
long timeStart = System.currentTimeMillis();
MHTree tree = new MHTree(
config.getObjects(),
config.getLeafCapacity(),
config.getNumberOfChildren(),
config.getNumberOfThreads(),
config.getInsertType(),
MemoryStorageBucket.class,
null);
long timeStop = System.currentTimeMillis();
System.out.println("~Building took: " + (timeStop - timeStart) + " ms");
tree.printStatistics();
} catch (BucketStorageException ex) {
Logger.getLogger("BuildTree").log(Level.SEVERE, null, ex);
}
// Wraps a stream iterator over the file at path in an AbstractObjectList
// (presumably reading every ObjectFloatVectorNeuralNetworkL2 object of the file - confirm against MESSIF).
private static List<LocalAbstractObject> loadDataset(String path) throws IOException {
return new AbstractObjectList<>(new StreamGenericAbstractObjectIterator<>(ObjectFloatVectorNeuralNetworkL2.class, path));
}
}
package mhtree.benchmarking;
import java.util.Arrays;
import java.util.Map;
/**
 * Prints a fixed-width text table to standard output: the header row is printed on construction,
 * each call of {@link #print(Map)} prints one data row.
 */
public class Table {
    /** Format of a single column: left-aligned, padded to 35 characters. */
    private static final String COLUMN_FORMAT = "%-35s";

    /** Column names; they are also the keys looked up in the map passed to {@link #print(Map)}. */
    String[] header;

    /** Format of a whole row: one {@link #COLUMN_FORMAT} per column followed by a line separator. */
    String formatString;

    /**
     * Creates the table and immediately prints the header row.
     *
     * @param header column names in the order in which they are printed
     */
    Table(String[] header) {
        this.header = header;

        // Build the format locally: the field starts as null, so appending to it directly would
        // prefix every printed row with the text "null".
        StringBuilder rowFormat = new StringBuilder();
        for (int i = 0; i < header.length; i++) {
            rowFormat.append(COLUMN_FORMAT);
        }
        formatString = rowFormat.append("%n").toString();

        System.out.format(formatString, (Object[]) header);
    }

    /**
     * Prints one row with the values of {@code stats} in header order.
     *
     * @param stats values keyed by column name; a column without an entry is printed as {@code null}
     */
    public void print(Map<String, Object> stats) {
        System.out.format(formatString, Arrays.stream(header).map(stats::get).toArray());
    }
}
package mhtree.benchmarking;
import messif.operations.query.KNNQueryOperation;
import messif.utility.reflection.NoSuchInstantiatorException;
import java.lang.reflect.InvocationTargetException;
/**
 * Abstraction over MH-Tree and M-Tree for the benchmark purposes.
 * Within this package it is consumed by PerformanceMeasures to obtain exact kNN answers.
 */
public interface Tree {
/**
 * Evaluates the given kNN query on the tree. PerformanceMeasures iterates over the passed
 * operation after this call to read the answer, so implementations are expected to store the
 * answer in the operation itself.
 *
 * @param knnQueryOperation the query to evaluate
 * @throws ClassNotFoundException      declared failure of an implementation (cause not visible here)
 * @throws NoSuchInstantiatorException declared failure of an implementation (cause not visible here)
 * @throws InvocationTargetException   declared failure of an implementation (cause not visible here)
 */
void kNN(KNNQueryOperation knnQueryOperation) throws ClassNotFoundException, NoSuchInstantiatorException, InvocationTargetException;
/**
 * Returns the object count of the tree. PerformanceMeasures passes this value as k to obtain a
 * ranking for the error-on-the-position measure.
 * NOTE(review): presumably the number of objects stored in the tree - confirm against the implementations.
 */
int getObjectCount();
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment