Skip to content
Snippets Groups Projects
Verified Commit 9d974aac authored by David Procházka
Browse files

ADD: optimized benchmark

parent 024c6400
No related branches found
No related tags found
No related merge requests found
......@@ -3,7 +3,7 @@ package mhtree;
import messif.operations.Approximate;
public class ApproxState {
protected int limit;
public int limit;
protected int objectsChecked;
protected int bucketsVisited;
......
......@@ -10,6 +10,7 @@ import messif.objects.LocalAbstractObject;
import messif.operations.data.InsertOperation;
import messif.operations.query.ApproxKNNQueryOperation;
import messif.operations.query.KNNQueryOperation;
import mhtree.benchmarking.SearchState;
import java.io.Serializable;
import java.util.ArrayList;
......@@ -63,7 +64,33 @@ public class MHTree extends Algorithm implements Serializable {
}
/**
 * Runs an approximate kNN search driven by the {@link SearchState} stored in
 * {@code operation.suppData}.
 *
 * <p>Nodes are taken from the state's queue one at a time. For a leaf, every
 * stored object is offered to the operation's answer; for an internal node,
 * every child is pushed back onto the queue. The loop ends when the queue is
 * empty or when the state's {@code ApproxState} (if present) reports
 * {@code stop()}.
 *
 * <p>The queue lives in the {@code SearchState}, not in a local variable, so
 * whatever is still queued when the loop breaks remains there after this
 * method returns.
 *
 * @param operation the query to evaluate; its {@code suppData} is cast to
 *                  {@code SearchState} and dereferenced without a null check
 */
public void approxKNN(ApproxKNNQueryOperation operation) {
// NOTE(review): this call looks like the pre-change method body that the diff
// view shows next to the added lines below — confirm whether it is still
// meant to run before the queue-driven loop.
kNNSearch(operation, ApproxState.create(operation, this));
SearchState searchState = (SearchState) operation.suppData;
while (!searchState.queue.isEmpty()) {
// Check the stop condition before taking the next node off the queue.
if (searchState.approxState != null && searchState.approxState.stop()) {
break;
}
Node node = searchState.queue.remove().getNode();
if (node.isLeaf()) {
for (LocalAbstractObject object : node.getObjects()) {
// Offer the object while the answer is not yet full, or when it is closer
// to the query than the operation's current answer distance.
if (!operation.isAnswerFull() || searchState.queryObject.getDistance(object) < operation.getAnswerDistance()) {
operation.addToAnswer(object);
}
}
// Report the processed leaf to the approximation state, if one is in use.
if (searchState.approxState != null) {
searchState.approxState.update((LeafNode) node);
}
} else {
// Internal node: enqueue each child, ranked against the query object.
for (Node child : ((InternalNode) node).getChildren()) {
searchState.queue.add(new ObjectToNodeDistanceRank(searchState.queryObject, child));
}
}
}
operation.endOperation();
}
public void kNNSearch(KNNQueryOperation operation, ApproxState approxState) {
......@@ -99,6 +126,10 @@ public class MHTree extends Algorithm implements Serializable {
operation.endOperation();
}
/**
 * Returns the node held in this tree's {@code root} field.
 *
 * @return the current root node
 */
public Node getRoot() {
return root;
}
/**
 * Returns the object count reported by the bucket dispatcher.
 *
 * @return the value of {@code bucketDispatcher.getObjectCount()}
 */
public int getObjectCount() {
return bucketDispatcher.getObjectCount();
}
......@@ -310,6 +341,9 @@ public class MHTree extends Algorithm implements Serializable {
root = createRoot(nodeDegree);
objectDistances = null;
System.gc();
return new MHTree(this);
}
......
......@@ -72,4 +72,32 @@ public class PerformanceMeasures {
return trueKNNFoundCount / (double) knnQueryOperation.getAnswerCount();
}
/**
 * Measures the recall of an approximate kNN answer against precomputed exact results.
 *
 * <p>The reference neighbours for the query are looked up in {@code trueKNN} by the
 * query object's locator URI and truncated to the operation's {@code k}. Answers are
 * matched to reference neighbours by distance value rather than by object identity:
 * each reference distance can be matched at most as many times as it occurs among the
 * reference neighbours.
 *
 * @param approxKNNQueryOperation the evaluated approximate query whose answer is scored
 * @param trueKNN                 exact kNN results keyed by query locator URI
 * @return the fraction of reference neighbours matched by the answer; {@code 0} when the
 *         answer is empty or there are no reference neighbours to compare against
 * @throws IllegalArgumentException if {@code trueKNN} has no entry for the query object
 */
public static double measureRecall(ApproxKNNQueryOperation approxKNNQueryOperation, Map<String, List<RankedAbstractObject>> trueKNN) {
    if (approxKNNQueryOperation.getAnswerCount() == 0) {
        return 0d;
    }

    String locator = approxKNNQueryOperation.getQueryObject().getLocatorURI();
    List<RankedAbstractObject> storedKNN = trueKNN.get(locator);
    if (storedKNN == null) {
        // Fail with context instead of an anonymous NullPointerException on subList.
        throw new IllegalArgumentException("No true kNN result stored for query object " + locator);
    }

    // Clamp to the stored size: the reference list may hold fewer than k entries,
    // in which case subList(0, k) would throw IndexOutOfBoundsException.
    int referenceSize = Math.min(approxKNNQueryOperation.getK(), storedKNN.size());
    List<RankedAbstractObject> kNNObjects = storedKNN.subList(0, referenceSize);
    if (kNNObjects.isEmpty()) {
        // Avoid 0 / 0.0 == NaN leaking into aggregated statistics.
        return 0d;
    }

    // How many reference neighbours lie at each distance.
    Map<Float, Long> frequencyMap = kNNObjects
            .stream()
            .map(DistanceRankedObject::getDistance)
            .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));

    long trueKNNFoundCount = 0;
    for (RankedAbstractObject approxObject : approxKNNQueryOperation) {
        Float distance = approxObject.getDistance();
        Long remaining = frequencyMap.get(distance);
        if (remaining == null) {
            continue;
        }

        // Consume one occurrence so a distance cannot be matched more often than it occurs.
        if (remaining == 1) {
            frequencyMap.remove(distance);
        } else {
            frequencyMap.put(distance, remaining - 1);
        }
        trueKNNFoundCount++;
    }

    return trueKNNFoundCount / (double) kNNObjects.size();
}
}
package mhtree.benchmarking;
import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation;
import messif.algorithms.AlgorithmMethodException;
import messif.buckets.BucketStorageException;
import messif.objects.LocalAbstractObject;
import messif.objects.impl.ObjectFloatVectorNeuralNetworkL2;
import messif.objects.util.AbstractObjectList;
import messif.objects.util.RankedAbstractObject;
import messif.objects.util.StreamGenericAbstractObjectIterator;
import messif.operations.Approximate;
import messif.operations.query.ApproxKNNQueryOperation;
import messif.operations.query.KNNQueryOperation;
import messif.statistics.Statistics;
import mhtree.InsertType;
import mhtree.MHTree;
import mhtree.MergeType;
import mhtree.ObjectToNodeDistance;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class RunBenchmark {
public static void main(String[] args) throws IOException, NoSuchMethodException, AlgorithmMethodException, BucketStorageException, InstantiationException {
public static void main(String[] args) throws IOException, BucketStorageException {
if (args.length != 5) {
throw new IllegalArgumentException("Unexpected number of params");
}
......@@ -56,45 +61,71 @@ public class RunBenchmark {
);
}
private static void percentageToRecall(MHTreeConfig config, List<LocalAbstractObject> objects, int[] ks) throws NoSuchMethodException, AlgorithmMethodException, BucketStorageException, RuntimeException {
int numberOfObjects = objects.size();
private static void percentageToRecall(MHTreeConfig config, List<LocalAbstractObject> objects, int[] ks) throws BucketStorageException, RuntimeException {
MHTree mTree = new MHTree.Builder(objects, config.leafCapacity, config.nodeDegree)
.objectToNodeDistance(config.objectToNodeDistance)
.mergeType(MergeType.REPRESENTATION_BASED)
.build();
mTree.printStatistics();
System.out.println("leafCapacity,nodeDegree,objectToNodeDistance,k,percentage,recall (min),recall (avg),recall (med),recall (max)");
double minimalRecall = 0;
int percentage = 0;
int percentageStep = 5;
int maxK = Arrays.stream(ks).max().getAsInt();
for (int k : ks) {
List<Double> recalls = new ArrayList<>(numberOfObjects);
for (int i = 0; i < numberOfObjects; i++) {
recalls.add(0.0);
}
List<KNNQueryOperation> kNNOperations = objects
.parallelStream()
.map(object -> new KNNQueryOperation(object, maxK))
.collect(Collectors.toList());
while (minimalRecall != 1.0) {
for (int i = 0; i < numberOfObjects; i++) {
if (recalls.get(i) != 1.0) {
ApproxKNNQueryOperation operation = new ApproxKNNQueryOperation(
objects.get(i),
k,
percentage,
Approximate.LocalSearchType.PERCENTAGE,
LocalAbstractObject.UNKNOWN_DISTANCE
);
class Pair {
public List<RankedAbstractObject> kNNObjects;
public String id;
}
mTree.executeOperation(operation);
Map<String, List<RankedAbstractObject>> kNNResults = kNNOperations
.parallelStream()
.map(op -> {
mTree.kNN(op);
Pair pair = new Pair();
pair.id = op.getQueryObject().getLocatorURI();
pair.kNNObjects = new ArrayList<>(maxK);
for (RankedAbstractObject o : op)
pair.kNNObjects.add(o);
return pair;
})
.collect(Collectors.toMap(entry -> entry.id, entry -> entry.kNNObjects));
recalls.set(i, PerformanceMeasures.measureRecall(operation, mTree));
}
}
for (int k : ks) {
List<ApproxKNNQueryOperation> approxOperations = objects
.parallelStream()
.map(object -> new ApproxKNNQueryOperation(object, k, 0, Approximate.LocalSearchType.PERCENTAGE, LocalAbstractObject.UNKNOWN_DISTANCE))
.collect(Collectors.toList());
approxOperations
.parallelStream()
.forEach(op -> op.suppData = new SearchState(mTree, op));
double minimalRecall = 0;
int percentage = 0;
int percentageStep = 5;
Stats recallStats = new Stats(new ArrayList<>(recalls));
while (minimalRecall != 1.0) {
approxOperations
.parallelStream()
.filter(op -> ((SearchState) op.suppData).recall != 1d)
.forEach(op -> {
SearchState searchState = (SearchState) op.suppData;
mTree.approxKNN(op);
searchState.recall = PerformanceMeasures.measureRecall(op, kNNResults);
searchState.approxState.limit += Math.round((float) objects.size() * (float) percentageStep / 100f);
});
Stats recallStats = new Stats(
approxOperations
.stream()
.map(op -> ((SearchState) op.suppData).recall)
.collect(Collectors.toList())
);
System.out.println(String.join(",",
String.valueOf(config.leafCapacity),
......@@ -111,9 +142,6 @@ public class RunBenchmark {
minimalRecall = recallStats.getMin();
percentage += percentageStep;
}
minimalRecall = 0;
percentage = 0;
}
}
......
package mhtree.benchmarking;
import messif.objects.LocalAbstractObject;
import messif.operations.query.ApproxKNNQueryOperation;
import mhtree.ApproxState;
import mhtree.MHTree;
import mhtree.ObjectToNodeDistanceRank;
import java.util.PriorityQueue;
/**
 * Mutable holder for the state of one approximate kNN query: the queue of
 * nodes still to be examined, the query object, the approximation state and
 * a recall value.
 */
public class SearchState {
    /** Nodes waiting to be examined, each ranked against the query object. */
    public PriorityQueue<ObjectToNodeDistanceRank> queue;

    /** The query object taken from the operation at construction time. */
    public LocalAbstractObject queryObject;

    /** Approximation state obtained from {@code ApproxState.create(operation, tree)}. */
    public ApproxState approxState;

    /** Recall value for this query; set to 0 on construction and left for callers to update. */
    public double recall;

    /**
     * Creates the initial state for {@code operation} on {@code tree}: the
     * queue holds a single entry for the tree's root.
     *
     * @param tree      the tree to be searched
     * @param operation the approximate kNN query this state belongs to
     */
    public SearchState(MHTree tree, ApproxKNNQueryOperation operation) {
        LocalAbstractObject query = operation.getQueryObject();

        PriorityQueue<ObjectToNodeDistanceRank> pending = new PriorityQueue<>();
        pending.add(new ObjectToNodeDistanceRank(query, tree.getRoot()));

        queue = pending;
        queryObject = query;
        approxState = ApproxState.create(operation, tree);
        recall = 0d;
    }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment