From 93de9f041a154f801022fb21f7506ce510a149d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Proch=C3=A1zka?= <david@prochazka.dev> Date: Mon, 15 Mar 2021 17:28:24 +0100 Subject: [PATCH] ADD: performance measures, improved benchmarking setup --- src/mhtree/benchmarking/BenchmarkConfig.java | 40 +--- src/mhtree/benchmarking/MHTreeConfig.java | 36 +--- .../benchmarking/PerformanceMeasures.java | 82 ++++++++ src/mhtree/benchmarking/RunBenchmark.java | 185 +++++++----------- src/mhtree/benchmarking/Table.java | 22 +++ src/mhtree/benchmarking/Tree.java | 15 ++ 6 files changed, 210 insertions(+), 170 deletions(-) create mode 100644 src/mhtree/benchmarking/PerformanceMeasures.java create mode 100644 src/mhtree/benchmarking/Table.java create mode 100644 src/mhtree/benchmarking/Tree.java diff --git a/src/mhtree/benchmarking/BenchmarkConfig.java b/src/mhtree/benchmarking/BenchmarkConfig.java index 647e6f7..0eb01da 100644 --- a/src/mhtree/benchmarking/BenchmarkConfig.java +++ b/src/mhtree/benchmarking/BenchmarkConfig.java @@ -1,48 +1,26 @@ package mhtree.benchmarking; import messif.objects.LocalAbstractObject; -import messif.objects.impl.ObjectFloatVectorNeuralNetworkL2; -import messif.objects.util.AbstractObjectList; -import messif.objects.util.AbstractStreamObjectIterator; -import messif.objects.util.StreamGenericAbstractObjectIterator; -import java.io.IOException; import java.util.List; import java.util.function.Consumer; -import java.util.logging.Level; -import java.util.logging.Logger; public class BenchmarkConfig { - private List<MHTreeConfig> mhTreeConfigs; - private List<String> datasetPaths; - private List<Consumer<MHTreeConfig>> benchmarkFunctions; + private final List<MHTreeConfig> mhTreeConfigs; + private final List<Consumer<MHTreeConfig>> benchmarkFunctions; - public BenchmarkConfig(List<MHTreeConfig> mhTreeConfigs, List<String> datasetPaths, List<Consumer<MHTreeConfig>> benchmarkFunctions) { + public BenchmarkConfig(List<MHTreeConfig> mhTreeConfigs, 
List<Consumer<MHTreeConfig>> benchmarkFunctions) { this.mhTreeConfigs = mhTreeConfigs; - this.datasetPaths = datasetPaths; this.benchmarkFunctions = benchmarkFunctions; } - public void execute() { - for (String datasetPath : datasetPaths) { - AbstractStreamObjectIterator<ObjectFloatVectorNeuralNetworkL2> objectIterator; + public void execute(List<LocalAbstractObject> objects) { + for (MHTreeConfig mhTreeConfig : mhTreeConfigs) { + mhTreeConfig.setObjects(objects); - try { - objectIterator = new StreamGenericAbstractObjectIterator<>(ObjectFloatVectorNeuralNetworkL2.class, datasetPath); - } catch (IOException e) { - Logger.getLogger("MH-Tree Benchmark").log(Level.SEVERE, null, e); - return; - } - - List<LocalAbstractObject> objects = new AbstractObjectList<>(objectIterator); - - for (MHTreeConfig mhTreeConfig : mhTreeConfigs) { - mhTreeConfig.setObjects(objects); - - for (int i = 0; i < benchmarkFunctions.size(); i++) { - System.out.println("~Running" + mhTreeConfig + " on " + i + "-th function"); - benchmarkFunctions.get(i).accept(mhTreeConfig); - } + for (Consumer<MHTreeConfig> benchmarkFunction : benchmarkFunctions) { + benchmarkFunction.accept(mhTreeConfig); + System.out.println(); } } } diff --git a/src/mhtree/benchmarking/MHTreeConfig.java b/src/mhtree/benchmarking/MHTreeConfig.java index 7ff5f04..0e04168 100644 --- a/src/mhtree/benchmarking/MHTreeConfig.java +++ b/src/mhtree/benchmarking/MHTreeConfig.java @@ -2,53 +2,33 @@ package mhtree.benchmarking; import messif.objects.LocalAbstractObject; import mhtree.InsertType; +import mhtree.ObjectToNodeDistance; import java.util.List; public class MHTreeConfig { - private final int leafCapacity; - private final int numberOfChildren; - private final int numberOfThreads; - private final InsertType insertType; - private List<LocalAbstractObject> objects; + public final int leafCapacity; + public final int numberOfChildren; + public final InsertType insertType; + public final ObjectToNodeDistance objectToNodeDistance; + 
public List<LocalAbstractObject> objects; - MHTreeConfig(int leafCapacity, int numberOfChildren, int numberOfThreads, InsertType insertType) { + MHTreeConfig(int leafCapacity, int numberOfChildren, InsertType insertType, ObjectToNodeDistance objectToNodeDistance) { this.leafCapacity = leafCapacity; this.numberOfChildren = numberOfChildren; - this.numberOfThreads = numberOfThreads; this.insertType = insertType; + this.objectToNodeDistance = objectToNodeDistance; } public void setObjects(List<LocalAbstractObject> objects) { this.objects = objects; } - public List<LocalAbstractObject> getObjects() { - return objects; - } - - public int getLeafCapacity() { - return leafCapacity; - } - - public int getNumberOfChildren() { - return numberOfChildren; - } - - public int getNumberOfThreads() { - return numberOfThreads; - } - - public InsertType getInsertType() { - return insertType; - } - @Override public String toString() { return "MHTreeConfig{" + "leafCapacity=" + leafCapacity + ", numberOfChildren=" + numberOfChildren + - ", numberOfThreads=" + numberOfThreads + ", insertType=" + insertType + '}'; } diff --git a/src/mhtree/benchmarking/PerformanceMeasures.java b/src/mhtree/benchmarking/PerformanceMeasures.java new file mode 100644 index 0000000..6cd8498 --- /dev/null +++ b/src/mhtree/benchmarking/PerformanceMeasures.java @@ -0,0 +1,82 @@ +package mhtree.benchmarking; + +import messif.objects.util.DistanceRankedObject; +import messif.objects.util.RankedAbstractObject; +import messif.operations.query.ApproxKNNQueryOperation; +import messif.operations.query.KNNQueryOperation; +import messif.utility.reflection.NoSuchInstantiatorException; + +import java.lang.reflect.InvocationTargetException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + +public class PerformanceMeasures { + + public static double 
measureErrorOnThePosition(ApproxKNNQueryOperation approxKNNQueryOperation, Tree tree) { + KNNQueryOperation rankedObjects = new KNNQueryOperation(approxKNNQueryOperation.getQueryObject(), tree.getObjectCount()); + try { + tree.kNN(rankedObjects); + } catch (ClassNotFoundException | NoSuchInstantiatorException | InvocationTargetException e) { + e.printStackTrace(); + } + + Map<String, Integer> IDtoPosition = new HashMap<>(rankedObjects.getAnswerCount()); + + int i = 1; + for (RankedAbstractObject object : rankedObjects) { + IDtoPosition.put(object.getObject().getLocatorURI(), i); + i++; + } + + double sum = 0; + + int approxPosition = 1; + for (RankedAbstractObject object : approxKNNQueryOperation) { + sum += IDtoPosition.get(object.getObject().getLocatorURI()) - approxPosition; + approxPosition++; + } + + return sum / (approxKNNQueryOperation.getAnswerCount() * rankedObjects.getAnswerCount()); + } + + // Comparison is based on distances: counts how many distances from the exact KNNQueryOperation answer are also present in the ApproxKNNQueryOperation answer + public static double measureRecall(ApproxKNNQueryOperation approxKNNQueryOperation, Tree tree) { + if (approxKNNQueryOperation.getAnswerCount() == 0) return 0d; + + KNNQueryOperation knnQueryOperation = new KNNQueryOperation(approxKNNQueryOperation.getQueryObject(), approxKNNQueryOperation.getK()); + try { + tree.kNN(knnQueryOperation); + } catch (ClassNotFoundException | NoSuchInstantiatorException | InvocationTargetException e) { + e.printStackTrace(); + } + + List<RankedAbstractObject> objects = new ArrayList<>(knnQueryOperation.getAnswerCount()); + for (RankedAbstractObject object : knnQueryOperation) + objects.add(object); + + Map<Float, Long> frequencyMap = objects.parallelStream() + .map(DistanceRankedObject::getDistance) + .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())); + + long trueCount = 0; + + for (RankedAbstractObject approxObject : approxKNNQueryOperation) { + float distance
= approxObject.getDistance(); + if (frequencyMap.containsKey(distance)) { + long count = frequencyMap.get(distance); + if (count == 1) { + frequencyMap.remove(distance); + } else { + frequencyMap.replace(distance, count - 1); + } + trueCount++; + } + } + + return trueCount / (double) knnQueryOperation.getAnswerCount(); + } +} diff --git a/src/mhtree/benchmarking/RunBenchmark.java b/src/mhtree/benchmarking/RunBenchmark.java index ca1dfab..169e1de 100644 --- a/src/mhtree/benchmarking/RunBenchmark.java +++ b/src/mhtree/benchmarking/RunBenchmark.java @@ -1,144 +1,107 @@ package mhtree.benchmarking; +import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation; +import messif.algorithms.AlgorithmMethodException; import messif.buckets.BucketStorageException; import messif.buckets.impl.MemoryStorageBucket; import messif.objects.LocalAbstractObject; -import messif.operations.data.InsertOperation; -import messif.operations.query.ApproxKNNQueryOperation; +import messif.objects.impl.ObjectFloatVectorNeuralNetworkL2; +import messif.objects.util.AbstractObjectList; +import messif.objects.util.StreamGenericAbstractObjectIterator; +import messif.operations.data.BulkInsertOperation; import mhtree.InsertType; import mhtree.MHTree; +import mhtree.ObjectToNodeDistance; +import mtree.MTree; +import java.io.IOException; import java.util.Arrays; import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; +import java.util.function.Supplier; public class RunBenchmark { - private static List<String> datasets = Arrays.asList( - "./data/mix_m.txt", - "./data/intersection.txt", - "./data/middle_c.txt", - "./data/r1.txt" - ); + private static final String dataset = "./data/middle_c.txt"; - public static void main(String[] args) { - if (args.length != 0) datasets = Arrays.asList(args.clone()); + public static void main(String[] args) throws IOException { + List<LocalAbstractObject> objects = loadDataset(args.length == 1 ? 
args[0] : RunBenchmark.dataset); + + AbstractRepresentation.PrecomputedDistances.COMPUTATION_THREADS = 16; new BenchmarkConfig( Arrays.asList( - new MHTreeConfig(500, 5, 16, InsertType.GREEDY), - new MHTreeConfig(500, 5, 16, InsertType.INCREMENTAL) + new MHTreeConfig(50, 10, InsertType.GREEDY, ObjectToNodeDistance.NEAREST) ), - datasets, Arrays.asList( - RunBenchmark::benchBuildTree, - (MHTreeConfig config) -> benchApproxKNN(config, 30), - RunBenchmark::benchInsert - )).execute(); + (MHTreeConfig config) -> finalBench(config, new int[]{10}, 2) + )).execute(objects); } - public static void benchInsert(MHTreeConfig config) { - try { - List<LocalAbstractObject> objects = config.getObjects(); - List<LocalAbstractObject> initialObjects = objects.subList(0, objects.size() / 2); - List<LocalAbstractObject> restOfTheObjects = objects.subList(objects.size() / 2, objects.size()); - - long timeStart = System.currentTimeMillis(); - - MHTree tree = new MHTree( - initialObjects, - config.getLeafCapacity(), - config.getNumberOfChildren(), - config.getNumberOfThreads(), - config.getInsertType(), - MemoryStorageBucket.class, - null); - - long treeBuilt = System.currentTimeMillis(); - - long tookMax = Long.MIN_VALUE; - long tookMin = Long.MAX_VALUE; - - for (LocalAbstractObject object : restOfTheObjects) { - long insertStart = System.currentTimeMillis(); - tree.insert(new InsertOperation(object)); - long took = System.currentTimeMillis() - insertStart; - - tookMax = Math.max(tookMax, took); - tookMin = Math.min(tookMin, took); + private static void finalBench(MHTreeConfig config, int[] ks, int buildInsertRatio) { + List<LocalAbstractObject> objects = config.objects; + +// int buildObjectsCount = objects.size() - objects.size() / buildInsertRatio; +// +// List<LocalAbstractObject> buildObjects = objects.subList(0, buildObjectsCount); +// List<LocalAbstractObject> insertObjects = objects.subList(buildObjectsCount, objects.size()); +// +// System.out.println(objects.size() + " objects, 
build on " + buildObjects.size() + ", inserted " + insertObjects.size()); + + MHTree mhtree = measure("MH-Tree build", (Supplier<MHTree>) () -> { + try { + return new MHTree.Builder(objects, config.leafCapacity, config.numberOfChildren) + .insertType(config.insertType) + .objectToNodeDistance(config.objectToNodeDistance) + .bucketDispatcher(MemoryStorageBucket.class, null) + .build(); + } catch (BucketStorageException e) { + e.printStackTrace(); + return null; } + }); + + mhtree.printStatistics(); + System.out.println(); + + BulkInsertOperation bulkInsertOperation = new BulkInsertOperation(objects); + + MTree mtree = measure("M-Tree build", (Supplier<MTree>) () -> { + try { + MTree mTree = new MTree(config.numberOfChildren, + 2L * config.leafCapacity, + MemoryStorageBucket.class, + null); + mTree.insert(bulkInsertOperation); + return mTree; + } catch (AlgorithmMethodException | InstantiationException e) { + e.printStackTrace(); + return null; + } + }); - long timeStop = System.currentTimeMillis(); - - System.out.println("~Building took: " + (treeBuilt - timeStart) + " ms"); - System.out.println("~max: " + tookMax + " ms"); - System.out.println("~min: " + tookMin + " ms"); - System.out.println("~Took: " + (timeStop - timeStart) + " ms"); - - tree.printStatistics(); - } catch (BucketStorageException ex) { - Logger.getLogger("BuildTree").log(Level.SEVERE, null, ex); - } + mtree.printStatistics(); } - public static void benchApproxKNN(MHTreeConfig config, int k) { - try { - MHTree tree = new MHTree(config.getObjects(), - config.getLeafCapacity(), - config.getNumberOfChildren(), - config.getNumberOfThreads(), - config.getInsertType(), - MemoryStorageBucket.class, - null); - - long timeStart = System.currentTimeMillis(); - - long tookMax = Long.MIN_VALUE; - long tookMin = Long.MAX_VALUE; - - for (LocalAbstractObject object : config.getObjects()) { - long nnStart = System.currentTimeMillis(); + private static <T extends Supplier<U>, U> U measure(String what, T f) { + U 
result; - tree.approxKNN(new ApproxKNNQueryOperation(object, k)); + long timeStart = System.currentTimeMillis(); + result = f.get(); + long timeEnd = System.currentTimeMillis(); - long took = System.currentTimeMillis() - nnStart; + System.out.println(what + " took " + (timeEnd - timeStart) + " ms"); - tookMax = Math.max(tookMax, took); - tookMin = Math.min(tookMin, took); - } - - long timeStop = System.currentTimeMillis(); - - System.out.println("~nn took: " + (timeStop - timeStart) + " ms"); - System.out.println("~max: " + tookMax + " ms"); - System.out.println("~min: " + tookMin + " ms"); + return result; + } - System.out.println("~" + config + " k: " + k); + private static <T extends Runnable> void measure(String what, T f) { + long timeStart = System.currentTimeMillis(); + f.run(); + long timeEnd = System.currentTimeMillis(); - tree.printStatistics(); - } catch (BucketStorageException ex) { - Logger.getLogger("BuildTree").log(Level.SEVERE, null, ex); - } + System.out.println(what + " took " + (timeEnd - timeStart) + " ms"); } - public static void benchBuildTree(MHTreeConfig config) { - try { - long timeStart = System.currentTimeMillis(); - MHTree tree = new MHTree( - config.getObjects(), - config.getLeafCapacity(), - config.getNumberOfChildren(), - config.getNumberOfThreads(), - config.getInsertType(), - MemoryStorageBucket.class, - null); - long timeStop = System.currentTimeMillis(); - - System.out.println("~Building took: " + (timeStop - timeStart) + " ms"); - - tree.printStatistics(); - } catch (BucketStorageException ex) { - Logger.getLogger("BuildTree").log(Level.SEVERE, null, ex); - } + private static List<LocalAbstractObject> loadDataset(String path) throws IOException { + return new AbstractObjectList<>(new StreamGenericAbstractObjectIterator<>(ObjectFloatVectorNeuralNetworkL2.class, path)); } } diff --git a/src/mhtree/benchmarking/Table.java b/src/mhtree/benchmarking/Table.java new file mode 100644 index 0000000..a464498 --- /dev/null +++ 
b/src/mhtree/benchmarking/Table.java @@ -0,0 +1,22 @@ +package mhtree.benchmarking; + +import java.util.Arrays; +import java.util.Map; + +public class Table { + String[] header; + String formatString = ""; /* must start empty: "+=" on a null String would prepend the text "null" to every printed row */ + + Table(String[] header) { + this.header = header; + + Arrays.stream(header).forEach(h -> formatString += "%-35s"); + formatString += "%n"; + + System.out.format(formatString, (Object[]) header); + } + + public void print(Map<String, Object> stats) { + System.out.format(formatString, Arrays.stream(header).map(stats::get).toArray()); + } +} diff --git a/src/mhtree/benchmarking/Tree.java b/src/mhtree/benchmarking/Tree.java new file mode 100644 index 0000000..fb6d637 --- /dev/null +++ b/src/mhtree/benchmarking/Tree.java @@ -0,0 +1,15 @@ +package mhtree.benchmarking; + +import messif.operations.query.KNNQueryOperation; +import messif.utility.reflection.NoSuchInstantiatorException; + +import java.lang.reflect.InvocationTargetException; + +/** + * Abstraction over MH-Tree and M-Tree for the benchmark purposes. + */ +public interface Tree { + void kNN(KNNQueryOperation knnQueryOperation) throws ClassNotFoundException, NoSuchInstantiatorException, InvocationTargetException; + + int getObjectCount(); +} -- GitLab