diff --git a/src/mhtree/BuildTree.java b/src/mhtree/BuildTree.java index ddab38a3e3473fbffe7bd502a0792fc13e631f88..2a095fec33810568bedbc5620f030af17349cf9e 100644 --- a/src/mhtree/BuildTree.java +++ b/src/mhtree/BuildTree.java @@ -15,7 +15,7 @@ import java.util.stream.Collectors; class BuildTree { private final int leafCapacity; - private final int numberOfChildren; + private final int arity; private final Node[] nodes; private final BitSet validNodeIndices; @@ -27,8 +27,8 @@ class BuildTree { private Node root; - BuildTree(List<LocalAbstractObject> objects, int leafCapacity, int numberOfChildren, InsertType insertType, BucketDispatcher bucketDispatcher) throws BucketStorageException { - this.numberOfChildren = numberOfChildren; + BuildTree(List<LocalAbstractObject> objects, int leafCapacity, int arity, InsertType insertType, BucketDispatcher bucketDispatcher) throws BucketStorageException { + this.arity = arity; this.leafCapacity = leafCapacity; nodes = new Node[objects.size() / leafCapacity]; @@ -57,7 +57,7 @@ class BuildTree { BitSet notProcessedIndices = (BitSet) validNodeIndices.clone(); while (!notProcessedIndices.isEmpty()) { - if (notProcessedIndices.cardinality() < numberOfChildren) { + if (notProcessedIndices.cardinality() < arity) { List<Integer> restOfTheIndices = new ArrayList<>(); notProcessedIndices.stream().forEach(restOfTheIndices::add); @@ -73,7 +73,7 @@ class BuildTree { List<Integer> nnIndices = new ArrayList<>(); - for (int i = 0; i < numberOfChildren - 1; i++) { + for (int i = 0; i < arity - 1; i++) { int index = objectDistances.minDistInArrayExceptIdx(nodeDistances[furthestNodeIndex], notProcessedIndices, furthestNodeIndex); notProcessedIndices.clear(index); nnIndices.add(index); diff --git a/src/mhtree/Histogram.java b/src/mhtree/Histogram.java new file mode 100644 index 0000000000000000000000000000000000000000..470c6f681109ba1e129afb7ca38724f03c5da136 --- /dev/null +++ b/src/mhtree/Histogram.java @@ -0,0 +1,72 @@ +package mhtree; + +import messif.objects.LocalAbstractObject; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +public class Histogram { + private final HashMap<Integer, HashMap<Long, Integer>> levelToHullsCoveredToObjectCount = new HashMap<>(); + + Histogram() { + } + + public static Histogram generate(Node x) { + Histogram histogram = new Histogram(); + + for (int level = 1; level <= x.getHeight() + 1; level++) { + List<Node> levelNodes = x.getNodesOnLevel(level); + + for (LocalAbstractObject object : x.getObjects()) { + long coveredObjectCount = levelNodes.stream() + .filter(node -> node.isCovered(object)) + .count(); + + histogram.addCoveredCount(level, coveredObjectCount); + } + } + + return histogram; + } + + public void addCoveredCount(int level, long count) { + levelToHullsCoveredToObjectCount.merge( + level, + new HashMap<>(Collections.singletonMap(count, 1)), + (hullsCoveredToObjectCount, x) -> { + hullsCoveredToObjectCount.merge(count, 1, (objectCount, y) -> objectCount + 1); + return hullsCoveredToObjectCount; + }); + } + + @Override + public String toString() { + StringBuilder stringBuilder = new StringBuilder(); + + levelToHullsCoveredToObjectCount.forEach((level, hullsCoveredToObjectCount) -> { + stringBuilder.append("Level "); + stringBuilder.append(level); + stringBuilder.append('\n'); + + hullsCoveredToObjectCount.forEach((hullsCovered, objectCount) -> { + stringBuilder.append(objectCount); + stringBuilder.append(" object"); + if (objectCount != 1) stringBuilder.append('s'); + + if (hullsCovered == 0) { + stringBuilder.append(" not covered"); + } else { + stringBuilder.append(" is covered by "); + stringBuilder.append(hullsCovered); + stringBuilder.append(" hull"); + if (hullsCovered != 1) stringBuilder.append('s'); + } + + stringBuilder.append('\n'); + }); + }); + + return stringBuilder.toString(); + } +} diff --git a/src/mhtree/InternalNode.java b/src/mhtree/InternalNode.java index 65195d6f775d2e3fdb68704ac41b8220c02fa256..8a6e656803543c4d1fb0234cbe22a57f8b1c8da4 100644 --- a/src/mhtree/InternalNode.java +++ b/src/mhtree/InternalNode.java @@ -6,6 +6,7 @@ import messif.objects.LocalAbstractObject; import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.stream.Collectors; @@ -51,4 +52,26 @@ public class InternalNode extends Node implements Serializable { public boolean contains(LocalAbstractObject object) { return children.stream().anyMatch(child -> child.contains(object)); } + + public List<Integer> getLeafNodesObjectCounts() { + return children.stream() + .map(Node::getLeafNodesObjectCounts) + .flatMap(Collection::stream) + .collect(Collectors.toList()); + } + + public int getHeight() { + return children.stream() + .mapToInt(Node::getHeight) + .summaryStatistics() + .getMax() + 1; + } + + public List<Node> getNodesOnLevel(int level) { + if (getLevel() == level) return Collections.singletonList(this); + + return children.stream() + .flatMap(child -> child.getNodesOnLevel(level).stream()) + .collect(Collectors.toList()); + } } diff --git a/src/mhtree/LeafNode.java b/src/mhtree/LeafNode.java index 037820062c0544830b6e3633bd6746c3dafcbe3a..49ea70982f4c370f473fa25e5c1751905610ebf2 100644 --- a/src/mhtree/LeafNode.java +++ b/src/mhtree/LeafNode.java @@ -8,6 +8,7 @@ import messif.objects.util.AbstractObjectIterator; import java.io.Serializable; import java.util.ArrayList; +import java.util.Collections; import java.util.List; public class LeafNode extends Node implements Serializable { @@ -47,4 +48,16 @@ public class LeafNode extends Node implements Serializable { return false; } + + public List<Integer> getLeafNodesObjectCounts() { + return Collections.singletonList(bucket.getObjectCount()); + } + + public int getHeight() { + return 0; + } + + public List<Node> getNodesOnLevel(int level) { + return Collections.singletonList(this); + } } diff --git a/src/mhtree/MHTree.java b/src/mhtree/MHTree.java index ba9beffd58f0910dff3fdd08e50a90804b992830..f7290cded79cef27a62cccb71fe45339ade6d630 100644 --- a/src/mhtree/MHTree.java +++ b/src/mhtree/MHTree.java @@ -11,6 +11,7 @@ import messif.operations.data.InsertOperation; import messif.operations.query.ApproxKNNQueryOperation; import java.io.Serializable; +import java.util.IntSummaryStatistics; import java.util.List; import java.util.Map; import java.util.PriorityQueue; @@ -23,7 +24,7 @@ public class MHTree extends Algorithm implements Serializable { private static final long serialVersionUID = 42L; private final int leafCapacity; - private final int numberOfChildren; + private final int arity; private final Node root; private final BucketDispatcher bucketDispatcher; @@ -32,23 +33,23 @@ public class MHTree extends Algorithm implements Serializable { @AlgorithmConstructor(description = "MH-Tree", arguments = { "list of objects", "number of objects in leaf node", - "number of children in internal node", + "arity", "number of threads used in precomputing distances", "insert type", "storage class for buckets", "storage class parameters" }) - public MHTree(List<LocalAbstractObject> objects, int leafCapacity, int numberOfChildren, int numberOfThreads, InsertType insertType, Class<? extends LocalBucket> defaultBucketClass, Map<String, Object> bucketClassParams) throws BucketStorageException { + public MHTree(List<LocalAbstractObject> objects, int leafCapacity, int arity, int numberOfThreads, InsertType insertType, Class<? extends LocalBucket> defaultBucketClass, Map<String, Object> bucketClassParams) throws BucketStorageException { super("MH-Tree"); this.leafCapacity = leafCapacity; - this.numberOfChildren = numberOfChildren; + this.arity = arity; this.insertType = insertType; bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, leafCapacity, 0, false, defaultBucketClass, bucketClassParams); PrecomputedDistances.COMPUTATION_THREADS = numberOfThreads; - root = new BuildTree(objects, leafCapacity, numberOfChildren, insertType, bucketDispatcher).getRoot(); + root = new BuildTree(objects, leafCapacity, arity, insertType, bucketDispatcher).getRoot(); } public void approxKNN(ApproxKNNQueryOperation operation) { @@ -93,11 +94,36 @@ public class MHTree extends Algorithm implements Serializable { return true; } + public void printStatistics() { + List<Integer> objectCounts = root.getLeafNodesObjectCounts(); + + IntSummaryStatistics statistics = objectCounts.stream().mapToInt(Integer::valueOf).summaryStatistics(); + + System.out.println("--- STATISTICS ---"); + + System.out.println("- MH-Tree -"); + + System.out.println("Insert type: " + insertType); + System.out.println("Height: " + root.getHeight()); + System.out.println("Arity: " + arity); + System.out.println("Leaf capacity: " + leafCapacity + " objects"); + + System.out.println("\nHistogram of covered objects per level: "); + System.out.println(Histogram.generate(root)); + + System.out.println("- LeafNodes -"); + + System.out.println("Count: " + statistics.getCount()); + System.out.println("Minimum number of objects: " + statistics.getMin()); + System.out.println("Average number of objects: " + String.format("%.2f", statistics.getAverage())); + System.out.println("Maximum number of objects: " + statistics.getMax()); + } + @Override public String toString() { return "MHTree{" + "leafCapacity=" + leafCapacity + - ", numberOfChildren=" + numberOfChildren + + ", arity=" + arity + ", root=" + root + '}'; } diff --git a/src/mhtree/Node.java b/src/mhtree/Node.java index c8e2b558a14a876c83b73926381cd3396d6e257a..509ef301ae5315420e1b8269432c9c92131f2b8e 100644 --- a/src/mhtree/Node.java +++ b/src/mhtree/Node.java @@ -18,7 +18,7 @@ public abstract class Node implements Serializable { private static final long serialVersionUID = 420L; protected final InsertType insertType; protected HullOptimizedRepresentationV3 hull; - private Node parent; + protected Node parent; Node(PrecomputedDistances distances, InsertType insertType) { this.hull = new HullOptimizedRepresentationV3(distances); @@ -56,6 +56,10 @@ public abstract class Node implements Serializable { return hull.getHull(); } + public int getLevel() { + return parent == null ? 1 : parent.getLevel() + 1; + } + @Override public String toString() { return "Node{hull=" + hull + '}'; @@ -67,6 +71,12 @@ public abstract class Node implements Serializable { public abstract boolean contains(LocalAbstractObject object); + public abstract List<Integer> getLeafNodesObjectCounts(); + + public abstract int getHeight(); + + public abstract List<Node> getNodesOnLevel(int level); + protected void rebuildHull(LocalAbstractObject object) { List<LocalAbstractObject> objects = new ArrayList<>(getObjects()); objects.add(object); diff --git a/src/mhtree/benchmarking/RunBenchmark.java b/src/mhtree/benchmarking/RunBenchmark.java index 9a0994a777d6fea9258e1f280a03875fadbf5802..ca1dfab422ef3a3a905c8dd0ed4fd58d3cd61a03 100644 --- a/src/mhtree/benchmarking/RunBenchmark.java +++ b/src/mhtree/benchmarking/RunBenchmark.java @@ -74,6 +74,8 @@ public class RunBenchmark { System.out.println("~max: " + tookMax + " ms"); System.out.println("~min: " + tookMin + " ms"); System.out.println("~Took: " + (timeStop - timeStart) + " ms"); + + tree.printStatistics(); } catch (BucketStorageException ex) { Logger.getLogger("BuildTree").log(Level.SEVERE, null, ex); } @@ -113,6 +115,7 @@ public class RunBenchmark { System.out.println("~" + config + " k: " + k); + tree.printStatistics(); } catch (BucketStorageException ex) { Logger.getLogger("BuildTree").log(Level.SEVERE, null, ex); } @@ -121,7 +124,7 @@ public class RunBenchmark { public static void benchBuildTree(MHTreeConfig config) { try { long timeStart = System.currentTimeMillis(); - new MHTree( + MHTree tree = new MHTree( config.getObjects(), config.getLeafCapacity(), config.getNumberOfChildren(), @@ -132,6 +135,8 @@ public class RunBenchmark { long timeStop = System.currentTimeMillis(); System.out.println("~Building took: " + (timeStop - timeStart) + " ms"); + + tree.printStatistics(); } catch (BucketStorageException ex) { Logger.getLogger("BuildTree").log(Level.SEVERE, null, ex); }