diff --git a/src/mhtree/MHTree.java b/src/mhtree/MHTree.java index 24077972b3c21498d96a3e1fd7d5265ad42a9832..3a754dc8c82223e76ba228cc610f0d91315db690 100644 --- a/src/mhtree/MHTree.java +++ b/src/mhtree/MHTree.java @@ -4,7 +4,6 @@ import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation; import messif.algorithms.Algorithm; import messif.buckets.BucketDispatcher; import messif.buckets.BucketStorageException; -import messif.buckets.LocalBucket; import messif.buckets.impl.MemoryStorageBucket; import messif.objects.LocalAbstractObject; import messif.operations.data.InsertOperation; @@ -18,12 +17,13 @@ import java.util.Arrays; import java.util.BitSet; import java.util.IntSummaryStatistics; import java.util.List; -import java.util.Map; import java.util.PriorityQueue; -import java.util.function.BiFunction; import java.util.stream.Collectors; import static cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.Ranked.getRankedIndices; +import static mhtree.InsertType.GREEDY; +import static mhtree.MergingMethod.HULL_BASED_MERGE; +import static mhtree.ObjectToNodeDistance.NEAREST; public class MHTree extends Algorithm implements Serializable { @@ -49,7 +49,7 @@ public class MHTree extends Algorithm implements Serializable { @AlgorithmConstructor(description = "MH-Tree", arguments = { "MH-Tree builder object", }) - private MHTree(Builder builder) { + private MHTree(MHTreeBuilder builder) { super("MH-Tree"); bucketCapacity = builder.bucketCapacity; @@ -225,20 +225,23 @@ public class MHTree extends Algorithm implements Serializable { '}'; } - public static class Builder { + /** + * Represents the MH-Tree bulk-loading algorithm. + */ + public static class MHTreeBuilder { /** - * List of object used during the building of MH-Tree. + * List of object used by the MH-Tree bulk-loading algorithm. */ private final List<LocalAbstractObject> objects; /** - * Minimal number of objects in leaf node's bucket. + * Minimal number of objects in bucket of a leaf node. 
*/ private final int bucketCapacity; /** - * Maximal degree of internal node. + * Maximal degree of an internal node. */ private final int arity; @@ -260,7 +263,7 @@ public class MHTree extends Algorithm implements Serializable { /** * Specifies the merging strategy. */ - private MergeType mergeType; + private MergingMethod mergingMethod; /** * Precomputed object distances. @@ -273,7 +276,7 @@ public class MHTree extends Algorithm implements Serializable { private Node[] nodes; /** - * Identifies which indices in {@code nodes} are valid. + * Identifies which indices in {@link #nodes} are valid. */ private BitSet validNodeIndices; @@ -287,7 +290,14 @@ public class MHTree extends Algorithm implements Serializable { */ private Node root; - public Builder(List<LocalAbstractObject> objects, int bucketCapacity, int arity) { + /** + * Creates a new MH-Tree builder corresponding to the bulk-loading algorithm. + * + * @param objects list of objects + * @param bucketCapacity leaf node bucket capacity + * @param arity arity + */ + public MHTreeBuilder(List<LocalAbstractObject> objects, int bucketCapacity, int arity) { if (objects == null) { throw new NullPointerException("List of objects is null"); } @@ -308,37 +318,62 @@ public class MHTree extends Algorithm implements Serializable { this.bucketCapacity = bucketCapacity; this.arity = arity; - this.insertType = InsertType.GREEDY; - this.objectToNodeDistance = ObjectToNodeDistance.NEAREST; + this.insertType = GREEDY; + this.objectToNodeDistance = NEAREST; this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, bucketCapacity, 0, false, MemoryStorageBucket.class, null); - this.mergeType = MergeType.HULL_BASED; + this.mergingMethod = HULL_BASED_MERGE; } - public Builder insertType(InsertType insertType) { + /** + * Specifies which insert type should be used. 
+ * + * @param insertType the insert type + * @return the MH-Tree builder object + */ + public MHTreeBuilder insertType(InsertType insertType) { this.insertType = insertType; return this; } - public Builder objectToNodeDistance(ObjectToNodeDistance objectToNodeDistance) { + /** + * Specifies which object to node distance should be used. + * + * @param objectToNodeDistance the object to node distance measure + * @return the MH-Tree builder object + */ + public MHTreeBuilder objectToNodeDistance(ObjectToNodeDistance objectToNodeDistance) { this.objectToNodeDistance = objectToNodeDistance; return this; } - public Builder bucketDispatcher(BucketDispatcher bucketDispatcher) { + /** + * Specifies which bucket dispatcher should be used. + * + * @param bucketDispatcher the bucket dispatcher + * @return the MH-Tree builder object + */ + public MHTreeBuilder bucketDispatcher(BucketDispatcher bucketDispatcher) { this.bucketDispatcher = bucketDispatcher; return this; } - public Builder bucketDispatcher(Class<? extends LocalBucket> defaultBucketClass, Map<String, Object> bucketClassParams) { - this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, bucketCapacity, 0, false, defaultBucketClass, bucketClassParams); - return this; - } - - public Builder mergeType(MergeType mergeType) { - this.mergeType = mergeType; + /** + * Specifies which merging method should be used. + * + * @param mergingMethod the merging method + * @return the MH-Tree builder object + */ + public MHTreeBuilder mergingMethod(MergingMethod mergingMethod) { + this.mergingMethod = mergingMethod; return this; } + /** + * Builds an MH-Tree based on specified parameters. 
+ * + * @return an MH-Tree based on specified parameters + * @throws BucketStorageException when creating a new bucket or adding an object to the nearest node + */ public MHTree build() throws BucketStorageException { nodes = new Node[objects.size() / bucketCapacity]; @@ -365,6 +400,12 @@ public class MHTree extends Algorithm implements Serializable { return new MHTree(this); } + /** + * Creates a root of an MH-Tree. + * + * @param arity arity + * @return a root of an MH-Tree. + */ private Node createRoot(int arity) { while (validNodeIndices.cardinality() != 1) { BitSet notProcessedNodeIndices = (BitSet) validNodeIndices.clone(); @@ -375,9 +416,11 @@ public class MHTree extends Algorithm implements Serializable { break; } + // Select the furthest node int furthestNodeIndex = nodeDistances.getFurthestIndex(notProcessedNodeIndices); notProcessedNodeIndices.clear(furthestNodeIndex); + // Select the rest of the nodes up to the total of arity List<Integer> closestNodeIndices = Arrays .stream(getRankedIndices(notProcessedNodeIndices, nodeDistances.getDistances(furthestNodeIndex))) .limit(arity - 1) @@ -393,12 +436,19 @@ public class MHTree extends Algorithm implements Serializable { return nodes[validNodeIndices.nextSetBit(0)]; } + /** + * Populates the {@link #nodes} by leaf nodes created on {@link #objects}. 
+ * + * @param bucketCapacity bucket capacity of a leaf node + * @throws BucketStorageException when creating a new bucket or adding an object to the nearest node + */ private void createLeafNodes(int bucketCapacity) throws BucketStorageException { BitSet notProcessedObjectIndices = new BitSet(objectDistances.getObjectCount()); notProcessedObjectIndices.set(0, objectDistances.getObjectCount()); for (int nodeIndex = 0; !notProcessedObjectIndices.isEmpty(); nodeIndex++) { if (notProcessedObjectIndices.cardinality() < bucketCapacity) { + // Add the rest of the objects to their nearest nodes for (int i = notProcessedObjectIndices.nextSetBit(0); i >= 0; i = notProcessedObjectIndices.nextSetBit(i + 1)) { LocalAbstractObject object = objectDistances.getObject(i); nodes[getClosestNodeIndex(object)].addObject(object); @@ -415,7 +465,7 @@ public class MHTree extends Algorithm implements Serializable { objectIndices.add(furthestIndex); // Select the rest of the objects up to the total of bucketCapacity with respect to the building of a hull - objectIndices.addAll(findClosestItems(this::findClosestObjectIndex, furthestIndex, bucketCapacity - 1, notProcessedObjectIndices)); + objectIndices.addAll(findClosestObjects(furthestIndex, bucketCapacity - 1, notProcessedObjectIndices)); List<LocalAbstractObject> objects = objectIndices .stream() @@ -426,12 +476,18 @@ public class MHTree extends Algorithm implements Serializable { } } + /** + * Returns an index of the nearest node in {@link #nodes}. 
+ * + * @param object an object + * @return an index of the nearest node in {@link #nodes} + */ private int getClosestNodeIndex(LocalAbstractObject object) { double minDistance = Double.MAX_VALUE; int closestNodeIndex = -1; for (int candidateIndex = 0; candidateIndex < nodes.length; candidateIndex++) { - double distance = ObjectToNodeDistance.NEAREST.getDistance(object, nodes[candidateIndex], objectDistances); + double distance = NEAREST.getDistance(object, nodes[candidateIndex], objectDistances); if (distance < minDistance) { minDistance = distance; @@ -442,37 +498,50 @@ public class MHTree extends Algorithm implements Serializable { return closestNodeIndex; } - private List<Integer> findClosestItems(BiFunction<List<Integer>, BitSet, Integer> findClosestItemIndex, int itemIndex, int numberOfItems, BitSet notProcessedItemIndices) { - List<Integer> itemIndices = new ArrayList<>(1 + numberOfItems); - itemIndices.add(itemIndex); - - List<Integer> resultItemsIndices = new ArrayList<>(numberOfItems); - - while (resultItemsIndices.size() != numberOfItems) { - int index = findClosestItemIndex.apply(itemIndices, notProcessedItemIndices); + /** + * Returns the closest objects with respect to the building of a hull. + * The number of returned indices is specified by {@code numberOfObjects}. + * Note that the returned indices are already set as invalid in {@code validObjectIndices}. 
+ * + * @param objectIndex the initial object index to which the closest objects are found + * @param numberOfObjects the number of object indices to return + * @param validObjectIndices object indices which are taken into consideration + * @return a list of closest object indices with respect to the building of a hull + */ + private List<Integer> findClosestObjects(int objectIndex, int numberOfObjects, BitSet validObjectIndices) { + List<Integer> objectIndices = new ArrayList<>(1 + numberOfObjects); + objectIndices.add(objectIndex); - itemIndices.add(index); - resultItemsIndices.add(index); + while (objectIndices.size() - 1 != numberOfObjects) { + int index = findClosestObjectIndex(objectIndices, validObjectIndices); - notProcessedItemIndices.clear(index); + objectIndices.add(index); + validObjectIndices.clear(index); } - return resultItemsIndices; + return objectIndices.subList(1, objectIndices.size()); } + /** + * Returns the closest object index to {@code indices} with respect to the building of a hull. 
+ * + * @param indices list of object indices + * @param validObjectIndices object indices which are taken into consideration + * @return the closest object index to {@code indices} with respect to the building of a hull + */ private int findClosestObjectIndex(List<Integer> indices, BitSet validObjectIndices) { double minDistance = Double.MAX_VALUE; int closestObjectIndex = -1; for (int index : indices) { int candidateIndex = objectDistances.minDistInArray(objectDistances.getDistances(index), validObjectIndices); - double distance = indices + double distanceSum = indices .stream() .mapToDouble(i -> objectDistances.getDistance(i, candidateIndex)) .sum(); - if (distance < minDistance) { - minDistance = distance; + if (distanceSum < minDistance) { + minDistance = distanceSum; closestObjectIndex = candidateIndex; } } @@ -481,10 +550,10 @@ public class MHTree extends Algorithm implements Serializable { } /** - * Merges nodes specified by indices in set state in {@code nodeIndices}. - * The new node is placed on the first set index in {@code nodeIndices}. + * Merges nodes in {@link #nodes} specified by the set indices in {@code nodeIndices}. + * The new node is placed on the first set index in {@code nodeIndices}. * - * @param nodeIndices the bitset of nodes to be merged + * @param nodeIndices the bitset of node indices to be merged */ private void mergeNodes(BitSet nodeIndices) { List<Integer> indices = nodeIndices @@ -496,10 +565,11 @@ public class MHTree extends Algorithm implements Serializable { } /** - * Merges specified nodes into one and places the new node on the {@code parentNodeIndex} in {@code nodes}. + * Merges nodes in {@link #nodes} specified by indices in {@code nodeIndices} and {@code parentNodeIndex}. + * The new node is placed on the {@code parentNodeIndex} in {@link #nodes}. 
* - * @param parentNodeIndex an index where the new node is placed - * @param nodeIndices specifies a list of indices which are merge with {@code parentNodeIndex} into a new node + * @param parentNodeIndex the index where the new node is placed + * @param nodeIndices a list of indices which are merged together with {@code parentNodeIndex} into a new node */ private void mergeNodes(int parentNodeIndex, List<Integer> nodeIndices) { if (nodeIndices.size() == 0) return; @@ -511,7 +581,7 @@ public class MHTree extends Algorithm implements Serializable { .map(i -> this.nodes[i]) .collect(Collectors.toList()); - InternalNode parent = Node.createParent(children, objectDistances, insertType, objectToNodeDistance, mergeType); + InternalNode parent = Node.createParent(children, objectDistances, insertType, objectToNodeDistance, mergingMethod); nodeIndices.forEach(index -> { validNodeIndices.clear(index); @@ -525,22 +595,39 @@ public class MHTree extends Algorithm implements Serializable { } /** - * {@code PrecomputedNodeDistances} contains methods for computing, updating, - * and retrieving distance between nodes stored in {@code nodes}. + * {@link PrecomputedNodeDistances} contains methods for computing, updating, + * and retrieving distance between nodes stored in {@link #nodes}. */ private class PrecomputedNodeDistances { + /** + * Node distance matrix for nodes in {@link #nodes}. + */ private final float[][] distances; + /** + * Creates a new node distance matrix and precomputes distances for nodes in {@link #nodes}. + */ PrecomputedNodeDistances() { distances = new float[nodes.length][nodes.length]; computeNodeDistances(); } + /** + * Returns the distances to node on index {@code i}. + * + * @param i an index of node in {@link #nodes} + * @return the distances to node on index {@code i} + */ private float[] getDistances(int i) { return distances[i]; } + /** + * Updates distances for node on index {@code nodeIndex}. 
+ * + * @param nodeIndex the index of the node + */ private void updateNodeDistances(int nodeIndex) { validNodeIndices .stream() @@ -552,12 +639,19 @@ public class MHTree extends Algorithm implements Serializable { }); } + /** + * Returns the index of the furthest node according to the node distance matrix, + * using only the valid indices specified by {@code validIndices}. + * + * @param validIndices the valid node indices + * @return the index of the furthest node among the valid indices + */ private int getFurthestIndex(BitSet validIndices) { return Utils.maxDistanceIndex(distances, validIndices); } /** - * Computes distances between nodes in {@code nodes}, stores the result in {@code distances}. + * Computes distances between nodes in {@link #nodes}, stores the result in {@link #distances}. */ private void computeNodeDistances() { for (int i = 0; i < nodes.length; i++) { @@ -571,11 +665,11 @@ public class MHTree extends Algorithm implements Serializable { } /** - * Computes and returns the distance between nodes on indices i and j in {@code nodes}. + * Computes and returns the distance between nodes on indices i and j in {@link #nodes}. * - * @param i an index of node in {@code nodes} - * @param j an index of node in {@code nodes} - * @return the distance between nodes on indices i and j in {@code nodes}. + * @param i an index of node in {@link #nodes} + * @param j an index of node in {@link #nodes} + * @return the distance between nodes on indices i and j in {@link #nodes}. 
*/ private float computeDistanceBetweenNodes(int i, int j) { float distance = Float.MAX_VALUE; diff --git a/src/mhtree/MergeType.java b/src/mhtree/MergingMethod.java similarity index 91% rename from src/mhtree/MergeType.java rename to src/mhtree/MergingMethod.java index 1feb5febf7b293e20e029ba1e42e4db397fb6d38..3cde09a5719bb4620fbef99975b66129db0bb9d6 100644 --- a/src/mhtree/MergeType.java +++ b/src/mhtree/MergingMethod.java @@ -4,11 +4,11 @@ import messif.objects.LocalAbstractObject; import java.util.List; -public enum MergeType { +public enum MergingMethod { /** * Every object in descendants leaf node's bucket is retrieved and returned. */ - OBJECT_BASED { + OBJECT_BASED_MERGE { @Override public List<LocalAbstractObject> getObjects(Node node) { return node.getObjects(); @@ -17,7 +17,7 @@ public enum MergeType { /** * Returns the hull objects of {@code node}'s hull. */ - HULL_BASED { + HULL_BASED_MERGE { @Override public List<LocalAbstractObject> getObjects(Node node) { return node.getHullObjects();