diff --git a/src/mhtree/InsertType.java b/src/mhtree/InsertType.java index 33cd73f69b55ed182f30102cde802d4c3842d599..a57b43b37e10a5399e4b7a88b7f12511b7a3c0b5 100644 --- a/src/mhtree/InsertType.java +++ b/src/mhtree/InsertType.java @@ -17,4 +17,6 @@ public enum InsertType { * If no such hull object is found, the inserted object is simply added as a new hull object. */ INCREMENTAL, + + ADD_HULL_OBJECT, } diff --git a/src/mhtree/InternalNode.java b/src/mhtree/InternalNode.java index d8a07fa1ef20bca47f5631f6ee77dc344a952850..6a586cd4c1bd66d366215ac805e1ce1a41a81385 100644 --- a/src/mhtree/InternalNode.java +++ b/src/mhtree/InternalNode.java @@ -1,6 +1,7 @@ package mhtree; import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.PrecomputedDistances; +import cz.muni.fi.disa.similarityoperators.cover.HullOptimizedRepresentationV3; import messif.objects.LocalAbstractObject; import java.io.Serializable; @@ -10,6 +11,8 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import static mhtree.ObjectToNodeDistance.FURTHEST; + /** * Represents an internal node in MH-Tree. */ @@ -28,6 +31,12 @@ class InternalNode extends Node implements Serializable { this.children = children; } + protected InternalNode(HullOptimizedRepresentationV3 hull, InsertType insertType, ObjectToNodeDistance objectToNodeDistance, List<Node> children) { + super(hull, insertType, objectToNodeDistance); + + this.children = children; + } + /** * Returns the list of child nodes. * @@ -37,16 +46,29 @@ class InternalNode extends Node implements Serializable { return children; } + protected int getNumberOfChildren() { + return children.size(); + } + /** - * Returns the nearest child to the {@code object}. + * Returns the child covering the {@code object} (the one with the largest {@code FURTHEST} distance if several do), otherwise the child with the smallest {@code FURTHEST} distance. 
* * @param object object to which the distance is measured * @return the nearest child to the {@code object} */ protected Node getNearestChild(LocalAbstractObject object) { + Optional<Node> nearestCoveredNode = children + .stream() + .filter(child -> child.isCovered(object)) + .min(Comparator.comparing(child -> -FURTHEST.getDistance(object, child))); + + if (nearestCoveredNode.isPresent()) { + return nearestCoveredNode.get(); + } + Optional<Node> nearestChild = children .stream() - .min(Comparator.comparing(child -> child.getDistance(object))); + .min(Comparator.comparing(child -> FURTHEST.getDistance(object, child))); return nearestChild.orElseThrow(() -> new IllegalStateException("Internal node has no children")); } diff --git a/src/mhtree/LeafNode.java b/src/mhtree/LeafNode.java index ea4ead9d37b12dadcfe770b5ff78f37cbee5ba5b..e9cb2a02bad314ca0a65abc1f0f9f0f6704163f3 100644 --- a/src/mhtree/LeafNode.java +++ b/src/mhtree/LeafNode.java @@ -41,6 +41,10 @@ public class LeafNode extends Node implements Serializable { return bucket.getObjectCount(); } + public LocalBucket getBucket() { + return bucket; + } + @Override protected void addObject(LocalAbstractObject object) throws BucketStorageException { bucket.addObject(object); diff --git a/src/mhtree/MHTree.java b/src/mhtree/MHTree.java index 277404d55ac40ba2d97e3db647f4712088aabd3a..666f87b128dd1d609ff9540655cee89b23f8f396 100644 --- a/src/mhtree/MHTree.java +++ b/src/mhtree/MHTree.java @@ -1,6 +1,6 @@ package mhtree; -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation; +import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.PrecomputedDistances; import messif.algorithms.Algorithm; import messif.buckets.BucketDispatcher; import messif.buckets.BucketStorageException; @@ -44,26 +44,22 @@ public class MHTree extends Algorithm implements Serializable { * The maximal degree of an internal node. */ private final int arity; - - /** - * The root node of MH-Tree. 
- */ - private final Node root; - /** * Specifies which method to use when adding a new object. */ private final InsertType insertType; - /** * Specifies how to measure distance between an object and a node. */ private final ObjectToNodeDistance objectToNodeDistance; - /** * A dispatcher for maintaining a set of local buckets. */ private final BucketDispatcher bucketDispatcher; + /** + * The root node of MH-Tree. + */ + private Node root; /** * Create a new MH-Tree. @@ -86,6 +82,17 @@ public class MHTree extends Algorithm implements Serializable { root = builder.root; } + public MHTree(String algorithmName, int bucketCapacity, int arity, InsertType insertType, ObjectToNodeDistance objectToNodeDistance) throws IllegalArgumentException { + super(algorithmName); + + this.bucketCapacity = bucketCapacity; + this.arity = arity; + this.insertType = insertType; + this.objectToNodeDistance = objectToNodeDistance; + + this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, 2L * bucketCapacity - 1, 0, false, MemoryStorageBucket.class, null); + } + public void kNN(KNNQueryOperation operation) { kNNSearch(operation, null); } @@ -171,6 +178,16 @@ public class MHTree extends Algorithm implements Serializable { public void insert(InsertOperation operation) throws BucketStorageException { LocalAbstractObject object = operation.getInsertedObject(); + // Empty tree + if (root == null) { + List<LocalAbstractObject> objects = new ArrayList<>(); + objects.add(object); + + root = new LeafNode(new PrecomputedDistances(objects), bucketDispatcher.createBucket(), insertType, objectToNodeDistance); + operation.endOperation(); + return; + } + Node node = root; while (!node.isLeaf()) { @@ -179,11 +196,80 @@ public class MHTree extends Algorithm implements Serializable { node = ((InternalNode) node).getNearestChild(object); } - node.addObject(object); + LeafNode leaf = (LeafNode) node; + leaf.addObject(object); + + if (leaf.getBucket().isSoftCapacityExceeded()) { + // 
Split leaf node + + List<Node> split = split(leaf); + InternalNode parent = leaf.getParent(); + + if (parent == null) { + // Leaf node is root + + InternalNode internalNode = new InternalNode(leaf.getHull(), insertType, objectToNodeDistance, split); + split.forEach(s -> s.setParent(internalNode)); + root = internalNode; + } else { + // Leaf node has parent + + List<Node> parentChildren = parent.getChildren(); + + if (parent.getNumberOfChildren() == arity) { + // The parent node is full + + parentChildren.remove(leaf); + InternalNode internalNode = new InternalNode(leaf.getHull(), insertType, objectToNodeDistance, split); + split.forEach(s -> s.setParent(internalNode)); + parentChildren.add(internalNode); + } else { + // The parent can accommodate another leaf node + + parentChildren.remove(leaf); + split.forEach(s -> s.setParent(parent)); + parentChildren.addAll(split); + } + } + + bucketDispatcher.removeBucket(leaf.getBucket().getBucketID()); + } operation.endOperation(); } + private List<Node> split(LeafNode leaf) throws BucketStorageException { + PrecomputedDistances objectDistances = new PrecomputedDistances(leaf.getObjects()); + + BitSet notProcessedObjectIndices = new BitSet(objectDistances.getObjectCount()); + notProcessedObjectIndices.set(0, objectDistances.getObjectCount()); + + List<Integer> objectIndices = new ArrayList<>(bucketCapacity); + + // Select the furthest object (the outlier) + int furthestIndex = Utils.maxDistanceIndex(objectDistances.getDistances(), notProcessedObjectIndices); + notProcessedObjectIndices.clear(furthestIndex); + objectIndices.add(furthestIndex); + + // Select the rest of the objects up to the total of bucketCapacity with respect to the building of a hull + objectIndices.addAll(MHTreeBuilder.findClosestObjects(furthestIndex, bucketCapacity - 1, notProcessedObjectIndices, objectDistances)); + + List<LocalAbstractObject> halfOfObjects = objectIndices + .stream() + .map(objectDistances::getObject) + .collect(Collectors.toList()); 
+ + List<LocalAbstractObject> secondHalf = notProcessedObjectIndices + .stream() + .mapToObj(objectDistances::getObject) + .collect(Collectors.toList()); + + return new ArrayList<>(Arrays.asList( + new LeafNode(objectDistances.getSubset(halfOfObjects), bucketDispatcher.createBucket(), insertType, objectToNodeDistance), + new LeafNode(objectDistances.getSubset(secondHalf), bucketDispatcher.createBucket(), insertType, objectToNodeDistance) + )); + } + /** * Returns a list of nodes. * @@ -302,7 +388,7 @@ public class MHTree extends Algorithm implements Serializable { /** * Contains the object distance matrix. */ - private AbstractRepresentation.PrecomputedDistances objectDistances; + private PrecomputedDistances objectDistances; /** * Stores intermediate nodes needed during the algorithm. @@ -354,10 +440,61 @@ public class MHTree extends Algorithm implements Serializable { this.insertType = GREEDY; this.objectToNodeDistance = NEAREST; - this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, bucketCapacity, 0, false, MemoryStorageBucket.class, null); + this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, 2L * bucketCapacity - 1, 0, false, MemoryStorageBucket.class, null); this.mergingMethod = HULL_BASED_MERGE; } + /** + * Returns the closest objects with respect to the building of a hull. + * The number of returned indices is specified by {@code numberOfObjects}. + * Note that the returned indices are already set as invalid in {@code validObjectIndices}. 
+ * + * @param objectIndex the initial object index to which the closest objects are found + * @param numberOfObjects the number of returned object indices + * @param validObjectIndices object indices which are taken into consideration + * @return a list of closest object indices with respect to the building of a hull + */ + private static List<Integer> findClosestObjects(int objectIndex, int numberOfObjects, BitSet validObjectIndices, PrecomputedDistances objectDistances) { + List<Integer> objectIndices = new ArrayList<>(1 + numberOfObjects); + objectIndices.add(objectIndex); + + while (objectIndices.size() - 1 != numberOfObjects) { + int index = findClosestObjectIndex(objectIndices, validObjectIndices, objectDistances); + + objectIndices.add(index); + validObjectIndices.clear(index); + } + + return objectIndices.subList(1, objectIndices.size()); + } + + /** + * Returns the closest object index to {@code indices} with respect to the building of a hull. + * + * @param indices list of object indices + * @param validObjectIndices object indices which are taken into consideration + * @return the closest object index to {@code indices} with respect to the building of a hull + */ + private static int findClosestObjectIndex(List<Integer> indices, BitSet validObjectIndices, PrecomputedDistances objectDistances) { + double minDistance = Double.MAX_VALUE; + int closestObjectIndex = -1; + + for (int index : indices) { + int candidateIndex = objectDistances.minDistInArray(objectDistances.getDistances(index), validObjectIndices); + double distanceSum = indices + .stream() + .mapToDouble(i -> objectDistances.getDistance(i, candidateIndex)) + .sum(); + + if (distanceSum < minDistance) { + minDistance = distanceSum; + closestObjectIndex = candidateIndex; + } + } + + return closestObjectIndex; + } + /** * Specifies which insert type should be used. 
* @@ -414,7 +551,7 @@ public class MHTree extends Algorithm implements Serializable { validNodeIndices = new BitSet(nodes.length); validNodeIndices.set(0, nodes.length); - objectDistances = new AbstractRepresentation.PrecomputedDistances(objects); + objectDistances = new PrecomputedDistances(objects); // Every object is stored in the root if (objectDistances.getObjectCount() <= bucketCapacity) { @@ -499,7 +636,7 @@ public class MHTree extends Algorithm implements Serializable { objectIndices.add(furthestIndex); // Select the rest of the objects up to the total of bucketCapacity with respect to the building of a hull - objectIndices.addAll(findClosestObjects(furthestIndex, bucketCapacity - 1, notProcessedObjectIndices)); + objectIndices.addAll(findClosestObjects(furthestIndex, bucketCapacity - 1, notProcessedObjectIndices, objectDistances)); List<LocalAbstractObject> objects = objectIndices .stream() @@ -532,57 +669,6 @@ public class MHTree extends Algorithm implements Serializable { return closestNodeIndex; } - /** - * Returns the closest objects with respect to the building of a hull. - * The number of returned indices is specified by {@code numberOfObjects}. - * Note that the returned indices are already set as invalid in {@code validObjectIndices}. 
- * - * @param objectIndex the initial object index to which the closest objects are found - * @param numberOfObjects the number of return object indices - * @param validObjectIndices object indices which are taken into consideration - * @return a list of closest object indices with respect to the building of a hull - */ - private List<Integer> findClosestObjects(int objectIndex, int numberOfObjects, BitSet validObjectIndices) { - List<Integer> objectIndices = new ArrayList<>(1 + numberOfObjects); - objectIndices.add(objectIndex); - - while (objectIndices.size() - 1 != numberOfObjects) { - int index = findClosestObjectIndex(objectIndices, validObjectIndices); - - objectIndices.add(index); - validObjectIndices.clear(index); - } - - return objectIndices.subList(1, objectIndices.size()); - } - - /** - * Returns the closest object index to {@code indices} with respect to the building of a hull. - * - * @param indices list of object indices - * @param validObjectIndices object indices which are taken into consideration - * @return the closest object index to {@code indices} with respect to the building of a hull - */ - private int findClosestObjectIndex(List<Integer> indices, BitSet validObjectIndices) { - double minDistance = Double.MAX_VALUE; - int closestObjectIndex = -1; - - for (int index : indices) { - int candidateIndex = objectDistances.minDistInArray(objectDistances.getDistances(index), validObjectIndices); - double distanceSum = indices - .stream() - .mapToDouble(i -> objectDistances.getDistance(i, candidateIndex)) - .sum(); - - if (distanceSum < minDistance) { - minDistance = distanceSum; - closestObjectIndex = candidateIndex; - } - } - - return closestObjectIndex; - } - /** * Merges nodes in {@link #nodes} specified by indices in {@link #validNodeIndices}. * The new node is placed on the first valid index in {@link #validNodeIndices}. 
@@ -617,6 +703,8 @@ public class MHTree extends Algorithm implements Serializable { InternalNode parent = Node.createParent(children, objectDistances, insertType, objectToNodeDistance, mergingMethod); + children.forEach(child -> child.setParent(parent)); + nodeIndices.forEach(index -> { validNodeIndices.clear(index); this.nodes[index] = null; diff --git a/src/mhtree/MergingMethod.java b/src/mhtree/MergingMethod.java index 3cde09a5719bb4620fbef99975b66129db0bb9d6..431c30edebd1b05e9b52be255d7a61b79c9dc053 100644 --- a/src/mhtree/MergingMethod.java +++ b/src/mhtree/MergingMethod.java @@ -15,7 +15,7 @@ public enum MergingMethod { } }, /** - * Returns the hull objects of {@code node}'s hull. + * Returns the hull objects in {@code node}'s hull. */ HULL_BASED_MERGE { @Override diff --git a/src/mhtree/Node.java b/src/mhtree/Node.java index 09d5b3dceb367b37f1dcca965900315787901a6f..4fb191f5e20d875cb242bcc5ea3986a59112c3b9 100644 --- a/src/mhtree/Node.java +++ b/src/mhtree/Node.java @@ -18,8 +18,8 @@ public abstract class Node implements Serializable { private static final long serialVersionUID = 420L; private final InsertType insertType; - private final ObjectToNodeDistance objectToNodeDistance; - + public ObjectToNodeDistance objectToNodeDistance; + private InternalNode parent; private HullOptimizedRepresentationV3 hull; protected Node(PrecomputedDistances distances, InsertType insertType, ObjectToNodeDistance objectToNodeDistance) { @@ -30,10 +30,16 @@ public abstract class Node implements Serializable { this.objectToNodeDistance = objectToNodeDistance; } - protected static InternalNode createParent(List<Node> nodes, PrecomputedDistances distances, InsertType insertType, ObjectToNodeDistance objectToNodeDistance, MergeType mergeType) { + protected Node(HullOptimizedRepresentationV3 hull, InsertType insertType, ObjectToNodeDistance objectToNodeDistance) { + this.hull = hull; + this.insertType = insertType; + this.objectToNodeDistance = objectToNodeDistance; + } + + 
protected static InternalNode createParent(List<Node> nodes, PrecomputedDistances distances, InsertType insertType, ObjectToNodeDistance objectToNodeDistance, MergingMethod mergingMethod) { List<LocalAbstractObject> objects = nodes .stream() - .map(mergeType::getObjects) + .map(mergingMethod::getObjects) .flatMap(Collection::stream) .collect(Collectors.toList()); @@ -63,6 +69,18 @@ public abstract class Node implements Serializable { return hull.isExternalCovered(object); } + public HullOptimizedRepresentationV3 getHull() { + return hull; + } + + public InternalNode getParent() { + return parent; + } + + public void setParent(InternalNode parent) { + this.parent = parent; + } + @Override public String toString() { return "Node{hull=" + hull + '}'; @@ -80,6 +98,10 @@ public abstract class Node implements Serializable { return (this instanceof LeafNode); } + protected boolean isInternal() { + return (this instanceof InternalNode); + } + protected int getHullObjectCount() { return hull.getRepresentativesCount(); } @@ -94,6 +116,9 @@ public abstract class Node implements Serializable { case INCREMENTAL: insertIncremental(object); break; + case ADD_HULL_OBJECT: + insertHullRebuild(object); + break; default: throw new IllegalStateException("Unexpected value: " + insertType); } @@ -146,4 +171,12 @@ public abstract class Node implements Serializable { private void insertIncremental(LocalAbstractObject object) { hull.addHullObject(object); } + + private void insertHullRebuild(LocalAbstractObject object) { + List<LocalAbstractObject> hull = this.hull.getHull(); + hull.add(object); + + this.hull = new HullOptimizedRepresentationV3(hull); + this.hull.build(); + } }