From 5557fc206c44ed6e7998b38330a8397a57d932bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20Proch=C3=A1zka?= <david@prochazka.dev>
Date: Tue, 11 May 2021 15:18:50 +0200
Subject: [PATCH] ADD: insert method with leaf node splitting

---
 src/mhtree/InsertType.java    |   2 +
 src/mhtree/InternalNode.java  |  26 +++-
 src/mhtree/LeafNode.java      |   4 +
 src/mhtree/MHTree.java        | 218 ++++++++++++++++++++++++----------
 src/mhtree/MergingMethod.java |   2 +-
 src/mhtree/Node.java          |  41 ++++++-
 6 files changed, 221 insertions(+), 72 deletions(-)

diff --git a/src/mhtree/InsertType.java b/src/mhtree/InsertType.java
index 33cd73f..a57b43b 100644
--- a/src/mhtree/InsertType.java
+++ b/src/mhtree/InsertType.java
@@ -17,4 +17,6 @@ public enum InsertType {
      * If no such hull object is found, the inserted object is simply added as a new hull object.
      */
     INCREMENTAL,
+    /** Rebuilds the node's hull from its current hull objects together with the inserted object. */
+    ADD_HULL_OBJECT,
 }
diff --git a/src/mhtree/InternalNode.java b/src/mhtree/InternalNode.java
index d8a07fa..6a586cd 100644
--- a/src/mhtree/InternalNode.java
+++ b/src/mhtree/InternalNode.java
@@ -1,6 +1,7 @@
 package mhtree;
 
 import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.PrecomputedDistances;
+import cz.muni.fi.disa.similarityoperators.cover.HullOptimizedRepresentationV3;
 import messif.objects.LocalAbstractObject;
 
 import java.io.Serializable;
@@ -10,6 +11,8 @@ import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;
 
+import static mhtree.ObjectToNodeDistance.FURTHEST;
+
 /**
  * Represents an internal node in MH-Tree.
  */
@@ -28,6 +31,12 @@ class InternalNode extends Node implements Serializable {
         this.children = children;
     }
 
+    protected InternalNode(HullOptimizedRepresentationV3 hull, InsertType insertType, ObjectToNodeDistance objectToNodeDistance, List<Node> children) {
+        super(hull, insertType, objectToNodeDistance);
+
+        this.children = children;
+    }
+
     /**
      * Returns the list of child nodes.
      *
@@ -37,16 +46,29 @@ class InternalNode extends Node implements Serializable {
         return children;
     }
 
+    protected int getNumberOfChildren() {
+        return children.size();
+    }
+
     /**
-     * Returns the nearest child to the {@code object}.
+     * Returns a child that covers the {@code object} if there is one, otherwise the nearest child to the {@code object}.
      *
      * @param object object to which the distance is measured
      * @return the nearest child to the {@code object}
      */
     protected Node getNearestChild(LocalAbstractObject object) {
+        Optional<Node> nearestCoveredNode = children
+                .stream()
+                .filter(child -> child.isCovered(object))
+                .min(Comparator.comparing(child -> -FURTHEST.getDistance(object, child)));
+
+        if (nearestCoveredNode.isPresent()) {
+            return nearestCoveredNode.get();
+        }
+
         Optional<Node> nearestChild = children
                 .stream()
-                .min(Comparator.comparing(child -> child.getDistance(object)));
+                .min(Comparator.comparing(child -> FURTHEST.getDistance(object, child)));
 
         return nearestChild.orElseThrow(() -> new IllegalStateException("Internal node has no children"));
     }
diff --git a/src/mhtree/LeafNode.java b/src/mhtree/LeafNode.java
index ea4ead9..e9cb2a0 100644
--- a/src/mhtree/LeafNode.java
+++ b/src/mhtree/LeafNode.java
@@ -41,6 +41,10 @@ public class LeafNode extends Node implements Serializable {
         return bucket.getObjectCount();
     }
 
+    public LocalBucket getBucket() {
+        return bucket;
+    }
+
     @Override
     protected void addObject(LocalAbstractObject object) throws BucketStorageException {
         bucket.addObject(object);
diff --git a/src/mhtree/MHTree.java b/src/mhtree/MHTree.java
index 277404d..666f87b 100644
--- a/src/mhtree/MHTree.java
+++ b/src/mhtree/MHTree.java
@@ -1,6 +1,6 @@
 package mhtree;
 
-import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation;
+import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.PrecomputedDistances;
 import messif.algorithms.Algorithm;
 import messif.buckets.BucketDispatcher;
 import messif.buckets.BucketStorageException;
@@ -44,26 +44,22 @@ public class MHTree extends Algorithm implements Serializable {
      * The maximal degree of an internal node.
      */
     private final int arity;
-
-    /**
-     * The root node of MH-Tree.
-     */
-    private final Node root;
-
     /**
      * Specifies which method to use when adding a new object.
      */
     private final InsertType insertType;
-
     /**
      * Specifies how to measure distance between an object and a node.
      */
     private final ObjectToNodeDistance objectToNodeDistance;
-
     /**
      * A dispatcher for maintaining a set of local buckets.
      */
     private final BucketDispatcher bucketDispatcher;
+    /**
+     * The root node of MH-Tree.
+     */
+    private Node root;
 
     /**
      * Create a new MH-Tree.
@@ -86,6 +82,17 @@ public class MHTree extends Algorithm implements Serializable {
         root = builder.root;
     }
 
+    public MHTree(String algorithmName, int bucketCapacity, int arity, InsertType insertType, ObjectToNodeDistance objectToNodeDistance) throws IllegalArgumentException {
+        super(algorithmName);
+
+        this.bucketCapacity = bucketCapacity;
+        this.arity = arity;
+        this.insertType = insertType;
+        this.objectToNodeDistance = objectToNodeDistance;
+
+        this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, 2L * bucketCapacity - 1, 0, false, MemoryStorageBucket.class, null);
+    }
+
     public void kNN(KNNQueryOperation operation) {
         kNNSearch(operation, null);
     }
@@ -171,6 +178,16 @@ public class MHTree extends Algorithm implements Serializable {
     public void insert(InsertOperation operation) throws BucketStorageException {
         LocalAbstractObject object = operation.getInsertedObject();
 
+        // Empty tree
+        if (root == null) {
+            List<LocalAbstractObject> objects = new ArrayList<>();
+            objects.add(object);
+
+            root = new LeafNode(new PrecomputedDistances(objects), bucketDispatcher.createBucket(), insertType, objectToNodeDistance);
+            operation.endOperation();
+            return;
+        }
+
         Node node = root;
 
         while (!node.isLeaf()) {
@@ -179,11 +196,80 @@ public class MHTree extends Algorithm implements Serializable {
             node = ((InternalNode) node).getNearestChild(object);
         }
 
-        node.addObject(object);
+        LeafNode leaf = (LeafNode) node;
+        leaf.addObject(object);
+
+        if (leaf.getBucket().isSoftCapacityExceeded()) {
+            // Split leaf node
+
+            List<Node> split = split(leaf);
+            InternalNode parent = leaf.getParent();
+
+            if (parent == null) {
+                // Leaf node is root
+
+                InternalNode internalNode = new InternalNode(leaf.getHull(), insertType, objectToNodeDistance, split);
+                split.forEach(s -> s.setParent(internalNode));
+                root = internalNode;
+            } else {
+                // Leaf node has parent
+
+                List<Node> parentChildren = parent.getChildren();
+
+                if (parent.getNumberOfChildren() == arity) {
+                    // The parent node is full: swap the leaf for a new internal node holding both halves
+                    parentChildren.remove(leaf);
+                    InternalNode internalNode = new InternalNode(leaf.getHull(), insertType, objectToNodeDistance, split);
+                    internalNode.setParent(parent); // otherwise getParent() would return null as for the root
+                    split.forEach(s -> s.setParent(internalNode));
+                    parentChildren.add(internalNode);
+                } else {
+                    // The parent can accommodate another leaf node
+
+                    parentChildren.remove(leaf);
+                    split.forEach(s -> s.setParent(parent));
+                    parentChildren.addAll(split);
+                }
+            }
+
+            bucketDispatcher.removeBucket(leaf.getBucket().getBucketID());
+        }
 
         operation.endOperation();
     }
 
+    private List<Node> split(LeafNode leaf) throws BucketStorageException {
+        PrecomputedDistances objectDistances = new PrecomputedDistances(leaf.getObjects());
+
+        BitSet notProcessedObjectIndices = new BitSet(objectDistances.getObjectCount());
+        notProcessedObjectIndices.set(0, objectDistances.getObjectCount());
+
+        List<Integer> objectIndices = new ArrayList<>(bucketCapacity);
+
+        // Select the furthest object (the outlier)
+        int furthestIndex = Utils.maxDistanceIndex(objectDistances.getDistances(), notProcessedObjectIndices);
+        notProcessedObjectIndices.clear(furthestIndex);
+        objectIndices.add(furthestIndex);
+
+        // Select the rest of the objects up to the total of bucketCapacity with respect to the building of a hull
+        objectIndices.addAll(MHTreeBuilder.findClosestObjects(furthestIndex, bucketCapacity - 1, notProcessedObjectIndices, objectDistances));
+
+        List<LocalAbstractObject> halfOfObjects = objectIndices
+                .stream()
+                .map(objectDistances::getObject)
+                .collect(Collectors.toList());
+
+        List<LocalAbstractObject> secondHalf = notProcessedObjectIndices
+                .stream()
+                .mapToObj(objectDistances::getObject)
+                .collect(Collectors.toList());
+
+        return new ArrayList<>(Arrays.asList(
+                new LeafNode(objectDistances.getSubset(halfOfObjects), bucketDispatcher.createBucket(), insertType, objectToNodeDistance),
+                new LeafNode(objectDistances.getSubset(secondHalf), bucketDispatcher.createBucket(), insertType, objectToNodeDistance)
+        ));
+    }
+
     /**
      * Returns a list of nodes.
      *
@@ -302,7 +388,7 @@ public class MHTree extends Algorithm implements Serializable {
         /**
          * Contains the object distance matrix.
          */
-        private AbstractRepresentation.PrecomputedDistances objectDistances;
+        private PrecomputedDistances objectDistances;
 
         /**
          * Stores intermediate nodes needed during the algorithm.
@@ -354,10 +440,61 @@ public class MHTree extends Algorithm implements Serializable {
 
             this.insertType = GREEDY;
             this.objectToNodeDistance = NEAREST;
-            this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, bucketCapacity, 0, false, MemoryStorageBucket.class, null);
+            this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, 2L * bucketCapacity - 1, 0, false, MemoryStorageBucket.class, null);
             this.mergingMethod = HULL_BASED_MERGE;
         }
 
+        /**
+         * Returns the closest objects with respect to the building of a hull.
+         * The number of returned indices is specified by {@code numberOfObjects}.
+         * Note that the returned indices are already set as invalid in {@code validObjectIndices}.
+         *
+         * @param objectIndex        the initial object index to which the closest objects are found
+         * @param numberOfObjects    the number of return object indices
+         * @param validObjectIndices object indices which are taken into consideration
+         * @return a list of closest object indices with respect to the building of a hull
+         */
+        private static List<Integer> findClosestObjects(int objectIndex, int numberOfObjects, BitSet validObjectIndices, PrecomputedDistances objectDistances) {
+            List<Integer> objectIndices = new ArrayList<>(1 + numberOfObjects);
+            objectIndices.add(objectIndex);
+
+            while (objectIndices.size() - 1 != numberOfObjects) {
+                int index = findClosestObjectIndex(objectIndices, validObjectIndices, objectDistances);
+
+                objectIndices.add(index);
+                validObjectIndices.clear(index);
+            }
+
+            return objectIndices.subList(1, objectIndices.size());
+        }
+
+        /**
+         * Returns the closest object index to {@code indices} with respect to the building of a hull.
+         *
+         * @param indices            list of object indices
+         * @param validObjectIndices object indices which are taken into consideration
+         * @return the closest object index to {@code indices} with respect to the building of a hull
+         */
+        private static int findClosestObjectIndex(List<Integer> indices, BitSet validObjectIndices, PrecomputedDistances objectDistances) {
+            double minDistance = Double.MAX_VALUE;
+            int closestObjectIndex = -1;
+
+            for (int index : indices) {
+                int candidateIndex = objectDistances.minDistInArray(objectDistances.getDistances(index), validObjectIndices);
+                double distanceSum = indices
+                        .stream()
+                        .mapToDouble(i -> objectDistances.getDistance(i, candidateIndex))
+                        .sum();
+
+                if (distanceSum < minDistance) {
+                    minDistance = distanceSum;
+                    closestObjectIndex = candidateIndex;
+                }
+            }
+
+            return closestObjectIndex;
+        }
+
         /**
          * Specifies which insert type should be used.
          *
@@ -414,7 +551,7 @@ public class MHTree extends Algorithm implements Serializable {
             validNodeIndices = new BitSet(nodes.length);
             validNodeIndices.set(0, nodes.length);
 
-            objectDistances = new AbstractRepresentation.PrecomputedDistances(objects);
+            objectDistances = new PrecomputedDistances(objects);
 
             // Every object is stored in the root
             if (objectDistances.getObjectCount() <= bucketCapacity) {
@@ -499,7 +636,7 @@ public class MHTree extends Algorithm implements Serializable {
                 objectIndices.add(furthestIndex);
 
                 // Select the rest of the objects up to the total of bucketCapacity with respect to the building of a hull
-                objectIndices.addAll(findClosestObjects(furthestIndex, bucketCapacity - 1, notProcessedObjectIndices));
+                objectIndices.addAll(findClosestObjects(furthestIndex, bucketCapacity - 1, notProcessedObjectIndices, objectDistances));
 
                 List<LocalAbstractObject> objects = objectIndices
                         .stream()
@@ -532,57 +669,6 @@ public class MHTree extends Algorithm implements Serializable {
             return closestNodeIndex;
         }
 
-        /**
-         * Returns the closest objects with respect to the building of a hull.
-         * The number of returned indices is specified by {@code numberOfObjects}.
-         * Note that the returned indices are already set as invalid in {@code validObjectIndices}.
-         *
-         * @param objectIndex        the initial object index to which the closest objects are found
-         * @param numberOfObjects    the number of return object indices
-         * @param validObjectIndices object indices which are taken into consideration
-         * @return a list of closest object indices with respect to the building of a hull
-         */
-        private List<Integer> findClosestObjects(int objectIndex, int numberOfObjects, BitSet validObjectIndices) {
-            List<Integer> objectIndices = new ArrayList<>(1 + numberOfObjects);
-            objectIndices.add(objectIndex);
-
-            while (objectIndices.size() - 1 != numberOfObjects) {
-                int index = findClosestObjectIndex(objectIndices, validObjectIndices);
-
-                objectIndices.add(index);
-                validObjectIndices.clear(index);
-            }
-
-            return objectIndices.subList(1, objectIndices.size());
-        }
-
-        /**
-         * Returns the closest object index to {@code indices} with respect to the building of a hull.
-         *
-         * @param indices            list of object indices
-         * @param validObjectIndices object indices which are taken into consideration
-         * @return the closest object index to {@code indices} with respect to the building of a hull
-         */
-        private int findClosestObjectIndex(List<Integer> indices, BitSet validObjectIndices) {
-            double minDistance = Double.MAX_VALUE;
-            int closestObjectIndex = -1;
-
-            for (int index : indices) {
-                int candidateIndex = objectDistances.minDistInArray(objectDistances.getDistances(index), validObjectIndices);
-                double distanceSum = indices
-                        .stream()
-                        .mapToDouble(i -> objectDistances.getDistance(i, candidateIndex))
-                        .sum();
-
-                if (distanceSum < minDistance) {
-                    minDistance = distanceSum;
-                    closestObjectIndex = candidateIndex;
-                }
-            }
-
-            return closestObjectIndex;
-        }
-
         /**
          * Merges nodes in {@link #nodes} specified by indices in {@link #validNodeIndices}.
          * The new node is placed on the first valid index in {@link #validNodeIndices}.
@@ -617,6 +703,8 @@ public class MHTree extends Algorithm implements Serializable {
 
             InternalNode parent = Node.createParent(children, objectDistances, insertType, objectToNodeDistance, mergingMethod);
 
+            children.forEach(child -> child.setParent(parent));
+
             nodeIndices.forEach(index -> {
                 validNodeIndices.clear(index);
                 this.nodes[index] = null;
diff --git a/src/mhtree/MergingMethod.java b/src/mhtree/MergingMethod.java
index 3cde09a..431c30e 100644
--- a/src/mhtree/MergingMethod.java
+++ b/src/mhtree/MergingMethod.java
@@ -15,7 +15,7 @@ public enum MergingMethod {
         }
     },
     /**
-     * Returns the hull objects of {@code node}'s hull.
+     * Returns the hull objects in {@code node}'s hull.
      */
     HULL_BASED_MERGE {
         @Override
diff --git a/src/mhtree/Node.java b/src/mhtree/Node.java
index 09d5b3d..4fb191f 100644
--- a/src/mhtree/Node.java
+++ b/src/mhtree/Node.java
@@ -18,8 +18,8 @@ public abstract class Node implements Serializable {
     private static final long serialVersionUID = 420L;
 
     private final InsertType insertType;
-    private final ObjectToNodeDistance objectToNodeDistance;
-
+    private final ObjectToNodeDistance objectToNodeDistance;
+    private InternalNode parent;
     private HullOptimizedRepresentationV3 hull;
 
     protected Node(PrecomputedDistances distances, InsertType insertType, ObjectToNodeDistance objectToNodeDistance) {
@@ -30,10 +30,16 @@ public abstract class Node implements Serializable {
         this.objectToNodeDistance = objectToNodeDistance;
     }
 
-    protected static InternalNode createParent(List<Node> nodes, PrecomputedDistances distances, InsertType insertType, ObjectToNodeDistance objectToNodeDistance, MergeType mergeType) {
+    protected Node(HullOptimizedRepresentationV3 hull, InsertType insertType, ObjectToNodeDistance objectToNodeDistance) {
+        this.hull = hull;
+        this.insertType = insertType;
+        this.objectToNodeDistance = objectToNodeDistance;
+    }
+
+    protected static InternalNode createParent(List<Node> nodes, PrecomputedDistances distances, InsertType insertType, ObjectToNodeDistance objectToNodeDistance, MergingMethod mergingMethod) {
         List<LocalAbstractObject> objects = nodes
                 .stream()
-                .map(mergeType::getObjects)
+                .map(mergingMethod::getObjects)
                 .flatMap(Collection::stream)
                 .collect(Collectors.toList());
 
@@ -63,6 +69,18 @@ public abstract class Node implements Serializable {
         return hull.isExternalCovered(object);
     }
 
+    public HullOptimizedRepresentationV3 getHull() {
+        return hull;
+    }
+
+    public InternalNode getParent() {
+        return parent;
+    }
+
+    public void setParent(InternalNode parent) {
+        this.parent = parent;
+    }
+
     @Override
     public String toString() {
         return "Node{hull=" + hull + '}';
@@ -80,6 +98,10 @@ public abstract class Node implements Serializable {
         return (this instanceof LeafNode);
     }
 
+    protected boolean isInternal() {
+        return (this instanceof InternalNode);
+    }
+
     protected int getHullObjectCount() {
         return hull.getRepresentativesCount();
     }
@@ -94,6 +116,9 @@ public abstract class Node implements Serializable {
             case INCREMENTAL:
                 insertIncremental(object);
                 break;
+            case ADD_HULL_OBJECT:
+                insertHullRebuild(object);
+                break;
             default:
                 throw new IllegalStateException("Unexpected value: " + insertType);
         }
@@ -146,4 +171,12 @@ public abstract class Node implements Serializable {
     private void insertIncremental(LocalAbstractObject object) {
         hull.addHullObject(object);
     }
+
+    private void insertHullRebuild(LocalAbstractObject object) {
+        // Copy first: the list returned by getHull() may be owned by the hull or be unmodifiable
+        List<LocalAbstractObject> hullObjects = new java.util.ArrayList<>(hull.getHull());
+        hullObjects.add(object);
+        hull = new HullOptimizedRepresentationV3(hullObjects);
+        hull.build();
+    }
 }
-- 
GitLab