// ===== src/mhtree/BuildTree.java =====
package mhtree;

import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.PrecomputedDistances;
import messif.buckets.BucketDispatcher;
import messif.buckets.BucketStorageException;
import messif.objects.LocalAbstractObject;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.function.BiFunction;
import java.util.stream.Collectors;

/**
 * Builds a tree bottom-up: the objects are first grouped into leaf nodes of
 * {@code leafCapacity} objects each, and then up to {@code arity} nodes at a
 * time are merged under a new parent until a single node (the root) remains.
 */
class BuildTree {
    private final int arity;
    private final int leafCapacity;
    private final InsertType insertType;
    private final ObjectToNodeDistance objectToNodeDistance;
    private final NodeToNodeDistance nodeToNodeDistance;
    private final BucketDispatcher bucketDispatcher;

    private final PrecomputedDistances objectDistances;
    // Symmetric matrix of node-to-node distances, indexed like {@link #nodes}.
    private final float[][] nodeDistances;

    // Slot i holds a live node only while bit i of validNodeIndices is set.
    private final Node[] nodes;
    private final BitSet validNodeIndices;

    private final Node root;

    /**
     * Builds the whole tree; the result is available through {@link #getRoot()}.
     *
     * @param objects              objects to be stored in the tree
     * @param leafCapacity         number of objects placed into each leaf node, must be positive
     * @param arity                maximum number of nodes merged under one parent, must be at
     *                             least 2 whenever more than one leaf node is created
     * @param insertType           passed through to every created node
     * @param objectToNodeDistance passed through to every created node
     * @param nodeToNodeDistance   measure used to decide which nodes are merged together
     * @param bucketDispatcher     source of the bucket backing each leaf node
     * @throws BucketStorageException   propagated from the bucket/leaf creation
     * @throws IllegalArgumentException if {@code leafCapacity} is not positive, or {@code arity}
     *                                  is below 2 while several leaf nodes have to be merged
     */
    BuildTree(List<LocalAbstractObject> objects, int leafCapacity, int arity, InsertType insertType, ObjectToNodeDistance objectToNodeDistance, NodeToNodeDistance nodeToNodeDistance, BucketDispatcher bucketDispatcher) throws BucketStorageException {
        // Fail with a clear message instead of the ArithmeticException / NegativeArraySizeException
        // that the array sizing below would otherwise raise.
        if (leafCapacity < 1) {
            throw new IllegalArgumentException("leafCapacity must be positive, got " + leafCapacity);
        }

        this.leafCapacity = leafCapacity;
        this.arity = arity;
        this.insertType = insertType;
        this.objectToNodeDistance = objectToNodeDistance;
        this.nodeToNodeDistance = nodeToNodeDistance;
        this.bucketDispatcher = bucketDispatcher;

        nodes = new Node[objects.size() / leafCapacity];

        validNodeIndices = new BitSet(nodes.length);
        validNodeIndices.set(0, nodes.length);

        objectDistances = new PrecomputedDistances(objects);
        nodeDistances = new float[nodes.length][nodes.length];

        // Every object is stored in the root in the case of small number of objects
        if (objectDistances.getObjectCount() <= leafCapacity) {
            root = new LeafNode(objectDistances, bucketDispatcher.createBucket(), insertType, objectToNodeDistance);
            return;
        }

        createLeafNodes();
        // The merge phase selects nodes by nodeDistances; without this step the
        // first pass would run on an all-zero matrix.
        computeLeafNodeDistances();
        root = createRoot();
    }

    /**
     * @return the root of the built tree
     */
    public Node getRoot() {
        return root;
    }

    /**
     * Repeatedly merges the valid nodes into parents until exactly one valid node is left.
     * In every pass each still-unprocessed node is used at most once: a seed node is picked
     * by {@link #getFurthestIndex}, its {@code arity - 1} closest unprocessed nodes are
     * attached to it, and the last {@code <= arity} nodes of a pass are merged together.
     *
     * @return the only remaining valid node
     */
    private Node createRoot() {
        // With arity < 2 no merge ever reduces the number of valid nodes.
        if (arity < 2 && validNodeIndices.cardinality() > 1) {
            throw new IllegalArgumentException("arity must be at least 2 to merge several leaf nodes, got " + arity);
        }

        while (validNodeIndices.cardinality() != 1) {
            BitSet notProcessedNodeIndices = (BitSet) validNodeIndices.clone();

            while (!notProcessedNodeIndices.isEmpty()) {
                if (notProcessedNodeIndices.cardinality() <= arity) {
                    mergeNodes(notProcessedNodeIndices.stream().boxed().collect(Collectors.toList()));
                    break;
                }

                int furthestNodeIndex = getFurthestIndex(nodeDistances, notProcessedNodeIndices);
                notProcessedNodeIndices.clear(furthestNodeIndex);

                mergeNodes(furthestNodeIndex, findClosestItems(this::findClosestNodeIndex, furthestNodeIndex, arity - 1, notProcessedNodeIndices));
            }
        }

        return nodes[validNodeIndices.nextSetBit(0)];
    }

    /**
     * Partitions the objects into leaf nodes of exactly {@code leafCapacity} objects. Once
     * fewer than {@code leafCapacity} objects remain, each of them is added to the already
     * created leaf that reports the smallest {@code Node.getDistance} to it.
     *
     * @throws BucketStorageException propagated from the bucket/leaf creation
     */
    private void createLeafNodes() throws BucketStorageException {
        BitSet notProcessedObjectIndices = new BitSet(objectDistances.getObjectCount());
        notProcessedObjectIndices.set(0, objectDistances.getObjectCount());

        for (int nodeIndex = 0; !notProcessedObjectIndices.isEmpty(); nodeIndex++) {
            if (notProcessedObjectIndices.cardinality() < leafCapacity) {
                for (int i = notProcessedObjectIndices.nextSetBit(0); i >= 0; i = notProcessedObjectIndices.nextSetBit(i + 1)) {
                    LocalAbstractObject object = objectDistances.getObject(i);

                    nodes[getClosestNodeIndex(object)].addObject(object);
                }

                return;
            }

            List<Integer> objectIndices = new ArrayList<>(leafCapacity);

            // Select a base object
            int furthestIndex = getFurthestIndex(objectDistances.getDistances(), notProcessedObjectIndices);
            notProcessedObjectIndices.clear(furthestIndex);
            objectIndices.add(furthestIndex);

            // Select the rest of the objects up to the total of leafCapacity
            objectIndices.addAll(findClosestItems(this::findClosestObjectIndex, furthestIndex, leafCapacity - 1, notProcessedObjectIndices));

            List<LocalAbstractObject> objects = objectIndices.stream().map(objectDistances::getObject).collect(Collectors.toList());

            nodes[nodeIndex] = new LeafNode(objectDistances.getSubset(objects), bucketDispatcher.createBucket(), insertType, objectToNodeDistance);
        }
    }

    /**
     * Fills {@link #nodeDistances} for every pair of freshly created leaf nodes. The diagonal
     * is included so that leaves are treated the same way as merged parents, whose distance
     * to themselves is also stored by {@link #mergeNodes(int, List)}.
     */
    private void computeLeafNodeDistances() {
        for (int i = 0; i < nodes.length; i++) {
            for (int j = i; j < nodes.length; j++) {
                computeNodeDistances(i, j);
            }
        }
    }

    /**
     * @param object object to be placed
     * @return index of the node reporting the smallest {@code getDistance(object)},
     *         or -1 when there are no nodes
     */
    private int getClosestNodeIndex(LocalAbstractObject object) {
        double minDistance = Double.MAX_VALUE;
        int closestNodeIndex = -1;

        for (int candidateIndex = 0; candidateIndex < nodes.length; candidateIndex++) {
            double distance = nodes[candidateIndex].getDistance(object);

            if (distance < minDistance) {
                minDistance = distance;
                closestNodeIndex = candidateIndex;
            }
        }

        return closestNodeIndex;
    }

    /**
     * Hill-climbs over the distance matrix: starting from the first valid index it keeps
     * jumping to the index selected by {@code maxDistInArray} for the current row
     * (presumably the valid index with the largest distance - confirm against
     * PrecomputedDistances) for as long as the distance strictly grows.
     *
     * @param distanceMatrix square matrix of pairwise distances
     * @param validIndices   indices that may be selected, must not be empty
     * @return the index at which the climb stopped
     */
    private int getFurthestIndex(float[][] distanceMatrix, BitSet validIndices) {
        // Float.MIN_VALUE is the smallest POSITIVE float, so it is not a lower bound.
        float maxDistance = Float.NEGATIVE_INFINITY;
        int furthestIndex = validIndices.nextSetBit(0);

        while (true) {
            float[] distances = distanceMatrix[furthestIndex];
            int candidateIndex = this.objectDistances.maxDistInArray(distances, validIndices);

            if (!(distances[candidateIndex] > maxDistance)) {
                return furthestIndex;
            }

            maxDistance = distances[candidateIndex];
            furthestIndex = candidateIndex;
        }
    }

    /**
     * Greedily collects {@code numberOfItems} items around {@code itemIndex}. Each selected
     * index is cleared from {@code notProcessedItemIndices} and joins the group handed to
     * {@code findClosestItemIndex} in the next round.
     *
     * @param findClosestItemIndex    picks the next index given the current group and the candidates
     * @param itemIndex               index of the seed item (not part of the result)
     * @param numberOfItems           number of indices to select
     * @param notProcessedItemIndices candidate indices; modified by this method
     * @return the selected indices in selection order
     */
    private List<Integer> findClosestItems(BiFunction<List<Integer>, BitSet, Integer> findClosestItemIndex, int itemIndex, int numberOfItems, BitSet notProcessedItemIndices) {
        List<Integer> itemIndices = new ArrayList<>(1 + numberOfItems);
        itemIndices.add(itemIndex);

        List<Integer> resultItemsIndices = new ArrayList<>();

        while (resultItemsIndices.size() != numberOfItems) {
            int index = findClosestItemIndex.apply(itemIndices, notProcessedItemIndices);

            itemIndices.add(index);
            notProcessedItemIndices.clear(index);
            resultItemsIndices.add(index);
        }

        return resultItemsIndices;
    }

    /**
     * @param indices          indices of the nodes already in the group
     * @param validNodeIndices candidate node indices
     * @return the candidate with the smallest sum of distances to the group,
     *         or -1 when there is no candidate
     */
    private int findClosestNodeIndex(List<Integer> indices, BitSet validNodeIndices) {
        double minDistance = Double.MAX_VALUE;
        int closestNodeIndex = -1;

        for (int candidateIndex = validNodeIndices.nextSetBit(0); candidateIndex >= 0; candidateIndex = validNodeIndices.nextSetBit(candidateIndex + 1)) {
            double sum = 0;
            for (int index : indices) {
                sum += nodeDistances[index][candidateIndex];
            }

            if (sum < minDistance) {
                minDistance = sum;
                closestNodeIndex = candidateIndex;
            }
        }

        return closestNodeIndex;
    }

    /**
     * Considers, for every group member, the valid object chosen by {@code minDistInArray}
     * for that member's distance row (presumably its nearest valid object - confirm against
     * PrecomputedDistances) and returns the one with the smallest sum of distances to the group.
     *
     * @param indices            indices of the objects already in the group
     * @param validObjectIndices candidate object indices
     * @return index of the chosen object, or -1 when {@code indices} is empty
     */
    private int findClosestObjectIndex(List<Integer> indices, BitSet validObjectIndices) {
        double minDistance = Double.MAX_VALUE;
        int closestObjectIndex = -1;

        for (int index : indices) {
            int candidateIndex = objectDistances.minDistInArray(objectDistances.getDistances(index), validObjectIndices);
            double distance = indices.stream().mapToDouble(i -> objectDistances.getDistance(i, candidateIndex)).sum();

            if (distance < minDistance) {
                minDistance = distance;
                closestObjectIndex = candidateIndex;
            }
        }

        return closestObjectIndex;
    }

    /**
     * Merges all the given nodes; the parent is stored in the slot of the first index.
     *
     * @param nodeIndices indices of the nodes to merge, must not be empty; left unmodified
     */
    private void mergeNodes(List<Integer> nodeIndices) {
        int parentNodeIndex = nodeIndices.get(0);
        mergeNodes(parentNodeIndex, nodeIndices.subList(1, nodeIndices.size()));
    }

    /**
     * Creates a parent over the node at {@code parentNodeIndex} and the nodes at
     * {@code nodeIndices}, stores it in slot {@code parentNodeIndex}, invalidates the other
     * slots and refreshes the parent's distances to all valid nodes. Does nothing when
     * {@code nodeIndices} is empty.
     *
     * @param parentNodeIndex slot of one of the merged nodes, reused for the parent
     * @param nodeIndices     slots of the remaining nodes to merge; left unmodified
     */
    private void mergeNodes(int parentNodeIndex, List<Integer> nodeIndices) {
        if (nodeIndices.isEmpty()) {
            return;
        }

        // Work on a copy: the argument may be a view or a list without mutability guarantees.
        List<Integer> mergedIndices = new ArrayList<>(nodeIndices.size() + 1);
        mergedIndices.addAll(nodeIndices);
        mergedIndices.add(parentNodeIndex);

        List<Node> children = mergedIndices.stream().map(i -> this.nodes[i]).collect(Collectors.toList());

        InternalNode parent = Node.createParent(children, objectDistances, insertType, objectToNodeDistance);
        children.forEach(node -> node.setParent(parent));
        parent.addChildren(children);

        mergedIndices.forEach(index -> {
            validNodeIndices.clear(index);
            this.nodes[index] = null;
        });

        this.nodes[parentNodeIndex] = parent;
        validNodeIndices.set(parentNodeIndex);

        // Update node distances
        validNodeIndices.stream().forEach(index -> computeNodeDistances(parentNodeIndex, index));
    }

    /**
     * Computes the distance between the nodes in slots {@code i} and {@code j} and stores it
     * symmetrically in {@link #nodeDistances}.
     */
    private void computeNodeDistances(int i, int j) {
        float distance = nodeToNodeDistance.getDistance(nodes[i], nodes[j], objectDistances::getDistance);

        nodeDistances[i][j] = distance;
        nodeDistances[j][i] = distance;
    }
}

// ===== src/mhtree/Histogram.java =====
package mhtree;

import messif.objects.LocalAbstractObject;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.TreeMap;

/**
 * Per tree level, counts how many objects are covered by how many nodes of that level.
 */
public class Histogram {
    // level -> (number of covering nodes -> number of objects covered that many times)
    private final HashMap<Integer, HashMap<Long, Integer>> levelToHullsCoveredToObjectCount = new HashMap<>();

    Histogram() {
    }

    /**
     * Builds the histogram for the subtree rooted at {@code x}: for each level from 1 to
     * {@code x.getHeight() + 1} and each object of {@code x}, the number of nodes on that
     * level for which {@code isCovered(object)} holds is recorded.
     *
     * @param x root of the examined subtree
     * @return the filled histogram
     */
    public static Histogram generate(Node x) {
        Histogram histogram = new Histogram();

        for (int level = 1; level <= x.getHeight() + 1; level++) {
            List<Node> levelNodes = x.getNodesOnLevel(level);

            for (LocalAbstractObject object : x.getObjects()) {
                long coveringNodeCount = levelNodes.stream()
                        .filter(node -> node.isCovered(object))
                        .count();

                histogram.addCoveredCount(level, coveringNodeCount);
            }
        }

        return histogram;
    }

    /**
     * Records one more object on {@code level} that is covered by {@code count} nodes.
     *
     * @param level tree level the observation belongs to
     * @param count number of nodes covering the object
     */
    public void addCoveredCount(int level, long count) {
        levelToHullsCoveredToObjectCount
                .computeIfAbsent(level, key -> new HashMap<>())
                .merge(count, 1, Integer::sum);
    }

    /**
     * @return one "Level n" section per level, in ascending level order, each followed by
     *         one line per distinct cover count, in ascending order
     */
    @Override
    public String toString() {
        StringBuilder stringBuilder = new StringBuilder();

        // HashMap iteration order is unspecified; sort so the report is stable.
        new TreeMap<>(levelToHullsCoveredToObjectCount).forEach((level, hullsCoveredToObjectCount) -> {
            stringBuilder.append("Level ").append(level).append('\n');

            new TreeMap<>(hullsCoveredToObjectCount).forEach((hullsCovered, objectCount) -> {
                stringBuilder.append(objectCount).append(" object");
                if (objectCount != 1) {
                    stringBuilder.append('s');
                }

                if (hullsCovered == 0) {
                    stringBuilder.append(" not covered");
                } else {
                    // Verb agrees with the number of objects.
                    stringBuilder.append(objectCount == 1 ? " is" : " are");
                    stringBuilder.append(" covered by ").append(hullsCovered).append(" hull");
                    if (hullsCovered != 1) {
                        stringBuilder.append('s');
                    }
                }

                stringBuilder.append('\n');
            });
        });

        return stringBuilder.toString();
    }
}

// ===== src/mhtree/NodeToNodeDistance.java =====
package mhtree;

import messif.objects.LocalAbstractObject;

import java.util.function.BiFunction;

/**
 * Ways of measuring the distance between two nodes from the pairwise distances
 * of their hull objects.
 */
public enum NodeToNodeDistance {
    /** Smallest distance over all pairs of hull objects. */
    NEAREST_HULL_OBJECTS(Math::min, Float.MAX_VALUE),
    /**
     * Largest distance over all pairs of hull objects. The fold starts from the lowest finite
     * float; {@code Float.MIN_VALUE} is the smallest positive float and would be returned
     * instead of the true maximum whenever all pair distances are zero.
     */
    FURTHEST_HULL_OBJECTS(Math::max, -Float.MAX_VALUE);

    private final BiFunction<Float, Float, Float> compareFunction;
    private final float defaultValue;

    NodeToNodeDistance(final BiFunction<Float, Float, Float> compareFunction, final float defaultValue) {
        this.compareFunction = compareFunction;
        this.defaultValue = defaultValue;
    }

    /**
     * Folds the distances of all (hull object of {@code node1}, hull object of {@code node2})
     * pairs with this measure's compare function.
     *
     * @param node1       first node
     * @param node2       second node
     * @param getDistance distance between two objects
     * @return the folded distance; the measure's start value when either node has no hull objects
     */
    public float getDistance(Node node1, Node node2, BiFunction<LocalAbstractObject, LocalAbstractObject, Float> getDistance) {
        float distance = defaultValue;

        for (LocalAbstractObject firstHullObject : node1.getHullObjects()) {
            for (LocalAbstractObject secondHullObject : node2.getHullObjects()) {
                distance = compareFunction.apply(distance, getDistance.apply(firstHullObject, secondHullObject));
            }
        }

        return distance;
    }
}