Skip to content
Snippets Groups Projects
Commit a99f0006 authored by Vlastislav Dohnal's avatar Vlastislav Dohnal
Browse files

Refactoring

* New methods in MH-Tree -- exact-match search & fat factor
parent c0aa6a1b
No related branches found
No related tags found
No related merge requests found
...@@ -21,9 +21,13 @@ import java.util.PriorityQueue; ...@@ -21,9 +21,13 @@ import java.util.PriorityQueue;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.Ranked.getRankedIndices; import static cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.Ranked.getRankedIndices;
import static mhtree.InsertType.GREEDY; import java.util.Iterator;
import static mhtree.MergingMethod.HULL_BASED_MERGE; import java.util.NoSuchElementException;
import static mhtree.ObjectToNodeDistance.NEAREST; import messif.algorithms.AlgorithmMethodException;
import messif.objects.util.AbstractObjectIterator;
import messif.operations.AnswerType;
import messif.operations.data.BulkInsertOperation;
import messif.operations.query.RangeQueryOperation;
/** /**
* MH-Tree is a metric index utilizing metric hulls. * MH-Tree is a metric index utilizing metric hulls.
...@@ -70,7 +74,7 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -70,7 +74,7 @@ public class MHTree extends Algorithm implements Serializable {
"MH-Tree builder object", "MH-Tree builder object",
}) })
private MHTree(MHTreeBuilder builder) { private MHTree(MHTreeBuilder builder) {
super("MH-Tree"); super("Metric Hull Tree");
bucketCapacity = builder.bucketCapacity; bucketCapacity = builder.bucketCapacity;
arity = builder.arity; arity = builder.arity;
...@@ -82,6 +86,32 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -82,6 +86,32 @@ public class MHTree extends Algorithm implements Serializable {
root = builder.root; root = builder.root;
} }
/**
 * Cloning constructor -- makes it possible to use the builder feature while
 * MESSIF control files are supported too.
 *
 * The structure of {@code origTree} (root, bucket dispatcher, bucket capacity
 * and arity) is taken over by reference, i.e., no deep copy is made.
 * Regardless of the settings of {@code origTree}, the new instance uses
 * {@link InsertType#INCREMENTAL} and {@link ObjectToNodeDistance#NEAREST}.
 *
 * @param origTree the tree whose structure is taken over
 */
private MHTree(MHTree origTree) {
    super("Metric Hull Tree");

    // Structure taken over from the original tree.
    this.root = origTree.root;
    this.bucketDispatcher = origTree.bucketDispatcher;
    this.bucketCapacity = origTree.bucketCapacity;
    this.arity = origTree.arity;

    // Fixed settings of the new instance (not copied from the original tree).
    this.objectToNodeDistance = ObjectToNodeDistance.NEAREST;
    this.insertType = InsertType.INCREMENTAL;
}
/**
* Create a new instance of MH-Tree.
* @param algorithmName name of the algorithm instance
* @param bucketCapacity capacity in objects
* @param arity internal node capacity (in hulls -- tree arity)
* @param insertType algorithm to insert objects (hull computation algorithm)
* @param objectToNodeDistance function of ranking an object to a hull
* @throws IllegalArgumentException
*/
@AlgorithmConstructor(description = "MH-Tree", arguments = {
"Algorithm name", "bucket capacity in objeces", "internal node arity", "type of updating hull upon inserting an object",
"type of ranking function of objects to nodes (their hulls)"
})
public MHTree(String algorithmName, int bucketCapacity, int arity, InsertType insertType, ObjectToNodeDistance objectToNodeDistance) throws IllegalArgumentException { public MHTree(String algorithmName, int bucketCapacity, int arity, InsertType insertType, ObjectToNodeDistance objectToNodeDistance) throws IllegalArgumentException {
super(algorithmName); super(algorithmName);
...@@ -93,6 +123,102 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -93,6 +123,102 @@ public class MHTree extends Algorithm implements Serializable {
this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, 2L * bucketCapacity - 1, 0, false, MemoryStorageBucket.class, null); this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, 2L * bucketCapacity - 1, 0, false, MemoryStorageBucket.class, null);
} }
/**
 * Creates a new, empty instance of MH-Tree named "Metric Hull Tree".
 *
 * This is a convenience constructor that delegates to the full constructor with these settings:
 * objects are inserted using {@link InsertType#INCREMENTAL} and
 * the distance used to rank a node in the priority queue is {@link ObjectToNodeDistance#NEAREST}.
 *
 * @param bucketCapacity capacity of a leaf node bucket in objects
 * @param arity internal node capacity (in hulls -- tree arity)
 * @throws IllegalArgumentException presumably when the delegated constructor rejects the arguments;
 *         its validation is not visible here -- TODO confirm
 */
@AlgorithmConstructor(description = "MH-Tree", arguments = {
"bucket capacity in objeces", "internal node arity"
})
public MHTree(int bucketCapacity, int arity) throws IllegalArgumentException {
this("Metric Hull Tree", bucketCapacity, arity, InsertType.INCREMENTAL, ObjectToNodeDistance.NEAREST);
}
/**
 * Creates a new instance of MH-Tree and bulk-loads it with the given data.
 *
 * The bulk-loading is done by {@link MHTreeBuilder} with these settings:
 * <ul>
 * <li>Insert type is {@link InsertType#GREEDY}.</li>
 * <li>Node's hull rank to an object is {@link ObjectToNodeDistance#NEAREST}.</li>
 * <li>Hull merging method is {@link MergingMethod#HULL_BASED_MERGE}.</li>
 * </ul>
 *
 * If new objects are inserted into the structure afterwards, the following settings are used:
 * <ul>
 * <li>Objects are inserted using {@link InsertType#INCREMENTAL}.</li>
 * <li>Distance to rank a node in the priority queue is {@link ObjectToNodeDistance#NEAREST}.</li>
 * </ul>
 *
 * @param bucketCapacity capacity in objects
 * @param arity internal node capacity (in hulls -- tree arity)
 * @param dataIterator data to insert into the tree by bulk-loading; the iterator is read to its end
 * @throws IllegalArgumentException if the builder rejects the arguments
 *         (e.g., fewer than 3 objects to insert or a bucket capacity smaller than 3 objects)
 * @throws messif.buckets.BucketStorageException presumably when storing objects into buckets
 *         fails during bulk-loading -- TODO confirm
 */
@AlgorithmConstructor(description = "MH-Tree", arguments = {
"bucket capacity in objeces", "internal node arity", "data to bulk-load"
})
public MHTree(int bucketCapacity, int arity, Iterator<LocalAbstractObject> dataIterator) throws IllegalArgumentException, BucketStorageException {
// Build the tree by bulk-loading and take over its structure via the cloning constructor.
this(new MHTreeBuilder(dataIterator, bucketCapacity, arity).build());
}
/**
 * Evaluates a range query. Only queries with zero radius (exact-match
 * queries) are supported; they are passed on to
 * {@code exactMatchSearch(RangeQueryOperation)}.
 *
 * @param operation the range query to evaluate
 * @throws UnsupportedOperationException if the query radius is not zero
 */
public void rangeSearch(RangeQueryOperation operation) {
    final boolean zeroRadius = operation.getRadius() == 0;
    if (!zeroRadius) {
        throw new UnsupportedOperationException("Range query with non-zero radius is not supported yet!");
    }
    exactMatchSearch(operation);
}
/**
 * Searches the tree for an object lying within the query radius of the query
 * object and stops at the first such object found.
 *
 * Nodes waiting to be visited are kept in a priority queue of
 * {@link ObjectToNodeDistanceRank} (natural ordering of the rank). For an
 * internal node, every child that covers the query object is enqueued. If no
 * child covers it and nothing else is waiting in the queue, the single
 * uncovered child with the smallest rank distance is enqueued instead, so the
 * search can continue.
 *
 * On exit (match found, tree exhausted, or exception), the operation is ended
 * and the number of visited nodes is stored in its "AccessedNodes" parameter.
 *
 * @param operation the query; at most one object is added to its answer
 */
private void exactMatchSearch(RangeQueryOperation operation) {
    final LocalAbstractObject query = operation.getQueryObject();
    int visitedNodes = 0;
    try {
        final PriorityQueue<ObjectToNodeDistanceRank> pending = new PriorityQueue<>();
        // The root is always visited -- no coverage test is applied to it.
        pending.add(new ObjectToNodeDistanceRank(query, root, 1));

        while (!pending.isEmpty()) {
            final Node current = pending.remove().getNode();
            visitedNodes++;

            if (current.isLeaf()) {
                for (LocalAbstractObject candidate : current.getObjects()) {
                    if (query.getDistance(candidate) <= operation.getRadius()) {
                        operation.addToAnswer(candidate);
                        return; // The first matching object ends the search.
                    }
                }
                continue;
            }

            // Internal node: enqueue covering children and remember the closest
            // uncovered child seen before any covering child was found.
            boolean coveredByChild = false;
            ObjectToNodeDistanceRank fallback = null;
            double fallbackDistance = Double.MAX_VALUE;
            for (Node child : ((InternalNode) current).getChildren()) {
                final ObjectToNodeDistanceRank childRank = new ObjectToNodeDistanceRank(query, child, 1);
                if (child.isCovered(query)) {
                    coveredByChild = true;
                    pending.add(childRank);
                } else if (!coveredByChild && childRank.getDistance() < fallbackDistance) {
                    fallbackDistance = childRank.getDistance();
                    fallback = childRank;
                }
            }

            // Add one best child if the object is not covered by any child
            // and there is no other node left to visit.
            if (pending.isEmpty() && fallback != null && !coveredByChild) {
                pending.add(fallback);
            }
        }
    } finally {
        operation.endOperation();
        operation.setParameter("AccessedNodes", visitedNodes);
    }
}
public void kNN(KNNQueryOperation operation) { public void kNN(KNNQueryOperation operation) {
kNNSearch(operation, null); kNNSearch(operation, null);
} }
...@@ -169,6 +295,11 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -169,6 +295,11 @@ public class MHTree extends Algorithm implements Serializable {
return bucketDispatcher.getObjectCount(); return bucketDispatcher.getObjectCount();
} }
/**
 * Handler of the bulk-insert operation. Bulk-loading into an existing instance
 * is not supported; this method always throws. Use {@link MHTreeBuilder} to
 * create a bulk-loaded tree instead.
 *
 * @param operation the bulk-insert operation (not used)
 * @throws UnsupportedOperationException always
 */
public void bulkInsert(BulkInsertOperation operation) {
throw new UnsupportedOperationException("Not supported this way yet -- use MHTreeBuilder.");
}
/** /**
* Inserts a new object into the tree. * Inserts a new object into the tree.
* *
...@@ -291,6 +422,53 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -291,6 +422,53 @@ public class MHTree extends Algorithm implements Serializable {
root.gatherLeafNodes(leafNodes); root.gatherLeafNodes(leafNodes);
return leafNodes; return leafNodes;
} }
/**
 * Returns an iterator over all objects stored in the leaf nodes of this tree.
 *
 * The leaf nodes are obtained once by {@code getLeafNodes()} when this method
 * is called and their objects are then returned leaf by leaf. Leaf nodes
 * without objects are skipped. The iterator reads one object ahead, so
 * {@code hasNext()} is a plain check without side effects.
 *
 * @return iterator over the objects of all leaf nodes
 */
public AbstractObjectIterator<LocalAbstractObject> getAllObjects() {
    final Iterator<LeafNode> leafIterator = getLeafNodes().iterator();

    return new AbstractObjectIterator<LocalAbstractObject>() {
        /** Iterator over the objects of the leaf being read; {@code null} before the first leaf. */
        private Iterator<LocalAbstractObject> objectIterator = null;
        /** Object read ahead that the next call to {@code next()} returns; {@code null} if exhausted. */
        private LocalAbstractObject nextObject = null;
        /** Object returned by the last call to {@code next()}; {@code null} before the first call. */
        private LocalAbstractObject lastReturned = null;

        // Read the first object ahead (must stay below the field declarations).
        { advance(); }

        /** Reads the next object ahead, moving over as many (possibly empty) leaves as needed. */
        private void advance() {
            // Skip exhausted and empty leaves; stopping at the first empty leaf
            // would silently drop the objects of all following leaves.
            while ((objectIterator == null || !objectIterator.hasNext()) && leafIterator.hasNext()) {
                objectIterator = leafIterator.next().getObjects().iterator();
            }
            if (objectIterator != null && objectIterator.hasNext()) {
                nextObject = objectIterator.next();
            } else {
                nextObject = null; // No next object available
            }
        }

        @Override
        public LocalAbstractObject getCurrentObject() throws NoSuchElementException {
            if (lastReturned == null) {
                throw new NoSuchElementException("next() has not been called yet");
            }
            return lastReturned;
        }

        @Override
        public boolean hasNext() {
            return nextObject != null;
        }

        @Override
        public LocalAbstractObject next() {
            if (nextObject == null) {
                throw new NoSuchElementException("No more objects in the tree");
            }
            lastReturned = nextObject;
            advance();
            return lastReturned;
        }
    };
}
/** /**
* Prints statistics about the tree. * Prints statistics about the tree.
...@@ -344,9 +522,48 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -344,9 +522,48 @@ public class MHTree extends Algorithm implements Serializable {
", objectToNodeDistance=" + objectToNodeDistance + ", objectToNodeDistance=" + objectToNodeDistance +
'}'; '}';
} }
/**
 * Computes the fat factor of the tree.
 *
 * An exact-match query (range query with zero radius) is executed for every
 * object stored in the tree and the numbers of accessed nodes are summed up
 * into {@code I_c}. The result is
 * {@code (I_c - h * N) / (N * (M - h))}, where {@code h} is the height of the
 * tree, {@code N} the number of stored objects and {@code M} the total number
 * of nodes.
 *
 * If some query does not find its object, a warning with the number of such
 * queries is printed to the standard output.
 *
 * @return fat-factor value; {@code 0} if the formula is undefined because its
 *         denominator is zero (no objects, or the number of nodes equals the
 *         height); {@link Float#NaN} if executing a query failed
 */
public float getFatFactor() {
    long accessed = 0; // I_c
    int notFound = 0;
    final AbstractObjectIterator<LocalAbstractObject> iterObjs = getAllObjects();
    try {
        while (iterObjs.hasNext()) {
            LocalAbstractObject o = iterObjs.next();
            RangeQueryOperation op = new RangeQueryOperation(o, 0f, AnswerType.ORIGINAL_OBJECTS);
            execute(false, op);
            accessed += (long)op.getParameter("AccessedNodes", Integer.class);
            if (op.getAnswerCount() == 0)
                notFound++;
        }
    } catch (AlgorithmMethodException | NoSuchMethodException ex) {
        // A value computed from only a part of the queries would be misleading,
        // so report the failure instead of silently continuing.
        System.err.println("Fat factor computation failed: " + ex);
        return Float.NaN;
    }
    if (notFound > 0)
        System.out.println("Fat factor computation: " + notFound + " exact-match queries did not find an object!");

    final long objects = getObjectCount();
    final int height = root.getHeight();
    final int totalNodes = getNodes().size();

    final long denominator = objects * (totalNodes - height);
    if (denominator == 0)
        return 0f; // Avoid NaN/Infinity; the formula is undefined for such a tree.

    return (float)(accessed - height * objects) / (float)denominator;
}
/** /**
* Represents the MH-Tree bulk-loading algorithm. * Represents the MH-Tree bulk-loading algorithm.
*
* Defaults:
* Insert type is {@link InsertType#GREEDY}.
* Node's hull rank to an object is {@link ObjectToNodeDistance#NEAREST}.
* Hull merging method is {@link MergingMethod#HULL_BASED_MERGE}.
*/ */
public static class MHTreeBuilder { public static class MHTreeBuilder {
...@@ -413,6 +630,27 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -413,6 +630,27 @@ public class MHTree extends Algorithm implements Serializable {
/** /**
* Creates a new MH-Tree builder corresponding to the bulk-loading algorithm. * Creates a new MH-Tree builder corresponding to the bulk-loading algorithm.
* *
* Defaults:
* Insert type is {@link InsertType#GREEDY}.
* Node's hull rank to an object is {@link ObjectToNodeDistance#NEAREST}.
* Hull merging method is {@link MergingMethod#HULL_BASED_MERGE}.
*
* @param objectsIter iterator of objects
* @param bucketCapacity leaf node bucket capacity
* @param arity arity
*/
public MHTreeBuilder(Iterator<LocalAbstractObject> objectsIter, int bucketCapacity, int arity) {
// The iterator is read to its end into a list first; the list-based constructor does the rest.
this(iterToList(objectsIter), bucketCapacity, arity);
}
/**
* Creates a new MH-Tree builder corresponding to the bulk-loading algorithm.
*
* Defaults:
* Insert type is {@link InsertType#GREEDY}.
* Node's hull rank to an object is {@link ObjectToNodeDistance#NEAREST}.
* Hull merging method is {@link MergingMethod#HULL_BASED_MERGE}.
*
* @param objects list of objects * @param objects list of objects
* @param bucketCapacity leaf node bucket capacity * @param bucketCapacity leaf node bucket capacity
* @param arity arity * @param arity arity
...@@ -423,11 +661,11 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -423,11 +661,11 @@ public class MHTree extends Algorithm implements Serializable {
} }
if (objects.size() < 3) { if (objects.size() < 3) {
throw new IllegalArgumentException("Number of objects cannot by smaller than 3"); throw new IllegalArgumentException("Number of objects to insert cannot by smaller than 3");
} }
if (bucketCapacity < 3) { if (bucketCapacity < 3) {
throw new IllegalArgumentException("Bucket capacity cannot be smaller than 3"); throw new IllegalArgumentException("Bucket capacity cannot be smaller than 3 objects");
} }
if (arity < 2) { if (arity < 2) {
...@@ -438,12 +676,18 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -438,12 +676,18 @@ public class MHTree extends Algorithm implements Serializable {
this.bucketCapacity = bucketCapacity; this.bucketCapacity = bucketCapacity;
this.arity = arity; this.arity = arity;
this.insertType = GREEDY; this.insertType = InsertType.GREEDY;
this.objectToNodeDistance = NEAREST; this.objectToNodeDistance = ObjectToNodeDistance.NEAREST;
this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, 2L * bucketCapacity - 1, 0, false, MemoryStorageBucket.class, null); this.bucketDispatcher = new BucketDispatcher(Integer.MAX_VALUE, Long.MAX_VALUE, 2L * bucketCapacity - 1, 0, false, MemoryStorageBucket.class, null);
this.mergingMethod = HULL_BASED_MERGE; this.mergingMethod = MergingMethod.HULL_BASED_MERGE;
} }
/**
 * Collects all remaining objects of the passed iterator into a new list,
 * keeping the order in which the iterator returns them.
 *
 * @param iter iterator to read to its end
 * @return a new list with the objects returned by the iterator
 */
private static List<LocalAbstractObject> iterToList(final Iterator<LocalAbstractObject> iter) {
    final List<LocalAbstractObject> collected = new ArrayList<>();
    while (iter.hasNext()) {
        collected.add(iter.next());
    }
    return collected;
}
/** /**
* Returns the closest objects with respect to the building of a hull. * Returns the closest objects with respect to the building of a hull.
* The number of returned indices is specified by {@code numberOfObjects}. * The number of returned indices is specified by {@code numberOfObjects}.
...@@ -538,7 +782,7 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -538,7 +782,7 @@ public class MHTree extends Algorithm implements Serializable {
this.mergingMethod = mergingMethod; this.mergingMethod = mergingMethod;
return this; return this;
} }
/** /**
* Builds an MH-Tree based on specified parameters. * Builds an MH-Tree based on specified parameters.
* *
...@@ -551,22 +795,25 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -551,22 +795,25 @@ public class MHTree extends Algorithm implements Serializable {
validNodeIndices = new BitSet(nodes.length); validNodeIndices = new BitSet(nodes.length);
validNodeIndices.set(0, nodes.length); validNodeIndices.set(0, nodes.length);
PrecomputedDistances.COMPUTATION_THREADS = 12;
objectDistances = new PrecomputedDistances(objects); objectDistances = new PrecomputedDistances(objects);
// Every object is stored in the root // Every object is stored in the root
if (objectDistances.getObjectCount() <= bucketCapacity) { if (objects.size() <= bucketCapacity) {
root = new LeafNode(objectDistances, bucketDispatcher.createBucket(), insertType, objectToNodeDistance); root = new LeafNode(objectDistances, bucketDispatcher.createBucket(), insertType, objectToNodeDistance);
return new MHTree(this); return new MHTree(this);
} }
System.out.println("Creating leaf nodes...");
createLeafNodes(bucketCapacity); createLeafNodes(bucketCapacity);
System.out.println("Matrix of node distances...");
nodeDistances = new PrecomputedNodeDistances(); nodeDistances = new PrecomputedNodeDistances();
System.out.println("Creatin MH-tree...");
root = createRoot(arity); root = createRoot(arity);
objectDistances = null; objectDistances = null;
System.gc();
return new MHTree(this); return new MHTree(this);
} }
...@@ -638,12 +885,12 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -638,12 +885,12 @@ public class MHTree extends Algorithm implements Serializable {
// Select the rest of the objects up to the total of bucketCapacity with respect to the building of a hull // Select the rest of the objects up to the total of bucketCapacity with respect to the building of a hull
objectIndices.addAll(findClosestObjects(furthestIndex, bucketCapacity - 1, notProcessedObjectIndices, objectDistances)); objectIndices.addAll(findClosestObjects(furthestIndex, bucketCapacity - 1, notProcessedObjectIndices, objectDistances));
List<LocalAbstractObject> objects = objectIndices List<LocalAbstractObject> objs = objectIndices
.stream() .stream()
.map(objectDistances::getObject) .map(objectDistances::getObject)
.collect(Collectors.toList()); .collect(Collectors.toList());
nodes[nodeIndex] = new LeafNode(objectDistances.getSubset(objects), bucketDispatcher.createBucket(), insertType, objectToNodeDistance); nodes[nodeIndex] = new LeafNode(objectDistances.getSubset(objs), bucketDispatcher.createBucket(), insertType, objectToNodeDistance);
} }
} }
...@@ -658,7 +905,8 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -658,7 +905,8 @@ public class MHTree extends Algorithm implements Serializable {
int closestNodeIndex = -1; int closestNodeIndex = -1;
for (int candidateIndex = 0; candidateIndex < nodes.length; candidateIndex++) { for (int candidateIndex = 0; candidateIndex < nodes.length; candidateIndex++) {
double distance = NEAREST.getDistance(object, nodes[candidateIndex], objectDistances); double distance = ObjectToNodeDistance.NEAREST.getDistance(object, nodes[candidateIndex], objectDistances);
// Vlasta: NOT THIS!!!! >>> double distance = objectToNodeDistance.getDistance(object, nodes[candidateIndex], objectDistances);
if (distance < minDistance) { if (distance < minDistance) {
minDistance = distance; minDistance = distance;
...@@ -692,7 +940,8 @@ public class MHTree extends Algorithm implements Serializable { ...@@ -692,7 +940,8 @@ public class MHTree extends Algorithm implements Serializable {
* @param nodeIndices a list of indices which are merged together with {@code parentNodeIndex} into a new node * @param nodeIndices a list of indices which are merged together with {@code parentNodeIndex} into a new node
*/ */
private void mergeNodes(int parentNodeIndex, List<Integer> nodeIndices) { private void mergeNodes(int parentNodeIndex, List<Integer> nodeIndices) {
if (nodeIndices.size() == 0) return; if (nodeIndices.isEmpty())
return;
nodeIndices.add(parentNodeIndex); nodeIndices.add(parentNodeIndex);
......
...@@ -97,7 +97,7 @@ public class RunBenchmark { ...@@ -97,7 +97,7 @@ public class RunBenchmark {
long buildinTime = System.currentTimeMillis() - buildingStartTimeStamp; long buildinTime = System.currentTimeMillis() - buildingStartTimeStamp;
//OperationStatistics.getLocalThreadStatistics().printStatistics(); //OperationStatistics.getLocalThreadStatistics().printStatistics();
mhTree.printStatistics(); mhTree.printStatistics();
//System.out.println("Fat factor: " + mhTree.getFatFactor()); // System.out.println("Fat factor: " + mhTree.getFatFactor());
System.out.println("Building time: " + buildinTime + " msec"); System.out.println("Building time: " + buildinTime + " msec");
System.out.println("kNN queries will be executed for k=" + Arrays.toString(ks)); System.out.println("kNN queries will be executed for k=" + Arrays.toString(ks));
...@@ -125,18 +125,18 @@ public class RunBenchmark { ...@@ -125,18 +125,18 @@ public class RunBenchmark {
final int approxLimit = Math.round((float) objects.size() * (float) percentage / 100f); final int approxLimit = Math.round((float) objects.size() * (float) percentage / 100f);
approxOperations approxOperations
.parallelStream() .parallelStream()
.filter(op -> ((SearchState) op.suppData).recall != 1d) .filter(op -> ((SearchState) op.suppData).recall < 1d)
.forEach(op -> { .forEach(op -> {
SearchState searchState = (SearchState) op.suppData; SearchState searchState = (SearchState) op.suppData;
searchState.approximateState.limit = approxLimit;
try { try {
//mhTree.approxKNN(op); //mhTree.approxKNN(op);
mhTree.executeOperation(op); mhTree.executeOperation(op);
} catch (AlgorithmMethodException | NoSuchMethodException ex) { } catch (AlgorithmMethodException | NoSuchMethodException ex) {
} }
searchState.time = op.getParameter("OperationTime", Long.class); searchState.time += op.getParameter("OperationTime", Long.class);
searchState.recall = PerformanceMeasures.measureRecall(op, kNNResults); searchState.recall = PerformanceMeasures.measureRecall(op, kNNResults);
searchState.approximateState.limit = approxLimit; // Vlasta: NO!!! Because we test it incrementally!!!! See the search state of operation! op.resetAnswer();
op.resetAnswer();
}); });
Stats recallStats = new Stats( Stats recallStats = new Stats(
...@@ -217,13 +217,11 @@ public class RunBenchmark { ...@@ -217,13 +217,11 @@ public class RunBenchmark {
.forEach(op -> { .forEach(op -> {
SearchInfoState searchState = (SearchInfoState) op.suppData; SearchInfoState searchState = (SearchInfoState) op.suppData;
try { try {
//mhTree.approxKNN(op);
mTree.executeOperation(op); mTree.executeOperation(op);
} catch (AlgorithmMethodException | NoSuchMethodException ex) { } catch (AlgorithmMethodException | NoSuchMethodException ex) {
} }
searchState.time = op.getParameter("OperationTime", Long.class); searchState.time = op.getParameter("OperationTime", Long.class);
searchState.recall = PerformanceMeasures.measureRecall(op, kNNResults); searchState.recall = PerformanceMeasures.measureRecall(op, kNNResults);
//searchState.approximateState.limit += Math.round((float) objects.size() * (float) percentageStep / 100f);
}); });
Stats recallStats = new Stats( Stats recallStats = new Stats(
...@@ -238,23 +236,6 @@ public class RunBenchmark { ...@@ -238,23 +236,6 @@ public class RunBenchmark {
.map(op -> (double)((SearchInfoState) op.suppData).time) .map(op -> (double)((SearchInfoState) op.suppData).time)
.collect(Collectors.toList()) .collect(Collectors.toList())
); );
// for (int i = 0; i < numberOfQueries; i++) {
// if (recalls.get(i) != 1.0) {
// ApproxKNNQueryOperation operation = new ApproxKNNQueryOperation(
// queries.get(i),
// k,
// percentage,
// Approximate.LocalSearchType.PERCENTAGE,
// LocalAbstractObject.UNKNOWN_DISTANCE
// );
//
// mTree.executeOperation(operation);
//
// times.set(i, (double)operation.getParameter("OperationTime", Long.class));
// recalls.set(i, PerformanceMeasures.measureRecall(operation, mTree));
// }
// }
System.out.println(String.join(",", System.out.println(String.join(",",
String.valueOf(config.leafCapacity), String.valueOf(config.leafCapacity),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment