Loading pom.xml +1 −1 Original line number Diff line number Diff line Loading @@ -4,7 +4,7 @@ <groupId>mindex</groupId> <artifactId>ppp-codes</artifactId> <version>1.3.4-DEVEL</version> <version>1.4.0-DEVEL</version> <packaging>jar</packaging> <name>ppp-codes</name> Loading src/main/java/pppcodes/algorithms/PPPCodeAlgorithm.java +7 −1 Original line number Diff line number Diff line Loading @@ -44,6 +44,7 @@ import messif.operations.data.DeleteOperation; import messif.operations.data.InsertOperation; import messif.operations.query.ApproxKNNQueryOperation; import messif.operations.GetCandidateSetOperation; import messif.operations.GetJoinCandidatesOperation; import messif.operations.query.GetObjectCountOperation; import messif.operations.query.GetRandomObjectsQueryOperation; import mindex.algorithms.MIndexAlgorithm; Loading @@ -54,6 +55,7 @@ import pppcodes.index.PPPCodeInternalCell; import pppcodes.index.persistent.PPPCodeLeafCellFile; import pppcodes.processors.ApproxNavProcessorCandSet; import pppcodes.processors.ApproxNavProcessorNorefine; import pppcodes.processors.GetJoinCandidatesProcessor; import pppcodes.processors.GetRandomNoDataObjectsNavigationProcessor; /** Loading Loading @@ -212,7 +214,7 @@ public class PPPCodeAlgorithm extends MultipleOverlaysAlgorithm { /** Pre-created list of supported operations. */ private final static List<Class<? extends AbstractOperation>> supportedOperations = Collections.unmodifiableList(Arrays.asList(BulkInsertOperation.class, InsertOperation.class, DeleteOperation.class, ApproxKNNQueryOperation.class, GetCandidateSetOperation.class, GetRandomObjectsQueryOperation.class, GetObjectCountOperation.class)); ApproxKNNQueryOperation.class, GetCandidateSetOperation.class, GetRandomObjectsQueryOperation.class, GetObjectCountOperation.class, GetJoinCandidatesOperation.class)); @Override public List<Class<? 
extends AbstractOperation>> getSupportedOperations() { Loading Loading @@ -252,6 +254,10 @@ public class PPPCodeAlgorithm extends MultipleOverlaysAlgorithm { return new GetRandomNoDataObjectsNavigationProcessor((GetRandomObjectsQueryOperation) operation, algorithms.get(0).getmIndex().getVoronoiCellTree(), locatorConvertor); } if (operation instanceof GetJoinCandidatesOperation) { return new GetJoinCandidatesProcessor((GetJoinCandidatesOperation) operation, algorithms.get(0).getmIndex(), locatorConvertor); } return null; } Loading src/main/java/pppcodes/index/PPPCodeReadWriter.java +0 −1 Original line number Diff line number Diff line Loading @@ -19,7 +19,6 @@ package pppcodes.index; import java.nio.BufferOverflowException; import java.nio.BufferUnderflowException; import java.util.Arrays; import mindex.MetricIndexes; import mindex.distance.PartialQueryPPPDistanceCalculator; import mindex.distance.QueryPPPDistanceCalculator; import pppcodes.PPPCodeIndex; Loading src/main/java/pppcodes/processors/GetJoinCandidatesProcessor.java 0 → 100644 +97 −0 Original line number Diff line number Diff line package pppcodes.processors; import java.util.ArrayList; import java.util.Iterator; import java.util.Map; import messif.algorithms.AlgorithmMethodException; import messif.algorithms.impl.OneStepNavigationProcessor; import messif.objects.util.AbstractObjectIterator; import messif.operations.GetJoinCandidatesOperation; import mindex.navigation.VoronoiCell; import mindex.navigation.VoronoiInternalCell; import mindex.navigation.VoronoiLeafCell; import pppcodes.PPPCodeIndex; import pppcodes.ids.LocatorStringIntConvertor; import pppcodes.index.PPPCodeObject; /** * This processor finds sets of object IDs that are candidates for similarity join. Specifically, * a candidate set is always formed by objects within the same PPP-Tree cell on the lowest level. 
* * @author xnovak8 */ public class GetJoinCandidatesProcessor extends OneStepNavigationProcessor<GetJoinCandidatesOperation> { /** PPP-Codes store object IDs as integers and this converts them back to Strings. */ protected final LocatorStringIntConvertor locatorTranslator; /** The PPP-Tree to find the candidate sets for similarity join. */ protected final PPPCodeIndex pppCodeIndex; /** The maximal number of candidate sets to be returned (used mainly for testing purposes). */ protected int maxSets = Integer.MAX_VALUE; /** The actual number of candidate sets returned. */ protected int setsGenerated = 0; /** * Creates new processor to collect similarity-join candidate sets from given PPP-Tree. * @param operation encapsulating operation * @param pppCodeIndex the main PPP-Code index object (configuration and dynamic PPP-Trie) * @param locTranslator translator of internal integer IDs to String locators */ public GetJoinCandidatesProcessor(GetJoinCandidatesOperation operation, PPPCodeIndex pppCodeIndex, LocatorStringIntConvertor locTranslator) { super(operation); this.pppCodeIndex = pppCodeIndex; this.locatorTranslator = locTranslator; maxSets = operation.getParameter(GetJoinCandidatesOperation.MAX_SETS_PARAM, Integer.class, Integer.MAX_VALUE); } @Override protected void process() throws AlgorithmMethodException { // first, consolidate the data in all leafs so that all locators corresponding to one PPP-Code are stored together pppCodeIndex.consolidateTreeData(); processOnInternal(pppCodeIndex.getVoronoiCellTree()); getOperation().setParameter("CAND_SET_COUNT", setsGenerated); } /** * Recursively calls processing on all child nodes of given internal node. 
* @param internal PPP-Tree internal node to call the processing on */ protected void processOnInternal(VoronoiInternalCell<PPPCodeObject> internal) { Iterator<Map.Entry<Short, VoronoiCell<PPPCodeObject>>> childIt = internal.getChildNodes(); while (childIt.hasNext() && setsGenerated < maxSets) { VoronoiCell<PPPCodeObject> nextNode = childIt.next().getValue(); if (nextNode instanceof VoronoiLeafCell) { processOnLeaf((VoronoiLeafCell) nextNode); } if (nextNode instanceof VoronoiInternalCell) { processOnInternal((VoronoiInternalCell) nextNode); } } } /** * Take data from the bottom leafs that make the candidate sets. * @param leaf PPP-Tree leaf to read candidate sets from */ protected void processOnLeaf(VoronoiLeafCell<PPPCodeObject> leaf) { AbstractObjectIterator<PPPCodeObject> allObjects = leaf.getAllObjects(); while (allObjects.hasNext() && setsGenerated < maxSets) { PPPCodeObject objectsWithOnePPP = allObjects.next(); if (objectsWithOnePPP.getLocatorCount() < 2) { continue; } ArrayList<String> candidateIds = new ArrayList<>(objectsWithOnePPP.getLocatorCount()); for (int intId : objectsWithOnePPP.getLocators()) { candidateIds.add(locatorTranslator.getStringLocator(intId)); } operation.addList(candidateIds); setsGenerated ++; } } } Loading
pom.xml +1 −1 Original line number Diff line number Diff line Loading @@ -4,7 +4,7 @@ <groupId>mindex</groupId> <artifactId>ppp-codes</artifactId> <version>1.3.4-DEVEL</version> <version>1.4.0-DEVEL</version> <packaging>jar</packaging> <name>ppp-codes</name> Loading
src/main/java/pppcodes/algorithms/PPPCodeAlgorithm.java +7 −1 Original line number Diff line number Diff line Loading @@ -44,6 +44,7 @@ import messif.operations.data.DeleteOperation; import messif.operations.data.InsertOperation; import messif.operations.query.ApproxKNNQueryOperation; import messif.operations.GetCandidateSetOperation; import messif.operations.GetJoinCandidatesOperation; import messif.operations.query.GetObjectCountOperation; import messif.operations.query.GetRandomObjectsQueryOperation; import mindex.algorithms.MIndexAlgorithm; Loading @@ -54,6 +55,7 @@ import pppcodes.index.PPPCodeInternalCell; import pppcodes.index.persistent.PPPCodeLeafCellFile; import pppcodes.processors.ApproxNavProcessorCandSet; import pppcodes.processors.ApproxNavProcessorNorefine; import pppcodes.processors.GetJoinCandidatesProcessor; import pppcodes.processors.GetRandomNoDataObjectsNavigationProcessor; /** Loading Loading @@ -212,7 +214,7 @@ public class PPPCodeAlgorithm extends MultipleOverlaysAlgorithm { /** Pre-created list of supported operations. */ private final static List<Class<? extends AbstractOperation>> supportedOperations = Collections.unmodifiableList(Arrays.asList(BulkInsertOperation.class, InsertOperation.class, DeleteOperation.class, ApproxKNNQueryOperation.class, GetCandidateSetOperation.class, GetRandomObjectsQueryOperation.class, GetObjectCountOperation.class)); ApproxKNNQueryOperation.class, GetCandidateSetOperation.class, GetRandomObjectsQueryOperation.class, GetObjectCountOperation.class, GetJoinCandidatesOperation.class)); @Override public List<Class<? 
extends AbstractOperation>> getSupportedOperations() { Loading Loading @@ -252,6 +254,10 @@ public class PPPCodeAlgorithm extends MultipleOverlaysAlgorithm { return new GetRandomNoDataObjectsNavigationProcessor((GetRandomObjectsQueryOperation) operation, algorithms.get(0).getmIndex().getVoronoiCellTree(), locatorConvertor); } if (operation instanceof GetJoinCandidatesOperation) { return new GetJoinCandidatesProcessor((GetJoinCandidatesOperation) operation, algorithms.get(0).getmIndex(), locatorConvertor); } return null; } Loading
src/main/java/pppcodes/index/PPPCodeReadWriter.java +0 −1 Original line number Diff line number Diff line Loading @@ -19,7 +19,6 @@ package pppcodes.index; import java.nio.BufferOverflowException; import java.nio.BufferUnderflowException; import java.util.Arrays; import mindex.MetricIndexes; import mindex.distance.PartialQueryPPPDistanceCalculator; import mindex.distance.QueryPPPDistanceCalculator; import pppcodes.PPPCodeIndex; Loading
// src/main/java/pppcodes/processors/GetJoinCandidatesProcessor.java 0 → 100644 +97 −0 Original line number Diff line number Diff line
package pppcodes.processors;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import messif.algorithms.AlgorithmMethodException;
import messif.algorithms.impl.OneStepNavigationProcessor;
import messif.objects.util.AbstractObjectIterator;
import messif.operations.GetJoinCandidatesOperation;
import mindex.navigation.VoronoiCell;
import mindex.navigation.VoronoiInternalCell;
import mindex.navigation.VoronoiLeafCell;
import pppcodes.PPPCodeIndex;
import pppcodes.ids.LocatorStringIntConvertor;
import pppcodes.index.PPPCodeObject;

/**
 * This processor finds sets of object IDs that are candidates for similarity join. Specifically,
 * a candidate set is always formed by objects within the same PPP-Tree cell on the lowest level,
 * i.e. by all locators held by a single {@link PPPCodeObject}.
 *
 * @author xnovak8
 */
public class GetJoinCandidatesProcessor extends OneStepNavigationProcessor<GetJoinCandidatesOperation> {

    /** Name of the operation parameter into which the number of generated candidate sets is stored. */
    private static final String CAND_SET_COUNT_PARAM = "CAND_SET_COUNT";

    /** PPP-Codes store object IDs as integers and this converts them back to Strings. */
    protected final LocatorStringIntConvertor locatorTranslator;

    /** The PPP-Tree to find the candidate sets for similarity join. */
    protected final PPPCodeIndex pppCodeIndex;

    /** The maximal number of candidate sets to be returned (used mainly for testing purposes). */
    protected int maxSets = Integer.MAX_VALUE;

    /** The actual number of candidate sets returned. */
    protected int setsGenerated = 0;

    /**
     * Creates new processor to collect similarity-join candidate sets from given PPP-Tree.
     * The limit on the number of returned sets is read from the operation parameter
     * {@link GetJoinCandidatesOperation#MAX_SETS_PARAM}; {@link Integer#MAX_VALUE} is passed as
     * the fallback value.
     *
     * @param operation encapsulating operation
     * @param pppCodeIndex the main PPP-Code index object (configuration and dynamic PPP-Trie)
     * @param locTranslator translator of internal integer IDs to String locators
     */
    public GetJoinCandidatesProcessor(GetJoinCandidatesOperation operation, PPPCodeIndex pppCodeIndex, LocatorStringIntConvertor locTranslator) {
        super(operation);
        this.pppCodeIndex = pppCodeIndex;
        this.locatorTranslator = locTranslator;
        maxSets = operation.getParameter(GetJoinCandidatesOperation.MAX_SETS_PARAM, Integer.class, Integer.MAX_VALUE);
    }

    /**
     * Consolidates the tree data, walks the whole PPP-Tree collecting candidate sets into the
     * operation, and finally stores the number of generated sets into the operation
     * parameter {@code "CAND_SET_COUNT"}.
     *
     * @throws AlgorithmMethodException declared by the overridden method
     */
    @Override
    protected void process() throws AlgorithmMethodException {
        // first, consolidate the data in all leaves so that all locators corresponding to one PPP-Code are stored together
        pppCodeIndex.consolidateTreeData();
        processOnInternal(pppCodeIndex.getVoronoiCellTree());
        getOperation().setParameter(CAND_SET_COUNT_PARAM, setsGenerated);
    }

    /**
     * Recursively calls processing on all child nodes of given internal node. The traversal
     * stops as soon as {@link #maxSets} candidate sets have been generated.
     *
     * @param internal PPP-Tree internal node to call the processing on
     */
    protected void processOnInternal(VoronoiInternalCell<PPPCodeObject> internal) {
        Iterator<Map.Entry<Short, VoronoiCell<PPPCodeObject>>> childIt = internal.getChildNodes();
        while (childIt.hasNext() && setsGenerated < maxSets) {
            VoronoiCell<PPPCodeObject> nextNode = childIt.next().getValue();
            // a child is either a leaf or an internal node; the casts keep the type argument
            // instead of falling back to raw types
            if (nextNode instanceof VoronoiLeafCell) {
                processOnLeaf((VoronoiLeafCell<PPPCodeObject>) nextNode);
            } else if (nextNode instanceof VoronoiInternalCell) {
                processOnInternal((VoronoiInternalCell<PPPCodeObject>) nextNode);
            }
        }
    }

    /**
     * Take data from the bottom leaves that make the candidate sets. Every stored object that
     * carries at least two locators yields one candidate set (a list of String locators) that is
     * added to the operation; objects with fewer than two locators are skipped because they
     * cannot form a join pair.
     *
     * @param leaf PPP-Tree leaf to read candidate sets from
     */
    protected void processOnLeaf(VoronoiLeafCell<PPPCodeObject> leaf) {
        AbstractObjectIterator<PPPCodeObject> allObjects = leaf.getAllObjects();
        while (allObjects.hasNext() && setsGenerated < maxSets) {
            PPPCodeObject objectsWithOnePPP = allObjects.next();
            if (objectsWithOnePPP.getLocatorCount() < 2) {
                continue;
            }
            ArrayList<String> candidateIds = new ArrayList<>(objectsWithOnePPP.getLocatorCount());
            for (int intId : objectsWithOnePPP.getLocators()) {
                candidateIds.add(locatorTranslator.getStringLocator(intId));
            }
            operation.addList(candidateIds);
            setsGenerated++;
        }
    }
}