Commit 98bc14aa authored by David Novak's avatar David Novak
Browse files

* changes to process similarity join (get the candidate set for it)

parent 03be6a78
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@

    <groupId>mindex</groupId>
    <artifactId>ppp-codes</artifactId>
    <version>1.3.4-DEVEL</version>
    <version>1.4.0-DEVEL</version>
    <packaging>jar</packaging>

    <name>ppp-codes</name>
+7 −1
Original line number Diff line number Diff line
@@ -44,6 +44,7 @@ import messif.operations.data.DeleteOperation;
import messif.operations.data.InsertOperation;
import messif.operations.query.ApproxKNNQueryOperation;
import messif.operations.GetCandidateSetOperation;
import messif.operations.GetJoinCandidatesOperation;
import messif.operations.query.GetObjectCountOperation;
import messif.operations.query.GetRandomObjectsQueryOperation;
import mindex.algorithms.MIndexAlgorithm;
@@ -54,6 +55,7 @@ import pppcodes.index.PPPCodeInternalCell;
import pppcodes.index.persistent.PPPCodeLeafCellFile;
import pppcodes.processors.ApproxNavProcessorCandSet;
import pppcodes.processors.ApproxNavProcessorNorefine;
import pppcodes.processors.GetJoinCandidatesProcessor;
import pppcodes.processors.GetRandomNoDataObjectsNavigationProcessor;

/**
@@ -212,7 +214,7 @@ public class PPPCodeAlgorithm extends MultipleOverlaysAlgorithm {
    /** Pre-created list of supported operations. */
    private final static List<Class<? extends AbstractOperation>> supportedOperations = 
            Collections.unmodifiableList(Arrays.asList(BulkInsertOperation.class, InsertOperation.class, DeleteOperation.class, 
                ApproxKNNQueryOperation.class, GetCandidateSetOperation.class, GetRandomObjectsQueryOperation.class, GetObjectCountOperation.class));
                ApproxKNNQueryOperation.class, GetCandidateSetOperation.class, GetRandomObjectsQueryOperation.class, GetObjectCountOperation.class, GetJoinCandidatesOperation.class));

    @Override
    public List<Class<? extends AbstractOperation>> getSupportedOperations() {
@@ -252,6 +254,10 @@ public class PPPCodeAlgorithm extends MultipleOverlaysAlgorithm {
            return new GetRandomNoDataObjectsNavigationProcessor((GetRandomObjectsQueryOperation) operation, 
                    algorithms.get(0).getmIndex().getVoronoiCellTree(), locatorConvertor);
        }
        
        if (operation instanceof GetJoinCandidatesOperation) {
            return new GetJoinCandidatesProcessor((GetJoinCandidatesOperation) operation, algorithms.get(0).getmIndex(), locatorConvertor);
        }
        return null;
    }
    
+0 −1
Original line number Diff line number Diff line
@@ -19,7 +19,6 @@ package pppcodes.index;
import java.nio.BufferOverflowException;
import java.nio.BufferUnderflowException;
import java.util.Arrays;
import mindex.MetricIndexes;
import mindex.distance.PartialQueryPPPDistanceCalculator;
import mindex.distance.QueryPPPDistanceCalculator;
import pppcodes.PPPCodeIndex;
+97 −0
Original line number Diff line number Diff line

package pppcodes.processors;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import messif.algorithms.AlgorithmMethodException;
import messif.algorithms.impl.OneStepNavigationProcessor;
import messif.objects.util.AbstractObjectIterator;
import messif.operations.GetJoinCandidatesOperation;
import mindex.navigation.VoronoiCell;
import mindex.navigation.VoronoiInternalCell;
import mindex.navigation.VoronoiLeafCell;
import pppcodes.PPPCodeIndex;
import pppcodes.ids.LocatorStringIntConvertor;
import pppcodes.index.PPPCodeObject;

/**
 * Navigation processor that collects candidate sets for a similarity join.
 * <p>
 * A candidate set is the list of String locators of all objects that share one
 * {@link PPPCodeObject} stored in a leaf cell of the PPP-Tree. Records that hold fewer
 * than two locators are skipped, since a single object cannot form a join pair.
 * The traversal stops as soon as {@link #maxSets} candidate sets have been produced.
 *
 * @author xnovak8
 */
public class GetJoinCandidatesProcessor extends OneStepNavigationProcessor<GetJoinCandidatesOperation> {

    /** Name of the operation parameter that receives the number of generated candidate sets. */
    protected static final String CAND_SET_COUNT_PARAM = "CAND_SET_COUNT";

    /** PPP-Codes store object IDs as integers and this converts them back to Strings. */
    protected final LocatorStringIntConvertor locatorTranslator;

    /** The PPP-Tree to find the candidate sets for similarity join. */
    protected final PPPCodeIndex pppCodeIndex;

    /** The maximal number of candidate sets to be returned (used mainly for testing purposes). */
    protected int maxSets = Integer.MAX_VALUE;

    /** The actual number of candidate sets returned. */
    protected int setsGenerated = 0;

    /**
     * Creates a new processor that gathers similarity-join candidate sets from the given PPP-Tree.
     * The limit on the number of sets is read from the operation parameter
     * {@link GetJoinCandidatesOperation#MAX_SETS_PARAM}; {@link Integer#MAX_VALUE} is passed
     * as the default value for that parameter lookup.
     *
     * @param operation encapsulating operation
     * @param pppCodeIndex the main PPP-Code index object (configuration and dynamic PPP-Trie)
     * @param locTranslator translator of internal integer IDs to String locators
     */
    public GetJoinCandidatesProcessor(GetJoinCandidatesOperation operation, PPPCodeIndex pppCodeIndex, LocatorStringIntConvertor locTranslator) {
        super(operation);
        this.pppCodeIndex = pppCodeIndex;
        this.locatorTranslator = locTranslator;
        this.maxSets = operation.getParameter(GetJoinCandidatesOperation.MAX_SETS_PARAM, Integer.class, Integer.MAX_VALUE);
    }

    /**
     * Consolidates the index data, walks the whole PPP-Tree from its root and finally stores
     * the number of generated candidate sets in the operation under {@link #CAND_SET_COUNT_PARAM}.
     *
     * @throws AlgorithmMethodException declared by the overridden method
     */
    @Override
    protected void process() throws AlgorithmMethodException {
        // first, consolidate the data in all leafs so that all locators corresponding to one PPP-Code are stored together
        pppCodeIndex.consolidateTreeData();

        processOnInternal(pppCodeIndex.getVoronoiCellTree());
        getOperation().setParameter(CAND_SET_COUNT_PARAM, setsGenerated);
    }

    /**
     * Recursively calls processing on all child nodes of given internal node.
     * The iteration over children ends early once {@link #maxSets} sets have been generated.
     *
     * @param internal PPP-Tree internal node to call the processing on
     */
    protected void processOnInternal(VoronoiInternalCell<PPPCodeObject> internal) {
        Iterator<Map.Entry<Short, VoronoiCell<PPPCodeObject>>> childIt = internal.getChildNodes();
        while (childIt.hasNext() && setsGenerated < maxSets) {
            VoronoiCell<PPPCodeObject> nextNode = childIt.next().getValue();
            // parameterized casts keep the element type and avoid raw-type unchecked conversions
            if (nextNode instanceof VoronoiLeafCell) {
                processOnLeaf((VoronoiLeafCell<PPPCodeObject>) nextNode);
            } else if (nextNode instanceof VoronoiInternalCell) {
                processOnInternal((VoronoiInternalCell<PPPCodeObject>) nextNode);
            }
        }
    }

    /**
     * Take data from the bottom leafs that make the candidate sets. Every stored record with
     * at least two locators yields one candidate set that is added to the operation.
     *
     * @param leaf PPP-Tree leaf to read candidate sets from
     */
    protected void processOnLeaf(VoronoiLeafCell<PPPCodeObject> leaf) {
        AbstractObjectIterator<PPPCodeObject> allObjects = leaf.getAllObjects();
        while (allObjects.hasNext() && setsGenerated < maxSets) {
            PPPCodeObject objectsWithOnePPP = allObjects.next();
            int locatorCount = objectsWithOnePPP.getLocatorCount();
            if (locatorCount < 2) {
                // a single locator has no partner to be joined with
                continue;
            }
            ArrayList<String> candidateIds = new ArrayList<>(locatorCount);
            for (int intId : objectsWithOnePPP.getLocators()) {
                candidateIds.add(locatorTranslator.getStringLocator(intId));
            }
            operation.addList(candidateIds);
            setsGenerated++;
        }
    }

}