Commit f585677b authored by David Novak's avatar David Novak
Browse files

* implemented delete operation:

   - deleted objects (IDs) are marked as deleted within an array of indexes in the leaf node
   - these deleted indexes are NOT used for "getAllObjects"
   - they are only applied during the "consolidate" call (which is now triggered during every serialization of the leaf)
parent deade706
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@

    <groupId>mindex</groupId>
    <artifactId>ppp-codes</artifactId>
    <version>1.2.10-DEVEL</version>
    <version>1.2.11-DEVEL</version>
    <packaging>jar</packaging>

    <name>ppp-codes</name>
+1 −4
Original line number Diff line number Diff line
@@ -27,8 +27,6 @@ import java.util.concurrent.atomic.AtomicLong;
import messif.algorithms.AlgorithmMethodException;
import messif.algorithms.NavigationProcessor;
import messif.buckets.BucketDispatcher;
import messif.buckets.storage.LongStorage;
import messif.buckets.storage.impl.DiskStorage;
import messif.objects.LocalAbstractObject;
import messif.operations.AbstractOperation;
import messif.operations.data.BulkInsertOperation;
@@ -42,7 +40,6 @@ import mindex.processors.DeleteOperationNavigationProcessor;
import mindex.processors.InsertOperationNavigationProcessor;
import pppcodes.index.PPPCodeObject;
import pppcodes.index.PPPCodeReadWriter;
import pppcodes.index.persistent.PPPCodeLeafCellFile;

/**
 * The full configuration and some functionality of one PPP-Code tree index. It extends the M-Index because
@@ -196,7 +193,7 @@ public class PPPCodeIndex extends MetricIndex {
                return InsertOperationNavigationProcessor.getInstance(operation, this);
            }
            if (operation instanceof DeleteOperation) {
                return DeleteOperationNavigationProcessor.getInstance((DeleteOperation) operation, this);
                return new DeleteOperationNavigationProcessor((DeleteOperation) operation, this);
            }
        } catch (AlgorithmMethodException e) {
            throw new IllegalStateException(e);
+4 −0
Original line number Diff line number Diff line
@@ -73,6 +73,10 @@ public class PPPCodeSingleAlgorithmFile extends PPPCodeSingleAlgorithm {
        return (PPPCodeIndexFile) mIndex;
    }
    
    /**
     * Sets the disk storage for the Voronoi tree leaves of this PPP-Codes algorithm.
     * The call is delegated to the index returned by {@code getmIndex()}.
     * @param leafStorage disk storage to manage the Voronoi tree leaves
     */
    public void setLeafStorage(DiskStorage<PPPCodeLeafCellFile.PPPCodeLeafData> leafStorage) {
        getmIndex().setLeafStorage(leafStorage);
    }    
+11 −6
Original line number Diff line number Diff line
@@ -85,6 +85,8 @@ public class PPPCodeInternalCell extends VoronoiInternalCell {
    // ************************    Data manipulation     *************************** //
    
    public void consolidateData(byte [] temporary) {
        readLock();
        try {
            for (Iterator<Map.Entry<Short, VoronoiCell>> childNodes = getChildNodes(); childNodes.hasNext(); ) {
                VoronoiCell child = childNodes.next().getValue();
                if (child instanceof PPPCodeInternalCell) {
@@ -93,6 +95,9 @@ public class PPPCodeInternalCell extends VoronoiInternalCell {
                    ((PPPCodeLeafCell) child).consolidateData(temporary);
                }
            }
        } finally {
            readUnLock();
        }
    }
    
}
+172 −66
Original line number Diff line number Diff line
@@ -75,6 +75,12 @@ public class PPPCodeLeafCell extends VoronoiLeafCell<PPPCodeObject> implements M
    /** New write buffer addressed in bits */
    protected volatile transient ByteBufferBits writeBufferBits;
    
    /** 
     * Set of locators (IDs) of deleted objects; it is kept in memory and during serialization (or
     * compaction) these IDs are removed from the actual data. 
     */
    protected volatile transient int [] deletedIndexes;
    
    /**
     * Constructor given all parameters mandatory for the parent class. The storage and the covered interval are set to null.
     * @param mIndex M-Index logic
@@ -99,6 +105,12 @@ public class PPPCodeLeafCell extends VoronoiLeafCell<PPPCodeObject> implements M
    }
    
    private void writeObject(java.io.ObjectOutputStream out) throws IOException {
        readLock();
        try {
            if (! isValid()) {
                throw new IOException("Trying to serialize leaf cell which is not valid");
            }
            consolidateData(null);
            // write position in bits
            out.writeInt(getOccupationInBits());
            if ((writeBufferBits != null) && (writeBufferBits.position() > 0)) {
@@ -108,6 +120,9 @@ public class PPPCodeLeafCell extends VoronoiLeafCell<PPPCodeObject> implements M
                    out.write(data);
                }
            }
        } finally {
            readUnLock();
        }
    }

    private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
@@ -251,7 +266,7 @@ public class PPPCodeLeafCell extends VoronoiLeafCell<PPPCodeObject> implements M
     */
    @Override
    public synchronized SplitConfiguration insertObjects(List<PPPCodeObject> objects) throws AlgorithmMethodException {
        // WARNING: THIS METHOD ASSUMES THAT THERE IS ONLY ONE LOCATOR IN EACH OBJECT
        // THIS METHOD ASSUMES THAT THERE IS ONLY ONE LOCATOR IN EACH OBJECT
        if (! ensureWriteOf(objects, getLevel())) {
            return new SplitConfiguration(false, 0, objects, null);
        }
@@ -310,6 +325,7 @@ public class PPPCodeLeafCell extends VoronoiLeafCell<PPPCodeObject> implements M
     *  then it is not used
     * @param thisNodePPP this PPP is used as upper (higher) PP indexes of the PPPs of the returned data objects; 
     *   if null, the data objects are simply read from the memory and returned
     * @param topDistance
     * @return iterator over all objects in the storage
     */    
    protected AbstractObjectIterator<? extends PPPCodeObject> getAllObjects(final QueryPPPDistanceCalculator calculator, final short [] thisNodePPP, final float topDistance) {
@@ -433,14 +449,93 @@ public class PPPCodeLeafCell extends VoronoiLeafCell<PPPCodeObject> implements M
    }    
    
    /**
     * Method that deletes given object from corresponding Bucket. If an instance
     * of SplitConfiguration is returned, the upper layer should merge this LeafCell.
     * Deletes given object from this leaf according to locator (ID).
     * @param object to be deleted
     * @param deleteLimit maximum number of objects that match given object to be deleted
     * @param checkLocator if true, then the object is deleted according to object locator
     * @return number of objects actually deleted by this method
     * @throws AlgorithmMethodException 
     */
    @Override
    public void deleteObject(PPPCodeObject object, int deleteLimit, boolean checkLocator) throws AlgorithmMethodException {
        throw new AlgorithmMethodException("delete on PPP-Code algorithm is not implemented yet");
    public int deleteObject(PPPCodeObject object, int deleteLimit, boolean checkLocator) throws AlgorithmMethodException {
        // NOTE(review): deleteLimit and checkLocator are not consulted by this implementation;
        //  objects are always matched by their locator (ID) - confirm this is intended.

        // collect the distinct locators to be deleted (the sent object can theoretically contain more than one)
        Set<Integer> locatorsToDelete = new HashSet<>(object.getLocatorCount());
        for (int loc : object.getLocators()) {
            locatorsToDelete.add(loc);
        }
        // count the distinct locators: a locator repeated in the passed object must not be counted twice
        final int requested = locatorsToDelete.size();

        // only the stored objects with this PPP suffix (from the level of this leaf on) are searched for the locators
        short [] pppSuffix = Arrays.copyOfRange(object.getPppForReading(), getLevel(), object.getPPPLength());

        // position of the current locator within the sequence of all locators returned by getAllObjects()
        int locIndex = 0;
        for (Iterator<PPPCodeObject> allObjs = getAllObjects(); allObjs.hasNext() && ! locatorsToDelete.isEmpty();) {
            PPPCodeObject next = allObjs.next();
            if (! Arrays.equals(pppSuffix, next.getPppForReading())) {
                // different PPP suffix: skip the whole object but keep the locator position in sync
                locIndex += next.getLocatorCount();
                continue;
            }
            for (int loc : next.getLocators()) {
                // getAllObjects() still returns the locators that are already marked as deleted; these
                //  must be skipped so that a repeated delete neither records the same position twice
                //  nor decreases the object count again
                int [] marked = deletedIndexes;
                boolean alreadyDeleted = (marked != null) && (Arrays.binarySearch(marked, locIndex) >= 0);
                if (! alreadyDeleted && locatorsToDelete.remove(loc)) {
                    addDeletedIndex(locIndex);
                }
                locIndex ++;
            }
        }

        // number of locators newly marked as deleted by this call
        int deleted = requested - locatorsToDelete.size();
        objectCount -= deleted;
        return deleted;
    }

    /**
     * Marks the locator at the given position as "deleted" by inserting the position into the
     * sorted array {@link #deletedIndexes}. A position that is already marked is ignored, because
     * the positions are later turned into array offsets by subtracting the number of preceding
     * deleted positions, which only works if each position is stored once.
     * @param index index (order) of the deleted locator within all locators stored in this leaf
     */
    protected void addDeletedIndex(int index) {
        int [] current = deletedIndexes;
        if (current == null) {
            deletedIndexes = new int [] {index};
            return;
        }
        // the array is kept sorted, so the insertion point can be found by binary search
        int found = Arrays.binarySearch(current, index);
        if (found >= 0) {
            // this position is already marked as deleted
            return;
        }
        int insertAt = -(found + 1);
        // build the extended array locally and assign the (volatile) field only when it is complete,
        //  so that the field never refers to a half-built array
        int [] extended = new int [current.length + 1];
        System.arraycopy(current, 0, extended, 0, insertAt);
        extended[insertAt] = index;
        System.arraycopy(current, insertAt, extended, insertAt + 1, current.length - insertAt);
        deletedIndexes = extended;
    }
    
    
    /**
     * Strips from the given object all locators whose positions are recorded in {@link #deletedIndexes}.
     * The recorded positions that fall before the end of this object are consumed, i.e. they are
     * removed from {@link #deletedIndexes} (which is set to null once it becomes empty). The
     * remaining locators are shifted within the array returned by {@code object.getLocators()}.
     * @param firstLocatorIndex position of the first locator of the passed object within all leaf locators
     * @param object object to remove locators from
     * @return the passed object itself if no recorded position falls into it, otherwise a new object
     *   with the remaining locators and the same PPP
     */
    private PPPCodeObject removeDeletedIndexes(int firstLocatorIndex, PPPCodeObject object) {
        // nothing is marked as deleted at all
        if (deletedIndexes == null) {
            return object;
        }
        // the first position behind the locators of this object
        final int endIndex = firstLocatorIndex + object.getLocatorCount();
        if (deletedIndexes[0] >= endIndex) {
            // the smallest recorded position lies behind this object
            return object;
        }

        int [] locators = object.getLocators();
        int removed = 0;
        for (; removed < deletedIndexes.length && deletedIndexes[removed] < endIndex; removed++) {
            // each previous removal has shifted the remaining locators by one to the left
            int position = deletedIndexes[removed] - firstLocatorIndex - removed;
            System.arraycopy(locators, position + 1, locators, position, locators.length - position - 1);
        }

        // drop the consumed positions
        deletedIndexes = (removed >= deletedIndexes.length)
                ? null
                : Arrays.copyOfRange(deletedIndexes, removed, deletedIndexes.length);

        int [] remaining = Arrays.copyOf(locators, locators.length - removed);
        return new PPPCodeObject(remaining, object.getPppForReading());
    }
        
    
@@ -456,6 +551,7 @@ public class PPPCodeLeafCell extends VoronoiLeafCell<PPPCodeObject> implements M
        if (! isValid()) {
            return false;
        }
        consolidateData(null);
        invalidateThisNode(split(getAllObjects(), 0));
        return true;
    }
@@ -513,17 +609,28 @@ public class PPPCodeLeafCell extends VoronoiLeafCell<PPPCodeObject> implements M

    // **********************************       Consolidation of the content      ************************** //
    
    protected void consolidateData(byte [] temporary) {
        writeLock();
        try {
            if (data == null) {
                return;
    /**
     * Merges together all data objects with the same PPP-Codes and filters out the deleted IDs.
     * @param temporary a buffer to be used for the consolidation (so that it does not have to be created for each leaf)
     * @return true, if the consolidation actually took place
     */
    protected boolean consolidateData(byte [] temporary) {
        // if the node is either empty, or it was not modified, it does not need any consolidation
        if (data == null || (writeBufferBits == null && deletedIndexes == null)) {
            return false;
        }
        if (temporary == null) {
            temporary = new byte[1024 * 1024]; 
        }
        
        // read all the objects and put together those with the same PPP
        Map<ShortArray, List<PPPCodeObject>> readObjects = new HashMap<>();
        int objIndex = 0;
        for (AbstractObjectIterator<PPPCodeObject> it = getAllObjects(); it.hasNext();) {
            PPPCodeObject next = it.next();
            int locCount = next.getLocatorCount();
            next = removeDeletedIndexes(objIndex, next);
            objIndex += locCount;
            ShortArray nextPPPCode = new ShortArray(next.getPppForReading());
            List<PPPCodeObject> existing = readObjects.get(nextPPPCode);
            if (existing == null) {
@@ -532,6 +639,7 @@ public class PPPCodeLeafCell extends VoronoiLeafCell<PPPCodeObject> implements M
            existing.add(next);
        }

        // create a single PPPCodeObject for all objects with the same PPP-Code
        List<PPPCodeObject> newObjects = new ArrayList<>(readObjects.size());
        for (List<PPPCodeObject> objsToMerge : readObjects.values()) {
            if (objsToMerge.size() == 1) {
@@ -569,9 +677,7 @@ public class PPPCodeLeafCell extends VoronoiLeafCell<PPPCodeObject> implements M
        data = Arrays.copyOf(temporary, temporaryBuffer.position());
        positionBits = temporaryBuffer.positionInBits();
        writeBufferBits = null;
        } finally {
            writeUnLock();
        }
        return true;
    }

    /**
Loading