Commit a509a6b6 authored by David Novak's avatar David Novak
Browse files

* copied and modified source files from M-Index project

parent d031b41f
Loading
Loading
Loading
Loading
+33 −6
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@

  <groupId>mindex</groupId>
  <artifactId>ppp-codes</artifactId>
  <version>1.0-SNAPSHOT</version>
  <version>1.1</version>
  <packaging>jar</packaging>

  <name>ppp-codes</name>
@@ -27,21 +27,48 @@
  </properties>

  <dependencies>
    <dependency>
      <groupId>messif</groupId>
      <artifactId>messif</artifactId>
      <version>2.1-DEVEL</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <version>4.10</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>messif</groupId>
      <artifactId>messif</artifactId>
      <version>2.1-DEVEL</version>
      <groupId>org.jmock</groupId>
      <artifactId>jmock-legacy</artifactId>
      <version>2.6.0</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.jmock</groupId>
      <artifactId>jmock-junit4</artifactId>
      <version>2.6.0</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.hamcrest</groupId>
      <artifactId>hamcrest-core</artifactId>
      <version>1.3</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.hamcrest</groupId>
      <artifactId>hamcrest-library</artifactId>
      <version>1.3</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>mindex</artifactId>
      <version>2.6.3-DEVEL</version>
      <version>2.7.1-DEVEL</version>
    </dependency>
  </dependencies>
    <description>PPP-Codes index and search algorithm for approximate metric-based search. 
Version 1.1: simple and hopefully efficient id-object index.
</description>
</project>
+288 −0
Original line number Diff line number Diff line
/*
 *  This file is part of MESSIF library.
 *
 *  MESSIF library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  MESSIF library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with MESSIF library.  If not, see <http://www.gnu.org/licenses/>.
 */
package messif.buckets.index.impl;

import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import messif.buckets.BucketStorageException;
import messif.buckets.index.IndexComparator;
import messif.buckets.index.impl.LongStorageMemoryIndex;
import messif.buckets.storage.impl.DiskStorage;
import messif.objects.LocalAbstractObject;
import mindex.MetricIndexes;

/**
 * Index of objects kept in a {@link DiskStorage} and addressed by an integer key.
 * The keys and the corresponding storage positions are always held in memory; the
 * objects themselves are read from the storage on demand.
 *
 * <p>
 * The keys are held in two sorted structures: a pair of parallel "static" arrays
 * (keys and storage positions) and a "dynamic" list that receives newly added
 * objects. Whenever the dynamic list grows over {@link #MAX_DYNAMIC_LENGTH} entries,
 * and always before this index is serialized, the dynamic list is merged into the
 * static arrays. Lookups use binary search on both structures.
 * </p>
 *
 * <p>
 * This class does no synchronization of its own.
 * </p>
 *
 * @author Michal Batko, Masaryk University, Brno, Czech Republic, batko@fi.muni.cz
 * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz
 * @author David Novak, Masaryk University, Brno, Czech Republic, david.novak@fi.muni.cz
 */
public class DiskStorageMemoryIntIndex implements Serializable {

    /** Class serial id for serialization. */
    private static final long serialVersionUID = 1021301L;

    /** Maximal length of the dynamic index; if exceeded, then the indexes are moved to static arrays */
    private static final int MAX_DYNAMIC_LENGTH = 1024 * 1024;

    //****************** Attributes ******************//

    /** Storage associated with this index; set to {@code null} by {@link #destroy()} */
    private DiskStorage<LocalAbstractObject> storage;

    /**
     * Ordered dynamic index of (key, storage position) pairs of recently added objects.
     * It is a random-access list because it is binary-searched by position; it is
     * transient and is merged into the static arrays before serialization.
     */
    private transient List<IntKeyAddressPair> dynamicIndex;

    /** Fixed static ordered array of keys. */
    private int [] staticIndex;

    /** Storage positions of the objects; {@code staticPositions[i]} belongs to {@code staticIndex[i]}. */
    private long [] staticPositions;


    //****************** Constructor ******************//

    /**
     * Creates a new empty instance of DiskStorageMemoryIntIndex for the specified storage.
     * @param storage the storage to associate with this index
     */
    public DiskStorageMemoryIntIndex(DiskStorage<LocalAbstractObject> storage) {
        this.storage = storage;
        this.dynamicIndex = new ArrayList<>();
        this.staticIndex = new int [0];
        this.staticPositions = new long [0];
    }

    /**
     * Creates a new instance of DiskStorageMemoryIntIndex that takes over the storage and
     * the (key, position) pairs of an existing {@link LongStorageMemoryIndex}.
     * The storage is read reflectively from the private field {@code storage} of the old
     * index; the pairs are copied in the order in which the old index returns them, so
     * the old index is expected to be ordered by key.
     * @param oldIndex the index whose storage and key-position pairs are taken over
     * @throws IllegalStateException if the storage of the old index cannot be accessed
     */
    public DiskStorageMemoryIntIndex(LongStorageMemoryIndex<Integer, LocalAbstractObject> oldIndex) {
        try {
            Field storageField = LongStorageMemoryIndex.class.getDeclaredField("storage");
            storageField.setAccessible(true);
            // the generic type of the reflectively read field cannot be checked at run time
            @SuppressWarnings("unchecked")
            DiskStorage<LocalAbstractObject> oldStorage = (DiskStorage<LocalAbstractObject>) storageField.get(oldIndex);
            this.storage = oldStorage;
        } catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException ex) {
            // without the storage this index would be unusable, so fail loudly instead of
            // returning a half-initialized instance
            throw new IllegalStateException("Cannot access the storage of the given index", ex);
        }

        this.dynamicIndex = new ArrayList<>();
        this.staticIndex = new int [oldIndex.size()];
        this.staticPositions = new long [oldIndex.size()];

        // init this index from the original index data
        for (int i = 0; i < staticIndex.length; i++) {
            LongStorageMemoryIndex.KeyAddressPair<Integer> keyAddress = oldIndex.get(i);
            staticIndex[i] = keyAddress.key;
            staticPositions[i] = keyAddress.position;
        }
    }

    /**
     * Finalizes the associated storage (if this index was not destroyed before).
     * @throws Throwable if the storage or the super class finalization fails
     */
    @Override
    public void finalize() throws Throwable {
        try {
            // the storage is null after destroy() was called
            if (storage != null) {
                storage.finalize();
            }
        } finally {
            super.finalize();
        }
    }

    /**
     * Destroys the associated storage and drops the reference to it.
     * The index must not be used for reading or adding objects afterwards.
     * @throws Throwable if the storage cannot be destroyed
     */
    public void destroy() throws Throwable {
        storage.destroy();
        storage = null;
    }


    // **********************    (De)serialization methods     ************************** //

    /**
     * Serializes this index; the (transient) dynamic index is merged into the static
     * arrays first so that no key is lost.
     * @param out the stream to write this index to
     * @throws IOException if the index cannot be written
     */
    private void writeObject(java.io.ObjectOutputStream out) throws IOException {
        clearDynamicIndex();
        out.defaultWriteObject();
    }

    /**
     * Deserializes this index and creates a new empty dynamic index.
     * @param in the stream to read this index from
     * @throws IOException if the index cannot be read
     * @throws ClassNotFoundException if a class of a serialized object cannot be found
     */
    private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        this.dynamicIndex = new ArrayList<>();
    }

    /**
     * If the dynamic index is not empty then this methods moves all its data to the static arrays (sorted merge).
     * For equal keys, the entries of the static arrays precede the entries of the dynamic index.
     */
    private void clearDynamicIndex() {
        if (dynamicIndex.isEmpty()) {
            return;
        }
        int mergedLength = staticIndex.length + dynamicIndex.size();
        int [] newStaticIndex = new int [mergedLength];
        long [] newStaticPositions = new long [mergedLength];

        int staticFrom = 0;
        int dest = 0;
        for (IntKeyAddressPair dynamicPair : dynamicIndex) {
            // first copy all static entries that are not bigger than the current dynamic key
            while (staticFrom < staticIndex.length && staticIndex[staticFrom] <= dynamicPair.key) {
                newStaticIndex[dest] = staticIndex[staticFrom];
                newStaticPositions[dest ++] = staticPositions[staticFrom ++];
            }
            newStaticIndex[dest] = dynamicPair.key;
            newStaticPositions[dest ++] = dynamicPair.position;
        }
        // Copy the remaining data from the old static array
        if (staticFrom < staticIndex.length) {
            System.arraycopy(staticIndex, staticFrom, newStaticIndex, dest, staticIndex.length - staticFrom);
            System.arraycopy(staticPositions, staticFrom, newStaticPositions, dest, staticIndex.length - staticFrom);
        }

        // set newly created indexes
        staticIndex = newStaticIndex;
        staticPositions = newStaticPositions;
        dynamicIndex.clear();
    }

    //******************    Search and order      ******************//

    /**
     * Binary search for the given key either in the static array or in the dynamic index.
     * @param inStaticIndex if true, the static array is searched, otherwise the dynamic index
     * @param key the key to search for
     * @param low the lowest position to be searched (inclusive)
     * @param high the highest position to be searched (inclusive)
     * @param indicateFailure determines the value returned if the key is not found
     * @return the position of the key, if found; otherwise the insertion point {@code p}
     *          of the key is returned either as {@code -(p + 1)} (if {@code indicateFailure}
     *          is true) or as {@code p} itself
     * @throws IndexOutOfBoundsException if the given bounds are outside the searched structure
     * @throws IllegalStateException not thrown by this implementation
     */
    protected int binarySearch(boolean inStaticIndex, int key, int low, int high, boolean indicateFailure) throws IndexOutOfBoundsException, IllegalStateException {
        while (low <= high) {
            int mid = (low + high) >>> 1;
            int cmp = Integer.compare(key, inStaticIndex ? staticIndex[mid] : dynamicIndex.get(mid).key);

            if (cmp > 0) {
                low = mid + 1;
            } else if (cmp < 0) {
                high = mid - 1;
            } else {
                return mid; // key found
            }
        }

        // Key not found
        if (indicateFailure) {
            return -(low + 1);
        }
        return low;
    }


    // ******************     Index access methods     ****************** //

    /**
     * Given a storage position, this method simply returns object on given position in the storage.
     * @param position storage position
     * @return object on given position in the storage
     * @throws IllegalStateException if the storage fails to read the object
     */
    private LocalAbstractObject getObjectByStoragePosition(long position) {
        try {
            return storage.read(position);
        } catch (BucketStorageException ex) {
            throw new IllegalStateException("Cannot read object from storage", ex);
        }
    }

    /**
     * Given an integer key, this methods returns corresponding object from the storage or null (if not found).
     * The static arrays are searched first, the dynamic index second.
     * @param key object integer key
     * @return corresponding object from the storage or null (if not found)
     * @throws IllegalStateException if the storage fails to read the object
     */
    public LocalAbstractObject getObject(int key) {
        // try to find the key in the static index
        int keyIndex = binarySearch(true, key, 0, staticIndex.length - 1, true);
        if (keyIndex >= 0) {
            return getObjectByStoragePosition(staticPositions[keyIndex]);
        }
        if (! dynamicIndex.isEmpty()) {
            keyIndex = binarySearch(false, key, 0, dynamicIndex.size() - 1, true);
            // a non-negative value (including position 0) means that the key was found
            if (keyIndex >= 0) {
                return getObjectByStoragePosition(dynamicIndex.get(keyIndex).position);
            }
        }
        return null;
    }

    /**
     * Returns the maximal currently indexed key.
     * @return the maximal currently indexed key or -1 if this index is empty
     */
    public int getMaxKey() {
        int maxStaticKey = staticIndex.length > 0 ? staticIndex[staticIndex.length - 1] : -1;
        int maxDynamicKey = dynamicIndex.isEmpty() ? -1 : dynamicIndex.get(dynamicIndex.size() - 1).key;
        return Math.max(maxStaticKey, maxDynamicKey);
    }

    /**
     * Searches for the point in the dynamic index where to insert an object with the given key.
     * @param key key of the object to be inserted
     * @return the position in the dynamic index where to put the object
     * @throws BucketStorageException if there was a problem determining the point
     */
    protected int insertionPoint(int key) throws BucketStorageException {
        return binarySearch(false, key, 0, dynamicIndex.size() - 1, false);
    }

    /**
     * Returns the integer key of the given object as provided by
     * {@link MetricIndexes#getIntegerID}.
     * @param object the object to get the key for
     * @return the integer key of the given object
     */
    protected int extractKey(LocalAbstractObject object) {
        return MetricIndexes.getIntegerID(object);
    }

    /**
     * Adds an object to the storage and its integer key to the dynamic key index.
     * The key is not checked for duplicates. If the dynamic index exceeds
     * {@link #MAX_DYNAMIC_LENGTH} entries, it is merged into the static arrays.
     * @param object object to be added to this index and its storage
     * @return true if the insertion worked ok
     * @throws BucketStorageException if the storage does not accept the object
     */
    public boolean add(LocalAbstractObject object) throws BucketStorageException {
        // Search for the position where the object is added into index
        int key = extractKey(object);
        int pos = insertionPoint(key);

        dynamicIndex.add(pos, new IntKeyAddressPair(key, storage.store(object).getAddress()));
        if (dynamicIndex.size() > MAX_DYNAMIC_LENGTH) {
            clearDynamicIndex();
        }

        return true;
    }

    /**
     * Class encapsulating the integer key and the long position in the storage.
     */
    protected static class IntKeyAddressPair {

        /** Integer key of the object */
        public final int key;

        /** Position of the object in the storage */
        public final long position;

        /**
         * Creates a new key-position pair.
         * @param key the integer key of the object
         * @param position the position of the object in the storage
         */
        public IntKeyAddressPair(int key, long position) {
            this.key = key;
            this.position = position;
        }
    }

}
+37 −0
Original line number Diff line number Diff line
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package pppcodes;

import java.io.Serializable;
import java.util.Map;
import messif.objects.LocalAbstractObject;
import messif.operations.AbstractOperation;
import messif.operations.query.ApproxKNNQueryOperation;

/**
 * Approximate k-nearest neighbors query whose textual representation also lists
 * the additional parameters set on the operation.
 *
 * @author xnovak8
 */
@AbstractOperation.OperationName("Approximate k-nearest neighbors query for PPP-Code")
public class ApproxKNNTestQuery extends ApproxKNNQueryOperation {

    /**
     * Creates the query for the given query object and number of neighbors.
     * @param queryObject the query object passed to the super class
     * @param k the number of nearest objects passed to the super class
     */
    @AbstractOperation.OperationConstructor({"Query object", "Number of nearest objects"})
    public ApproxKNNTestQuery(LocalAbstractObject queryObject, int k) {
        super(queryObject, k);
    }

    /**
     * Returns the text of the super class followed (on a new line, if there are any
     * parameters) by a "name: value, " item for each entry of the parameter map.
     * @return the textual representation of this query including its parameters
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder(super.toString());
        if (getParameterCount() > 0) {
            text.append('\n');
        }
        for (Map.Entry<String, ? extends Serializable> entry : getParameterMap().entrySet()) {
            text.append(entry.getKey());
            text.append(": ");
            text.append(entry.getValue());
            text.append(", ");
        }
        return text.toString();
    }

}
+192 −0
Original line number Diff line number Diff line
package pppcodes;

import pppcodes.index.PPPCodeInternalCell;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import messif.algorithms.AlgorithmMethodException;
import messif.algorithms.NavigationProcessor;
import messif.buckets.BucketDispatcher;
import messif.objects.LocalAbstractObject;
import messif.operations.AbstractOperation;
import messif.operations.data.BulkInsertOperation;
import messif.operations.data.DeleteOperation;
import messif.operations.data.InsertOperation;
import mindex.MIndexPPCalculator;
import mindex.MIndexProperties;
import mindex.MIndexProperty;
import mindex.MetricIndex;
import mindex.navigation.VoronoiInternalCell;
import mindex.processors.DeleteOperationNavigationProcessor;
import mindex.processors.InsertOperationNavigationProcessor;
import pppcodes.index.PPPCodeObject;
import pppcodes.index.PPPCodeReadWriter;

/**
 * A single PPP-Code index built as a specialization of {@link MetricIndex}: it uses a
 * {@link PPPCodeInternalCell} as the root of the Voronoi cell tree, PPP-Code specific
 * pivot permutation calculators and no bucket dispatcher. It processes insert, bulk
 * insert and delete operations.
 *
 * @author xnovak8
 */
public class PPPCodeIndex extends MetricIndex {

    /** Class serial ID for serialization */
    private static final long serialVersionUID = 112300L;

    /** Lazily created (and not serialized) reader/writer of PPP Codes; null until first requested */
    protected transient PPPCodeReadWriter pppCodeSerializer;

    //************************************   Getters ************************//

    /**
     * Returns a serializer for PPP Codes; it is created on the first call.
     * @return the reader/writer of PPP Codes bound to this index
     */
    public final PPPCodeReadWriter getPPPCodeReadWriter() {
        PPPCodeReadWriter readWriter = pppCodeSerializer;
        if (readWriter == null) {
            readWriter = new PPPCodeReadWriter(this);
            pppCodeSerializer = readWriter;
        }
        return readWriter;
    }

    /** This index always reports {@code false} for this setting. */
    @Override
    public boolean isSpecialOverfilledBuckets() {
        return false;
    }

    /** This index always reports {@code false} for this setting. */
    @Override
    public boolean isApproxProcessWholeMultiBucket() {
        return false;
    }

    /** This index always reports {@code false} for this setting. */
    @Override
    public boolean isApproxCheckKeyInterval() {
        return false;
    }

    /** This index always reports {@code false} for this setting. */
    @Override
    public boolean isMultiBuckets() {
        return false;
    }

    /** This index always reports {@code false} for this setting. */
    @Override
    public boolean isPreciseSearch() {
        return false;
    }


    // **************************************    Initialization   ************************************* //

    /**
     * Creates a new PPP-Code index from general properties; the properties are wrapped
     * into {@link MIndexProperties} using the given prefix.
     * @param origConfiguration properties specifying the index configuration
     * @param prefix prefix of the properties (e.g. "mindex.") for this PPP-Code single index
     * @throws java.lang.InstantiationException if thrown while the index is created
     * @throws AlgorithmMethodException if thrown while the index is created
     */
    public PPPCodeIndex(Properties origConfiguration, String prefix) throws InstantiationException, AlgorithmMethodException {
        this(new MIndexProperties(origConfiguration, prefix));
    }

    /**
     * Creates a new PPP-Code index from already prepared M-Index properties.
     * @param config the configuration passed to the super class
     * @throws InstantiationException if thrown by the super class constructor
     * @throws AlgorithmMethodException if thrown by the super class constructor
     */
    protected PPPCodeIndex(MIndexProperties config) throws InstantiationException, AlgorithmMethodException {
        super(config);
    }

    /**
     * Creates the root of the cell tree: a {@link PPPCodeInternalCell} with no parent
     * and an empty pivot permutation prefix.
     */
    @Override
    protected VoronoiInternalCell initVoronoiCellTree() throws AlgorithmMethodException {
        final short[] emptyPrefix = new short[0];
        return new PPPCodeInternalCell(this, null, emptyPrefix);
    }

    /**
     * Creates the PPP-Code calculator: the parallel one if more than one thread is
     * configured by {@link MIndexProperty#PPCALCULATOR_THREADS}, the sequential one otherwise.
     */
    @Override
    protected MIndexPPCalculator initPPCalculator(MIndexProperties config) throws InstantiationException {
        final int configuredThreads = config.getIntProperty(MIndexProperty.PPCALCULATOR_THREADS);
        if (configuredThreads <= 1) {
            return new SequentialPPPCodeCalculator(config);
        }
        return new ParallelPPPCodeCalculator(config);
    }

    /** No bucket dispatcher is created by this index; always returns {@code null}. */
    @Override
    protected BucketDispatcher initBucketDispatcher(MIndexProperties config) throws InstantiationException {
        return null;
    }

    // *********************  Methods taking care about the storage and dynamic levels of M-Index  ********************** //

    /**
     * Returns the number of objects as reported by the cell tree.
     * @return the number of objects stored by this index
     */
    @Override
    public int getObjectCount() {
        return voronoiCellTree.getObjectCount();
    }

    /**
     * Collects statistics about this PPP-Code single index by passing all the given
     * accumulators to the cell tree.
     * @param intCellNumbers accumulator passed to the cell tree
     * @param branchingSums accumulator passed to the cell tree
     * @param leafCellNumber accumulator passed to the cell tree
     * @param dataSizeBytes accumulator passed to the cell tree
     * @param leafLevelSum accumulator passed to the cell tree
     */
    public void getTreeSize(Map<Short,AtomicInteger> intCellNumbers, Map<Short, AtomicLong> branchingSums, AtomicInteger leafCellNumber, AtomicLong dataSizeBytes, AtomicLong leafLevelSum) {
        voronoiCellTree.calculateTreeSize(intCellNumbers, branchingSums, leafCellNumber, dataSizeBytes, leafLevelSum);
    }

    /**
     * Asks the root cell (which must be a {@link PPPCodeInternalCell}) to consolidate
     * its data, handing it a fresh 1 MiB byte array.
     */
    public void consolidateData() {
        final PPPCodeInternalCell rootCell = (PPPCodeInternalCell) voronoiCellTree;
        rootCell.consolidateData(new byte[1024 * 1024]);
    }

    /**
     * Returns the pivot permutation prefix of the given object: taken directly from a
     * {@link PPPCodeObject}, otherwise resolved by the super class.
     */
    @Override
    public short [] readPPP(LocalAbstractObject object) {
        if (!(object instanceof PPPCodeObject)) {
            return super.readPPP(object);
        }
        return ((PPPCodeObject) object).getPppForReading();
    }


    // *******************************   Methods processing operations *************************************** //

    /** Lists the supported operations: bulk insert, insert and delete. */
    @Override
    protected Collection<Class<? extends AbstractOperation>> getSupportedOpList() {
        return Arrays.asList(
                BulkInsertOperation.class,
                InsertOperation.class,
                DeleteOperation.class);
    }

    /**
     * Creates the navigation processor for the given operation: an insert processor
     * for (bulk) insert operations and a delete processor for delete operations.
     * @param operation to be processed
     * @return a specific implementation of NavigationProcessor
     * @throws IllegalStateException if the creation of the processor fails with {@link AlgorithmMethodException}
     * @throws UnsupportedOperationException if the operation is of any other type
     */
    @Override
    public NavigationProcessor<? extends AbstractOperation> getNavigationProcessor(AbstractOperation operation) {
        final boolean isInsert = operation instanceof InsertOperation || operation instanceof BulkInsertOperation;
        try {
            if (isInsert) {
                return InsertOperationNavigationProcessor.getInstance(operation, this);
            } else if (operation instanceof DeleteOperation) {
                return DeleteOperationNavigationProcessor.getInstance((DeleteOperation) operation, this);
            }
        } catch (AlgorithmMethodException e) {
            throw new IllegalStateException(e);
        }
        throw new UnsupportedOperationException("PPP-Code single algorithm does not support processing of operation " + operation.getClass());
    }


    // *********************************************   Auxiliary methods   ********************************* //

    /**
     * Prints info about this index: all configuration properties, the description of
     * the pivot permutation calculator and the number of stored objects.
     * @return string representation of this index
     */
    @Override
    public String toString() {
        final StringBuilder info = new StringBuilder("Single PPP-Code algorithm: \n");
        for (String name : configuration.stringPropertyNames()) {
            info.append("\t");
            info.append(name);
            info.append(" = ");
            info.append(configuration.getProperty(name));
            info.append('\n');
        }
        // pivot settings information
        info.append(ppCalculator.toString());

        // storage information
        info.append("\nStoring: ");
        info.append(getObjectCount());
        info.append(" objects\n");
        return info.toString();
    }

}
+77 −0
Original line number Diff line number Diff line
/*
 *  This file is part of M-Index library.
 *
 *  M-Index library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  M-Index library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with M-Index library.  If not, see <http://www.gnu.org/licenses/>.
 */
package pppcodes;

import pppcodes.index.PPPCodeObject;
import messif.algorithms.AlgorithmMethodException;
import messif.objects.LocalAbstractObject;
import messif.objects.PrecomputedDistancesFilter;
import mindex.MIndexPPCalculator;
import mindex.MIndexProperties;
import mindex.MetricIndexes;
import mindex.ParallelPPCalculator;

/**
 * A simple extension of {@link ParallelPPCalculator} for PPP-Codes: pivot filtering
 * and precise search are switched off and the initialization of an object produces
 * a {@link PPPCodeObject}.
 *
 * @author David Novak, Masaryk University, Brno, Czech Republic, david.novak@fi.muni.cz
 */
public class ParallelPPPCodeCalculator extends ParallelPPCalculator {

    /** Class serial ID for serialization */
    private static final long serialVersionUID = 114090L;

    /**
     * Creates the calculator from the given M-Index properties.
     *
     * @param mIndexProperties properties with M-Index configuration
     * @throws InstantiationException if thrown by the super class constructor
     */
    public ParallelPPPCodeCalculator(MIndexProperties mIndexProperties) throws InstantiationException {
        super(mIndexProperties);
    }

    // *********************  Getter overrides    ************************ //

    /** Precise search is never used by this calculator. */
    @Override
    public boolean isPreciseSearch() {
        return false;
    }

    /** Pivot filtering is never used by this calculator. */
    @Override
    public boolean isUsePivotFiltering() {
        return false;
    }

    // *************** Init object - create the PPP-Code representation ********************* //

    /**
     * Creates the PPP-Code representation of the given object: a new
     * {@link PPPCodeObject} built from the integer ID of the object and from the
     * value returned by {@code getObjectPPP} for it.
     *
     * @param object to initialize
     * @return the newly created PPP-Code object
     * @throws AlgorithmMethodException declared by the overridden method
     */
    @Override
    public LocalAbstractObject initPP(LocalAbstractObject object) throws AlgorithmMethodException {
        return new PPPCodeObject(
                MetricIndexes.getIntegerID(object),
                getObjectPPP(object));
    }

}
Loading