Commit deade706 authored by David Novak's avatar David Novak
Browse files

* added a variant of the PPP-Code Voronoi tree that keeps the data (PPP-Codes...

 * added a variant of the PPP-Code Voronoi tree that keeps the data (PPP-Codes + IDs) in a separate binary storage
 * it should be fully backwards compatible
parent a112933b
Loading
Loading
Loading
Loading
+4 −6
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@

    <groupId>mindex</groupId>
    <artifactId>ppp-codes</artifactId>
    <version>1.2.8-DEVEL</version>
    <version>1.2.10-DEVEL</version>
    <packaging>jar</packaging>

    <name>ppp-codes</name>
@@ -20,7 +20,7 @@
                    <target>1.7</target>
                </configuration>
            </plugin>
            <plugin>
<!--            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <version>2.8</version>
@@ -32,7 +32,6 @@
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <!--<outputDirectory>/Users/david/outbox</outputDirectory>-->
                            <outputDirectory>/home/xnovak8/work/simcloud/jars</outputDirectory>
                            <overWriteReleases>false</overWriteReleases>
                            <overWriteSnapshots>false</overWriteSnapshots>
@@ -55,7 +54,6 @@
                                    <type>${project.packaging}</type>
                                </artifactItem>
                            </artifactItems>
                            <!--<outputDirectory>/Users/david/outbox</outputDirectory>-->
                            <outputDirectory>/home/xnovak8/work/simcloud/jars</outputDirectory>
                            <overWriteReleases>false</overWriteReleases>
                            <overWriteSnapshots>false</overWriteSnapshots>
@@ -63,7 +61,7 @@
                        </configuration>
                    </execution>                   
                </executions>
            </plugin>     
            </plugin>     -->
        </plugins>
    </build>
    <properties>
@@ -104,7 +102,7 @@
        <dependency>
            <groupId>${project.groupId}</groupId>
            <artifactId>mindex</artifactId>
            <version>([2.8.0,)</version>
            <version>(2.8.0,)</version>
        </dependency>
    </dependencies>
    <description>PPP-Codes index and search algorithm for approximate metric-based search. 
+6 −4
Original line number Diff line number Diff line
@@ -27,6 +27,8 @@ import java.util.concurrent.atomic.AtomicLong;
import messif.algorithms.AlgorithmMethodException;
import messif.algorithms.NavigationProcessor;
import messif.buckets.BucketDispatcher;
import messif.buckets.storage.LongStorage;
import messif.buckets.storage.impl.DiskStorage;
import messif.objects.LocalAbstractObject;
import messif.operations.AbstractOperation;
import messif.operations.data.BulkInsertOperation;
@@ -40,6 +42,7 @@ import mindex.processors.DeleteOperationNavigationProcessor;
import mindex.processors.InsertOperationNavigationProcessor;
import pppcodes.index.PPPCodeObject;
import pppcodes.index.PPPCodeReadWriter;
import pppcodes.index.persistent.PPPCodeLeafCellFile;

/**
 * The full configuration and some functionality of one PPP-Code tree index. It extends the M-Index because
@@ -58,9 +61,8 @@ public class PPPCodeIndex extends MetricIndex {
    //************************************   Getters ************************//
    
    /**
     * Returns a serializer for PPP Codes
     *
     * @return
     * Returns a serializer for PPP Codes.
     * @return serializer for PPP Codes
     */
    public final PPPCodeReadWriter getPPPCodeReadWriter() {
        if (pppCodeSerializer == null) {
+98 −0
Original line number Diff line number Diff line
/*
 *  This file is part of PPP-Codes library: http://disa.fi.muni.cz/results/software/ppp-codes/
 *
 *  PPP-Codes library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  PPP-Codes library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with PPP-Codes library.  If not, see <http://www.gnu.org/licenses/>.
 */
package pppcodes;

import java.util.Properties;
import messif.algorithms.AlgorithmMethodException;
import messif.buckets.storage.LongStorage;
import messif.buckets.storage.impl.DiskStorage;
import mindex.MIndexProperties;
import mindex.navigation.VoronoiInternalCell;
import pppcodes.index.persistent.PPPCodeInternalCellFile;
import pppcodes.index.persistent.PPPCodeLeafCellFile.PPPCodeLeafData;

/**
 * Variant of the PPP-Code index configuration whose Voronoi leaf data are maintained in a separate
 *  disk storage. The Voronoi cell tree of this index is rooted in a {@link PPPCodeInternalCellFile}.
 * @author David Novak, Masaryk University, Brno, Czech Republic, novak.david@gmail.com
 */
public class PPPCodeIndexFile extends PPPCodeIndex {

    /** Class serial ID for serialization */
    private static final long serialVersionUID = 112401L;

    /** Disk storage used for the leaf node data; {@code null} until a storage is set. */
    protected DiskStorage<PPPCodeLeafData> leafStorage = null;

    // **************************************    Initialization   ************************************* //

    /**
     * Creates a new PPP-Code index configuration from the given properties; the arguments are
     *  handed over unchanged to the {@link PPPCodeIndex} constructor.
     * @param origConfiguration created properties specifying M-Index configuration
     * @param prefix prefix of the properties (e.g. "mindex.") for this PPP-Code single index
     * @throws java.lang.InstantiationException if the file does not contain what it should contain
     * @throws messif.algorithms.AlgorithmMethodException if anything else goes wrong
     */
    public PPPCodeIndexFile(Properties origConfiguration, String prefix) throws InstantiationException, AlgorithmMethodException {
        super(origConfiguration, prefix);
    }

    /**
     * Creates a new PPP-Code index configuration from an already created configuration object,
     *  which is handed over unchanged to the {@link PPPCodeIndex} constructor.
     * @param config M-Index configuration object
     * @throws java.lang.InstantiationException propagated from the superclass constructor
     * @throws messif.algorithms.AlgorithmMethodException propagated from the superclass constructor
     */
    protected PPPCodeIndexFile(MIndexProperties config) throws InstantiationException, AlgorithmMethodException {
        super(config);
    }

    //************************************   Getters ************************//

    /**
     * Returns the disk storage of the leaf data.
     * @return the leaf disk storage or {@code null} if none has been set
     */
    public LongStorage<PPPCodeLeafData> getLeafStorage() {
        return leafStorage;
    }

    /**
     * Sets the storage to be used to store the leaf node data. The storage can be assigned only
     *  while no storage is set yet - after that all leaves have links to a specific storage.
     * @param leafStorage disk storage to store the leaf data
     * @throws IllegalStateException if a leaf storage has already been set
     */
    public void setLeafStorage(DiskStorage<PPPCodeLeafData> leafStorage) throws IllegalStateException {
        if (this.leafStorage == null) {
            this.leafStorage = leafStorage;
            return;
        }
        throw new IllegalStateException("the leaf storage is already set and cannot be changed");
    }

    /**
     * Creates the root of the Voronoi cell tree as a {@link PPPCodeInternalCellFile} bound to this index.
     * @return newly created root cell
     * @throws AlgorithmMethodException declared by the overridden method
     */
    @Override
    protected VoronoiInternalCell initVoronoiCellTree() throws AlgorithmMethodException {
        final short[] emptyArray = new short[0];
        return new PPPCodeInternalCellFile(this, null, emptyArray);
    }

    /**
     * Prints info about this index: the superclass description followed by a line about the leaf storage.
     * @return string representation of this index
     */
    @Override
    public String toString() {
        return new StringBuilder(super.toString())
                .append("Leaf cells in file storage: ")
                .append(String.valueOf(leafStorage))
                .append("\n")
                .toString();
    }

}
+74 −1
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ package pppcodes.algorithms;

import pppcodes.index.PPPCodeReadWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -27,9 +28,14 @@ import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Level;
import java.util.logging.Logger;
import messif.algorithms.Algorithm;
import messif.algorithms.NavigationProcessor;
import messif.algorithms.impl.MultipleOverlaysAlgorithm;
import messif.buckets.storage.impl.DiskStorage;
import messif.operations.AbstractOperation;
import messif.operations.RankingSingleQueryOperation;
import messif.operations.data.BulkInsertOperation;
@@ -38,8 +44,11 @@ import messif.operations.data.InsertOperation;
import messif.operations.query.ApproxKNNQueryOperation;
import messif.operations.GetCandidateSetOperation;
import messif.operations.query.GetRandomObjectsQueryOperation;
import mindex.algorithms.MIndexAlgorithm;
import pppcodes.PPPCodeIndex;
import pppcodes.PPPCodeIndexFile;
import pppcodes.ids.LocatorStringIntConvertor;
import pppcodes.index.persistent.PPPCodeLeafCellFile;
import pppcodes.processors.ApproxNavProcessorCandSet;
import pppcodes.processors.ApproxNavProcessorNorefine;
import pppcodes.processors.GetRandomNoDataObjectsNavigationProcessor;
@@ -60,6 +69,9 @@ public class PPPCodeAlgorithm extends MultipleOverlaysAlgorithm {
    /** Translator of string locators to unique integer IDs and back again. */
    protected final LocatorStringIntConvertor locatorConvertor;
    
    /** A simple lock for periodic algorithm serialization. */
    protected transient Lock serializingLock = new ReentrantLock();
    
    /**
     * Creates a new multi-algorithm overlay for the given collection of algorithms.
     * @param algorithms the algorithms on which the operations are processed
@@ -81,6 +93,11 @@ public class PPPCodeAlgorithm extends MultipleOverlaysAlgorithm {
        this.locatorConvertor = locatorConvertor;
    }
            
    /**
     * Deserialization hook: restores the default-serialized state and then creates a fresh
     *  serialization lock, because the lock field is transient and thus not part of the stream.
     * @param stream stream the object is being read from
     * @throws IOException if reading from the stream fails
     * @throws ClassNotFoundException if a class of a serialized object cannot be found
     */
    private void readObject(java.io.ObjectInputStream stream) throws IOException, ClassNotFoundException {
        stream.defaultReadObject();
        serializingLock = new ReentrantLock();
    }
    
    /**
     * Consolidate data in all leaf nodes of all dynamic Voronoi trees by putting together object IDs with the same PPP.
     */
@@ -90,14 +107,70 @@ public class PPPCodeAlgorithm extends MultipleOverlaysAlgorithm {
        }
    }

    /**
     * Serializes the whole algorithm into the given file if the dynamic cell tree of at least
     *  one of the M-Index sub-algorithms reports a modification; otherwise does nothing.
     *  The check and the store are both done while holding the serialization lock.
     *  This method is typically called periodically.
     * @param serializationFile file to serialize the whole algorithm into
     * @throws IOException if the serialization failed
     */
    public void checkModifiedAndStore(String serializationFile) throws IOException {
        serializingLock.lock();
        try {
            boolean storeNeeded = false;
            for (MIndexAlgorithm overlay : algorithms) {
                if (overlay.getmIndex().isDynamicTreeModified()) {
                    // one modified tree is enough, the remaining overlays need not be asked
                    storeNeeded = true;
                    break;
                }
            }
            if (!storeNeeded) {
                return;
            }
            // Store algorithm to file
            storeToFile(serializationFile);
        } finally {
            serializingLock.unlock();
        }
    }

    /**
     * Passes the before-store notification first to the superclass and then to each of the
     *  M-Index sub-algorithms.
     * @param filepath path of the file the algorithm is going to be stored into
     */
    @Override
    protected void beforeStoreToFile(String filepath) {
        super.beforeStoreToFile(filepath);
        for (MIndexAlgorithm overlay : algorithms) {
            overlay.beforeStoreToFile(filepath);
        }
    }

    /**
     * Passes the after-store notification first to the superclass and then to each of the
     *  M-Index sub-algorithms.
     * @param filepath path of the file the algorithm was stored into
     * @param successful flag forwarded unchanged to the superclass and the sub-algorithms
     */
    @Override
    protected void afterStoreToFile(String filepath, boolean successful) {
        super.afterStoreToFile(filepath, successful);
        for (MIndexAlgorithm overlay : algorithms) {
            overlay.afterStoreToFile(filepath, successful);
        }
    }
    
    /**
     * If the PPP-Codes are configured using {@link PPPCodeIndexFile} then this method
     *  sets the storage to be used to store the leaf node data. The storage is passed to every
     *  overlay whose index is a {@link PPPCodeIndexFile}; overlays with other index types are skipped.
     *  Afterwards the data are consolidated and the storage file is logged. This method is effective
     *  only for the first time - after that all leaves have links to a specific storage.
     * @param leafStorage disk storage to store the leaf data of all PPP-Code overlays; must not be {@code null}
     * @throws NullPointerException if {@code leafStorage} is {@code null}
     * @throws IllegalStateException if a {@link PPPCodeIndexFile} of some overlay already has its leaf storage set
     */
    public void setLeafStorage(DiskStorage<PPPCodeLeafCellFile.PPPCodeLeafData> leafStorage) throws IllegalStateException {
        // fail fast: a null storage would otherwise surface only as an NPE in the log statement below,
        //  i.e. after all the indexes were touched and the data were consolidated
        if (leafStorage == null) {
            throw new NullPointerException("leafStorage must not be null");
        }
        for (PPPCodeSingleAlgorithm singleAlg : algorithms) {
            if (singleAlg.getmIndex() instanceof PPPCodeIndexFile) {
                ((PPPCodeIndexFile) singleAlg.getmIndex()).setLeafStorage(leafStorage);
            }
        }
        consolidateData();
        Logger.getLogger(PPPCodeAlgorithm.class.getName()).log(Level.INFO, "leaf disk storage set to: {0}", leafStorage.getFile());
    }
    
    // **********************************       Overriding methods      ******************************* //
    
    @Override
    public void setOperationsThreadPool(ExecutorService operationsThreadPool) {
        super.setOperationsThreadPool(operationsThreadPool); //To change body of generated methods, choose Tools | Templates.
        for (Algorithm algorithm : algorithms) {
            if (algorithm.getOperationsThreadPool() == null) {
            if (algorithm.getOperationsThreadPool() == null || algorithm.getOperationsThreadPool().isShutdown()) {
                algorithm.setOperationsThreadPool(operationsThreadPool);
                System.out.println("setting thread pool also to sub-algorithm");
            }
        }
    }
+13 −0
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@ public class PPPCodeSingleAlgorithm extends MIndexAlgorithm {
     * @param properties text file with M-Index properties settings
     * @param prefix prefix (e.g. "mindex.") for items in property file that should be considered (the rest is ignored)
     * @throws java.lang.InstantiationException if the file does not contain what it should contain
     * @throws messif.algorithms.AlgorithmMethodException if anything else goes wrong
     */
    @Algorithm.AlgorithmConstructor(description = "Constructor from a created properties object", arguments = {"M-Index properties", "prefix"})
    public PPPCodeSingleAlgorithm(Properties properties, String prefix) throws InstantiationException, AlgorithmMethodException {
@@ -53,12 +54,24 @@ public class PPPCodeSingleAlgorithm extends MIndexAlgorithm {
     * @param properties text file with M-Index properties settings
     * @param prefix prefix (e.g. "mindex.") for items in property file that should be considered (the rest is ignored)
     * @throws java.lang.InstantiationException if the file does not contain what it should contain
     * @throws messif.algorithms.AlgorithmMethodException if anything else goes wrong
     */
    @Algorithm.AlgorithmConstructor(description = "Constructor from a created properties object", arguments = {"Algorithm name", "M-Index properties", "prefix"})
    public PPPCodeSingleAlgorithm(String name, Properties properties, String prefix) throws InstantiationException, AlgorithmMethodException {
        super(name, new PPPCodeIndex(properties, prefix));
    }

    /**
     * Internal constructor that creates the algorithm given its name and PPP-Code config object;
     *  both arguments are passed unchanged to the superclass constructor.
     * @param name algorithm name
     * @param index PPP-Code configuration object
     * @throws java.lang.InstantiationException propagated from the superclass constructor
     * @throws messif.algorithms.AlgorithmMethodException propagated from the superclass constructor
     */
    protected PPPCodeSingleAlgorithm(String name, PPPCodeIndex index) throws InstantiationException, AlgorithmMethodException {
        super(name, index);
    }
    
    // ************************      Getters and setters      ************************* //

    /** 
Loading