// Loading pom.xml +33 −6 Original line number Diff line number Diff line Loading @@ -4,7 +4,7 @@ <groupId>mindex</groupId> <artifactId>ppp-codes</artifactId> <version>1.0-SNAPSHOT</version> <version>1.1</version> <packaging>jar</packaging> <name>ppp-codes</name> Loading @@ -27,21 +27,48 @@ </properties> <dependencies> <dependency> <groupId>messif</groupId> <artifactId>messif</artifactId> <version>2.1-DEVEL</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>3.8.1</version> <version>4.10</version> <scope>test</scope> </dependency> <dependency> <groupId>messif</groupId> <artifactId>messif</artifactId> <version>2.1-DEVEL</version> <groupId>org.jmock</groupId> <artifactId>jmock-legacy</artifactId> <version>2.6.0</version> <scope>test</scope> </dependency> <dependency> <groupId>org.jmock</groupId> <artifactId>jmock-junit4</artifactId> <version>2.6.0</version> <scope>test</scope> </dependency> <dependency> <groupId>org.hamcrest</groupId> <artifactId>hamcrest-core</artifactId> <version>1.3</version> <scope>test</scope> </dependency> <dependency> <groupId>org.hamcrest</groupId> <artifactId>hamcrest-library</artifactId> <version>1.3</version> <scope>test</scope> </dependency> <dependency> <groupId>${project.groupId}</groupId> <artifactId>mindex</artifactId> <version>2.6.3-DEVEL</version> <version>2.7.1-DEVEL</version> </dependency> </dependencies> <description>PPP-Codes index and search algorithm for approximate metric-based search. Version 1.1: simple and hopefully efficient id-object index. </description> </project>
// src/main/java/messif/buckets/index/impl/DiskStorageMemoryIntIndex.java 0 → 100644 +288 −0 Original line number Diff line number Diff line
/*
 *  This file is part of MESSIF library.
 *
 *  MESSIF library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  MESSIF library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with MESSIF library.  If not, see <http://www.gnu.org/licenses/>.
 */
package messif.buckets.index.impl;

import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import messif.buckets.BucketStorageException;
import messif.buckets.index.IndexComparator;
import messif.buckets.index.impl.LongStorageMemoryIndex;
import messif.buckets.storage.impl.DiskStorage;
import messif.objects.LocalAbstractObject;
import mindex.MetricIndexes;

/**
 * Index of {@link LocalAbstractObject}s kept in a {@link DiskStorage} and looked up by an integer key.
 *
 * <p>
 * Only the keys and the storage positions are held in memory. They live in two places:
 * a pair of sorted parallel arrays (the "static" index) and a sorted list of recently added
 * entries (the "dynamic" index). The dynamic index is merged into the static arrays when it
 * grows over {@code MAX_DYNAMIC_LENGTH} entries and right before the index is serialized.
 * All lookups use binary search.
 * </p>
 *
 * <p>This class performs no synchronization of its own.</p>
 *
 * @author Michal Batko, Masaryk University, Brno, Czech Republic, batko@fi.muni.cz
 * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz
 * @author David Novak, Masaryk University, Brno, Czech Republic, david.novak@fi.muni.cz
 */
public class DiskStorageMemoryIntIndex implements Serializable {

    /** Class serial id for serialization. */
    private static final long serialVersionUID = 1021301L;

    /** Maximal length of the dynamic index; if exceeded, its entries are moved to the static arrays. */
    private static final int MAX_DYNAMIC_LENGTH = 1024 * 1024;

    //****************** Attributes ******************//

    /** Storage associated with this index; {@code null} after {@link #destroy()}. */
    private DiskStorage<LocalAbstractObject> storage;

    /**
     * Key-ordered list of recently added entries. It is transient: its content is merged into the
     * static arrays before serialization and an empty list is created on deserialization.
     * A random-access list is used so that every binary-search probe is a constant-time read.
     */
    private transient List<IntKeyAddressPair> dynamicIndex;

    /** Sorted keys of the static index. */
    private int[] staticIndex;

    /** Storage positions of the objects; {@code staticPositions[i]} belongs to {@code staticIndex[i]}. */
    private long[] staticPositions;

    //****************** Constructors ******************//

    /**
     * Creates a new empty index over the specified storage.
     * @param storage the storage to associate with this index
     */
    public DiskStorageMemoryIntIndex(DiskStorage<LocalAbstractObject> storage) {
        this.storage = storage;
        this.dynamicIndex = new ArrayList<>();
        this.staticIndex = new int[0];
        this.staticPositions = new long[0];
    }

    /**
     * Creates a new index that takes over the storage and all key/position pairs of an existing
     * {@link LongStorageMemoryIndex}. The storage is read from the private {@code storage} field
     * of the old index via reflection.
     *
     * @param oldIndex the index to convert; its entries are copied in the order returned by
     *          {@code oldIndex.get(i)} (NOTE(review): presumably ascending by key, which the
     *          binary search of this class relies on - confirm)
     * @throws IllegalStateException if the storage of the old index cannot be accessed
     */
    public DiskStorageMemoryIntIndex(LongStorageMemoryIndex<Integer, LocalAbstractObject> oldIndex) {
        try {
            Field storageField = LongStorageMemoryIndex.class.getDeclaredField("storage");
            storageField.setAccessible(true);
            @SuppressWarnings("unchecked") // the field type is erased; the caller guarantees the object type
            DiskStorage<LocalAbstractObject> oldStorage = (DiskStorage<LocalAbstractObject>) storageField.get(oldIndex);
            this.storage = oldStorage;
        } catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException ex) {
            // Fail fast: a half-initialized index (null storage and arrays) would only fail later with a NullPointerException
            throw new IllegalStateException("Cannot access the storage of the original index", ex);
        }

        int size = oldIndex.size();
        this.dynamicIndex = new ArrayList<>();
        this.staticIndex = new int[size];
        this.staticPositions = new long[size];

        // init this index from the original index data
        for (int i = 0; i < size; i++) {
            LongStorageMemoryIndex.KeyAddressPair<Integer> keyAddress = oldIndex.get(i);
            staticIndex[i] = keyAddress.key;
            staticPositions[i] = keyAddress.position;
        }
    }

    /**
     * Finalizes the underlying storage (if it was not destroyed yet) and then this object.
     * @throws Throwable if the storage finalization fails
     */
    @Override
    public void finalize() throws Throwable {
        try {
            if (storage != null) {
                storage.finalize();
            }
        } finally {
            super.finalize();
        }
    }

    /**
     * Destroys the underlying storage and drops the reference to it.
     * Calling this method again after a successful destroy does nothing.
     * @throws Throwable if the storage cannot be destroyed
     */
    public void destroy() throws Throwable {
        if (storage != null) {
            storage.destroy();
            storage = null;
        }
    }

    // ********************** (De)serialization methods ************************** //

    /**
     * Merges the dynamic index into the static arrays and writes the non-transient fields.
     * @param out the stream to write to
     * @throws IOException if the default serialization fails
     */
    private void writeObject(java.io.ObjectOutputStream out) throws IOException {
        clearDynamicIndex();
        out.defaultWriteObject();
    }

    /**
     * Reads the non-transient fields and creates an empty dynamic index.
     * @param in the stream to read from
     * @throws IOException if the default deserialization fails
     * @throws ClassNotFoundException if a class of a serialized object cannot be found
     */
    private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        this.dynamicIndex = new ArrayList<>();
    }

    /**
     * If the dynamic index is not empty, moves all its entries to the static arrays (sorted merge).
     * For equal keys the entries of the static arrays are placed before the dynamic ones.
     */
    private void clearDynamicIndex() {
        if (dynamicIndex.isEmpty()) {
            return;
        }
        int staticLength = staticIndex.length;
        int mergedLength = staticLength + dynamicIndex.size();
        int[] mergedKeys = new int[mergedLength];
        long[] mergedPositions = new long[mergedLength];

        int src = 0;
        int dest = 0;
        for (IntKeyAddressPair pair : dynamicIndex) {
            // copy all static entries that are not bigger than the current dynamic entry
            while (src < staticLength && staticIndex[src] <= pair.key) {
                mergedKeys[dest] = staticIndex[src];
                mergedPositions[dest++] = staticPositions[src++];
            }
            mergedKeys[dest] = pair.key;
            mergedPositions[dest++] = pair.position;
        }
        // copy the remaining data from the old static arrays
        System.arraycopy(staticIndex, src, mergedKeys, dest, staticLength - src);
        System.arraycopy(staticPositions, src, mergedPositions, dest, staticLength - src);

        staticIndex = mergedKeys;
        staticPositions = mergedPositions;
        dynamicIndex.clear();
    }

    //****************** Search and order ******************//

    /**
     * Binary search for a key in the static arrays or in the dynamic index.
     *
     * @param inStaticIndex if {@code true}, the static arrays are searched, otherwise the dynamic index
     * @param key the key to search for
     * @param low the lowest index of the searched range (inclusive)
     * @param high the highest index of the searched range (inclusive)
     * @param indicateFailure if {@code true} and the key is not found, {@code -(insertionPoint + 1)}
     *          is returned; if {@code false}, the insertion point itself is returned
     * @return the index of an entry with the given key, or the value described at {@code indicateFailure}
     * @throws IndexOutOfBoundsException if the range reaches outside the searched index
     * @throws IllegalStateException declared for compatibility; not thrown by this implementation
     */
    protected int binarySearch(boolean inStaticIndex, int key, int low, int high, boolean indicateFailure) throws IndexOutOfBoundsException, IllegalStateException {
        while (low <= high) {
            int mid = (low + high) >>> 1;
            int midKey = inStaticIndex ? staticIndex[mid] : dynamicIndex.get(mid).key;
            int cmp = Integer.compare(key, midKey);
            if (cmp > 0) {
                low = mid + 1;
            } else if (cmp < 0) {
                high = mid - 1;
            } else {
                return mid; // key found
            }
        }
        // key not found
        return indicateFailure ? -(low + 1) : low;
    }

    // ******************     Index access methods     ****************** //

    /**
     * Reads the object stored on the given position of the storage.
     * @param position storage position
     * @return object on the given position in the storage
     * @throws IllegalStateException if the storage fails to read the object
     */
    private LocalAbstractObject getObjectByStoragePosition(long position) {
        try {
            return storage.read(position);
        } catch (BucketStorageException ex) {
            throw new IllegalStateException("Cannot read object from storage", ex);
        }
    }

    /**
     * Returns the object stored under the given integer key.
     * The static arrays are searched first, then the dynamic index.
     * @param key object integer key
     * @return corresponding object from the storage or {@code null} if the key is not indexed
     * @throws IllegalStateException if the storage fails to read the object
     */
    public LocalAbstractObject getObject(int key) {
        int staticPos = binarySearch(true, key, 0, staticIndex.length - 1, true);
        if (staticPos >= 0) {
            return getObjectByStoragePosition(staticPositions[staticPos]);
        }
        // a non-negative result is a hit; position 0 is a valid hit as well
        int dynamicPos = binarySearch(false, key, 0, dynamicIndex.size() - 1, true);
        if (dynamicPos >= 0) {
            return getObjectByStoragePosition(dynamicIndex.get(dynamicPos).position);
        }
        return null;
    }

    /**
     * Returns the maximal currently indexed key.
     * @return the maximal currently indexed key; an empty part of the index contributes -1
     */
    public int getMaxKey() {
        int staticMax = staticIndex.length > 0 ? staticIndex[staticIndex.length - 1] : -1;
        int dynamicMax = dynamicIndex.isEmpty() ? -1 : dynamicIndex.get(dynamicIndex.size() - 1).key;
        return Math.max(staticMax, dynamicMax);
    }

    /**
     * Searches for the position in the dynamic index where an entry with the given key is to be inserted.
     * @param key key of the object to be inserted
     * @return the position in the dynamic index where to put the entry
     * @throws BucketStorageException if there was a problem determining the point
     */
    protected int insertionPoint(int key) throws BucketStorageException {
        return binarySearch(false, key, 0, dynamicIndex.size() - 1, false);
    }

    /**
     * Returns the integer key of the given object as provided by {@link MetricIndexes#getIntegerID}.
     * @param object the object to get the key for
     * @return the integer key of the object
     */
    protected int extractKey(LocalAbstractObject object) {
        return MetricIndexes.getIntegerID(object);
    }

    /**
     * Adds an object to the storage and its integer key to the dynamic index.
     * If the dynamic index exceeds its maximal length, it is merged into the static arrays.
     * @param object object to be added to this index and its storage
     * @return always {@code true}
     * @throws BucketStorageException if the storage does not accept the object
     */
    public boolean add(LocalAbstractObject object) throws BucketStorageException {
        int key = extractKey(object);
        int pos = insertionPoint(key);
        long address = storage.store(object).getAddress();
        dynamicIndex.add(pos, new IntKeyAddressPair(key, address));
        if (dynamicIndex.size() > MAX_DYNAMIC_LENGTH) {
            clearDynamicIndex();
        }
        return true;
    }

    /**
     * Class encapsulating the integer key and the long position in the storage.
     */
    protected static class IntKeyAddressPair {
        /** Key of the indexed object. */
        public final int key;
        /** Position of the indexed object in the storage. */
        public final long position;

        /**
         * Creates a new key-position pair.
         * @param key key of the indexed object
         * @param position position of the indexed object in the storage
         */
        public IntKeyAddressPair(int key, long position) {
            this.key = key;
            this.position = position;
        }
    }
}
// src/main/java/pppcodes/ApproxKNNTestQuery.java 0 → 100644 +37 −0 Original line number Diff line number Diff line
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package pppcodes;

import java.io.Serializable;
import java.util.Map;
import messif.objects.LocalAbstractObject;
import messif.operations.AbstractOperation;
import messif.operations.query.ApproxKNNQueryOperation;

/**
 * Approximate k-NN query operation whose textual form also lists the operation parameters.
 *
 * @author xnovak8
 */
@AbstractOperation.OperationName("Approximate k-nearest neighbors query for PPP-Code")
public class ApproxKNNTestQuery extends ApproxKNNQueryOperation {

    /**
     * Creates the query for the given query object and number of neighbors.
     * @param queryObject the query object
     * @param k the number of nearest objects to retrieve
     */
    @AbstractOperation.OperationConstructor({"Query object", "Number of nearest objects"})
    public ApproxKNNTestQuery(LocalAbstractObject queryObject, int k) {
        super(queryObject, k);
    }

    /**
     * Returns the text of the superclass followed (on a new line, if there are any parameters)
     * by all parameters of this operation as "name: value, " items.
     * @return textual representation of this operation including its parameters
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder(super.toString());
        if (getParameterCount() > 0) {
            text.append('\n');
        }
        for (Map.Entry<String, ? extends Serializable> entry : getParameterMap().entrySet()) {
            text.append(entry.getKey());
            text.append(": ");
            text.append(entry.getValue());
            text.append(", ");
        }
        return text.toString();
    }
}
// src/main/java/pppcodes/PPPCodeIndex.java 0 → 100644 +192 −0 Original line number Diff line number Diff line
package pppcodes;

import pppcodes.index.PPPCodeInternalCell;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import messif.algorithms.AlgorithmMethodException;
import messif.algorithms.NavigationProcessor;
import messif.buckets.BucketDispatcher;
import messif.objects.LocalAbstractObject;
import messif.operations.AbstractOperation;
import messif.operations.data.BulkInsertOperation;
import messif.operations.data.DeleteOperation;
import messif.operations.data.InsertOperation;
import mindex.MIndexPPCalculator;
import mindex.MIndexProperties;
import mindex.MIndexProperty;
import mindex.MetricIndex;
import mindex.navigation.VoronoiInternalCell;
import mindex.processors.DeleteOperationNavigationProcessor;
import mindex.processors.InsertOperationNavigationProcessor;
import pppcodes.index.PPPCodeObject;
import pppcodes.index.PPPCodeReadWriter;

/**
 * Single PPP-Code index built on top of {@link MetricIndex}. It supports insert, bulk insert
 * and delete operations.
 *
 * @author xnovak8
 */
public class PPPCodeIndex extends MetricIndex {

    /** Class serial ID for serialization */
    private static final long serialVersionUID = 112300L;

    /** PPP Code serializer - it can be null; created on first use */
    protected transient PPPCodeReadWriter pppCodeSerializer;

    //************************************ Getters ************************//

    /**
     * Returns a serializer for PPP Codes; it is created on the first call.
     * @return a serializer for PPP Codes
     */
    public final PPPCodeReadWriter getPPPCodeReadWriter() {
        if (pppCodeSerializer == null) {
            pppCodeSerializer = new PPPCodeReadWriter(this);
        }
        return pppCodeSerializer;
    }

    @Override
    public boolean isSpecialOverfilledBuckets() {
        return false;
    }

    @Override
    public boolean isApproxProcessWholeMultiBucket() {
        return false;
    }

    @Override
    public boolean isApproxCheckKeyInterval() {
        return false;
    }

    @Override
    public boolean isMultiBuckets() {
        return false;
    }

    @Override
    public boolean isPreciseSearch() {
        return false;
    }

    // ************************************** Initialization ************************************* //

    /**
     * Creates new instance of the index from properties with the M-Index configuration.
     * @param origConfiguration created properties specifying M-Index configuration
     * @param prefix prefix of the properties (e.g. "mindex.") for this PPP-Code single index
     * @throws java.lang.InstantiationException if thrown by the delegated constructor
     * @throws AlgorithmMethodException if thrown by the delegated constructor
     */
    public PPPCodeIndex(Properties origConfiguration, String prefix) throws InstantiationException, AlgorithmMethodException {
        this(new MIndexProperties(origConfiguration, prefix));
    }

    /**
     * Creates new instance of the index from an already parsed configuration.
     * @param config the M-Index configuration
     * @throws InstantiationException if thrown by the superclass constructor
     * @throws AlgorithmMethodException if thrown by the superclass constructor
     */
    protected PPPCodeIndex(MIndexProperties config) throws InstantiationException, AlgorithmMethodException {
        super(config);
    }

    @Override
    protected VoronoiInternalCell initVoronoiCellTree() throws AlgorithmMethodException {
        return new PPPCodeInternalCell(this, null, new short[0]);
    }

    @Override
    protected MIndexPPCalculator initPPCalculator(MIndexProperties config) throws InstantiationException {
        int threadNumber = config.getIntProperty(MIndexProperty.PPCALCULATOR_THREADS);
        if (threadNumber > 1) {
            return new ParallelPPPCodeCalculator(config);
        }
        return new SequentialPPPCodeCalculator(config);
    }

    /** This index creates no bucket dispatcher. */
    @Override
    protected BucketDispatcher initBucketDispatcher(MIndexProperties config) throws InstantiationException {
        return null;
    }

    // ********************* Methods taking care about the storage and dynamic levels of M-Index ********************** //

    /**
     * Return the number of objects stored by this M-Index.
     * @return the number of objects stored by this M-Index.
     */
    @Override
    public int getObjectCount() {
        return voronoiCellTree.getObjectCount();
    }

    /**
     * Return various statistic numbers about this PPP-Code single index; the passed objects
     * are handed over to the Voronoi cell tree to be filled.
     * @param intCellNumbers map passed to the cell tree (NOTE(review): presumably numbers of internal cells per level - confirm)
     * @param branchingSums map passed to the cell tree (NOTE(review): presumably sums of branching per level - confirm)
     * @param leafCellNumber counter passed to the cell tree
     * @param dataSizeBytes counter passed to the cell tree
     * @param leafLevelSum counter passed to the cell tree
     */
    public void getTreeSize(Map<Short, AtomicInteger> intCellNumbers, Map<Short, AtomicLong> branchingSums, AtomicInteger leafCellNumber, AtomicLong dataSizeBytes, AtomicLong leafLevelSum) {
        voronoiCellTree.calculateTreeSize(intCellNumbers, branchingSums, leafCellNumber, dataSizeBytes, leafLevelSum);
    }

    /**
     * Asks the root PPP-Code cell to consolidate its data, passing it a 1 MiB byte buffer.
     */
    public void consolidateData() {
        ((PPPCodeInternalCell) voronoiCellTree).consolidateData(new byte[1024 * 1024]);
    }

    @Override
    public short[] readPPP(LocalAbstractObject object) {
        if (object instanceof PPPCodeObject) {
            return ((PPPCodeObject) object).getPppForReading();
        }
        return super.readPPP(object);
    }

    // ******************************* Methods processing operations *************************************** //

    @Override
    protected Collection<Class<? extends AbstractOperation>> getSupportedOpList() {
        return Arrays.asList(BulkInsertOperation.class, InsertOperation.class, DeleteOperation.class);
    }

    /**
     * Method that creates specific implementation of NavigationProcessor for
     * each type of operation
     * @param operation to be processed
     * @return a specific implementation of NavigationProcessor
     * @throws IllegalStateException if the processor cannot be created
     * @throws UnsupportedOperationException if the operation is not an insert, bulk insert or delete
     */
    @Override
    public NavigationProcessor<? extends AbstractOperation> getNavigationProcessor(AbstractOperation operation) {
        try {
            // For InsertOperation and BulkInsertOperation create an InsertOperationNavigationProcessor
            if (operation instanceof InsertOperation || operation instanceof BulkInsertOperation) {
                return InsertOperationNavigationProcessor.getInstance(operation, this);
            }
            if (operation instanceof DeleteOperation) {
                return DeleteOperationNavigationProcessor.getInstance((DeleteOperation) operation, this);
            }
        } catch (AlgorithmMethodException e) {
            throw new IllegalStateException(e);
        }
        throw new UnsupportedOperationException("PPP-Code single algorithm does not support processing of operation " + operation.getClass());
    }

    // ********************************************* Auxiliary methods ********************************* //

    /**
     * Prints info about this M-Index.
     * @return string representation of this M-Index
     */
    @Override
    public String toString() {
        StringBuilder strBuf = new StringBuilder("Single PPP-Code algorithm: \n");
        for (String property : configuration.stringPropertyNames()) {
            strBuf.append("\t").append(property).append(" = ").append(configuration.getProperty(property)).append('\n');
        }
        // pivot settings information
        strBuf.append(ppCalculator.toString());

        // storage information
        strBuf.append("\nStoring: ").append(getObjectCount()).append(" objects\n");
        return strBuf.toString();
    }
}
// src/main/java/pppcodes/ParallelPPPCodeCalculator.java 0 → 100644 +77 −0 Original line number Diff line number Diff line
/*
 *  This file is part of M-Index library.
 *
 *  M-Index library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  M-Index library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with M-Index library.  If not, see <http://www.gnu.org/licenses/>.
 */
package pppcodes;

import pppcodes.index.PPPCodeObject;
import messif.algorithms.AlgorithmMethodException;
import messif.objects.LocalAbstractObject;
import messif.objects.PrecomputedDistancesFilter;
import mindex.MIndexPPCalculator;
import mindex.MIndexProperties;
import mindex.MetricIndexes;
import mindex.ParallelPPCalculator;

/**
 * This class is a simple extension of {@link ParallelPPCalculator} that turns
 * initialized objects into {@link PPPCodeObject}s.
 *
 * @author David Novak, Masaryk University, Brno, Czech Republic, david.novak@fi.muni.cz
 */
public class ParallelPPPCodeCalculator extends ParallelPPCalculator {

    /**
     * Class serial ID for serialization
     */
    private static final long serialVersionUID = 114090L;

    /**
     * Creates the PP calculator given M-Index properties
     *
     * @param mIndexProperties properties with M-Index configuration
     * @throws InstantiationException if this part of M-Index cannot be
     * initialized using this properties
     */
    public ParallelPPPCodeCalculator(MIndexProperties mIndexProperties) throws InstantiationException {
        super(mIndexProperties);
    }

    // ********************* Getter overrides ************************ //

    @Override
    public boolean isUsePivotFiltering() {
        return false;
    }

    @Override
    public boolean isPreciseSearch() {
        return false;
    }

    // *************** Init object - calculate obj-pivot distance + PP ********************* //

    /**
     * Initialize the object for the PPP-Code index: creates a new {@link PPPCodeObject} from the
     * integer ID of the object and the value returned by {@code getObjectPPP(object)}.
     *
     * @param object to initialize
     * @return a new PPP-Code object for the given object
     * @throws AlgorithmMethodException declared by the overridden method
     */
    @Override
    public LocalAbstractObject initPP(LocalAbstractObject object) throws AlgorithmMethodException {
        return new PPPCodeObject(MetricIndexes.getIntegerID(object), getObjectPPP(object));
    }
}
// Loading
pom.xml +33 −6 Original line number Diff line number Diff line Loading @@ -4,7 +4,7 @@ <groupId>mindex</groupId> <artifactId>ppp-codes</artifactId> <version>1.0-SNAPSHOT</version> <version>1.1</version> <packaging>jar</packaging> <name>ppp-codes</name> Loading @@ -27,21 +27,48 @@ </properties> <dependencies> <dependency> <groupId>messif</groupId> <artifactId>messif</artifactId> <version>2.1-DEVEL</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>3.8.1</version> <version>4.10</version> <scope>test</scope> </dependency> <dependency> <groupId>messif</groupId> <artifactId>messif</artifactId> <version>2.1-DEVEL</version> <groupId>org.jmock</groupId> <artifactId>jmock-legacy</artifactId> <version>2.6.0</version> <scope>test</scope> </dependency> <dependency> <groupId>org.jmock</groupId> <artifactId>jmock-junit4</artifactId> <version>2.6.0</version> <scope>test</scope> </dependency> <dependency> <groupId>org.hamcrest</groupId> <artifactId>hamcrest-core</artifactId> <version>1.3</version> <scope>test</scope> </dependency> <dependency> <groupId>org.hamcrest</groupId> <artifactId>hamcrest-library</artifactId> <version>1.3</version> <scope>test</scope> </dependency> <dependency> <groupId>${project.groupId}</groupId> <artifactId>mindex</artifactId> <version>2.6.3-DEVEL</version> <version>2.7.1-DEVEL</version> </dependency> </dependencies> <description>PPP-Codes index and search algorithm for approximate metric-based search. Version 1.1: simple and hopefully efficient id-object index. </description> </project>
src/main/java/messif/buckets/index/impl/DiskStorageMemoryIntIndex.java 0 → 100644 +288 −0 Original line number Diff line number Diff line /* * This file is part of MESSIF library. * * MESSIF library is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * MESSIF library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with MESSIF library. If not, see <http://www.gnu.org/licenses/>. */ package messif.buckets.index.impl; import java.io.IOException; import java.io.Serializable; import java.lang.reflect.Field; import java.util.Iterator; import java.util.LinkedList; import java.util.logging.Level; import java.util.logging.Logger; import messif.buckets.BucketStorageException; import messif.buckets.index.IndexComparator; import messif.buckets.index.impl.LongStorageMemoryIndex; import messif.buckets.storage.impl.DiskStorage; import messif.objects.LocalAbstractObject; import mindex.MetricIndexes; /** * Implementation of disk (long) index that stores the indexed data in a sorted array and keeps the * keys to be compared always in memory. * * <p> * All search methods are correctly implemented using binary search on * the array whenever possible. 
* </p> * * @param <K> the type of keys this index is ordered by * @param <T> the type of objects stored in this collection * @author Michal Batko, Masaryk University, Brno, Czech Republic, batko@fi.muni.cz * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz * @author David Novak, Masaryk University, Brno, Czech Republic, david.novak@fi.muni.cz */ public class DiskStorageMemoryIntIndex implements Serializable { /** Class serial id for serialization. */ private static final long serialVersionUID = 1021301L; /** Maximal length of the dynamic index; if exceeded, then the indexes are moved to static arrays */ private static final int MAX_DYNAMIC_LENGTH = 1024 * 1024; //****************** Attributes ******************// /** Storage associated with this index */ private DiskStorage<LocalAbstractObject> storage; /** Ordered linked index of addresses into the storage */ private transient LinkedList<IntKeyAddressPair> dynamicIndex; /** Fixed static ordered arrays of keys and corresponding object positions in the storage. */ private int [] staticIndex; private long [] staticPositions; //****************** Constructor ******************// /** * Creates a new instance of LongStorageMemoryIndex for the specified storage. * @param storage the storage to associate with this index * @param comparator the comparator imposing natural order of this index */ public DiskStorageMemoryIntIndex(DiskStorage<LocalAbstractObject> storage) { this.storage = storage; this.dynamicIndex = new LinkedList<>(); this.staticIndex = new int [0]; this.staticPositions = new long [0]; } /** * Creates a new instance of LongStorageMemoryIndex for the specified storage. 
* @param storage the storage to associate with this index * @param comparator the comparator imposing natural order of this index */ public DiskStorageMemoryIntIndex(LongStorageMemoryIndex<Integer, LocalAbstractObject> oldIndex) { try { Field storageField = LongStorageMemoryIndex.class.getDeclaredField("storage"); storageField.setAccessible(true); this.storage = (DiskStorage<LocalAbstractObject>) storageField.get(oldIndex); this.dynamicIndex = new LinkedList<>(); this.staticIndex = new int [oldIndex.size()]; this.staticPositions = new long [oldIndex.size()]; // init this index from the original index data for (int i = 0; i < staticIndex.length; i++) { LongStorageMemoryIndex.KeyAddressPair<Integer> keyAddress = oldIndex.get(i); staticIndex[i] = keyAddress.key; staticPositions[i] = keyAddress.position; } } catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException ex) { Logger.getLogger(DiskStorageMemoryIntIndex.class.getName()).log(Level.SEVERE, null, ex); } } @Override public void finalize() throws Throwable { storage.finalize(); super.finalize(); } public void destroy() throws Throwable { storage.destroy(); storage = null; } // ********************** (De)serialization methods ************************** // private void writeObject(java.io.ObjectOutputStream out) throws IOException { clearDynamicIndex(); out.defaultWriteObject(); } private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); this.dynamicIndex = new LinkedList<>(); } /** * If the dynamic index is not empty then this methods moves all its data to the static arrays (sorted merge). 
*/ private void clearDynamicIndex() { if (dynamicIndex.isEmpty()) { return; } int [] newStaticIndex = new int [staticIndex.length + dynamicIndex.size()]; long [] newStaticPositions = new long [staticIndex.length + dynamicIndex.size()]; // Merge sort data int from1 = 0; int to1 = staticIndex.length; int fromDest = 0; Iterator<IntKeyAddressPair> iterator = dynamicIndex.iterator(); IntKeyAddressPair nextDynamic = iterator.next(); while (nextDynamic != null) { // If the current item of this collection is bigger than the current item of the added collection if (from1 >= to1 || staticIndex[from1] > nextDynamic.key) { newStaticIndex[fromDest] = nextDynamic.key; newStaticPositions[fromDest ++] = nextDynamic.position; nextDynamic = iterator.hasNext() ? iterator.next() : null; } else { newStaticIndex[fromDest] = staticIndex[from1]; newStaticPositions[fromDest ++] = staticPositions[from1 ++]; } } // Copy the remaining data from the old static array if (from1 < to1) { System.arraycopy(staticIndex, from1, newStaticIndex, fromDest, to1 - from1); System.arraycopy(staticPositions, from1, newStaticPositions, fromDest, to1 - from1); } // set newly created indexes staticIndex = newStaticIndex; staticPositions = newStaticPositions; dynamicIndex.clear(); } //****************** Search and order ******************// protected int binarySearch(boolean inStaticIndex, int key, int low, int high, boolean indicateFailure) throws IndexOutOfBoundsException, IllegalStateException { while (low <= high) { int mid = (low + high) >>> 1; int cmp = Integer.compare(key, inStaticIndex ? staticIndex[mid] : dynamicIndex.get(mid).key); if (cmp > 0) { low = mid + 1; } else if (cmp < 0) { high = mid - 1; } else { return mid; // key found } } // Key not found if (indicateFailure) return -(low + 1); else return low; } // ****************** Index access methods ****************** // /** * Given a storage position, this method simply returns object on given position in the storage. 
* @param position storage position * @return object on given position in the storage */ private LocalAbstractObject getObjectByStoragePosition(long position) { try { return storage.read(position); } catch (BucketStorageException ex) { throw new IllegalStateException("Cannot read object from storage", ex); } } /** * Given an integer key, this methods returns corresponding object from the storage or null (if not found). * @param key object integer key * @return corresponding object from the storage or null (if not found) */ public LocalAbstractObject getObject(int key) { // try to find the key in the static index int keyIndex = binarySearch(true, key, 0, staticIndex.length - 1, true); if (keyIndex >= 0) { return getObjectByStoragePosition(staticPositions[keyIndex]); } if (! dynamicIndex.isEmpty() && ((keyIndex = binarySearch(false, key, 0, dynamicIndex.size() - 1, true)) > 0)) { return getObjectByStoragePosition(dynamicIndex.get(keyIndex).position); } return null; } /** * Returns the maximal currently indexed key. * @return the maximal currently indexed key */ public int getMaxKey() { return Math.max(staticIndex.length > 0 ? staticIndex[staticIndex.length - 1] : -1, dynamicIndex.isEmpty() ? -1 : dynamicIndex.getLast().key); } /** * Searches for the point where to insert the object <code>object</code>. * @param key key of the object to be inserted * @return the point in the array where to put the object * @throws BucketStorageException if there was a problem determining the point */ protected int insertionPoint(int key) throws BucketStorageException { return binarySearch(false, key, 0, dynamicIndex.size() - 1, false); } protected int extractKey(LocalAbstractObject object) { return MetricIndexes.getIntegerID(object); } /** * Adds an object to the storage and it's integer key to the local key index. 
* @param object object to be added to this index and its storage * @return true if the insertion worked ok * @throws BucketStorageException if the storage does not accept the object */ public boolean add(LocalAbstractObject object) throws BucketStorageException { // Search for the position where the object is added into index int key = extractKey(object); int pos = insertionPoint(key); dynamicIndex.add(pos, new IntKeyAddressPair(key, storage.store(object).getAddress())); if (dynamicIndex.size() > MAX_DYNAMIC_LENGTH) { clearDynamicIndex(); } return true; } /** * Class encapsulating the key and long position in the storage. * * @param <K> the type of keys this index is ordered by */ protected static class IntKeyAddressPair { public final int key; public final long position; public IntKeyAddressPair(int key, long position) { this.key = key; this.position = position; } } }
src/main/java/pppcodes/ApproxKNNTestQuery.java 0 → 100644 +37 −0 Original line number Diff line number Diff line
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package pppcodes;

import java.io.Serializable;
import java.util.Map;
import messif.objects.LocalAbstractObject;
import messif.operations.AbstractOperation;
import messif.operations.query.ApproxKNNQueryOperation;

/**
 * Approximate k-nearest neighbors query whose textual form additionally lists
 * all parameters attached to the operation.
 *
 * @author xnovak8
 */
@AbstractOperation.OperationName("Approximate k-nearest neighbors query for PPP-Code")
public class ApproxKNNTestQuery extends ApproxKNNQueryOperation {

    /**
     * Creates the query for the given query object and number of neighbors.
     * @param queryObject the query object
     * @param k number of nearest objects to retrieve
     */
    @AbstractOperation.OperationConstructor({"Query object", "Number of nearest objects"})
    public ApproxKNNTestQuery(LocalAbstractObject queryObject, int k) {
        super(queryObject, k);
    }

    /**
     * Returns the parent's string representation followed, if the operation has any
     * parameters, by a new line and the parameters printed as "name: value, " pairs.
     * @return string representation of this query including its parameters
     */
    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder(super.toString());
        final boolean hasParameters = getParameterCount() > 0;
        if (hasParameters) {
            text.append('\n');
        }
        for (Map.Entry<String, ? extends Serializable> entry : getParameterMap().entrySet()) {
            text.append(entry.getKey());
            text.append(": ");
            text.append(entry.getValue());
            text.append(", ");
        }
        return text.toString();
    }
}
src/main/java/pppcodes/PPPCodeIndex.java 0 → 100644 +192 −0 Original line number Diff line number Diff line
package pppcodes;

import pppcodes.index.PPPCodeInternalCell;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import messif.algorithms.AlgorithmMethodException;
import messif.algorithms.NavigationProcessor;
import messif.buckets.BucketDispatcher;
import messif.objects.LocalAbstractObject;
import messif.operations.AbstractOperation;
import messif.operations.data.BulkInsertOperation;
import messif.operations.data.DeleteOperation;
import messif.operations.data.InsertOperation;
import mindex.MIndexPPCalculator;
import mindex.MIndexProperties;
import mindex.MIndexProperty;
import mindex.MetricIndex;
import mindex.navigation.VoronoiInternalCell;
import mindex.processors.DeleteOperationNavigationProcessor;
import mindex.processors.InsertOperationNavigationProcessor;
import pppcodes.index.PPPCodeObject;
import pppcodes.index.PPPCodeReadWriter;

/**
 * Single PPP-Code index: a {@link MetricIndex} specialization that supports insert,
 * bulk-insert and delete operations.
 *
 * @author xnovak8
 */
public class PPPCodeIndex extends MetricIndex {

    /** Class serial ID for serialization */
    private static final long serialVersionUID = 112300L;

    /** PPP Code serializer - it can be null (it is transient and created on first use) */
    protected transient PPPCodeReadWriter pppCodeSerializer;

    //************************************ Getters ************************//

    /**
     * Returns a serializer for PPP Codes; the serializer is created on the first call.
     * @return the serializer for PPP Codes bound to this index
     */
    public final PPPCodeReadWriter getPPPCodeReadWriter() {
        if (pppCodeSerializer != null) {
            return pppCodeSerializer;
        }
        pppCodeSerializer = new PPPCodeReadWriter(this);
        return pppCodeSerializer;
    }

    /** This index always reports false for this flag. */
    @Override
    public boolean isSpecialOverfilledBuckets() {
        return false;
    }

    /** This index always reports false for this flag. */
    @Override
    public boolean isApproxProcessWholeMultiBucket() {
        return false;
    }

    /** This index always reports false for this flag. */
    @Override
    public boolean isApproxCheckKeyInterval() {
        return false;
    }

    /** This index always reports false for this flag. */
    @Override
    public boolean isMultiBuckets() {
        return false;
    }

    /** This index always reports false for this flag. */
    @Override
    public boolean isPreciseSearch() {
        return false;
    }

    // ************************************** Initialization ************************************* //

    /**
     * Creates a new PPP-Code single index from the given properties.
     * @param origConfiguration created properties specifying M-Index configuration
     * @param prefix prefix of the properties (e.g. "mindex.") for this PPP-Code single index
     * @throws InstantiationException propagated from the underlying initialization
     * @throws AlgorithmMethodException propagated from the underlying initialization
     */
    public PPPCodeIndex(Properties origConfiguration, String prefix) throws InstantiationException, AlgorithmMethodException {
        this(new MIndexProperties(origConfiguration, prefix));
    }

    /**
     * Creates a new PPP-Code single index from an already prepared M-Index configuration.
     * @param config M-Index configuration passed to the parent constructor
     * @throws InstantiationException propagated from the parent constructor
     * @throws AlgorithmMethodException propagated from the parent constructor
     */
    protected PPPCodeIndex(MIndexProperties config) throws InstantiationException, AlgorithmMethodException {
        super(config);
    }

    /**
     * Creates the root of the cell tree: a {@link PPPCodeInternalCell} with no parent
     * and an empty pivot permutation prefix.
     */
    @Override
    protected VoronoiInternalCell initVoronoiCellTree() throws AlgorithmMethodException {
        final short[] emptyPrefix = new short[0];
        return new PPPCodeInternalCell(this, null, emptyPrefix);
    }

    /**
     * Chooses the PP calculator according to the configured number of threads:
     * the parallel variant for more than one thread, the sequential one otherwise.
     */
    @Override
    protected MIndexPPCalculator initPPCalculator(MIndexProperties config) throws InstantiationException {
        final int threads = config.getIntProperty(MIndexProperty.PPCALCULATOR_THREADS);
        if (threads <= 1) {
            return new SequentialPPPCodeCalculator(config);
        }
        return new ParallelPPPCodeCalculator(config);
    }

    /** No bucket dispatcher is created by this index; the method always returns null. */
    @Override
    protected BucketDispatcher initBucketDispatcher(MIndexProperties config) throws InstantiationException {
        return null;
    }

    // ********************* Methods taking care about the storage and dynamic levels of M-Index ********************** //

    /**
     * Return the number of objects stored by this index, as reported by the cell tree.
     * @return the number of objects stored by this index
     */
    @Override
    public int getObjectCount() {
        return voronoiCellTree.getObjectCount();
    }

    /**
     * Collects various statistic numbers about this PPP-Code single index by delegating
     * to the cell tree; the results are presumably accumulated into the passed arguments.
     */
    public void getTreeSize(Map<Short, AtomicInteger> intCellNumbers, Map<Short, AtomicLong> branchingSums,
            AtomicInteger leafCellNumber, AtomicLong dataSizeBytes, AtomicLong leafLevelSum) {
        voronoiCellTree.calculateTreeSize(intCellNumbers, branchingSums, leafCellNumber, dataSizeBytes, leafLevelSum);
    }

    /** Asks the root cell to consolidate its data, handing it a freshly allocated 1 MiB byte array. */
    public void consolidateData() {
        final PPPCodeInternalCell rootCell = (PPPCodeInternalCell) voronoiCellTree;
        final byte[] buffer = new byte[1024 * 1024];
        rootCell.consolidateData(buffer);
    }

    /**
     * Reads the PPP of the given object; {@link PPPCodeObject}s provide it directly,
     * any other object is handled by the parent implementation.
     */
    @Override
    public short[] readPPP(LocalAbstractObject object) {
        if (!(object instanceof PPPCodeObject)) {
            return super.readPPP(object);
        }
        return ((PPPCodeObject) object).getPppForReading();
    }

    // ******************************* Methods processing operations *************************************** //

    /** Lists the supported operations: bulk insert, insert and delete. */
    @Override
    protected Collection<Class<? extends AbstractOperation>> getSupportedOpList() {
        return Arrays.asList(BulkInsertOperation.class, InsertOperation.class, DeleteOperation.class);
    }

    /**
     * Method that creates specific implementation of NavigationProcessor for
     * each type of operation.
     * @param operation to be processed
     * @return a specific implementation of NavigationProcessor
     * @throws IllegalStateException if creating the processor fails with an {@link AlgorithmMethodException}
     * @throws UnsupportedOperationException if the operation is neither an insert, a bulk insert nor a delete
     */
    @Override
    public NavigationProcessor<? extends AbstractOperation> getNavigationProcessor(AbstractOperation operation) {
        // InsertOperation and BulkInsertOperation share the same processor
        final boolean isInsertion = operation instanceof InsertOperation || operation instanceof BulkInsertOperation;
        try {
            if (isInsertion) {
                return InsertOperationNavigationProcessor.getInstance(operation, this);
            }
            if (operation instanceof DeleteOperation) {
                return DeleteOperationNavigationProcessor.getInstance((DeleteOperation) operation, this);
            }
        } catch (AlgorithmMethodException e) {
            throw new IllegalStateException(e);
        }
        throw new UnsupportedOperationException("PPP-Code single algorithm does not support processing of operation " + operation.getClass());
    }

    // ********************************************* Auxiliary methods ********************************* //

    /**
     * Prints info about this index: all configuration properties, the PP calculator
     * description and the number of stored objects.
     * @return string representation of this index
     */
    @Override
    public String toString() {
        final StringBuilder description = new StringBuilder("Single PPP-Code algorithm: \n");
        for (String name : configuration.stringPropertyNames()) {
            description.append("\t").append(name).append(" = ");
            description.append(configuration.getProperty(name)).append('\n');
        }
        // pivot settings information
        description.append(ppCalculator.toString());
        // storage information
        description.append("\nStoring: ").append(getObjectCount()).append(" objects\n");
        return description.toString();
    }
}
src/main/java/pppcodes/ParallelPPPCodeCalculator.java 0 → 100644 +77 −0 Original line number Diff line number Diff line
/*
 * This file is part of M-Index library.
 *
 * M-Index library is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * M-Index library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with M-Index library. If not, see <http://www.gnu.org/licenses/>.
 */
package pppcodes;

import pppcodes.index.PPPCodeObject;
import messif.algorithms.AlgorithmMethodException;
import messif.objects.LocalAbstractObject;
import messif.objects.PrecomputedDistancesFilter;
import mindex.MIndexPPCalculator;
import mindex.MIndexProperties;
import mindex.MetricIndexes;
import mindex.ParallelPPCalculator;

/**
 * A simple extension of {@link ParallelPPCalculator} for PPP-Codes: objects are
 * initialized into {@link PPPCodeObject}s, and both pivot filtering and precise
 * search are reported as switched off.
 *
 * @author David Novak, Masaryk University, Brno, Czech Republic, david.novak@fi.muni.cz
 */
public class ParallelPPPCodeCalculator extends ParallelPPCalculator {

    /**
     * Class serial ID for serialization
     */
    private static final long serialVersionUID = 114090L;

    /**
     * Creates the PP calculator given M-Index properties
     *
     * @param mIndexProperties properties with M-Index configuration
     * @throws InstantiationException if this part of M-Index cannot be
     * initialized using this properties
     */
    public ParallelPPPCodeCalculator(MIndexProperties mIndexProperties) throws InstantiationException {
        super(mIndexProperties);
    }

    // ********************* Getter overrides ************************ //

    /** This calculator always reports false for this flag. */
    @Override
    public boolean isUsePivotFiltering() {
        return false;
    }

    /** This calculator always reports false for this flag. */
    @Override
    public boolean isPreciseSearch() {
        return false;
    }

    // *************** Init object - calculate obj-pivot distance + PP ********************* //

    /**
     * Initializes the object for the PPP-Code index: the result is a new
     * {@link PPPCodeObject} built from the integer ID of the given object and
     * the value returned by {@code getObjectPPP} for it.
     *
     * @param object to initialize
     * @return a new PPP-Code object representing the given object
     * @throws AlgorithmMethodException declared by the overridden method
     */
    @Override
    public LocalAbstractObject initPP(LocalAbstractObject object) throws AlgorithmMethodException {
        // the ID is read first, the PPP is obtained afterwards
        final int objectId = MetricIndexes.getIntegerID(object);
        return new PPPCodeObject(objectId, getObjectPPP(object));
    }
}