Commit 8818f893 authored by xnovak8's avatar xnovak8
Browse files

* many modifications that led to performance improvements

     (e.g. usage of Trove library)
 * started implementation of Muller-Molina's Sketch
parent 0fe09452
Loading
Loading
Loading
Loading

nbactions.xml

0 → 100644
+11 −0
Original line number Diff line number Diff line
<?xml version="1.0" encoding="UTF-8"?>
<!-- Custom action definitions (file nbactions.xml). -->
<actions>
        <!-- Action "build-with-dependencies": runs the "install" goal followed by
             "dependency:copy-dependencies", with the reactor mode set to "also-make". -->
        <action>
            <actionName>build-with-dependencies</actionName>
            <reactor>also-make</reactor>
            <goals>
                <goal>install</goal>
                <goal>dependency:copy-dependencies</goal>
            </goals>
        </action>
    </actions>
+10 −0
Original line number Diff line number Diff line
@@ -67,6 +67,16 @@
      <artifactId>mindex</artifactId>
      <version>2.7.1-DEVEL</version>
    </dependency>
    <dependency>
      <groupId>net.sf.trove4j</groupId>
      <artifactId>trove4j</artifactId>
      <version>3.0.3</version>
    </dependency>
    <dependency>
      <groupId>messif</groupId>
      <artifactId>messif-private</artifactId>
      <version>1.0</version>
    </dependency>
  </dependencies>
    <description>PPP-Codes index and search algorithm for approximate metric-based search. 
Version 1.1: simple and hopefully efficient id-object index.
+89 −2
Original line number Diff line number Diff line
@@ -16,13 +16,16 @@
 */
package messif.buckets.index.impl;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import messif.buckets.BucketStorageException;
@@ -58,6 +61,9 @@ public class DiskStorageMemoryIntIndex implements Serializable {
    /** Storage associated with this index */
    private DiskStorage<LocalAbstractObject> storage;

    /** Copies of the storage for faster reading - the read load is spread equally */
    private transient List<DiskStorage<LocalAbstractObject>> storageCopies = null;
    
    /** Ordered linked index of addresses into the storage */
    private transient LinkedList<IntKeyAddressPair> dynamicIndex;
        
@@ -118,6 +124,16 @@ public class DiskStorageMemoryIntIndex implements Serializable {
        storage = null;
    }

    /**
     * Registers one more copy of the storage that is backed by the given file.
     * The copy is created from the primary {@link #storage} and the file, and it is appended to
     *  the (lazily created) list of storage copies.
     * @param file file with the storage copy
     * @return <code>false</code> if the file cannot be read (nothing is registered in that case),
     *         <code>true</code> if the copy was added
     * @throws IOException declared for the creation of the storage copy
     */
    public boolean addStorageCopy(File file) throws IOException {
        if (file.canRead()) {
            // the list of copies is transient and it is created on the first successful registration
            if (storageCopies == null) {
                storageCopies = new ArrayList<>();
            }
            DiskStorage<LocalAbstractObject> copy = new DiskStorage<>(storage, file);
            storageCopies.add(copy);
            return true;
        }
        return false;
    }
    
    // **********************    (De)serialization methods     ************************** //
    private void writeObject(java.io.ObjectOutputStream out) throws IOException {
@@ -201,7 +217,6 @@ public class DiskStorageMemoryIntIndex implements Serializable {
     * @return corresponding object from the storage or null (if not found)
     */
    public Iterator<LocalAbstractObject> getObjects(Collection<Integer> keys) {
        //List<Long> positions = new ArrayList<>(keys.length);
        long positions [] = new long [keys.size()];
        int index = 0;
        for (int key : keys) {
@@ -213,8 +228,80 @@ public class DiskStorageMemoryIntIndex implements Serializable {
                positions[index ++] = dynamicIndex.get(keyIndex).position;
            }
        }
        if (storageCopies == null || storageCopies.isEmpty() || index <= 1) {
            return storage.read((positions.length == index) ? positions : Arrays.copyOf(positions, index));
        }
        List<Iterator<LocalAbstractObject>> subIterators = new ArrayList<>(storageCopies.size() + 1);
        int indexesStep = index / (storageCopies.size() + 1);
        for (int i = 0; i < storageCopies.size() + 1; i++) {
            subIterators.add(getStorageCopy(i).read(Arrays.copyOfRange(positions, indexesStep * i, 
                    (i != storageCopies.size()) ? (indexesStep * (i+1)) :  Math.max(indexesStep * (i+1), index) )));
        }
        return new ItOverIts(subIterators);
    }
    
    /**
     * Returns the storage to be used for the given slot: slots <code>0 .. storageCopies.size() - 1</code>
     *  are the registered copies and the slot <code>storageCopies.size()</code> is the primary storage.
     * @param index slot index
     * @return storage for the slot or <code>null</code> if the index is out of range
     */
    private DiskStorage<LocalAbstractObject> getStorageCopy(int index) {
        final int copyCount = storageCopies.size();
        if (index >= 0 && index < copyCount) {
            return storageCopies.get(index);
        }
        // the slot right behind the copies stands for the primary storage
        return (index == copyCount) ? storage : null;
    }

    /**
     * Iterator over iterators that always reads one object from the current sub-iterator and 
     *  then goes to the following sub-iterator (starting again from the first one after the last one)
     *  until all of them are fully read.
     * The list passed to the constructor is used directly: exhausted sub-iterators are removed from it.
     */
    protected static class ItOverIts implements Iterator<LocalAbstractObject> {

        /** Sub-iterators that are still to be read; a sub-iterator is removed as soon as it is found exhausted. */
        private final List<Iterator<LocalAbstractObject>> subIterators;

        /** Index (into {@link #subIterators}) of the sub-iterator used by the following call of {@link #next()}. */
        int currentId = -1;
        
        /**
         * Creates the iterator and positions it on the first sub-iterator that has an object to return.
         * @param subIterators sub-iterators to be read in turns; the list is modified during the iteration
         */
        public ItOverIts(List<Iterator<LocalAbstractObject>> subIterators) {
            this.subIterators = subIterators;
            setNextNonemptyIt();
        }        
        
        /**
         * Moves {@link #currentId} to the following sub-iterator (wrapping to the first one) and
         *  removes every exhausted sub-iterator found on the way. When no sub-iterator remains, 
         *  the list is left empty, which is what {@link #hasNext()} tests.
         */
        private void setNextNonemptyIt() {
            if (subIterators.isEmpty()) {
                return;
            }
            if (++ currentId >= subIterators.size()) {
                currentId = 0;
            }
            while (! subIterators.get(currentId).hasNext()) {
                // after the removal, the following sub-iterator (if any) is at the same index
                subIterators.remove(currentId);
                if (subIterators.isEmpty()) {
                    return;
                }
                if (currentId >= subIterators.size()) {
                    currentId = 0;
                }
            }
        }
        
        @Override
        public boolean hasNext() {
            return ! subIterators.isEmpty();
        }

        /**
         * Returns the next object of the current sub-iterator and moves to the following sub-iterator.
         * @return next object
         * @throws java.util.NoSuchElementException if all sub-iterators are exhausted
         */
        @Override
        public LocalAbstractObject next() {
            // honor the Iterator contract instead of failing with an index-out-of-bounds error
            if (subIterators.isEmpty()) {
                throw new java.util.NoSuchElementException("All sub-iterators are exhausted.");
            }
            try {
                return subIterators.get(currentId).next();
            } finally {
                // advance even if the sub-iterator failed, so that the rotation goes on
                setNextNonemptyIt();
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("This is read-only iterator.");
        }
    }
    
    /**
     * Returns the maximal currently indexed key.
+102 −0
Original line number Diff line number Diff line
/*
 *  This file is part of M-Index library.
 *
 *  M-Index library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  M-Index library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with M-Index library.  If not, see <http://www.gnu.org/licenses/>.
 */
package pppcodes.index;

import java.nio.BufferUnderflowException;
import mindex.distance.PartialQueryPPPDistanceCalculator;
import mindex.distance.QueryPPPDistanceCalculator;

/**
 * Query distance of a PPP code read from a byte buffer together with the locators (integer IDs)
 *  stored in that buffer behind the PPP code. The distance is calculated in the constructor, while
 *  the locators are only skipped there; they are decoded on the first call of 
 *  {@link #readLocators()} or {@link #getLocators()}.
 *
 * @author David Novak, Masaryk University, Brno, Czech Republic, david.novak@fi.muni.cz
 */
public class LocatorsAndDistance implements RankableLocators {
    
    /** Query-object distance or any other float value to sort the objects */
    private final float distance;
    
    /** ID(s) corresponding to given PPP; <code>null</code> until they are decoded from the buffer */
    private int [] locators;

    /** Buffer the locators are decoded from */
    private byte [] locatorBuffer;

    /** Position (in bits) within the buffer where the locators start */
    private int locatorBufferPosition;

    /** Read-writer used to decode the locators */
    private PPPCodeReadWriter locatorReader;
    
    
    // ***************** Constructors *****************//

    /**
     * Reads a PPP code from the specified byte buffer, puts the given prefix in front of it and calculates
     *  the query distance of the whole PPP code using the given distance calculator. 
     * The locators stored behind the PPP code are skipped and read later on demand.
     * @param input byte buffer to read the data from (with internal position); 
     *        it is cast to {@link PackageByteBufferBits}
     * @param pppCodeReader reader of the PPP code and of the locators
     * @param tailArrayLengthToRead length of the PPP array to be read from the buffer
     * @param calculator encapsulation of a query object that can calculate the distance from the PPP code
     * @param pppUpper prefix of the PPP code to be used for calculating the distance
     * @throws BufferUnderflowException if the buffer does not have sufficient data to be read
     */
    public LocatorsAndDistance(ByteBufferBits input, PPPCodeReadWriter pppCodeReader, int tailArrayLengthToRead, 
            QueryPPPDistanceCalculator calculator, short [] pppUpper) throws BufferUnderflowException {
        final int prefixLength = pppUpper.length;
        short[] fullPpp = pppCodeReader.readPPP(input, tailArrayLengthToRead, tailArrayLengthToRead + prefixLength);
        // the prefix is copied to the beginning of the array returned by the reader
        System.arraycopy(pppUpper, 0, fullPpp, 0, prefixLength);
        distance = calculator.getQueryDistance(fullPpp);
        initLocators((PackageByteBufferBits) input, pppCodeReader);
    }
    
    /**
     * Reads the tail of a PPP code from the specified byte buffer and sets the distance to the sum of
     *  the given top distance and the partial query distance of the read tail.
     * The locators stored behind the PPP code are skipped and read later on demand.
     * @param input byte buffer to read the data from (with internal position); 
     *        it is cast to {@link PackageByteBufferBits}
     * @param pppCodeReader reader of the PPP code and of the locators
     * @param tailArrayLengthToRead length of the PPP array to be read from the buffer
     * @param particalCalculator calculator of the partial query distance
     * @param levelToStartFrom level passed to the partial distance calculator
     * @param topDistance distance the partial distance is added to
     * @throws BufferUnderflowException if the buffer does not have sufficient data to be read
     */
    public LocatorsAndDistance(ByteBufferBits input, PPPCodeReadWriter pppCodeReader, int tailArrayLengthToRead, 
            PartialQueryPPPDistanceCalculator particalCalculator, int levelToStartFrom, float topDistance) throws BufferUnderflowException {
        short[] pppTail = pppCodeReader.readPPP(input, tailArrayLengthToRead, tailArrayLengthToRead);
        float partialDistance = particalCalculator.getPartialQueryDistance(pppTail, levelToStartFrom);
        distance = topDistance + partialDistance;
        initLocators((PackageByteBufferBits) input, pppCodeReader);
    }
    
    
    /**
     * Remembers the reader, the buffer and the current bit position in the buffer so that the locators
     *  can be decoded later, and then skips the locators in the input.
     * @param input buffer positioned at the beginning of the locators
     * @param pppCodeReader reader to be used for skipping and for later decoding of the locators
     * @throws BufferUnderflowException if the buffer does not have sufficient data to be skipped
     */
    private void initLocators(PackageByteBufferBits input, PPPCodeReadWriter pppCodeReader) throws BufferUnderflowException {
        locatorReader = pppCodeReader;
        locatorBuffer = input.getBuffer();
        // the position must be taken before the IDs are skipped
        locatorBufferPosition = input.positionInBits();
        pppCodeReader.skipIDs(input);
    }
    
    
    // *******************    Locators     *********************** //

    /**
     * Returns the locators; they are decoded from the buffer if it has not been done yet.
     * @return array of locators
     */
    @Override
    public int [] getLocators() {
        readLocators();
        return locators;
    }

    /**
     * Decodes the locators from the remembered buffer position unless they were already decoded.
     * @return this object
     */
    @Override
    public final RankableLocators readLocators() {
        if (locators != null) {
            return this;
        }
        locators = locatorReader.readIDArray(locatorReader.createNewBuffer(locatorBufferPosition, locatorBuffer));
        return this;
    }
    
    
    // *******************    Query distance     *********************** //
    
    /**
     * Returns the distance set in the constructor.
     * @return query distance
     */
    @Override
    public float getQueryDistance() {
        return distance;
    }
}
+1 −1
Original line number Diff line number Diff line
@@ -77,7 +77,7 @@ public class PPPCodeInternalCell extends VoronoiInternalCell {
            if (childCell.getValue() instanceof PPPCodeInternalCell) {
                subIterators.add(((PPPCodeInternalCell) childCell.getValue()).getAllObjects(calculator, childDistance));
            } else {
                subIterators.add(((PPPCodeLeafCell) childCell.getValue()).getAllObjects(calculator, childCell.getValue().getPppForReading(childCell.getKey()), childDistance, false));
                subIterators.add(((PPPCodeLeafCell) childCell.getValue()).getAllObjects(calculator, childCell.getValue().getPppForReading(childCell.getKey()), childDistance));
            }
        }
        return new ObjectProvidersIterator<>(subIterators);
Loading