Commit 295ec4af authored by David Novak's avatar David Novak
Browse files

various minor modifications

parent 9f3cbdb7
Loading
Loading
Loading
Loading
+12 −21
Original line number Diff line number Diff line
@@ -16,26 +16,6 @@
 */
package messif.objects.extraction;

import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.Socket;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import messif.objects.DistanceFunction;
import messif.objects.LocalAbstractObject;
import messif.objects.MetaObject;
@@ -47,6 +27,17 @@ import messif.utility.ExtendedProperties;
import messif.utility.ExtendedPropertiesException;
import messif.utility.ExternalProcessInputStream;

import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.Socket;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/**
 * Collection of utility methods for {@link Extractor}s.
 *
@@ -75,7 +66,7 @@ public abstract class Extractors {
     * @param extractor the plain extractor to wrap
     * @return converted multi-extractor instance
     */
    public static <T extends LocalAbstractObject> MultiExtractor<T> extractorToMultiExtractor(final Extractor<? extends T> extractor) {
    public static <T extends LocalAbstractObject> MultiExtractor<T> extractorToMultiExtractor(final Extractor<T> extractor) {
        return new MultiExtractor<T>() {
            @Override
            public Iterator<T> extract(ExtractorDataSource dataSource) throws ExtractorException, IOException {
+248 −0
Original line number Diff line number Diff line
/*
 *  This file is part of MESSIF library.
 *
 *  MESSIF library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  MESSIF library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with MESSIF library.  If not, see <http://www.gnu.org/licenses/>.
 */
package messif.pivotselection;

import messif.objects.LocalAbstractObject;
import messif.objects.PrecompDistPerforatedArrayFilter;
import messif.objects.util.AbstractObjectIterator;
import messif.objects.util.AbstractObjectList;

import java.util.ArrayList;
import java.util.List;


/**
 * Pivot chooser that clusters the sample objects by the k-means algorithm adapted for metric spaces
 * and selects the resulting cluster centers ("clustroids") as pivots.
 * The clusters computed by the last Voronoi-like partitioning are kept and can be
 * retrieved by {@link #getClusters()}.
 *
 * @author Michal Batko, Masaryk University, Brno, Czech Republic, batko@fi.muni.cz
 * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz
 * @author David Novak, Masaryk University, Brno, Czech Republic, david.novak@fi.muni.cz
 */
public class HierarchicalKMeansPivotChooser extends KMeansPivotChooser {

    /**
     * List of initial pivots (can be <tt>null</tt>).
     * NOTE: this field hides the field of the same name declared in {@link KMeansPivotChooser};
     * only this field is read by {@link #selectPivot}.
     */
    protected AbstractObjectList<LocalAbstractObject> initialPivots;

    /** Clusters created by the most recent Voronoi-like partitioning (<tt>null</tt> until pivots are selected) */
    protected List<AbstractObjectList<LocalAbstractObject>> actualClusters;

    /**
     * Creates a new instance of HierarchicalKMeansPivotChooser with empty initial list of pivots.
     */
    public HierarchicalKMeansPivotChooser() {
        this(null);
    }

    /**
     * Creates a new instance of HierarchicalKMeansPivotChooser.
     * @param initialPivots the list of initial pivots (can be <tt>null</tt>)
     */
    public HierarchicalKMeansPivotChooser(AbstractObjectList<LocalAbstractObject> initialPivots) {
        this.initialPivots = initialPivots;
    }

    /**
     * Returns the clusters created by the most recent Voronoi-like partitioning.
     * The i-th cluster belongs to the i-th pivot that was used for that partitioning.
     * @return the list of clusters or <tt>null</tt> if no pivot selection was run yet
     */
    public List<AbstractObjectList<LocalAbstractObject>> getClusters() {
        return actualClusters;
    }

    /**
     * Runs the k-means clustering on the given sample set and appends the resulting pivots
     * to the preselected pivots of this chooser.
     * The initial pivots (if any) are used as the starting cluster centers; if there are
     * fewer of them than <code>count</code>, the rest is selected from the sample set at random.
     * The algorithm stops when no pivot is shifted by more than {@link #PIVOTS_DISTINCTION_THRESHOLD},
     * after {@link #MAX_ITERATIONS} iterations, or when the calling thread is interrupted
     * (the interrupted status of the thread is preserved in that case).
     *
     * @param count the number of pivots to select
     * @param sampleSetIterator the iterator over the objects to cluster
     */
    @Override
    protected void selectPivot(int count, AbstractObjectIterator<? extends LocalAbstractObject> sampleSetIterator) {

        // Store all passed objects temporarily
        AbstractObjectList<LocalAbstractObject> objectList = new AbstractObjectList<LocalAbstractObject>(sampleSetIterator);

        List<LocalAbstractObject> pivots = new ArrayList<LocalAbstractObject>(count);
        // initially select "count" pivots at random - or use (partly) preselected pivots
        if (initialPivots != null) {
            for (LocalAbstractObject preselPivot : initialPivots) {
                if (count <= pivots.size())
                    break;
                pivots.add(preselPivot);
                System.err.println("Adding preselected pivot: "+preselPivot.getLocatorURI());
            }
        }
        if (count > pivots.size()) {
            System.err.println("Selecting: "+(count - pivots.size()) +" pivots at random");
            pivots.addAll(objectList.randomList(count - pivots.size(), true, new AbstractObjectList<LocalAbstractObject>()));
        }

        boolean continueKMeans = true;
        boolean interrupted = false;

        // each pass of this loop is one step of the k-means algorithm
        int nIterations = 0;
        while (continueKMeans && !interrupted && (nIterations++ < MAX_ITERATIONS)) {
            System.err.println("Running "+nIterations+"th iteration");
            System.err.print("    Voronoi partitioning... ");
            actualClusters = voronoiLikePartitioning(objectList, pivots);
            System.err.println("done");
            StringBuilder buf = new StringBuilder("       cluster sizes:");
            for (AbstractObjectList<LocalAbstractObject> cluster : actualClusters) {
                buf.append(" ").append(cluster.size());
            }
            System.err.println(buf.toString());

            System.err.println("    Selecting clustroids...");
            // now calculate the new pivots for the new clusters (one thread per cluster)
            int i = 0;
            List<SelectClustroidThread> selectingThreads = new ArrayList<SelectClustroidThread>(actualClusters.size());
            for (AbstractObjectList<LocalAbstractObject> cluster : actualClusters) {
                SelectClustroidThread thread = new SelectClustroidThread(cluster, pivots.get(i++));
                thread.start();
                selectingThreads.add(thread);
            }

            i = 0;
            continueKMeans = false;
            for (SelectClustroidThread thread : selectingThreads) {
                // The result of the thread must not be read before the thread has finished,
                // so an interruption is only remembered here and acted upon after this iteration
                if (joinIgnoringInterrupts(thread))
                    interrupted = true;
                if (thread.clustroid == null) {
                    System.err.println("        WARNING: no clustroid selected - empty cluster?: "+ actualClusters.get(i).size());
                    continueKMeans = true;
                } else {
                    float pivotShiftDist = pivots.get(i).getDistance(thread.clustroid);
                    if (pivotShiftDist > PIVOTS_DISTINCTION_THRESHOLD) {
                        System.err.println("        pivot "+ i +" shifted by "+pivotShiftDist);
                        pivots.set(i, thread.clustroid);
                        continueKMeans = true;
                    }
                }
                i++;
            }
        }

        // The pivots are appended - the previously selected pivots are not cleared
        for (LocalAbstractObject pivot : pivots)
            preselectedPivots.add(pivot);

        // Restore the interrupted status so that the caller can react to it
        if (interrupted)
            Thread.currentThread().interrupt();
    }

    /**
     * Waits until the given thread finishes even if the current thread is interrupted while waiting.
     * @param thread the thread to wait for
     * @return <tt>true</tt> if the current thread was interrupted while waiting
     */
    private static boolean joinIgnoringInterrupts(Thread thread) {
        boolean interrupted = false;
        while (true) {
            try {
                thread.join();
                return interrupted;
            } catch (InterruptedException e) {
                // the interrupted status is cleared by the exception, so the next join blocks again
                interrupted = true;
            }
        }
    }

    /**
     * Prints all pivots selected by this chooser. Pivots are printed to <code>System.err</code>.
     * @param msg optional message printed before the pivots
     */
    public void printPivots(String msg) {
        printPivots(msg, preselectedPivots);
    }

    /**
     * Prints the given pivots to <code>System.err</code>, one pivot per line numbered from one.
     * @param msg optional message printed before the pivots (skipped if <tt>null</tt>)
     * @param pivots the pivots to print
     */
    private void printPivots(String msg, List<LocalAbstractObject> pivots) {
        if (msg != null)
            System.err.println(msg);
        int i = 0;
        for (LocalAbstractObject p : pivots) {
            System.err.println("Pivot " + (++i) + ": " + p);
        }
    }

    /**
     * Given a set of objects and a set of pivots, assigns every object to the cluster
     * of the pivot with the smallest computed object-pivot distance.
     * As a side effect, a {@link PrecompDistPerforatedArrayFilter} is attached to (or reset on)
     * every pivot and every object and filled with the distances computed here.
     *
     * @param objects the objects to partition
     * @param pivots the pivots that define the clusters
     * @return the list of clusters; the i-th cluster belongs to the i-th pivot
     */
    private List<AbstractObjectList<LocalAbstractObject>> voronoiLikePartitioning(AbstractObjectList<LocalAbstractObject> objects, List<LocalAbstractObject> pivots) {
        List<AbstractObjectList<LocalAbstractObject>> clusters =  new ArrayList<AbstractObjectList<LocalAbstractObject>>(pivots.size());

        // without pivots there is no cluster an object could be assigned to
        if (pivots.isEmpty())
            return clusters;

        // precompute the mutual distances between the pivots
        for (LocalAbstractObject pivot : pivots) {
            PrecompDistPerforatedArrayFilter filter = pivot.getDistanceFilter(PrecompDistPerforatedArrayFilter.class);
            if (filter == null) {
                filter = new PrecompDistPerforatedArrayFilter(pivots.size());
                pivot.chainFilter(filter, true);
            } else filter.resetAllPrecompDist();

            for (LocalAbstractObject pivot2 : pivots) {
                filter.addPrecompDist(pivot.getDistance(pivot2));
            }
            //  init the cluster for this pivot
            clusters.add(new AbstractObjectList<LocalAbstractObject>());
        }

        for (LocalAbstractObject object : objects) {
            PrecompDistPerforatedArrayFilter filter = object.getDistanceFilter(PrecompDistPerforatedArrayFilter.class);
            if (filter == null) {
                filter = new PrecompDistPerforatedArrayFilter(pivots.size());
                object.chainFilter(filter, true);
            } else filter.resetAllPrecompDist();

            float minDistance = Float.MAX_VALUE;
            int i = 0;
            int closestPivot = -1;
            float objPivotDist;
            for (LocalAbstractObject pivot : pivots) {
                // skip the distance computation if the stored distances show this pivot cannot be closer
                if (object.excludeUsingPrecompDist(pivot, minDistance))
                    filter.addPrecompDist(LocalAbstractObject.UNKNOWN_DISTANCE);
                else {
                    objPivotDist = object.getDistance(pivot);
                    filter.addPrecompDist(objPivotDist);
                    // the first computed distance always wins, so that a distance that is not
                    // smaller than Float.MAX_VALUE still assigns the object to a cluster
                    if (closestPivot < 0 || minDistance > objPivotDist) {
                        closestPivot = i;
                        minDistance = objPivotDist;
                    }
                }
                i++;
            }
            clusters.get(closestPivot).add(object);
        }

        return clusters;
    }

    /** Internal thread for selecting the "center" of a cluster. */
    protected class SelectClustroidThread extends Thread {

        // parameter
        AbstractObjectList<LocalAbstractObject> cluster;

        // always try the original pivot - put it to the sample pivots list
        LocalAbstractObject originalPivot;

        // result - read by the enclosing chooser after this thread has been joined
        LocalAbstractObject clustroid;

        /**
         * Creates a new SelectClustroidThread for computing the "center" of a cluster.
         * @param cluster the list of objects that form a cluster
         * @param originalPivot the original pivot that is improved
         */
        protected SelectClustroidThread(AbstractObjectList<LocalAbstractObject> cluster, LocalAbstractObject originalPivot) {
            this.cluster = cluster;
            this.originalPivot = originalPivot;
        }

        /**
         * Selects the clustroid from a random sample of the cluster extended by the original pivot:
         * the candidate with the smallest sum of squared distances to all objects of the cluster is chosen.
         * The result stays <tt>null</tt> if no candidate has the sum smaller than <code>Double.MAX_VALUE</code>.
         */
        @Override
        public void run() {
            AbstractObjectList<LocalAbstractObject> samplePivots = cluster.randomList(PIVOTS_SAMPLE_SIZE, true, new AbstractObjectList<LocalAbstractObject>(PIVOTS_SAMPLE_SIZE));
            samplePivots.add(this.originalPivot);

            double minRowSum = Double.MAX_VALUE;
            for (LocalAbstractObject pivot : samplePivots) {
                double rowSum = 0;
                for (LocalAbstractObject object : cluster) {
                    rowSum += Math.pow(pivot.getDistance(object), 2);
                }
                if (minRowSum > rowSum) {
                    clustroid = pivot;
                    minRowSum = rowSum;
                }
            }
        }
    }
}
+4 −4
Original line number Diff line number Diff line
@@ -16,14 +16,14 @@
 */
package messif.pivotselection;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import messif.objects.LocalAbstractObject;
import messif.objects.PrecompDistPerforatedArrayFilter;
import messif.objects.util.AbstractObjectIterator;
import messif.objects.util.AbstractObjectList;

import java.util.ArrayList;
import java.util.List;


/**
 * This class uses the k-means algorithm adapted for metric spaces to cluster the objects
@@ -41,7 +41,7 @@ public class KMeansPivotChooser extends AbstractPivotChooser {
    public static final float PIVOTS_DISTINCTION_THRESHOLD = 0.1f;
    
    /** Maximal number of iterations to let run */
    public static final int MAX_ITERATIONS = 100;
    public static final int MAX_ITERATIONS = 20;
    
    /** List of initial pivots */
    protected AbstractObjectList<LocalAbstractObject> initialPivots;
+33 −0
Original line number Diff line number Diff line
package test;

/**
 * Scratch program that demonstrates how the class object of the primitive type {@code int}
 * relates to the class object of its wrapper {@code Integer} in the reflection API.
 * A line is printed for every check that holds, followed by the line "end".
 *
 * Created by david on 29/09/16.
 */
public class TestMain {

    /**
     * Runs the checks and prints their results to the standard output.
     * @param params the command line arguments (not used)
     * @throws NoSuchFieldException if the {@code Integer} class has no public field named "TYPE"
     * @throws IllegalAccessException if the "TYPE" field cannot be read
     */
    public static void main(String[] params) throws NoSuchFieldException, IllegalAccessException {
        int i = 3;
        Class<?> classInt = int.class;
        Class<?> classInteger = Integer.class;

        // "i" is boxed to an Integer for this call; Class.isInstance is documented
        // to return false whenever the class object represents a primitive type
        if (classInt.isInstance(i)) {
            System.out.println("int is intance of Integer");
        }

        // For a primitive class object, isAssignableFrom is documented to return true
        // only for exactly the same class object (the opposite direction,
        // classInteger.isAssignableFrom(classInt), is the other variant to try)
        if (classInt.isAssignableFrom(classInteger)) {
            System.out.println("they are assignable");
        }

        if (classInt.equals(classInteger)) {
            System.out.println("they are equal");
        }

        // The public static field Integer.TYPE holds the class object of the primitive int
        if (classInt.isPrimitive() && classInt == classInteger.getField("TYPE").get(null)) {
            System.out.println("wow");
        }

        System.out.println("end");
    }
}
+105 −0
Original line number Diff line number Diff line
<?xml version="1.0" encoding="UTF-8"?>
<Diagram>
  <ID>JAVA</ID>
  <OriginalElement>messif.operations.AbstractOperation</OriginalElement>
  <nodes>
    <node x="148.75000000000006" y="240.0">messif.operations.QueryOperation</node>
    <node x="260.0" y="480.0">messif.operations.query.KNNQueryOperation</node>
    <node x="534.5" y="240.0">messif.operations.data.DataManipulationOperation</node>
    <node x="237.0" y="560.0">messif.operations.query.ApproxKNNQueryOperation</node>
    <node x="23.0" y="480.0">messif.operations.query.RangeQueryOperation</node>
    <node x="571.5" y="320.0">messif.operations.data.DeleteOperation</node>
    <node x="379.45000000000005" y="320.0">messif.operations.data.InsertOperation</node>
    <node x="363.5" y="0.0">messif.utility.Parametric</node>
    <node x="330.5" y="80.0">messif.utility.ModifiableParametric</node>
    <node x="123.25000000000006" y="320.0">messif.operations.RankingQueryOperation</node>
    <node x="727.5" y="320.0">messif.operations.data.BulkInsertOperation</node>
    <node x="339.0" y="160.0">messif.operations.AbstractOperation</node>
    <node x="309.5" y="240.0">messif.operations.query.GetAlgorithmInfoOperation</node>
    <node x="104.25000000000006" y="400.0">messif.operations.RankingSingleQueryOperation</node>
    <node x="0.0" y="560.0">messif.operations.query.ApproxRangeQueryOperation</node>
  </nodes>
  <notes />
  <edges>
    <edge source="messif.operations.query.RangeQueryOperation" target="messif.operations.RankingSingleQueryOperation">
      <point x="0.0" y="-15.0" />
      <point x="108.5" y="455.0" />
      <point x="159.75000000000006" y="455.0" />
      <point x="-55.5" y="15.0" />
    </edge>
    <edge source="messif.operations.RankingSingleQueryOperation" target="messif.operations.RankingQueryOperation">
      <point x="0.0" y="-15.0" />
      <point x="0.0" y="15.0" />
    </edge>
    <edge source="messif.operations.query.GetAlgorithmInfoOperation" target="messif.operations.AbstractOperation">
      <point x="0.0" y="-15.0" />
      <point x="412.0" y="215.0" />
      <point x="413.5" y="215.0" />
      <point x="0.0" y="15.0" />
    </edge>
    <edge source="messif.utility.ModifiableParametric" target="messif.utility.Parametric">
      <point x="0.0" y="-15.0" />
      <point x="0.0" y="15.0" />
    </edge>
    <edge source="messif.operations.AbstractOperation" target="messif.utility.ModifiableParametric">
      <point x="0.0" y="-15.0" />
      <point x="0.0" y="15.0" />
    </edge>
    <edge source="messif.operations.data.InsertOperation" target="messif.operations.data.DataManipulationOperation">
      <point x="0.0" y="-15.0" />
      <point x="445.45000000000005" y="295.0" />
      <point x="569.0" y="295.0" />
      <point x="-69.0" y="15.0" />
    </edge>
    <edge source="messif.operations.query.KNNQueryOperation" target="messif.operations.RankingSingleQueryOperation">
      <point x="0.0" y="-15.0" />
      <point x="340.5" y="455.0" />
      <point x="270.75000000000006" y="455.0" />
      <point x="55.5" y="15.0" />
    </edge>
    <edge source="messif.operations.query.ApproxKNNQueryOperation" target="messif.operations.query.KNNQueryOperation">
      <point x="0.0" y="-15.0" />
      <point x="0.0" y="15.0" />
    </edge>
    <edge source="messif.operations.data.BulkInsertOperation" target="messif.operations.data.DataManipulationOperation">
      <point x="0.0" y="-15.0" />
      <point x="807.0" y="295.0" />
      <point x="707.0" y="295.0" />
      <point x="69.0" y="15.0" />
    </edge>
    <edge source="messif.operations.RankingQueryOperation" target="messif.operations.QueryOperation">
      <point x="0.0" y="-15.0" />
      <point x="0.0" y="15.0" />
    </edge>
    <edge source="messif.operations.data.DeleteOperation" target="messif.operations.data.DataManipulationOperation">
      <point x="0.0" y="-15.0" />
      <point x="639.5" y="295.0" />
      <point x="638.0" y="295.0" />
      <point x="0.0" y="15.0" />
    </edge>
    <edge source="messif.operations.data.DataManipulationOperation" target="messif.operations.AbstractOperation">
      <point x="0.0" y="-15.0" />
      <point x="638.0" y="215.0" />
      <point x="463.16666666666663" y="215.0" />
      <point x="49.66666666666663" y="15.0" />
    </edge>
    <edge source="messif.operations.query.ApproxRangeQueryOperation" target="messif.operations.query.RangeQueryOperation">
      <point x="0.0" y="-15.0" />
      <point x="0.0" y="15.0" />
    </edge>
    <edge source="messif.operations.QueryOperation" target="messif.operations.AbstractOperation">
      <point x="0.0" y="-15.0" />
      <point x="215.25000000000006" y="215.0" />
      <point x="363.8333333333333" y="215.0" />
      <point x="-49.666666666666686" y="15.0" />
    </edge>
  </edges>
  <settings layout="Hierarchic Group" zoom="1.0" x="472.5" y="335.5" />
  <SelectedNodes>
    <node>messif.operations.query.GetAlgorithmInfoOperation</node>
  </SelectedNodes>
  <Categories />
  <SCOPE>All</SCOPE>
  <VISIBILITY>private</VISIBILITY>
</Diagram>