From 0a904aaaee299de113ff7d932800f8a1576e78ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20Proch=C3=A1zka?= <david@prochazka.dev> Date: Mon, 14 Dec 2020 10:48:06 +0100 Subject: [PATCH] DEL: similarityoperators source code --- .../cover/AbstractHullRepresentation.java | 289 ------- .../cover/AbstractRepresentation.java | 809 ------------------ .../cover/BallRepresentation.java | 110 --- .../similarityoperators/cover/CoverRank.java | 42 - .../cover/FVDMRepresentation.java | 84 -- .../cover/HullBorderRepresentation.java | 142 --- .../cover/HullCenterRepresentation.java | 45 - .../cover/HullFastBorderRepresentation.java | 111 --- .../cover/HullIncrementalRepresentation.java | 429 ---------- .../HullIncrementalRepresentationV2.java | 400 --------- .../HullIncrementalRepresentationV3.java | 27 - .../cover/HullKNHPRepresentation.java | 46 - .../cover/HullMHPRepresentation.java | 43 - .../cover/HullOptimizedRepresentation.java | 104 --- .../cover/HullOptimizedRepresentationV2.java | 167 ---- .../cover/HullOptimizedRepresentationV3.java | 108 --- .../cover/HullPreselectedRepresentation.java | 66 -- .../cover/HullRepresentation.java | 139 --- .../cover/MinDistObjectPairIterator.java | 89 -- .../cover/SnakeAutoRepresentation.java | 382 --------- .../cover/SnakeRepresentation.java | 134 --- .../cover/types/BallBasedCover.java | 15 - .../similarityoperators/groupBy/SGBAll.java | 63 -- .../similarityoperators/groupBy/SGBAll_N.java | 73 -- .../groupBy/SGBAll_N_Shuffle.java | 74 -- .../similarityoperators/groupBy/SGBAny.java | 46 - .../similarityoperators/groupBy/SGBAny_N.java | 50 -- .../similarityoperators/groupBy/SGBQuery.java | 192 ----- .../similarityoperators/groupBy/SGBVote.java | 51 -- .../groupBy/SGBVote_N.java | 61 -- .../groupBy/SGBVote_N_Shuffle.java | 63 -- .../groupBy/SGB_All_Any.java | 6 - .../groupBy/SoftSGBStrategies.java | 259 ------ .../operations/Containment.java | 22 - .../operations/Intersection.java | 157 ---- .../operations/Subset.java | 23 - .../similarityoperators/operations/Union.java | 152 ---- .../similarityoperators/utils/ImageNet.java | 89 -- .../utils/MergingType.java | 7 - .../utils/ObjectGroups.java | 33 - .../utils/OverlapType.java | 7 - .../similarityoperators/utils/QueryType.java | 6 - .../similarityoperators/utils/UnionFind.java | 126 --- .../UnsupervisedBinaryClassification.java | 289 ------- src/messif/objects/keys/RadiusObjectKey.java | 186 ---- src/messif/utility/FileUtils.java | 38 - ...llRepresentationAsLocalAbstractObject.java | 107 --- 47 files changed, 5961 deletions(-) delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/AbstractHullRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/AbstractRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/BallRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/CoverRank.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/FVDMRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullBorderRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullCenterRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullFastBorderRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentationV2.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentationV3.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullKNHPRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullMHPRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentationV2.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentationV3.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullPreselectedRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/HullRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/MinDistObjectPairIterator.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/SnakeAutoRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/SnakeRepresentation.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/cover/types/BallBasedCover.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll_N.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll_N_Shuffle.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAny.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAny_N.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/groupBy/SGBQuery.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote_N.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote_N_Shuffle.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/groupBy/SGB_All_Any.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/groupBy/SoftSGBStrategies.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/operations/Containment.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/operations/Intersection.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/operations/Subset.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/operations/Union.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/utils/ImageNet.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/utils/MergingType.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/utils/ObjectGroups.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/utils/OverlapType.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/utils/QueryType.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/utils/UnionFind.java delete mode 100644 src/cz/muni/fi/disa/similarityoperators/utils/UnsupervisedBinaryClassification.java delete mode 100644 src/messif/objects/keys/RadiusObjectKey.java delete mode 100644 src/messif/utility/FileUtils.java delete mode 100644 src/messif/utility/HullRepresentationAsLocalAbstractObject.java diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/AbstractHullRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/AbstractHullRepresentation.java deleted file mode 100644 index 07c531e..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/AbstractHullRepresentation.java +++ /dev/null @@ -1,289 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; - -/** - * A predecessor for all "hull-based" representations. - * @param <T> type to return in {@link #build() } - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public abstract class AbstractHullRepresentation<T extends AbstractRepresentation> extends AbstractRepresentation<T> { - /** List of hull objects */ - protected List<LocalAbstractObject> hull; - - public AbstractHullRepresentation(String locatorID, List<LocalAbstractObject> data) { - super(locatorID, data); - this.hull = new ArrayList<>(); - } - - public AbstractHullRepresentation(List<LocalAbstractObject> data) { - this(null, data); - } - - public AbstractHullRepresentation(PrecomputedDistances shared) { - super(shared); - this.hull = new ArrayList<>(); - } - - @Override - public String toString() { - String base = toShortString(); - StringBuilder sb = new StringBuilder(base.substring(0, base.length()-1)); - sb.append(", "); - boolean sep = false; - for (LocalAbstractObject c : hull) { - if (sep) - sb.append("; "); - sb.append("center=").append(c); - sep = true; - } -// for (LocalAbstractObject c : shared.data) { -// if (sep) -// sb.append("; "); -// sb.append("base data=").append(c); -// sep = true; -// } - sb.append("]"); - return sb.toString(); - } - - - @Override - public String toShortString() { - StringBuilder sb = new StringBuilder(getClass().getSimpleName()); - sb.append('['); - if (locatorID != null) - sb.append("id=").append(locatorID).append(", "); - sb.append("objects=").append(shared.getObjectCount()).append(", "); - sb.append("centers=").append(hull.size()).append(", "); - sb.append("coverage=").append(getCoverage()).append("]"); - return sb.toString(); - } - - @Override - public void write(OutputStream out) throws IOException { - for (LocalAbstractObject h : hull) { - h.write(out); - } - } - - public void writeData(OutputStream out) throws IOException { - for (LocalAbstractObject o : this.shared.data) { - o.write(out); - } - } - - @Override - public Iterator<LocalAbstractObject> getRepresentatives() { - return hull.iterator(); - } - - /** @return List of hull objects */ - public List<LocalAbstractObject> getHull(){ - return hull; - } - - public int[] getHullIdxs(){ - return shared.getIndexes(hull); - } - - @Override - public int getRepresentativesCount() { - return hull.size(); - } - - protected void setCoveredByHull() { - int[] hullIdxs = shared.getIndexes(hull); - setCoveredByHullShared(hullIdxs); - - for (int i = 0; i < shared.getObjectCount(); i++) { - if (isCovered(i)) // Skip hull objects that were set above - continue; - if (isCoveredByHull(i, hullIdxs.length, hullIdxs)) // checks for coverage by all hull points - setCovered(i); - } - if (this.covered.cardinality() == this.shared.getObjectCount()) - allCovered = true; - } - - protected void setCoveredByHullShared(int[] hullIdxs){ - clearCovered(); - if (hull.size() < 2) - return; - - for (int idx : hullIdxs) - setCovered(idx); - - } - - protected static boolean inHull(int idx, int[] idxs) { - for (int i : idxs) { - if (idx == i) - return true; - } - return false; - } - - /** This must be consistent with the computation in isCoveredByHull!!! */ - @Override - public boolean isExternalCovered(LocalAbstractObject o) { - return isExternalCovered(o, hull.size()); - } - - /** This must be consistent with the computation in isCoveredByHull!!! */ - public boolean isExternalCovered(LocalAbstractObject o, int closestHullObjsCnt) { - int[] hullIdxs = shared.getIndexes(hull); - return isExternalCovered(o, closestHullObjsCnt, hullIdxs); - } - - /** This must be consistent with the computation in isCoveredByHull!!! */ - public boolean isExternalCovered(LocalAbstractObject o, int closestHullObjsCnt, int[] hullIdxs) { - Ranked[] knns = getKNNRankedToExtObj(o, closestHullObjsCnt, hull, hullIdxs); - if (knns[0].distance == 0) // A hull object itself matches - return true; - if (knns.length == 1) // Just one hull object, so only this hull object can match (which is handled above), so return false here. - return false; - // Must be consistent with isCoveredByHull! Here we circulate over all hull objects, not just the closest one! - for (int i = 0; i < knns.length; i++) { - if (!isExtObjCoveredByDistSum(knns, i, hullIdxs)) // hullIdxs[knns[i]], hullIdxs)) - return false; - break; // Test just against the 1NN in hull objects! - } - return true; - } - - public CoverRank getExternalCoverRank(LocalAbstractObject o) { - int[] hullIdxs = shared.getIndexes(hull); - Ranked[] knns = getKNNRankedToExtObj(o, hull.size(), hull, hullIdxs); - CoverRank rank = null; - // This must be in consistence with isCoveredByHull!!! - for (int i = 0; i < knns.length; i++) { - if (i == 0) { - rank = getExtObjCoverRankByDistSum(knns, i, hullIdxs); - if (!rank.covered) - break; - } else if (!isExtObjCoveredByDistSum(knns, i, hullIdxs)) { // hullIdxs[knns[i]], hullIdxs)) - rank.covered = false; - break; - } - break; // Test just against the 1NN in hull objects! - } - return rank; - } - - /** - * @param knns array of k-nearest hull points (ranked instances that encapsulates the distance to the external object and index of the hull point in shared data) - * @param nnIdx index to knns parameter that specifies which of them is the nearest one. - * @param hullIdxs index of all hull objects in shared data - * @return - */ - private boolean isExtObjCoveredByDistSum(Ranked[] knns, int nnIdx, int[] hullIdxs) { -// // Sum of distances to NNs (hull objects) but skip the selected NN (in nnIdx) -// float sumObj = 0; -// for (int i = 0; i < knns.length; i++) { -// if (i != nnIdx) -// sumObj += knns[i].distance; -// } -// -// float[] distsNN = shared.getDistances(knns[nnIdx].index); -// float sumNN = shared.sumArray(distsNN, hullIdxs); -// -// return (sumObj <= sumNN); - float[] sums = computeDistanceSumsToNeighbors(knns, nnIdx, hullIdxs); - return (sums[0] <= sums[1]); - } - - /** - * @param knns array of k-nearest hull points (ranked instances that encapsulates the distance to the external object and index of the hull point in shared data) - * @param nnIdx index to knns parameter that specifies which of them is the nearest one. - * @param hullIdxs index of all hull objects in shared data - * @return - */ - protected CoverRank getExtObjCoverRankByDistSum(Ranked[] knns, int nnIdx, int[] hullIdxs) { -// // Sum of distances to NNs (hull objects) but skip the selected NN (in nnIdx) -// float sumObj = 0; -// for (int i = 0; i < knns.length; i++) { -// if (i != nnIdx) -// sumObj += knns[i].distance; -// } -// -// float[] distsNN = shared.getDistances(knns[nnIdx].index); -// float sumNN = shared.sumArray(distsNN, hullIdxs); -// return new CoverRank((sumObj <= sumNN), (sumObj / sumNN) - 0.5f); - float[] sums = computeDistanceSumsToNeighbors(knns, nnIdx, hullIdxs); - final boolean isCovered = (sums[0] <= sums[1]); - return new CoverRank(isCovered, - (isCovered) ? (sums[0] / sums[1]) - 0.5f // [0;0.5] - : (sums[0] / sums[1]) + 10f ); // (10;infty) - } - - private float[] computeDistanceSumsToNeighbors(Ranked[] knns, int nnIdx, int[] hullIdxs) { - // Sum of distances to NNs (hull objects) but skip the selected NN (in nnIdx) - float sumObj = 0; - for (int i = 0; i < knns.length; i++) { - if (i != nnIdx) - sumObj += knns[i].distance; - } - - float[] distsNN = shared.getDistances(knns[nnIdx].index); - float sumNN = shared.sumArray(distsNN, hullIdxs); - - return new float[] { sumObj, sumNN }; - } - - private Ranked[] getKNNRankedToExtObj(LocalAbstractObject o, int k, List<LocalAbstractObject> hull, int[] hullIdxs) { - Ranked[] ranked = new Ranked[hull.size()]; - for (int i = 0; i < ranked.length; i++) { - ranked[i] = new Ranked(o.getDistance(hull.get(i)), hullIdxs[i]); - } - Arrays.sort(ranked); - return Arrays.copyOf(ranked, Math.min(k, ranked.length)); - } - - protected boolean isCoveredByHull(int objIdx, int closestHullObjsCnt, int[] hullIdxs) { - int[] knns = getKNNIndexes(objIdx, closestHullObjsCnt, hullIdxs); - // Must be consistent with isExternalCovered! Here we circulate over all hull objects, not just the closest one! - for (int i = 0; i < knns.length; i++) { - if (!isCoveredByDistSum(objIdx, hullIdxs[knns[i]], hullIdxs)) - return false; - break; // Test just against the 1NN in hull objects! - } - return true; - } - - protected boolean isCoveredByDistSum(int objIdx, int nnIdx, int[] nnIdxs) { - float[] distsObj = shared.getDistances(objIdx); - // Sum of distances to NNs but skip the selected NN (in nnIdx) - float sumObj = shared.sumArray(distsObj, nnIdxs) - distsObj[nnIdx]; - - float[] distsNN = shared.getDistances(nnIdx); - float sumNN = shared.sumArray(distsNN, nnIdxs); - - return (sumObj <= sumNN); - } - - /** Based on F-score, where importance is put on coverage than hull size. */ - public float getCoverageScore() { - float beta = 1f; // or 2f - float beta2 = beta * beta; - - float coverage = getCoverage(); // aka precision - float hullSize = (float)(shared.getObjectCount() - hull.size()) / (float)(shared.getObjectCount() - 3); // aka recall - - return (1 + beta2) * coverage * hullSize / (beta2 * coverage + hullSize); - } - -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/AbstractRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/AbstractRepresentation.java deleted file mode 100644 index 7d76d27..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/AbstractRepresentation.java +++ /dev/null @@ -1,809 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; -import messif.statistics.Statistics; - -import java.io.IOException; -import java.io.OutputStream; -import java.text.DateFormat; -import java.util.*; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - * @param <T> type to return in {@link #build() } - */ -public abstract class AbstractRepresentation<T extends AbstractRepresentation> { - /** ID of this representations, e.g. cluster ID, subset name */ - protected String locatorID; - - /** Data and precomputed distances */ - protected final PrecomputedDistances shared; - - /** Flags of being covered for each data object */ - protected final BitSet covered; - /** Flag to signal that all objects are covered. */ - protected boolean allCovered; - - public AbstractRepresentation(String locatorID, List<LocalAbstractObject> data) { - this.shared = new PrecomputedDistances(data); - this.covered = new BitSet(data.size()); - this.locatorID = locatorID; - } - - public AbstractRepresentation(List<LocalAbstractObject> data) { - this(null, data); - } - - public AbstractRepresentation(PrecomputedDistances shared) { - this.shared = shared; - this.covered = new BitSet(shared.getObjectCount()); - this.locatorID = null; - } - - public final T build() { - long timeStart = System.currentTimeMillis(); - System.err.println(timeStart + " started"); - select(); - long timeStop = System.currentTimeMillis(); - System.err.println("Selection by " + getClass().getSimpleName() + " took: " + (timeStop - timeStart) + " msec"); - return (T)this; - } - - public String getLocatorID() { - return locatorID; - } - - public void setLocatorID(String locatorID) { - this.locatorID = locatorID; - } - - /** - * Implementation of representation selection algorithm goes here. - * This part is timed, so the duration in msec will be printed to stderr. - */ - protected abstract void select(); - - /** Method for checking whether an external object (out of passed list of objects) is covered by this representation or not. - * @param o external object - * @return decision whether the object is covered by this representation or not - */ - public boolean isExternalCovered(LocalAbstractObject o) { - return false; - } - - protected final boolean isCovered(LocalAbstractObject o) { - return isCovered(shared.getIndex(o)); - } - - protected final boolean isCovered(int idx) { - return covered.get(idx); - } - - protected final void setCovered(LocalAbstractObject o) { - setCovered(shared.getIndex(o)); - } - - protected final void setCovered(int idx) { - covered.set(idx); - } - - public final List<LocalAbstractObject> getNotCovered(){ - List<LocalAbstractObject> notCovered = new ArrayList<>(); - for (int i = 0; i < shared.getObjectCount(); i++){ - if (!isCovered(i)) { - notCovered.add(shared.getObject(i)); - } - } - return notCovered; - } - - public void printNotCovered(OutputStream out) throws IOException { - List<LocalAbstractObject> notCovered = getNotCovered(); - for (LocalAbstractObject object : notCovered) { - object.write(out); - } - } - - protected final void clearCovered(LocalAbstractObject o) { - clearCovered(shared.getIndex(o)); - } - - protected final void clearCovered(int idx) { - covered.clear(idx); - allCovered = false; - } - - protected final void clearCovered() { - covered.clear(); - allCovered = false; - } - - protected final boolean isAllCovered() { - if (allCovered) - return true; - allCovered = (covered.cardinality() == shared.getObjectCount()); - return allCovered; - } - - /** Percentange of coverage of the data-set by the selected cover points. - * @return value between 0 and 1 corresponding to coverage. The objects selected as cover points are not included in the computation. - * I.e. coverage = |covered - cover_points| / |data_set - cover_points| - */ - public final float getCoverage() { - if (allCovered) - return 1f; - int excl = getRepresentativesCount(); - return (float)(covered.cardinality() - excl) / (float)(shared.getObjectCount() - excl); - } - - protected void reportNonCovered() { - int from = 0; - boolean firstPrint = true; - while ((from = this.covered.nextClearBit(from)) < this.covered.length()) { - if (firstPrint) { - System.err.println("# Reporting non-covered objects: " + DateFormat.getInstance().format(Calendar.getInstance().getTime())); - firstPrint = false; - } - System.err.println("# Non-covered: " + this.shared.getObject(from).getLocatorURI()); - from++; - } - } - - protected void setCoveredByBall(LocalAbstractObject center, float radius) { - int centerIdx = shared.getIndex(center); - float[] dists = shared.getDistances(centerIdx); - for (int i = 0; i < shared.getObjectCount(); i++) { - if (dists[i] <= radius) - setCovered(i); - } - } - - protected LocalAbstractObject getMedoid() { - float min = Float.MAX_VALUE; - LocalAbstractObject medoid = null; - - for (int i = 0; i < shared.getObjectCount(); i++) { - float sum = shared.sumArray(shared.getDistances(i)); - if (sum < min) { - min = sum; - medoid = shared.getObject(i); - } - } - - return medoid; - } - - protected LocalAbstractObject getMedoid(List<LocalAbstractObject> lst) { - int[] idxs = shared.getIndexes(lst); - - float min = Float.MAX_VALUE; - LocalAbstractObject medoid = null; - - for (int i = 0; i < idxs.length; i++) { - float sum = shared.sumArray(idxs[i], idxs); - if (sum < min) { - min = sum; - medoid = lst.get(i); - } - } - - return medoid; - } - - /** Get furthest object in the data */ - protected LocalAbstractObject getFurthest() { - float max = Float.MIN_VALUE; - int maxIdx = 0; - LocalAbstractObject f = null; - - while (true) { - float[] dists = shared.getDistances(maxIdx); - maxIdx = shared.maxDistInArray(dists); - if (dists[maxIdx] > max) { - f = shared.getObject(maxIdx); - max = dists[maxIdx]; - } else { - break; // we have found it - } - } - return f; - } - - /** Get furthest idx of object within given indexes - * some object has to be the furthest.. else the method would not be called **/ - protected int getFurthestIdx(BitSet notUsedIdxs) { - return getFurthestIdx(shared.distances, notUsedIdxs); - } - - /** Get furthest idx of object within given indexes with respect to provided distMatrix - * some object has to be the furthest.. else the method would not be called **/ - protected int getFurthestIdx(float[][] distMatrix, BitSet notUsedIdxs) { - float max = Float.MIN_VALUE; - int maxIdx = notUsedIdxs.nextSetBit(0); // first not used index - - while (true) { - float[] dists = distMatrix[maxIdx]; - int candidateMaxIdx = shared.maxDistInArray(dists, notUsedIdxs); // max dist within the notUsedIdxs in array - if (dists[candidateMaxIdx] > max) { - max = dists[candidateMaxIdx]; - maxIdx = candidateMaxIdx; - } - else { - break; // we have found it, thus return the idx from prev. iteration - } - } - return maxIdx; - } - - - /** - * Get minimum distance among the passed objects. - * @param lst object to search in - * @return minimum distance, zero if the passed list is empty or contains just one object. - */ - protected float getMinDist(List<LocalAbstractObject> lst) { - if (lst == null || lst.size() <= 1) - return 0f; - int[] idxs = shared.getIndexes(lst); - - float min = Float.MAX_VALUE; - for (int i = 0; i < idxs.length - 1; i++) { - float d = shared.minDist(idxs[i], idxs); - if (d < min) - min = d; - } - return min; - } - - /** Find the maximum distance in the data ("shared" matrix) from the passed object. - * @param objIdx index to {@link #shared} that specifies the object - * @return max distance from the passed object to any other object in the data. - */ - protected float getMaxDist(int objIdx) { - float[] dists = shared.getDistances(objIdx); - float max = dists[0]; - for (float d : dists) { - if (d > max) - max = d; - } - return max; - } - - protected float getRadius(LocalAbstractObject center, List<LocalAbstractObject> lst) { - float radius = 0f; - int centerIdx = shared.getIndex(center); - final float[] centerDists = shared.getDistances(centerIdx); - - if (lst == getObjects()) { - for (float d : centerDists) - if (d > radius) - radius = d; - } else { - for (LocalAbstractObject o : lst) { - float d = centerDists[shared.getIndex(o)]; - if (d > radius) - radius = d; - } - } - - return radius; - } - - /** Find all objects within the passed ball. - * Its center is not included in the answer! - * @param center ball center - * @param radius ball radius - * @return list of objects in the ball except the center object. - */ - protected List<LocalAbstractObject> getBallObjects(LocalAbstractObject center, float radius, boolean excludeCovered) { - List<LocalAbstractObject> res = new ArrayList<>(); - int centerIdx = shared.getIndex(center); - float[] dists = shared.getDistances(centerIdx); - for (int i = 0; i < shared.getObjectCount(); i++) { - if (excludeCovered && isCovered(i)) - continue; - if (dists[i] <= radius && centerIdx != i) - res.add(shared.getObject(i)); - } - return res; - } - - /** - * Find k-nearest neighbors to the passed object. This object itself is not returned as one of the neighbors. - * @param center center (query object) - * @param k number of nearest neighbors to indentify - * @return list of k-nearest neighbor objects except the center object. - */ - protected List<LocalAbstractObject> getKNNObjects(LocalAbstractObject center, int k) { - int centerIdx = shared.getIndex(center); - Ranked[] dists = Ranked.create(shared.getDistances(centerIdx)); - Arrays.sort(dists); - List<LocalAbstractObject> res = new ArrayList<>(k); - for (int i = 0; i < 1 + Math.min(k, dists.length); i++) { - if (dists[i].index == centerIdx) - continue; - res.add(shared.getObject(dists[i].index)); - } - return res; - } - - /** - * Find k-nearest neighbors to the passed object within the passed data objects. The center object itself is not returned as one of the neighbors. - * All objects are assumed to be part of data in {@link #shared} data-set. - * @param center center (query object) - * @param k number of nearest neighbors to indentify - * @param data list of objects to find neighbors in - * @return list of k-nearest neighbor objects except the center object. - */ - protected List<LocalAbstractObject> getKNNObjects(LocalAbstractObject center, int k, List<LocalAbstractObject> data) { - int centerIdx = shared.getIndex(center); - float[] dists = shared.getDistances(centerIdx); - - Ranked[] ranked = new Ranked[data.size()]; - for (int i = 0; i < data.size(); i++) { - int objIdx = shared.getIndex(data.get(i)); - ranked[i] = new Ranked(dists[objIdx], i); - } - Arrays.sort(ranked); - List<LocalAbstractObject> res = new ArrayList<>(k); - for (int i = 0; i < Math.min(k, ranked.length); i++) { - //if (ranked[i].index == centerIdx) // This is TODO! - // continue; - res.add(data.get(ranked[i].index)); - } - return res; - } - - /** - * Find k-nearest neighbors to the passed object within the passed data objects. - * The center object itself is not returned as one of the neighbors if it is not part of the passed dataIdxs array. - * All objects are assumed to be part of data in {@link #shared} data-set. - * @param centerIdx index of center (query object) - * @param k number of nearest neighbors to indentify - * @param dataIdxs array of indexes to objects where the neighbors are located in - * @return list of indexes to k-nearest neighbor objects. - */ - protected int[] getKNNIndexes(int centerIdx, int k, int[] dataIdxs) { - float[] dists = shared.getDistances(centerIdx); - return getKNNIndexesShared(k, dataIdxs, dists); - } - - // centerPoint is outside the precomputed distances - public int[] getKNNIndexesInOtherData(float[] dists, int k, int[] dataIdxs) { - return getKNNIndexesShared(k, dataIdxs, dists); - } - - protected int[] getKNNIndexes(int centerIdx, int k, BitSet idxs) { - Ranked[] ranked = getRankedIdxs(idxs, shared.getDistances(centerIdx)); - int len = Math.min(k, ranked.length); - int[] res = new int[len]; - for (int i = 0; i < len; i++) - res[i] = ranked[i].index; - return res; - - } - - private int[] getKNNIndexesShared(int k, int[] dataIdxs, float[] dists){ - Ranked[] ranked = getRankedIdxs(dataIdxs, dists); // sorted - int len = Math.min(k, ranked.length); - int[] res = new int[len]; - for (int i = 0; i < len; i++) - res[i] = ranked[i].index; - return res; - } - - protected Ranked[] getRankedIdxs(int[] dataIdxs, float[] dists){ - Ranked[] ranked = new Ranked[dataIdxs.length]; - for (int i = 0; i < dataIdxs.length; i++) { - int objIdx = dataIdxs[i]; - ranked[i] = new Ranked(dists[objIdx], i); - } - Arrays.sort(ranked); - return ranked; - } - - protected Ranked[] getRankedIdxs(BitSet dataIdxs, float[] dists){ - Ranked[] ranked = new Ranked[dataIdxs.cardinality()]; - int i = 0; - for (int index = dataIdxs.nextSetBit(0); index >= 0; - index = dataIdxs.nextSetBit(index + 1)) { - ranked[i] = new Ranked(dists[index], index); - i++; - } - Arrays.sort(ranked); - return ranked; - } - - public abstract String toShortString(); - - public void write(OutputStream out) throws IOException { - } - - /** - * Selected representatives. - * @return iterator over selected representatives. - */ - public abstract Iterator<LocalAbstractObject> getRepresentatives(); - - /** - * Count of objects selected as representatives of the passed data - * @return number of objects that are representatives - */ - public abstract int getRepresentativesCount(); - - public List<LocalAbstractObject> getObjects() { - return shared.getObjects(); - } - - //**************************************************************************** - // HELPERS - //**************************************************************************** - - public static class Ranked implements Comparable<Ranked> { - public final float distance; - public final int index; - - public Ranked(float distance, int index) { - this.distance = distance; - this.index = index; - } - - @Override - public int compareTo(Ranked o) { - int cmp = Float.compare(this.distance, o.distance); - if (cmp == 0) - cmp = Integer.compare(this.index, o.index); - return cmp; - } - - public static Ranked[] create(float[] dists) { - Ranked[] ret = new Ranked[dists.length]; - for (int i = 0; i < dists.length; i++) - ret[i] = new Ranked(dists[i], i); - return ret; - } - } - - /** Class encapsulating data objects and precomputed distances related to them. - * This is good for re-using the common work. - */ - public static class PrecomputedDistances { - /** Number of threads to use to compute the distance matrix -- default is one */ - public static int COMPUTATION_THREADS = 1; - - /** Data to summarize / represent */ - protected List<LocalAbstractObject> data; - - /** Map from object's locator to index in data or precomputed distances array */ - private Map<String,Integer> indexes; - /** Distance matrix */ - private float[][] distances; - - public PrecomputedDistances(List<LocalAbstractObject> data) { - this.data = data; - // Precompute distance matrix - initDistanceMatrix(); - } - - private void initDistanceMatrix() { - long timeStart = System.currentTimeMillis(); - - indexes = new HashMap<>(data.size()); - for (int i = 0; i < data.size(); i++) - indexes.put(data.get(i).getLocatorURI(), i); - - if (COMPUTATION_THREADS == 1) - initDistanceMatrixOneThread(); - else - initDistanceMatrixMultiThread(); - - long timeStop = System.currentTimeMillis(); - System.err.println("Distance matrix initialization for " + data.size() + " objects took: " + (timeStop - timeStart) + " msec"); - } - - private void initDistanceMatrixOneThread() { - distances = new float[data.size()][data.size()]; - for (int i = 0; i < data.size(); i++) { - //Arrays.fill(distances[i], 0); - for (int j = i + 1; j < data.size(); j++) { - float d = data.get(i).getDistance(data.get(j)); -// if (d == 0f) -// throw new RuntimeException("Zero distance between " + data.get(i) + " and " + data.get(j)); - distances[i][j] = d; - distances[j][i] = d; - } - } - } - - private void initDistanceMatrixMultiThread() { - boolean statsEnable = Statistics.isEnabledGlobally(); - Statistics.disableGlobally(); - distances = new float[data.size()][data.size()]; - try { - long distsPerThread = data.size() / 2 * data.size() / COMPUTATION_THREADS; - int poolThreads = (distsPerThread <= COMPUTATION_THREADS) ? 1 : COMPUTATION_THREADS; - final ExecutorService pool = Executors.newFixedThreadPool(poolThreads); - - int[] counts = new int[data.size()]; - for (int i = 0; i < data.size(); i++) { - counts[i] = data.size() - i + 1; - } - - int totForThread = 0; - int startForThread = 0; - for (int i = 0; i < data.size(); i++) { - totForThread += counts[i]; - if (totForThread >= distsPerThread) { - pool.submit(new DistanceMatrixTask(data, distances, startForThread, i + 1)); - totForThread = 0; - startForThread = i + 1; - } - } - if (totForThread > 0) - pool.submit(new DistanceMatrixTask(data, distances, startForThread, data.size())); - - pool.shutdown(); - while (!pool.awaitTermination(1, TimeUnit.MINUTES)) - ; - } catch (InterruptedException ex) { - Logger.getLogger(AbstractRepresentation.class.getName()).log(Level.SEVERE, null, ex); - } - if (statsEnable) - Statistics.enableGlobally(); - } - - public int getObjectCount() { - return data.size(); - } - - public LocalAbstractObject getObject(int i) { - return data.get(i); - } - - public List<LocalAbstractObject> getObjects() { - return data; - } - - public int getIndex(LocalAbstractObject o) { - return indexes.get(o.getLocatorURI()); - } - - public int[] getIndexes(List<LocalAbstractObject> lst) { - int[] idxs = new int[lst.size()]; - for (int i = 0; i < lst.size(); i++) { - idxs[i] = indexes.get(lst.get(i).getLocatorURI()); - } - return idxs; - } - - public float getDistance(LocalAbstractObject o1, LocalAbstractObject o2) { - return distances[indexes.get(o1.getLocatorURI())][indexes.get(o2.getLocatorURI())]; - } - - public float getDistance(int idx1, int idx2) { - return distances[idx1][idx2]; - } - - public float[] getDistances(int objIdx) { - return distances[objIdx]; - } - - public float[][] getDistances() { - return distances; - } - - public float sumArray(float[] arr) { - float sum = 0; - for (float v : arr) - sum += v; - return sum; - } - - public float sumArray(float[] arr, int[] selectedIndexes) { - float sum = 0; - for (int i : selectedIndexes) - sum += arr[i]; - return sum; - } - - public float sumArray(int fromObjectAtIndex, int[] selectedIndexes) { - return sumArray(distances[fromObjectAtIndex], selectedIndexes); - } - - /** Index of maximum distance in the passed array */ - public int maxDistInArray(float[] arr) { - int idx = -1; - float max = 0f; - for (int i = 0; i < arr.length; i++) { - if (idx == -1 || max < arr[i]) { - max = arr[i]; - idx = i; - } - } - return idx; - } - - /** Index of maximum distance in the passed array considering only not used indexes */ - public int maxDistInArray(float[] arr, BitSet notUsedIndexes) { - int idx = -1; - float max = 0f; - for (int i = notUsedIndexes.nextSetBit(0); i >= 0; - i = notUsedIndexes.nextSetBit(i + 1)) { - if (idx == -1 || max < arr[i]) { - max = arr[i]; - idx = i; - } - } - return idx; - } - - public int minDistInArray(float[] arr, BitSet notUsedIndexes) { - int idx = -1; - float min = Float.MAX_VALUE; - for (int i = notUsedIndexes.nextSetBit(0); i >= 0; - i = notUsedIndexes.nextSetBit(i + 1)) { - if (idx == -1 || min > arr[i]) { - min = arr[i]; - idx = i; - } - } - return idx; - } - - public int minDistInArrayExceptIdx(float[] arr, BitSet notUsedIndexes, int notIncludeIdx) { - int idx = -1; - float min = Float.MAX_VALUE; - for (int i = notUsedIndexes.nextSetBit(0); i >= 0; - i = notUsedIndexes.nextSetBit(i + 1)) { - if(i == notIncludeIdx){ - continue; - } - if (idx == -1 || min > arr[i]) { - min = arr[i]; - idx = i; - } - } - return idx; - } - - public float minDist(int fromObjectAtIndex, int[] selectedIndexes) { - return minDist(distances[fromObjectAtIndex], fromObjectAtIndex, selectedIndexes); - } - - public MinDistObjectPair minDist(BitSet selectedIndexes) { - float min = Float.MAX_VALUE; - int minIndexFst = -1; - int minIndexSec = -1; - for (int i = selectedIndexes.nextSetBit(0); i >= 0; - i = selectedIndexes.nextSetBit(i + 1)) { - for (int j = selectedIndexes.nextSetBit(i + 1); j >= 0; - j = selectedIndexes.nextSetBit(j + 1)) { - float d = distances[i][j]; - if (d < min) { - min = d; - minIndexFst = i; - minIndexSec = j; - } - } - } - return new MinDistObjectPair(minIndexFst, minIndexSec, min); - } - - public MinDistObjectPair minDist(int[] selectedIndexes) { - return minDist(distances, selectedIndexes); - } - - public int minDistIdx(float[] arr, int fromObjectAtIndex){ - float min = Float.MAX_VALUE; - int idx = fromObjectAtIndex; - for (int i = 0; i < arr.length; i++) { - if (i == fromObjectAtIndex) - continue; // skip myself - float d = arr[i]; - if (d < min) { - min = d; - idx = i; - } - } - return idx; - } - - public float minDist(float[] arr, int fromObjectAtIndex, int[] selectedIndexes) { - float min = Float.MAX_VALUE; - for (int i = 0; i < selectedIndexes.length; i++) { - if (selectedIndexes[i] == fromObjectAtIndex) - continue; // skip myself - float d = arr[selectedIndexes[i]]; - if (d < min) - min = d; - } - return min; - } - - - /** Thread for computing a new artifical cluster center as a mean vector of all passed vectors. */ - private class DistanceMatrixTask implements Runnable { - private final List<LocalAbstractObject> data; - private final float[][] distances; - private final int start; - private final int end; - - public DistanceMatrixTask(List<LocalAbstractObject> data, float[][] distances, int indexStart, int indexEnd) { - this.data = data; - this.distances = distances; - this.start = indexStart; - this.end = indexEnd; - } - - @Override - public void run() { - for (int i = start; i < end; i++) { - for (int j = i + 1; j < data.size(); j++) { - float d = data.get(i).getDistance(data.get(j)); - distances[i][j] = d; - distances[j][i] = d; - } - } - } - } - - public MinDistObjectPair minDist(float[][] arr, int[] selectedIndexes) { - float min = Float.MAX_VALUE; - int minIndexFst = -1; - int minIndexSec = -1; - for (int i = 0; i < selectedIndexes.length; i++) { - for (int j = i + 1; j < selectedIndexes.length; j++) { - float d = arr[i][j]; - if (d < min) { - min = d; - minIndexFst = selectedIndexes[i]; - minIndexSec = selectedIndexes[j]; - } - } - } - return new MinDistObjectPair(minIndexFst, minIndexSec, min); - } - - /** Computes distances from an extObject to all data objects */ - public float[] getDistancesToExtObj(LocalAbstractObject extObject){ - float[] dists = new float[data.size()]; - for (int i = 0; i < data.size(); i++) - dists[i] = extObject.getDistance(data.get(i)); - return dists; - } - - } - - public static class MinDistObjectPair implements Comparable<MinDistObjectPair>{ - protected int firstIdx; - protected int secondIdx; - protected float distance; - - public MinDistObjectPair(int indexFst, int indexSec, float distance){ - this.distance = distance; - this.firstIdx = indexFst; - this.secondIdx = indexSec; - } - - @Override - public int compareTo(MinDistObjectPair o) { - int cmp = Float.compare(this.distance, o.distance); - if (cmp == 0) - cmp = Integer.compare(this.firstIdx, o.firstIdx); - if (cmp == 0) - cmp = Integer.compare(this.secondIdx, this.secondIdx); - return cmp; - } - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/BallRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/BallRepresentation.java deleted file mode 100644 index 40a7321..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/BallRepresentation.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import cz.muni.fi.disa.similarityoperators.cover.types.BallBasedCover; -import messif.objects.LocalAbstractObject; -import messif.objects.keys.RadiusObjectKey; -import messif.utility.FileUtils; - -import java.io.IOException; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; - -/** - * Ball representation of data set. - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class BallRepresentation extends AbstractRepresentation<BallRepresentation> implements BallBasedCover { - private LocalAbstractObject center; - private float radius; - - /** Instantiate a ball representation from a serialized file containing one object the key of which is RadiusObjectKey that holds the ball radius. - * @param locatorID ID/symbolic name of this representation - * @param objCls class of objects - * @param filename file containing MESSIF serialized objects - * @throws IllegalArgumentException - * @throws IOException - */ - public BallRepresentation(String locatorID, Class<LocalAbstractObject> objCls, String filename) throws IllegalArgumentException, IOException { - super(locatorID, Collections.EMPTY_LIST); - center = FileUtils.readObjects(objCls, filename).get(0); - radius = center.getObjectKey(RadiusObjectKey.class).getRadius(); - } - - public BallRepresentation(List<LocalAbstractObject> data) { - super(data); - } - - public BallRepresentation(PrecomputedDistances shared) { - super(shared); - } - - public void setCenter(LocalAbstractObject center) { - this.center = center; - radius = getRadius(center, shared.getObjects()); - setCoveredByBall(center, radius); - } - - /** Caution: this method just sets the radius manually and recalculated coverage of data in shared array. */ - public void setRadius(float radius) { - this.radius = radius; - setCoveredByBall(center, radius); - } - - @Override - protected void select() { - setCenter(getMedoid()); - } - - @Override - public boolean isExternalCovered(LocalAbstractObject o) { - return (o.getDistance(center) <= radius); - } - - public CoverRank getExternalCoverRank(LocalAbstractObject o) { - final float d = o.getDistance(center); - return new CoverRank((d <= radius), d); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(getClass().getSimpleName()); - sb.append('['); - sb.append("objects=").append(shared.getObjectCount()).append(", "); - sb.append("center=").append(center).append(", radius=").append(radius).append(']'); - return sb.toString(); - } - - @Override - public String toShortString() { - StringBuilder sb = new StringBuilder(getClass().getSimpleName()); - sb.append('['); - if (locatorID != null) - sb.append("id=").append(locatorID).append(", "); - sb.append("objects=").append(shared.getObjectCount()).append(", "); - sb.append("radius=").append(radius).append(']'); - return sb.toString(); - } - - @Override - public Iterator<LocalAbstractObject> getRepresentatives() { - return Collections.singletonList(center).iterator(); - } - - @Override - public int getRepresentativesCount() { - return 1; - } - - @Override - public float getAverageRadius() { - return radius; - } - -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/CoverRank.java b/src/cz/muni/fi/disa/similarityoperators/cover/CoverRank.java deleted file mode 100644 index 4f6c74c..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/CoverRank.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -/** - * Encapsulated result of coverage test and ranking of the object, i.e. how much the object is contained in a representation (aka in the middle of a cluster). - * The smaller value of rank, the better. - */ -public class CoverRank implements Comparable<CoverRank> { - boolean covered; - float rank; - - public CoverRank(boolean covered, float rank) { - this.covered = covered; - this.rank = rank; - } - - @Override - public int compareTo(CoverRank o) { - if (this.covered != o.covered) { - return (o.covered) ? 1 : -1; - } - return Float.compare(this.rank, o.rank); // The lower the rank the better (more in the middle of the hull) - } - - public boolean isCovered() { - return covered; - } - - public float getRank() { - return rank; - } - - @Override - public String toString() { - return "CoverRank{" + "covered=" + covered + ", rank=" + rank + '}'; - } - -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/FVDMRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/FVDMRepresentation.java deleted file mode 100644 index 7278a47..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/FVDMRepresentation.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - - -import messif.objects.LocalAbstractObject; -import messif.utility.FileUtils; - -import java.io.IOException; -import java.util.List; - -/** - * FVDM method for hull vertices (points) -- full name: Finding Vertices with Distance Matrix - * - * This variant does not use any out-of-sample data to select the hull point finely. - * - * Paper: Zhong, J., Tang, K., & Qin, A. K. (2014). Finding convex hull vertices in metric space. - * In Proceedings of the International Joint Conference on Neural Networks (pp. 1587–1592). - * Institute of Electrical and Electronics Engineers Inc. https://doi.org/10.1109/IJCNN.2014.6889699 - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class FVDMRepresentation extends AbstractHullRepresentation<FVDMRepresentation> { - - /** Instantiate a FVDM representation from a serialized file containing the outlier objects. - * @param locatorID ID/symbolic name of this representation - * @param objCls class of objects - * @param filename file containing MESSIF serialized objects - * @throws IllegalArgumentException - * @throws IOException - */ - public FVDMRepresentation(String locatorID, Class<LocalAbstractObject> objCls, String filename) throws IllegalArgumentException, IOException { - super(locatorID, FileUtils.readObjects(objCls, filename)); // We do not select hull objects here, we rather take existing ones (already selected). - hull.addAll(getObjects()); - } - - public FVDMRepresentation(List<LocalAbstractObject> data) { - super(data); - } - - public FVDMRepresentation(PrecomputedDistances shared) { - super(shared); - } - - @Override - protected void select() { - fvdmSelect(); - //fvdmSelectByPaper(); - setCoveredByHull(); - } - - private void fvdmSelect() { - for (int i = 0; i < shared.getObjectCount(); i++) { - float[] dists = shared.getDistances(i); - float max = dists[0]; - int maxIdx = 0; - for (int j = 1; j < shared.getObjectCount(); j++) { - float d = dists[j]; - if (d > max) { - max = d; - maxIdx = j; - } - } - LocalAbstractObject hoNew = shared.getObject(maxIdx); - if (!hull.contains(hoNew)) - hull.add(hoNew); - } - } - - private void fvdmSelectByPaper() { - for (int i = 0; i < shared.getObjectCount(); i++) { - for (int j = 0; j < shared.getObjectCount(); j++) { - float maxFromJ = getMaxDist(j); - if (maxFromJ == shared.getDistance(i, j)) { - hull.add(shared.getObject(i)); - break; - } - } - } - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullBorderRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullBorderRepresentation.java deleted file mode 100644 index a101151..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullBorderRepresentation.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; - -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -/** - * BORDER method for hull vertices (points) -- BOundaRy points DEtectoR - * - * It employs the special property of the reverse k nearest neighbor (RkNN). - * The boundary points whose RkNN number is smaller than a user predefined threshold can be output incrementally. - * - * Here, we select boundary objects while coverage by the hull objects is increasing and we also use the optimization procedure from our Hull - * to minimize the number of hull objects. - * - * Paper: Xia, C., Hsu, W., Lee, M. L., & Ooi, B. C. (2006). - * BORDER: Efficient computation of boundary points. - * IEEE Transactions on Knowledge and Data Engineering, 18(3), 289–303. https://doi.org/10.1109/TKDE.2006.38 - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class HullBorderRepresentation extends AbstractHullRepresentation<HullFastBorderRepresentation> { - /** Ration of kThreshold / K used by the authors. In particular, it was 35 / 50. */ - private static final float K_TO_THRESHOLD_RATIO = 0.70f; - - /** First values is object index; second value is rnnCount */ - private int[][] rnnCounts; - - public HullBorderRepresentation(List<LocalAbstractObject> data) { - super(data); - } - - public HullBorderRepresentation(PrecomputedDistances shared) { - super(shared); - } - - @Override - protected void select() { - // Take 1/4 of data at maximum! - int k = (shared.getObjectCount() >= 200) ? 50 : shared.getObjectCount() / 4; - int kThreshold = (int)(K_TO_THRESHOLD_RATIO * k); // was: 35; - - initReverseNNCounts(k, kThreshold); - // Take the first 3 - hull.add(shared.getObject(rnnCounts[0][0])); - hull.add(shared.getObject(rnnCounts[1][0])); - hull.add(shared.getObject(rnnCounts[2][0])); - - setCoveredByHull(); - float bestCoverage = getCoverage(); - - int prevRnnCount = 0; - for (int i = 3; !isAllCovered() && i < rnnCounts.length && rnnCounts[i][1] < kThreshold; i++) { - if (rnnCounts[i][1] != prevRnnCount) { - prevRnnCount = rnnCounts[i][1]; - System.err.println("Current RkNN value: " + prevRnnCount + " Threshold: " + kThreshold + " Obj index: " + i); - } - LocalAbstractObject o = shared.getObject(i); - hull.add(o); - setCoveredByHull(); - float coverage = getCoverage(); - if (coverage >= bestCoverage) { - bestCoverage = coverage; - System.err.println("Current coverage: " + bestCoverage + " Hull size: " + hull.size()); - } else { - bestCoverage = optimize(bestCoverage); - } - } - setCoveredByHull(); - System.err.println("Border hull: " + hull.size() + " objects"); - System.err.println("Coverage: " + (int)Math.round(getCoverage() * 100f) + "%"); - System.err.println("Cov ratio: " + getCoverageScore()); - } - - /** - * Taken from {@link HullOptimizedRepresentationV2} but the "presenceInHull" is removed. - * @return resulting coverage after optimization - */ - private float optimize(float coverageBest) { - int indexBest = -1; - - for (int i = 0; i < hull.size() - 1; i++) { - LocalAbstractObject hoRemoved = hull.remove(i); - setCoveredByHull(); - float cvg = getCoverage(); - if (cvg > coverageBest) { - coverageBest = cvg; - indexBest = i; - } - hull.add(i, hoRemoved); - } - // Improved by replacement, so apply the removal - if (indexBest >= 0) { - hull.remove(indexBest); - // Update the coverage back to original state! - setCoveredByHull(); - System.err.print("# Removed hull object at " + indexBest + " to increase coverage: "); System.err.println(toString()); - } else { - setCoveredByHull(); // Restore original state - } - return coverageBest; - } - - /** Calculate counts of being kNN for the passed "k" */ - private void initReverseNNCounts(int k, int kThreshold) { - long start = System.nanoTime(); - rnnCounts = new int[shared.getObjectCount()][2]; - int[] allData = new int[shared.getObjectCount()]; - for (int i = 0; i < shared.getObjectCount(); i++) { - allData[i] = i; - rnnCounts[i] = new int[] {i, 0}; - } - for (int i = 0; i < shared.getObjectCount(); i++) { - int[] knns = getKNNIndexes(i, k, allData); - for (int idx : knns) - rnnCounts[idx][1]++; - } - Arrays.sort(rnnCounts, (int[] o1, int[] o2) -> { - return Integer.compare(o1[1], o2[1]); - }); - System.err.println("RnnCounts took: " + (System.nanoTime() - start) / 10000000f + " msec"); - // Get histogram of RkNN counts - Map<Integer,Integer> hist = new TreeMap<>(); - int objsUpToThreshold = 0; - for (int[] cnt : rnnCounts) { - hist.put(cnt[1], hist.getOrDefault(cnt[1], 0) + 1); - if (cnt[1] < kThreshold) - objsUpToThreshold++; - } - System.err.println("RkNN counts histogram: " + hist); - System.err.println("Objects up to kThreshold " + kThreshold + ": " + objsUpToThreshold); - } - -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullCenterRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullCenterRepresentation.java deleted file mode 100644 index efc4b63..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullCenterRepresentation.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; - -import java.util.List; - -/** - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class HullCenterRepresentation extends HullRepresentation { - /** Center of the hull -- can be selected from covered objects or given from outside */ - private LocalAbstractObject center; - - public HullCenterRepresentation(List<LocalAbstractObject> data) { - super(data); - } - - public HullCenterRepresentation(PrecomputedDistances shared) { - super(shared); - } - - public LocalAbstractObject getCenter() { - return center; - } - - public void setCenter(LocalAbstractObject center) { - this.center = center; - } - - @Override - public CoverRank getExternalCoverRank(LocalAbstractObject o) { - CoverRank rank = super.getExternalCoverRank(o); - float d = o.getDistance(center); - System.err.println("HullRank: ID=" + getLocatorID() + ", hull_center=" + center.getLocatorURI() + ", obj=" + o.getLocatorURI() - + ", hullrank=" + rank.rank + ", centerrank=" + d + ",covered=" + ((rank.isCovered()) ? 1 : 0 )); - rank.rank = d; // Just update the rank here... - return rank; - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullFastBorderRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullFastBorderRepresentation.java deleted file mode 100644 index 7b4b3d9..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullFastBorderRepresentation.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; - -import java.util.*; - -/** - * BORDER method for hull vertices (points) -- BOundaRy points DEtectoR - * - * It employs the special property of the reverse k nearest neighbor (RkNN). - * The boundary points whose RkNN number is smaller than a user predefined threshold can be output incrementally. - * - * Here, we select boundary objects in the order of increasing RkNN count but only up to a predefined number of objects. - * The best hull in terms of coverage is output. - * - * Paper: Xia, C., Hsu, W., Lee, M. L., & Ooi, B. C. (2006). - * BORDER: Efficient computation of boundary points. - * IEEE Transactions on Knowledge and Data Engineering, 18(3), 289–303. https://doi.org/10.1109/TKDE.2006.38 - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class HullFastBorderRepresentation extends AbstractHullRepresentation<HullFastBorderRepresentation> { - /** Ration of kThreshold / K used by the authors. In particular, it was 35 / 50. */ - private static final float K_TO_THRESHOLD_RATIO = 0.70f; - - public static final int MAX_HULL_SIZE = 150; - - /** First values is object index; second value is rnnCount */ - private int[][] rnnCounts; - - public HullFastBorderRepresentation(List<LocalAbstractObject> data) { - super(data); - } - - public HullFastBorderRepresentation(PrecomputedDistances shared) { - super(shared); - } - - @Override - protected void select() { - // Take 1/4 of data at maximum! - int k = (shared.getObjectCount() >= 200) ? 50 : shared.getObjectCount() / 4; - int kThreshold = (int)(K_TO_THRESHOLD_RATIO * k); // was: 35; - - initReverseNNCounts(k, kThreshold); - // Take the first 3 - hull.add(shared.getObject(rnnCounts[0][0])); - hull.add(shared.getObject(rnnCounts[1][0])); - hull.add(shared.getObject(rnnCounts[2][0])); - - setCoveredByHull(); - float bestScore = getCoverageScore(); - int bestIndex = 2; - - for (int i = 3; !isAllCovered() && i < rnnCounts.length && rnnCounts[i][1] < kThreshold && i < MAX_HULL_SIZE; i++) { - LocalAbstractObject o = shared.getObject(i); - hull.add(o); - setCoveredByHull(); - float score = getCoverageScore(); - if (score >= bestScore) { - bestScore = score; - bestIndex = i; - } - } - // Remove all hull objects until we get the best configuration. - while ((hull.size() - 1) > bestIndex) - hull.remove(hull.size() - 1); - - setCoveredByHull(); - System.err.println("Border hull: " + hull.size() + " objects"); - System.err.println("Coverage: " + (int)Math.round(getCoverage() * 100f) + "%"); - System.err.println("Cov ratio: " + getCoverageScore()); - } - - /** Calculate counts of being kNN for the passed "k" */ - private void initReverseNNCounts(int k, int kThreshold) { - long start = System.nanoTime(); - rnnCounts = new int[shared.getObjectCount()][2]; - int[] allData = new int[shared.getObjectCount()]; - for (int i = 0; i < shared.getObjectCount(); i++) { - allData[i] = i; - rnnCounts[i] = new int[] {i, 0}; - } - for (int i = 0; i < shared.getObjectCount(); i++) { - int[] knns = getKNNIndexes(i, k, allData); - for (int idx : knns) - rnnCounts[idx][1]++; - } - Collections.shuffle(Arrays.asList(hull)); // Try to have different ordering of objects of the same RkNN counts. - Arrays.sort(rnnCounts, (int[] o1, int[] o2) -> { - return Integer.compare(o1[1], o2[1]); - }); - System.err.println("RnnCounts took: " + (System.nanoTime() - start) / 10000000f + " msec"); - // Get histogram of RkNN counts - Map<Integer,Integer> hist = new TreeMap<>(); - int objsUpToThreshold = 0; - for (int[] cnt : rnnCounts) { - hist.put(cnt[1], hist.getOrDefault(cnt[1], 0) + 1); - if (cnt[1] < kThreshold) - objsUpToThreshold++; - } - System.err.println("RkNN counts histogram: " + hist); - System.err.println("Objects up to kThreshold " + kThreshold + ": " + objsUpToThreshold); - } - -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentation.java deleted file mode 100644 index df219f0..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentation.java +++ /dev/null @@ -1,429 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.cover; - -import cz.muni.fi.disa.similarityoperators.utils.MergingType; -import messif.objects.LocalAbstractObject; - -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.*; - -/** - * Incrementally build a hull over the passed dataset. - * - * Initial 3-object hulls are created in the order of closest objects (first,second) to which the third object (as 1NN(first) or 1NN(second)) is added. - * Iteratively the initial hulls are merged -- the closest hulls first. The closest hulls HS to a hull H is obtained on hull objects, i.e. - * the closest objects win (i.e. 1NN join(H,HS) is executed). - * The iterations are "generation"-based -- all hulls are merged first, then such hulls are taken to the next iteration. The reduction in the number of hulls is 2. - * - * @author Miriama Janosova, Masaryk University, Brno, Czech Republic, x424615@fi.muni.cz - */ -public class HullIncrementalRepresentation extends HullRepresentation { - - private List<HullRepresentation> hullArray; - /** Pair-wise distances among hull objects */ - protected float[][] hullMinDistances; - protected MergingType mergingType; - - public HullIncrementalRepresentation(List<LocalAbstractObject> data, MergingType type) { - super(data); - this.hullArray = new ArrayList<>(); - this.mergingType = type; - } - - public HullIncrementalRepresentation(PrecomputedDistances shared, MergingType type) { - super(shared); - this.hullArray = new ArrayList<>(); - this.mergingType = type; - } - - public HullIncrementalRepresentation(PrecomputedDistances shared) { - super(shared); - this.hullArray = new ArrayList<>(); - this.mergingType = MergingType.NONE; - } - - public HullIncrementalRepresentation(List<LocalAbstractObject> data) { - super(data); - this.hullArray = new ArrayList<>(); - this.mergingType = MergingType.NONE; - } - - @Override - protected boolean initHullPoints() { - if (shared.data.size() <= 3) { - hull.addAll(shared.data); - return false; - } - BitSet notProcessedIndexes = new BitSet(this.shared.getObjectCount()); - notProcessedIndexes.set(0, this.shared.getObjectCount()); //sets bits to 1 - MinDistObjectPairIterator iterator = new MinDistObjectPairIterator(shared.getDistances(), notProcessedIndexes); // updates notProcessedIndexes internally - - while (iterator.hasNext()) { - if (notProcessedIndexes.cardinality() < 3) { - for (int i = notProcessedIndexes.nextSetBit(0); i != -1; - i = notProcessedIndexes.nextSetBit(i + 1)) { // find spare objects and merge them with closest hull - addObjectToClosestHull(i); // i is index - } - return true; // finish - } - MinDistObjectPair minDistsObject = iterator.next(); - int third = findClosestThirdIdx(notProcessedIndexes, minDistsObject.firstIdx, minDistsObject.secondIdx); - notProcessedIndexes.clear(third); - - List<LocalAbstractObject> hullPoints = new ArrayList<>(Arrays.asList(shared.getObject(minDistsObject.firstIdx), // vytvorim hull nad najdenymi bodmi - shared.getObject(minDistsObject.secondIdx), shared.getObject(third))); - HullRepresentation newHull = new HullRepresentation(hullPoints); //create new hull - newHull.hull.addAll(hullPoints); // set hull points - hullArray.add(newHull); - } - return true; - } - - /** Identify an object that is the closest to the passed objects (all by indexes) for creating initial hulls. - * - * It pick the object that is either 1NN of the first passed object or 1NN of the second passed objects - * depending on which 1NN is closer. - * - * @param notProcessed list of indexes of unused objects - * @param firstIdx index of the first object - * @param secondIdx index of the second object - * @return index of an object closest to the passed ones. - */ - protected int findClosestThirdIdx(BitSet notProcessed, int firstIdx, int secondIdx){ - int[] firstCandidate = getKNNIndexes(firstIdx, 1, notProcessed); - int[] secondCandidate = getKNNIndexes(secondIdx, 1, notProcessed); - int firstCandIdx = firstCandidate[0]; - int secondCandIdx = secondCandidate[0]; - if (this.shared.getDistance(firstIdx, firstCandIdx) <= - this.shared.getDistance(secondIdx, secondCandIdx)){ - return firstCandIdx; - } - return secondCandIdx; - } - - /** Find the closest hull to the passed object (by its index) and add the object to the hull. - * - * @param index index of object to find the closest hull for - */ - protected void addObjectToClosestHull(int index) { - int[] closesIdx = getKNNIndexes(index, 1, getAllCurrentHullIndexes()); // closest hull object among all hull objects of all hulls. - LocalAbstractObject object = shared.getObject(index); - - HullRepresentation closestHull = hullArray.get(findCorrespondingHullIdx(shared.getObject(closesIdx[0]))); - - closestHull.shared.data.add(object); // add just to data - if (isCoveredByHull(index, closestHull.getRepresentativesCount(), shared.getIndexes(closestHull.hull))) { - return; // don't enlarge the hull - } - closestHull.hull.add(object); //otherwise add to hull points - } - - /** Get index of a hull object in the hulls */ - protected int findCorrespondingHullIdx(LocalAbstractObject object) { - return findCorrespondingHullIdx(object, hullArray); - } - - /** Get index of a hull object in the passed hulls */ - private int findCorrespondingHullIdx(LocalAbstractObject object, List<HullRepresentation> hulls){ - for (int i = 0; i < hulls.size(); i++) { - if (hulls.get(i).hull.contains(object)) - return i; - } - return -1; // a hull has to be closest, does not make sense otherwise - } - - protected List<LocalAbstractObject> getAllCurrentHullObjects() { - return getAllHullObjects(this.hullArray); - } - - private List<LocalAbstractObject> getAllHullObjects(List<HullRepresentation> hulls) { - List<LocalAbstractObject> hullList = new ArrayList<>(); - for (HullRepresentation h : hulls) { - hullList.addAll(h.hull); - } - return hullList; - } - - protected int[] getAllCurrentHullIndexes() { - return shared.getIndexes(getAllCurrentHullObjects()); - } - - private float[][] precomputeHullDistances(List<HullRepresentation> hullArray){ - float[][] minDistances = new float[hullArray.size()][hullArray.size()]; - for (int i = 0; i < hullArray.size(); i++) { // pre vsetky hulls - for (int j = i + 1; j < hullArray.size(); j++) { // prejdi zvysne - float minDist = Float.MAX_VALUE;// udrzi min vzdialenost medzi vsetkymi komb hullPointov - for (LocalAbstractObject firstHP : hullArray.get(i).hull){ // prejdi vsetky hull points prveho a najdi najmensiu vzial k hociakemu druhemu - for (LocalAbstractObject secondHP : hullArray.get(j).hull){ - if (shared.getDistance(firstHP, secondHP) < minDist){ - minDist = shared.getDistance(firstHP, secondHP); - } - } - } - if (minDist == 0f) - throw new RuntimeException("Zero distance between " + hullArray.get(i).toString() + - " and " + hullArray.get(j).toString()); - minDistances[i][j] = minDist; - minDistances[j][i] = minDist; - } - } - return minDistances; - } - - protected void precomputeHullDistances(){ - hullMinDistances = precomputeHullDistances(hullArray); - } - - @Override - public void select() { - if (!initHullPoints()) - return; - System.out.println("First generation \n"); - for (int i = 0; i < hullArray.size(); i++) { - try { - OutputStream out = new FileOutputStream("./hullIncTest/pripad 3.1/generations/gen1/h" + i + ".txt", false); - hullArray.get(i).write(out); - } catch (IOException e) { - e.printStackTrace(); - } - } - - int generationNumber = 2; - while (hullArray.size() != 1) { -// System.out.println("NEW GENERATION of hulls" + generationNumber + "\n"); -// String path = "./hullIncTest/pripad 3.1/generations/gen" + generationNumber + "/"; - precomputeHullDistances(); - BitSet notProcessedIndexes = new BitSet(hullMinDistances.length); - notProcessedIndexes.set(0, hullMinDistances.length); //sets bits to 1 - List<HullRepresentation> newHullArray = new ArrayList<>(); - MinDistObjectPairIterator iterator = new MinDistObjectPairIterator(hullMinDistances, notProcessedIndexes); - int hullNum = 0; - - while (iterator.hasNext()) { - if (notProcessedIndexes.cardinality() == 1) { // there is either one spare hull or none (even or odd number of them) - int index = notProcessedIndexes.nextSetBit(0); - int closestIdx = getClosetsHullIdx(index, newHullArray);// returns closest hull within newHullArray - mergeHullsByIndexes(index, closestIdx, newHullArray); - newHullArray.remove(closestIdx); // uprav newHullArray, aby obsahovalo vyhradne newHull - break; - } - MinDistObjectPair minDistsObject = iterator.next(); - mergeHullsByIndexes(minDistsObject.firstIdx, minDistsObject.secondIdx, newHullArray); - hullNum ++; - } - hullArray = newHullArray; - generationNumber ++; - } - this.hull = hullArray.get(0).hull; - setCoveredByHull(); - } - - /** get closest hull within hulls */ - private int getClosetsHullIdx(int hullIndex, List<HullRepresentation> hulls){ - List<LocalAbstractObject> allHulls = getAllHullObjects(hulls); - LocalAbstractObject closestPoint = null; - float minDist = Float.MAX_VALUE; - HullRepresentation bbb = hullArray.get(hullIndex); - List<LocalAbstractObject> uf = bbb.hull; - for (LocalAbstractObject hullPoint : uf ){ - List<LocalAbstractObject> knn = getKNNObjects(hullPoint, 1, allHulls); - if (this.shared.getDistance(knn.get(0), hullPoint) < minDist) { - closestPoint = knn.get(0); - minDist = shared.getDistance(closestPoint, hullPoint); - } - } - return findCorrespondingHullIdx(closestPoint, hulls); - } - - private List<LocalAbstractObject> getPointsToComputeCoverage(List<HullRepresentation> hulls, - LocalAbstractObject excludedPoint, - HashSet<LocalAbstractObject> covered) { - List<LocalAbstractObject> hullPoints = new ArrayList<>(); - hulls.forEach(hull -> hullPoints.addAll(hull.hull)); - - hullPoints.remove(excludedPoint); - hullPoints.removeAll(covered); - - return hullPoints; - } - - private List<LocalAbstractObject> getPointsToComputeCoverage(HullRepresentation first, - HullRepresentation second, - LocalAbstractObject notToIncludePoint, - HashSet<LocalAbstractObject> covered){ - List<LocalAbstractObject> hullPoints = new ArrayList<>(first.hull); - hullPoints.addAll(second.hull); - hullPoints.remove(notToIncludePoint); - hullPoints.removeAll(covered); - return hullPoints; - } - - @Override - public boolean isCoveredByHull(int objIdx, int knHullPoints, int[] hullIdxs) { - int[] knns = getKNNIndexes(objIdx, knHullPoints, hullIdxs); - return isCoveredByDistSum(objIdx, hullIdxs[knns[0]], hullIdxs); - } - - private void mergeHullsByIndexes(int firstIdx, int secondIdx, List<HullRepresentation> newHullArray){ - newHullArray.add(mergeHulls(hullArray.get(firstIdx), hullArray.get(secondIdx))); - } - - protected HullRepresentation mergeHulls(List<HullRepresentation> hulls) { - if (hulls.size() == 0) return null; - if (hulls.size() == 1) return hulls.get(0); - - List<LocalAbstractObject> points = new ArrayList<>(); - - // Minimal number of points in a hull is 3 - int maxCoveredPoints = -3; - int maxIndex = hulls.get(0).hull.size(); - - for (HullRepresentation hull : hulls) { - points.addAll(hull.shared.getObjects()); - maxCoveredPoints += hull.hull.size(); - maxIndex = Math.max(maxIndex, hull.hull.size()); - } - - HashSet<LocalAbstractObject> coveredObjects = new HashSet<>(); - HullRepresentation newHull = new HullRepresentation(points); - - for (int i = 0; i < maxIndex; i++) - for (HullRepresentation hull : hulls) - if (i < hull.hull.size()) - checkCoveredObjects(hull, hulls, hull.hull.get(i), coveredObjects, maxCoveredPoints, newHull); - - hulls.forEach(hull -> addNotCoveredHullPoints(hull, newHull, coveredObjects)); - - return newHull; - } - - protected HullRepresentation mergeHulls(HullRepresentation first, HullRepresentation second){ - // create new hull - List<LocalAbstractObject> data = new ArrayList<>(first.shared.getObjects()); - data.addAll(second.shared.getObjects()); - HullRepresentation newHull = new HullRepresentation(data); - - // prejdi vsetky body prveho a druheho a mergeni to dokopy - HashSet<LocalAbstractObject> coveredObjects = new HashSet<>(); - sortHullPointsBasedOnMinDistanceToOther(first, second); - sortHullPointsBasedOnMinDistanceToOther(second, first); -// printPoints(first, second, path, hullNum, idx); - int maxCoveredPointsNumber = first.hull.size() + second.hull.size() - 3; //minimalne chceme ponechat 3 hullPoints - int maxIVal = (first.hull.size() > second.hull.size()) ? first.hull.size() : second.hull.size(); // max index to check - - for (int i = 0; i < maxIVal; i++) {// check iteratively both hulls whether their points are covered - if (first.hull.size() > i){ - checkCoveredObj(first, second, first.hull.get(i), coveredObjects, maxCoveredPointsNumber, newHull); - } - if (second.hull.size() > i){ - checkCoveredObj(first, second, second.hull.get(i), coveredObjects, maxCoveredPointsNumber, newHull); - } - } - addNotCoveredHullPoints(first, newHull, coveredObjects); - addNotCoveredHullPoints(second, newHull, coveredObjects); - return newHull; - } - - private void sortHullPointsBasedOnMinDistanceToOther(HullRepresentation first, HullRepresentation second){ - List<LocalAbstractObject> sorted = new ArrayList<>(); - int[] hullIdxs = new int[first.hull.size()]; - float[] minDists = new float[first.getRepresentativesCount()]; - for (int i = 0; i < first.hull.size(); i++) { - minDists[i] = this.shared.minDist(this.shared.getIndex(first.hull.get(i)), - this.shared.getIndexes(second.hull)); // vzdialenost pocitam nad vsetkymi precomputed distances, - hullIdxs[i] = i; // creating hullIndexes - } - Ranked[] ranked = getRankedIdxs(hullIdxs, minDists); // chcem zoradit firstHullIndexes - - for (int i = 0; i < first.getRepresentativesCount(); i++) - sorted.add(first.hull.get(ranked[i].index)); - first.hull = sorted; - } - - /** TODO: Vlasta 2020/06/23 -- should not we try to replace an existing hull object in newHull instead of "blindly" adding the hull object? */ - private void addNotCoveredHullPoints(HullRepresentation hull, HullRepresentation newHull, - HashSet<LocalAbstractObject> coveredObjects){ - for (LocalAbstractObject hullPoint : hull.hull) { - if (coveredObjects.contains(hullPoint)) - continue; - newHull.hull.add(hullPoint); - } - } - - private void checkCoveredObjects(HullRepresentation firstHull, - List<HullRepresentation> hulls, - LocalAbstractObject excludedPoint, - HashSet<LocalAbstractObject> coveredPoints, - int maxCoveredPoints, - HullRepresentation newHull) { - List<HullRepresentation> everyHull = new ArrayList<>(); - everyHull.add(firstHull); - everyHull.addAll(hulls); - - List<LocalAbstractObject> pointsForCoverageCheck = getPointsToComputeCoverage(everyHull, excludedPoint, coveredPoints); - if (isCoveredByHull(shared.getIndex(excludedPoint), pointsForCoverageCheck.size(), this.shared.getIndexes(pointsForCoverageCheck)) && coveredPoints.size() < maxCoveredPoints) { - if (this.mergingType == MergingType.BASIC) { - checkCoverageWithOrWithoutHullPoint(newHull, excludedPoint, coveredPoints, pointsForCoverageCheck); - return; - } - - coveredPoints.add(excludedPoint); - } - } - - private void checkCoveredObj(HullRepresentation first, HullRepresentation second, LocalAbstractObject hullPoint, - HashSet<LocalAbstractObject> coveredObjects, int maxCoveredPointsNumber, HullRepresentation newHull){ - List<LocalAbstractObject> pointsForCoverageCheck = - getPointsToComputeCoverage(first, second, hullPoint, coveredObjects); - if (isCoveredByHull(shared.getIndex(hullPoint), pointsForCoverageCheck.size(), - this.shared.getIndexes(pointsForCoverageCheck)) && - coveredObjects.size() < maxCoveredPointsNumber ){ - if (this.mergingType == MergingType.BASIC - // && hullArray.size() <= Math.sqrt(this.shared.getObjectCount()) - ) { - checkCoverageWithOrWithoutHullPoint(newHull, hullPoint, coveredObjects, pointsForCoverageCheck); - return; - } - coveredObjects.add(hullPoint); // just simply add the hull points to covered - } - } - - private void checkCoverageWithOrWithoutHullPoint(HullRepresentation newHull, LocalAbstractObject hullPoint, - HashSet<LocalAbstractObject> coveredObjects, - List<LocalAbstractObject> pointsForCoverageCheck) { - // check whether the coverage is better when we consider the hullPoint to be "left" between the hull points or on, - newHull.hull = pointsForCoverageCheck; // ptsFCCh contains only so far valid candidates for remaining hull pts - newHull.setCoveredByHull(); - float coverageWithoutHP = newHull.getCoverage(); - newHull.hull.add(hullPoint); // try to add new point - newHull.setCoveredByHull(); - float coverageWithHP = newHull.getCoverage(); - newHull.hull = new ArrayList<>(); //reset - if (coverageWithoutHP >= coverageWithHP) { - coveredObjects.add(hullPoint); - } // else ignore - - } - - private void printPoints(HullRepresentation first, HullRepresentation second, String path, int i, int idx) { - try { - System.out.println("hulls"); - first.write(System.out); - second.write(System.out); - OutputStream dataS; - if(idx != -1){ - // rewrite the data ... if we are adding the third hull - dataS = new FileOutputStream(path + "d" + idx + ".txt", false); - } - else { - dataS = new FileOutputStream(path + "d" + i + ".txt", false); - } - first.writeData(dataS); - second.writeData(dataS); - - } catch (IOException e) { - e.printStackTrace(); - } - } -} \ No newline at end of file diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentationV2.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentationV2.java deleted file mode 100644 index 2b9afe1..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentationV2.java +++ /dev/null @@ -1,400 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; -import mhtree.HullNode; -import mhtree.HullTree; - -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.*; - -/** TODO: This looks like it should not inherit from HullIncrementalRepresentation (almost all is overwritten) or - * it must be refactored (creates its own hulls array?). - * - * Incrementally build a hull over the passed dataset -- version 2. - * - * Initial 3-object hulls are created from farthest object and their two neighbors. Their 3rd "neighbor" is selected as having min sum to 1st and 2nd. - * Iteratively the initial hulls are merged -- the closest hulls first. The closest hulls HS to a hull H is obtained on hull objects, i.e. - * the closest objects win (i.e. 1NN join(H,HS) is executed). - * The iterations are "generation"-based -- all hulls are merged first, then such hulls are taken to the next iteration. The reduction in the number of hulls is 2. - * - * @author Miriama Janosova, Masaryk University, Brno, Czech Republic, x424615@fi.muni.cz - */ -public class HullIncrementalRepresentationV2 extends HullIncrementalRepresentation { - - // MH-Tree - private HullTree mhtree; - private final HullNode[] nodes; - private int hullsMergedIntoGroupsOf = 3; - - private int initialHullsSize = 3; - - private HullRepresentation[] hulls; - private BitSet validHullIdxs; - private boolean joinWithNotProcessedClosest; - - public HullIncrementalRepresentationV2(List<LocalAbstractObject> data, boolean joinWithNotProcessedClosest) { - super(data); - - // MH-Tree - nodes = new HullNode[shared.data.size() / initialHullsSize]; - - hulls = new HullRepresentation[data.size() / initialHullsSize]; - validHullIdxs = new BitSet(hulls.length); - validHullIdxs.set(0, hulls.length); // each index is valid, - hullMinDistances = new float[hulls.length][hulls.length]; - this.joinWithNotProcessedClosest = joinWithNotProcessedClosest; - } - - public HullIncrementalRepresentationV2(List<LocalAbstractObject> data, boolean joinWithNotProcessedClosest, int initialHullsSize) { - super(data); - - this.initialHullsSize = Math.max(initialHullsSize, 3); - - // MH-Tree - nodes = new HullNode[shared.data.size() / this.initialHullsSize]; - - hulls = new HullRepresentation[data.size() / this.initialHullsSize]; - validHullIdxs = new BitSet(hulls.length); - validHullIdxs.set(0, hulls.length); // each index is valid, - hullMinDistances = new float[hulls.length][hulls.length]; - this.joinWithNotProcessedClosest = joinWithNotProcessedClosest; - } - - public HullIncrementalRepresentationV2(PrecomputedDistances shared, boolean joinWithNotProcessedClosest) { - super(shared); - - // MH-Tree - nodes = new HullNode[shared.data.size() / initialHullsSize]; - - hulls = new HullRepresentation[shared.data.size() / initialHullsSize]; - validHullIdxs = new BitSet(hulls.length); - validHullIdxs.set(0, hulls.length); // each index is valid, - hullMinDistances = new float[hulls.length][hulls.length]; - this.joinWithNotProcessedClosest = joinWithNotProcessedClosest; - } - - public HullIncrementalRepresentationV2(List<LocalAbstractObject> data) { - this(data, true); - } - - public HullIncrementalRepresentationV2(PrecomputedDistances shared) { - this(shared, true); - } - - @Override - protected boolean initHullPoints() { - if (shared.data.size() < initialHullsSize) { - hull.addAll(shared.data); - - // MH-Tree - HullRepresentation newHull = new HullRepresentation(hull); - newHull.hull.addAll(hull); - mhtree = new HullTree(new HullNode(newHull)); - - return false; - } - - BitSet notProcessedIndices = new BitSet(shared.getObjectCount()); - notProcessedIndices.set(0, shared.getObjectCount()); - int hullIndex = 0; - - while (!notProcessedIndices.isEmpty()) { - if (notProcessedIndices.cardinality() < initialHullsSize) { - for (int i = notProcessedIndices.nextSetBit(0); i >= 0; i = notProcessedIndices.nextSetBit(i + 1)) - addObjectToClosestHull(i); - - return true; - } - - List<LocalAbstractObject> hullPoints = new ArrayList<>(); - - // Select a base point - int furthestIndex = getFurthestIdx(notProcessedIndices); - notProcessedIndices.clear(furthestIndex); - hullPoints.add(shared.getObject(furthestIndex)); - - // Select the rest of the `initialHullsSize` number of points - findClosestIndices(furthestIndex, initialHullsSize - 1, notProcessedIndices) - .forEach(i -> hullPoints.add(shared.getObject(i))); - - HullRepresentation newHull = new HullRepresentation(hullPoints); - newHull.hull.addAll(hullPoints); - - hulls[hullIndex] = newHull; - nodes[hullIndex] = new HullNode(newHull); - - hullIndex++; - } - - return true; - } - - private List<Integer> findClosestIndices(int basePointIndex, int numberOfIndices, BitSet notProcessedIndices) { - List<Integer> closestIndices = new ArrayList<>(); - List<Integer> hullPointIndices = new ArrayList<>(); - - hullPointIndices.add(basePointIndex); - - for (int i = 0; i < numberOfIndices; i++) { - List<Integer> candidateIndices = new ArrayList<>(); - List<Float> candidateDistances = new ArrayList<>(); - - for (int index : hullPointIndices) { - int nnIndex = getKNNIndexes(index, 1, notProcessedIndices)[0]; - - float distanceSum = hullPointIndices.stream() - .map(pointIndex -> shared.getDistance(pointIndex, nnIndex)) - .reduce(0f, Float::sum); - - candidateIndices.add(nnIndex); - candidateDistances.add(distanceSum); - } - - int smallestDistanceIndex = candidateDistances.indexOf(Collections.min(candidateDistances)); - int closestPointIndex = candidateIndices.get(smallestDistanceIndex); - - notProcessedIndices.clear(closestPointIndex); - - hullPointIndices.add(closestPointIndex); - closestIndices.add(closestPointIndex); - } - - return closestIndices; - } - - /** TODO: This is the same as in HullIncrementalRepresentation but it operates on this.hulls and not on super.hullArray REFACTOR?!? */ - protected void addObjectToClosestHull(int index) { - int[] closesIdx = getKNNIndexes(index, 1, getAllCurrentHullIndexes()); - // returns closest index within the data - LocalAbstractObject object = shared.getObject(index); - int closestIndex = getAllCurrentHullIndexes()[closesIdx[0]]; - HullRepresentation closestHull = hulls[findCorrespondingHullIdx(shared.getObject(closestIndex))]; - closestHull.shared.data.add(object); // add just to data - if (isCoveredByHull(index, closestHull.getRepresentativesCount(), shared.getIndexes(closestHull.hull))) { - return; // don't add to hulls - } - closestHull.hull.add(object); //otherwise add to hull points - } - - @Override - protected int findCorrespondingHullIdx(LocalAbstractObject object) { - for (int i = 0; i < hulls.length; i++) { - if (hulls[i].hull.contains(object)) - return i; - } - return -1; // some hull has to be closest, does not make sense otherwise - } - - @Override - protected List<LocalAbstractObject> getAllCurrentHullObjects() { - List<LocalAbstractObject> hullList = new ArrayList<>(); - for (HullRepresentation h : hulls) { - hullList.addAll(h.hull); - } - return hullList; - } - - /** TODO: Refactor? It is the same as private float[][] precomputeHullDistances in super */ - @Override - protected void precomputeHullDistances(){ - for (int i = 0; i < hulls.length; i++) { // pre vsetky hullsIdxs - for (int j = i + 1; j < hulls.length; j++) { // prejdi zvysne - float minDist = Float.MAX_VALUE;// udrzi min vzdialenost medzi vsetkymi komb hullPointov - for (LocalAbstractObject firstPoint : hulls[i].shared.data){ // prejdi vsetky hull points prveho a najdi najmensiu vzial k hociakemu druhemu - for (LocalAbstractObject secondPoint : hulls[j].shared.data){ - if (shared.getDistance(firstPoint, secondPoint) < minDist){ - minDist = shared.getDistance(firstPoint, secondPoint); - } - } - } - if (minDist == 0f) - throw new RuntimeException("Zero distance between " + hulls[i].toString() + - " and " + hulls[j].toString()); - hullMinDistances[i][j] = minDist; - hullMinDistances[j][i] = minDist; - } - } - } - - @Override - public void select() { - if (!initHullPoints()) return; - - precomputeHullDistances(); - - while (validHullIdxs.cardinality() != 1) { - BitSet notProcessedIndices = (BitSet) validHullIdxs.clone(); - - while (!notProcessedIndices.isEmpty()) { - if (notProcessedIndices.cardinality() < hullsMergedIntoGroupsOf) { - List<Integer> restOfTheIndices = new ArrayList<>(); - - for (int j = notProcessedIndices.nextSetBit(0); j >= 0; j = notProcessedIndices.nextSetBit(j + 1)) - restOfTheIndices.add(j); - - int baseIndex = restOfTheIndices.remove(0); - mergeHulls(baseIndex, restOfTheIndices); - - break; - } - - int furthestHullIndex = getFurthestIdx(hullMinDistances, notProcessedIndices); - notProcessedIndices.clear(furthestHullIndex); - - List<Integer> nnIndices = new ArrayList<>(); - - for (int i = 0; i < hullsMergedIntoGroupsOf - 1; i++) { - int index = shared.minDistInArrayExceptIdx(hullMinDistances[furthestHullIndex], joinWithNotProcessedClosest ? notProcessedIndices : validHullIdxs, furthestHullIndex); - notProcessedIndices.clear(index); - nnIndices.add(index); - } - - mergeHulls(furthestHullIndex, nnIndices); - } - } - - int rootIndex = validHullIdxs.nextSetBit(0); - - mhtree = new HullTree(nodes[rootIndex]); - this.hull = hulls[rootIndex].hull; - - setCoveredByHull(); - } - - /** get closest hull within hulls */ - private int getClosetsHullIdx(int hullIndex){ - return shared.minDistInArrayExceptIdx(hullMinDistances[hullIndex], validHullIdxs, hullIndex); - } - - private void mergeHulls(int baseHullIndex, List<Integer> indices) { - if (indices.size() == 0) return; - - // Create list of all the hulls - List<HullRepresentation> hulls = new ArrayList<>(); - hulls.add(this.hulls[baseHullIndex]); - indices.forEach(i -> hulls.add(this.hulls[i])); - - // Create a new node - HullRepresentation newHull = mergeHulls(hulls); - HullNode newNode = new HullNode(newHull); - - // Set parent for every corresponding input node - nodes[baseHullIndex].setParent(newNode); - indices.forEach(i -> nodes[i].setParent(newNode)); - - // Set children for the new node - newNode.addChildren(nodes[baseHullIndex]); - newNode.addChildren(indices.stream().map(i -> nodes[i])); - - // Cleanup the nodes array and place the new node onto the first index - nodes[baseHullIndex] = newNode; - indices.forEach(i -> nodes[i] = null); - - // Cleanup the merged hulls - this.hulls[baseHullIndex] = newHull; - indices.forEach(i -> { - validHullIdxs.clear(i); - this.hulls[i] = null; - }); - - updateHullDistances(baseHullIndex, indices); - } - - private void mergeHullsByIndexes(int firstIdx, int secondIdx){ - // create new hull - HullRepresentation first = hulls[firstIdx]; - HullRepresentation second = hulls[secondIdx]; - HullRepresentation newHull = mergeHulls(first, second); - - // MH-Tree - HullNode parent = new HullNode(newHull); - - nodes[firstIdx].setParent(parent); - nodes[secondIdx].setParent(parent); - - parent.addChildren(nodes[firstIdx], nodes[secondIdx]); - - nodes[firstIdx] = parent; - nodes[secondIdx] = null; - - - hulls[firstIdx] = newHull; - validHullIdxs.clear(secondIdx); //flip second hull index, first hull is updated - hulls[secondIdx] = null; - updateHullDists(firstIdx, secondIdx); - } - - private void updateHullDistances(int baseHullIndex, List<Integer> hullIndices) { - if (hullIndices.size() == 0) return; - - for (int i = validHullIdxs.nextSetBit(0); i >= 0; i = validHullIdxs.nextSetBit(i + 1)) { - float minDistance = hullMinDistances[baseHullIndex][i]; - - for (int index : hullIndices) - minDistance = Math.min(minDistance, hullMinDistances[index][i]); - - hullMinDistances[baseHullIndex][i] = minDistance; - hullMinDistances[i][baseHullIndex] = minDistance; - } - } - - private void updateHullDists(int firstIdx, int secondIdx) { - for (int i = validHullIdxs.nextSetBit(0); i >= 0; i = validHullIdxs.nextSetBit(i + 1)) { - float minDist = (hullMinDistances[firstIdx][i] < hullMinDistances[secondIdx][i]) - ? hullMinDistances[firstIdx][i] : hullMinDistances[secondIdx][i]; - hullMinDistances[firstIdx][i] = minDist; - hullMinDistances[i][firstIdx] = minDist; - } - } - - private List<LocalAbstractObject> getPointsToComputeCoverage(HullRepresentation first, - HullRepresentation second, - LocalAbstractObject notToIncludePoint, - HashSet<LocalAbstractObject> covered){ - List<LocalAbstractObject> hullPoints = new ArrayList<>(first.hull); - hullPoints.addAll(second.hull); - hullPoints.remove(notToIncludePoint); - hullPoints.removeAll(covered); - return hullPoints; - } - - private void printGeneration(String path) { - try { - int idx = 0; - for (int i = validHullIdxs.nextSetBit(0); i >= 0; - i = validHullIdxs.nextSetBit(i + 1)) { - OutputStream dataS = new FileOutputStream(path + "d" + idx + ".txt", false); - hulls[i].writeData(dataS); // write data - System.out.println("new hull\n"); - OutputStream out; - OutputStream ncOut; - - out = new FileOutputStream(path + "h" + idx + ".txt", false); - ncOut = new FileOutputStream(path + "nc" + idx + ".txt", false); - - hulls[i].write(out); // write hp - hulls[i].setCoveredByHull(); - hulls[i].printNotCovered(ncOut); // write not covered - idx++; - } - - } catch (IOException e) { - e.printStackTrace(); - } - } - - // MH-Tree - public HullTree getMHTree() { - return mhtree; - } - - public HullIncrementalRepresentationV2 mergeHullsIntoGroupsOfSize(int groupSize) { - if (mhtree == null) - this.hullsMergedIntoGroupsOf = groupSize; - - return this; - } -} - diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentationV3.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentationV3.java deleted file mode 100644 index 96fdd8b..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullIncrementalRepresentationV3.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; - -import java.util.List; - -/** - * This is just a wrapper to enable "agglomerative" way of merging hulls. - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class HullIncrementalRepresentationV3 extends HullIncrementalRepresentationV2 { - - public HullIncrementalRepresentationV3(List<LocalAbstractObject> data) { - super(data, false); - } - - public HullIncrementalRepresentationV3(PrecomputedDistances shared) { - super(shared, false); - } - -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullKNHPRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullKNHPRepresentation.java deleted file mode 100644 index ca8031e..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullKNHPRepresentation.java +++ /dev/null @@ -1,46 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; - -import java.util.List; - -/** - * works above HullRepresentation with min hull points - * checks whether each data point is covered by kNearest Hull points, - * if not adds the point into hull. - */ -public class HullKNHPRepresentation extends HullMHPRepresentation{ - - private int kNearestHullPoints; - - public HullKNHPRepresentation(List<LocalAbstractObject> data, int minHullPoints, int kNearestHullPoints) { - super(data, minHullPoints); - this.kNearestHullPoints = kNearestHullPoints; - } - - public HullKNHPRepresentation(PrecomputedDistances shared, int minHullPoints, int kNearestHullPoints) { - super(shared, minHullPoints); - this.kNearestHullPoints = kNearestHullPoints; - } - - @Override - protected void setCoveredByHull() { - setCoveredByKNHullPoints(kNearestHullPoints); - } - - private void setCoveredByKNHullPoints(int knHullPoints) { - int[] hullIdxs = shared.getIndexes(hull); - setCoveredByHullShared(hullIdxs); - - for (int i = 0; i < shared.getObjectCount(); i++) { - if (isCovered(i)) // Skip hull objects that were set above - continue; - if (isCoveredByHull(i, knHullPoints, hullIdxs)) // checks for coverage by only kn hull points - setCovered(i); - } - } - - public int getkNearestHullPoints() { - return kNearestHullPoints; - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullMHPRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullMHPRepresentation.java deleted file mode 100644 index e3ddfda..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullMHPRepresentation.java +++ /dev/null @@ -1,43 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; - -import java.util.List; - -public class HullMHPRepresentation extends HullOptimizedRepresentationV2{ - - private int minHullPoints; - - public HullMHPRepresentation(List<LocalAbstractObject> shared, int minHullPoints){ - super(shared); - this.minHullPoints = minHullPoints; - } - - public HullMHPRepresentation(PrecomputedDistances shared, int minHullPoints) { - super(shared); - this.minHullPoints = minHullPoints; - } - - @Override - protected boolean initHullPoints(){ - // not enough data to proceed - if (shared.data.size() < minHullPoints){ - hull.addAll(shared.data); - return false; - } - hull.add(getFurthest()); //get first hull point - // add other hull points - for (int i = 1; i < minHullPoints; i++){ - hull.add(nextFurthest()); - } - - hull.forEach((ho) -> presenceInHull.put(ho, 1)); - - setCoveredByHull(); - return true; - } - - public int getMinHullPoints() { - return minHullPoints; - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentation.java deleted file mode 100644 index 32353d9..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentation.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; - -import java.util.ArrayList; -import java.util.List; - - -/** - * Version 2 of the proposal for metric hull objects. - * - * It starts with two mutually furthest objects and one additional selected as an object having - * the maximum sum of distance to the previous hull objects and is not close to any previous hull object. - * - * Until the data-set is covered (completely) repeat: - * - select a new hull object in the same way as the third point above. - * - compute coverage as if the next hull object was added. - * - try to replace any existing hull object with the new one (so without adding a new object to the hull): - * - compute coverage. If it is higher than the coverage of the hull extended with the new object, - * choose the maximum coverage improvement and replace that object with the new one. - * - if there have not been any improvement (replacement), add the new object to the hull. - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class HullOptimizedRepresentation extends HullRepresentation { - //private int bannedSize; - //private Set<AbstractObjectKey> bannedKeys; - - public HullOptimizedRepresentation(List<LocalAbstractObject> data) { - super(data); - //bannedSize = 0; - //bannedKeys = new HashSet<>(); - } - - public HullOptimizedRepresentation(PrecomputedDistances shared) { - super(shared); - //bannedSize = 0; - //bannedKeys = new HashSet<>(); - } - - @Override - public void select() { - if (!initHullPoints()){ - return; - } - while (!isAllCovered()) { - List<LocalAbstractObject> notCovered = getNotCovered(); - // Next candidate - hull.add(nextFurthest()); - setCoveredByHull(); - notCovered = getNotCovered(); - - System.out.print("# Selected next hull object: "); System.out.println(toString()); - - optimize(); - } - } - - /*@Override - protected boolean filter(int objIndex, LocalAbstractObject obj) { - return bannedKeys.contains(obj.getObjectKey()); - }*/ - - protected void optimize() { - LocalAbstractObject hoNew = hull.get(hull.size() - 1); // new object added to the hull - float coverageAll = getCoverage(); - float coverageBest = coverageAll; - - int indexBest = -1; - List<LocalAbstractObject> notCovered = new ArrayList<LocalAbstractObject>(); - - for (int i = hull.size() - 2; i >= 0; i--) { - LocalAbstractObject hoRemoved = hull.remove(i); - setCoveredByHull(); - - notCovered = getNotCovered(); - - float cvg = getCoverage(); - if (cvg > coverageBest) { - coverageBest = cvg; - indexBest = i; - } - hull.add(i, hoRemoved); - } - // Improved by replacement, so do it - if (indexBest >= 0) { - LocalAbstractObject hoRemoved = hull.remove(indexBest); - // Add the object to the banned list - //bannedKeys.add(hoRemoved.getObjectKey()); - } else { - // Enlarged, so clear the banned list - //bannedKeys.clear(); - //bannedSize = hull.size(); - } - // Update the coverage back to original state! - setCoveredByHull(); - notCovered = getNotCovered(); - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentationV2.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentationV2.java deleted file mode 100644 index 8d263b0..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentationV2.java +++ /dev/null @@ -1,167 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; - -import java.util.*; - -/** - * Initially selects three mutually furthest objects. - * Next hull object is iteratively selected out of remaining non-covered objects that has the best coverage. - * If the resulting coverage is worse than before adding it, optimization is executed. - * The optimization tries to expell any previous hull object. - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class HullOptimizedRepresentationV2 extends HullRepresentation { - /** Counts how many times an object (key) was present/selected as a hull object */ - protected Map<LocalAbstractObject, Integer> presenceInHull = new HashMap<>(); - - public HullOptimizedRepresentationV2(List<LocalAbstractObject> data) { - super(data); - } - - public HullOptimizedRepresentationV2(PrecomputedDistances shared) { - super(shared); - } - - protected boolean initHullPoints(){ - if (shared.data.size() < 3){ - hull.addAll(shared.data); - return false; - } - - // Start from the outlier - hull.add(getFurthest()); - hull.add(nextFurthest()); - hull.add(nextFurthest()); - hull.forEach((ho) -> presenceInHull.put(ho, 1)); - - setCoveredByHull(); - return true; - } - - @Override - public void select() { - if (!initHullPoints()) - return; - - float prevCov = getCoverage(); - while (!isAllCovered()) { - float cov = addNextHullObject(); - System.err.print("# Selected next hull object: "); System.err.println(toString()); - if (cov < prevCov) { - // Optimization by replacement - prevCov = optimize(); - } else { - prevCov = cov; - } - reportNonCovered(); - } - } - - /** - * - * @return resulting coverage after adding new hull object - */ - protected float addNextHullObject() { - float bestCoverage = -1; - LocalAbstractObject bestObject = null; - int[] hullIndexes = shared.getIndexes(hull); - for (int i = 0; i < shared.getObjectCount(); i++) { - if (isCovered(i) || inHull(i, hullIndexes)) - continue; - - LocalAbstractObject o = shared.getObject(i); - hull.add(o); - setCoveredByHull(); - System.err.println("# Testing next hull object: " + o.getLocatorURI()); - reportNonCovered(); - float coverage = getCoverage(); - if (coverage > bestCoverage) { // toto sposobuje problemy s cyklenim - bestCoverage = coverage; - bestObject = o; - } else if (coverage == bestCoverage) { - // Prefer object not selected yet - if (presenceInHull.getOrDefault(o, 0) < presenceInHull.getOrDefault(bestObject, 0)) { - bestObject = o; - } - } - hull.remove(hull.size()-1); - setCoveredByHull(); - } - if (bestObject == null) { - System.out.println("No object has been added."); - return bestCoverage; // can this happen ? - } - // Add the best object and update internal structure about covered objects - hull.add(bestObject); - setCoveredByHull(); - presenceInHull.put(bestObject, presenceInHull.getOrDefault(bestObject, 0) + 1); - return bestCoverage; - } - - /** - * - * @return resulting coverage after optimization - */ - protected float optimize() { - float coverageBest = getCoverage(); // initial coverage - int indexToDel = -1; - LocalAbstractObject hoToDel = null; - - int medianPresence = getMedianPresence(); - - //for (int i = hull.size() - 2; i >= 0; i--) { - for (int i = 0; i < hull.size() - 1; i++) { - LocalAbstractObject hoRemoved = hull.remove(i); - setCoveredByHull(); - float cvg = getCoverage(); - if (cvg > coverageBest && presenceInHull.getOrDefault(hoRemoved, 0) <= medianPresence) { - coverageBest = cvg; - indexToDel = i; - hoToDel = hoRemoved; - } else if (cvg == coverageBest) { - // Prefer object present in the hull fewer times - if (presenceInHull.getOrDefault(hoRemoved, 0) < presenceInHull.getOrDefault(hoToDel, 0)) { // Vlasta, 2020/05/26 -- should not it be >??? - indexToDel = i; - hoToDel = hoRemoved; - } - } - hull.add(i, hoRemoved); - } - // Improved by replacement, so apply the removal - if (indexToDel >= 0) { - hull.remove(indexToDel); - // Update the coverage back to original state! - setCoveredByHull(); - System.err.print("# Removed hull object " + hoToDel.getLocatorURI() + " at " + indexToDel + " to increase coverage: "); System.err.println(toString()); - } else { - setCoveredByHull(); // Restore original state - } - return coverageBest; - } - - @Override - public String toString() { - String base = super.toString(); - StringBuilder sb = new StringBuilder(base); - sb.append('\n').append("Presence in hull:"); - for (Map.Entry<LocalAbstractObject, Integer> e : presenceInHull.entrySet()) { - String key = e.getKey().getLocatorURI(); - Integer value = e.getValue(); - sb.append(' ').append(key).append(':').append(value); - } - return sb.toString(); - } - - private int getMedianPresence() { - List<Integer> values = new ArrayList<>(presenceInHull.values()); - Collections.sort(values); - return values.get((int)(values.size() / 2)); - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentationV3.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentationV3.java deleted file mode 100644 index b78bfb8..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullOptimizedRepresentationV3.java +++ /dev/null @@ -1,108 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; - -import java.util.ArrayList; -import java.util.List; - -/** - * Testing hull optimized representation, which may overcome the infinite loops which are a problem of the optimized and optimizedV2 hull representations. - * - * The technique is based on creating hashes for each size of the hull (i.e. we store the hashes only for hulls of the same cardinality), - * such that if we end up in a situation where the current hull hashes to an already computed hash, - * we consider this situation as loop, and therefore in the next step ??? we do not optimize(). The hull size would therefore necessarily increase despite possible coverage loss. - * In the end the most desirable hull object would be selected (i.e. replacement of the objects takes place in the optimize() method). - * - * The implementation guarantees that after each iteration of select() the size of the hull either remains unchanged or increases. - */ -public class HullOptimizedRepresentationV3 extends HullOptimizedRepresentationV2 { - - List<Integer> currentHullSizeHashes; - - public HullOptimizedRepresentationV3(List<LocalAbstractObject> data) { - super(data); - currentHullSizeHashes = new ArrayList<>(); - } - - public HullOptimizedRepresentationV3(PrecomputedDistances shared) { - super(shared); - currentHullSizeHashes = new ArrayList<>(); - } - - protected boolean initHullPoints(){ - if (shared.data.size() < 3){ - hull.addAll(shared.data); - return false; - } - - // Start from the outlier - hull.add(getFurthest()); - hull.add(nextFurthest()); - hull.add(nextFurthest()); - - setCoveredByHull(); - currentHullSizeHashes.add(hashCode()); - - return true; - } - - @Override - public void select() { - if (!initHullPoints()) - return; - - float prevCov = getCoverage(); - while (!isAllCovered()) { - // po pridani objektu sa nutne zvacsi pocet objektov, ale nemusi to takto zostat nastalo.. kvoli metode optimize - float cov = addNextHullObject(); - int hullObjectsCountBeforeOptimization = hull.size(); - System.out.print("# Selected next hull object: "); System.out.println(toString()); - if (cov < prevCov) { - // Optimization by replacement - prevCov = optimize(); - //newly added hull object has replaced some 'older' one, therefore the size of the hull stays the same - if (hullObjectsCountBeforeOptimization == hull.size()){ - // nedoslo k optimalizacii, teda nic nemaz :) - resetCurrentHullHashesList(); - continue; - } - int currentHash = hashCode(); - if (currentHullSizeHashes.contains(currentHash)){ - // the current hull object has already been selected before, therefore this may cause infinite loop. - // we have to add another hull object, so that the size of hull necessarily increases. - // afterwards another optimization steps may occur, which would lead to better selection of hull objects - addNextHullObject(); - System.out.print("# Selected next hull object: "); - System.out.println(toString()); - resetCurrentHullHashesList(); - } - else { - currentHullSizeHashes.add(currentHash); - } - } else { - prevCov = cov; - // no optimizations, therefore the size of hull increased, hence we have to delete the currentHullSizeHashes - resetCurrentHullHashesList(); - } - } - } - - protected void resetCurrentHullHashesList(){ - currentHullSizeHashes = new ArrayList<>(); - currentHullSizeHashes.add(hashCode()); - } - - - /** - Computes hash of the currently selected hullPoints - */ - public int hashCode() { - int hash = 7; - int prime = 31; - for (LocalAbstractObject o : hull){ - hash = prime * hash + o.dataHashCode(); - } - return hash; - } - -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullPreselectedRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullPreselectedRepresentation.java deleted file mode 100644 index e5431f1..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullPreselectedRepresentation.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; - -import java.util.List; - -/** - * The input file (passed to {@link #select} method) is assumed to be a list of hull objects. - * - * This hull representation adds more and more hull objects and tries to find the best coverage. - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class HullPreselectedRepresentation extends AbstractHullRepresentation<HullPreselectedRepresentation> { - public static final int MAX_HULL_SIZE = 150; - - public HullPreselectedRepresentation(List<LocalAbstractObject> data) { - super(data); - } - - public HullPreselectedRepresentation(PrecomputedDistances shared) { - super(shared); - } - @Override - protected void select() { - // Take the first 3 - hull.add(shared.getObject(0)); - hull.add(shared.getObject(1)); - hull.add(shared.getObject(2)); - - setCoveredByHull(); -// System.err.println("Initial hull: " + hull.size() + " objects"); -// System.err.println("Coverage: " + (int)Math.round(getCoverage() * 100f) + "%"); -// System.err.println("Cov ratio: " + getCoverageScore()); - - float bestScore = getCoverageScore(); - int bestIndex = 2; - - for (int i = 3; !isAllCovered() && i < shared.getObjectCount() && i < MAX_HULL_SIZE; i++) { - LocalAbstractObject o = shared.getObject(i); - hull.add(o); - setCoveredByHull(); - float score = getCoverageScore(); - if (score >= bestScore) { - bestScore = score; - bestIndex = i; -// System.err.println("Preliminary hull: " + hull.size() + " objects"); -// System.err.println("Coverage: " + (int)Math.round(getCoverage() * 100f) + "%"); -// System.err.println("Cov ratio: " + getCoverageScore()); - } - } - // Remove all hull objects until we get the best configuration. - while ((hull.size() - 1) > bestIndex) - hull.remove(hull.size() - 1); - - setCoveredByHull(); - System.err.println("Preselected hull: " + hull.size() + " objects"); - System.err.println("Coverage: " + (int)Math.round(getCoverage() * 100f) + "%"); - System.err.println("Cov ratio: " + getCoverageScore()); - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/HullRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/HullRepresentation.java deleted file mode 100644 index 1055551..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/HullRepresentation.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; -import messif.utility.FileUtils; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * Original proposal for metric hull objects. - * - * It starts with two mutually furthest objects and one additional selected as an object having - * the maximum sum of distance to the previous hull objects and is not close to any previous hull object. - * - * Until the data-set is covered (completely) a new hull object is appended. It is selected in the same way as - * the third point above. - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class HullRepresentation extends AbstractHullRepresentation<HullRepresentation> { - - /** Instantiate a hull representation from a serialized file containing hull objects. - * @param locatorID ID/symbolic name of this representation - * @param objCls class of objects - * @param filename file containing MESSIF serialized objects - * @throws IllegalArgumentException - * @throws IOException - */ - public HullRepresentation(String locatorID, Class<LocalAbstractObject> objCls, String filename) throws IllegalArgumentException, IOException { - super(locatorID, FileUtils.readObjects(objCls, filename)); // We do not select hull objects here, we rather take existing ones (already selected). - hull.addAll(getObjects()); - } - - public HullRepresentation(List<LocalAbstractObject> data) { - super(data); - } - - public HullRepresentation(PrecomputedDistances shared) { - super(shared); - } - - public void setHullObjects(List<LocalAbstractObject> hullObjects) { - hull.addAll(hullObjects); - } - - protected boolean initHullPoints(){ - if (shared.data.size() <= 3){ - hull.addAll(shared.data); - return false; - } - - hull.add(getFurthest()); - hull.add(nextFurthest()); - hull.add(nextFurthest()); - - setCoveredByHull(); - return true; - } - - @Override - public void select() { - - if(!initHullPoints()){ - return; - } - while (!isAllCovered()) { - // Next candidate - hull.add(nextFurthest()); - setCoveredByHull(); - System.out.print("# Selected next hull object: "); System.out.println(toString()); - } - } - - protected LocalAbstractObject nextFurthest() { - float distThreshold = getMinDist(hull) / 2f; - float maxSum = 0f; - int maxIdx = -1; - List<Integer> filtered = new ArrayList<>(); - boolean skipFilter = false; - do { - int[] hullIndexes = shared.getIndexes(hull); - for (int i = 0; i < shared.getObjectCount(); i++) { - if (isCovered(i) || inHull(i, hullIndexes)) - continue; - LocalAbstractObject o = shared.getObject(i); - if (!skipFilter && filter(i, o)) { - filtered.add(i); - continue; - } - float[] dists = shared.getDistances(i); - - float sum = 0; - for (int hullIndex : hullIndexes) { - float d = dists[hullIndex]; - if (d == 0 && (distThreshold/2 == 0f)){ - sum += distThreshold; - continue; - } - if (d < distThreshold){ - sum = 0f; - break; // object o is too close to an existing hull point, thus ignore - } - sum += d; // else add distance to hull point to sum -// if (d > distThreshold) -// sum += d; - } - if (sum > maxSum) { - maxSum = sum; - maxIdx = i; - } - - } - if (maxSum == 0 && !filtered.isEmpty()) { // (maxSum > 0) <=> we have checked at least one candidate. - skipFilter = true; - filtered.clear(); - } - distThreshold /= 2f; - } while (maxIdx == -1 && distThreshold > 0f); - - assert maxIdx != -1; - return shared.getObject(maxIdx); - } - - /** - * Additional filter on selection of next furthest object to be added into the hull. - * @param objIndex index to {@link #shared} - * @param obj object at the objIndex - * @return <code>true</code> to skip this object, <code>false</code> to include it in the candidates for the next furthest object. - */ - protected boolean filter(int objIndex, LocalAbstractObject obj) { - return false; // We do not any additional filtering here - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/MinDistObjectPairIterator.java b/src/cz/muni/fi/disa/similarityoperators/cover/MinDistObjectPairIterator.java deleted file mode 100644 index f062dcb..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/MinDistObjectPairIterator.java +++ /dev/null @@ -1,89 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.cover; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.MinDistObjectPair; - -import java.util.BitSet; -import java.util.Collections; -import java.util.Iterator; -import java.util.PriorityQueue; - -/** - * Iterates over all pairs of objects (using the pair-wise distance matrix) and returns pairs of objects having the smallest distance. - * It skips already processed objects, so they cannot be reported once marked in the passed BitSet (its bit is cleared). - * Caching is implemented to improve performance. - * - * @author Miriama Janosova, Masaryk University, Brno, Czech Republic, x424615@fi.muni.cz - */ -public class MinDistObjectPairIterator implements Iterator<MinDistObjectPair> { - private static final int CACHE_SIZE = 10; - - private float[][] distancesArray; - private BitSet notProcessedIndexes; //stores which objects has been processed - private MinDistObjectPair[] cache; //sorted array of minDistObjects - private int nextMinDistIndex; - - public MinDistObjectPairIterator(float[][] distancesArray, BitSet notProcessedIdxs){ - this.distancesArray = distancesArray; - this.notProcessedIndexes = notProcessedIdxs; - this.cache = new MinDistObjectPair[CACHE_SIZE]; - this.nextMinDistIndex = 0; - } - - @Override - public boolean hasNext() { - return notProcessedIndexes.cardinality() > 0; - } - - @Override - public MinDistObjectPair next() { - while (true) { - // either we are out of bounds or we do not have cache values anymore - if (nextMinDistIndex >= CACHE_SIZE || cache[nextMinDistIndex] == null){ - recomputeMinDists(); - } - MinDistObjectPair minDistsObject = cache[nextMinDistIndex]; //get current minDistObject - cache[nextMinDistIndex] = null; - nextMinDistIndex++; - if(notProcessedIndexes.get(minDistsObject.firstIdx) - && notProcessedIndexes.get(minDistsObject.secondIdx)){ // none of the indexes has been processed yet - //therefore we flip them - notProcessedIndexes.clear(minDistsObject.firstIdx); - notProcessedIndexes.clear(minDistsObject.secondIdx); - return minDistsObject; // we can return the object - } - //otherwise we continue - } - } - - private void recomputeMinDists() { - PriorityQueue<MinDistObjectPair> maxHeap = new PriorityQueue<>(Collections.reverseOrder()); //max heap - // we use max heap for storing CACHE_SIZE minimal distances between objects - for (int i = notProcessedIndexes.nextSetBit(0); i != -1; i = notProcessedIndexes.nextSetBit(i + 1)) { - for (int j = notProcessedIndexes.nextSetBit(i + 1); j != -1; j = notProcessedIndexes.nextSetBit(j + 1)) { - float d = distancesArray[i][j]; - if (maxHeap.size() < CACHE_SIZE) { - maxHeap.add(new MinDistObjectPair(i, j, d)); - continue; - } - if (d >= maxHeap.peek().distance) { - continue; //dist greater than max_value in heap, therefore continue - } - maxHeap.poll(); //remove the old root - maxHeap.add(new MinDistObjectPair(i, j, d)); // add new object - } - } - transformMaxHeapIntoMinArray(maxHeap); - } - - private void transformMaxHeapIntoMinArray(PriorityQueue<MinDistObjectPair> maxHeap) { - int i = maxHeap.size(); // i can be smaller than CACHE_SIZE - while (!maxHeap.isEmpty()) { - cache[--i] = maxHeap.poll(); //extract the max value from heap, put it in the further possible position - } - nextMinDistIndex = 0; // reset of cache completed - } - - public BitSet getNotProcessedIndexes(){ - return notProcessedIndexes; - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/SnakeAutoRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/SnakeAutoRepresentation.java deleted file mode 100644 index d56ad26..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/SnakeAutoRepresentation.java +++ /dev/null @@ -1,382 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.cover; - - -import cz.muni.fi.disa.similarityoperators.cover.types.BallBasedCover; -import messif.objects.LocalAbstractObject; -import messif.objects.keys.RadiusObjectKey; -import messif.utility.FileUtils; - -import java.io.IOException; -import java.util.*; -import java.util.logging.Level; -import java.util.logging.Logger; - -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ - -/** - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class SnakeAutoRepresentation extends AbstractRepresentation<SnakeAutoRepresentation> implements BallBasedCover { - private static final Logger logger = Logger.getLogger(SnakeAutoRepresentation.class.getSimpleName()); - - private List<Ball> balls; - private int nearestReverseNeighbors; - private float stopDistance; - - private int[] reverseNeighborCount; - - /** Instantiate a snake representation from a serialized file containing objects one for a ball representation. - * Each object must have the key of RadiusObjectKey that holds the ball radius. - * - * @param locatorID ID/symbolic name of this representation - * @param objCls class of objects - * @param filename file containing MESSIF serialized objects - * @throws IllegalArgumentException - * @throws IOException - */ - public SnakeAutoRepresentation(String locatorID, Class<LocalAbstractObject> objCls, String filename) throws IllegalArgumentException, IOException { - super(locatorID, Collections.EMPTY_LIST); - - this.nearestReverseNeighbors = 0; - this.stopDistance = 0; - this.reverseNeighborCount = new int[0]; - - this.balls = new ArrayList<>(); - FileUtils.readObjects(objCls, filename).forEach((c) -> { - balls.add(new Ball(c, c.getObjectKey(RadiusObjectKey.class).getRadius())); - }); - } - - public SnakeAutoRepresentation(List<LocalAbstractObject> data, int nearestReverseNeighbors) { - this(data, nearestReverseNeighbors, 0f); - } - - public SnakeAutoRepresentation(PrecomputedDistances shared, int nearestReverseNeighbors) { - this(shared, nearestReverseNeighbors, 0f); - } - - public SnakeAutoRepresentation(List<LocalAbstractObject> data, int nearestReverseNeighbors, float stopDistance) { - super(data); - this.stopDistance = stopDistance; - init(nearestReverseNeighbors); - } - - public SnakeAutoRepresentation(PrecomputedDistances shared, int nearestReverseNeighbors, float stopDistance) { - super(shared); - this.stopDistance = stopDistance; - init(nearestReverseNeighbors); - } - - private void init(int nearestReverseNeighbors) { - this.nearestReverseNeighbors = nearestReverseNeighbors; - this.balls = new ArrayList<>(); - this.reverseNeighborCount = new int[shared.getObjectCount()]; - } - - @Override - public String toString() { - String base = toShortString(); - StringBuilder sb = new StringBuilder(base.substring(0, base.length()-1)); - sb.append(", "); - boolean sep = false; - for (Ball b : balls) { - if (sep) - sb.append("; "); - sb.append(b); - sep = true; - } - sb.append("]"); - return sb.toString(); - } - - @Override - public String toShortString() { - float min = Float.MAX_VALUE, - max = 0f, - sum = 0f; - for (Ball b : balls) { - if (b.getRadius() > max) - max = b.getRadius(); - if (b.getRadius() < min) - min = b.getRadius(); - sum += b.getRadius(); - } - float avg = (balls.isEmpty()) ? 0f : (sum / (float)balls.size()); - - StringBuilder sb = new StringBuilder(getClass().getSimpleName()); - sb.append('['); - sb.append("objects=").append(shared.getObjectCount()).append(", "); - sb.append("rnn=").append(nearestReverseNeighbors).append(", "); - sb.append("stopDist=").append(stopDistance).append(", "); - sb.append("centers=").append(balls.size()).append(", "); - sb.append("radius_min=").append(min).append(", "); - sb.append("radius_max=").append(max).append(", "); - sb.append("radius_avg=").append(avg).append("]"); - return sb.toString(); - } - - @Override - public Iterator<LocalAbstractObject> getRepresentatives() { - return new Iterator<LocalAbstractObject>() { - final Iterator<Ball> it = balls.iterator(); - - @Override - public boolean hasNext() { - return it.hasNext(); - } - - @Override - public LocalAbstractObject next() { - return it.next().center; - } - }; - } - - @Override - public int getRepresentativesCount() { - return balls.size(); - } - - @Override - public float getAverageRadius() { - float sum = 0f; - for (Ball b : balls) - sum += b.getRadius(); - return (sum == 0f) ? sum : (sum / balls.size()); - } - - @Override - protected void select() { - // Calculate counts of being "reverse nearest neighbor" - for (int i = 0; i < shared.getObjectCount(); i++) { - List<LocalAbstractObject> objs = getKNNObjects(shared.getObject(i), this.nearestReverseNeighbors); - for (LocalAbstractObject o : objs) - reverseNeighborCount[shared.getIndex(o)]++; - } - // Map number of being rkNN to object count - Map<Integer,Integer> mapNNtoCnt = new TreeMap<>(); - // Get maximum - int max = 0; - for (int i = 0; i < reverseNeighborCount.length; i++) { - int cnt = 1 + mapNNtoCnt.getOrDefault(reverseNeighborCount[i], 0); - mapNNtoCnt.put(reverseNeighborCount[i], cnt); - if (reverseNeighborCount[i] > max) - max = reverseNeighborCount[i]; - } - if (logger.isLoggable(Level.FINE)) { - logger.log(Level.FINE, "Maximum of being {0}-RNN: {1}", new Object[]{nearestReverseNeighbors, max}); - StringBuilder sb = new StringBuilder(); - for (Map.Entry<Integer, Integer> e : mapNNtoCnt.entrySet()) - sb.append(e.getKey()).append('=').append(e.getValue()).append(' '); - logger.log(Level.FINE, "Occurences of being {0}-RNN (rNN=obj_count): {1}", new Object[]{nearestReverseNeighbors, sb.toString()}); - } - - // Repeatedly add balls around the object having their RNN count equal to current max - boolean repeat, repeat2 = false; - int iterLimit = 20; - int minCoverSize = 5; // can be proportional to the data-set size? - do { - if (--iterLimit == 0) - break; - // Select cover objects - for (int i = 0; i < shared.getObjectCount(); i++) { - if (reverseNeighborCount[i] == max && !isCovered(i)) { - setCovered(i); - balls.add(new Ball(shared.getObject(i), 0)); - } - } - // Set radii for cover objects - if (balls.size() < minCoverSize) { - // Next round automatically - max -= 1; - logger.log(Level.FINE, "Decrementing maximum of being {0}-RNN due to too few cover objects: {1}", new Object[]{nearestReverseNeighbors, max}); - repeat = true; - continue; - } - computeBallRadii(false); - - List<BallPair> overlaps = new ArrayList<>(); - // Test if any ball is inside another cover ball - Iterator<Ball> iter = balls.iterator(); - while (iter.hasNext()) { - Ball b = iter.next(); - // Remove zero-radius balls automatically - if (b.radius == 0f) { - iter.remove(); - b.setRadius(-1f); - logger.log(Level.FINE, "Removing zero-radius ball {0}", b); - continue; - } - int bIdx = shared.getIndex(b.center); - float[] bDists = shared.getDistances(bIdx); - // Find a ball that contains 'b' completely - for (Ball bOn : balls) { - if (b == bOn) // Skip myself! - continue; - float d = bDists[shared.getIndex(bOn.center)]; - if (b.radius + d <= bOn.radius) { - // bOn covers b completely - iter.remove(); // do not clear its "covered" flag - logger.log(Level.FINE, "Removing ball {0} because it is covered by ball {1}", new Object[]{b, bOn}); - b.setRadius(-1f); // a flag to recognize that this ball was removed! - break; - } else if (d <= Math.min(b.radius, bOn.radius)) { - if (stopDistance >= Math.max(b.radius, bOn.radius)) - logger.log(Level.FINE, "Too small balls, so stoping: stopDists={0}, radius1={1}, radius2={2}", - new Object[]{stopDistance, b.radius, bOn.radius}); - else - overlaps.add(new BallPair(b, bOn, d)); - } - } - } - // Test all pairs whether they are still valid - repeat = (balls.size() < minCoverSize); - repeat2 = false; - for (BallPair bp : overlaps) { - if (bp.left.getRadius() >= 0 || bp.right.getRadius() >= 0) { - // OK, there is a major overlap, so decrement max. - repeat2 = true; - logger.log(Level.FINE, "Too close balls, removing the one covering fewer objects: " - + "dist={0}, objectsI={1}, objectsJ={2}", - new Object[]{bp.distance, bp.left.coveredObjects, bp.right.coveredObjects}); - if (bp.left.coveredObjects < bp.right.coveredObjects) { - balls.remove(bp.left); - bp.left.setRadius(-1); - } else { - balls.remove(bp.right); - bp.right.setRadius(-1); - } - } - } - if (repeat2 || repeat) { - max -= 1; - logger.log(Level.FINE, "Decrementing maximum of being {0}-RNN: {1}", new Object[]{nearestReverseNeighbors, max}); - } - } while (max > 0 && (repeat || repeat2)); - - // Update radii and coverage flags for points inside - computeBallRadii(true); - } - - private void computeBallRadii(boolean setAsCovered) { - if (balls.isEmpty()) - return; - // Clear radii and covered counts first - balls.forEach((b) -> { b.clear(); }); - // Set new radii - for (int i = 0; i < shared.getObjectCount(); i++) { - //if (isCovered(i)) // Assign all (ball centers can be processed since they are assigned to themselves) - // continue; - - BallDistPair nearest = getNearestBall(shared.getObject(i)); - if (nearest.ball.getRadius() < nearest.distance) - nearest.ball.setRadius(nearest.distance); - nearest.ball.incrCoveredObjects(); - if (setAsCovered) - setCovered(i); - } - } - - - protected BallDistPair getNearestBall(LocalAbstractObject center) { - int centerIdx = shared.getIndex(center); - float[] dists = shared.getDistances(centerIdx); - - float distMin = Float.MAX_VALUE; - Ball ballMin = null; - for (Ball b : balls) { - int ballIdx = shared.getIndex(b.center); - if (dists[ballIdx] < distMin) { - distMin = dists[ballIdx]; - ballMin = b; - } - } - if (ballMin != null) - return new BallDistPair(ballMin, distMin); - else - return null; - } - - - private List<LocalAbstractObject> getCenters() { - List<LocalAbstractObject> lst = new ArrayList<>(balls.size()); - for (Ball b : balls) - lst.add(b.center); - return lst; - } - - @Override - public boolean isExternalCovered(LocalAbstractObject o) { - for (Ball b : balls) { - if (o.getDistance(b.center) <= b.radius) - return true; - } - return false; - } - - public static class Ball { - private LocalAbstractObject center; - private float radius; - private int coveredObjects; - - public Ball(LocalAbstractObject center, float radius) { - this.center = center; - this.radius = radius; - } - - public void clear() { - setRadius(0f); - coveredObjects = 0; - } - - public void setRadius(float radius) { - this.radius = radius; - } - - public float getRadius() { - return radius; - } - - private void incrCoveredObjects() { - coveredObjects++; - } - - public int getCoveredObjects() { - return coveredObjects; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("center=").append(center).append(", radius=").append(radius); - return sb.toString(); - } - } - - public static class BallDistPair { - public final Ball ball; - public final float distance; - - public BallDistPair(Ball ball, float distance) { - this.ball = ball; - this.distance = distance; - } - } - - public static class BallPair { - public final Ball left; - public final Ball right; - public final float distance; - - public BallPair(Ball left, Ball right, float dist) { - this.left = left; - this.right = right; - this.distance = dist; - } - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/SnakeRepresentation.java b/src/cz/muni/fi/disa/similarityoperators/cover/SnakeRepresentation.java deleted file mode 100644 index eae4a10..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/SnakeRepresentation.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover; - -import messif.objects.LocalAbstractObject; -import messif.objects.keys.RadiusObjectKey; -import messif.utility.FileUtils; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; - -/** - * Snake representation of data set. - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class SnakeRepresentation extends AbstractRepresentation<SnakeRepresentation> { - private float maxRadius; - private List<LocalAbstractObject> centers; - - /** Instantiate a snake representation from a serialized file containing objects one for a ball representation. - * The first object must have the key of RadiusObjectKey that holds the ball radius. - * The other objects can have this key too, but they are ignored. - * - * @param locatorID ID/symbolic name of this representation - * @param objCls class of objects - * @param filename file containing MESSIF serialized objects - * @throws IllegalArgumentException - * @throws IOException - */ - public SnakeRepresentation(String locatorID, Class<LocalAbstractObject> objCls, String filename) throws IllegalArgumentException, IOException { - super(locatorID, Collections.EMPTY_LIST); - centers = FileUtils.readObjects(objCls, filename); - maxRadius = centers.get(0).getObjectKey(RadiusObjectKey.class).getRadius(); - } - - public SnakeRepresentation(List<LocalAbstractObject> data, float maxRadius) { - super(data); - this.maxRadius = maxRadius; - this.centers = new ArrayList<>(); - } - - public SnakeRepresentation(PrecomputedDistances shared, float maxRadius) { - super(shared); - this.maxRadius = maxRadius; - this.centers = new ArrayList<>(); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(getClass().getSimpleName()); - sb.append('['); - sb.append("objects=").append(shared.getObjectCount()).append(", "); - sb.append("max_radius=").append(maxRadius).append(", "); - sb.append("centers=").append(centers.size()).append(", "); - boolean sep = false; - for (LocalAbstractObject c : centers) { - if (sep) - sb.append("; "); - sb.append("center=").append(c).append(", radius=").append(maxRadius); - sep = true; - } - sb.append("]"); - return sb.toString(); - } - - @Override - public String toShortString() { - StringBuilder sb = new StringBuilder(getClass().getSimpleName()); - sb.append('['); - sb.append("objects=").append(shared.getObjectCount()).append(", "); - sb.append("max_radius=").append(maxRadius).append(", "); - sb.append("centers=").append(centers.size()).append("]"); - return sb.toString(); - } - - @Override - public Iterator<LocalAbstractObject> getRepresentatives() { - return centers.iterator(); - } - - @Override - public int getRepresentativesCount() { - return centers.size(); - } - - @Override - protected void select() { - // Start from the mediod - LocalAbstractObject cur = getMedoid(); - centers.add(cur); - setCoveredByBall(cur, maxRadius); - - while (!isAllCovered()) { - // Next candidate - LocalAbstractObject next = closestCandidate(); - List<LocalAbstractObject> nextBall = getBallObjects(next, maxRadius, true); - - // Update candidate to mediod from object in the ball around "next" - LocalAbstractObject med = getMedoid(nextBall); - if (med != null) - next = med; - - centers.add(next); - setCoveredByBall(next, maxRadius); - } - } - - /** Next not-covered object that is closest to currently selected cover objects (ball centers) */ - private LocalAbstractObject closestCandidate() { - float min = Float.MAX_VALUE; - LocalAbstractObject cand = null; - - int[] centerIdxs = shared.getIndexes(centers); - - for (int i = 0; i < shared.getObjectCount(); i++) { - if (isCovered(i)) - continue; - - float sum = shared.sumArray(i, centerIdxs); - if (sum < min) { - min = sum; - cand = shared.getObject(i); - } - } - return cand; - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/cover/types/BallBasedCover.java b/src/cz/muni/fi/disa/similarityoperators/cover/types/BallBasedCover.java deleted file mode 100644 index 475e622..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/cover/types/BallBasedCover.java +++ /dev/null @@ -1,15 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.cover.types; - -/** - * Marker for cover representatives based on metric balls. - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public interface BallBasedCover { - float getAverageRadius(); -} diff --git a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll.java b/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll.java deleted file mode 100644 index c5b1b6a..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll.java +++ /dev/null @@ -1,63 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.groupBy; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.PrecomputedDistances; -import cz.muni.fi.disa.similarityoperators.utils.OverlapType; -import cz.muni.fi.disa.similarityoperators.utils.QueryType; -import messif.objects.LocalAbstractObject; - -import java.util.*; - -// range-query ... an object belongs to the group if it is closer or at the same distance to every other object in the group -// knn-query ... -// important ! Unlike the SGB-Any, an object may belong to multiple groups in case the overlapping properties are met. -// overlapping constraints : JOIN-ANY, ELIMINATE (discard object), FORM-NEW-GROUP (creates groups for overlapping objects .. todo nov group dokopy? pre kazdy zvlast?) -public class SGBAll extends SGBQuery { - - public List<List<LocalAbstractObject>> executeQuery(PrecomputedDistances objects, QueryType queryType, double threshold, OverlapType overlapType) { - List<Set<LocalAbstractObject>> groups = new ArrayList<>(); - - for (LocalAbstractObject object : objects.getObjects()) { - List<Integer> candidates = new ArrayList<>(); - List<Integer> overlap = new ArrayList<>(); - List<LocalAbstractObject> result = getSuitableObjects(object, objects, queryType, threshold); - - for (LocalAbstractObject o : result) { - List<Integer> groupsIndexes = getGroupIdxsWithObject(o, groups); - if (!groupsIndexes.isEmpty()) - overlap.addAll(groupsIndexes); // store group indexes - } - Set<Integer> distinctOverlap = new HashSet<>(overlap); - for (Integer groupIndex : distinctOverlap) { - // check whether each object in the group satisfies the threshold - if (Collections.frequency(overlap, groupIndex) == groups.get(groupIndex).size()) - candidates.add(groupIndex); - } - if (candidates.isEmpty()) { - Set<LocalAbstractObject> group = new HashSet<>(); - group.add(object); - groups.add(group); - } - else if (candidates.size() == 1) { - groups.get(candidates.get(0)).add(object); - } - else { - switch (overlapType) { - case JOIN_ANY: { - groups.get(candidates.get(0)).add(object); - break; - } - case FORM_NEW_GROUP: { - Set<LocalAbstractObject> group = new HashSet<>(); - group.add(object); - groups.add(group); - break; - } - case ELIMINATE: { - //todo .. toto je fakt bleh BitSet + ignoruj "zcernane prvky" ? - } - } - } - } - return castInnerSetToList(groups); - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll_N.java b/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll_N.java deleted file mode 100644 index c0b6910..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll_N.java +++ /dev/null @@ -1,73 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.groupBy; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation; -import cz.muni.fi.disa.similarityoperators.utils.ObjectGroups; -import cz.muni.fi.disa.similarityoperators.utils.OverlapType; -import cz.muni.fi.disa.similarityoperators.utils.QueryType; -import messif.objects.LocalAbstractObject; - -import java.util.*; - -public class SGBAll_N extends SGBQuery { - - public ObjectGroups data; // object storing objects and computed clusters - - public SGBAll_N(AbstractRepresentation.PrecomputedDistances objects) { - data = new ObjectGroups(objects); - } - - public ObjectGroups executeQuery(QueryType queryType, double threshold, OverlapType overlapType) { - - for (int objIdx = 0; objIdx < data.objects.getObjectCount(); objIdx++) { - LocalAbstractObject object = data.objects.getObject(objIdx); - - List<Integer> candidates = new ArrayList<>(); // stores the id of a candidate group - List<Integer> overlap = new ArrayList<>(); - List<Integer> result = getSuitableObjectIdxs(object, data.objects, queryType, threshold, null); - - for (Integer index : result) { - int group = data.groupIndexes.get(index); - if (group != -1) { // if a group has already been assigned - overlap.add(group); // store group indexes - } - } - Set<Integer> distinctOverlap = new HashSet<>(overlap); - for (Integer groupIndex : distinctOverlap) { - // check whether each object in the group satisfies the threshold - if (Collections.frequency(overlap, groupIndex) == data.groups.get(groupIndex).size()) - candidates.add(groupIndex); - } - if (candidates.isEmpty()) { - // we need to create new group and create mapping from object to group - Set<Integer> group = new HashSet<>(); - group.add(objIdx); - data.groups.add(group); - data.groupIndexes.put(objIdx, data.groups.size() - 1); // we've created a new group, hence the index is size-1 - } - else if (candidates.size() == 1) { - data.groups.get(candidates.get(0)).add(objIdx); - data.groupIndexes.put(objIdx, candidates.get(0)); - } - else { - switch (overlapType) { - case JOIN_ANY: { - data.groups.get(candidates.get(0)).add(objIdx); - data.groupIndexes.put(objIdx, candidates.get(0)); - break; - } - case FORM_NEW_GROUP: { - Set<Integer> group = new HashSet<>(); - group.add(objIdx); - data.groups.add(group); - data.groupIndexes.put(objIdx, data.groups.size() - 1); - break; - } - case ELIMINATE: { - //todo .. toto je fakt bleh BitSet + ignoruj "zcernane prvky" ? - } - } - } - } - return data; - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll_N_Shuffle.java b/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll_N_Shuffle.java deleted file mode 100644 index 14c4eef..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAll_N_Shuffle.java +++ /dev/null @@ -1,74 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.groupBy; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation; -import cz.muni.fi.disa.similarityoperators.utils.ObjectGroups; -import cz.muni.fi.disa.similarityoperators.utils.OverlapType; -import cz.muni.fi.disa.similarityoperators.utils.QueryType; -import messif.objects.LocalAbstractObject; - -import java.util.*; - -public class SGBAll_N_Shuffle extends SGBQuery { - - public ObjectGroups data; // object storing objects and computed clusters - - public SGBAll_N_Shuffle(AbstractRepresentation.PrecomputedDistances objects) { - data = new ObjectGroups(objects); - } - - public ObjectGroups executeQuery(QueryType queryType, double threshold, OverlapType overlapType) { - - for (int objIdx = 0; objIdx < data.objects.getObjectCount(); objIdx++) { - // instead of retrieving object on position i, selects object on position in shuffleArray[i] - LocalAbstractObject object = data.objects.getObject(data.shuffleArray[objIdx]); - - List<Integer> candidates = new ArrayList<>(); // stores the id of a candidate group - List<Integer> overlap = new ArrayList<>(); - List<Integer> result = getSuitableObjectIdxs(object, data.objects, queryType, threshold, data.shuffleArray); - - for (Integer index : result) { - int group = data.groupIndexes.get(index); - if (group != -1) { // if a group has already been assigned - overlap.add(group); // store group indexes - } - } - Set<Integer> distinctOverlap = new HashSet<>(overlap); - for (Integer groupIndex : distinctOverlap) { - // check whether each object in the group satisfies the threshold - if (Collections.frequency(overlap, groupIndex) == data.groups.get(groupIndex).size()) - candidates.add(groupIndex); - } - if (candidates.isEmpty()) { - // we need to create new group and create mapping from object to group - Set<Integer> group = new HashSet<>(); - group.add(objIdx); // priadaj objekt - data.groups.add(group); //pridaj group - data.groupIndexes.put(objIdx, data.groups.size() - 1); // we've created a new group, hence the index is size-1 - } - else if (candidates.size() == 1) { - data.groups.get(candidates.get(0)).add(objIdx); - data.groupIndexes.put(objIdx, candidates.get(0)); - } - else { - switch (overlapType) { - case JOIN_ANY: { - data.groups.get(candidates.get(0)).add(objIdx); - data.groupIndexes.put(objIdx, candidates.get(0)); - break; - } - case FORM_NEW_GROUP: { - Set<Integer> group = new HashSet<>(); - group.add(objIdx); - data.groups.add(group); - data.groupIndexes.put(objIdx, data.groups.size() - 1); - break; - } - case ELIMINATE: { - //todo .. toto je fakt bleh BitSet + ignoruj "zcernane prvky" ? - } - } - } - } - return data; - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAny.java b/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAny.java deleted file mode 100644 index 3c7ed06..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAny.java +++ /dev/null @@ -1,46 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.groupBy; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.PrecomputedDistances; -import cz.muni.fi.disa.similarityoperators.utils.QueryType; -import cz.muni.fi.disa.similarityoperators.utils.UnionFind; -import messif.objects.LocalAbstractObject; - -import java.util.List; - -public class SGBAny extends SGBQuery{ - - //private QueryType queryType; // specifies the queryType - //private UnionFind objects; - //private double threshold; // k for the NN query or distance for the range query - - public List<List<LocalAbstractObject>> executeQuery(PrecomputedDistances objects, - QueryType queryType, double threshold) { - UnionFind objectsUnionFind = new UnionFind(objects); // creates sets, each containing a single object - - for (LocalAbstractObject queryObject : objects.getObjects()) { - List<LocalAbstractObject> result = getSuitableObjects(queryObject, objectsUnionFind, queryType, threshold); - for (LocalAbstractObject object : result) { - // there cannot exist two groups with intersecting objects..therefore merge both "groups" - objectsUnionFind.union(queryObject, object); - } - } - - //objectsUnionFind.printParent(); - - return objectsUnionFind.getFinalGroups(); - } - - private List<LocalAbstractObject> getSuitableObjects(LocalAbstractObject queryObject, UnionFind objects, QueryType queryType, double threshold) { - return getSuitableObjects(queryObject, objects.getPrecomputedDistances(), queryType, threshold); - } - -// // searches for objects that are threshold-NN neighbours of the object -// private List<LocalAbstractObject> knnQuery(LocalAbstractObject object, UnionFind objects, double threshold) { -// PrecomputedDistances data = objects.getPrecomputedDistances(); -// return knnQuery(object, data, threshold); -// } -// -// private List<LocalAbstractObject> rangeQuery(LocalAbstractObject object, UnionFind objects, double threshold) { -// return rangeQuery(object, objects.getPrecomputedDistances(), threshold); -// } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAny_N.java b/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAny_N.java deleted file mode 100644 index b291690..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBAny_N.java +++ /dev/null @@ -1,50 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.groupBy; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.PrecomputedDistances; -import cz.muni.fi.disa.similarityoperators.utils.ObjectGroups; -import cz.muni.fi.disa.similarityoperators.utils.QueryType; -import cz.muni.fi.disa.similarityoperators.utils.UnionFind; -import messif.objects.LocalAbstractObject; - -import java.util.List; - -public class SGBAny_N extends SGBQuery{ - - public ObjectGroups data; // object storing objects and computed clusters - - - public SGBAny_N(PrecomputedDistances objects) { - data = new ObjectGroups(objects); - } - //private QueryType queryType; // specifies the queryType - //private UnionFind objects; - //private double threshold; // k for the NN query or distance for the range query - - public ObjectGroups executeQuery(QueryType queryType, double threshold) { - UnionFind objectsUnionFind = new UnionFind(data.objects); // creates sets, each containing a single object - - for (LocalAbstractObject queryObject : data.objects.getObjects()) { - List<LocalAbstractObject> result = getSuitableObjects(queryObject, objectsUnionFind, queryType, threshold); - for (LocalAbstractObject object : result) { - // there cannot exist two groups with intersecting objects..therefore merge both "groups" - objectsUnionFind.union(queryObject, object); - } - } - - return objectsUnionFind.createGrouping(); - } - - private List<LocalAbstractObject> getSuitableObjects(LocalAbstractObject queryObject, UnionFind objects, QueryType queryType, double threshold) { - return getSuitableObjects(queryObject, objects.getPrecomputedDistances(), queryType, threshold); - } - -// // searches for objects that are threshold-NN neighbours of the object -// private List<LocalAbstractObject> knnQuery(LocalAbstractObject object, UnionFind objects, double threshold) { -// PrecomputedDistances data = objects.getPrecomputedDistances(); -// return knnQuery(object, data, threshold); -// } -// -// private List<LocalAbstractObject> rangeQuery(LocalAbstractObject object, UnionFind objects, double threshold) { -// return rangeQuery(object, objects.getPrecomputedDistances(), threshold); -// } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBQuery.java b/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBQuery.java deleted file mode 100644 index d5b3a4e..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBQuery.java +++ /dev/null @@ -1,192 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.groupBy; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation; -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.PrecomputedDistances; -import cz.muni.fi.disa.similarityoperators.utils.ObjectGroups; -import cz.muni.fi.disa.similarityoperators.utils.QueryType; -import messif.objects.LocalAbstractObject; - -import java.io.BufferedWriter; -import java.io.IOException; -import java.util.*; - -public abstract class SGBQuery { - - protected List<LocalAbstractObject> getSuitableObjects(LocalAbstractObject queryObject, PrecomputedDistances objects, - QueryType queryType, double threshold) { - if (queryType == QueryType.RANGE) { - return rangeQuery(queryObject, objects, threshold); - } - return knnQuery(queryObject, objects, threshold); - } - - /** - * get indexes of suitable objects with respect to the shuffle array - */ - protected List<Integer> getSuitableObjectIdxs(LocalAbstractObject queryObject, PrecomputedDistances objects, - QueryType queryType, double threshold, Integer[] shuffleArray) { - if (queryType == QueryType.RANGE) { - if (shuffleArray == null){ - return rangeQueryIndexes(queryObject, objects, threshold); - } - return rangeQueryIndexes(queryObject, objects, threshold, shuffleArray); - } - return knnQueryIndexes(queryObject, objects, threshold); - } - - - protected List<LocalAbstractObject> knnQuery(LocalAbstractObject object, PrecomputedDistances data, double threshold) { - int objectIdx = data.getIndex(object); - float[] dists = data.getDistances(objectIdx); - - List<Integer> res = knnQuery(objectIdx, dists, threshold); - List<LocalAbstractObject> knnObjects = new ArrayList<>(); - for (int i = 0; i < res.size(); i++) { - knnObjects.add(data.getObject(res.get(i))); - } - return knnObjects; - } - - protected List<Integer> knnQuery(int objectIdx, float[] dists, double threshold) { - - AbstractRepresentation.Ranked[] ranked = AbstractRepresentation.Ranked.create(dists); - Arrays.sort(ranked); - List<Integer> res = new ArrayList<>((int)threshold); - int i = 0; - while (res.size() < Math.min((int) threshold, ranked.length)) { - res.add(ranked[i].index); - i++; - } - return res; - } - - /** returns indexes of knn-neighbours*/ - protected List<Integer> knnQueryIndexes(LocalAbstractObject object, PrecomputedDistances data, - double threshold) { - int objectIdx = data.getIndex(object); - float[] dists = data.getDistances(objectIdx); - - return knnQuery(objectIdx, dists, threshold); - } - - protected List<LocalAbstractObject> rangeQuery(LocalAbstractObject object, PrecomputedDistances data, double threshold) { - List<LocalAbstractObject> result = new ArrayList<>(); - for (LocalAbstractObject other : data.getObjects()) { - if (data.getDistance(object, other) <= threshold) { - result.add(other); - } - } - return result; - } - - protected List<Integer> rangeQueryIndexes(LocalAbstractObject object, PrecomputedDistances data, - double threshold) { - List<Integer> result = new ArrayList<>(); - int objectIndex = data.getIndex(object); - for(int i = 0; i < data.getObjectCount(); i++) { - if (data.getDistance(objectIndex, i) <= threshold ) { - result.add(i); - } - } - return result; - } - - protected List<Integer> rangeQueryIndexes(LocalAbstractObject object, PrecomputedDistances data, - double threshold, Integer[] shuffleArray) { - List<Integer> result = new ArrayList<>(); - int objectIndex = data.getIndex(object); // toto je uz spravny objekt - for(int i = 0; i < data.getObjectCount(); i++) { - if (data.getDistance(objectIndex, shuffleArray[i]) <= threshold ) { // hladam susedov podla ich indexov v shuffle array - result.add(i); // vrati indexy objektov odpovedajuce indexom v shuffle array - } - } - return result; - } - - protected List<Integer> getGroupIdxsWithObject(LocalAbstractObject object, - List<Set<LocalAbstractObject>> groups) { - List<Integer> result = new ArrayList<>(); - for (int i = 0; i < groups.size(); i++) { - if (groups.get(i).contains(object)) - result.add(i); - } - return result; - } - - protected List<List<LocalAbstractObject>> castInnerSetToList(List<Set<LocalAbstractObject>> groups) { - List<List<LocalAbstractObject>> result = new ArrayList<>(); - for (Set<LocalAbstractObject> group : groups) { - result.add(new ArrayList<>(group)); - } - return result; - } - -// public String toShortString(List<List<LocalAbstractObject>> groups) { -// StringBuilder sb = new StringBuilder(); -// sb.append("GROUPS count: ").append(groups.size()).append("\n"); -// for (int i = 0; i < groups.size(); i++) { -// sb.append("Group: ").append(i).append(" size: ").append(groups.get(i).size()).append("\n"); -// } -// return sb.toString(); -// } - - public String toShortStringN(List<Set<Integer>> groups) { - StringBuilder sb = new StringBuilder(); - sb.append("GROUPS count: ").append(groups.size()).append("\n"); - for (int i = 0; i < groups.size(); i++) { - sb.append("Group: ").append(i).append(" size: ").append(groups.get(i).size()).append("\n"); - } - return sb.toString(); - } - -// public void write(List<List<LocalAbstractObject>> groups) throws IOException { -// System.out.println("GROUPS count: " + groups.size()); -// StringBuilder sb = new StringBuilder(); -// for (int i = 0; i < groups.size(); i++) { -// System.out.println("Group " + i + " size: " + groups.get(i).size() + "\n" + "objects: "); -// for (LocalAbstractObject object : groups.get(i)) { -// object.write(System.out); -// } -// } -// } - - public void write(ObjectGroups objectGroups, BufferedWriter writer) throws IOException { - String objectNameConst = "#objectKey messif.objects.keys.AbstractObjectKey "; - System.out.println("GROUPS count: " + objectGroups.groups.size()); - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < objectGroups.groups.size(); i++) { - //System.out.println("Group " + i + " size: " + groups.get(i).size() + "\n" + "objects: "); - for (int j = 0; j < objectGroups.groups.get(i).size(); j++) { - LocalAbstractObject object = objectGroups.objects.getObject(objectGroups.shuffleArray[j]); - sb.append(objectNameConst).append(object.getLocatorURI()).append("\n").append(i).append("\n"); // object identifier and group separated by \n - } - } - writer.write(sb.toString()); - //System.out.println(sb.toString()); - } - - public void write(HashMap<Integer, List<Integer>> mapping, SoftSGBStrategies clustering, BufferedWriter writer) throws IOException { - String objectNameConst = "#objectKey messif.objects.keys.AbstractObjectKey "; - System.out.println("GROUPS count: " + clustering.mainGrouping.groups.size()); - StringBuilder sb = new StringBuilder(); - for (Map.Entry<Integer, List<Integer>> objClusterPair : mapping.entrySet()) { - LocalAbstractObject object = clustering.mainGrouping.objects.getObject - (clustering.mainGrouping.shuffleArray[objClusterPair.getKey()]); - sb.append(objectNameConst).append(object.getLocatorURI()).append("\n"); // object identifier and group separated by \n - boolean flag = false; - for (Integer clusterId : objClusterPair.getValue()){ - if (flag) { - sb.append(",").append(clusterId); - } - else{ - sb.append(clusterId); - flag = true; - } - } - sb.append("\n"); - } - writer.write(sb.toString()); - //System.out.println(sb.toString()); - } - -} diff --git a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote.java b/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote.java deleted file mode 100644 index aaf8536..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote.java +++ /dev/null @@ -1,51 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.groupBy; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation; -import cz.muni.fi.disa.similarityoperators.utils.QueryType; -import messif.objects.LocalAbstractObject; - -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -public class SGBVote extends SGBQuery { - - public List<List<LocalAbstractObject>> executeQuery(AbstractRepresentation.PrecomputedDistances objects, - QueryType queryType, double threshold) { - List<Set<LocalAbstractObject>> groups = new ArrayList<>(); - - for (LocalAbstractObject queryObject : objects.getObjects()) { - List<LocalAbstractObject> result = getSuitableObjects(queryObject, objects, queryType, threshold); - float[] weights = new float[groups.size()]; - for (LocalAbstractObject o : result) { - List<Integer> groupsIndexes = getGroupIdxsWithObject(o, groups); - if (!groupsIndexes.isEmpty()) { - if (objects.getDistance(queryObject, o) == 0f) { - weights[groupsIndexes.get(0)] = Float.MAX_VALUE; - } else { - weights[groupsIndexes.get(0)] += (1 / objects.getDistance(queryObject, o)); - } - } - } - int maxIdx = -1; - float maxWeight = 0f; - for (int i = 0; i < weights.length; i++) { - if (weights[i] != 0 && weights[i] > maxWeight) { - maxIdx = i; - maxWeight = weights[i]; - } - } - - if (maxIdx == -1){ - Set<LocalAbstractObject> newGroup = new HashSet<>(); - newGroup.add(queryObject); - groups.add(newGroup); - } - else { - groups.get(maxIdx).add(queryObject); - } - } - return castInnerSetToList(groups); - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote_N.java b/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote_N.java deleted file mode 100644 index 833c359..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote_N.java +++ /dev/null @@ -1,61 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.groupBy; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation; -import cz.muni.fi.disa.similarityoperators.utils.ObjectGroups; -import cz.muni.fi.disa.similarityoperators.utils.QueryType; -import messif.objects.LocalAbstractObject; - -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -public class SGBVote_N extends SGBQuery { - - public ObjectGroups data; // object storing objects and computed clusters - - public SGBVote_N(AbstractRepresentation.PrecomputedDistances objects) { - data = new ObjectGroups(objects); - } - - public ObjectGroups executeQuery(QueryType queryType, double threshold) { - - for (int objIdx = 0; objIdx < data.objects.getObjectCount(); objIdx++) { - LocalAbstractObject queryObject = data.objects.getObject(objIdx); - - List<Integer> result = getSuitableObjectIdxs(queryObject, data.objects, queryType, threshold, null); - float[] weights = new float[data.groups.size()]; - - for (Integer otherIdx : result) { - int group = data.groupIndexes.get(otherIdx); - - if (group != -1) { // object has already a group assigned - if (data.objects.getDistance(objIdx, otherIdx) == 0f) { - weights[group] = Float.MAX_VALUE; - } else { - weights[group] += (1 / data.objects.getDistance(objIdx, otherIdx)); - } - } - } - int maxIdx = -1; - float maxWeight = 0f; - for (int i = 0; i < weights.length; i++) { - if (weights[i] != 0 && weights[i] > maxWeight) { - maxIdx = i; - maxWeight = weights[i]; - } - } - - if (maxIdx == -1){ - Set<Integer> newGroup = new HashSet<>(); - newGroup.add(objIdx); - data.groups.add(newGroup); - data.groupIndexes.put(objIdx, data.groups.size() - 1); - } - else { - data.groups.get(maxIdx).add(objIdx); - data.groupIndexes.put(objIdx, maxIdx); - } - } - return data; - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote_N_Shuffle.java b/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote_N_Shuffle.java deleted file mode 100644 index ae2a809..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGBVote_N_Shuffle.java +++ /dev/null @@ -1,63 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.groupBy; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation; -import cz.muni.fi.disa.similarityoperators.utils.ObjectGroups; -import cz.muni.fi.disa.similarityoperators.utils.QueryType; -import messif.objects.LocalAbstractObject; - -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -public class SGBVote_N_Shuffle extends SGBQuery { - - public ObjectGroups data; // object storing objects and computed clusters - - public SGBVote_N_Shuffle(AbstractRepresentation.PrecomputedDistances objects) { - data = new ObjectGroups(objects); - } - - public ObjectGroups executeQuery(QueryType queryType, double threshold) { - - for (int objIdx = 0; objIdx < data.objects.getObjectCount(); objIdx++) { - - LocalAbstractObject queryObject = data.objects.getObject(data.shuffleArray[objIdx]); - - List<Integer> result = getSuitableObjectIdxs(queryObject, data.objects, queryType, threshold, data.shuffleArray); - float[] weights = new float[data.groups.size()]; - - for (Integer otherIdx : result) { - int group = data.groupIndexes.get(otherIdx); // odpoveda group risluchajucej indexu v shuffle array - - if (group != -1) { // object has already a group assigned - // najdi prvky podla indexov v shuffle array - if (data.objects.getDistance(data.shuffleArray[objIdx], data.shuffleArray[otherIdx]) == 0f) { - weights[group] = Float.MAX_VALUE; - } else { - weights[group] += (1 / data.objects.getDistance(data.shuffleArray[objIdx], data.shuffleArray[otherIdx])); - } - } - } - int maxIdx = -1; - float maxWeight = 0f; - for (int i = 0; i < weights.length; i++) { - if (weights[i] != 0 && weights[i] > maxWeight) { - maxIdx = i; - maxWeight = weights[i]; - } - } - - if (maxIdx == -1){ - Set<Integer> newGroup = new HashSet<>(); - newGroup.add(objIdx); - data.groups.add(newGroup); - data.groupIndexes.put(objIdx, data.groups.size() - 1); - } - else { - data.groups.get(maxIdx).add(objIdx); - data.groupIndexes.put(objIdx, maxIdx); - } - } - return data; - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGB_All_Any.java b/src/cz/muni/fi/disa/similarityoperators/groupBy/SGB_All_Any.java deleted file mode 100644 index 1ad94cd..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/groupBy/SGB_All_Any.java +++ /dev/null @@ -1,6 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.groupBy; - -public class SGB_All_Any { - - -} diff --git a/src/cz/muni/fi/disa/similarityoperators/groupBy/SoftSGBStrategies.java b/src/cz/muni/fi/disa/similarityoperators/groupBy/SoftSGBStrategies.java deleted file mode 100644 index bc5190d..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/groupBy/SoftSGBStrategies.java +++ /dev/null @@ -1,259 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.groupBy; -import cz.muni.fi.disa.similarityoperators.cover.*; -import cz.muni.fi.disa.similarityoperators.utils.ObjectGroups; -import messif.objects.LocalAbstractObject; - -import java.util.*; - -public class SoftSGBStrategies extends SGBQuery{ - - public ObjectGroups mainGrouping; - - private int MAX_NN; - - public SoftSGBStrategies(ObjectGroups grouping) { - this.mainGrouping = grouping; - } - - // porovnavam vyhradne vzdialenosti medoidov voci sebe.. - // dostatocne blizke clustre spojim tak, ze vsetkym objektom v clustri nastavim ako dalsie cluster_ids prave clusterIds tychto medoidov - public HashMap<Integer, List<Integer>> knnClusteringStrategy(double thresholdDist, int maxNNs) { - this.MAX_NN = maxNNs; - // vracaj indexy nie objekty - int[] medoids = computeMedoidsIdxs(); - HashMap<Integer, List<Integer>> softClustering = new HashMap<>(); - - for(int i = 0; i < medoids.length; i++){ - // search for other object within the arrayList for objects within thresholdDist - int mClusterId = this.mainGrouping.groupIndexes.get(i); - List<Integer> clusterIds = new ArrayList<>(); - clusterIds.add(mClusterId); - List<Integer> kNNClusterIds = getClusterIdsOfNNsWithinThreshold(i, thresholdDist, medoids); - clusterIds.addAll(kNNClusterIds); - for (Integer clusterObjectIdx : mainGrouping.groups.get(mClusterId)){ - softClustering.put(clusterObjectIdx, clusterIds); - } - } - return softClustering; - } - - // pre kazdy objekt hladam blizke medoidy.. - // dostatocne blizke clustre spojim tak, ze vsetkym objektom v clustri nastavim ako dalsie cluster_ids prave clusterIds tychto medoidov - public HashMap<Integer, List<Integer>> knnMedoidsClusteringStrategy(double thresholdDist, int maxNNs) { - // vracaj indexy nie objekty - this.MAX_NN = maxNNs; - int[] medoidIdxs = computeMedoidsIdxs(); - HashMap<Integer, List<Integer>> softClustering = new HashMap<>(); - - for(int i = 0; i < this.mainGrouping.objects.getObjectCount(); i++){ - // search for other object within the arrayList for objects within thresholdDist - // je mi jedno ci to spravim v poradi shuffle array alebo objektov v precomp dists - int mClusterId = this.mainGrouping.groupIndexes.get(i); - List<Integer> clusterIds = new ArrayList<>(); - clusterIds.add(mClusterId); - List<Integer> kNNclusterIds = getClusterIdsOfNNsWithinThreshold(i, thresholdDist, medoidIdxs); - clusterIds.addAll(kNNclusterIds); - softClustering.put(i, clusterIds); - } - return softClustering; - } - - - public HashMap<Integer, List<Integer>> hullClusteringStrategy(double thresholdDist, int maxNNs) { - this.MAX_NN = maxNNs; - HashMap<Integer, List<Integer>> softClustering = new HashMap<>(); - int[] hullObjectsIdxs = getHullIdxs(); - - // pre kazy objekt vyber najblizsich kNN hullObjektov blizsich ako threshold - for (int i = 0; i < this.mainGrouping.objects.getObjectCount(); i++) { - int mClusterId = this.mainGrouping.groupIndexes.get(i); - List<Integer> clusterIds = new ArrayList<>(); - clusterIds.add(mClusterId); - List<Integer> kNNclusterIds = getClusterIdsOfNNsWithinThreshold(i, thresholdDist,hullObjectsIdxs ); - clusterIds.addAll(kNNclusterIds); - softClustering.put(i, clusterIds); - } - return softClustering; - } - - private int[] getHullIdxs() { - List<Integer> hullIndexes = new ArrayList<>(); - for (Set<Integer> cluster : this.mainGrouping.groups) { - Map<LocalAbstractObject, Integer> clusterObjects = getClusterObjects(cluster); - HullRepresentation hull = new HullOptimizedRepresentationV3(new ArrayList<>(clusterObjects.keySet())).build(); - List<LocalAbstractObject> hullObjects = hull.getHull(); - - for (LocalAbstractObject o : hullObjects) { // preiteruje vsetky objekty a najde indexy odpovedajuce hullObjektom - hullIndexes.add(clusterObjects.get(o)); - } - } - return hullIndexes.stream().mapToInt(i->i).toArray(); - } - - private Map<LocalAbstractObject, Integer> getClusterObjects(Set<Integer> clusterIdxs) { - Map<LocalAbstractObject, Integer> result = new HashMap<>(); - for (Integer idx : clusterIdxs) { - result.put(this.mainGrouping.objects.getObject(this.mainGrouping.shuffleArray[idx]), idx); - } - return result; - } - - - public HashMap<Integer, List<Integer>> categoriesClusteringStrategies(double thresholdDist, int maxNNs) { - this.MAX_NN = maxNNs; - HashMap<Integer, List<Integer>> softClustering = new HashMap<>(); - - for(int i = 0; i < this.mainGrouping.objects.getObjectCount(); i++){ - // search for other objects within the thresholdDist - List<Integer> objectsIdxWithinThreshold = rangeQueryWithinIndexesExcludingMyClusterObject(i, thresholdDist); - - // mam blizke clustre vratane seba, teda vytvor "soft" - List<Integer> selectedKnnClusterIds = selectKnnWithDifferentCategories(i, objectsIdxWithinThreshold); - - // najdi si clusterIds pre pozice knnMedoids - // pre vsetky objekty v m-clustri nastav soft :P - List<Integer> clusterIds = new ArrayList<>(); - int oClusteId = this.mainGrouping.groupIndexes.get(i); - clusterIds.add(oClusteId); - clusterIds.addAll(selectedKnnClusterIds); - - softClustering.put(i, clusterIds); - } - return softClustering; - } - - private List<Integer> selectKnnWithDifferentCategories(int objectIdx, List<Integer> candidateIdxs) { - // zaprve usporiadaj podla vzdialeosti.. nasledne parsni nazov aa vyber Set kategorii pokym nie je velky ako k - LocalAbstractObject object = this.mainGrouping.objects.getObject(this.mainGrouping.shuffleArray[objectIdx]); - String objectCategory = getObjectsCategory(object); - - float[] dists = selectDistsToObjects(objectIdx, candidateIdxs); - AbstractRepresentation.Ranked[] ranked = AbstractRepresentation.Ranked.create(dists); - Arrays.sort(ranked); - Set<Integer> selectedClusterIds = new HashSet<>(); - selectedClusterIds.add(this.mainGrouping.groupIndexes.get(objectIdx)); - int i = 0; - while (selectedClusterIds.size() != MAX_NN && i < ranked.length){ - int candidateIdx = candidateIdxs.get(ranked[i].index); - LocalAbstractObject candidate = this.mainGrouping.objects.getObject(this.mainGrouping.shuffleArray[candidateIdx]); - String candidateCategory = getObjectsCategory(candidate); - if (candidateCategory.equals(objectCategory) && - !this.mainGrouping.groupIndexes.get(objectIdx).equals(this.mainGrouping.groupIndexes.get(candidateIdx))) { - selectedClusterIds.add(this.mainGrouping.groupIndexes.get(candidateIdx)); - } - i++; - } - selectedClusterIds.remove(this.mainGrouping.groupIndexes.get(objectIdx)); - return new ArrayList<>(selectedClusterIds); - } - - private String getObjectsCategory(LocalAbstractObject object) { - String objectIdentifier = object.getLocatorURI(); - String[] splits = objectIdentifier.split("_"); - return splits[1]; - } - - private List<Integer> selectKnnIndexesWithinArray(int objectIdx, List<Integer> selectedIndexes) { - - return getKnnClusterIds(objectIdx, selectedIndexes); // vrati indexy vramci selectedIdxs - } - - private float[] selectDistsToObjects(int objectIdx, List<Integer> selectedIndexes) { - float[] dists = new float[selectedIndexes.size()]; - for (int i = 0; i < selectedIndexes.size(); i++){ - dists[i] = this.mainGrouping.objects.getDistance( - this.mainGrouping.shuffleArray[objectIdx], - this.mainGrouping.shuffleArray[selectedIndexes.get(i)]); - } - return dists; - } - - // vrati indexy medoidov with respect to the shuffle array - private int[] computeMedoidsIdxs() { - int[] medoidsIdxs = new int[mainGrouping.groups.size()]; // wrt shuffle array - for (int i = 0; i < mainGrouping.groups.size(); i++) { - Set<Integer> cluster = mainGrouping.groups.get(i); // ziskaj cluster - // sum dists for each object of the clusters and pick the min - int medoidIdx = getMedoidIdx(cluster); - medoidsIdxs[i] = medoidIdx; - } - - return medoidsIdxs; - } - - private int getMedoidIdx(Set<Integer> cluster) { - double minSum = Double.MAX_VALUE; - int medoidIdx = -1; // some object will always be assigned, since minDist cannot be - for (int candidateIdx : cluster) { - double objectSumDists = 0; - for (int otherIdx : cluster) { - objectSumDists += Math.pow(mainGrouping.objects.getDistance(mainGrouping.shuffleArray[candidateIdx], - mainGrouping.shuffleArray[otherIdx]),2); // - } - if (objectSumDists < minSum) { - minSum = objectSumDists; - medoidIdx = candidateIdx; - } - } - return medoidIdx; - } - - private List<Integer> getClusterIdsOfNNsWithinThreshold(int objIdx, double thresholdDist, int[] medoidsIdxs) { - - List<Integer> nearMedoids = rangeQueryWithinIndexes(objIdx, thresholdDist, medoidsIdxs); - // mam blizke clustre vratane seba, teda vytvor "soft" - // dostanem clusterIds pr kNNs - List<Integer> knnClusterIDs = selectKnnIndexesWithinArray(objIdx, nearMedoids); - // vratane seba - - return knnClusterIDs; - } - - private List<Integer> rangeQueryWithinIndexes(int objectIdx, double threshold, int[] indexes) { - List<Integer> result = new ArrayList<>(); - //int objectIndex = data.getIndex(object); - for(int i = 0; i < indexes.length; i++) { - if (this.mainGrouping.objects.getDistance( - this.mainGrouping.shuffleArray[objectIdx], - this.mainGrouping.shuffleArray[indexes[i]]) <= threshold) { - result.add(indexes[i]); - } - } - return result; - } - - private List<Integer> rangeQueryWithinIndexesExcludingMyClusterObject(int objectIdx, double threshold) { - List<Integer> result = new ArrayList<>(); - //int objectIndex = data.getIndex(object); - for(int i = 0; i < this.mainGrouping.objects.getObjectCount(); i++) { - if (this.mainGrouping.objects.getDistance( - this.mainGrouping.shuffleArray[objectIdx], - this.mainGrouping.shuffleArray[i]) <= threshold) { - if (!this.mainGrouping.groupIndexes.get(objectIdx).equals(this.mainGrouping.groupIndexes.get(i))){ - result.add(i); - } - } - } - return result; - } - - private List<Integer> getKnnClusterIds(int objectIdx, List<Integer> candidateIdxs ){ - - float[] dists = selectDistsToObjects(objectIdx, candidateIdxs); - AbstractRepresentation.Ranked[] ranked = AbstractRepresentation.Ranked.create(dists); - Arrays.sort(ranked); - Set<Integer> selectedClusterIds = new HashSet<>(); - selectedClusterIds.add(this.mainGrouping.groupIndexes.get(objectIdx)); - int i = 0; - while (selectedClusterIds.size() != MAX_NN && i < ranked.length){ - int candidateIdx = candidateIdxs.get(ranked[i].index); - if (!this.mainGrouping.groupIndexes.get(objectIdx).equals(this.mainGrouping.groupIndexes.get(candidateIdx))) { - selectedClusterIds.add(this.mainGrouping.groupIndexes.get(candidateIdx)); - } - i++; - } - selectedClusterIds.remove(this.mainGrouping.groupIndexes.get(objectIdx)); - return new ArrayList<>(selectedClusterIds); - } - -} diff --git a/src/cz/muni/fi/disa/similarityoperators/operations/Containment.java b/src/cz/muni/fi/disa/similarityoperators/operations/Containment.java deleted file mode 100644 index 2c5bba5..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/operations/Containment.java +++ /dev/null @@ -1,22 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.operations; - -import cz.muni.fi.disa.similarityoperators.cover.HullRepresentation; -import messif.objects.LocalAbstractObject; - -public class Containment { - - public static boolean isContainedInHull(HullRepresentation hull, - LocalAbstractObject object) { - //int[] hullIdxs = hull.getHullIdxs(); - //return hull.isExternalPointCoveredByHull(object, hullIdxs, hullIdxs.length); - return hull.isExternalCovered(object); - } - - public static boolean isContainedInHull(HullRepresentation hull, - LocalAbstractObject object, - int k) { - //int[] hullIdxs = hull.getHullIdxs(); - //return hull.isExternalPointCoveredByHull(object, hullIdxs, k); - return hull.isExternalCovered(object, k); - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/operations/Intersection.java b/src/cz/muni/fi/disa/similarityoperators/operations/Intersection.java deleted file mode 100644 index 5cb2514..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/operations/Intersection.java +++ /dev/null @@ -1,157 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.operations; - -import cz.muni.fi.disa.similarityoperators.cover.*; -import messif.objects.LocalAbstractObject; - -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.List; - -public class Intersection { - - // probably will not be used - public static HullRepresentation computeIntersectionV1( - HullRepresentation firstHull, HullRepresentation secondHull) { - - List<LocalAbstractObject> firstHullData = firstHull.getObjects(); - List<LocalAbstractObject> secondHullData = secondHull.getObjects(); - - return computeIntersectionShared(firstHullData, secondHullData, - firstHull, secondHull, - "", 0, 0); - } - - // computes intersection on HullPoints - public static HullRepresentation computeIntersection(HullMHPRepresentation firstHull, - HullMHPRepresentation secondHull) { - List<LocalAbstractObject> firstHullData = firstHull.getHull(); - List<LocalAbstractObject> secondHullData = secondHull.getHull(); - return computeIntersectionShared(firstHullData, secondHullData, - firstHull, secondHull, "", 3, 0); - } - - public static HullRepresentation computeIntersection(HullOptimizedRepresentation firstHull, - HullOptimizedRepresentation secondHull) { - List<LocalAbstractObject> firstHullData = firstHull.getHull(); - List<LocalAbstractObject> secondHullData = secondHull.getHull(); - return computeIntersectionShared(firstHullData, secondHullData, - firstHull, secondHull, "", 0, 0); - } - - public static HullRepresentation computeIntersection(HullKNHPRepresentation firstHull, - HullKNHPRepresentation secondHull) { - List<LocalAbstractObject> firstHullData = firstHull.getHull(); - List<LocalAbstractObject> secondHullData = secondHull.getHull(); - return computeIntersectionShared(firstHullData, secondHullData, - firstHull, secondHull, "", firstHull.getMinHullPoints(), firstHull.getkNearestHullPoints()); - } - - public static HullRepresentation computeIntersection(HullOptimizedRepresentationV2 firstHull, - HullOptimizedRepresentationV2 secondHull) { - List<LocalAbstractObject> firstHullData = firstHull.getHull(); - List<LocalAbstractObject> secondHullData = secondHull.getHull(); - return computeIntersectionShared(firstHullData, secondHullData, - firstHull, secondHull, "V2", 0, 0); - } - - public static HullRepresentation computeIntersection(HullRepresentation firstHull, - HullRepresentation secondHull) { - List<LocalAbstractObject> firstHullData = firstHull.getHull(); - List<LocalAbstractObject> secondHullData = secondHull.getHull(); - return computeIntersectionShared(firstHullData, secondHullData, - firstHull, secondHull, "", 0, 0); - } - - private static HullRepresentation computeIntersectionShared(List<LocalAbstractObject> firstHullData, - List<LocalAbstractObject> secondHullData, - HullRepresentation firstHull, - HullRepresentation secondHull, - String version, - int minHullPoints, - int kNHullPoints) { - List<LocalAbstractObject> coveredData = new ArrayList<>(); - - //each point is covered by points in firstHull, we are checking for coverage using secondHull - getHullCoveredObjects(firstHullData, secondHull, coveredData, kNHullPoints); - getHullCoveredObjects(secondHullData, firstHull, coveredData, kNHullPoints); - - // based on parameters create correct version on Hull representation class - if (version.equals("V2")) - return new HullOptimizedRepresentationV2(coveredData).build(); - if (kNHullPoints != 0) - return new HullKNHPRepresentation(coveredData, 3, kNHullPoints).build(); - if (minHullPoints != 0) - return new HullMHPRepresentation(coveredData, minHullPoints).build(); - return new HullRepresentation(coveredData).build(); - } - - // method checking whether intersection between two hulls exists - public static boolean doesIntersectionExist(HullRepresentation firstHull, - HullRepresentation secondHull, - int k){ - List<LocalAbstractObject> coveredList = new ArrayList<LocalAbstractObject>(); - - List<LocalAbstractObject> firstHullData = firstHull.getHull(); - List<LocalAbstractObject> secondHullData = secondHull.getHull(); - - getHullCoveredObjects(firstHullData, secondHull, coveredList, k); - getHullCoveredObjects(secondHullData, firstHull, coveredList, k); - return !coveredList.isEmpty(); - } - - public static void getHullCoveredObjects(List<LocalAbstractObject> hullSharedData, - AbstractHullRepresentation hull, - List<LocalAbstractObject> coveredData, - int kNHullPoints){ - int[] hullIdxs = hull.getHullIdxs(); - - for(LocalAbstractObject hullPoint : hullSharedData){ - int hullPoints = kNHullPoints == 0 ? hullIdxs.length : kNHullPoints; - if (hull.isExternalCovered(hullPoint, hullPoints, hullIdxs)) { - // insert point only if it is not already present in covered data .. - if (!isPointInList(hullPoint, coveredData)) - coveredData.add(hullPoint); //point from first hull covered by point in second hull - } - } - } - - private static boolean isPointInList(LocalAbstractObject obj, List<LocalAbstractObject> covered){ - for (LocalAbstractObject cov : covered) { - if (cov.dataEquals(obj)){ - return true; - } - } - return false; - } - - public static void printIntersectionPoints(HullRepresentation first, HullRepresentation second) { - System.out.println("Points in intersection"); - try { - OutputStream out = new FileOutputStream("./intersection" + ".txt", false); - for( LocalAbstractObject obj : first.getObjects()) { - if (second.isExternalCovered(obj, second.getRepresentativesCount(), second.getHullIdxs())){ - try { - obj.write(out); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - for( LocalAbstractObject obj : second.getObjects()) { - if (first.isExternalCovered(obj, first.getRepresentativesCount(), first.getHullIdxs())){ - try { - obj.write(out); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } - - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/operations/Subset.java b/src/cz/muni/fi/disa/similarityoperators/operations/Subset.java deleted file mode 100644 index be92707..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/operations/Subset.java +++ /dev/null @@ -1,23 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.operations; - -import cz.muni.fi.disa.similarityoperators.cover.HullRepresentation; -import messif.objects.LocalAbstractObject; - -import java.util.ArrayList; -import java.util.List; - -public class Subset { - - /** - * checks whether first hull is subset of the other - */ - public static boolean isSubset(HullRepresentation firstHull, - HullRepresentation secondHull, - int k) { - List<LocalAbstractObject> coveredList = new ArrayList<>(); - List<LocalAbstractObject> firstHullData = firstHull.getHull(); - - Intersection.getHullCoveredObjects(firstHullData, secondHull, coveredList, k); - return coveredList.size() == firstHullData.size(); - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/operations/Union.java b/src/cz/muni/fi/disa/similarityoperators/operations/Union.java deleted file mode 100644 index 94481df..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/operations/Union.java +++ /dev/null @@ -1,152 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.operations; - -import cz.muni.fi.disa.similarityoperators.cover.*; -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.PrecomputedDistances; -import messif.objects.LocalAbstractObject; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -// "static" class in java, final = no extensions possible -public final class Union { - - //private default constructor, instances cannot be created - private Union() {} - - //Computes union on hull points KNHP - public static HullRepresentation ComputeUnionOnHullPoints( - HullKNHPRepresentation firstHull, HullKNHPRepresentation secondHull) { - List<LocalAbstractObject> hullPoints = new ArrayList<>(); - - hullPoints.addAll(firstHull.getHull()); - hullPoints.addAll(secondHull.getHull()); - - return new HullKNHPRepresentation( - hullPoints, - firstHull.getMinHullPoints() , - firstHull.getkNearestHullPoints()) - .build(); - } - - // union on Hulls - min hp - public static HullRepresentation ComputeUnionOnHullPoints( - HullMHPRepresentation firstHull, HullMHPRepresentation secondHull) { - List<LocalAbstractObject> hullPoints = new ArrayList<>(); - - hullPoints.addAll(firstHull.getHull()); - hullPoints.addAll(secondHull.getHull()); - - return new HullMHPRepresentation( - hullPoints, - firstHull.getMinHullPoints()) - .build(); - } - - public static HullRepresentation ComputeUnionOnHullPoints( - HullOptimizedRepresentationV2 firstHull, HullOptimizedRepresentationV2 secondHull) { - List<LocalAbstractObject> hullPoints = new ArrayList<>(); - - hullPoints.addAll(firstHull.getHull()); - hullPoints.addAll(secondHull.getHull()); - - return new HullOptimizedRepresentationV2(hullPoints).build(); - } - - public static HullRepresentation ComputeUnionOnHullPoints( - HullOptimizedRepresentation firstHull, HullOptimizedRepresentation secondHull) { - List<LocalAbstractObject> hullPoints = new ArrayList<>(); - - hullPoints.addAll(firstHull.getHull()); - hullPoints.addAll(secondHull.getHull()); - - return new HullOptimizedRepresentation(hullPoints).build(); - } - - public static HullRepresentation ComputeUnionOnHullPoints(HullRepresentation firstHull, - HullRepresentation secondHull) { - List<LocalAbstractObject> hullPoints = new ArrayList<>(); - - hullPoints.addAll(firstHull.getHull()); - hullPoints.addAll(secondHull.getHull()); - - return new HullRepresentation(hullPoints).build(); - } - - - //Computes union of hulls - used when having only single picture // - public static HullRepresentation ComputeUnionOnData(PrecomputedDistances shared, String version) { - if (version.equals("V2")) - return new HullOptimizedRepresentationV2(shared).build(); - return new HullOptimizedRepresentation(shared).build(); - } - - public static HullRepresentation ComputeUnionOnData(PrecomputedDistances shared, - int minHullPoints) { - return new HullMHPRepresentation(shared, minHullPoints).build(); - } - - public static HullRepresentation ComputeUnionOnData(PrecomputedDistances shared, - int minHullPoints, int kNHullPoints) { - return new HullKNHPRepresentation(shared, minHullPoints, kNHullPoints).build(); - } - - public static HullRepresentation ComputeUnionOnData(PrecomputedDistances shared) { - return new HullRepresentation(shared).build(); - } - - //hullIndexes found by hand in shared - public static boolean CheckCoveredByHull(PrecomputedDistances shared, HullRepresentation union, - int kNHullPoints){ - - int kNN = kNHullPoints == 0 || kNHullPoints > union.getRepresentativesCount() ? union.getRepresentativesCount() : kNHullPoints; - - for (int i = 0; i < shared.getObjectCount(); i++){ -// if (!union.isExternalPointCoveredByHull(shared.getObject(i), union.getHullIdxs(), kNN)) { - if (!union.isExternalCovered(shared.getObject(i), kNN)) { - try { - shared.getObject(i).write(System.out); - } catch (IOException e) { - e.printStackTrace(); - } - //System.out.println("not covered " + i); - //return false; - } - } - return true; - } - -// public static boolean CheckCoveredByHull(PrecomputedDistances shared, HullRepresentation union, -// int kNHullPoints){ -// HullRepresentation hull = new HullRepresentation(shared); -// int[] idxs = mapHullPointsToSharedData(shared, union); //maps each point in hull into shared -// System.out.println(Arrays.toString(idxs)); -// -// int kNN = kNHullPoints == 0 || kNHullPoints < union.getRepresentativesCount() ? union.getRepresentativesCount() : kNHullPoints; -// -// -// for (int i = 0; i < shared.getObjectCount(); i++){ -// if (!hull.isCoveredByHull(i, kNN, idxs)) { -// System.out.println("not covered " + i); -// //return false; -// } -// } -// return true; -// } -// -// -// public static int[] mapHullPointsToSharedData(PrecomputedDistances points, -// HullRepresentation hull){ -// int[] hullIndexes = new int[hull.getHull().size()]; -// int nextHullIndex = 0; -// for (LocalAbstractObject hullPoint : hull.getHull()) { -// for (int i = 0; i < points.getObjectCount(); i++) { -// if (hullPoint.dataEquals(points.getObject(i))) { -// hullIndexes[nextHullIndex] = i; -// nextHullIndex ++; -// } -// } -// } -// return hullIndexes; -// } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/utils/ImageNet.java b/src/cz/muni/fi/disa/similarityoperators/utils/ImageNet.java deleted file mode 100644 index ce8b2e0..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/utils/ImageNet.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.utils; - -import java.io.*; -import java.util.HashMap; -import java.util.Map; -import java.util.logging.Level; -import java.util.logging.Logger; -import java.util.zip.GZIPInputStream; - -/** - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class ImageNet { - private static final String MAP_SERIALIZED_FILE = "id-to-url.bin"; - private static Map<String,String> idToUrl = null; - - public static String urlForId(String objectLocator) { - if (idToUrl == null) - readIdToUrlMap(); - - String url = idToUrl.get(objectLocator); - if (url == null) { - objectLocator = objectLocator.replaceFirst("\\..*$", ""); - url = idToUrl.get(objectLocator); - } - if (url == null) { - objectLocator = objectLocator.replaceFirst("^.*/", ""); - url = idToUrl.get(objectLocator); - } - return url; - } - - - public static void readIdToUrlMap() { - File f = new File(MAP_SERIALIZED_FILE); - if (f.canRead()) { - try { - ObjectInputStream in = new ObjectInputStream(new FileInputStream(f)); - idToUrl = (Map)in.readObject(); - in.close(); - return; - } catch (IOException | ClassNotFoundException ex) { - Logger.getLogger(ImageNet.class.getName()).log(Level.SEVERE, null, ex); - } - } - - readIdToUrlMap("//andromeda/share/datasets/imagenet/imagenet_fall11_urls.txt.gz"); - - if (!f.canRead()) { - try { - ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(f)); - out.writeObject(idToUrl); - out.close(); - } catch (IOException ex) { - Logger.getLogger(ImageNet.class.getName()).log(Level.SEVERE, null, ex); - } - } - } - - public static void readIdToUrlMap(String filePath) { - idToUrl = new HashMap<>(); - try { - GZIPInputStream gzipIn = new GZIPInputStream(new FileInputStream(filePath)); - BufferedReader in = new BufferedReader(new InputStreamReader(gzipIn)); - - while (true) { - String line = in.readLine(); - if (line == null || line.isEmpty()) - break; - String[] parts = line.split("[ \\t]+", 2); - if (parts.length == 2) - idToUrl.put(parts[0], parts[1]); - if ((idToUrl.size() % 100000) == 0) - System.out.println("Map size: " + idToUrl.size()); - } - in.close(); - } catch (FileNotFoundException ex) { - Logger.getLogger(ImageNet.class.getName()).log(Level.SEVERE, null, ex); - } catch (IOException ex) { - Logger.getLogger(ImageNet.class.getName()).log(Level.SEVERE, null, ex); - } - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/utils/MergingType.java b/src/cz/muni/fi/disa/similarityoperators/utils/MergingType.java deleted file mode 100644 index 9c84c5a..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/utils/MergingType.java +++ /dev/null @@ -1,7 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.utils; - -public enum MergingType { - NONE, - BASIC, - SUM_BASED -} diff --git a/src/cz/muni/fi/disa/similarityoperators/utils/ObjectGroups.java b/src/cz/muni/fi/disa/similarityoperators/utils/ObjectGroups.java deleted file mode 100644 index f7cef61..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/utils/ObjectGroups.java +++ /dev/null @@ -1,33 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.utils; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation; - -import java.util.*; - -public class ObjectGroups { - - public AbstractRepresentation.PrecomputedDistances objects; - - /** For shuffling purposes of the precomputed distances matrix */ - public Integer[] shuffleArray; - - /** Mapping from ids in precomputed distances to group (i.e. cluster) the object belongs to */ - public HashMap<Integer, Integer> groupIndexes; // - /** clusters */ - public List<Set<Integer>> groups ; - - public ObjectGroups(AbstractRepresentation.PrecomputedDistances objects) { - this.objects = objects; - groupIndexes = new HashMap<>(); - groups = new ArrayList<>(); - this.shuffleArray = new Integer[objects.getObjectCount()]; - // initialization of grouping .. - for (int i = 0; i < objects.getObjectCount(); i++) { - groupIndexes.put(i, -1); - shuffleArray[i] = i; - } - List<Integer> arrayList = Arrays.asList(shuffleArray); - Collections.shuffle(arrayList); - arrayList.toArray(shuffleArray); - } -} diff --git a/src/cz/muni/fi/disa/similarityoperators/utils/OverlapType.java b/src/cz/muni/fi/disa/similarityoperators/utils/OverlapType.java deleted file mode 100644 index f4d97bf..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/utils/OverlapType.java +++ /dev/null @@ -1,7 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.utils; - -public enum OverlapType { - JOIN_ANY, - FORM_NEW_GROUP, - ELIMINATE; -} diff --git a/src/cz/muni/fi/disa/similarityoperators/utils/QueryType.java b/src/cz/muni/fi/disa/similarityoperators/utils/QueryType.java deleted file mode 100644 index 93dc7fe..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/utils/QueryType.java +++ /dev/null @@ -1,6 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.utils; - -public enum QueryType { - kNN, - RANGE -} diff --git a/src/cz/muni/fi/disa/similarityoperators/utils/UnionFind.java b/src/cz/muni/fi/disa/similarityoperators/utils/UnionFind.java deleted file mode 100644 index 6dbdba0..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/utils/UnionFind.java +++ /dev/null @@ -1,126 +0,0 @@ -package cz.muni.fi.disa.similarityoperators.utils; - -import cz.muni.fi.disa.similarityoperators.cover.AbstractRepresentation.PrecomputedDistances; -import messif.objects.LocalAbstractObject; - -import java.util.*; - -// Union-find data structure with link by rank and path compression -// therefore combination of m makeSet, find and union operations over the set of n elements takes O(m * @alpha(n)) time. -public class UnionFind { - - //private Map<LocalAbstractObject, Integer> mapping; // object and its index - private PrecomputedDistances objects; // the data structure itself stores indexes for each object - private byte[] rank; - private int[] parent; - private int numberOfGroups; - - - // initializes empty union-find data structure - // each element is in its own set - public UnionFind(PrecomputedDistances objects) { - //this.mapping = new HashMap<>(); - this.rank = new byte[objects.getObjectCount()]; - this.parent = new int[objects.getObjectCount()]; - for (int i = 0; i < objects.getObjectCount(); i++){ - makeSet(objects.getObject(i), i); - } - this.objects = objects; - } - - public void makeSet(LocalAbstractObject x, int i) { - rank[i] = 0; - parent[i] = i; - //mapping.put(x, i); - } - - // implementation of find with path compression - public LocalAbstractObject find(LocalAbstractObject x) { // finds the root of the tree elem belongs to - //int index = mapping.get(x); - int index = objects.getIndex(x); - if (index != parent[index]) { - LocalAbstractObject grandParent = find(objects.getObject(parent[index])); // find the root - parent[index] = objects.getIndex(grandParent); // get grandparent index - } - return objects.getObject(parent[index]); - } - - public int getNumberOfGroups(){ - return numberOfGroups; - } - - public List<LocalAbstractObject> getObjects() { - return this.objects.getObjects(); - } - - public PrecomputedDistances getPrecomputedDistances() { return this.objects; } - - // - public void union(LocalAbstractObject x, LocalAbstractObject y) { - LocalAbstractObject rootX = find(x); - LocalAbstractObject rootY = find(y); - int rootXIdx = objects.getIndex(rootX); - int rootYIdx = objects.getIndex(rootY); - - if (rootXIdx == rootYIdx) return; // already in the same group - - // link by rank - else if (rank[rootXIdx] < rank[rootYIdx]) - parent[rootXIdx] = rootYIdx; - else if (rank[rootXIdx] > rank[rootYIdx]) - parent[rootYIdx] = rootXIdx; - else { // the rank is the same, therefore increase rank of the new root - parent[rootYIdx] = rootXIdx; // can be switched by parent[rootXIdx] = rootYIdx - rank[rootXIdx]++; - } - numberOfGroups--; - } - - public ObjectGroups createGrouping() { - ObjectGroups data = new ObjectGroups(objects); - - //Map<Integer, List<LocalAbstractObject>> groups = new HashMap<>(); - - for (int i = 0; i < this.parent.length; i++) { // loop cez vsetky prvky - int parentIdx = parent[i]; - Integer clusterIdx = data.groupIndexes.get(parentIdx); - LocalAbstractObject queryObject = data.objects.getObject(i); - if (clusterIdx == -1) { // no group assigned yet for this parent - Set<Integer> newGroup = new HashSet<>(); - newGroup.add(i); - data.groups.add(newGroup); - data.groupIndexes.put(i, data.groups.size() - 1); - data.groupIndexes.put(parentIdx, data.groups.size()-1); - } - else { - data.groups.get(clusterIdx).add(i); - data.groupIndexes.put(i, clusterIdx); - } - } - - return data; - } - - public List<List<LocalAbstractObject>> getFinalGroups() { - Map<Integer, List<LocalAbstractObject>> groups = new HashMap<>(); - for (int i = 0; i < this.parent.length; i++) { - int parentIdx = parent[i]; - List<LocalAbstractObject> cluster = groups.get(parentIdx); - if (cluster == null) { // no entry for this parent - cluster = new ArrayList<>(); - } - cluster.add(objects.getObject(i)); // add objects index - groups.put(parentIdx, cluster); // value is replaced - } - - return new ArrayList<>(groups.values()); - - } - - public void printParent() { - for (int i =0; i < parent.length; i++){ - System.out.println(i + " parent - " + parent[i]); - } - } - -} diff --git a/src/cz/muni/fi/disa/similarityoperators/utils/UnsupervisedBinaryClassification.java b/src/cz/muni/fi/disa/similarityoperators/utils/UnsupervisedBinaryClassification.java deleted file mode 100644 index 76c5a98..0000000 --- a/src/cz/muni/fi/disa/similarityoperators/utils/UnsupervisedBinaryClassification.java +++ /dev/null @@ -1,289 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package cz.muni.fi.disa.similarityoperators.utils; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -/** - * Manages binary classification of pairs of objects in totals in TP, FP, TN and FN categories. - * - * The pairs are judged to fall into a category by threshold distances (similarity, dissimilarity). - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class UnsupervisedBinaryClassification implements Iterable<UnsupervisedBinaryClassification.BinaryClassification> { - List<BinaryClassification> items; - - /** Create a list of binary classification for updating all by passing one distance at once. - * @param thresholds a list of elements of even length, even (from 0) positions are similarity thresholds and odd positions are dissimilarity thresholds - */ - public UnsupervisedBinaryClassification(float... thresholds) { - items = new ArrayList<>(thresholds.length / 2); - for (int i = 0; i < thresholds.length; i += 2) - items.add(new BinaryClassification(thresholds[i], thresholds[i+1])); - } - - /** Soft-assingment binary classificaation. - * Binary-classify the passed distance between a pair of objects that are assigned to cluster1 and to soft clusters (2), respectively. - * @param pairDist distance between objects - * @param cluster1 cluster ID of the first object - * @param clusters2 array of cluster IDs of the second object - */ - public void classify(float pairDist, List<Integer> cluster1, List<Integer> clusters2) { - for (BinaryClassification bc : items) { - bc.classify(pairDist, cluster1, clusters2); - } - } - - /** Soft-assingment binary classificaation. - * Binary-classify the passed distance between a pair of objects that are assigned to cluster1 and to soft clusters (2), respectively. - * @param pairDist distance between objects - * @param cluster1 cluster ID of the first object - * @param clusters2 array of cluster IDs of the second object - */ - public void classifyAnd(float pairDist, List<Integer> cluster1, List<Integer> clusters2) { - for (BinaryClassification bc : items) { - bc.classifyAnd(pairDist, cluster1, clusters2); - } - } - - /** Binary-classify the passed distance between a pair of objects that are assigned to cluster1 and cluster2 clusters IDs, respectively. - * @param pairDist distance between objects - * @param cluster1 cluster ID of the first object - * @param cluster2 cluster ID of the second object - */ - public void classify(float pairDist, int cluster1, int cluster2) { - for (BinaryClassification bc : items) { - bc.classify(pairDist, cluster1, cluster2); - } - } - - /** Binary-classify the passed distance between a pair of objects that match to the passed degree (from 0 (non-matching) to 1 (perfectly matching)). - * @param pairDist distance between objects - * @param matchConfidence confidence of match for the pair of objects - */ - public void classify(float pairDist, float matchConfidence) { - for (BinaryClassification bc : items) { - bc.classify(pairDist, matchConfidence); - } - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - for (BinaryClassification bc : items) { - sb.append(bc).append("\n"); - } - return sb.toString(); - } - - @Override - public Iterator<BinaryClassification> iterator() { - return items.iterator(); - } - - public static class BinaryClassification { - /** Threshold on distance to mark a pair (distance) as similar (close) */ - private final float similar; - /** Threshold on distance to mark a pair (distance) as dissimilar (far apart) */ - private final float dissimilar; - - /** True positive count */ - private long tp; - /** False positive count */ - private long fp; - /** True negative count */ - private long tn; - /** False negative count */ - private long fn; - - /** Number of pairs in the grey zone -- non-distiquishable by threholds */ - private long greyZone = 0; - - /** Grey area counts -- they are weighted */ - private double tpGrey = 0; - private double fpGrey = 0; - private double tnGrey = 0; - private double fnGrey = 0; - - public BinaryClassification(float similarThreshold, float dissimilarThreshold) { - this.similar = similarThreshold; - this.dissimilar = dissimilarThreshold; - } - - public void classify(float pairDist, int cluster1, int cluster2) { - classify(pairDist, ((cluster1 == cluster2) ? 1f : 0f)); -// if (pairDist <= similar) { -// if (cluster1 == cluster2) -// tp++; -// else -// fn++; -// } else if (pairDist > dissimilar) { -// if (cluster1 != cluster2) -// tn++; -// else -// fp++; -// } else { // ignore the distance between the thresholds -- we cannot distiguish them without the real GT. -// greyZone++; -// double w = weightDistance(pairDist); -// if (cluster1 == cluster2) { -// // Same cluster, so contribute to TP and FP -// tpGrey += (1.0d - w); -// fpGrey += w; -// } else { -// // Diffrent clusters, so contribute to TN and FN -// tnGrey += w; -// fnGrey += (1.0d - w); -// } -// assert (greyZone < 10000 || (greyZone-1) < (long)(tpGrey + fpGrey + tnGrey + fnGrey)) : "Grey zone classification inconsistency: greyZone=" + greyZone + ", tpGrey=" + tpGrey + ", fpGrey=" + fpGrey + ", tnGrey=" + tnGrey + ", fnGrey=" + fnGrey; -// } - } - - /** Soft-assignement classification. ... v podstate odpoveda OR-klasifikaci, lebo sledujem obe strany a staci ze najdem jeden match - * It might not be symmetric to compare the objects, so we have to check both the directions here!!! - */ - public void classify(float pairDist, List<Integer> clusters1, List<Integer> clusters2) { - // Test for a match from o1->o2 - for (Integer c2 : clusters2) { - if (clusters1.get(0).equals(c2)) { - classify(pairDist, 1f); - return; - } - } - // Test for a match from o2->o1 - for (Integer c1 : clusters1) { - if (clusters2.get(0).equals(c1)) { - classify(pairDist, 1f); - return; - } - } - classify(pairDist, 0f); - } - - /** Soft-assignement classification. ... v podstate odpoveda AND-klasifikaci, lebo sledujem obe strany a staci ze najdem jeden match - * It might not be symmetric to compare the objects, so we have to check both the directions here!!! - */ - public void classifyAnd(float pairDist, List<Integer> clusters1, List<Integer> clusters2) { - // Test for a match from o1->o2 - boolean doubleMatch = false; - for (Integer c2 : clusters2) { - if (clusters1.get(0).equals(c2)) { - doubleMatch = true; - break; - } - } - if (doubleMatch) { - // Test for a match from o2->o1 - for (Integer c1 : clusters1) { - if (clusters2.get(0).equals(c1)) { - classify(pairDist, 1f); - return; - } - } - } - - classify(pairDist, 0f); - } - - /** Binary-classify the passed distance between a pair of objects that match to the passed degree (from 0 (non-matching) to 1 (perfectly matching)). - * @param pairDist distance between objects - * @param matchConfidence confidence of match for the pair of objects - */ - public void classify(float pairDist, float matchConfidence) { - if (pairDist <= similar) { - if (matchConfidence > 0) - tp++; - else - fn++; - } else if (pairDist > dissimilar) { - if (matchConfidence == 0) - tn++; - else - fp++; - } else { // ignore the distance between the thresholds -- we cannot distiguish them without the real GT. - greyZone++; - double w = weightDistance(pairDist); - if (matchConfidence > 0) { - // Same cluster, so contribute to TP and FP - tpGrey += (1.0d - w); - fpGrey += w; - } else { - // Diffrent clusters, so contribute to TN and FN - tnGrey += w; - fnGrey += (1.0d - w); - } - assert (greyZone < 10000 || (greyZone-1) < (long)(tpGrey + fpGrey + tnGrey + fnGrey)) : "Grey zone classification inconsistency: greyZone=" + greyZone + ", tpGrey=" + tpGrey + ", fpGrey=" + fpGrey + ", tnGrey=" + tnGrey + ", fnGrey=" + fnGrey; - } - } - - private double weightDistance(double dist) { - return (dist - similar) / (dissimilar - similar); - } - - @Override - public String toString() { - return "BinaryClassification{similar=" + similar + ", dissimilar=" + dissimilar + ": tp=" + tp + ", fp=" + fp + ", tn=" + tn + ", fn=" + fn - + ", greyZone=" + greyZone + ", tpGrey=" + tpGrey + ", fpGrey=" + fpGrey + ", tnGrey=" + tnGrey + ", fnGrey=" + fnGrey + '}'; - } - - public long getCorrectDecisions() { - return tp + tn; - } - - public double getCorrectDecisionsWithGreyZone() { - return tp + tn + tpGrey + tnGrey; - } - - public long getIncorrectDecisions() { - return fp + fn; - } - - public double getIncorrectDecisionsWithGreyZone() { - return fp + fn + fpGrey + fnGrey; - } - - public double getRandIndex() { - return (double)getCorrectDecisions() / (double)(getCorrectDecisions() + getIncorrectDecisions()); - } - - public double getRandIndexWithGreyZone() { - return getCorrectDecisionsWithGreyZone() / (getCorrectDecisionsWithGreyZone() + getIncorrectDecisionsWithGreyZone()); - } - - public float getAdjustedRandIndex(BinaryClassification rndCf) { - return (float)(getCorrectDecisions() - rndCf.getCorrectDecisions()) - / (float)(getCorrectDecisions() + getIncorrectDecisions() - rndCf.getCorrectDecisions()); - } - - public double getAdjustedRandIndexWithGreyZone(BinaryClassification rndCf) { - return (getCorrectDecisionsWithGreyZone() - rndCf.getCorrectDecisionsWithGreyZone()) - / (getCorrectDecisionsWithGreyZone() + getIncorrectDecisionsWithGreyZone() - rndCf.getCorrectDecisionsWithGreyZone()); - } - - public double getPrecision() { - return (double)tp / (double)(tp + fn); - } - - public double getRecall() { - return (double)tp / (double)(tp + fp); - } - - public double getFscore() { - return 2d * getPrecision() * getRecall() / (getPrecision() + getRecall()); - } - - public double getFBscore(double beta) { - return (1d + beta * beta) * getPrecision() * getRecall() / ((beta * beta * getPrecision()) + getRecall()); - } - - /** Fowlkes–Mallows index */ - public double getGmeasure() { - return Math.sqrt(getPrecision() * getRecall()); - } - } -} diff --git a/src/messif/objects/keys/RadiusObjectKey.java b/src/messif/objects/keys/RadiusObjectKey.java deleted file mode 100644 index e945c7f..0000000 --- a/src/messif/objects/keys/RadiusObjectKey.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package messif.objects.keys; - -import messif.objects.nio.BinaryInput; -import messif.objects.nio.BinaryOutput; -import messif.objects.nio.BinarySerializator; - -import java.io.IOException; -import java.io.OutputStream; - -/** - * This class adds to the standard object key {@link AbstractObjectKey} radius of the ball region that this object represents. - * So a float value is stored. - * Radius is stored before the URI (locator)! E.g. #objectKey messif.objects.keys.RadiusObjectKey [6.234] all_souls_000000.jpg - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class RadiusObjectKey extends AbstractObjectKey { - /** Class serial id for serialization. */ - private static final long serialVersionUID = 1L; - - //************ Attributes ************// - - /** Radius of a ball */ - private final float radius; - - - //************ Constructor ************// - - /** - * Creates a new instance for a string value. - * @param value the string representaion of the key - */ - public RadiusObjectKey(String value) { - // Radius is stored before the URI (locator)! E.g. #objectKey messif.objects.keys.RadiusObjectKey [6.234] all_souls_000000.jpg - super(AbstractObjectKey.getKeyStringPart(value, " ", 1)); - // Dimensions: - try { - String d = AbstractObjectKey.getKeyStringPart(value, " ", 0).substring(value.indexOf('[') + 1, value.indexOf(']')); - this.radius = Float.parseFloat(d); - } catch (NumberFormatException ex) { - throw new IllegalArgumentException("Incorrect radius object key format! Expected: \"[radius] URI\" Passed: " + value, ex); - } - } - - /** - * Creates a new instance. - * @param locatorURI the URI locator - * @param radius ball radius value - */ - public RadiusObjectKey(String locatorURI, float radius) { - super(locatorURI); - this.radius = radius; - } - - //************ Attribute access methods ************// - - /** - * @return value of the radius - */ - public float getRadius() { - return radius; - } - - //****************** Serialization ******************// - - /** - * Store this key's data to a text stream. - * This method should have the opposite deserialization in constructor. - * Note that this method should <em>not</em> write a line separator (\n). - * - * @param stream the stream to store this object to - * @throws IOException if there was an error while writing to stream - */ - @Override - protected void writeData(OutputStream stream) throws IOException { - stream.write(formatDimensionsToString().getBytes()); - super.writeData(stream); - } - - private String formatDimensionsToString() { - return "[" + radius + "]"; - } - - //************ Comparator and equality methods ************// - - /** - * Compare the keys according to their locators. - * @param o the key to compare this key with - * @return a negative integer, zero, or a positive integer if this object - * is less than, equal to, or greater than the specified object - */ - @Override - public int compareTo(AbstractObjectKey o) { - if (o == null || !(o.getClass().equals(RadiusObjectKey.class))) - return 3; - int cmp = super.compareTo(o); - if (cmp != 0) - return cmp; - - RadiusObjectKey obj = (RadiusObjectKey)o; - return Float.compare(this.radius, obj.radius); - } - - /** - * Return the hashCode of the locator URI or 0, if it is null. - * @return the hashCode of the locator URI or 0, if it is null - */ - @Override - public int hashCode() { - return super.hashCode() * 47 + Float.hashCode(radius); - } - - /** - * Returns whether this key is equal to the <code>obj</code>. - * It is only and only if the <code>obj</code> is descendant of - * {@link AbstractObjectKey} and has an equal locator URI. - * - * @param obj the object to compare this object to - * @return <tt>true</tt> if this object is the same as the <code>obj</code> argument; <tt>false</tt> otherwise - */ - @Override - public boolean equals(Object obj) { - if (!super.equals(obj)) - return false; - if (!(obj.getClass().equals(RadiusObjectKey.class))) - return false; - - RadiusObjectKey o = (RadiusObjectKey)obj; - return (o.radius == this.radius); - } - - - //************ String representation ************// - - /** - * Returns the URI string. - * @return the URI string - */ - @Override - public String toString() { - return formatDimensionsToString() + super.toString(); - } - - - //************ BinarySerializable interface ************// - - /** - * Creates a new instance of AbstractObjectKey loaded from binary input. - * - * @param input the input to read the AbstractObjectKey from - * @param serializator the serializator used to write objects - * @throws IOException if there was an I/O error reading from the input - */ - protected RadiusObjectKey(BinaryInput input, BinarySerializator serializator) throws IOException { - super(input, serializator); - this.radius = serializator.readFloat(input); - } - - /** - * Binary-serialize this object into the <code>output</code>. - * @param output the output that this object is binary-serialized into - * @param serializator the serializator used to write objects - * @return the number of bytes actually written - * @throws IOException if there was an I/O error during serialization - */ - @Override - public int binarySerialize(BinaryOutput output, BinarySerializator serializator) throws IOException { - return super.binarySerialize(output, serializator) + serializator.write(output, radius); - } - - /** - * Returns the exact size of the binary-serialized version of this object in bytes. - * @param serializator the serializator used to write objects - * @return size of the binary-serialized version of this object - */ - @Override - public int getBinarySize(BinarySerializator serializator) { - return super.getBinarySize(serializator) + serializator.getBinarySize(radius); - } - -} diff --git a/src/messif/utility/FileUtils.java b/src/messif/utility/FileUtils.java deleted file mode 100644 index 4d8555e..0000000 --- a/src/messif/utility/FileUtils.java +++ /dev/null @@ -1,38 +0,0 @@ -package messif.utility; - - -import messif.objects.LocalAbstractObject; -import messif.objects.util.StreamGenericAbstractObjectIterator; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ - -/** - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class FileUtils { - public static <E extends LocalAbstractObject> StreamGenericAbstractObjectIterator<E> openDB(Class<E> clazz, String file) { - try { - return new StreamGenericAbstractObjectIterator(clazz, file); - } catch (IllegalArgumentException | IOException ex) { - throw new RuntimeException(ex); - } - } - - public static <E extends LocalAbstractObject> List<E> readObjects(Class<E> clazz, String file) { - StreamGenericAbstractObjectIterator<E> iter = openDB(clazz, file); - List<E> objs = new ArrayList<>(); - while (iter.hasNext()) - objs.add(iter.next()); - return objs; - } - -} diff --git a/src/messif/utility/HullRepresentationAsLocalAbstractObject.java b/src/messif/utility/HullRepresentationAsLocalAbstractObject.java deleted file mode 100644 index bab6fab..0000000 --- a/src/messif/utility/HullRepresentationAsLocalAbstractObject.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package messif.utility; - -import cz.muni.fi.disa.similarityoperators.cover.HullRepresentation; -import messif.objects.LocalAbstractObject; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.Iterator; -import java.util.Objects; - -/** - * Encapsulates a hull representation as a {@link LocalAbstractObject}. - * No distance function is implemented here yet. - * - * @author Vlastislav Dohnal, Masaryk University, Brno, Czech Republic, dohnal@fi.muni.cz - */ -public class HullRepresentationAsLocalAbstractObject extends LocalAbstractObject { - - private HullRepresentation hull; - - public HullRepresentationAsLocalAbstractObject(HullRepresentation h) { - super(h.getLocatorID()); - this.hull = h; - } - - public HullRepresentation getHull() { - return hull; - } - - @Override - public int getSize() { - return -1; - } - - @Override - public String toString() { - return "HullRepresentationAsLocalAbstractObject{" + hull.getLocatorID() + '}'; - } - - @Override - public boolean dataEquals(Object obj) { - if (this == obj) - return true; - if (!getClass().isAssignableFrom(obj.getClass())) - return false; - HullRepresentationAsLocalAbstractObject other = (HullRepresentationAsLocalAbstractObject)obj; - if (hull.getRepresentativesCount() != other.hull.getRepresentativesCount()) - return false; - final Iterator<LocalAbstractObject> it1 = hull.getRepresentatives(); - final Iterator<LocalAbstractObject> it2 = other.hull.getRepresentatives(); - while (it1.hasNext() && it2.hasNext()) { - if (!it1.next().dataEquals(it2.next())) - return false; - } - return true; - } - - @Override - public int dataHashCode() { - int hash = 7; - final Iterator<LocalAbstractObject> it = hull.getRepresentatives(); - while (it.hasNext()) - hash = 31 * hash + it.next().dataHashCode(); - return hash; - } - - @Override - public int hashCode() { - int hash = 5; - hash = 71 * hash + this.hull.getLocatorID().hashCode(); - return hash; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - final HullRepresentationAsLocalAbstractObject other = (HullRepresentationAsLocalAbstractObject) obj; - if (!Objects.equals(this.hull, other.hull)) { - return false; - } - return this.hull.getLocatorID().equals(other.hull.getLocatorID()); - } - - @Override - protected void writeData(OutputStream stream) throws IOException { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - protected float getDistanceImpl(LocalAbstractObject obj, float distThreshold) { - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - -} -- GitLab