Commit ae990533 authored by Michal Balazia's avatar Michal Balazia
Browse files

Merge branch 'fix'

parents 80db4423 755b5801
......@@ -6,8 +6,7 @@
/build.xml
/catalog.xml
/manifest.mf
/amc59.4/
/src.rar
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
......
package algorithms;
import java.util.List;
import objects.Template;
/**
 * Base class for clustering algorithms that operate on {@link Template} lists.
 *
 * Subclasses implement {@link #cluster(List)}; this class contributes a shared
 * medoid-style centroid lookup built on the inherited {@code getDistance}.
 */
public abstract class Clustering extends Retriever {

    public Clustering() {
    }

    /**
     * Partitions the given templates into clusters.
     *
     * @param templatesGallery templates to be clustered
     * @return the clusters, each one a list of templates
     */
    public abstract List<List<Template>> cluster(List<Template> templatesGallery);

    /**
     * Picks the member of {@code templates} whose summed distance to all
     * members (itself included) is the smallest. On ties the earliest member wins.
     *
     * For an empty list the message "empty class" is printed and a placeholder
     * {@code new Template(null, null)} is returned. The same placeholder is
     * returned when no candidate has a summed distance strictly below
     * {@code Double.MAX_VALUE}.
     *
     * @param templates the templates of one cluster
     * @return the selected member, or the placeholder described above
     */
    public Template getCentroid(List<Template> templates) {
        Template best = new Template(null, null);
        if (templates.isEmpty()) {
            System.out.println("empty class");
            return best;
        }
        double bestTotal = Double.MAX_VALUE;
        for (Template candidate : templates) {
            double total = 0;
            for (Template member : templates) {
                total += getDistance(member, candidate);
            }
            // Strict comparison: the first candidate with the minimal total is kept.
            if (total < bestTotal) {
                bestTotal = total;
                best = candidate;
            }
        }
        return best;
    }
}
package algorithms;
import java.util.ArrayList;
import java.util.List;
import objects.Template;
/**
 * Agglomerative (bottom-up) clustering: every template starts as its own
 * cluster and the two clusters with the closest centroids are merged repeatedly.
 *
 * Merging stops when the number of clusters has dropped to {@code k}, or when
 * the closest pair of centroids is at distance {@code d} or more, whichever
 * happens first. Each constructor configures one of the two criteria and
 * disables the other.
 */
public class ClusteringAgglomerativeHierarchical extends Clustering {

    private final int k;
    private final double d;

    /**
     * Clusters until {@code k} clusters remain (no distance threshold).
     *
     * @param k target number of clusters
     */
    public ClusteringAgglomerativeHierarchical(int k) {
        this.k = k;
        this.d = Double.MAX_VALUE;
    }

    /**
     * Clusters until the closest pair of centroids is at distance {@code d} or more.
     *
     * @param d distance threshold; pairs at this distance or further are not merged
     */
    public ClusteringAgglomerativeHierarchical(double d) {
        // The cluster-count criterion must not stop merging in this mode, so allow
        // merging all the way down to a single cluster. (A huge k would make
        // "clusters.size() > k" false from the start and disable merging entirely.)
        this.k = 1;
        this.d = d;
    }

    /**
     * Runs the clustering. The input list and its templates are not modified;
     * the returned clusters are freshly created lists.
     *
     * @param templatesGallery templates to cluster
     * @return the resulting clusters; merged clusters are appended at the end
     */
    @Override
    public List<List<Template>> cluster(List<Template> templatesGallery) {
        int numberOfTemplates = templatesGallery.size();
        if (numberOfTemplates < k) {
            System.out.println(numberOfTemplates + " < " + k);
        }
        List<List<Template>> clusters = new ArrayList<List<Template>>();
        List<Template> centroids = new ArrayList<Template>();
        for (Template template : templatesGallery) {
            List<Template> cluster = new ArrayList<Template>();
            cluster.add(template);
            clusters.add(cluster);
            centroids.add(template);
        }
        // At least two clusters are required for a merge, whatever k is.
        while (clusters.size() > k && clusters.size() > 1) {
            int numberOfClusters = clusters.size();
            int closestI = -1;
            int closestJ = -1;
            double minDistance = Double.MAX_VALUE;
            for (int i = 0; i < numberOfClusters - 1; i++) {
                for (int j = i + 1; j < numberOfClusters; j++) {
                    double distance = getDistance(centroids.get(i), centroids.get(j));
                    if (minDistance > distance) {
                        minDistance = distance;
                        closestI = i;
                        closestJ = j;
                    }
                }
            }
            // Stop before merging when no usable pair exists or the closest pair
            // is already at or beyond the threshold.
            if (closestI < 0 || minDistance >= d) {
                break;
            }
            List<Template> merged = clusters.get(closestI);
            merged.addAll(clusters.get(closestJ));
            // Remove by index (higher index first) so that clusters and centroids
            // stay aligned even if equals() considers distinct entries equal.
            clusters.remove(closestJ);
            clusters.remove(closestI);
            centroids.remove(closestJ);
            centroids.remove(closestI);
            clusters.add(merged);
            centroids.add(getCentroid(merged));
        }
        return clusters;
    }
}
package algorithms;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import objects.Template;
/**
 * K-means style clustering in which every centroid is an actual template
 * (the member chosen by the inherited {@code getCentroid}).
 *
 * Initial centroids are {@code k} distinct, randomly chosen templates. The
 * algorithm alternates between assigning every template to its nearest
 * centroid and recomputing the centroids, until no centroid changes.
 */
public class ClusteringKmeans extends Clustering {

    private final int k;

    /**
     * @param k number of clusters to produce; expected to be positive
     */
    public ClusteringKmeans(int k) {
        this.k = k;
    }

    /**
     * Clusters the given templates into {@code k} groups.
     *
     * @param templatesGallery templates to cluster
     * @return {@code k} clusters (some may be empty), or {@code null} when fewer
     *         than {@code k} templates are supplied (a message is printed in that case)
     */
    @Override
    public List<List<Template>> cluster(List<Template> templatesGallery) {
        int numberOfTemplates = templatesGallery.size();
        if (numberOfTemplates < k) {
            System.out.println(numberOfTemplates + " < " + k);
            return null;
        }
        // Draw k distinct gallery indices to serve as the initial centroids.
        Random random = new Random();
        List<Integer> seeds = new ArrayList<Integer>();
        for (int i = 0; i < k; i++) {
            int seed = random.nextInt(numberOfTemplates);
            while (seeds.contains(seed)) {
                seed = random.nextInt(numberOfTemplates);
            }
            seeds.add(seed);
        }
        List<Template> centroids = new ArrayList<Template>();
        for (int i = 0; i < k; i++) {
            centroids.add(templatesGallery.get(seeds.get(i)));
        }
        List<List<Template>> clusters = new ArrayList<List<Template>>();
        boolean done = false;
        while (!done) {
            done = true;
            clusters = new ArrayList<List<Template>>();
            for (int i = 0; i < k; i++) {
                clusters.add(new ArrayList<Template>());
            }
            // Assignment step: each template joins the cluster of its nearest centroid
            // (the lowest index wins on ties).
            for (Template template : templatesGallery) {
                double minDistance = Double.MAX_VALUE;
                int closestCentroidIndex = 0;
                for (int i = 0; i < k; i++) {
                    double distance = getDistance(template, centroids.get(i));
                    if (minDistance > distance) {
                        minDistance = distance;
                        closestCentroidIndex = i;
                    }
                }
                clusters.get(closestCentroidIndex).add(template);
            }
            // Update step: recompute centroids and detect convergence.
            for (int i = 0; i < k; i++) {
                List<Template> members = clusters.get(i);
                if (members.isEmpty()) {
                    // Keep the previous centroid. getCentroid() would hand back a
                    // fresh placeholder template, which would then be used in distance
                    // computations and could keep the loop from ever converging.
                    continue;
                }
                Template centroidOld = centroids.get(i);
                Template centroidNew = getCentroid(members);
                centroids.set(i, centroidNew);
                if (!centroidOld.equals(centroidNew)) {
                    done = false;
                }
            }
        }
        return clusters;
    }
}
package algorithms;
import java.io.Serializable;
import java.util.List;
import objects.DistanceMatrix;
import objects.Template;
public abstract class Decision implements Serializable {
public abstract class Decision extends Retriever {
private List gallery;
private DistanceTemplates distanceTemplates;
private DistanceMatrix distanceMatrix;
public Decision() {
}
......@@ -22,22 +18,6 @@ public abstract class Decision implements Serializable {
this.gallery = gallery;
}
/**
 * Returns the {@code distanceTemplates} object currently stored on this instance.
 *
 * @return the stored {@code DistanceTemplates}; may be {@code null} if it was never set
 */
public DistanceTemplates getDistanceTemplates() {
return distanceTemplates;
}
/**
 * Stores the given {@code DistanceTemplates} on this instance.
 *
 * @param distanceTemplates the object to store
 */
public void setDistanceTemplates(DistanceTemplates distanceTemplates) {
this.distanceTemplates = distanceTemplates;
}
/**
 * Stores the given {@code DistanceMatrix} on this instance.
 *
 * @param distanceMatrix the matrix to store
 */
public void setDistanceMatrix(DistanceMatrix distanceMatrix) {
this.distanceMatrix = distanceMatrix;
}
/**
 * Returns the distance between two templates.
 *
 * The stored distance matrix is consulted when one has been set; otherwise
 * the call is delegated to the stored {@code distanceTemplates} object.
 *
 * @param template1 first template
 * @param template2 second template
 * @return the distance reported by whichever source is used
 */
public double getDistance(Template template1, Template template2) {
    if (distanceMatrix != null) {
        return distanceMatrix.getDistance(template1, template2);
    }
    return distanceTemplates.getDistance(template1, template2);
}
public abstract void importGallery(List<Template> templatesGallery);
public abstract String decide(Template template);
......
......@@ -5,7 +5,7 @@ import java.util.List;
import objects.ClassifiableList;
import objects.Template;
public class DecisionLinear extends Decision {
public class DecisionRocchio extends Decision {
@Override
public void importGallery(List<Template> templatesGallery) {
......
......@@ -110,8 +110,8 @@ public abstract class MethodEvaluator {
}
public double[] getCumulativeMatchCharacteristic(List<Template> templatesTest, List<Template> templatesGallery) {
List<List<Template>> split = new ClassifiableList(templatesGallery).splitBySubject();
int numberOfSubjects = split.size();
List<List<Template>> templatesBySubject = new ClassifiableList(templatesGallery).splitBySubject();
int numberOfSubjects = templatesBySubject.size();
int number = templatesTest.size();
double[] cmc = new double[numberOfSubjects];
for (Template templateTest : templatesTest) {
......@@ -123,10 +123,10 @@ public abstract class MethodEvaluator {
String classifiedSubject = classifier.classify(templateTest);
ranking[rank] = classifiedSubject;
int i = 0;
while (!split.get(i).get(0).getSubject().equals(classifiedSubject)) {
while (!templatesBySubject.get(i).get(0).getSubject().equals(classifiedSubject)) {
i++;
}
templatesGalleryRank.removeAll(split.get(i));
templatesGalleryRank.removeAll(templatesBySubject.get(i));
}
for (int rank = 0; rank < numberOfSubjects; rank++) {
boolean isIn = false;
......@@ -144,31 +144,37 @@ public abstract class MethodEvaluator {
}
public double getDaviesBouldinIndex(List<Template> templatesEvaluation) { // low is good
List<List<Template>> split = new ClassifiableList(templatesEvaluation).splitBySubject();
int numberOfSubjects = split.size();
List<List<Template>> templatesBySubject = new ClassifiableList(templatesEvaluation).splitBySubject();
int numberOfSubjects = templatesBySubject.size();
double sum = 0;
for (int c1 = 0; c1 < numberOfSubjects; c1++) {
List<Template> templatesOfSubject1 = split.get(c1);
List<Template> templatesOfSubject1 = templatesBySubject.get(c1);
Template centroid1 = getCentroid(templatesOfSubject1);
double avgDistanceToCentroid1 = getAverageDistanceToTemplate(templatesOfSubject1, centroid1);
double maxFrac = 0;
for (int c2 = c1 + 1; c2 < numberOfSubjects; c2++) {
List<Template> templatesOfSubject2 = split.get(c2);
double frac = (getAverageDistanceFromCentroid(templatesOfSubject1) + getAverageDistanceFromCentroid(templatesOfSubject2)) / getDistanceBetweenCentroids(templatesOfSubject1, templatesOfSubject2);
List<Template> templatesOfSubject2 = templatesBySubject.get(c2);
Template centroid2 = getCentroid(templatesOfSubject2);
double avgDistanceToCentroid2 = getAverageDistanceToTemplate(templatesOfSubject2, centroid2);
double frac = (avgDistanceToCentroid1 + avgDistanceToCentroid2) / getDistance(centroid1, centroid2);
if (maxFrac < frac) {
maxFrac = frac;
}
}
sum += maxFrac;
}
return sum;
return sum / numberOfSubjects;
}
public double getDunnIndex(List<Template> templatesEvaluation) { // high is good
List<List<Template>> split = new ClassifiableList(templatesEvaluation).splitBySubject();
int numberOfSubjects = split.size();
List<List<Template>> templatesBySubject = new ClassifiableList(templatesEvaluation).splitBySubject();
int numberOfSubjects = templatesBySubject.size();
double min = Double.MAX_VALUE;
for (int c1 = 0; c1 < numberOfSubjects; c1++) {
Template centroid1 = getCentroid(templatesBySubject.get(c1));
for (int c2 = c1 + 1; c2 < numberOfSubjects; c2++) {
double dist = getDistanceBetweenCentroids(split.get(c1), split.get(c2));
Template centroid2 = getCentroid(templatesBySubject.get(c2));
double dist = getDistance(centroid1, centroid2);
if (min > dist) {
min = dist;
}
......@@ -176,7 +182,7 @@ public abstract class MethodEvaluator {
}
double max = 0;
for (int c = 0; c < numberOfSubjects; c++) {
double dist = getAverageDistanceFromCentroid(split.get(c));
double dist = getAverageDistanceToCentroid(templatesBySubject.get(c));
if (max < dist) {
max = dist;
}
......@@ -185,25 +191,17 @@ public abstract class MethodEvaluator {
}
public double getSilhouetteCoefficient(List<Template> templatesEvaluation) { // high is good
List<List<Template>> split = new ClassifiableList(templatesEvaluation).splitBySubject();
List<List<Template>> templatesBySubject = new ClassifiableList(templatesEvaluation).splitBySubject();
double sum = 0;
for (List<Template> templatesOfSubject : split) {
for (Template templateOfThisSubject : templatesOfSubject) {
double sumA = 0;
for (Template otherTemplateOfSubject : templatesOfSubject) {
double dist = distanceMatrix.getDistance(templateOfThisSubject, otherTemplateOfSubject);
sumA += dist;
}
double avgA = sumA / templatesOfSubject.size();
for (List<Template> templatesOfThisSubject : templatesBySubject) {
String thisSubject = templatesOfThisSubject.get(0).getSubject();
for (Template template : templatesOfThisSubject) {
double avgA = getAverageDistanceToTemplate(templatesOfThisSubject, template);
double minAvgB = Double.MAX_VALUE;
for (List<Template> templatesOfOtherSubject : split) {
if (!templatesOfSubject.equals(templatesOfOtherSubject)) {
double sumB = 0;
for (Template templateOfOtherSubject : templatesOfOtherSubject) {
double dist = distanceMatrix.getDistance(templateOfThisSubject, templateOfOtherSubject);
sumB += dist;
}
double avgB = sumB / templatesOfOtherSubject.size();
for (List<Template> templatesOfOtherSubject : templatesBySubject) {
String otherSubject = templatesOfOtherSubject.get(0).getSubject();
if (!thisSubject.equals(otherSubject)) {
double avgB = getAverageDistanceToTemplate(templatesOfOtherSubject, template);
if (minAvgB > avgB) {
minAvgB = avgB;
}
......@@ -216,51 +214,80 @@ public abstract class MethodEvaluator {
}
public double getFishersDiscriminantRatio(List<Template> templatesEvaluation) { // high is good
List<List<Template>> split = new ClassifiableList(templatesEvaluation).splitBySubject();
return getBetweenClassScatter(split) / getWithinClassScatter(split);
List<List<Template>> templatesBySubject = new ClassifiableList(templatesEvaluation).splitBySubject();
return getBetweenClassScatter(templatesBySubject) / getWithinClassScatter(templatesBySubject);
}
/**
 * Computes cluster purity: for every cluster the size of its largest
 * same-subject group is counted, and the sum of those counts is divided by
 * the total number of templates across all clusters.
 *
 * @param clusters the clusters to evaluate
 * @return the purity ratio (NaN when there are no templates at all)
 */
public double getPurity(List<List<Template>> clusters) { // high is good
    int dominantTotal = 0;
    int overallTotal = 0;
    for (List<Template> cluster : clusters) {
        List<List<Template>> groupsBySubject = new ClassifiableList(cluster).splitBySubject();
        int largestGroup = 0;
        for (List<Template> group : groupsBySubject) {
            largestGroup = Math.max(largestGroup, group.size());
        }
        dominantTotal += largestGroup;
        overallTotal += cluster.size();
    }
    return (double) dominantTotal / overallTotal;
}
/**
 * Computes the Rand index {@code (tp + tn) / (tp + fp + fn + tn)} from pair counts.
 *
 * The sums are carried out in {@code long} so that large pair counts cannot
 * overflow {@code int} arithmetic.
 *
 * @param tp pairs with same subject and same cluster
 * @param fp pairs with different subject but same cluster
 * @param fn pairs with same subject but different cluster
 * @param tn pairs with different subject and different cluster
 * @return the Rand index (NaN when all counts are zero)
 */
public double getRandIndex(int tp, int fp, int fn, int tn) { // high is good
    long agreements = (long) tp + tn;
    long pairs = (long) tp + fp + fn + tn;
    return (double) agreements / pairs;
}
/**
 * Computes the F1 measure from pair counts using the closed form
 * {@code 2*tp / (2*tp + fp + fn)}, which equals {@code 2pr / (p + r)} with
 * {@code p = tp/(tp+fp)} and {@code r = tp/(tp+fn)}.
 *
 * The closed form yields 0 (instead of NaN) when there are no true positives
 * but some errors, and it is evaluated in {@code double} so the sums cannot
 * overflow {@code int}.
 *
 * @param tp pairs with same subject and same cluster
 * @param fp pairs with different subject but same cluster
 * @param fn pairs with same subject but different cluster
 * @param tn unused; kept so all pair-count metrics share one signature
 * @return the F1 measure (NaN only when tp, fp and fn are all zero)
 */
public double getFmeasure(int tp, int fp, int fn, int tn) { // high is good
    double doubledTp = 2.0 * tp;
    return doubledTp / (doubledTp + fp + fn);
}
/**
 * Computes the Jaccard index {@code tp / (tp + fp + fn)} from pair counts.
 *
 * The denominator is summed in {@code long} so that large pair counts cannot
 * overflow {@code int} arithmetic.
 *
 * @param tp pairs with same subject and same cluster
 * @param fp pairs with different subject but same cluster
 * @param fn pairs with same subject but different cluster
 * @param tn unused; kept so all pair-count metrics share one signature
 * @return the Jaccard index (NaN when tp, fp and fn are all zero)
 */
public double getJaccardIndex(int tp, int fp, int fn, int tn) { // high is good
    long union = (long) tp + fp + fn;
    return (double) tp / union;
}
/**
 * Computes the Fowlkes-Mallows index, the geometric mean of
 * {@code tp/(tp+fp)} and {@code tp/(tp+fn)}.
 *
 * Both denominators are summed in {@code long}: an {@code int} overflow there
 * would produce a negative ratio and hence a NaN square root.
 *
 * @param tp pairs with same subject and same cluster
 * @param fp pairs with different subject but same cluster
 * @param fn pairs with same subject but different cluster
 * @param tn unused; kept so all pair-count metrics share one signature
 * @return the Fowlkes-Mallows index (NaN when a denominator is zero)
 */
public double getFowlkesMallowsIndex(int tp, int fp, int fn, int tn) { // high is good
    double precision = (double) tp / ((long) tp + fp);
    double recall = (double) tp / ((long) tp + fn);
    return Math.sqrt(precision * recall);
}
private double getBetweenClassScatter(List<List<Template>> templatesByClass) {
private double getBetweenClassScatter(List<List<Template>> templatesBySubject) {
List<Template> centroids = new ArrayList();
for (List<Template> templatesOfSubject : templatesByClass) {
for (List<Template> templatesOfSubject : templatesBySubject) {
centroids.add(getCentroid(templatesOfSubject));
}
Template centroidOverall = getCentroid(centroids);
return getAverageDistanceToCentroid(centroids);
}
private double getWithinClassScatter(List<List<Template>> templatesBySubject) {
double sum = 0;
for (Template centroid : centroids) {
sum += distanceMatrix.getDistance(centroidOverall, centroid);
for (List<Template> templatesOfSubject : templatesBySubject) {
sum += getAverageDistanceToCentroid(templatesOfSubject);
}
return sum / templatesByClass.size();
return sum / templatesBySubject.size();
}
private double getWithinClassScatter(List<List<Template>> templatesByClass) {
double sumTotal = 0;
for (List<Template> templatesOfSubject : templatesByClass) {
Template centroid = getCentroid(templatesOfSubject);
double sumClass = 0;
for (Template template : templatesOfSubject) {
sumClass += distanceMatrix.getDistance(centroid, template);
}
sumTotal += sumClass / templatesOfSubject.size();
}
return sumTotal / templatesByClass.size();
private double getDistance(Template template1, Template template2) {
return distanceMatrix.getDistance(template1, template2);
}
private double getDistanceBetweenCentroids(List<Template> templates1, List<Template> templates2) {
return distanceMatrix.getDistance(getCentroid(templates1), getCentroid(templates2));
private double getAverageDistanceToCentroid(List<Template> templates) {
return getAverageDistanceToTemplate(templates, getCentroid(templates));
}
private double getAverageDistanceFromCentroid(List<Template> templates) {
Template centroid = getCentroid(templates);
private double getAverageDistanceToTemplate(List<Template> templates, Template template) {
double sum = 0;
for (Template template : templates) {
sum += distanceMatrix.getDistance(centroid, template);
for (Template otherTemplate : templates) {
sum += getDistance(template, otherTemplate);
}
return sum / templates.size();
}
private Template getCentroid(List<Template> templates) {
Template centroid = new Template("", null);
Template centroid = new Template(null, null);
if (templates.isEmpty()) {
System.out.println("empty class");
} else {
......@@ -268,7 +295,7 @@ public abstract class MethodEvaluator {
for (Template centroidCandidate : templates) {
double sum = 0;
for (Template template : templates) {
sum += distanceMatrix.getDistance(centroidCandidate, template);
sum += getDistance(template, centroidCandidate);
}
if (minSum > sum) {
minSum = sum;
......@@ -279,6 +306,45 @@ public abstract class MethodEvaluator {
return centroid;
}
/**
 * Counts, over all unordered pairs of templates in the given clustering,
 * how the cluster assignment agrees with the templates' subjects.
 *
 * @param clusters the clustering to evaluate
 * @return {@code {tp, fp, fn, tn}} where tp = same subject and same cluster,
 *         fp = different subject but same cluster, fn = same subject but
 *         different cluster, tn = different subject and different cluster
 */
public int[] getContingency(List<List<Template>> clusters) {
    // Flatten the clustering into two parallel lists: subject and cluster index.
    List<String> subjectOf = new ArrayList<String>();
    List<Integer> clusterOf = new ArrayList<Integer>();
    int clusterIndex = 0;
    for (List<Template> cluster : clusters) {
        for (Template member : cluster) {
            subjectOf.add(member.getSubject());
            clusterOf.add(clusterIndex);
        }
        clusterIndex++;
    }
    int total = subjectOf.size();
    int[] counts = new int[4]; // tp, fp, fn, tn
    for (int a = 0; a < total - 1; a++) {
        String subjectA = subjectOf.get(a);
        int clusterA = clusterOf.get(a);
        for (int b = a + 1; b < total; b++) {
            boolean sameSubject = subjectA.equals(subjectOf.get(b));
            int clusterB = clusterOf.get(b);
            if (clusterA == clusterB) {
                if (sameSubject) {
                    counts[0]++;
                } else {
                    counts[1]++;
                }
            } else {
                if (sameSubject) {
                    counts[2]++;
                } else {
                    counts[3]++;
                }
            }
        }
    }
    return counts;
}
public double getTemplateDimensionality(List<Template> templatesEvaluation) {
int sum = 0;
for (Template template : templatesEvaluation) {
......
......@@ -15,7 +15,7 @@ public class MethodEvaluatorClassification extends MethodEvaluator {
private final int nFolds;
private int nClasses;
private List<String> subjects;
private List<Motion> motions;
private List<List<Double>> scoresCCR;
......@@ -25,9 +25,14 @@ public class MethodEvaluatorClassification extends MethodEvaluator {
super(methods, directoryAMC);
this.nFolds = nFolds;
}
/**
 * Stores the given list of subjects on this evaluator.
 * The list reference is kept as-is (no copy is made).
 *
 * @param subjects the subjects to store
 */
public void setSubjects(List<String> subjects) {
this.subjects = subjects;
}
public void setNClasses(int nClasses) {
int numberOfSubjects = new ClassifiableList(getClassifiables()).splitBySubject().size();
public void setSubjects(int nClasses) {
List<List<Classifiable>> classifiablesBySubject = new ClassifiableList(getClassifiables()).splitBySubject();
int numberOfSubjects = classifiablesBySubject.size();
if (nClasses < 2) {
System.out.println("nClasses=" + nClasses + " < 2");
return;
......@@ -36,7 +41,14 @@ public class MethodEvaluatorClassification extends MethodEvaluator {
System.out.println("nClasses=" + nClasses + " > " + "numberOfSubjects=" + numberOfSubjects);
return;
}
this.nClasses = nClasses;
Random random = new Random();
List<String> subjectsEvaluation = new ArrayList();
while (subjectsEvaluation.size() < nClasses) {
String subject = classifiablesBySubject.get(random.nextInt(numberOfSubjects)).get(0).getSubject();
if (!subjectsEvaluation.contains(subject)) {
subjectsEvaluation.add(subject);
}
}
}
@Override
......@@ -46,7 +58,7 @@ public class MethodEvaluatorClassification extends MethodEvaluator {
for (int m = 0; m <= getNumberOfMethods(); m++) {
scoresCCR.add(new ArrayList());
scoresListCMC.add(new ArrayList());
for (int rank = 0; rank <= nClasses; rank++) {
for (int rank = 0; rank <= subjects.size(); rank++) {
scoresListCMC.get(m).add(new ArrayList());
}
}
......@@ -54,19 +66,9 @@ public class MethodEvaluatorClassification extends MethodEvaluator {
@Override
public void evaluate() throws IOException {
List<List<Classifiable>> classifiablesBySubject = new ClassifiableList(getClassifiables()).splitBySubject();
int numberOfSubjects = classifiablesBySubject.size();
Random random = new Random();
List<String> subjectsEvaluation = new ArrayList();
while (subjectsEvaluation.size() < nClasses) {
String subject = classifiablesBySubject.get(random.nextInt(numberOfSubjects)).get(0).getSubject();
if (!subjectsEvaluation.contains(subject)) {
subjectsEvaluation.add(subject);
}
}
for (int m = 0; m < getNumberOfMethods(); m++) {
Method method = getMethod(m);
motions = loadMotions(method, subjectsEvaluation);
motions = loadMotions(method, subjects);
List<List<Motion>> motionsBySequence = new ClassifiableList(motions).splitBySequence();
List<List<Motion>> folds = new ArrayList();
for (int fFold = 0; fFold < nFolds; fFold++) {
......@@ -93,7 +95,7 @@ public class MethodEvaluatorClassification extends MethodEvaluator {
templatesAll.addAll(templatesGallery);
getClassifier().getDecision().setDistanceMatrix(calculateDistanceMatrix(templatesAll));
double[] cumulativeMatchCharacteristic = getCumulativeMatchCharacteristic(templatesTest, templatesGallery);
for (int rank = 0; rank < nClasses; rank++) {
for (int rank