Commit 9a1ab1ca authored by Jakub
Browse files

Bugfixing

parent 2ec5a5e0
Loading
Loading
Loading
Loading
+6 −5
Original line number Original line Diff line number Diff line
@@ -71,6 +71,7 @@ public record ExperimentReport(String name, DBReport dbStats, int threshold, dou
                avgRepeatedMap.get(algo).add(new Pair<>(report.dbStats().avgRepeatedItems(), report.time()));
                avgRepeatedMap.get(algo).add(new Pair<>(report.dbStats().avgRepeatedItems(), report.time()));
                thresholdMap.get(algo).add(new Pair<>(report.threshold(), report.time()));
                thresholdMap.get(algo).add(new Pair<>(report.threshold(), report.time()));
            }
            }
        }
            mapSort(sizeMap);
            mapSort(sizeMap);
            mapSort(uniqueMap);
            mapSort(uniqueMap);
            mapSort(minLengthMap);
            mapSort(minLengthMap);
@@ -93,11 +94,10 @@ public record ExperimentReport(String name, DBReport dbStats, int threshold, dou
            mapPrint("avgRepeated", avgRepeatedMap);
            mapPrint("avgRepeated", avgRepeatedMap);
            mapPrint("threshold", thresholdMap);
            mapPrint("threshold", thresholdMap);
    }
    }
    }


    private static <K extends Comparable,V> void mapSort(Map<Algo, List<Pair<K,V>>> map){
    private static <K extends Comparable,V> void mapSort(Map<Algo, List<Pair<K,V>>> map){
        for (Algo algo : map.keySet()){
        for (Algo algo : map.keySet()){
            map.get(algo).stream().sorted(Comparator.comparing(Pair::getKey));
            map.get(algo).sort(Comparator.comparing(Pair::getKey));
        }
        }
    }
    }
    private static <K extends Comparable,V extends Comparable> void mapPrint(String name, Map<Algo, List<Pair<K,V>>> map){
    private static <K extends Comparable,V extends Comparable> void mapPrint(String name, Map<Algo, List<Pair<K,V>>> map){
@@ -115,8 +115,9 @@ public record ExperimentReport(String name, DBReport dbStats, int threshold, dou
            for (var p : a)
            for (var p : a)
                uniqueK.add(p.getKey());
                uniqueK.add(p.getKey());
        }
        }

        List<K> listUniqueK = new ArrayList<>(uniqueK);
        for (K key: uniqueK) {
        listUniqueK.sort(K::compareTo);
        for (K key: listUniqueK) {
            transposedMap.put(key, new ArrayList<>());
            transposedMap.put(key, new ArrayList<>());
            for(Algo algo : algoOrder){
            for(Algo algo : algoOrder){


@@ -137,7 +138,7 @@ public record ExperimentReport(String name, DBReport dbStats, int threshold, dou
        List<K> keyOrder = new ArrayList<>(transposedMap.keySet());
        List<K> keyOrder = new ArrayList<>(transposedMap.keySet());
        keyOrder = keyOrder.stream().sorted().collect(Collectors.toList());
        keyOrder = keyOrder.stream().sorted().collect(Collectors.toList());
        for (K key : keyOrder){
        for (K key : keyOrder){
            valuesCSV.append(key.toString());
            valuesCSV.append(key.toString()).append(", ");
            var pairList = transposedMap.get(key);
            var pairList = transposedMap.get(key);
            for (var algo : algoOrder){
            for (var algo : algoOrder){
                //todo:compute candle value and average
                //todo:compute candle value and average
+289 −22
Original line number Original line Diff line number Diff line
@@ -19,40 +19,307 @@ set style line 3 \
    linetype 1 linewidth 2 \
    linetype 1 linewidth 2 \
    pointtype 7 pointsize 1.5
    pointtype 7 pointsize 1.5


set boxwidth 0.1 relative
set style line 4 \
    linecolor rgb '#00b300' \
    linetype 1 linewidth 2 \
    pointtype 7 pointsize 1.5

set style line 5 \
    linecolor rgb '#0000cc' \
    linetype 1 linewidth 2 \
    pointtype 7 pointsize 1.5

set style line 6 \
    linecolor rgb '#cc0000' \
    linetype 1 linewidth 2 \
    pointtype 7 pointsize 1.5

set style fill empty
set style fill empty


set output 'sizeExp.png'
set output 'sizeExp.png'
stats 'size.csv' using 3 name 'MIN_GSP' nooutput
stats 'size.csv' using 5 name 'MAX_GSP' nooutput
stats 'size.csv' using 7 name 'MIN_PS' nooutput
stats 'size.csv' using 10 name 'MAX_PS' nooutput
stats 'size.csv' using 13 name 'MIN_SPAM' nooutput
stats 'size.csv' using 15 name 'MAX_SPAM' nooutput
MAX_max = (MAX_GSP_max + MAX_PS_max + MAX_SPAM_max)/3
MIN_min = (MIN_GSP_min + MIN_PS_min + MIN_SPAM_min)/3
rangeOffset = ((MAX_max - MIN_min)/10)

stats 'size.csv' using 1 name 'X' nooutput
boxes = X_records*3
rangesize = X_max- X_min
rangeoffset = rangesize / 10
boxsize = (rangesize / boxes)/10
print boxsize
set boxwidth boxsize
set xrange [(X_min - rangeoffset):(X_max + rangeoffset)]
set yrange [(MIN_min-rangeOffset):(MAX_max+rangeOffset)]
plot 'size.csv' using 1:4 with lp title 'GSP' ls 1 , \
plot 'size.csv' using 1:4 with lp title 'GSP' ls 1 , \
             '' using 1:9 with lp title 'PREFIX_SPAN' ls 2 , \
             '' using 1:9 with lp title 'PREFIX_SPAN' ls 2 , \
             '' using 1:14 with lp title 'SPAM' ls 3, \
             '' using 1:14 with lp title 'SPAM' ls 3, \
             '' using ($1-1):3:2:6:5 with candlesticks  title 'GSP' whiskerbars, \
             '' using ($1- (boxsize)):3:2:6:5 with candlesticks  title 'GSP' whiskerbars ls 4, \
             '' using ($1):8:7:11:10 with candlesticks  title 'PREFIX_SPAN' whiskerbars, \
             '' using ($1):8:7:11:10 with candlesticks  title 'PREFIX_SPAN' whiskerbars ls 5, \
             '' using ($1+1):13:12:16:15 with candlesticks title 'SPAM' whiskerbars
             '' using ($1+ (boxsize)):13:12:16:15 with candlesticks title 'SPAM' whiskerbars ls 6


#set output 'unique.png'
set xrange [*:*]
#plot 'unique.csv' using 1:2 with lp ls 1, '' using 1:3 with lp ls 2, '' using 1:4 with lp ls 3
set yrange [*:*]
set output 'unique.png'
stats 'unique.csv' using 3 name 'MIN_GSP' nooutput
stats 'unique.csv' using 5 name 'MAX_GSP' nooutput
stats 'unique.csv' using 7 name 'MIN_PS' nooutput
stats 'unique.csv' using 10 name 'MAX_PS' nooutput
stats 'unique.csv' using 13 name 'MIN_SPAM' nooutput
stats 'unique.csv' using 15 name 'MAX_SPAM' nooutput
MAX_max = (MAX_GSP_max + MAX_PS_max + MAX_SPAM_max)/3
MIN_min = (MIN_GSP_min + MIN_PS_min + MIN_SPAM_min)/3
rangeOffset = ((MAX_max - MIN_min)/10)


#set output 'minLength.png'
stats 'unique.csv' using 1 name 'X' nooutput
#plot 'minLength.csv' using 1:2 with lp ls 1, '' using 1:3 with lp ls 2, '' using 1:4 with lp ls 3
boxes = X_records*3
rangesize = X_max- X_min
rangeoffset = rangesize / 10
boxsize = (rangesize / boxes)/10
print boxsize
set boxwidth boxsize
set xrange [(X_min - rangeoffset):(X_max + rangeoffset)]
set yrange [(MIN_min-rangeOffset):(MAX_max+rangeOffset)]
plot 'unique.csv' using 1:4 with lp title 'GSP' ls 1 , \
             '' using 1:9 with lp title 'PREFIX_SPAN' ls 2 , \
             '' using 1:14 with lp title 'SPAM' ls 3, \
             '' using ($1- (boxsize)):3:2:6:5 with candlesticks  title 'GSP' whiskerbars ls 4, \
             '' using ($1):8:7:11:10 with candlesticks  title 'PREFIX_SPAN' whiskerbars ls 5, \
             '' using ($1+ (boxsize)):13:12:16:15 with candlesticks title 'SPAM' whiskerbars ls 6


#set output 'avgLength.png'
set xrange [*:*]
#plot 'avgLength.csv' using 1:2 with lp ls 1, '' using 1:3 with lp ls 2, '' using 1:4 with lp ls 3
set yrange [*:*]
set output 'minLength.png'
stats 'minLength.csv' using 3 name 'MIN_GSP' nooutput
stats 'minLength.csv' using 5 name 'MAX_GSP' nooutput
stats 'minLength.csv' using 7 name 'MIN_PS' nooutput
stats 'minLength.csv' using 10 name 'MAX_PS' nooutput
stats 'minLength.csv' using 13 name 'MIN_SPAM' nooutput
stats 'minLength.csv' using 15 name 'MAX_SPAM' nooutput
MAX_max = (MAX_GSP_max + MAX_PS_max + MAX_SPAM_max)/3
MIN_min = (MIN_GSP_min + MIN_PS_min + MIN_SPAM_min)/3
rangeOffset = ((MAX_max - MIN_min)/10)


#set output 'maxLength.png'
stats 'minLength.csv' using 1 name 'X' nooutput
#plot 'maxLength.csv' using 1:2 with lp ls 1, '' using 1:3 with lp ls 2, '' using 1:4 with lp ls 3
boxes = X_records*3
rangesize = X_max- X_min
rangeoffset = rangesize / 10
boxsize = (rangesize / boxes)/10
print boxsize
set boxwidth boxsize
set xrange [(X_min - rangeoffset):(X_max + rangeoffset)]
set yrange [(MIN_min-rangeOffset):(MAX_max+rangeOffset)]
plot 'minLength.csv' using 1:4 with lp title 'GSP' ls 1 , \
             '' using 1:9 with lp title 'PREFIX_SPAN' ls 2 , \
             '' using 1:14 with lp title 'SPAM' ls 3, \
             '' using ($1- (boxsize)):3:2:6:5 with candlesticks  title 'GSP' whiskerbars ls 4, \
             '' using ($1):8:7:11:10 with candlesticks  title 'PREFIX_SPAN' whiskerbars ls 5, \
             '' using ($1+ (boxsize)):13:12:16:15 with candlesticks title 'SPAM' whiskerbars ls 6


#set output 'minRepeatibility.png'
set xrange [*:*]
#plot 'minRepeatibility.csv' using 1:2 with lp ls 1, '' using 1:3 with lp ls 2, '' using 1:4 with lp ls 3
set yrange [*:*]
set output 'avgLength.png'
stats 'avgLength.csv' using 3 name 'MIN_GSP' nooutput
stats 'avgLength.csv' using 5 name 'MAX_GSP' nooutput
stats 'avgLength.csv' using 7 name 'MIN_PS' nooutput
stats 'avgLength.csv' using 10 name 'MAX_PS' nooutput
stats 'avgLength.csv' using 13 name 'MIN_SPAM' nooutput
stats 'avgLength.csv' using 15 name 'MAX_SPAM' nooutput
MAX_max = (MAX_GSP_max + MAX_PS_max + MAX_SPAM_max)/3
MIN_min = (MIN_GSP_min + MIN_PS_min + MIN_SPAM_min)/3
rangeOffset = ((MAX_max - MIN_min)/10)


#set output 'avgRepeatibility.png'
stats 'avgLength.csv' using 1 name 'X' nooutput
#plot 'avgRepeatibility.csv' using 1:2 with lp ls 1, '' using 1:3 with lp ls 2, '' using 1:4 with lp ls 3
boxes = X_records*3
rangesize = X_max- X_min
rangeoffset = rangesize / 10
boxsize = (rangesize / boxes)/10
print boxsize
set boxwidth boxsize
set xrange [(X_min - rangeoffset):(X_max + rangeoffset)]
set yrange [(MIN_min-rangeOffset):(MAX_max+rangeOffset)]
plot 'avgLength.csv' using 1:4 with lp title 'GSP' ls 1 , \
             '' using 1:9 with lp title 'PREFIX_SPAN' ls 2 , \
             '' using 1:14 with lp title 'SPAM' ls 3, \
             '' using ($1- (boxsize)):3:2:6:5 with candlesticks  title 'GSP' whiskerbars ls 4, \
             '' using ($1):8:7:11:10 with candlesticks  title 'PREFIX_SPAN' whiskerbars ls 5, \
             '' using ($1+ (boxsize)):13:12:16:15 with candlesticks title 'SPAM' whiskerbars ls 6

set xrange [*:*]
set yrange [*:*]
set output 'maxLength.png'
stats 'maxLength.csv' using 3 name 'MIN_GSP' nooutput
stats 'maxLength.csv' using 5 name 'MAX_GSP' nooutput
stats 'maxLength.csv' using 7 name 'MIN_PS' nooutput
stats 'maxLength.csv' using 10 name 'MAX_PS' nooutput
stats 'maxLength.csv' using 13 name 'MIN_SPAM' nooutput
stats 'maxLength.csv' using 15 name 'MAX_SPAM' nooutput
MAX_max = (MAX_GSP_max + MAX_PS_max + MAX_SPAM_max)/3
MIN_min = (MIN_GSP_min + MIN_PS_min + MIN_SPAM_min)/3
rangeOffset = ((MAX_max - MIN_min)/10)


#set output 'maxRepeatibility.png'
stats 'maxLength.csv' using 1 name 'X' nooutput
#plot 'maxRepeatibility.csv' using 1:2 with lp ls 1, '' using 1:3 with lp ls 2, '' using 1:4 with lp ls 3
boxes = X_records*3
rangesize = X_max- X_min
rangeoffset = rangesize / 10
boxsize = (rangesize / boxes)/10
print boxsize
set boxwidth boxsize
set xrange [(X_min - rangeoffset):(X_max + rangeoffset)]
set yrange [(MIN_min-rangeOffset):(MAX_max+rangeOffset)]
plot 'maxLength.csv' using 1:4 with lp title 'GSP' ls 1 , \
             '' using 1:9 with lp title 'PREFIX_SPAN' ls 2 , \
             '' using 1:14 with lp title 'SPAM' ls 3, \
             '' using ($1- (boxsize)):3:2:6:5 with candlesticks  title 'GSP' whiskerbars ls 4, \
             '' using ($1):8:7:11:10 with candlesticks  title 'PREFIX_SPAN' whiskerbars ls 5, \
             '' using ($1+ (boxsize)):13:12:16:15 with candlesticks title 'SPAM' whiskerbars ls 6


#set output 'avgRepeated.png'
set xrange [*:*]
#plot 'avgRepeated.csv' using 1:2 with lp ls 1, '' using 1:3 with lp ls 2, '' using 1:4 with lp ls 3
set yrange [*:*]
set output 'minRepeatibility.png'
stats 'minRepeatibility.csv' using 3 name 'MIN_GSP' nooutput
stats 'minRepeatibility.csv' using 5 name 'MAX_GSP' nooutput
stats 'minRepeatibility.csv' using 7 name 'MIN_PS' nooutput
stats 'minRepeatibility.csv' using 10 name 'MAX_PS' nooutput
stats 'minRepeatibility.csv' using 13 name 'MIN_SPAM' nooutput
stats 'minRepeatibility.csv' using 15 name 'MAX_SPAM' nooutput
MAX_max = (MAX_GSP_max + MAX_PS_max + MAX_SPAM_max)/3
MIN_min = (MIN_GSP_min + MIN_PS_min + MIN_SPAM_min)/3
rangeOffset = ((MAX_max - MIN_min)/10)


#set output 'threshold.png'
stats 'minRepeatibility.csv' using 1 name 'X' nooutput
#plot 'threshold.csv' using 1:2 with lp ls 1, '' using 1:3 with lp ls 2, '' using 1:4 with lp ls 3
boxes = X_records*3
rangesize = X_max- X_min
rangeoffset = rangesize / 10
boxsize = (rangesize / boxes)/10
print boxsize
set boxwidth boxsize
set xrange [(X_min - rangeoffset):(X_max + rangeoffset)]
set yrange [(MIN_min-rangeOffset):(MAX_max+rangeOffset)]
#plot 'minRepeatibility.csv' using 1:4 with lp title 'GSP' ls 1 , \
             '' using 1:9 with lp title 'PREFIX_SPAN' ls 2 , \
             '' using 1:14 with lp title 'SPAM' ls 3, \
             '' using ($1- (boxsize)):3:2:6:5 with candlesticks  title 'GSP' whiskerbars ls 4, \
             '' using ($1):8:7:11:10 with candlesticks  title 'PREFIX_SPAN' whiskerbars ls 5, \
             '' using ($1+ (boxsize)):13:12:16:15 with candlesticks title 'SPAM' whiskerbars ls 6

# ---- avgRepeatibility.png ----
# Return both axes to autoscale before gathering statistics; presumably so that
# explicit ranges set earlier in the script do not carry over into this section.
set xrange [*:*]
set yrange [*:*]
set output 'avgRepeatibility.png'
# Each `stats ... name 'P'` defines variables such as P_min, P_max and P_records
# for the selected column. Columns 3/5, 7/10 and 13/15 are used as the low/high
# bounds for GSP, PREFIX_SPAN and SPAM respectively.
# NOTE(review): the PREFIX_SPAN pair (7, 10) does not follow the same column
# pattern as GSP (3, 5) and SPAM (13, 15) - confirm against the CSV layout.
stats 'avgRepeatibility.csv' using 3 name 'MIN_GSP' nooutput
stats 'avgRepeatibility.csv' using 5 name 'MAX_GSP' nooutput
stats 'avgRepeatibility.csv' using 7 name 'MIN_PS' nooutput
stats 'avgRepeatibility.csv' using 10 name 'MAX_PS' nooutput
stats 'avgRepeatibility.csv' using 13 name 'MIN_SPAM' nooutput
stats 'avgRepeatibility.csv' using 15 name 'MAX_SPAM' nooutput
# MAX_max / MIN_min are the arithmetic MEAN of the three per-algorithm extremes,
# not the overall maximum / minimum.
# NOTE(review): a series whose extreme lies beyond that mean can extend past the
# yrange set below - confirm this is intended.
MAX_max = (MAX_GSP_max + MAX_PS_max + MAX_SPAM_max)/3
MIN_min = (MIN_GSP_min + MIN_PS_min + MIN_SPAM_min)/3
# Vertical padding: one tenth of the (averaged) value span.
rangeOffset = ((MAX_max - MIN_min)/10)

# Statistics of the x column (column 1) drive the horizontal layout.
stats 'avgRepeatibility.csv' using 1 name 'X' nooutput
# Three candlesticks (one per algorithm) are drawn for every record.
boxes = X_records*3
rangesize = X_max- X_min
# Horizontal padding; `rangeoffset` is a different variable from `rangeOffset`
# above (gnuplot variable names are case-sensitive).
rangeoffset = rangesize / 10
# Candlestick width: a tenth of the x span divided evenly among all boxes.
boxsize = (rangesize / boxes)/10
# Emits the computed width; presumably a debugging aid.
print boxsize
set boxwidth boxsize
set xrange [(X_min - rangeoffset):(X_max + rangeoffset)]
set yrange [(MIN_min-rangeOffset):(MAX_max+rangeOffset)]
# First three series: lines+points of columns 4, 9 and 14 against column 1.
# Last three series: candlesticks, whose `using` order is
# x:box_min:whisker_min:whisker_high:box_high. The GSP and SPAM candlesticks are
# shifted by -boxsize / +boxsize so the three boxes sit side by side at each x.
plot 'avgRepeatibility.csv' using 1:4 with lp title 'GSP' ls 1 , \
             '' using 1:9 with lp title 'PREFIX_SPAN' ls 2 , \
             '' using 1:14 with lp title 'SPAM' ls 3, \
             '' using ($1- (boxsize)):3:2:6:5 with candlesticks  title 'GSP' whiskerbars ls 4, \
             '' using ($1):8:7:11:10 with candlesticks  title 'PREFIX_SPAN' whiskerbars ls 5, \
             '' using ($1+ (boxsize)):13:12:16:15 with candlesticks title 'SPAM' whiskerbars ls 6

# ---- maxRepeatibility.png ----
# Return both axes to autoscale before gathering statistics; presumably so that
# explicit ranges set earlier in the script do not carry over into this section.
set xrange [*:*]
set yrange [*:*]
set output 'maxRepeatibility.png'
# Each `stats ... name 'P'` defines variables such as P_min, P_max and P_records
# for the selected column. Columns 3/5, 7/10 and 13/15 are used as the low/high
# bounds for GSP, PREFIX_SPAN and SPAM respectively.
# NOTE(review): the PREFIX_SPAN pair (7, 10) does not follow the same column
# pattern as GSP (3, 5) and SPAM (13, 15) - confirm against the CSV layout.
stats 'maxRepeatibility.csv' using 3 name 'MIN_GSP' nooutput
stats 'maxRepeatibility.csv' using 5 name 'MAX_GSP' nooutput
stats 'maxRepeatibility.csv' using 7 name 'MIN_PS' nooutput
stats 'maxRepeatibility.csv' using 10 name 'MAX_PS' nooutput
stats 'maxRepeatibility.csv' using 13 name 'MIN_SPAM' nooutput
stats 'maxRepeatibility.csv' using 15 name 'MAX_SPAM' nooutput
# MAX_max / MIN_min are the arithmetic MEAN of the three per-algorithm extremes,
# not the overall maximum / minimum.
# NOTE(review): a series whose extreme lies beyond that mean can extend past the
# yrange set below - confirm this is intended.
MAX_max = (MAX_GSP_max + MAX_PS_max + MAX_SPAM_max)/3
MIN_min = (MIN_GSP_min + MIN_PS_min + MIN_SPAM_min)/3
# Vertical padding: one tenth of the (averaged) value span.
rangeOffset = ((MAX_max - MIN_min)/10)

# Statistics of the x column (column 1) drive the horizontal layout.
stats 'maxRepeatibility.csv' using 1 name 'X' nooutput
# Three candlesticks (one per algorithm) are drawn for every record.
boxes = X_records*3
rangesize = X_max- X_min
# Horizontal padding; `rangeoffset` is a different variable from `rangeOffset`
# above (gnuplot variable names are case-sensitive).
rangeoffset = rangesize / 10
# Candlestick width: a tenth of the x span divided evenly among all boxes.
boxsize = (rangesize / boxes)/10
# Emits the computed width; presumably a debugging aid.
print boxsize
set boxwidth boxsize
set xrange [(X_min - rangeoffset):(X_max + rangeoffset)]
set yrange [(MIN_min-rangeOffset):(MAX_max+rangeOffset)]
# First three series: lines+points of columns 4, 9 and 14 against column 1.
# Last three series: candlesticks, whose `using` order is
# x:box_min:whisker_min:whisker_high:box_high. The GSP and SPAM candlesticks are
# shifted by -boxsize / +boxsize so the three boxes sit side by side at each x.
plot 'maxRepeatibility.csv' using 1:4 with lp title 'GSP' ls 1 , \
             '' using 1:9 with lp title 'PREFIX_SPAN' ls 2 , \
             '' using 1:14 with lp title 'SPAM' ls 3, \
             '' using ($1- (boxsize)):3:2:6:5 with candlesticks  title 'GSP' whiskerbars ls 4, \
             '' using ($1):8:7:11:10 with candlesticks  title 'PREFIX_SPAN' whiskerbars ls 5, \
             '' using ($1+ (boxsize)):13:12:16:15 with candlesticks title 'SPAM' whiskerbars ls 6

# ---- avgRepeated.png ----
# Return both axes to autoscale before gathering statistics; presumably so that
# explicit ranges set earlier in the script do not carry over into this section.
set xrange [*:*]
set yrange [*:*]
set output 'avgRepeated.png'
# Each `stats ... name 'P'` defines variables such as P_min, P_max and P_records
# for the selected column. Columns 3/5, 7/10 and 13/15 are used as the low/high
# bounds for GSP, PREFIX_SPAN and SPAM respectively.
# NOTE(review): the PREFIX_SPAN pair (7, 10) does not follow the same column
# pattern as GSP (3, 5) and SPAM (13, 15) - confirm against the CSV layout.
stats 'avgRepeated.csv' using 3 name 'MIN_GSP' nooutput
stats 'avgRepeated.csv' using 5 name 'MAX_GSP' nooutput
stats 'avgRepeated.csv' using 7 name 'MIN_PS' nooutput
stats 'avgRepeated.csv' using 10 name 'MAX_PS' nooutput
stats 'avgRepeated.csv' using 13 name 'MIN_SPAM' nooutput
stats 'avgRepeated.csv' using 15 name 'MAX_SPAM' nooutput
# MAX_max / MIN_min are the arithmetic MEAN of the three per-algorithm extremes,
# not the overall maximum / minimum.
# NOTE(review): a series whose extreme lies beyond that mean can extend past the
# yrange set below - confirm this is intended.
MAX_max = (MAX_GSP_max + MAX_PS_max + MAX_SPAM_max)/3
MIN_min = (MIN_GSP_min + MIN_PS_min + MIN_SPAM_min)/3
# Vertical padding: one tenth of the (averaged) value span.
rangeOffset = ((MAX_max - MIN_min)/10)

# Statistics of the x column (column 1) drive the horizontal layout.
stats 'avgRepeated.csv' using 1 name 'X' nooutput
# Three candlesticks (one per algorithm) are drawn for every record.
boxes = X_records*3
rangesize = X_max- X_min
# Horizontal padding; `rangeoffset` is a different variable from `rangeOffset`
# above (gnuplot variable names are case-sensitive).
rangeoffset = rangesize / 10
# Candlestick width: a tenth of the x span divided evenly among all boxes.
boxsize = (rangesize / boxes)/10
# Emits the computed width; presumably a debugging aid.
print boxsize
set boxwidth boxsize
set xrange [(X_min - rangeoffset):(X_max + rangeoffset)]
set yrange [(MIN_min-rangeOffset):(MAX_max+rangeOffset)]
# First three series: lines+points of columns 4, 9 and 14 against column 1.
# Last three series: candlesticks, whose `using` order is
# x:box_min:whisker_min:whisker_high:box_high. The GSP and SPAM candlesticks are
# shifted by -boxsize / +boxsize so the three boxes sit side by side at each x.
plot 'avgRepeated.csv' using 1:4 with lp title 'GSP' ls 1 , \
             '' using 1:9 with lp title 'PREFIX_SPAN' ls 2 , \
             '' using 1:14 with lp title 'SPAM' ls 3, \
             '' using ($1- (boxsize)):3:2:6:5 with candlesticks  title 'GSP' whiskerbars ls 4, \
             '' using ($1):8:7:11:10 with candlesticks  title 'PREFIX_SPAN' whiskerbars ls 5, \
             '' using ($1+ (boxsize)):13:12:16:15 with candlesticks title 'SPAM' whiskerbars ls 6

# ---- threshold.png ----
# Return both axes to autoscale before gathering statistics; presumably so that
# explicit ranges set earlier in the script do not carry over into this section.
set xrange [*:*]
set yrange [*:*]
set output 'threshold.png'
# Each `stats ... name 'P'` defines variables such as P_min, P_max and P_records
# for the selected column. Columns 3/5, 7/10 and 13/15 are used as the low/high
# bounds for GSP, PREFIX_SPAN and SPAM respectively.
# NOTE(review): the PREFIX_SPAN pair (7, 10) does not follow the same column
# pattern as GSP (3, 5) and SPAM (13, 15) - confirm against the CSV layout.
stats 'threshold.csv' using 3 name 'MIN_GSP' nooutput
stats 'threshold.csv' using 5 name 'MAX_GSP' nooutput
stats 'threshold.csv' using 7 name 'MIN_PS' nooutput
stats 'threshold.csv' using 10 name 'MAX_PS' nooutput
stats 'threshold.csv' using 13 name 'MIN_SPAM' nooutput
stats 'threshold.csv' using 15 name 'MAX_SPAM' nooutput
# MAX_max / MIN_min are the arithmetic MEAN of the three per-algorithm extremes,
# not the overall maximum / minimum.
# NOTE(review): a series whose extreme lies beyond that mean can extend past the
# yrange set below - confirm this is intended.
MAX_max = (MAX_GSP_max + MAX_PS_max + MAX_SPAM_max)/3
MIN_min = (MIN_GSP_min + MIN_PS_min + MIN_SPAM_min)/3
# Vertical padding: one tenth of the (averaged) value span.
rangeOffset = ((MAX_max - MIN_min)/10)

# Statistics of the x column (column 1) drive the horizontal layout.
stats 'threshold.csv' using 1 name 'X' nooutput
# Three candlesticks (one per algorithm) are drawn for every record.
boxes = X_records*3
rangesize = X_max- X_min
# Horizontal padding; `rangeoffset` is a different variable from `rangeOffset`
# above (gnuplot variable names are case-sensitive).
rangeoffset = rangesize / 10
# Candlestick width: a tenth of the x span divided evenly among all boxes.
boxsize = (rangesize / boxes)/10
# Emits the computed width; presumably a debugging aid.
print boxsize
set boxwidth boxsize
set xrange [(X_min - rangeoffset):(X_max + rangeoffset)]
set yrange [(MIN_min-rangeOffset):(MAX_max+rangeOffset)]
# First three series: lines+points of columns 4, 9 and 14 against column 1.
# Last three series: candlesticks, whose `using` order is
# x:box_min:whisker_min:whisker_high:box_high. The GSP and SPAM candlesticks are
# shifted by -boxsize / +boxsize so the three boxes sit side by side at each x.
plot 'threshold.csv' using 1:4 with lp title 'GSP' ls 1 , \
             '' using 1:9 with lp title 'PREFIX_SPAN' ls 2 , \
             '' using 1:14 with lp title 'SPAM' ls 3, \
             '' using ($1- (boxsize)):3:2:6:5 with candlesticks  title 'GSP' whiskerbars ls 4, \
             '' using ($1):8:7:11:10 with candlesticks  title 'PREFIX_SPAN' whiskerbars ls 5, \
             '' using ($1+ (boxsize)):13:12:16:15 with candlesticks title 'SPAM' whiskerbars ls 6