import collections
import copy
import random
from typing import List, Dict

from sympy.utilities.iterables import multiset_permutations

# DEBUG: if True, small debug logs are printed to stdout.
DEBUG = True

# SHUFFLE: if True, cities with the same number of occurrences are shuffled
# before the top recommendations are picked.
SHUFFLE = True

class TripSimilarity:
    """
    Finds recommendations based on the similarity of people's trips.
    """

    @staticmethod
    def find_recommendation_with_removal(data: List[List[str]], user_data: List[str],
                                         min_number_of_rec: int = 5) -> List[str]:
        """
        Finds recommendations based on similarity with the given trips and returns
        untraveled places that share those similarities. If not enough similar trips
        are found, it retries with a smaller set (removing one city from the user
        data, then two, ...).
        :param data: groups of cities to find similarity with
        :param user_data: cities to find similarity by
        :param min_number_of_rec: minimal number of results required for a recommendation
        :return: list of recommended cities
        """
        result = TripSimilarity.find_recommendation(data, user_data, min_number_of_rec)
        if len(result) < min_number_of_rec:
            for count_to_remove in range(1, len(user_data) - 1):
                remove = [True] * count_to_remove
                not_remove = [False] * (len(user_data) - count_to_remove)
                remove.extend(not_remove)
                permutation = list(multiset_permutations(remove))
                for mutation in permutation:
                    user_data_copy = user_data.copy()
                    if DEBUG:
                        print("\nfor this round removing: ", end=" ")
                    for i in range(0, len(user_data)):
                        if mutation[i]:
                            user_data_copy.remove(user_data[i])
                            if DEBUG:
                                print(user_data[i], end=", ")
                    result = TripSimilarity.find_recommendation(data, user_data_copy, min_number_of_rec + 1)
                    if len(result) >= min_number_of_rec:
                        return result
        return result
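    # Illustrative note: with, say, three cities in user_data and count_to_remove == 1,
    # multiset_permutations over one True and two False values yields the three distinct
    # masks, so each pass drops a different single city before retrying the lookup.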

    @staticmethod
    def find_recommendation(data: List[List[str]], user_data: List[str],
                            max_number_of_rec: int = 5) -> List[str]:
        """
        Finds recommendations based on similarity with the given trips and returns
        untraveled places that share those similarities.
        :param data: groups of cities to find similarity with
        :param user_data: cities to find similarity by
        :param max_number_of_rec: maximum number of cities that will be returned
        :return: list of recommended cities
        """
        list_of_groups = copy.deepcopy(data)  # work on a copy so the caller's data is not modified
        list_of_groups = TripSimilarity.__remove_duplicities_in_group(list_of_groups)
        list_of_groups = TripSimilarity.__get_lists_containing(list_of_groups, user_data)
        list_of_groups = TripSimilarity.__remove_given_values_from_each_group(list_of_groups, user_data)
        cities_occurrences = TripSimilarity.__get_occurrences_of_cities(list_of_groups)
        cities_occurrences = TripSimilarity.__shuffle_in_groups(cities_occurrences) if SHUFFLE else cities_occurrences
        recommended_cities = TripSimilarity.__get_first_x_cities(cities_occurrences, max_number_of_rec)
        return recommended_cities

    @staticmethod
    def __get_occurrences_of_cities(groups: List[List[str]]) -> Dict[int, List[str]]:
        """
        Maps each occurrence count to the list of cities that appeared in that many groups.
        A city is counted at most once per group (duplicates within one group are ignored).
        :param groups: groups of cities (aka trips)
        :return: key: number, value: cities that appeared number-times in different groups
        """
        occurrences = {}
        for group in groups:
            for city in list(dict.fromkeys(group)):
                if city not in occurrences:
                    occurrences[city] = 1
                else:
                    occurrences[city] += 1
        result = {}
        for key in occurrences.keys():
            value = occurrences[key]
            if value not in result:
                result[value] = []
            result[value].append(key)
        return result
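    # Example: for groups [["prague", "berlin"], ["prague", "vienna"]] this returns
    # {2: ["prague"], 1: ["berlin", "vienna"]}: "prague" occurs in two trips,
    # the other cities in one trip each.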

    @staticmethod
    def __get_first_x_cities(groups: Dict[int, List[str]], x=10) -> List[str]:
        """
        Gets x cities based on their key (higher = better).
        :param groups: key: number, value: cities that appeared number-times in different groups
        :param x: number of cities to be returned
        :return: x cities with the highest key
        """
        # sort by the occurrence count (the dict key), highest first
        sorted_occurrences = sorted(groups.items(), key=lambda kv: kv[0], reverse=True)
        result = []
        for key, group in collections.OrderedDict(sorted_occurrences).items():
            for city in group:
                result.append(city)
                if DEBUG:
                    print("Adding city [{}] {} ".format(key, city))
                if len(result) >= x:
                    return result
        return result

    @staticmethod
    def __shuffle_in_groups(groups: Dict[int, List[str]]) -> Dict[int, List[str]]:
        """
        Shuffles the city lists in groups.
        :param groups: dict whose value lists will be shuffled in place
        :return: the same dict with each value list shuffled
        """
        for key in groups:
            random.shuffle(groups[key])
        return groups

    @staticmethod
    def __remove_duplicities_in_group(groups: List[List[str]]) -> List[List[str]]:
        """
        Removes duplicates in the inner lists while preserving order.
        :param groups: groups of cities (aka trips)
        :return: groups with duplicates removed
        """
        result = []
        for group in groups:
            result.append(list(dict.fromkeys(group)))
        return result

    @staticmethod
    def __remove_given_values_from_each_group(groups: List[List[str]], to_remove: List[str]) -> List[List[str]]:
        """
        Removes the given values from every group.
        :param groups: groups to be filtered
        :param to_remove: values to be removed
        :return: groups without the given values
        """
        result = []
        for group in groups:
            result.append([elem for elem in group if elem not in to_remove])
        return result

    @staticmethod
    def __get_lists_containing(groups: List[List[str]], to_contain: List[str]) -> List[List[str]]:
        """
        Gets the lists that contain all of the given values.
        :param groups: groups to search in
        :param to_contain: values that must all be contained
        :return: lists that contain every value from to_contain
        """
        result = []
        for group in groups:
            if all(elem in group for elem in to_contain):
                result.append(group)
        return result

def load_from_csv(filepath: str) -> List[List[str]]:
    """
    Loads trips from a CSV file and groups the visited cities by user.
    The file is expected to have a header row; in every data row the second
    column is the user and the third column is the city.
    :param filepath: path to the CSV file
    :return: list of trips, one list of cities per user
    """
    dictionary = {}
    with open(filepath, 'r') as csv_file:
        csv_file.readline()  # skip the header
        line = csv_file.readline()
        while line:
            try:
                parts = line.split(",")
                user = parts[1]
                city = parts[2]
                if user not in dictionary:
                    dictionary[user] = []
                dictionary[user].append(city)
            except Exception:
                print("Failed to parse line: " + line.strip())
            line = csv_file.readline()
    return list(dictionary.values())

def main():
    print(TripSimilarity.find_recommendation_with_removal(
        load_from_csv("../../data/trips.csv"),
        ['prague', 'london', 'jakubov', 'berlin', 'amsterdam', 'madrid', 'znojmo']))


if __name__ == "__main__":
    main()
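For a quick sanity check without a CSV file, find_recommendation can also be fed an in-memory list of trips. A minimal sketch follows; the trips and city names are made up purely for illustration.

sample_trips = [
    ["prague", "vienna", "budapest"],
    ["prague", "vienna", "berlin"],
    ["prague", "vienna", "krakow"],
]
# "vienna" appears in every trip that contains "prague", so it is recommended first;
# the second city depends on SHUFFLE, since the remaining cities each appear once.
print(TripSimilarity.find_recommendation(sample_trips, ["prague"], max_number_of_rec=2))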