# Newer / Older  (navigation residue from the repository blame view; not code)
import collections
import copy
import itertools
import random
from typing import Dict, List

import pandas as pd
from sympy.utilities.iterables import multiset_permutations

# Terézia Slanináková committed  (blame-view residue; not code)
# from helpers import *
# NOTE(review): the star-import above is commented out, yet
# TripSimilarity.__init__ calls load_data(), which is not imported anywhere in
# the visible part of this file -- confirm where load_data should come from.

# DEBUG: when True, small debug logs are printed (via print(), i.e. to
# standard output).
DEBUG = False

# SHUFFLE: when True, cities that share the same number of occurrences are
# shuffled among themselves before the recommendations are picked.
SHUFFLE = True
class TripSimilarity:
    """
    Recommends cities based on the similarity of other people's trips.

    A "group" (trip) is a list of city names. A city is recommended when it
    appears in groups that also contain all of the user's cities; the more
    such groups it appears in, the earlier it is recommended.
    """

    def __init__(self, data_path="data/"):
        """
        Load the trips and cities data.

        :param data_path: location handed to ``load_data``
        """
        # NOTE(review): load_data is not defined or imported in the visible
        # part of this file -- confirm where it comes from.
        trips, cities = load_data(data_path)
        self.trips = trips
        self.cities = cities

    def find_recommendation_with_removal(self, data: List[List[str]], user_data: List[str], min_number_of_rec: int = 5) -> List[str]:
        """
        Find recommendations, relaxing the query when too few are found.

        First the full ``user_data`` is used. If that yields fewer than
        ``min_number_of_rec`` cities, smaller and smaller subsets of
        ``user_data`` are tried (one city removed, then two, ...). The first
        subset that yields at least ``min_number_of_rec`` cities wins. If no
        subset does, the longest result seen is returned (best effort).

        :param data: groups of cities to find similarity with
        :param user_data: cities the similarity is searched by
        :param min_number_of_rec: number of recommendations that is required
        :return: list of at most ``min_number_of_rec`` recommended cities
        """
        result = self.find_recommendation(data, user_data, min_number_of_rec)
        if len(result) >= min_number_of_rec:
            return result[:min_number_of_rec]

        best = result
        total = len(user_data)
        # Same bounds as the original loop: between 1 and total - 2 cities are
        # removed, so a reduced query always keeps at least two cities.
        for count_to_remove in range(1, total - 1):
            kept_size = total - count_to_remove
            # Each combination of kept positions is one way of removing
            # `count_to_remove` cities.
            for kept_indexes in itertools.combinations(range(total), kept_size):
                kept = set(kept_indexes)
                reduced = [city for index, city in enumerate(user_data) if index in kept]
                if DEBUG:
                    print("\nfor this round removing: ", end=" ")
                    for index, city in enumerate(user_data):
                        if index not in kept:
                            print(city, end=", ")

                candidate = self.find_recommendation(data, reduced, min_number_of_rec)
                if len(candidate) >= min_number_of_rec:
                    return candidate[:min_number_of_rec]
                if len(candidate) > len(best):
                    best = candidate

        return best[:min_number_of_rec]

    def find_recommendation(self, data: List[List[str]], user_data: List[str], max_number_of_rec: int = 5) -> List[str]:
        """
        Find recommendations from the groups that contain every user city.

        Only groups containing all cities of ``user_data`` are considered. The
        user's own cities are removed from them and the remaining cities are
        ranked by the number of groups they occur in (highest first).

        :param data: groups of cities to find similarity with (not modified)
        :param user_data: cities the similarity is searched by
        :param max_number_of_rec: maximum number of cities that will be returned
        :return: list of recommended cities
        """
        # Every step below builds new lists, so `data` is never mutated and no
        # defensive deep copy is needed.
        groups = self.__remove_duplicities_in_group(data)
        groups = self.__get_lists_containing(groups, user_data)
        groups = self.__remove_given_values_from_each_group(groups, user_data)
        occurrences = self.__get_occurrences_of_cities(groups)
        if SHUFFLE:
            occurrences = self.__shuffle_in_groups(occurrences)
        return self.__get_first_x_cities(occurrences, max_number_of_rec)

    def __get_occurrences_of_cities(self, groups: List[List[str]]) -> Dict[int, List[str]]:
        """
        Group cities by the number of groups they appear in.

        A city is counted at most once per group (duplicates inside one group
        are ignored).

        :param groups: groups of cities (aka trips)
        :return: key: number, value: cities that appeared number-times in different groups
        """
        counts = collections.defaultdict(int)
        for group in groups:
            # dict.fromkeys de-duplicates while keeping first-seen order.
            for city in dict.fromkeys(group):
                counts[city] += 1

        by_count = collections.defaultdict(list)
        for city, count in counts.items():
            by_count[count].append(city)
        return dict(by_count)

    def __get_first_x_cities(self, groups: Dict[int, List[str]], x=10) -> List[str]:
        """
        Get up to X cities, taking those with the highest key first.

        :param groups: key: number, value: cities that appeared number-times in different groups
        :param x: number of cities to be returned; values <= 0 give an empty list
        :return: at most x cities, ordered by descending key
        """
        result = []
        if x <= 0:
            return result
        # Rank by the occurrence count (the dict key), highest first.
        for count in sorted(groups, reverse=True):
            for city in groups[count]:
                result.append(city)
                if DEBUG:
                    print("Adding city [{}] {} ".format(count, city))
                if len(result) >= x:
                    return result
        return result

    def __shuffle_in_groups(self, groups: Dict[int, List[str]]) -> Dict[int, List[str]]:
        """
        Shuffle every list in ``groups`` in place.

        :param groups: dict whose values (lists) will be shuffled
        :return: the same dict object, with shuffled lists
        """
        for cities in groups.values():
            random.shuffle(cities)
        return groups

    def __remove_duplicities_in_group(self, groups: List[List[str]]) -> List[List[str]]:
        """
        Remove duplicates inside each inner list, keeping first-seen order.

        :param groups: groups of cities
        :return: new list of new, de-duplicated lists
        """
        return [list(dict.fromkeys(group)) for group in groups]

    def __remove_given_values_from_each_group(self, groups: List[List[str]], to_remove: List[str]) -> List[List[str]]:
        """
        Remove the given values from every group.

        :param groups: groups to be removed from
        :param to_remove: values to be removed
        :return: new list of new lists without the removed values
        """
        excluded = set(to_remove)
        return [[city for city in group if city not in excluded] for group in groups]

    def __get_lists_containing(self, groups: List[List[str]], to_contain: List[str]) -> List[List[str]]:
        """
        Get the groups that contain all of the given values.

        An empty ``to_contain`` matches every group.

        :param groups: groups to search in
        :param to_contain: values every returned group must contain
        :return: groups that contain all values given in to_contain
        """
        required = set(to_contain)
        return [group for group in groups if required.issubset(group)]

    def load_from_csv(self) -> List[List[str]]:
        """
        Build one group of cities per user from the loaded trips.

        Reads ``self.trips``, groups it by the "user" column and collects the
        "city" column of each group.

        :return: list of groups, one list of cities per user
        """
        return [group["city"].tolist() for _, group in self.trips.groupby("user")]

    def get_recommendations_with_removal(self, user_data: List[str]) -> List[List[str]]:
        """
        Recommend cities for ``user_data``, relaxing the query if needed.

        :param user_data: cities the similarity is searched by
        :return: rows as produced by ``get_final_results``
        """
        recom = self.find_recommendation_with_removal(self.load_from_csv(), user_data)
        return self.get_final_results(recom)

    def get_recommendations_without_removal(self, user_data: List[str]) -> List[List[str]]:
        """
        Recommend cities for ``user_data`` using the full query only.

        :param user_data: cities the similarity is searched by
        :return: rows as produced by ``get_final_results``
        """
        recom = self.find_recommendation(self.load_from_csv(), user_data)
        return self.get_final_results(recom)

    def get_final_results(self, recommendations: List[str]) -> List[List[str]]:
        """
        Wrap each recommended city in a three-element row.

        :param recommendations: recommended cities
        :return: one ``[city, "N/A", "N/A"]`` row per city; the meaning of the
            two placeholder columns is not visible in this file
        """
        return [[city, "N/A", "N/A"] for city in recommendations]