Skip to content
Snippets Groups Projects
TripSimilarity.py 7.63 KiB
Newer Older
Jiří Vrbka's avatar
Jiří Vrbka committed
import collections
import copy
import random
from typing import List, Dict
from sympy.utilities.iterables import multiset_permutations
Jiří Vrbka's avatar
Jiří Vrbka committed
from src.algorithms.helpers import *
Jiří Vrbka's avatar
Jiří Vrbka committed
# DEBUG: when True, small debug logs are printed to standard output.
# NOTE(review): the assignment was missing from the visible source although the
# flag is read by TripSimilarity; False is assumed as the quiet default - confirm.
DEBUG = False

# SHUFFLE: controls shuffling of results that share the same number of
# occurrences. When True, cities with an equal occurrence count are shuffled.
SHUFFLE = True


class TripSimilarity:
    """
    Recommends cities based on how similar other people's trips are to the user's.

    A "group" is the list of cities of one trip. Cities that appear, together with
    the user's cities, in the highest number of groups are recommended first.
    """

    def __init__(self, data_path="data/"):
        """
        Load the trips and cities through ``load_data``.
        :param data_path: location passed to ``load_data``
        """
        trips, cities = load_data(data_path)
        self.trips = trips
        self.cities = cities

    def find_recommendation_with_removal(self, data: List[List[str]], user_data: List[str],
                                         min_number_of_rec: int = 5) -> List[str]:
        """
        Find recommendation based of similarity with given trips and return untraveled places
        that have those similarities. If too few are found, retry with smaller subsets of the
        user data (removing one city, then two, ...), always keeping at least two cities.
        :param data: groups of cities to find similarity with
        :param user_data: cities to find similarity by
        :param min_number_of_rec: minimal number of cities required for a result to be accepted
        :return: list of recommended cities
        """
        # Local import keeps the method self-contained without touching file-level imports.
        from itertools import combinations

        result = self.find_recommendation(data, user_data, min_number_of_rec)
        if len(result) >= min_number_of_rec:
            return result

        # NOTE(review): the tail of this method was missing from the visible source. The
        # fallback chosen here is the largest result seen (earliest wins ties) - confirm.
        best = result
        total = len(user_data)

        for count_to_remove in range(1, total - 1):
            # Choosing the kept positions in lexicographic order enumerates the same subsets
            # as permuting a remove/keep mask, but lazily instead of materialising them all.
            for kept in combinations(range(total), total - count_to_remove):
                reduced_user_data = [user_data[i] for i in kept]

                if DEBUG:
                    kept_positions = set(kept)
                    print("\nfor this round removing: ", end=" ")
                    for position, city in enumerate(user_data):
                        if position not in kept_positions:
                            print(city, end=", ")

                result = self.find_recommendation(data, reduced_user_data, min_number_of_rec + 1)
                if len(result) >= min_number_of_rec:
                    return result
                if len(result) > len(best):
                    best = result

        return best

    def find_recommendation(self, data: List[List[str]], user_data: List[str],
                            max_number_of_rec: int = 5) -> List[str]:
        """
        Find recommendation based of similarity with given trips and return untraveled places
        that have those similarities.
        :param data: groups of cities to find similarity with
        :param user_data: cities to find similarity by
        :param max_number_of_rec: maximum number of cities that will be returned
        :return: list of recommended cities
        """
        # No defensive deep copy is needed: every helper below builds new lists and
        # never mutates ``data``.
        list_of_groups = self.__remove_duplicities_in_group(data)
        list_of_groups = self.__get_lists_containing(list_of_groups, user_data)
        list_of_groups = self.__remove_given_values_from_each_group(list_of_groups, user_data)
        cities_occurrences = self.__get_occurrences_of_cities(list_of_groups)
        if SHUFFLE:
            cities_occurrences = self.__shuffle_in_groups(cities_occurrences)

        return self.__get_first_x_cities(cities_occurrences, max_number_of_rec)

    def __get_occurrences_of_cities(self, groups: List[List[str]]) -> Dict[int, List[str]]:
        """
        Map each occurrence count to the cities that appeared in that many groups.
        A city is counted at most once per group (duplicates inside a group are ignored).
        :param groups: groups of cities (aka trips)
        :return: key: number, value: cities that appeared number-times in different groups
        """
        occurrences = collections.Counter(
            city for group in groups for city in dict.fromkeys(group)
        )

        result: Dict[int, List[str]] = {}
        for city, count in occurrences.items():
            result.setdefault(count, []).append(city)

        return result

    def __get_first_x_cities(self, groups: Dict[int, List[str]], x=10) -> List[str]:
        """
        Get up to X cities ordered by their key (higher = better).
        :param groups: key: number, value: cities that appeared number-times in different groups
        :param x: maximum number of cities to be returned
        :return: at most x cities, those with the highest number first
        """
        result: List[str] = []
        if x <= 0:
            return result

        # Rank by the occurrence count (the dict key), not by the city lists.
        for count in sorted(groups, reverse=True):
            for city in groups[count]:
                result.append(city)

                if DEBUG:
                    print("Adding city [{}] {} ".format(count, city))

                if len(result) >= x:
                    return result

        return result

    def __shuffle_in_groups(self, groups: Dict[int, List[str]]) -> Dict[int, List[str]]:
        """
        Shuffle every list in groups in place.
        :param groups: dict whose values will be shuffled
        :return: the same dict, with each value shuffled
        """
        for cities in groups.values():
            random.shuffle(cities)
        return groups

    def __remove_duplicities_in_group(self, groups: List[List[str]]) -> List[List[str]]:
        """
        Remove duplicates in inner lists, keeping the first occurrence of each value.
        :param groups: groups to be de-duplicated
        :return: new list of new de-duplicated lists
        """
        return [list(dict.fromkeys(group)) for group in groups]

    def __remove_given_values_from_each_group(self, groups: List[List[str]],
                                              to_remove: List[str]) -> List[List[str]]:
        """
        Remove given values from every group.
        :param groups: groups to be removed from
        :param to_remove: values to be removed
        :return: new list of groups without the given values
        """
        unwanted = set(to_remove)  # constant-time membership tests
        return [[elem for elem in group if elem not in unwanted] for group in groups]

    def __get_lists_containing(self, groups: List[List[str]], to_contain: List[str]) -> List[List[str]]:
        """
        Get the lists that contain all the given values.
        :param groups: groups to search in
        :param to_contain: values that have to be contained
        :return: lists that contain every value given in to_contain
        """
        required = set(to_contain)
        return [group for group in groups if required.issubset(group)]

    def load_from_csv(self) -> List[List[str]]:
        """
        Collect the "city" values of self.trips for every "user".
        Presumably self.trips is a pandas DataFrame with "user" and "city" columns - confirm
        against ``load_data``.
        :return: one list of cities per user
        """
        return [group["city"].tolist() for _, group in self.trips.groupby("user")]

    def get_recommendations_with_removal(self, user_data: List[str]) -> List[List[str]]:
        """
        Recommend cities for the user, retrying with fewer of the user's cities if needed.
        :param user_data: cities to find similarity by
        :return: one [city, "N/A", "N/A"] row per recommended city
        """
        recom = self.find_recommendation_with_removal(self.load_from_csv(), user_data)
        return self.get_final_results(recom)

    def get_recommendations_without_removal(self, user_data: List[str]) -> List[List[str]]:
        """
        Recommend cities for the user using all of the user's cities.
        :param user_data: cities to find similarity by
        :return: one [city, "N/A", "N/A"] row per recommended city
        """
        recom = self.find_recommendation(self.load_from_csv(), user_data)
        return self.get_final_results(recom)

    def get_final_results(self, recommendations: List[str]) -> List[List[str]]:
        """
        Wrap every recommendation into a result row padded with two "N/A" placeholders.
        :param recommendations: recommended cities
        :return: one [city, "N/A", "N/A"] row per recommendation
        """
        return [[city, "N/A", "N/A"] for city in recommendations]