# Skip to content
# Snippets Groups Projects
# nomad_city_crawler.py 1.5 KiB
# Newer Older
# Jiří Vrbka's avatar
# Jiří Vrbka committed
from typing import List, Dict

from src.handler.csv_handler import CsvHandler
from src.handler.json_handler import JsonHandler
from src.scraper.scraper import Scraper


class NomadCityCrawler:
    """Collect a fixed set of score attributes from nomadlist.com city pages."""

    # Name of the HTML attribute used to locate the elements of interest.
    attribute_to_find = "data-key"
    # Values of ``attribute_to_find`` that are looked up for every city.
    attribute_to_find_values = [
        "hospital_score",
        "english_speaking",
        "nightlife",
        "female_friendly",
        "racial_tolerance",
        "peace_score"]

    @staticmethod
    def get_cities_info(cities_to_browse: List[str]) -> dict:
        """Scrape the configured scores for each city.

        Args:
            cities_to_browse: City identifiers; each one is appended verbatim
                to ``https://nomadlist.com/`` to form the page URL.

        Returns:
            A dict of the form ``{"cities": {city: {key: score, ...}, ...}}``.
            Every requested city gets an entry; if scraping a city fails, the
            error is printed and the entry holds whatever was collected before
            the failure (possibly nothing).
        """
        per_city = {}
        result = {"cities": per_city}

        for city in cities_to_browse:
            print("working on " + city)
            try:
                # Register the city before scraping so a failure still leaves
                # an (empty) entry in the result.
                scores = per_city[city] = {}

                page = Scraper.at("https://nomadlist.com/" + city)
                matches = page.with_attribute_having_values(
                    NomadCityCrawler.attribute_to_find,
                    NomadCityCrawler.attribute_to_find_values,
                )
                # NOTE(review): ``key`` is presumably the matched "data-key"
                # value -- confirm against Scraper.
                for key, element in matches:
                    scores[key] = element.find().get_attribute_value("data-value")

            except Exception as error:
                # Best effort: report the problem and carry on with the next city.
                print("For city " + city)
                print(error)

        return result


def main():
    """Crawl stats for every distinct city in the trips CSV and save them as JSON.

    Reads ``./../../data/trips.csv``, turns each row's ``city`` value into a
    lower-case, hyphen-separated identifier, removes duplicates while keeping
    first-seen order, and writes the crawled stats to
    ``./../../data/cities_stats.json``.
    """
    # Element [1] of the loaded result is iterated as the rows; each row is
    # indexed by the "city" key.
    rows = CsvHandler.load("./../../data/trips.csv")[1]
    slugs = [row["city"].lower().replace(" ", "-") for row in rows]

    # dict.fromkeys drops duplicates but preserves insertion order.
    unique_slugs = list(dict.fromkeys(slugs))

    stats = NomadCityCrawler.get_cities_info(unique_slugs)
    JsonHandler.save("./../../data/cities_stats.json", stats)


# Run the crawl only when this file is executed as a script, so that importing
# the module (e.g. to reuse NomadCityCrawler) does not trigger network requests
# and file writes as a side effect.
if __name__ == "__main__":
    main()