from typing import Dict, List

from src.handler.csv_handler import CsvHandler
from src.handler.json_handler import JsonHandler
from src.scraper.scraper import Scraper


class NomadCityCrawler:
    """Scrapes per-city quality-of-life scores from nomadlist.com pages."""

    # HTML attribute that names the metric on each score element.
    attribute_to_find = "data-key"
    # Metric names to extract for every city.
    attribute_to_find_values = [
        "hospital_score",
        "english_speaking",
        "nightlife",
        "female_friendly",
        "racial_tolerance",
        "peace_score",
    ]

    @staticmethod
    def get_cities_info(cities_to_browse: List[str]) -> Dict[str, dict]:
        """Fetch the configured scores for each city slug.

        Args:
            cities_to_browse: nomadlist URL slugs (e.g. "chiang-mai").

        Returns:
            ``{"cities": {city: {metric: score, ...}, ...}}``. A city whose
            page fails to scrape keeps whatever metrics were collected
            before the failure (possibly an empty dict).
        """
        cities_stats: Dict[str, dict] = {"cities": {}}

        for city in cities_to_browse:
            print(f"working on {city}")
            try:
                cities_stats["cities"][city] = {}

                context = Scraper.at(f"https://nomadlist.com/{city}")
                for key, element in context.with_attribute_having_values(
                        NomadCityCrawler.attribute_to_find,
                        NomadCityCrawler.attribute_to_find_values):
                    score = element.find().get_attribute_value("data-value")
                    cities_stats["cities"][city][key] = score
            except Exception as e:
                # Best-effort crawl: log and continue with the next city
                # rather than aborting the whole run on one bad page.
                print(f"For city {city}")
                print(e)

        return cities_stats


def main() -> None:
    """Load trip cities from CSV, scrape their stats, and save them as JSON."""
    cities = []
    for line in CsvHandler.load("./../../data/trips.csv")[1]:
        # Normalize "Chiang Mai" -> "chiang-mai" to match nomadlist slugs.
        cities.append(line["city"].lower().replace(" ", "-"))

    # De-duplicate while preserving first-seen order (dict keys are ordered).
    cities = list(dict.fromkeys(cities))
    cities_info = NomadCityCrawler.get_cities_info(cities)
    JsonHandler.save("./../../data/cities_stats2.json", cities_info)


# Guard the entry point so importing this module does not trigger a crawl.
if __name__ == "__main__":
    main()