Skip to content
Snippets Groups Projects
Commit 81f34ff4 authored by Jirka's avatar Jirka
Browse files

crawler for city knowledge base via nomadlist

parent be6b56c2
No related branches found
No related tags found
2 merge requests: !5 City knowledge base nomad, !3 City knowledge base nomad
from typing import List, Dict
from src.handler.csv_handler import CsvHandler
from src.handler.json_handler import JsonHandler
from src.scraper.scraper import Scraper
class NomadCityCrawler:
    """Gather a fixed set of per-city scores from nomadlist.com pages.

    The class only holds configuration (which attribute and which attribute
    values to look for) and a single static entry point.
    """

    # Attribute name handed to the scraper to locate the score elements.
    attribute_to_find = "data-key"
    # Values of that attribute that are looked up for every city.
    attribute_to_find_values = [
        "hospital_score",
        "english_speaking",
        "nightlife",
        "female_friendly",
        "racial_tolerance",
        "peace_score",
    ]

    @staticmethod
    def get_cities_info(cities_to_browse: List[str]) -> dict:
        """Return the scraped scores for every city in *cities_to_browse*.

        Each city name is appended to ``https://nomadlist.com/`` to form the
        address passed to ``Scraper.at``. For every ``(key, element)`` pair
        yielded by ``with_attribute_having_values``, the ``data-value``
        attribute of ``element.find()`` is stored under ``key``.

        Failures are handled per city: any exception is printed and the loop
        moves on, so a failing city keeps whatever was collected before the
        error (an empty dict if nothing was).

        Returns:
            ``{"cities": {city: {key: score, ...}, ...}}``
        """
        per_city = {}
        cities_stats = {"cities": per_city}

        for city in cities_to_browse:
            print("working on " + city)
            try:
                # Register the city before scraping so it is present in the
                # result even when the scrape below raises.
                scores = {}
                per_city[city] = scores

                page = Scraper.at("https://nomadlist.com/" + city)
                matches = page.with_attribute_having_values(
                    NomadCityCrawler.attribute_to_find,
                    NomadCityCrawler.attribute_to_find_values,
                )
                for key, element in matches:
                    scores[key] = element.find().get_attribute_value("data-value")
            except Exception as error:
                # Best-effort crawl: report the problem and continue.
                print("For city " + city)
                print(error)

        return cities_stats
def main():
    """Crawl city scores for every city in the trips CSV and save them as JSON.

    Reads ``./../../data/trips.csv`` through ``CsvHandler.load`` and iterates
    over element ``[1]`` of its result (presumably the data rows; confirm
    against ``CsvHandler``). Each row's ``"city"`` value is lower-cased and
    spaces become hyphens. Duplicates are dropped, keeping first-seen order,
    before crawling. The result is written to
    ``./../../data/cities_stats2.json``.
    """
    rows = CsvHandler.load("./../../data/trips.csv")[1]
    # dict.fromkeys keeps the first occurrence of each slug, in order.
    unique_slugs = dict.fromkeys(
        row["city"].lower().replace(" ", "-") for row in rows
    )
    cities = list(unique_slugs)
    JsonHandler.save(
        "./../../data/cities_stats2.json",
        NomadCityCrawler.get_cities_info(cities),
    )
# Script entry point: the crawl runs whenever this module is executed or
# imported (there is no `if __name__ == "__main__"` guard).
main()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment