diff --git a/src/scraper.py b/src/scraper.py
index 775c5b40a4ea1588df1229891665e700260ad899..f8aeee79be016ed1327025f7ba016e847c0df147 100644
--- a/src/scraper.py
+++ b/src/scraper.py
@@ -1,8 +1,138 @@
-# PV254-city-recommender
+from bs4 import BeautifulSoup
+import requests
 
-Project for PV254 (Fall 2019). Recommends a city based on your travel history.
+BASE_URL = "https://nomadlist.com/"
+DEFAULT_USER = "@levelsio"
 
-## Dataset
+def get_users(soup, n=1000):
+    """Gets users from the "crossed paths with" section on a user's profile.
+    Args:
+        soup: BeautifulSoup object of the loaded profile page
+        n: number of users to get
+    Returns:
+        List of user profile paths
+    """
+    users = []
+    user_index = 0
+    while len(users) < n:
+        overlaps = soup.find("div", {"id": "most-overlaps"})
+        if overlaps:
+            for a in overlaps.find_all('a', href=True):
+                users.append(a['href'])
+            users = list(dict.fromkeys(users))  # remove duplicates, keep order
+        if user_index >= len(users):
+            break  # no unvisited profiles left to crawl
+        page = requests.get(f"{BASE_URL}{users[user_index]}")
+        user_index += 1
+        soup = BeautifulSoup(page.content, 'html.parser')
+    return users[:n]
 
-The dataset is scraped from [nomadlist.com](www.nomadlist.com)
-The dataset is found in `data/trips.csv` contains
\ No newline at end of file
+def get_most_visited_cities(soup):
+    """Gets the cities most visited by a user from the 'Most visited' section.
+    Args:
+        soup: BeautifulSoup object of the loaded profile page
+    Returns:
+        Dict of city: number of visits
+    """
+    trips_to_cities = soup.find("div", class_="most_trips_to_cities")
+    city_visits = dict()
+    if trips_to_cities is None:
+        return city_visits
+    names = trips_to_cities.div.find_all("li")
+    counts = trips_to_cities.div.find_all("span", class_="extra-people-counter")
+    for city, counter in zip(names, counts):
+        city_visits[city["data-slug"]] = int(counter.text.strip("x"))
+    return city_visits
+
+def get_most_time_spent(soup):
+    """Gets the cities a user spent the most time in from the 'Most time spent' section.
+    Args:
+        soup: BeautifulSoup object of the loaded profile page
+    Returns:
+        Dict of city: time spent
+    """
+    most_visited_cities = soup.find_all("div", class_="most_visited_cities")
+    city_time = dict()
+    if len(most_visited_cities) > 1:
+        # Two divs share this class name; the second one holds time spent.
+        most_time_spent = most_visited_cities[1]
+        names = most_time_spent.div.find_all("li")
+        counts = most_time_spent.div.find_all("span", class_="extra-people-counter")
+        for city, time in zip(names, counts):
+            city_time[city["data-slug"]] = time.text
+    return city_time
+
+def convert_time_to_days(t):
+    """Converts a time span given in years, months, or days to days.
+    Args:
+        t: time string such as '1yr', '11mo', or '3d'
+    Returns:
+        Time span in days as a float
+    """
+    try:
+        if t[-2:] == "yr":
+            return float(t[:-2]) * 365.25
+        elif t[-2:] == "mo":
+            return float(t[:-2]) * 30.5
+        elif t[-1:] == "d":
+            return float(t[:-1])
+        else:
+            return float(t[:-2])
+    except ValueError:
+        return t  # unparseable values are passed through unchanged
+
+def convert_list_time_to_days(time):
+    """Converts time spans in the form ['1yr', '11mo', ...] to days.
+    Args:
+        time: list of time strings
+    Returns:
+        List of time spans in days
+    """
+    return [convert_time_to_days(t) for t in time]
+
+def parse_trip_information(trip, user, table_row):
+    """Gets the relevant info from a trip table row.
+    Args:
+        trip: specific trip to be parsed
+        user: user associated with the trip
+        table_row: csv string of rows to which the new trip info is appended
+    Returns:
+        The csv string with one comma-separated row per trip appended
+    """
+    delimiter = ", "
+    table_row += user.strip("/@") + delimiter
+    city_name = trip.find("td", class_="name").h2.text.split(",")[0]
+    table_row += city_name + delimiter
+    table_row += trip.find("td", class_="country").text + delimiter
+    table_row += trip['data-date-start'] + delimiter
+    table_row += trip['data-date-end'] + delimiter
+    duration = trip.find("td", class_="trip_start").find_next('td').text
+    table_row += str(convert_time_to_days(duration)) + delimiter
+    table_row += trip['data-latitude'] + delimiter + trip['data-longitude'] + "\n"
+    return table_row
+
+def create_dataset(n_users=3700):
+    """Creates the user-trip dataset by scraping user pages from nomadlist.com.
+    Dumps the output to a 'trips.csv' file.
+    Args:
+        n_users: number of users to search for
+    """
+    page = requests.get(f"{BASE_URL}{DEFAULT_USER}")
+    soup = BeautifulSoup(page.content, 'html.parser')
+    users = get_users(soup, n_users)
+    print(f"Found {len(users)} users.")
+
+    with open('trips.csv', 'w', encoding="utf-8") as f:
+        f.write("user, city, country, trip_start, trip_end, trip_duration, latitude, longitude\n")
+        for user in users:
+            page = requests.get(f"{BASE_URL}{user}")
+            soup = BeautifulSoup(page.content, 'html.parser')
+            trips = soup.find_all("tr", class_="trip")
+            print(f"Found {len(trips)} trips for {user}.")
+            table_row = ""
+            for trip in trips:
+                table_row = parse_trip_information(trip, user, table_row)
+            f.write(table_row)
+
+if __name__ == "__main__":
+    create_dataset()
\ No newline at end of file
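
A quick sanity check of the time-conversion helpers above. This is a minimal sketch, assuming the module is importable as `scraper` (e.g. when run from src/); the expected values follow the 365.25 days-per-year and 30.5 days-per-month conventions used in convert_time_to_days.

from scraper import convert_time_to_days, convert_list_time_to_days

# Year and month strings parse to days using the conventions above;
# plain day counts like '3d' pass through as floats.
assert convert_time_to_days("1yr") == 365.25
assert convert_time_to_days("11mo") == 11 * 30.5
assert convert_time_to_days("3d") == 3.0
assert convert_list_time_to_days(["1yr", "11mo", "3d"]) == [365.25, 335.5, 3.0]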
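
And a usage sketch for the scraper end to end. The invocation is hypothetical: n_users=50 is chosen only to keep the run short, and the stdlib csv module's skipinitialspace flag matches the ", " delimiter written by parse_trip_information.

import csv
import scraper

# Network-bound: fetches profile pages from nomadlist.com, then writes
# trips.csv into the current working directory.
scraper.create_dataset(n_users=50)

with open("trips.csv", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f, skipinitialspace=True)
    for row in list(reader)[:5]:
        print(row["user"], row["city"], row["trip_duration"])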