Skip to content
Snippets Groups Projects

City knowledge base nomad

Merged Jiří Vrbka requested to merge city-knowledge-base-nomad into master
1 file
+ 2
102
Compare changes
  • Side-by-side
  • Inline
+ 2
102
from __future__ import annotations
import re
from typing import List, Dict
from bs4 import BeautifulSoup, NavigableString
from bs4 import BeautifulSoup
import requests
class ScraperBodyGet:
    """Read values from the element produced by a scraper query.

    ``soup`` is stored exactly as given; it is ``None`` when the query that
    produced it matched nothing, and the getters then return ``None``
    instead of raising.
    """

    def __init__(self, soup: BeautifulSoup):
        self.soup = soup

    def get_attribute_value(self, attribute: str):
        """Return ``self.soup[attribute]``, or ``None`` when there is no element."""
        if self.soup is None:
            return None
        return self.soup[attribute]

    def get_text(self):
        """Return ``self.soup.get_text()``, or ``None`` when there is no element."""
        if self.soup is None:
            return None
        return self.soup.get_text()

    def also(self):
        """Return a new ``ScraperBodyInit`` query rooted at the current element."""
        return ScraperBodyInit(soup=self.soup)
class ScraperBodyInit:
    """Fluent builder for a single ``soup.find`` lookup.

    Configure the query with ``at_element`` and, optionally,
    ``with_attribute_having_value``; run it with ``find``.
    """

    soup: BeautifulSoup
    # Tag name to search for; None until ``at_element`` is called.
    element_name: str | None = None
    # ``(attribute, value)`` filter; None until an attribute filter is set.
    element_attribute: tuple[str, str] | None = None

    def __init__(self, soup: BeautifulSoup):
        self.soup = soup

    def at_element(self, name_of_element: str) -> ScraperBodyInit:
        """Set the tag name to look for and return ``self`` for chaining."""
        self.element_name = name_of_element
        return self

    def with_attribute_having_value(self, attribute: str, value: str) -> ScraperBodyInit:
        """Set the ``attribute == value`` filter and return ``self`` for chaining."""
        self.element_attribute = (attribute, value)
        return self

    def with_attribute_having_values(
        self, attribute: str, values: List[str]
    ) -> List[tuple[str, ScraperBodyInit]]:
        """Return one ``(value, query)`` pair per entry of ``values``.

        Each query is a fresh ``ScraperBodyInit`` on the same soup and element
        name, filtered on ``attribute == value``. ``self`` is left unchanged.
        """
        return [
            (
                value,
                ScraperBodyInit(self.soup)
                .at_element(self.element_name)
                .with_attribute_having_value(attribute, value),
            )
            for value in values
        ]

    def _attrs(self) -> dict:
        """Return the attribute filter as a dict; empty when none was set."""
        if self.element_attribute is None:
            return {}
        name, value = self.element_attribute
        return {name: value}

    def find(self):
        """Run the lookup and wrap the result in a ``ScraperBodyGet``.

        Without an attribute filter the search uses the element name only,
        rather than failing on the unset filter.
        """
        new_soup = self.soup.find(self.element_name, self._attrs())
        return ScraperBodyGet(new_soup)

    def reset(self) -> None:
        """Clear the element name and the attribute filter."""
        self.element_name = None
        self.element_attribute = None
class Element:
_soup: BeautifulSoup
@@ -81,6 +31,7 @@ class Element:
if self._soup is None:
return None
from bs4 import NavigableString
if isinstance(self._soup, NavigableString):
return self._soup
@@ -106,57 +57,6 @@ class Page:
return Element(BeautifulSoup(self._page.content, "html.parser"))
class Then:
    """Final step of the chain: exposes the status code and the selected element.

    The element may be ``None``; the element getters then return ``None``.
    """

    _soup: BeautifulSoup
    _status_code: int

    def __init__(self, soup: BeautifulSoup, status_code: int):
        self._status_code = status_code
        self._soup = soup

    def get_status_code(self):
        """Return the status code given at construction."""
        return self._status_code

    def get_attribute(self, attribute: str):
        """Return ``element[attribute]``, or ``None`` when there is no element."""
        element = self._soup
        if element is None:
            return None
        return element[attribute]

    def get_text(self):
        """Return ``element.get_text()``, or ``None`` when there is no element."""
        element = self._soup
        if element is None:
            return None
        return element.get_text()
class When:
    """Selection step of the chain: narrows the soup to one element.

    Each selector replaces the current element with the result of ``find``
    on it. Once a lookup has found nothing the element is ``None`` and later
    selectors are no-ops, so the chain always reaches ``Then()``.
    """

    _soup: BeautifulSoup
    _status_code: int

    def __init__(self, soup: BeautifulSoup, status_code: int):
        self._soup = soup
        self._status_code = status_code

    def id(self, id: str) -> When:
        """Narrow to the first descendant found by ``find(id=id)``; return ``self``."""
        # Skip the lookup when an earlier step already came up empty.
        if self._soup is not None:
            self._soup = self._soup.find(id=id)
        return self

    def attribute(self, attribute: str, value: str = None) -> When:
        """Narrow to the first descendant found by ``find(attrs={attribute: value})``.

        Returns ``self`` for chaining.
        """
        # NOTE(review): the default ``value=None`` is passed to ``find`` unchanged;
        # confirm against the bs4 attribute-filter docs that this matches the
        # intended elements.
        if self._soup is not None:
            self._soup = self._soup.find(attrs={attribute: value})
        return self

    def Then(self):
        """Return a ``Then`` holding the current element and the status code."""
        return Then(self._soup, self._status_code)
class Given:
    """Entry step of the chain: fetches ``url`` and holds the response."""

    # Response returned by ``requests.get`` in ``__init__``.
    _request: requests.Response

    def __init__(self, url: str):
        self._request = requests.get(url=url)

    def status_code(self) -> int:
        """Return the status code of the fetched response."""
        return self._request.status_code

    def When(self):
        """Parse the response body with ``html.parser`` and start a ``When`` step.

        ``When`` requires the status code as its second argument, so it is
        passed along with the parsed soup.
        """
        return When(
            BeautifulSoup(self._request.content, "html.parser"),
            self._request.status_code,
        )
class Scraper:
@staticmethod
def at(web: str) -> ScraperBodyInit:
Loading