from __future__ import annotations import re from typing import List, Dict from bs4 import BeautifulSoup, NavigableString import requests class ScraperBodyGet: def __init__(self, soup: BeautifulSoup): self.soup = soup def get_attribute_value(self, attribute: str): return self.soup[attribute] def get_text(self): return self.soup.get_text() def also(self): return ScraperBodyInit(soup=self.soup) class ScraperBodyInit: soup: BeautifulSoup element_name: str = None element_attribute: (str, str) = None def __init__(self, soup: BeautifulSoup): self.soup = soup def at_element(self, name_of_element: str) -> ScraperBodyInit: self.element_name = name_of_element return self def with_attribute_having_value(self, attribute: str, value: str) -> ScraperBodyInit: self.element_attribute = (attribute, value) return self def with_attribute_having_values(self, attribute: str, values: List[str]) -> List[(str, ScraperBodyInit)]: results = [] for value in values: s = ScraperBodyInit(self.soup) s.at_element(self.element_name) s.with_attribute_having_value(attribute, value) results.append((value, s)) return results def find(self): attrs = {self.element_attribute[0]: self.element_attribute[1]} new_soup = self.soup.find(self.element_name, attrs) return ScraperBodyGet(new_soup) def reset(self) -> None: self.element_name = None self.element_attribute = None class Element: _soup: BeautifulSoup def __init__(self, soup: BeautifulSoup): self._soup = soup def with_id(self, id: str) -> Element: return Element(self._soup.find(id=id)) if self._soup is not None else None def with_attribute(self, attribute: str, value: str = None) -> Element: return Element(self._soup.find(attrs={attribute: value})) if self._soup is not None else None def contains_text(self, text: str): return Element(self._soup.find(text=re.compile('.*' + text + '.*'))) if self._soup is not None else None def parent(self): return Element(self._soup.parent) if self._soup is not None else None def get_attribute_value(self, attribute: str): return self._soup[attribute] if self._soup is not None else None def get_text(self): if self._soup is None: return None if isinstance(self._soup, NavigableString): return self._soup return self._soup.get_text() def is_none(self): return self._soup is None def get_soup(self): return self._soup class Page: _page: requests.api def __init__(self, page: requests.api): self._page = page def status_code(self) -> int: return self._page.status_code def find(self): return Element(BeautifulSoup(self._page.content, "html.parser")) class Then: _soup: BeautifulSoup _status_code: int def __init__(self, soup: BeautifulSoup, status_code: int): self._soup = soup self._status_code = status_code def get_status_code(self): return self._status_code def get_attribute(self, attribute: str): return None if self._soup is None else self._soup[attribute] def get_text(self): return None if self._soup is None else self._soup.get_text() class When: _soup: BeautifulSoup _status_code: int def __init__(self, soup: BeautifulSoup, status_code: int): self._soup = soup self._status_code = status_code def id(self, id: str) -> When: self._soup = self._soup.find(id=id) return self def attribute(self, attribute: str, value: str = None) -> When: self._soup = self._soup.find(attrs={attribute: value}) return self def Then(self): return Then(self._soup, self._status_code) class Given: _request: requests.api def __init__(self, url: str): self._request = requests.get(url=url) def status_code(self) -> int: return self._request.status_code def When(self): return When(BeautifulSoup(self._request.content, "html.parser")) class Scraper: @staticmethod def at(web: str) -> ScraperBodyInit: page = requests.get(web) if page.status_code == 200: return ScraperBodyInit(BeautifulSoup(page.content, "html.parser")) @staticmethod def get(web: str) -> Page: page = requests.get(web) return Page(page)