from __future__ import annotations

import re

import requests
from bs4 import BeautifulSoup, NavigableString


class Element:
    """Thin wrapper around a BeautifulSoup node that supports chained lookups."""

    _soup: BeautifulSoup

    def __init__(self, soup: BeautifulSoup):
        self._soup = soup

    def with_id(self, id: str) -> Element:
        # A failed lookup wraps None instead of returning bare None, so
        # further chained calls stay safe; check is_none() to detect a miss.
        if self._soup is None:
            return Element(None)
        return Element(self._soup.find(id=id))

    def with_attribute(self, attribute: str, value: str | None = None) -> Element:
        if self._soup is None:
            return Element(None)
        # Without an explicit value, match any tag that carries the attribute.
        return Element(self._soup.find(attrs={attribute: value if value is not None else True}))

    def contains_text(self, text: str) -> Element:
        if self._soup is None:
            return Element(None)
        # string= is the current bs4 keyword; text= is deprecated.
        return Element(self._soup.find(string=re.compile('.*' + text + '.*')))

    def parent(self) -> Element:
        if self._soup is None:
            return Element(None)
        return Element(self._soup.parent)

    def get_attribute_value(self, attribute: str):
        return self._soup[attribute] if self._soup is not None else None

    def get_text(self):
        if self._soup is None:
            return None
        # A text search yields a NavigableString, which already is the text.
        if isinstance(self._soup, NavigableString):
            return self._soup
        return self._soup.get_text()

    def is_none(self) -> bool:
        return self._soup is None

    def get_soup(self) -> BeautifulSoup:
        return self._soup


class Page:
    """Wrapper around an HTTP response that exposes its parsed document."""

    _page: requests.Response

    def __init__(self, page: requests.Response):
        self._page = page

    def status_code(self) -> int:
        return self._page.status_code

    def find(self) -> Element:
        return Element(BeautifulSoup(self._page.content, "html.parser"))


class Scraper:
    @staticmethod
    def at(web: str) -> Element | None:
        # Fetch and parse in one step; returns None unless the request
        # comes back with HTTP 200.
        page = requests.get(web)
        if page.status_code == 200:
            return Element(BeautifulSoup(page.content, "html.parser"))
        return None

    @staticmethod
    def get(web: str) -> Page:
        # Return the raw response wrapped in a Page so the caller can
        # inspect the status code before parsing.
        page = requests.get(web)
        return Page(page)
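

# Usage sketch (illustrative only): the URL, the "price" id, and the "Next"
# link text below are assumptions, not part of the library itself.
if __name__ == "__main__":
    # Fetch via Scraper.get() when you want to inspect the status code first.
    page = Scraper.get("https://example.com/products/1")
    if page.status_code() == 200:
        price = page.find().with_id("price")
        if not price.is_none():
            print(price.get_text())

    # Scraper.at() fetches and parses in one step, returning None on failure.
    root = Scraper.at("https://example.com/products/1")
    if root is not None:
        next_link = root.contains_text("Next").parent()
        if not next_link.is_none():
            print(next_link.get_attribute_value("href"))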