Newer
Older
class Element:
    """Thin, chainable wrapper around a BeautifulSoup node.

    The wrapped node may be ``None`` (for example when an earlier lookup
    found nothing); use :meth:`is_none` to check. Lookup methods called on
    an element that wraps ``None`` return ``None`` rather than an
    ``Element``, so a chain must be checked before it is continued.
    """

    # Annotations are quoted so that neither ``Element`` (not yet bound while
    # its own class body runs) nor ``BeautifulSoup`` has to be resolvable at
    # class-creation time.
    _soup: "BeautifulSoup"

    def __init__(self, soup: "BeautifulSoup"):
        """Wrap *soup*; ``None`` is accepted and stored as-is."""
        self._soup = soup

    def with_id(self, id: str) -> "Element | None":
        """Wrap the result of ``find(id=id)`` on the wrapped node.

        Returns ``None`` when this element wraps ``None``. The returned
        element may itself wrap ``None`` if ``find`` found nothing.
        """
        if self._soup is None:
            return None
        return Element(self._soup.find(id=id))

    def with_attribute(
        self, attribute: str, value: "str | None" = None
    ) -> "Element | None":
        """Wrap the result of ``find(attrs={attribute: value})``.

        Returns ``None`` when this element wraps ``None``.
        """
        if self._soup is None:
            return None
        return Element(self._soup.find(attrs={attribute: value}))

    def contains_text(self, text: str) -> "Element | None":
        """Wrap the first text match for ``.*<text>.*`` under the wrapped node.

        Returns ``None`` when this element wraps ``None``.
        """
        if self._soup is None:
            return None
        # NOTE: *text* is inserted into the pattern unescaped, so regex
        # metacharacters in it are interpreted as regex syntax.
        pattern = re.compile('.*' + text + '.*')
        return Element(self._soup.find(text=pattern))

    # NOTE(review): the ``def`` line of this method was missing from the
    # source (its body sat unreachable after the ``return`` of
    # ``contains_text``). The name ``parent`` is inferred -- confirm.
    def parent(self) -> "Element | None":
        """Wrap the ``parent`` attribute of the wrapped node.

        Returns ``None`` when this element wraps ``None``.
        """
        if self._soup is None:
            return None
        return Element(self._soup.parent)

    def get_attribute_value(self, attribute: str):
        """Return ``node[attribute]``, or ``None`` when wrapping ``None``.

        Any error raised by the wrapped node's item lookup propagates.
        """
        if self._soup is None:
            return None
        return self._soup[attribute]

    # NOTE(review): the ``def`` line of this method was missing from the
    # source (its body sat unreachable after the ``return`` of
    # ``get_attribute_value``). The name ``get_text`` is inferred -- confirm.
    def get_text(self):
        """Return the text of the wrapped node.

        A ``NavigableString`` is returned as-is; for any other node the
        result of its ``get_text()`` is returned. Returns ``None`` when this
        element wraps ``None`` (guard added for consistency with the other
        accessors of this class).
        """
        if self._soup is None:
            return None
        if isinstance(self._soup, NavigableString):
            return self._soup
        return self._soup.get_text()

    def is_none(self) -> bool:
        """Return ``True`` when this element wraps ``None``."""
        return self._soup is None

    def get_soup(self):
        """Return the wrapped node unchanged (possibly ``None``)."""
        return self._soup
class Page:
    """Wrapper around an HTTP response that exposes its status and HTML.

    Only the ``status_code`` and ``content`` attributes of the wrapped
    object are used.
    """

    # ``requests.api`` is a module, not the type of the wrapped object.
    # Presumably this holds what ``requests.get`` returns -- confirm.
    # Quoted so ``requests`` need not be resolvable at class-creation time.
    _page: "requests.Response"

    def __init__(self, page: "requests.Response"):
        """Store *page* for later inspection."""
        self._page = page

    def status_code(self) -> int:
        """Return the ``status_code`` attribute of the wrapped response."""
        return self._page.status_code

    def find(self) -> "Element":
        """Parse the response ``content`` with ``html.parser`` and wrap it.

        Returns an ``Element`` around the resulting ``BeautifulSoup`` object.
        The content is parsed again on every call.
        """
        return Element(BeautifulSoup(self._page.content, "html.parser"))
class Scraper:
@staticmethod
def at(web: str) -> ScraperBodyInit:
page = requests.get(web)
if page.status_code == 200:
return ScraperBodyInit(BeautifulSoup(page.content, "html.parser"))