Skip to content
Snippets Groups Projects
scraper.py 1.76 KiB
Newer Older
Jiří Vrbka's avatar
Jiří Vrbka committed
from __future__ import annotations

from typing import Dict, List, Optional, Tuple

from bs4 import BeautifulSoup
import requests


class ScraperBodyGet:
    """Read-side wrapper around a single node of a parsed document.

    The wrapped object is stored untouched in ``self.soup``; every accessor
    simply forwards to it.
    """

    def __init__(self, soup: BeautifulSoup):
        # Keep the node exactly as handed in; no validation is performed.
        self.soup = soup

    def get_attribute_value(self, attribute: str):
        """Return the wrapped node's value for ``attribute`` (item lookup)."""
        node = self.soup
        return node[attribute]

    def get_text(self):
        """Return the result of calling ``get_text()`` on the wrapped node."""
        node = self.soup
        return node.get_text()

    def also(self):
        """Return a new ``ScraperBodyInit`` built over the same wrapped node."""
        follow_up = ScraperBodyInit(soup=self.soup)
        return follow_up


class ScraperBodyInit:
    """Fluent builder that describes one element lookup on a parsed document.

    Typical chain: ``at_element(...)`` and optionally
    ``with_attribute_having_value(...)``, then ``find()``.
    """

    soup: BeautifulSoup
    # Tag name to search for; stays None until at_element() is called.
    element_name: Optional[str] = None
    # (attribute, value) pair used as the search filter; None means "no filter".
    element_attribute: Optional[Tuple[str, str]] = None

    def __init__(self, soup: BeautifulSoup):
        self.soup = soup

    def at_element(self, name_of_element: str) -> ScraperBodyInit:
        """Set the tag name to search for and return ``self`` for chaining."""
        self.element_name = name_of_element
        return self

    def with_attribute_having_value(self, attribute: str, value: str) -> ScraperBodyInit:
        """Set the ``attribute == value`` filter and return ``self`` for chaining."""
        self.element_attribute = (attribute, value)
        return self

    def with_attribute_having_values(
        self, attribute: str, values: List[str]
    ) -> List[Tuple[str, ScraperBodyInit]]:
        """Fan out into one independent builder per value.

        Each returned builder shares this builder's document and element name
        but carries its own ``(attribute, value)`` filter; this builder's own
        filter is left unchanged.

        Returns:
            A list of ``(value, builder)`` pairs in the order of ``values``.
        """
        return [
            (
                value,
                ScraperBodyInit(self.soup)
                .at_element(self.element_name)
                .with_attribute_having_value(attribute, value),
            )
            for value in values
        ]

    def find(self) -> ScraperBodyGet:
        """Run the configured lookup and wrap the result in ``ScraperBodyGet``.

        When no attribute filter has been configured the search is done with
        an empty filter instead of failing on the unset ``element_attribute``.

        NOTE(review): the wrapped node is whatever ``soup.find`` returns; for
        BeautifulSoup that is presumably ``None`` when nothing matches, so
        callers may want to check before reading from the result.
        """
        attrs = {}
        if self.element_attribute is not None:
            attribute, value = self.element_attribute
            attrs[attribute] = value
        new_soup = self.soup.find(self.element_name, attrs)
        return ScraperBodyGet(new_soup)

    def reset(self) -> None:
        """Clear the configured element name and attribute filter."""
        self.element_name = None
        self.element_attribute = None


class Scraper:
    """Entry point of the scraping chain: downloads a page and parses it."""

    @staticmethod
    def at(web: str, timeout: float = 30.0) -> Optional[ScraperBodyInit]:
        """Fetch ``web`` and return a query builder over its parsed HTML.

        Args:
            web: URL of the page to download.
            timeout: Seconds to wait for the server before ``requests`` gives
                up; without it a stalled server would block the call forever.

        Returns:
            A ``ScraperBodyInit`` over the parsed page when the response
            status is 200, otherwise ``None``.
        """
        page = requests.get(web, timeout=timeout)
        if page.status_code != 200:
            # Non-OK responses yield no builder; made explicit so the
            # Optional return type is visible to callers.
            return None
        return ScraperBodyInit(BeautifulSoup(page.content, "html.parser"))