Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from __future__ import annotations

from typing import Dict, List, Optional, Tuple

from bs4 import BeautifulSoup
import requests
class ScraperBodyGet:
    """Read-side wrapper around a node produced by a search.

    Gives access to the wrapped node's attributes and text, and can hand the
    same node to a new ``ScraperBodyInit`` via :meth:`also`.
    """

    def __init__(self, soup: BeautifulSoup):
        # The node is stored as given; nothing is validated here.
        self.soup = soup

    def get_attribute_value(self, attribute: str):
        """Return the result of subscripting the wrapped node with ``attribute``."""
        node = self.soup
        return node[attribute]

    def get_text(self):
        """Return whatever the wrapped node's ``get_text()`` yields."""
        node = self.soup
        return node.get_text()

    def also(self):
        """Return a new ``ScraperBodyInit`` built on the same wrapped node."""
        node = self.soup
        return ScraperBodyInit(soup=node)
class ScraperBodyInit:
    """Fluent builder describing which element to look up in ``soup``.

    Typical use is ``at_element(...)``, optionally followed by
    ``with_attribute_having_value(...)``, and finished with ``find()``.
    The configuring methods mutate this instance and return it so calls
    can be chained.
    """

    soup: BeautifulSoup
    # Tag name to search for; None until ``at_element`` is called.
    element_name: Optional[str] = None
    # ``(attribute, value)`` filter; None means "no attribute filter".
    element_attribute: Optional[Tuple[str, str]] = None

    def __init__(self, soup: BeautifulSoup):
        self.soup = soup

    def at_element(self, name_of_element: str) -> ScraperBodyInit:
        """Set the tag name to search for and return ``self`` for chaining."""
        self.element_name = name_of_element
        return self

    def with_attribute_having_value(self, attribute: str, value: str) -> ScraperBodyInit:
        """Set the ``(attribute, value)`` filter and return ``self`` for chaining."""
        self.element_attribute = (attribute, value)
        return self

    def with_attribute_having_values(
        self, attribute: str, values: List[str]
    ) -> List[Tuple[str, ScraperBodyInit]]:
        """Fan out into one independent builder per value.

        Each returned builder shares this builder's ``soup`` and element
        name and filters ``attribute`` on one of ``values``. This builder's
        own filter is left untouched.

        Returns:
            A list of ``(value, builder)`` pairs in the order of ``values``;
            empty when ``values`` is empty.
        """
        return [
            (
                value,
                ScraperBodyInit(self.soup)
                .at_element(self.element_name)
                .with_attribute_having_value(attribute, value),
            )
            for value in values
        ]

    def _search_attrs(self) -> Dict[str, str]:
        """Build the attribute filter handed to ``soup.find``."""
        # No filter configured (fresh builder or after reset()): search by
        # element name only instead of failing on a None subscript.
        if self.element_attribute is None:
            return {}
        attribute, value = self.element_attribute
        return {attribute: value}

    def find(self):
        """Run the configured search and wrap the result.

        Calls ``self.soup.find`` with the element name and the attribute
        filter (an empty filter when none was set). The result is wrapped
        in ``ScraperBodyGet`` as-is, including when the search finds nothing.
        """
        new_soup = self.soup.find(self.element_name, self._search_attrs())
        return ScraperBodyGet(new_soup)

    def reset(self) -> None:
        """Clear the element name and attribute filter; ``soup`` is kept."""
        self.element_name = None
        self.element_attribute = None
class Scraper:
    """Entry point: fetch a page over HTTP and start a search on it."""

    @staticmethod
    def at(web: str, timeout: Optional[float] = 30.0) -> Optional[ScraperBodyInit]:
        """Download ``web`` and return a builder over its parsed HTML.

        Args:
            web: URL passed to ``requests.get``.
            timeout: Seconds passed to ``requests.get`` as its timeout, so a
                stalled server cannot block the caller forever. Pass ``None``
                to wait indefinitely.

        Returns:
            A ``ScraperBodyInit`` over the response body parsed with
            ``"html.parser"`` when the status code is 200, otherwise ``None``.

        Raises:
            Any exception raised by ``requests.get`` (including its timeout
            errors) propagates to the caller.
        """
        page = requests.get(web, timeout=timeout)
        if page.status_code != 200:
            # Callers must check for None before chaining further calls.
            return None
        return ScraperBodyInit(BeautifulSoup(page.content, "html.parser"))