spørsmål is scraped!

Co-authored-by: haraldnilsen <harald_998@hotmail.com>
Signed-off-by: Sindre Kjelsrud <kjelsrudsindre@gmail.com>
This commit is contained in:
Sindre Kjelsrud 2024-01-08 14:14:05 +01:00
parent 8b5f270b5e
commit 0da7504104
Signed by untrusted user who does not match committer: sidski
GPG key ID: D2BBDF3EDE6BA9A6

24
main.py
View file

@ -1,5 +1,5 @@
import httpx
import json
from selectolax.parser import HTMLParser
def fetch_question_url(url):
# Define headers
@ -36,9 +36,25 @@ def fetch_question_url(url):
else:
print(f"Failed to fetch data: {response.status_code}")
def fetch_all_info(url):
    """Fetch a page and return the text of its ``div.article-text`` element.

    Sends a GET request to ``url`` with a browser-like User-Agent header,
    parses the response body as HTML and extracts the text of the first
    element matching the CSS selector ``div.article-text``.

    Args:
        url: Address of the page to scrape.

    Returns:
        The text of the first ``div.article-text`` element, or ``None`` when
        the server answers with a 4xx/5xx status or the page contains no
        matching element.

    Network-level exceptions raised by ``httpx.get`` are not caught here.
    """
    # Define headers
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0"
    }

    response = httpx.get(url, headers=headers)

    # Do not try to scrape an error page; report it the same way the
    # URL-listing fetch in this file reports a failed request.
    if response.is_error:
        print(f"Failed to fetch data: {response.status_code}")
        return None

    html = HTMLParser(response.text)

    # Extracting the question text. css_first yields None when nothing
    # matches, so guard before calling .text() on it.
    article = html.css_first('div.article-text')
    if article is None:
        return None
    sporsmal = article.text()

    # TODO: also extract the answer and signature and return all three.
    return sporsmal
# URL passed to fetch_question_url to obtain the list of page URLs.
studenterspor_url = "https://www.studenterspor.no/ajax_handler.php"
urls = fetch_question_url(studenterspor_url)

# Both loops stay behind the guard: when the URL fetch fails there may be
# nothing iterable to walk (its failure branch only prints a message --
# NOTE(review): confirm it returns None in that case).
if urls:
    # First list every URL that was found ...
    for url in urls:
        print(url)

    # ... then fetch each page and print what was scraped from it.
    for url in urls:
        print(fetch_all_info(url))