How To Speed Up Parsing Using Beautifulsoup?
I want to make a list of music festivals in Korea, so I tried to crawl a website selling festival tickets:

import requests
from bs4 import BeautifulSoup

INTERPARK_BASE_URL = 'http
Solution 1:
import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime as dt
import csv


def Soup(content):
    soup = BeautifulSoup(content, 'html.parser')
    return soup


def Main(url):
    # Collect the detail-page links from the festival list page.
    r = requests.get(url)
    soup = Soup(r.content)
    spans = soup.find_all('span', class_='fw_bold')
    # url[:27] is 'http://ticket.interpark.com', the site root.
    links = [f"{url[:27]}{span.a['href']}" for span in spans]
    return links


def Parent():
    links = Main(
        "http://ticket.interpark.com/TPGoodsList.asp?Ca=Liv&SubCa=Fes")
    with open("result.csv", 'w', newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Name", "Singers", "Location", "Date", "ImageUrl"])
        # Reuse one session (keep-alive) for all detail-page requests.
        with requests.Session() as req:
            for link in links:
                r = req.get(link)
                soup = Soup(r.content)
                # Each detail page embeds its event data as JSON-LD.
                script = json.loads(
                    soup.find("script", type="application/ld+json").text)
                name = script["name"]
                print(f"Extracting: {name}")
                singers = script["performer"]["name"]
                location = script["location"]["name"]
                # Positions 3:5 hold the start and end dates (YYYYMMDD).
                datelist = list(script.values())[3:5]
                datest = []
                image = script["image"]
                for date in datelist:
                    date = dt.strptime(date, '%Y%m%d').strftime('%d-%m-%Y')
                    datest.append(date)
                writer.writerow(
                    [name, singers, location, " : ".join(datest), *image])


Parent()
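Since the question is specifically about parsing speed, two common ways to make the per-page work cheaper are switching BeautifulSoup to the lxml parser and restricting parsing to the one tag you actually need with SoupStrainer; for this crawl the larger cost is usually the network, which can be overlapped with a thread pool. The sketch below is not part of the original answer: it assumes lxml is installed (pip install lxml), and get_event / fetch_all are helper names invented here for illustration.

from concurrent.futures import ThreadPoolExecutor
import json

import requests
from bs4 import BeautifulSoup, SoupStrainer

# Only parse the JSON-LD <script> tag instead of the whole detail page.
ONLY_LD_JSON = SoupStrainer("script", type="application/ld+json")


def get_event(session, link):
    # lxml is typically faster than html.parser; parse_only skips everything
    # outside the strained tag.
    r = session.get(link)
    soup = BeautifulSoup(r.content, "lxml", parse_only=ONLY_LD_JSON)
    return json.loads(soup.find("script", type="application/ld+json").text)


def fetch_all(links, workers=8):
    # Overlap the HTTP requests; each response is parsed as it arrives.
    # A shared requests.Session is generally fine for simple concurrent GETs.
    with requests.Session() as session:
        with ThreadPoolExecutor(max_workers=workers) as executor:
            return list(executor.map(lambda u: get_event(session, u), links))

fetch_all(Main("http://ticket.interpark.com/TPGoodsList.asp?Ca=Liv&SubCa=Fes")) would then feed the same CSV-writing loop as above, with the fetching and parsing no longer done strictly one page at a time.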