"""Download every image referenced by a web page.

Main file for the project. An external module named ``urlname.py`` is
needed, with a ``url`` variable set to the URL of the site you want to
scrape.

Each image is written to the current working directory as ``image1.png``,
``image2.png``, ... in the order its ``<img>`` tag appears in the page.
The ``.png`` suffix is applied regardless of the real image format.
"""
# Provenance: webScraper.py as added in commit
# 4dea39acf003152e1f2d95b7e524342c9dcc02b1 ("Add files via upload"),
# Crimson Tome, Wed May 19 20:05:30 2021 +0100.

from time import sleep
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

import urlname

# Seconds to wait on a server before giving up; without a timeout a stalled
# connection would hang the script indefinitely.
REQUEST_TIMEOUT = 30


def getdata(url, timeout=REQUEST_TIMEOUT):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before
            ``requests.exceptions.Timeout`` is raised.

    Returns:
        The decoded body of the HTTP response. The status code is not
        checked, so the body of an error page is returned as-is.

    Raises:
        requests.RequestException: If the request cannot be completed.
    """
    r = requests.get(url, timeout=timeout)
    return r.text


htmldata = getdata(urlname.url)
soupData = BeautifulSoup(htmldata, 'html.parser')

# Skip <img> tags that have no (or an empty) src: indexing item['src'] on
# those raises KeyError and used to abort the whole run. Relative sources
# are resolved against the page URL, because requests.get() rejects a URL
# without a scheme; absolute sources pass through urljoin unchanged.
listofimages = [
    urljoin(urlname.url, item['src'])
    for item in soupData.find_all('img')
    if item.get('src')
]

# Number used in the next output file name; it only advances after a
# successful download, so the saved files are numbered without gaps.
count = 1
for i in listofimages:
    print(i)
    try:
        response = requests.get(i, timeout=REQUEST_TIMEOUT)
        # Do not save an HTTP error page under an image file name.
        response.raise_for_status()
        with open("image" + str(count) + ".png", "wb") as out:
            out.write(response.content)
    except (requests.RequestException, OSError):
        # Best effort: report the failure and carry on with the next image.
        # Only network and file-system errors are caught, so Ctrl-C and
        # genuine programming errors still surface.
        print("Sorry, something went wrong. Moving onto the next image \n")
        continue
    count += 1
    print("download of " + i + " complete \n")
    # Pause between downloads to avoid hammering the remote server.
    sleep(1)