Scraping A Dynamic Website, Selenium Part-III
So far, I have covered two approaches to scraping menus from doordash.com, in Part-I and Part-II. Here is Part-III, with another approach to web scraping.
The 3rd Approach
In this approach, I have not used the BeautifulSoup and Pandas libraries. The script scrapes the menus from the stores at any location of your choice and saves the data as a JSON file. The JSON file has a main header of “Menu”, sub-headers named after the food stores, and inner nodes for the food categories in each store. Each category then holds the product name as the key and its price as the value. Here is the output:
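Roughly, the saved file has this shape (the store, category, and item names here are invented purely for illustration):
{
    "Pizza Palace": {
        "Appetizers": [
            {"Garlic Bread": "$4.50"}
        ],
        "Pizzas": [
            {"Margherita Pizza": "$12.99"}
        ]
    },
    "Burger Barn": {
        "Burgers": [
            {"Classic Cheeseburger": "$8.99"}
        ]
    }
}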
Now, let’s look at the code.
# importing libraries
import ctypes
import json
import sys
import time
from typing import List
import selenium.webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support.wait import WebDriverWait
from selenium_move_cursor.MouseActions import move_to_element_chrome
1- import the necessary libraries
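selenium_move_cursor is a third-party helper that moves the mouse cursor in a human-like way. Assuming the standard PyPI package names, the dependencies install with:
# assumed package names on PyPI
# pip install selenium selenium-move-cursor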
# The URL
url = 'https://www.doordash.com/en-US'
# this will open doordash.com
# defining the driver; uncomment the two lines below to add the "--headless" argument
# opts = Options()
# opts.add_argument('--headless')
driver = selenium.webdriver.Chrome('chromedriver')
# open the URL
driver.maximize_window()
driver.get(url)
driver.implicitly_wait(220)
time.sleep(5)
# focus the current window
driver.switch_to.window(driver.window_handles[0])
time.sleep(10)
2- define and open the URL
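A side note on the commented-out headless lines above: to run Chrome without opening a window, the setup looks like this (a minimal sketch in the same Selenium 3 style as the rest of the script; note the flag takes two hyphens):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

opts = Options()
opts.add_argument('--headless')  # run Chrome without a visible window
driver = webdriver.Chrome('chromedriver', options=opts)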
# enter the location and search
# default search string for the location
search_query: str = "New York"
# (manual search string: sys.argv reads the terminal arguments,
# and the 2nd argument is the location search string)
if len(sys.argv) >= 2:
    search_query = sys.argv[1]
print(search_query)
# send the location to the search element
driver.find_element_by_css_selector("input[class='sc-OqFzE kdYTND']").send_keys(search_query)
time.sleep(3)
# click on the search button
driver.find_element_by_css_selector("button[class='sc-cRULEh fMiRie']").click()
driver.implicitly_wait(120)
3- search for a particular location
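For example, to scrape stores in Chicago instead of the default New York, pass the location as the first argument when launching the script:
# run from the terminal; "doordash_scraper.py" is a hypothetical name for this script
# python doordash_scraper.py "Chicago"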
# get the total number of stores on the page
time.sleep(5)
stores = driver.find_element_by_xpath(
    "//div[@class='sc-fQfKYo cUcAcc']"
    "//div[@class='sc-cOoQYZ eQkiJd']"
    "//span[@class='sc-bdVaJa hQDtnE']")
number = stores.text
# the element's text contains the count; pull out the number
strNo = [int(s) for s in number.split() if s.isdigit()]
i = strNo[-1]
print(i)
4- get the total number of stores at the location
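To see what that list comprehension does, here is a tiny standalone example (the "52 Stores" text is an invented stand-in for whatever the element actually contains):
number = "52 Stores"                   # hypothetical element text
strNo = [int(s) for s in number.split() if s.isdigit()]
print(strNo)   # [52]
i = strNo[-1]  # i == 52, the store count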
# scroll to each store and open it, one-by-one, for scraping
# the 'for' loop starts
time.sleep(5)
# loop through the menus of each store on the page
# dictionary for the JSON file
Menus = {}
x: int
for x in range(0, i, 1):
    div2 = driver.find_element_by_xpath("//div[@class='sc-jrOYZv bgfxZq']")
    element: List[WebElement] = div2.find_elements_by_xpath(
        "//div[@class='sc-EHOje eOpoWF']//span[@class = 'sc-bdVaJa bTYYIJ']"
        "|//div[@class='sc-EHOje eOpoWF']//span[@class = 'sc-bdVaJa lncAvd']")
    time.sleep(10)
    WebDriverWait(driver, 60)  # note: without .until(...), this wait object does nothing by itself
    strnm: WebElement = element[x]
    pos = strnm.location
    y = pos.get('y') - 70  # offset for the fixed page header
    driver.execute_script(f"window.scrollTo(0, {y})")
    store_name = str(strnm.text)
    print(f'{x}- ', store_name)
5- scroll to each store one-by-one, and print the name
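As an aside, instead of computing the y position by hand, the browser can scroll the element into view itself; a minimal alternative sketch (strnm is the store-name element from the loop above):
# let Chrome center the element in the viewport, avoiding the hard-coded 70px header offset
driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", strnm)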
# write the "store_name" in a JSON node
# sub-nodes of stores
sub_node = Menus [f'{store_name}']= {}
6- create sub-nodes in the JSON with names of the stores, in each iteration
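The chained assignment on that line does two things at once: it registers the store in Menus and keeps sub_node as a handle to the same new, empty dictionary. A standalone illustration (the store name is invented):
Menus = {}
sub_node = Menus['Pizza Palace'] = {}   # hypothetical store name
sub_node['Pizzas'] = []                 # writing through sub_node...
print(Menus)                            # ...updates Menus: {'Pizza Palace': {'Pizzas': []}}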
    # a list to store all menu names and check the length of the results
    names = []
    # open the store
    time.sleep(4)
    move_to_element_chrome(driver, strnm, display_scaling=100)
    actions = ActionChains(driver)
    actions.send_keys_to_element(strnm, Keys.ENTER).perform()
    # create a sub-node for each food category in the store under the "store_name" node
    # scrape the menus of the related food category
    # look for the categories of food
    categories = driver.find_element_by_xpath("//div[@class='sc-jrOYZv iAfIGO']")\
        .find_elements_by_xpath("//div[@class='sc-ddcOto bPbXPf']")
    no_of_categories = len(categories)
7- open the store and get the categories of food
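A subtlety in the XPath calls above: when a path passed to an element's find_elements_by_xpath starts with //, Selenium searches the whole page, not just inside that element. Prefixing the path with a dot scopes it to the element. A small illustration (the class name is made up):
# searches the entire document, even though it is called on div2
global_hits = div2.find_elements_by_xpath("//span[@class='menu-item']")
# searches only inside div2
scoped_hits = div2.find_elements_by_xpath(".//span[@class='menu-item']")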
    for n in range(0, no_of_categories, 1):
        # create a list for each category
        cet = categories[n]
        category = str(cet.text)
        sub_node[f'{category}'] = []
        # scrape the menu names and prices
        # use a fresh ActionChains so earlier queued actions are not replayed
        ActionChains(driver).send_keys_to_element(cet, Keys.ENTER).perform()
        itm = driver.find_elements_by_xpath("//div[@class='sc-BOulX imSnGi']//div[@class='sc-bscRGj CaBXz']")
        itms = itm[n]
        item = itms.find_elements_by_xpath("//div[@class='sc-iBfVdv dWPDtB']//span[@class='sc-bdVaJa gImhEG']")
        prcs = itms.find_elements_by_xpath("//div[@class='sc-eomEcv cVLFKN']//span[@class='sc-bdVaJa eEdxFA']")
        price = []
        foods = []
        for food in item:
            foods.append(food.text)
        if prcs:  # find_elements returns an empty list, never None, when nothing matches
            for value in prcs:
                price.append(value.text)
        else:
            for food in item:
                price.append('NaN')
        for m, p in zip(foods, price):
            # append each {item: price} pair to the food category's list
            sub_node[f"{category}"].append({m: p})
8- click on each category one-by-one, and scrape the related menu. The name of each category will be added to the JSON file as a sub-node of the “store name” node.
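The zip call above pairs each dish with its price and appends one {name: price} dictionary per item; a tiny standalone example (the names and prices are invented):
foods = ['Margherita Pizza', 'Garlic Bread']   # hypothetical scraped names
price = ['$12.99', '$4.50']                    # hypothetical scraped prices
sub_node = {'Pizzas': []}
for m, p in zip(foods, price):
    sub_node['Pizzas'].append({m: p})
print(sub_node)  # {'Pizzas': [{'Margherita Pizza': '$12.99'}, {'Garlic Bread': '$4.50'}]}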
    # scrape all menu names
    target = driver.find_elements_by_xpath("//div[@class='sc-iBfVdv dWPDtB']//span[@class='sc-bdVaJa gImhEG']")
    for nm in target:
        names.append(nm)
    # check if the target is reached
9- store the menu names in a separate list, to measure progress toward the target
    if len(names) >= 200:
        # if the target is reached, save the file as JSON,
        with open('menus.json', 'w') as outfile:
            json.dump(Menus, outfile)
        driver.close()
        # give a completion message
        ctypes.windll.user32.MessageBoxW(0,
            f'Congratulations! We have successfully scraped {len(names)} menus.',
            'Project Completion', 1)
        # and exit the script
        break
sys.exit()
10- set the target. If the target is reached, the script saves the data as a JSON file, shows a completion message, and exits.
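One caveat: ctypes.windll exists only on Windows. A cross-platform variant could fall back to the console; a minimal sketch (not part of the original script):
import sys

def notify(message, title='Project Completion'):
    # native message box on Windows; plain console output elsewhere
    if sys.platform == 'win32':
        import ctypes
        ctypes.windll.user32.MessageBoxW(0, message, title, 1)
    else:
        print(f'{title}: {message}')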
11- otherwise, the loop repeats for the next store until the target is reached.
Yeah, it can be made even more efficient.
Happy Scripting!