Puppeteer Scrape Webpage

There are several ways to interact with a page and later scrape its results with Puppeteer. In my example, I’ll be querying Google to check for the price of Bitcoin.

Image for post
Image for post

Javascript method

Copy the code below and paste it into your script. This method uses JavaScript to loop through the elements by tag name.

const puppeteer = require('puppeteer');
/**
 * Searches Google for "bitcoin" and logs the text of the price element.
 *
 * Launches a browser with a 1920x1080 viewport, opens https://www.google.com,
 * fills the search box and clicks the search button by looping over elements
 * by tag name inside the page, waits for the results, logs the scraped text
 * and saves a screenshot to `screenshot.png`.
 *
 * The browser is always closed, even when a step throws (for example a
 * navigation error or a selector timeout); the error is then re-raised to
 * the caller.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function run() {
  const browser = await puppeteer.launch({
    defaultViewport: { width: 1920, height: 1080 },
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.google.com');

    // wait for search box
    // (page.waitFor is deprecated; waitForSelector is the selector-specific API)
    await page.waitForSelector('input[name="q"]');
    await page.evaluate(() => {
      // This callback runs inside the page, so `document` is the page's DOM.
      const inputs = document.getElementsByTagName('input');
      for (const input of inputs) {
        if (input.getAttribute('name') === 'q') {
          input.value = 'bitcoin';
          break;
        }
      }
      for (const input of inputs) {
        if (input.getAttribute('name') === 'btnK') {
          input.click();
          break;
        }
      }
    });

    // wait for results
    await page.waitForSelector('div[id="result-stats"]');
    const price = await page.evaluate(() => {
      const divs = document.getElementsByTagName('div');
      for (const div of divs) {
        // Exact match on the whole class attribute, not on a single class.
        if (div.getAttribute('class') === 'dDoNo vk_bk gsrt gzfeS') {
          return div.textContent;
        }
      }
      // No matching div: the result is undefined, as before.
    });
    console.log(price);
    await page.screenshot({ path: 'screenshot.png' });
  } finally {
    // Await the close so the Chromium process is gone before we settle,
    // and do it in `finally` so a failed step cannot leak the browser.
    await browser.close();
  }
}
run();

CSS selector method

This method sets the value of the text box the same way as the JavaScript method, but instead of looping through the elements, it selects an element directly with a CSS selector.

const puppeteer = require('puppeteer');
/**
 * Searches Google for "bitcoin" and logs the text of the price element,
 * addressing each element directly with a CSS selector.
 *
 * Launches a browser with a 1920x1080 viewport, opens https://www.google.com,
 * sets the search box value, clicks the search button, waits for the results,
 * logs the scraped text and saves a screenshot to `screenshot.png`.
 *
 * The browser is always closed, even when a step throws (for example a
 * navigation error or a selector timeout); the error is then re-raised to
 * the caller.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function run() {
  const browser = await puppeteer.launch({
    defaultViewport: { width: 1920, height: 1080 },
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.google.com');

    // wait for search box
    // (page.waitFor is deprecated; waitForSelector is the selector-specific API)
    await page.waitForSelector('input[name="q"]');
    await page.$eval('input[name="q"]', (el) => {
      el.value = 'bitcoin';
    });
    await page.$eval('input[name="btnK"]', (el) => el.click());

    // wait for results
    await page.waitForSelector('div[id="result-stats"]');
    const price = await page.$eval(
      'div[class="dDoNo vk_bk gsrt gzfeS"]',
      (el) => el.textContent,
    );
    console.log(price);
    await page.screenshot({ path: 'screenshot.png' });
  } finally {
    // Await the close so the Chromium process is gone before we settle,
    // and do it in `finally` so a failed step cannot leak the browser.
    await browser.close();
  }
}
run();

Keyboard type method

I have encountered pages that do not accept text box values set using JavaScript. The workaround is to simulate keyboard typing.

const puppeteer = require('puppeteer');
/**
 * Searches Google for "bitcoin" by simulating keystrokes, then logs the text
 * of the price element.
 *
 * Launches a browser with a 1920x1080 viewport, opens https://www.google.com,
 * focuses the search box, types the query with a 100 ms delay between keys,
 * clicks the search button, waits for the results, logs the scraped text and
 * saves a screenshot to `screenshot.png`.
 *
 * The browser is always closed, even when a step throws (for example a
 * navigation error or a selector timeout); the error is then re-raised to
 * the caller.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function run() {
  const browser = await puppeteer.launch({
    defaultViewport: { width: 1920, height: 1080 },
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.google.com');

    // wait for search box
    // (page.waitFor is deprecated; waitForSelector is the selector-specific API)
    await page.waitForSelector('input[name="q"]');
    // Keyboard input goes to the focused element, so focus the box first.
    await page.$eval('input[name="q"]', (el) => el.focus());
    await page.keyboard.type('bitcoin', { delay: 100 });
    await page.$eval('input[name="btnK"]', (el) => el.click());

    // wait for results
    await page.waitForSelector('div[id="result-stats"]');
    const price = await page.$eval(
      'div[class="dDoNo vk_bk gsrt gzfeS"]',
      (el) => el.textContent,
    );
    console.log(price);
    await page.screenshot({ path: 'screenshot.png' });
  } finally {
    // Await the close so the Chromium process is gone before we settle,
    // and do it in `finally` so a failed step cannot leak the browser.
    await browser.close();
  }
}
run();

< Back to all the stories I had written

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store