Hey! Let's dive into creating an exciting web scraper using Python's requests and BeautifulSoup libraries. This is one of the most practical skills you can learn for data collection, research, market analysis, or just satisfying your curiosity about what's on the web!
First, install the required libraries:
pip install requests beautifulsoup4
Let's scrape famous quotes from quotes.toscrape.com (a perfect practice site!).
import requests
from bs4 import BeautifulSoup

# Step 1: Send a GET request to the website
url = "http://quotes.toscrape.com/"
response = requests.get(url)

# Step 2: Check if the request was successful
if response.status_code == 200:
    print("✅ Successfully connected!")

    # Step 3: Parse the HTML with BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Step 4: Find all quote containers
    quotes = soup.find_all('div', class_='quote')

    # Step 5: Extract data from each quote
    all_quotes = []
    for quote in quotes:
        # Get the quote text
        text = quote.find('span', class_='text').text

        # Get the author
        author = quote.find('small', class_='author').text

        # Get the tags
        tags = [tag.text for tag in quote.find_all('a', class_='tag')]

        all_quotes.append({
            'quote': text,
            'author': author,
            'tags': tags
        })

    # Step 6: Display the results
    print(f"\n🎯 Found {len(all_quotes)} quotes!\n")
    for i, q in enumerate(all_quotes[:5], 1):  # Show the first 5
        print(f"{i}. '{q['quote']}'")
        print(f"   👤 {q['author']}")
        print(f"   🏷️ {', '.join(q['tags'])}")
        print()
else:
    print(f"❌ Error: {response.status_code}")
Sample output:

✅ Successfully connected!

🎯 Found 10 quotes!

1. 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.'
   👤 Albert Einstein
   🏷️ change, deep-thoughts, thinking, world

2. 'It is our choices, Harry, that show what we truly are, far more than our abilities.'
   👤 J.K. Rowling
   🏷️ abilities, choices
Want all 10 pages of quotes? Here's how:
import requests
from bs4 import BeautifulSoup
import time

def scrape_all_quotes():
    all_quotes = []
    base_url = "http://quotes.toscrape.com/"

    for page in range(1, 11):  # 10 pages total
        print(f"Scraping page {page}...")
        url = f"{base_url}page/{page}/"
        response = requests.get(url)

        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            quotes = soup.find_all('div', class_='quote')
            for quote in quotes:
                text = quote.find('span', class_='text').text
                author = quote.find('small', class_='author').text
                all_quotes.append({'quote': text, 'author': author})

        # Be respectful - add a delay between requests
        time.sleep(1)

    return all_quotes

# Run it!
quotes = scrape_all_quotes()
print(f"Total quotes collected: {len(quotes)}")
Now let's scrape book prices from books.toscrape.com (its sister practice site) and hunt for bargains:
import requests
from bs4 import BeautifulSoup

url = "http://books.toscrape.com/"
response = requests.get(url)
# Parse the raw bytes so BeautifulSoup can detect the page's encoding
# itself - the £ sign is easy to garble otherwise
soup = BeautifulSoup(response.content, 'html.parser')

# Find all books on the page
books = soup.find_all('article', class_='product_pod')

deals = []
for book in books:
    title = book.find('h3').find('a')['title']
    price = book.find('p', class_='price_color').text.replace('£', '')
    price = float(price)

    if price < 10:
        deals.append({'title': title, 'price': price})

print("🔥 HOT DEALS UNDER £10:")
for deal in deals:
    print(f"{deal['title'][:50]}... - £{deal['price']:.2f}")
Once you've collected your quotes, save them to a CSV file so the data survives after your script exits:

import csv

# Save quotes to CSV
with open('quotes.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Quote', 'Author', 'Tags'])
    for quote in all_quotes:
        writer.writerow([quote['quote'], quote['author'], ', '.join(quote['tags'])])

print("💾 Data saved to quotes.csv!")
A few pro tips before you point this at bigger sites.

Reuse connections with a session:

# A Session reuses the underlying connection across requests,
# which is faster when hitting many pages on the same site
session = requests.Session()
response = session.get(url)
Handle failures gracefully with a timeout and error checking:

try:
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # raises an exception for 4xx/5xx responses
except requests.RequestException as e:
    print(f"❌ Request failed: {e}")
Identify your scraper honestly, and keep a polite delay between requests:

headers = {
    'User-Agent': 'MyScraper 1.0 (contact@example.com)'
}
response = requests.get(url, headers=headers)

time.sleep(2)  # 2-second delay between requests
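Being polite also means respecting robots.txt. Here's a minimal check using the standard library's urllib.robotparser (the user-agent string is just the example one from above):

from urllib.robotparser import RobotFileParser

# Download and parse the site's robots.txt
robots = RobotFileParser("http://quotes.toscrape.com/robots.txt")
robots.read()

# Ask whether our scraper may fetch a given URL
if robots.can_fetch("MyScraper 1.0", "http://quotes.toscrape.com/page/2/"):
    print("Allowed to scrape this page")
else:
    print("robots.txt disallows this page - skip it")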
Stuck on a selector? Look at what you actually downloaded:

# See the HTML as BeautifulSoup parsed it
print(soup.prettify())

# Test selectors in your browser's console first:
# right-click → Inspect → Copy selector
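Once you've copied a CSS selector from DevTools, you can paste it straight into BeautifulSoup's select() method. A quick sketch against the quotes page:

import requests
from bs4 import BeautifulSoup

soup = BeautifulSoup(requests.get("http://quotes.toscrape.com/").text, 'html.parser')

# select() takes CSS selectors, just like the browser console
texts = soup.select('div.quote span.text')      # all matches, find_all-style
first_author = soup.select_one('small.author')  # first match (or None)

print(texts[0].text)
print(first_author.text)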
Ready to start? Copy the first code block, run it, and watch your scraper come alive! Once it works, try the price tracker next.
What's your first target website? Let me help you customize this scraper for ANY site you want!