Python Web Scraper + Auto Scheduler GUI | Requests + BeautifulSoup + Tkinter
Demo :
Click the video to watch the demo.
Features:
✔️ Share buttons
✔️ Featured Image (Thumbnail)
✔️ SEO Meta Description (the first 150 characters of the YouTube description)
Content:
Embed YouTube Short + Explanation + Code Snippet + Screenshots
Code :
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
from apscheduler.schedulers.background import BackgroundScheduler
import customtkinter as ctk
from tkinter import messagebox
# Global customtkinter look: dark mode with the blue color theme.
ctk.set_appearance_mode("dark")
ctk.set_default_color_theme("blue")

# Page that gets scraped, and the headers sent with each request to it.
URL = "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html"
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

# CSV file that receives one appended row per successful scrape.
CSV_FILE = "products.csv"
def scrape_product():
    """Scrape the product page at URL and append the result to CSV_FILE.

    Fetches URL with HEADERS, extracts the product name and price, appends a
    timestamped row to the CSV file and writes a status line to ``log_text``.
    A non-200 response, missing page elements, or any exception raised while
    scraping is reported as an ``[ERROR]`` line in ``log_text``.

    NOTE(review): this function is also registered as an APScheduler
    background job, so it may run off the Tk main thread -- confirm that
    writing to ``log_text`` from there is acceptable.
    """
    try:
        # Without a timeout a stalled connection would block the caller
        # forever; a timeout error is reported by the handler below.
        response = requests.get(URL, headers=HEADERS, timeout=10)
        if response.status_code != 200:
            log_text.insert(ctk.END, f"[ERROR] Status Code: {response.status_code}\n")
            return

        soup = BeautifulSoup(response.content, 'html.parser')
        product_name_tag = soup.select_one('div.product_main h1')
        price_tag = soup.select_one('p.price_color')
        if not (product_name_tag and price_tag):
            log_text.insert(ctk.END, "[ERROR] Product name or price not found in page.\n")
            return

        product_name = product_name_tag.text.strip()
        price = price_tag.text.strip()
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        new_row = pd.DataFrame([{
            'Timestamp': timestamp,
            'Product Name': product_name,
            'Price': price,
        }])

        try:
            history = pd.read_csv(CSV_FILE)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # No usable file yet (missing, or present but zero bytes):
            # start the history with just this row.
            combined = new_row
        else:
            combined = pd.concat([history, new_row], ignore_index=True)
        combined.to_csv(CSV_FILE, index=False)

        log_text.insert(ctk.END, f"[{timestamp}] ✅ Scraped: {product_name} - {price}\n")
    except Exception as e:
        # Outermost boundary of a scheduled/GUI job: surface the failure in
        # the log widget instead of letting it escape.
        log_text.insert(ctk.END, f"[ERROR] Exception: {e}\n")
# Background scheduler: run scrape_product on a 24-hour interval.
scheduler = BackgroundScheduler()
scheduler.add_job(scrape_product, trigger='interval', hours=24)
scheduler.start()

# Main application window.
app = ctk.CTk()
app.title("Python Web Scraper + Scheduler")
app.geometry("450x500")

# Heading.
# NOTE(review): the leading characters of the title text look like a
# mis-encoded emoji -- confirm the intended text.
title_label = ctk.CTkLabel(
    app,
    text="π·️ Product Price Scraper Scheduler",
    font=("Arial", 20),
)
title_label.pack(pady=15)

# One-line description shown under the heading.
desc_label = ctk.CTkLabel(
    app,
    text="Automatically scrapes product price every 24 hours and saves into CSV.",
    wraplength=600,
)
desc_label.pack(pady=10)
def manual_scrape():
    """Run one scrape immediately, then show a completion dialog.

    The dialog is shown unconditionally once ``scrape_product`` returns;
    the outcome of the scrape itself is reported in the log widget.
    """
    scrape_product()
    messagebox.showinfo(title="Success", message="Manual scrape completed!")
# Button that triggers an immediate scrape.
# NOTE(review): the leading characters of the button text look like a
# mis-encoded emoji -- confirm the intended text.
scrape_btn = ctk.CTkButton(app, text="π‘ Scrape Now Manually", command=manual_scrape)
scrape_btn.pack(pady=10)

# Log area: scrape_product writes its status and error lines into log_text.
log_frame = ctk.CTkFrame(app)
log_frame.pack(pady=10, fill="both", expand=True)
log_text = ctk.CTkTextbox(log_frame, width=650, height=300)
log_text.pack(padx=10, pady=10)


def _on_close():
    """Stop the scheduler and close the window when the user closes the app."""
    try:
        # wait=False: do not block the Tk event loop on a scrape that is
        # still in flight.
        scheduler.shutdown(wait=False)
    finally:
        # Always destroy the window, even if the scheduler fails to shut
        # down, so the app can never become unclosable.
        app.destroy()


app.protocol("WM_DELETE_WINDOW", _on_close)
app.mainloop()
Comments
Post a Comment