Développement d'un système complet de scraping pour collecter des données de voitures depuis https://www.albioccasion.com
Schéma d'un enregistrement voiture (notation de types Python) :
{
"id": int,
"make": str,
"model": str,
"year": int,
"price": int,
"mileage": int,
"fuel": str,
"location": str,
"options": list[str],
"detail_url": str
}
class AlbiScraper:
    """Top-level orchestrator wiring the site navigator to the storage layer."""

    def __init__(self):
        # NOTE(review): AlbiDatabase is not defined in this file (only
        # CarDatabase is) — confirm it exists elsewhere in the package.
        self.navigator = AlbiNavigator()
        self.database = AlbiDatabase()
class AlbiNavigator:
    """Owns the Playwright browser session used to browse albioccasion.com.

    The browser is launched eagerly in __init__, exactly as before. The
    original version never released it (leaked a Chromium process per
    instance); callers should now call close(), or use the instance as a
    context manager.
    """

    def __init__(self):
        self.base_url = "https://www.albioccasion.com"
        self.playwright = sync_playwright().start()
        self.browser = self.playwright.chromium.launch()
        self.page = self.browser.new_page()

    def close(self):
        """Release the page, the browser and the Playwright driver.

        Best-effort and idempotent: a failure closing one resource does
        not prevent tearing down the remaining ones.
        """
        for shutdown in (
            lambda: self.page.close(),
            lambda: self.browser.close(),
            lambda: self.playwright.stop(),
        ):
            try:
                shutdown()
            except Exception:
                # Keep going: releasing the outer resources matters more
                # than reporting a failure on an inner one.
                pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False
class AlbiExtractor:
    """Parses a rendered page into a BeautifulSoup tree.

    Bug fix: the original __init__ evaluated ``self.browser.new_page()``,
    but ``self.browser`` was never assigned on this class, so every
    construction raised AttributeError. The extractor now receives the
    page it should parse instead of trying to create one itself.
    """

    def __init__(self, page=None):
        # page: an already-open Playwright page (or any object exposing a
        # .content() -> str method). Optional so an extractor can be
        # created first and fed a page later.
        self.page = page
        # Parsed DOM of the page; None until a page is supplied.
        self.soup = (
            BeautifulSoup(page.content(), 'html.parser')
            if page is not None
            else None
        )
class CarDatabase:
    """Thin wrapper around the SQLite store for scraped cars.

    Generalization: the database path is now a parameter whose default
    keeps the original ``'cars.db'`` behavior; passing ``':memory:'``
    makes the class testable without touching disk. A ``close()`` method
    and context-manager support fix the original connection leak.
    """

    def __init__(self, db_path='cars.db'):
        # One connection + cursor per instance; callers run SQL through
        # self.cursor exactly as before.
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()

    def close(self):
        """Commit any pending work and close the connection."""
        self.conn.commit()
        self.conn.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False
# Interface CLI python -m car_scrapper.cli --start-page 1 --pages 2 # Afficher toutes les voitures python -m car_scrapper.cli --show-all
# Démarrer le serveur python server.py # Tester les endpoints curl "http://localhost:5000/api/cars" curl "http://localhost:5000/api/stats"
# Filtrage et pagination curl "http://localhost:5000/api/cars?price_lt=30000&year_gt=2020" curl "http://localhost:5000/api/cars?page=1&per_page=10" curl "http://localhost:5000/api/cars?sort_by=price&sort_order=desc"
Scraping multi-thread
Métriques et logs détaillés
Collecte automatique périodique