// image_crawler/emalls.js
//
// 59 lines
// 1.9 KiB
// JavaScript

// emalls.js
const puppeteer = require("puppeteer");
/**
 * Collects product entries from the currently loaded document.
 *
 * This function is handed to `page.evaluate`, which runs it inside the
 * browser page rather than in Node. It therefore must stay fully
 * self-contained: every constant and helper it needs is declared inside it.
 *
 * Anchors that point at the same product URL are merged into one entry, so a
 * card made of several links (e.g. an image link plus a title link) yields a
 * single product instead of several partial duplicates.
 *
 * @param {number} limit - Maximum number of distinct products to collect.
 * @returns {{title: string, price: string, image: string, link: string}[]}
 *   Products in first-seen document order, with placeholders for missing data.
 */
function extractProductsInPage(limit) {
  const FALLBACK_TITLE = "بدون عنوان";
  const FALLBACK_PRICE = "—";
  const FALLBACK_IMAGE = "https://via.placeholder.com/150";

  // Trim before testing for emptiness so whitespace-only text counts as missing.
  const cleanText = (value) => (value || "").trim();

  const productsByHref = new Map();
  for (const link of document.querySelectorAll('a[href*="/product/"]')) {
    let entry = productsByHref.get(link.href);
    if (!entry) {
      // Keep scanning after the limit is reached: later anchors may still
      // carry missing fields for products that were already collected.
      if (productsByHref.size >= limit) continue;
      entry = { title: "", price: "", image: "" };
      productsByHref.set(link.href, entry);
    }

    const titleNode = link.querySelector('h1, h2, h3, h4, .title, [class*="title"]');
    const priceNode = link.querySelector('.price, [class*="price"]');
    const img = link.querySelector('img');

    // The first non-empty value found for a field wins.
    entry.title = entry.title ||
      cleanText(titleNode?.textContent) ||
      cleanText(link.getAttribute('title'));
    entry.price = entry.price || cleanText(priceNode?.textContent);
    entry.image = entry.image || img?.src || img?.getAttribute('data-src') || "";
  }

  return Array.from(productsByHref, ([href, entry]) => ({
    title: entry.title || FALLBACK_TITLE,
    price: entry.price || FALLBACK_PRICE,
    image: entry.image || FALLBACK_IMAGE,
    link: href
  }));
}

/**
 * Searches emalls.ir for `query` using a headless browser and scrapes the
 * product links found on the results page.
 *
 * This is a best-effort scraper: any failure (launch, navigation, timeout,
 * evaluation) is logged to `console.error` and results in an empty array
 * rather than a rejected promise.
 *
 * @param {string} query - Search text; a falsy value returns `[]` immediately.
 * @param {number} [limit=20] - Maximum number of products to return; a value
 *   of zero or less returns `[]` without launching a browser.
 * @returns {Promise<{title: string, price: string, image: string, link: string}[]>}
 *   The scraped products, or `[]` when nothing was found or scraping failed.
 */
async function searchEmalls(query, limit = 20) {
  // Nothing to search for, or nothing requested: skip the costly browser launch.
  if (!query || limit <= 0) return [];

  let browser;
  try {
    browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    });
    const page = await browser.newPage();
    await page.goto(`https://emalls.ir/search?q=${encodeURIComponent(query)}`, {
      waitUntil: 'networkidle2',
      timeout: 30000
    });
    // Wait for any content to load
    await page.waitForSelector('body', { timeout: 10000 });

    // `return await` keeps a rejection from evaluate inside this try/catch.
    return await page.evaluate(extractProductsInPage, limit);
  } catch (err) {
    console.error("Emalls scraping error:", err.message);
    return [];
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeErr) {
        // A failed shutdown must not discard results that were already scraped.
        console.error("Emalls browser close error:", closeErr.message);
      }
    }
  }
}
// Public API of this module: the emalls.ir search scraper.
module.exports = { searchEmalls };