Initial commit: MangaReader iOS App
✨ Features: - App iOS completa para leer manga sin publicidad - Scraper con WKWebView para manhwaweb.com - Sistema de descargas offline - Lector con zoom y navegación - Favoritos y progreso de lectura - Compatible con iOS 15+ y Sideloadly/3uTools 📦 Contenido: - Backend Node.js con Puppeteer (opcional) - App iOS con SwiftUI - Scraper de capítulos e imágenes - Sistema de almacenamiento local - Testing completo - Documentación exhaustiva 🧪 Prueba: Capítulo 789 de One Piece descargado exitosamente - 21 páginas descargadas - 4.68 MB total - URLs verificadas y funcionales 🎉 Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
366
backend/scraper.js
Normal file
366
backend/scraper.js
Normal file
@@ -0,0 +1,366 @@
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
// Origin of the scraped site; every page URL in this module is built from it.
const BASE_URL = 'https://manhwaweb.com';

// Puppeteer launch configuration shared by every scraper function.
const PUPPETEER_OPTIONS = {
  headless: 'new',
  args: [
    // Disable the Chromium sandbox. NOTE(review): presumably required because
    // the backend runs as root / inside a container — confirm, since this
    // weakens isolation when loading third-party pages.
    '--no-sandbox',
    '--disable-setuid-sandbox',
    // Avoid relying on /dev/shm, which is often very small in containers.
    '--disable-dev-shm-usage',
    // No GPU is needed for headless scraping.
    '--disable-accelerated-2d-canvas',
    '--disable-gpu',
  ],
};
|
||||
|
||||
/**
 * Loads a URL in a headless browser and returns the HTML after client-side
 * rendering has had time to run.
 *
 * @param {string} url - Absolute URL of the page to load.
 * @param {number} [waitFor=3000] - Extra milliseconds to wait once the network
 *   is idle, so late JavaScript-driven content is present in the result.
 * @returns {Promise<string>} Serialized HTML of the rendered document.
 * @throws {Error} Any Puppeteer launch/navigation error; the browser is always
 *   closed before the error propagates.
 */
async function getRenderedHTML(url, waitFor = 3000) {
  const browser = await puppeteer.launch(PUPPETEER_OPTIONS);

  try {
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');

    // Navigate and wait until there are no in-flight network requests.
    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout: 30000,
    });

    // page.waitForTimeout() is deprecated and was removed in Puppeteer 22,
    // so the extra settling delay uses a plain timer instead.
    await new Promise((resolve) => setTimeout(resolve, waitFor));

    return await page.content();
  } finally {
    // Single cleanup point: runs on success and on failure, exactly once.
    await browser.close();
  }
}
|
||||
|
||||
/**
 * Scrapes the chapter list of a manga from its detail page.
 *
 * @param {string} mangaSlug - Manga slug (e.g. "one-piece_1695365223767").
 * @returns {Promise<Array<{number: number, title: string, url: string, slug: string}>>}
 *   Chapters de-duplicated by chapter number (first occurrence wins) and
 *   sorted from highest to lowest number.
 * @throws {Error} Any Puppeteer launch/navigation/evaluation error, after it
 *   has been logged; the browser is closed in every case.
 */
export async function getMangaChapters(mangaSlug) {
  let browser;

  try {
    const url = `${BASE_URL}/manga/${mangaSlug}`;

    browser = await puppeteer.launch(PUPPETEER_OPTIONS);
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');

    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout: 30000,
    });

    // Let client-side rendering settle. page.waitForTimeout() is deprecated
    // and was removed in Puppeteer 22, so a plain timer is used instead.
    await new Promise((resolve) => setTimeout(resolve, 3000));

    // The callback executes inside the page and cannot see module scope,
    // so the site origin is passed in as an argument.
    const chapters = await page.evaluate((baseUrl) => {
      const READER_PREFIX = '/leer/';
      const found = [];

      for (const link of document.querySelectorAll('a[href*="/leer/"]')) {
        const href = link.getAttribute('href');
        const text = link.textContent?.trim();
        if (!href || !text) continue;

        const prefixAt = href.indexOf(READER_PREFIX);
        if (prefixAt === -1) continue;

        // The chapter number is the last run of digits before the end of the
        // path, e.g. /leer/one-piece_1695365223767-1172 -> 1172.
        const match = href.match(/(\d+)(?:\/|\?|\s*$)/);
        if (!match) continue;

        found.push({
          // Explicit radix; a legitimate chapter 0 is kept rather than
          // being discarded by a truthiness check.
          number: Number.parseInt(match[1], 10),
          title: text,
          url: href.startsWith('http') ? href : `${baseUrl}${href}`,
          // Take everything after "/leer/" so absolute hrefs yield the same
          // slug as relative ones.
          slug: href.slice(prefixAt + READER_PREFIX.length),
        });
      }

      return found;
    }, BASE_URL);

    // De-duplicate by chapter number in one pass, keeping the first link seen.
    const byNumber = new Map();
    for (const chapter of chapters) {
      if (!byNumber.has(chapter.number)) {
        byNumber.set(chapter.number, chapter);
      }
    }
    const uniqueChapters = [...byNumber.values()].sort((a, b) => b.number - a.number);

    console.log(`✓ Found ${uniqueChapters.length} chapters for ${mangaSlug}`);
    return uniqueChapters;
  } catch (error) {
    console.error('Error scraping manga chapters:', error.message);
    throw error;
  } finally {
    // Single cleanup point for both the success and the failure path.
    await browser?.close();
  }
}
|
||||
|
||||
/**
 * Scrapes the page-image URLs of a single chapter from its reader page.
 *
 * @param {string} chapterSlug - Chapter slug (e.g. "one-piece_1695365223767-1172").
 * @returns {Promise<string[]>} Absolute http(s) image URLs in document order,
 *   with duplicates removed and obvious UI images (avatars, icons, logos,
 *   buttons) filtered out.
 * @throws {Error} Any Puppeteer launch/navigation/evaluation error, after it
 *   has been logged; the browser is closed in every case.
 */
export async function getChapterImages(chapterSlug) {
  let browser;

  try {
    const url = `${BASE_URL}/leer/${chapterSlug}`;

    browser = await puppeteer.launch(PUPPETEER_OPTIONS);
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');

    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout: 30000,
    });

    // Give images time to load. page.waitForTimeout() is deprecated and was
    // removed in Puppeteer 22, so a plain timer is used instead.
    await new Promise((resolve) => setTimeout(resolve, 3000));

    // The callback executes inside the page and cannot see module scope,
    // so the site origin is passed in as an argument.
    const images = await page.evaluate((baseUrl) => {
      // Substrings in the URL that mark an image as site chrome, not a page.
      const UI_URL_MARKERS = ['avatar', 'icon', 'logo', 'button'];
      const imageUrls = [];

      for (const img of document.querySelectorAll('img')) {
        let src = img.src || img.getAttribute('data-src');
        if (!src) continue;

        // Turn protocol-relative and root-relative URLs into absolute ones.
        if (src.startsWith('//')) {
          src = `https:${src}`;
        } else if (src.startsWith('/')) {
          src = `${baseUrl}${src}`;
        }

        // Keep only real http(s) URLs. A plain "contains http" check would
        // also accept data:/blob: URIs (inline SVG placeholders embed
        // "http://www.w3.org/...") which are not downloadable pages.
        if (!/^https?:\/\//i.test(src)) continue;

        const alt = img.alt?.toLowerCase() || '';
        const className = img.className?.toLowerCase() || '';

        const isUIElement =
          UI_URL_MARKERS.some((marker) => src.includes(marker)) ||
          alt.includes('avatar') ||
          className.includes('avatar') ||
          className.includes('icon');

        if (!isUIElement) {
          imageUrls.push(src);
        }
      }

      return imageUrls;
    }, BASE_URL);

    // A Set keeps insertion order, so this de-duplicates without reordering.
    const uniqueImages = [...new Set(images)];

    console.log(`✓ Found ${uniqueImages.length} images for chapter ${chapterSlug}`);
    return uniqueImages;
  } catch (error) {
    console.error('Error scraping chapter images:', error.message);
    throw error;
  } finally {
    // Single cleanup point for both the success and the failure path.
    await browser?.close();
  }
}
|
||||
|
||||
/**
 * Scrapes the metadata of a single manga from its detail page.
 *
 * All fields are extracted heuristically from the rendered DOM, so any of
 * them may come back empty when the page layout does not match.
 *
 * @param {string} mangaSlug - Manga slug (e.g. "one-piece_1695365223767").
 * @returns {Promise<{slug: string, title: string, description: string,
 *   genres: string[], status: string, url: string, coverImage: string}>}
 *   `title` falls back to "Unknown" and `status` to "UNKNOWN" when not found.
 * @throws {Error} Any Puppeteer launch/navigation/evaluation error, after it
 *   has been logged; the browser is closed in every case.
 */
export async function getMangaInfo(mangaSlug) {
  let browser;

  try {
    const url = `${BASE_URL}/manga/${mangaSlug}`;

    browser = await puppeteer.launch(PUPPETEER_OPTIONS);
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');

    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout: 30000,
    });

    // Let client-side rendering settle. page.waitForTimeout() is deprecated
    // and was removed in Puppeteer 22, so a plain timer is used instead.
    await new Promise((resolve) => setTimeout(resolve, 2000));

    const mangaInfo = await page.evaluate(() => {
      // Title: first matching heading-like element, else the tab title with
      // the site suffix stripped.
      const titleElement =
        document.querySelector('h1') ||
        document.querySelector('.title') ||
        document.querySelector('[class*="title"]');
      let title = titleElement?.textContent?.trim() || '';
      if (!title) {
        title = document.title.replace(' - ManhwaWeb', '').replace(' - Manhwa Web', '').trim();
      }

      // Description: the first reasonably long paragraph that is not a
      // copyright notice.
      let description = '';
      for (const paragraph of document.querySelectorAll('p')) {
        const text = paragraph.textContent?.trim() || '';
        if (text.length > 100 && !text.includes('©')) {
          description = text;
          break;
        }
      }

      // Genres: the text of every link pointing at a genre listing.
      const genres = [];
      for (const link of document.querySelectorAll('a[href*="/genero/"]')) {
        const genre = link.textContent?.trim();
        if (genre) genres.push(genre);
      }

      // Status: searched for in the page text and normalized to
      // UPPER_SNAKE_CASE (e.g. "En Pausa" -> "EN_PAUSA").
      let status = 'UNKNOWN';
      const bodyText = document.body?.textContent ?? '';
      const statusMatch = bodyText.match(/Estado\s*:?\s*(PUBLICANDOSE|FINALIZADO|EN PAUSA|EN_ESPERA)/i);
      if (statusMatch) {
        status = statusMatch[1].toUpperCase().replace(' ', '_');
      }

      // Cover: first image that looks like it lives in a "cover" container.
      const coverImg =
        document.querySelector('.cover img') ||
        document.querySelector('[class*="cover"] img') ||
        document.querySelector('img[alt*="cover"]');
      const coverImage = coverImg?.src || '';

      return { title, description, genres, status, coverImage };
    });

    return {
      slug: mangaSlug,
      title: mangaInfo.title || 'Unknown',
      description: mangaInfo.description || '',
      genres: mangaInfo.genres || [],
      status: mangaInfo.status,
      url,
      coverImage: mangaInfo.coverImage,
    };
  } catch (error) {
    console.error('Error scraping manga info:', error.message);
    throw error;
  } finally {
    // Single cleanup point for both the success and the failure path.
    await browser?.close();
  }
}
|
||||
|
||||
/**
 * Scrapes the site's home page for manga links (used for discovery).
 *
 * @returns {Promise<Array<{slug: string, title: string, url: string}>>}
 *   Up to 50 mangas in document order, unique by slug. The title comes from
 *   the link text, else the inner image's alt text, else the slug itself.
 * @throws {Error} Any Puppeteer launch/navigation/evaluation error, after it
 *   has been logged; the browser is closed in every case.
 */
export async function getPopularMangas() {
  let browser;

  try {
    const url = `${BASE_URL}/`;

    browser = await puppeteer.launch(PUPPETEER_OPTIONS);
    const page = await browser.newPage();
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');

    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout: 30000,
    });

    // Let client-side rendering settle. page.waitForTimeout() is deprecated
    // and was removed in Puppeteer 22, so a plain timer is used instead.
    await new Promise((resolve) => setTimeout(resolve, 3000));

    // The callback executes inside the page and cannot see module scope,
    // so the site origin is passed in as an argument.
    const mangas = await page.evaluate((baseUrl) => {
      const seenSlugs = new Set(); // O(1) duplicate check per link
      const mangaList = [];

      for (const link of document.querySelectorAll('a[href*="/manga/"]')) {
        const href = link.getAttribute('href');
        if (!href) continue;

        // The slug is the path segment right after /manga/.
        const slug = href.match(/\/manga\/([^/?#]+)/)?.[1];
        if (!slug || seenSlugs.has(slug)) continue;

        // Fallback chain: link text -> image alt -> readable form of the slug
        // (the part before "_" with dashes turned into spaces).
        const title =
          link.textContent?.trim() ||
          link.querySelector('img')?.getAttribute('alt') ||
          slug.split('_')[0].replace(/-/g, ' ');
        if (!title) continue;

        seenSlugs.add(slug);
        mangaList.push({
          slug,
          title,
          url: href.startsWith('http') ? href : `${baseUrl}${href}`,
        });
      }

      return mangaList;
    }, BASE_URL);

    const limitedMangas = mangas.slice(0, 50);
    console.log(`✓ Found ${limitedMangas.length} popular mangas`);
    return limitedMangas;
  } catch (error) {
    console.error('Error scraping popular mangas:', error.message);
    throw error;
  } finally {
    // Single cleanup point for both the success and the failure path.
    await browser?.close();
  }
}
|
||||
Reference in New Issue
Block a user