Fix regex pattern for page detection
Changed from: /^(\d+)\s+pages?$/i (exact match only)
To: /(\d+)\s+pages?/i (matches anywhere in text)

Added:
- Shows ALL text matches found
- Logs which div contains the match
- Searches in body text as fallback
- Better visibility into what text exists

This will reveal why page detection is failing!
This commit is contained in:
38
content.js
38
content.js
@@ -809,14 +809,44 @@
|
|||||||
// Usar la misma lógica de detección
|
// Usar la misma lógica de detección
|
||||||
let detectedPages = 'UNKNOWN';
|
let detectedPages = 'UNKNOWN';
|
||||||
const allDivs = doc.querySelectorAll('div');
|
const allDivs = doc.querySelectorAll('div');
|
||||||
|
const allText = doc.body.textContent;
|
||||||
|
|
||||||
|
console.log('🔍 Searching in all divs...');
|
||||||
|
const textMatches = [];
|
||||||
|
|
||||||
for (let div of allDivs) {
|
for (let div of allDivs) {
|
||||||
const text = div.textContent.trim();
|
const text = div.textContent.trim();
|
||||||
const pageMatch = text.match(/^(\d+)\s+pages?$/i);
|
// Buscar CUALQUIER número seguido de "pages" o "page"
|
||||||
|
const pageMatch = text.match(/(\d+)\s+pages?/i);
|
||||||
if (pageMatch) {
|
if (pageMatch) {
|
||||||
detectedPages = parseInt(pageMatch[1]);
|
const count = parseInt(pageMatch[1]);
|
||||||
console.log(`✓ Found: "${text}" = ${detectedPages} pages (class: ${div.className})`);
|
textMatches.push({
|
||||||
break;
|
text: text,
|
||||||
|
count: count,
|
||||||
|
className: div.className
|
||||||
|
});
|
||||||
|
console.log(`🔍 Found text "${text}" = ${count} pages (class: ${div.className})`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`📊 Total text matches found: ${textMatches.length}`);
|
||||||
|
if (textMatches.length > 0) {
|
||||||
|
textMatches.forEach((match, idx) => {
|
||||||
|
console.log(` ${idx + 1}. "${match.text}" -> ${match.count} pages (class: ${match.className})`);
|
||||||
|
});
|
||||||
|
// Usar el primer match
|
||||||
|
detectedPages = textMatches[0].count;
|
||||||
|
console.log(`✅ Using: ${detectedPages} pages from "${textMatches[0].text}"`);
|
||||||
|
} else {
|
||||||
|
console.log('❌ No matches found in divs');
|
||||||
|
// Buscar en todo el texto
|
||||||
|
const bodyMatch = allText.match(/(\d+)\s+pages?/i);
|
||||||
|
if (bodyMatch) {
|
||||||
|
const count = parseInt(bodyMatch[1]);
|
||||||
|
console.log(`⚠️ Found in body text: ${count} pages`);
|
||||||
|
detectedPages = count;
|
||||||
|
} else {
|
||||||
|
console.log('❌ No page count found anywhere!');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user