From 24b1a934a3dd4664fc9fa5594c7d6a5c1360137c Mon Sep 17 00:00:00 2001 From: Gaspard Rivoire <36168128+GaspardRivoire@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:04:08 +0200 Subject: [PATCH] upgrade retry and errors handling --- src/archive.ts | 38 +++++++++++++++++++++++++++++++++++--- src/village.ts | 14 +++++++++----- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/archive.ts b/src/archive.ts index 534397b..5c4c7a6 100644 --- a/src/archive.ts +++ b/src/archive.ts @@ -20,6 +20,35 @@ const SELECTORS = { const PHASES = [1, 2, 3]; +async function gotoWithRetry(page: Page, url: string, maxAttempts = 3): Promise { + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + try { + await page.goto(url, { + waitUntil: 'domcontentloaded', + timeout: 60000 + }); + return; + } catch (error) { + if (attempt === maxAttempts) throw error; + logger.info(`Navigation attempt ${attempt} failed, retrying in 30 seconds...`); + await sleep(30000); // Wait 30 seconds before retrying + } + } +} + +async function clickWithRetry(page: Page, url: string, maxAttempts = 3): Promise { + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + try { + await page.click(url); + return; + } catch (error) { + if (attempt === maxAttempts) throw error; + logger.info(`Click attempt ${attempt} failed, retrying in 30 seconds...`); + await page.reload({ waitUntil: 'domcontentloaded' }); + await sleep(30000); + } + } +} /** * Archive une page spécifique du site web. @@ -43,7 +72,7 @@ async function archivePage(dirPath: string, page: Page, ressources: Record= 1 && phase <= 3) { await sleep(4000); - await page.reload({ waitUntil: 'domcontentloaded' }); - await page.click(`${SELECTORS.PHASE_BUTTONS}(${phase})`); + await page.reload({ waitUntil: 'networkidle0' }); + await clickWithRetry(page, `${SELECTORS.PHASE_BUTTONS}(${phase})`); await sleep(500); } await autoScroll(page); @@ -185,6 +214,9 @@ export async function archiveWebsite() { const villages: string[] = []; for (let i = 0; i < villageCount; i++) { const villageName = await selectVillage(page, i + 1); + if (villageName == '') { + continue + } logger.info(villageName); villages.push(villageName); for (const phase of PHASES) { diff --git a/src/village.ts b/src/village.ts index acf9ad3..949d3b9 100644 --- a/src/village.ts +++ b/src/village.ts @@ -15,7 +15,7 @@ async function gotoWithRetry(page: Page, url: string, maxAttempts = 3): Promise< try { await page.goto(url, { waitUntil: 'domcontentloaded', - timeout: 60000 // Increased timeout to 60 seconds + timeout: 60000 }); return; } catch (error) { @@ -30,11 +30,14 @@ async function gotoWithRetry(page: Page, url: string, maxAttempts = 3): Promise< async function waitForSelectorWithRetry(page: Page, selector: string, maxAttempts = 3): Promise { for (let attempt = 1; attempt <= maxAttempts; attempt++) { try { - await page.waitForSelector(selector, { visible: true, timeout: 10000 }); + await page.waitForSelector(selector, { visible: true, timeout: 60000 }); return; } catch (error) { if (attempt === maxAttempts) throw error; logger.info(`Selector ${selector} not found on attempt ${attempt}, retrying...`); + await page.reload({ waitUntil: 'domcontentloaded' }); + const html = await page.evaluate(() => document.documentElement.outerHTML); + logger.info(`ERROR ---> ${html}`); await sleep(30000); } } @@ -89,12 +92,13 @@ export async function selectVillage(page: Page, index: number) { logger.info('Selecting village'); try { - await page.goto(`${process.env.URL_TO_ARCHIVE}`, { waitUntil: 'domcontentloaded' }); + // await page.goto(`${process.env.URL_TO_ARCHIVE}`, { waitUntil: 'domcontentloaded' }); + await gotoWithRetry(page, `${process.env.URL_TO_ARCHIVE}`); await sleep(2000); // Vérifier et cliquer sur le bouton pour ouvrir le menu déroulant const buttonSelector = SELECTORS.VILLAGE_BUTTON; - await page.waitForSelector(buttonSelector, { visible: true, timeout: 30000 }); + await waitForSelectorWithRetry(page, buttonSelector); await page.click(buttonSelector); await sleep(10000); @@ -103,7 +107,7 @@ export async function selectVillage(page: Page, index: number) { await page.click(SELECTORS.VILLAGE_SELECT); const selector = `${SELECTORS.VILLAGE_OPTION}:nth-child(${index})`; - await page.waitForSelector(selector, { timeout: 5000 }); + await waitForSelectorWithRetry(page, selector); const village = await page.evaluate(({index, SELECTORS}) => { const $el = document.querySelector(`${SELECTORS.VILLAGE_OPTION}:nth-child(${index})`); if ($el) {