From dfc4383b50b3e936e6ca0461a1c688ca063dbe7e Mon Sep 17 00:00:00 2001 From: ngosang Date: Fri, 8 Jan 2021 12:54:04 +0100 Subject: [PATCH] Simplify and document the "return only cookies" parameter --- README.md | 5 +-- src/routes.ts | 96 +++++++-------------------------------------------- 2 files changed, 15 insertions(+), 86 deletions(-) diff --git a/README.md b/README.md index 09b954e..0ef70c4 100644 --- a/README.md +++ b/README.md @@ -128,8 +128,9 @@ Parameter | Notes url | Mandatory session | Optional. Will send the request from and existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. headers | Optional. To specify user headers. -maxTimeout | Optional. Max timeout to solve the challenge -cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format +maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. +cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format. +returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. Example response from running the `curl` above: diff --git a/src/routes.ts b/src/routes.ts index 6ad9dd7..8ecec05 100644 --- a/src/routes.ts +++ b/src/routes.ts @@ -74,52 +74,6 @@ const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKi const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box'] const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response'] -async function interceptResponse(page: Puppeteer.Page, callback: (payload: ChallengeResolutionT) => any) { - const client = await page.target().createCDPSession(); - await client.send('Fetch.enable', { - patterns: [ - { - urlPattern: '*', - resourceType: 'Document', - requestStage: 'Response', - }, - ], - }); - - client.on('Fetch.requestPaused', async (e) => { - log.debug('Fetch.requestPaused. Checking if the response has valid cookies') - let headers = e.responseHeaders || [] - - let cookies = await page.cookies(); - log.debug(cookies) - - if (cookies.filter((c: Cookie) => c.name === 'cf_clearance').length > 0) { - log.debug('Aborting request and return cookies. valid cookies found') - await client.send('Fetch.failRequest', {requestId: e.requestId, errorReason: 'Aborted'}) - - let status = 'ok' - let message = '' - const payload: ChallengeResolutionT = { - status, - message, - result: { - url: page.url(), - status: e.status, - headers: headers.reduce((a: any, x: { name: any; value: any }) => ({ ...a, [x.name]: x.value }), {}), - response: null, - cookies: cookies, - userAgent: '' - } - } - - callback(payload); - } else { - log.debug('Continuing request. no valid cookies found') - await client.send('Fetch.continueRequest', {requestId: e.requestId}) - } - }); -} - async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Puppeteer.Page) { const maxTimeout = params.maxTimeout || 60000 const timer = new Timeout(); @@ -169,13 +123,6 @@ async function resolveChallenge(ctx: RequestContext, { url, proxy, download, ret log.debug(`'${selector}' challenge element detected.`) log.debug('Waiting for Cloudflare challenge...') - let interceptingResult: ChallengeResolutionT; - if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers) - await interceptResponse(page, async function(payload){ - interceptingResult = payload; - }); - } - while (true) { await page.waitFor(1000) try { @@ -183,11 +130,6 @@ async function resolveChallenge(ctx: RequestContext, { url, proxy, download, ret response = await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 }) } catch (error) { } - if (returnOnlyCookies && interceptingResult) { - await page.close(); - return interceptingResult; - } - try { // catch Execution context was destroyed const cfChallengeElem = await page.$(selector) @@ -277,21 +219,10 @@ async function resolveChallenge(ctx: RequestContext, { url, proxy, download, ret const timeLeft = randomWaitTime - captchaSolveTotalTime if (timeLeft > 0) { await page.waitFor(timeLeft) } - let interceptingResult: ChallengeResolutionT; - if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers) - await interceptResponse(page, async function(payload){ - interceptingResult = payload; - }); - } - // submit captcha response challengeForm.evaluate((e: HTMLFormElement) => e.submit()) response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' }) - if (returnOnlyCookies && interceptingResult) { - await page.close(); - return interceptingResult; - } } } else { status = 'warning' @@ -315,14 +246,19 @@ async function resolveChallenge(ctx: RequestContext, { url, proxy, download, ret } } - if (download) { - // for some reason we get an error unless we reload the page - // has something to do with a stale buffer and this is the quickest - // fix since I am short on time - response = await page.goto(url, { waitUntil: 'domcontentloaded' }) - payload.result.response = (await response.buffer()).toString('base64') + if (returnOnlyCookies) { + payload.result.headers = null; + payload.result.userAgent = null; } else { - payload.result.response = await page.content() + if (download) { + // for some reason we get an error unless we reload the page + // has something to do with a stale buffer and this is the quickest + // fix since I am short on time + response = await page.goto(url, { waitUntil: 'domcontentloaded' }) + payload.result.response = (await response.buffer()).toString('base64') + } else { + payload.result.response = await page.content() + } } // make sure the page is closed because if it isn't and error will be thrown @@ -474,14 +410,6 @@ export const routes: Routes = { await browserRequest(ctx, params) }, - 'request.cookies': async (ctx, params: BaseRequestAPICall) => { - params.returnOnlyCookies = true - params.method = 'GET' - if (params.postData) { - return ctx.errorResponse('Cannot use "postBody" when sending a GET request.') - } - await browserRequest(ctx, params) - }, } export default async function Router(ctx: RequestContext, params: BaseAPICall): Promise {