Simplify and document the "return only cookies" parameter

This commit is contained in:
ngosang 2021-01-08 12:54:04 +01:00
parent d140e9369d
commit dfc4383b50
2 changed files with 15 additions and 86 deletions

View File

@ -128,8 +128,9 @@ Parameter | Notes
url | Mandatory url | Mandatory
session | Optional. Will send the request from an existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. session | Optional. Will send the request from an existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed.
headers | Optional. To specify user headers. headers | Optional. To specify user headers.
maxTimeout | Optional. Max timeout to solve the challenge maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds.
cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format.
returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed.
Example response from running the `curl` above: Example response from running the `curl` above:

View File

@ -74,52 +74,6 @@ const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKi
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box'] const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box']
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response'] const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response']
/**
 * Opens a CDP session on `page`, pauses Document requests at the response
 * stage and inspects the page cookies each time one is paused.
 *
 * When a cookie named `cf_clearance` is present, the paused request is failed
 * with reason `Aborted` and `callback` receives a payload holding the page
 * URL, the response status, the response headers and the cookies (the
 * `response` body is left `null`). Otherwise the paused request is continued.
 *
 * @param page Page whose Document responses are intercepted.
 * @param callback Called with the payload once a `cf_clearance` cookie is found.
 */
async function interceptResponse(page: Puppeteer.Page, callback: (payload: ChallengeResolutionT) => any) {
  const session = await page.target().createCDPSession();

  // Only Document requests are paused, and only once their response is available.
  await session.send('Fetch.enable', {
    patterns: [{ urlPattern: '*', resourceType: 'Document', requestStage: 'Response' }],
  });

  session.on('Fetch.requestPaused', async (event) => {
    log.debug('Fetch.requestPaused. Checking if the response has valid cookies')
    const responseHeaders = event.responseHeaders || []
    const pageCookies = await page.cookies();
    log.debug(pageCookies)

    const hasClearance = pageCookies.some((cookie: Cookie) => cookie.name === 'cf_clearance')
    if (!hasClearance) {
      log.debug('Continuing request. no valid cookies found')
      await session.send('Fetch.continueRequest', { requestId: event.requestId })
      return
    }

    log.debug('Aborting request and return cookies. valid cookies found')
    // The request is failed before the payload is built, so the body is never loaded.
    await session.send('Fetch.failRequest', { requestId: event.requestId, errorReason: 'Aborted' })

    const result: ChallengeResolutionT = {
      status: 'ok',
      message: '',
      result: {
        url: page.url(),
        status: event.status,
        // Collapse the [{ name, value }] header list into a plain name -> value object.
        headers: responseHeaders.reduce(
          (acc: any, header: { name: any; value: any }) => ({ ...acc, [header.name]: header.value }),
          {}
        ),
        response: null,
        cookies: pageCookies,
        userAgent: ''
      }
    }
    callback(result);
  });
}
async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Puppeteer.Page) { async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Puppeteer.Page) {
const maxTimeout = params.maxTimeout || 60000 const maxTimeout = params.maxTimeout || 60000
const timer = new Timeout(); const timer = new Timeout();
@ -169,13 +123,6 @@ async function resolveChallenge(ctx: RequestContext, { url, proxy, download, ret
log.debug(`'${selector}' challenge element detected.`) log.debug(`'${selector}' challenge element detected.`)
log.debug('Waiting for Cloudflare challenge...') log.debug('Waiting for Cloudflare challenge...')
let interceptingResult: ChallengeResolutionT;
if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers)
await interceptResponse(page, async function(payload){
interceptingResult = payload;
});
}
while (true) { while (true) {
await page.waitFor(1000) await page.waitFor(1000)
try { try {
@ -183,11 +130,6 @@ async function resolveChallenge(ctx: RequestContext, { url, proxy, download, ret
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 }) response = await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 })
} catch (error) { } } catch (error) { }
if (returnOnlyCookies && interceptingResult) {
await page.close();
return interceptingResult;
}
try { try {
// catch Execution context was destroyed // catch Execution context was destroyed
const cfChallengeElem = await page.$(selector) const cfChallengeElem = await page.$(selector)
@ -277,21 +219,10 @@ async function resolveChallenge(ctx: RequestContext, { url, proxy, download, ret
const timeLeft = randomWaitTime - captchaSolveTotalTime const timeLeft = randomWaitTime - captchaSolveTotalTime
if (timeLeft > 0) { await page.waitFor(timeLeft) } if (timeLeft > 0) { await page.waitFor(timeLeft) }
let interceptingResult: ChallengeResolutionT;
if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers)
await interceptResponse(page, async function(payload){
interceptingResult = payload;
});
}
// submit captcha response // submit captcha response
challengeForm.evaluate((e: HTMLFormElement) => e.submit()) challengeForm.evaluate((e: HTMLFormElement) => e.submit())
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' }) response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })
if (returnOnlyCookies && interceptingResult) {
await page.close();
return interceptingResult;
}
} }
} else { } else {
status = 'warning' status = 'warning'
@ -315,14 +246,19 @@ async function resolveChallenge(ctx: RequestContext, { url, proxy, download, ret
} }
} }
if (download) { if (returnOnlyCookies) {
// for some reason we get an error unless we reload the page payload.result.headers = null;
// has something to do with a stale buffer and this is the quickest payload.result.userAgent = null;
// fix since I am short on time
response = await page.goto(url, { waitUntil: 'domcontentloaded' })
payload.result.response = (await response.buffer()).toString('base64')
} else { } else {
payload.result.response = await page.content() if (download) {
// for some reason we get an error unless we reload the page
// has something to do with a stale buffer and this is the quickest
// fix since I am short on time
response = await page.goto(url, { waitUntil: 'domcontentloaded' })
payload.result.response = (await response.buffer()).toString('base64')
} else {
payload.result.response = await page.content()
}
} }
// make sure the page is closed because if it isn't an error will be thrown // make sure the page is closed because if it isn't an error will be thrown
@ -474,14 +410,6 @@ export const routes: Routes = {
await browserRequest(ctx, params) await browserRequest(ctx, params)
}, },
// Route 'request.cookies': forces the call to be a GET request with
// `returnOnlyCookies` enabled, rejects calls that carry `postData`, and
// otherwise delegates to `browserRequest`.
'request.cookies': async (ctx, params: BaseRequestAPICall) => {
  params.returnOnlyCookies = true
  params.method = 'GET'
  if (params.postData) {
    // The message names the field that is actually checked (`postData`).
    return ctx.errorResponse('Cannot use "postData" when sending a GET request.')
  }
  await browserRequest(ctx, params)
},
} }
export default async function Router(ctx: RequestContext, params: BaseAPICall): Promise<void> { export default async function Router(ctx: RequestContext, params: BaseAPICall): Promise<void> {