mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-06-08 04:25:25 +00:00
Move Cloudflare provider logic to its own module
This commit is contained in:
parent
dfc4383b50
commit
f1e829fd3a
147
src/providers/cloudflare.ts
Normal file
147
src/providers/cloudflare.ts
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
import {Response} from 'puppeteer'
|
||||||
|
import {Page} from "puppeteer-extra/dist/puppeteer";
|
||||||
|
import {TimeoutError} from "puppeteer/Errors";
|
||||||
|
|
||||||
|
import log from "../log";
|
||||||
|
import getCaptchaSolver, {CaptchaType} from "../captcha";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This module contains the logic to solve protections provided by Cloudflare
|
||||||
|
**/
|
||||||
|
|
||||||
|
// CSS selectors whose presence marks a Cloudflare challenge page.
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box'];
// Names of the <textarea> fields that receive the solved captcha token.
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response'];

/**
 * Detects Cloudflare protection on an already-loaded page and tries to get past it.
 *
 * Steps, in order:
 *  1. Return immediately when the `server` response header does not start with `cloudflare`.
 *  2. Fail when the page contains a `.cf-error-code` element (request blocked).
 *  3. When the status is above 400, wait until the detected challenge element disappears.
 *  4. When a `cf_captcha_kind` input is present, solve and submit the captcha.
 *
 * @param url URL that was requested; forwarded to the captcha solver.
 * @param page Page that has already navigated to `url`.
 * @param response Response of that navigation.
 * @returns The response of the last navigation performed, or `response` itself when
 *          nothing had to be done.
 * @throws Error when the request is blocked, no challenge selector matches, the captcha
 *         cannot be identified or solved, or no captcha solver is configured.
 */
export default async function resolveChallenge(url: string, page: Page, response: Response): Promise<Response> {

  // look for challenge and return fast if not detected
  // A missing `server` header (or a missing response) means "not Cloudflare";
  // it must not crash with a TypeError.
  const serverHeader: string = (response && response.headers().server) || ''
  if (!serverHeader.startsWith('cloudflare')) {
    log.info('Cloudflare not detected');
    return response;
  }
  log.info('Cloudflare detected');

  if (await page.$('.cf-error-code')) {
    throw new Error('Cloudflare has blocked this request (Code 1020 Detected).')
  }

  if (response.status() > 400) {
    response = await waitForChallengeToClear(page, response)
  }

  // it seems some captcha pages return 200 sometimes
  if (await page.$('input[name="cf_captcha_kind"]')) {
    response = await solveCaptchaChallenge(url, page, response)
  }

  return response;
}

/**
 * Waits for the Cloudflare challenge page to go away.
 *
 * Uses the first selector of CHALLENGE_SELECTORS found on the page and keeps
 * waiting/reloading until that element is gone. There is no internal time limit.
 * NOTE(review): presumably the caller enforces an overall timeout - confirm.
 *
 * @returns The response of the last navigation or reload performed.
 * @throws Error when none of the challenge selectors is present.
 */
async function waitForChallengeToClear(page: Page, response: Response): Promise<Response> {
  let latestResponse = response
  let selectorFoundCount = 0

  for (const selector of CHALLENGE_SELECTORS) {
    if (!(await page.$(selector))) {
      log.debug(`No '${selector}' challenge element detected.`)
      continue
    }

    selectorFoundCount++
    log.debug(`'${selector}' challenge element detected.`)
    log.debug('Waiting for Cloudflare challenge...')

    while (true) {
      await page.waitFor(1000)
      try {
        latestResponse = await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 })
      } catch (error) {
        // best effort: a timeout only means no navigation happened in this round
      }

      try {
        if (!(await page.$(selector))) { break }
        log.debug('Found challenge element again...')
      } catch (error) {
        // best effort: the execution context can be destroyed by a navigation
        // while querying; fall through to the reload and try again
      }

      latestResponse = await page.reload({ waitUntil: 'domcontentloaded' })
      log.debug('Reloaded page...')
      log.html(await page.content())
    }

    log.debug('Validating HTML code...')
    // only the first matching selector is waited on
    break
  }

  log.debug("Number of selector found: " + selectorFoundCount + ", total selector: " + CHALLENGE_SELECTORS.length)
  if (selectorFoundCount === 0) {
    throw new Error('No challenge selectors found, unable to proceed')
  }

  return latestResponse
}

/**
 * Solves the captcha shown on the page with the configured solver and submits
 * the challenge form.
 *
 * @returns The response of the navigation triggered by the form submit, or `response`
 *          unchanged when the page has no `#challenge-form`.
 * @throws Error when no solver is configured, the captcha type is missing or unknown,
 *         the sitekey cannot be found, or the solver returns no token.
 */
async function solveCaptchaChallenge(url: string, page: Page, response: Response): Promise<Response> {
  const captchaSolver = getCaptchaSolver()
  if (!captchaSolver) {
    throw new Error('Captcha detected but no automatic solver is configured.');
  }

  const captchaStartTimestamp = Date.now()
  const challengeForm = await page.$('#challenge-form')
  if (!challengeForm) {
    // nothing to submit; the response is left untouched
    return response
  }

  const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
  if (!captchaTypeElm) {
    // the input can vanish between detection and this lookup
    throw new Error('Unknown captcha type!');
  }
  const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
  const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
  if (!captchaType) {
    throw new Error('Unknown captcha type!');
  }

  let sitekey = null
  if (captchaType !== 'hCaptcha' && process.env.CAPTCHA_SOLVER !== 'hcaptcha-solver') {
    const sitekeyElem = await page.$('*[data-sitekey]')
    if (!sitekeyElem) {
      throw new Error('Could not find sitekey!');
    }
    sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
  }

  log.info('Waiting to receive captcha token to bypass challenge...')
  const token = await captchaSolver({
    url,
    sitekey,
    type: captchaType
  })

  if (!token) {
    throw new Error('Token solver failed to return a token.')
  }

  for (const name of TOKEN_INPUT_NAMES) {
    const input = await page.$(`textarea[name="${name}"]`)
    if (input) { await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token) }
  }

  // ignore preset event listeners on the form
  await page.evaluate(() => {
    // use the listener's own argument, not the deprecated global `event`
    window.addEventListener('submit', (e) => { e.stopPropagation() }, true)
  })

  // it seems some sites obfuscate their challenge forms
  // TODO: look into how they do it and come up with a more solid solution
  try {
    // this element is added with js and we want to wait for all the js to load before submitting
    await page.waitForSelector('#challenge-form [type=submit]', { timeout: 5000 })
  } catch (err) {
    if (err instanceof TimeoutError) {
      log.debug(`No '#challenge-form [type=submit]' element detected.`)
    } else {
      // still best effort, but leave a trace instead of swallowing silently
      log.debug(`Unexpected error waiting for '#challenge-form [type=submit]': ${String(err)}`)
    }
  }

  // calculates the time it took to solve the captcha
  const captchaSolveTotalTime = Date.now() - captchaStartTimestamp

  // generates a random wait time
  const randomWaitTime = (Math.floor(Math.random() * 20) + 10) * 1000

  // waits, if any, time remaining to appear human but stay as fast as possible
  const timeLeft = randomWaitTime - captchaSolveTotalTime
  if (timeLeft > 0) { await page.waitFor(timeLeft) }

  // submit captcha response
  // Register the navigation wait BEFORE triggering the submit so a fast
  // navigation cannot be missed, and await the submit so its failure is not
  // an unhandled rejection.
  const [navigationResponse] = await Promise.all([
    page.waitForNavigation({ waitUntil: 'domcontentloaded' }),
    challengeForm.evaluate((e: HTMLFormElement) => e.submit())
  ])

  return navigationResponse
}
|
158
src/routes.ts
158
src/routes.ts
@ -1,12 +1,12 @@
|
|||||||
import { v1 as UUIDv1 } from 'uuid'
|
import { v1 as UUIDv1 } from 'uuid'
|
||||||
|
import { SetCookie, Request, Response, Headers, HttpMethod, Overrides } from 'puppeteer'
|
||||||
|
import { Page, Browser } from "puppeteer-extra/dist/puppeteer";
|
||||||
|
const Timeout = require('await-timeout');
|
||||||
|
|
||||||
|
import log from './log'
|
||||||
import sessions, { SessionsCacheItem } from './session'
|
import sessions, { SessionsCacheItem } from './session'
|
||||||
import { RequestContext } from './types'
|
import { RequestContext } from './types'
|
||||||
import log from './log'
|
import cloudflareProvider from './providers/cloudflare';
|
||||||
import { SetCookie, Request, Headers, HttpMethod, Overrides, Cookie } from 'puppeteer'
|
|
||||||
import { TimeoutError } from 'puppeteer/Errors'
|
|
||||||
import getCaptchaSolver, { CaptchaType } from './captcha'
|
|
||||||
import * as Puppeteer from "puppeteer-extra/dist/puppeteer";
|
|
||||||
const Timeout = require('await-timeout');
|
|
||||||
|
|
||||||
export interface BaseAPICall {
|
export interface BaseAPICall {
|
||||||
cmd: string
|
cmd: string
|
||||||
@ -69,12 +69,10 @@ type OverridesProps =
|
|||||||
'postData' |
|
'postData' |
|
||||||
'headers'
|
'headers'
|
||||||
|
|
||||||
// We always set a Windows User-Agent because ARM builds are detected by CloudFlare
|
// We always set a Windows User-Agent because ARM builds are detected by Cloudflare
|
||||||
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
|
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
|
||||||
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box']
|
|
||||||
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response']
|
|
||||||
|
|
||||||
async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Puppeteer.Page) {
|
async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequestAPICall, page: Page) {
|
||||||
const maxTimeout = params.maxTimeout || 60000
|
const maxTimeout = params.maxTimeout || 60000
|
||||||
const timer = new Timeout();
|
const timer = new Timeout();
|
||||||
try {
|
try {
|
||||||
@ -88,7 +86,7 @@ async function resolveChallengeWithTimeout(ctx: RequestContext, params: BaseRequ
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function resolveChallenge(ctx: RequestContext, { url, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Puppeteer.Page): Promise<ChallengeResolutionT | void> {
|
async function resolveChallenge(ctx: RequestContext, { url, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Page): Promise<ChallengeResolutionT | void> {
|
||||||
|
|
||||||
let status = 'ok'
|
let status = 'ok'
|
||||||
let message = ''
|
let message = ''
|
||||||
@ -100,137 +98,15 @@ async function resolveChallenge(ctx: RequestContext, { url, proxy, download, ret
|
|||||||
}
|
}
|
||||||
|
|
||||||
log.debug(`Navigating to... ${url}`)
|
log.debug(`Navigating to... ${url}`)
|
||||||
let response = await page.goto(url, { waitUntil: 'domcontentloaded' })
|
let response: Response = await page.goto(url, { waitUntil: 'domcontentloaded' })
|
||||||
|
|
||||||
log.html(await page.content())
|
log.html(await page.content())
|
||||||
|
|
||||||
// look for challenge
|
// Detect protection services and solve challenges
|
||||||
if (response.headers().server.startsWith('cloudflare')) {
|
try {
|
||||||
log.info('Cloudflare detected')
|
response = await cloudflareProvider(url, page, response);
|
||||||
|
} catch (e) {
|
||||||
if (await page.$('.cf-error-code')) {
|
status = "error";
|
||||||
await page.close()
|
message = "Cloudflare " + e.toString();
|
||||||
return ctx.errorResponse('Cloudflare has blocked this request (Code 1020 Detected).')
|
|
||||||
}
|
|
||||||
|
|
||||||
if (response.status() > 400) {
|
|
||||||
// detect cloudflare wait 5s
|
|
||||||
let selectorFoundCount = 0
|
|
||||||
for (const selector of CHALLENGE_SELECTORS) {
|
|
||||||
const cfChallengeElem = await page.$(selector)
|
|
||||||
if (cfChallengeElem) {
|
|
||||||
selectorFoundCount++
|
|
||||||
log.debug(`'${selector}' challenge element detected.`)
|
|
||||||
log.debug('Waiting for Cloudflare challenge...')
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
await page.waitFor(1000)
|
|
||||||
try {
|
|
||||||
// catch exception timeout in waitForNavigation
|
|
||||||
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 })
|
|
||||||
} catch (error) { }
|
|
||||||
|
|
||||||
try {
|
|
||||||
// catch Execution context was destroyed
|
|
||||||
const cfChallengeElem = await page.$(selector)
|
|
||||||
if (!cfChallengeElem) { break }
|
|
||||||
log.debug('Found challenge element again...')
|
|
||||||
} catch (error)
|
|
||||||
{ }
|
|
||||||
|
|
||||||
response = await page.reload({ waitUntil: 'domcontentloaded' })
|
|
||||||
log.debug('Reloaded page...')
|
|
||||||
log.html(await page.content())
|
|
||||||
}
|
|
||||||
|
|
||||||
log.debug('Validating HTML code...')
|
|
||||||
break
|
|
||||||
} else {
|
|
||||||
log.debug(`No '${selector}' challenge element detected.`)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
log.debug("Number of selector found: " + selectorFoundCount + ", total selector: " + CHALLENGE_SELECTORS.length)
|
|
||||||
if (selectorFoundCount == 0)
|
|
||||||
{
|
|
||||||
await page.close()
|
|
||||||
return ctx.errorResponse('No challenge selectors found, unable to proceed')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// it seems some captcha pages return 200 sometimes
|
|
||||||
if (await page.$('input[name="cf_captcha_kind"]')) {
|
|
||||||
const captchaSolver = getCaptchaSolver()
|
|
||||||
if (captchaSolver) {
|
|
||||||
const captchaStartTimestamp = Date.now()
|
|
||||||
const challengeForm = await page.$('#challenge-form')
|
|
||||||
if (challengeForm) {
|
|
||||||
const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
|
|
||||||
const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
|
|
||||||
const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
|
|
||||||
if (!captchaType) { return ctx.errorResponse('Unknown captcha type!') }
|
|
||||||
|
|
||||||
let sitekey = null
|
|
||||||
if (captchaType != 'hCaptcha' && process.env.CAPTCHA_SOLVER != 'hcaptcha-solver') {
|
|
||||||
const sitekeyElem = await page.$('*[data-sitekey]')
|
|
||||||
if (!sitekeyElem) { return ctx.errorResponse('Could not find sitekey!') }
|
|
||||||
sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
|
|
||||||
}
|
|
||||||
|
|
||||||
log.info('Waiting to receive captcha token to bypass challenge...')
|
|
||||||
const token = await captchaSolver({
|
|
||||||
url,
|
|
||||||
sitekey,
|
|
||||||
type: captchaType
|
|
||||||
})
|
|
||||||
|
|
||||||
if (!token) {
|
|
||||||
await page.close()
|
|
||||||
return ctx.errorResponse('Token solver failed to return a token.')
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const name of TOKEN_INPUT_NAMES) {
|
|
||||||
const input = await page.$(`textarea[name="${name}"]`)
|
|
||||||
if (input) { await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token) }
|
|
||||||
}
|
|
||||||
|
|
||||||
// ignore preset event listeners on the form
|
|
||||||
await page.evaluate(() => {
|
|
||||||
window.addEventListener('submit', (e) => { event.stopPropagation() }, true)
|
|
||||||
})
|
|
||||||
|
|
||||||
// it seems some sites obfuscate their challenge forms
|
|
||||||
// TODO: look into how they do it and come up with a more solid solution
|
|
||||||
try {
|
|
||||||
// this element is added with js and we want to wait for all the js to load before submitting
|
|
||||||
await page.waitForSelector('#challenge-form [type=submit]', { timeout: 5000 })
|
|
||||||
} catch (err) {
|
|
||||||
if (err instanceof TimeoutError) {
|
|
||||||
log.debug(`No '#challenge-form [type=submit]' element detected.`)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// calculates the time it took to solve the captcha
|
|
||||||
const captchaSolveTotalTime = Date.now() - captchaStartTimestamp
|
|
||||||
|
|
||||||
// generates a random wait time
|
|
||||||
const randomWaitTime = (Math.floor(Math.random() * 20) + 10) * 1000
|
|
||||||
|
|
||||||
// waits, if any, time remaining to appear human but stay as fast as possible
|
|
||||||
const timeLeft = randomWaitTime - captchaSolveTotalTime
|
|
||||||
if (timeLeft > 0) { await page.waitFor(timeLeft) }
|
|
||||||
|
|
||||||
// submit captcha response
|
|
||||||
challengeForm.evaluate((e: HTMLFormElement) => e.submit())
|
|
||||||
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })
|
|
||||||
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
status = 'warning'
|
|
||||||
message = 'Captcha detected but no automatic solver is configured.'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.debug("Response is: " + response.status())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const payload: ChallengeResolutionT = {
|
const payload: ChallengeResolutionT = {
|
||||||
@ -278,7 +154,7 @@ function mergeSessionWithParams({ defaults }: SessionsCacheItem, params: BaseReq
|
|||||||
return copy
|
return copy
|
||||||
}
|
}
|
||||||
|
|
||||||
async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browser: Puppeteer.Browser): Promise<Puppeteer.Page> {
|
async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browser: Browser): Promise<Page> {
|
||||||
const page = await browser.newPage()
|
const page = await browser.newPage()
|
||||||
|
|
||||||
// merge session defaults with params
|
// merge session defaults with params
|
||||||
|
Loading…
x
Reference in New Issue
Block a user