Remove outdated captcha solvers

This commit is contained in:
ngosang 2021-10-17 18:25:44 +02:00
parent 744de4d158
commit a0e897067a
7 changed files with 81 additions and 853 deletions

View File

@ -250,37 +250,6 @@ If this is the case, FlareSolverr will return the error `Captcha detected but no
FlareSolverr can be customized to solve the captchas automatically by setting the environment variable `CAPTCHA_SOLVER`
to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
### hcaptcha-solver
This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project.
NOTE: This solver works by picking random images, so it will fail on many requests and it is hard to know whether it
is working or not. In a real use case with Sonarr/Radarr + Jackett it is still useful because those apps make a new
request every 15 minutes. Eventually one of the requests is going to work and Jackett saves the cookie forever (until
it stops working).
To use this solver you must set the environment variable:
```bash
CAPTCHA_SOLVER=hcaptcha-solver
```
### CaptchaHarvester
This method makes use of the [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) project which allows
users to collect their own tokens from ReCaptcha V2/V3 and hCaptcha for free.
To use this method you must set these environment variables:
```bash
CAPTCHA_SOLVER=harvester
HARVESTER_ENDPOINT=https://127.0.0.1:5000/token
```
**Note**: above I set `HARVESTER_ENDPOINT` to the default configuration of the captcha harvester's server, but that
could change if you customize the command line flags. Simply put, `HARVESTER_ENDPOINT` should be set to the URI of the
route that returns a token in plain text when called.
## Related projects
* C# implementation => https://github.com/FlareSolverr/FlareSolverrSharp

713
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -24,7 +24,6 @@
"console-log-level": "^1.4.1",
"express": "^4.17.1",
"got": "^11.8.2",
"hcaptcha-solver": "^1.0.2",
"puppeteer": "^3.3.0",
"uuid": "^8.3.2"
},

View File

@ -1,31 +0,0 @@
import got from 'got'
import { sleep } from '../services/utils'
/**
 * Captcha adapter backed by the CaptchaHarvester project:
 * https://github.com/NoahCardoza/CaptchaHarvester
 *
 * The function takes no arguments: the page url, sitekey and captcha type
 * are not needed here because the harvester server must be preconfigured.
 *
 * ENV:
 *   HARVESTER_ENDPOINT: This must be the full path to the /token endpoint
 *                       of the harvester.
 *                       E.G. "https://127.0.0.1:5000/token"
 *
 * @returns the response body of the harvester endpoint (the token in plain text).
 * @throws Error when `HARVESTER_ENDPOINT` is not set, and rethrows any request
 *         failure other than an HTTP 418 response.
 */
export default async function solve(): Promise<string> {
  const endpoint = process.env.HARVESTER_ENDPOINT
  if (!endpoint) { throw Error('ENV variable `HARVESTER_ENDPOINT` must be set.') }

  while (true) {
    try {
      // Use the validated local instead of re-reading the environment.
      // NOTE(review): certificate verification is disabled, presumably because the
      // harvester serves a self-signed certificate on localhost — confirm before reuse.
      const response = await got.get(endpoint, {
        https: { rejectUnauthorized: false }
      })
      return response.body
    } catch (e) {
      // Only an HTTP 418 answer is treated as "retry later". Errors without a
      // response (connection refused, DNS, timeout) must be rethrown as-is rather
      // than crashing here with a TypeError on `e.response.statusCode`.
      const retryLater = Boolean(e && e.response && e.response.statusCode === 418)
      if (!retryLater) { throw e }
    }
    // Poll again after 3 seconds.
    await sleep(3000)
  }
}

View File

@ -1,25 +0,0 @@
const solveCaptcha = require('hcaptcha-solver');
import { SolverOptions } from '.'
/**
 * Captcha adapter for the hcaptcha-solver project:
 * https://github.com/JimmyLaurent/hcaptcha-solver
 *
 * The adapter is currently disabled and always rejects. The previous
 * implementation (kept here for reference) delegated to the library and
 * returned `null` when it failed:
 *
 *   try {
 *     return await solveCaptcha(url)
 *   } catch (e) {
 *     console.error(e)
 *     return null
 *   }
 *
 * TODO: allow user pass custom options to the solver.
 *
 * ENV:
 *   There are no other variables that must be set to get this to work
 *
 * @param options solver options; only `url` is destructured and it is currently unused.
 * @throws Error always, explaining that the new hCaptcha challenge is not supported.
 */
export default async function solve({ url }: SolverOptions): Promise<string> {
  const unsupported = new Error("hcaptcha-solver is not able to solve the new hCaptcha challenge. This issue is already reported #31.");
  throw unsupported;
}

View File

@ -1,14 +1,14 @@
import {Page, Response} from 'puppeteer'
import log from "../services/log";
import getCaptchaSolver, {CaptchaType} from "../captcha";
/**
* This class contains the logic to solve protections provided by CloudFlare
**/
const BAN_SELECTORS = ['span[data-translate="error"]'];
const CHALLENGE_SELECTORS = ['#trk_jschal_js', '.ray_id', '.attack-box', '#cf-please-wait'];
const TOKEN_INPUT_NAMES = ['g-recaptcha-response', 'h-captcha-response'];
const CAPTCHA_SELECTORS = ['input[name="cf_captcha_kind"]'];
export default async function resolveChallenge(url: string, page: Page, response: Response): Promise<Response> {
@ -19,7 +19,7 @@ export default async function resolveChallenge(url: string, page: Page, response
}
log.info('Cloudflare detected');
if (await page.$('span[data-translate="error"]')) {
if (await findAnySelector(page, BAN_SELECTORS)) {
throw new Error('Cloudflare has blocked this request. Probably your IP is banned for this site, check in your web browser.')
}
@ -95,96 +95,30 @@ export default async function resolveChallenge(url: string, page: Page, response
selectorFound = true;
}
// it seems some captcha pages return 200 sometimes
if (await page.$('input[name="cf_captcha_kind"]')) {
log.info('Captcha challenge detected.');
const captchaSolver = getCaptchaSolver()
if (captchaSolver) {
const captchaStartTimestamp = Date.now()
const challengeForm = await page.$('#challenge-form')
if (challengeForm) {
const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
if (!captchaType) {
throw new Error('Unknown captcha type!');
}
// check for CAPTCHA challenge
if (await findAnySelector(page, CAPTCHA_SELECTORS)) {
log.info('CAPTCHA challenge detected.');
throw new Error('FlareSolverr can not resolve CAPTCHA challenges. Since the captcha doesn\'t always appear, you may have better luck with the next request.');
let sitekey = null
if (captchaType != 'hCaptcha' && process.env.CAPTCHA_SOLVER != 'hcaptcha-solver') {
const sitekeyElem = await page.$('*[data-sitekey]')
if (!sitekeyElem) {
throw new Error('Could not find sitekey!');
}
sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
}
log.info('Waiting to receive captcha token to bypass challenge...')
const token = await captchaSolver({
url,
sitekey,
type: captchaType
})
log.debug(`Token received: ${token}`);
if (!token) {
throw new Error('Token solver failed to return a token.')
}
let responseFieldsFoundCount = 0;
for (const name of TOKEN_INPUT_NAMES) {
const input = await page.$(`textarea[name="${name}"]`)
if (input) {
responseFieldsFoundCount ++;
log.debug(`Challenge response field '${name}' found in challenge form.`);
await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token);
}
}
if (responseFieldsFoundCount == 0) {
throw new Error('Challenge response field not found in challenge form.');
}
// ignore preset event listeners on the form
await page.evaluate(() => {
window.addEventListener('submit', (e) => { e.stopPropagation() }, true)
})
// it seems some sites obfuscate their challenge forms
// TODO: look into how they do it and come up with a more solid solution
try {
// this element is added with js and we want to wait for all the js to load before submitting
await page.waitForSelector('#challenge-form', { timeout: 10000 })
} catch (err) {
throw new Error("No '#challenge-form' element detected.");
}
// calculates the time it took to solve the captcha
const captchaSolveTotalTime = Date.now() - captchaStartTimestamp
// generates a random wait time
const randomWaitTime = (Math.floor(Math.random() * 10) + 10) * 1000
// waits, if any, time remaining to appear human but stay as fast as possible
const timeLeft = randomWaitTime - captchaSolveTotalTime
if (timeLeft > 0) {
log.debug(`Waiting for '${timeLeft}' milliseconds.`);
await page.waitFor(timeLeft);
}
// submit captcha response
await challengeForm.evaluate((e: HTMLFormElement) => e.submit())
response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })
if (await page.$('input[name="cf_captcha_kind"]')) {
throw new Error('Captcha service failed to solve the challenge.');
}
}
} else {
throw new Error('Captcha detected but no automatic solver is configured.');
}
// const captchaSolver = getCaptchaSolver()
// if (captchaSolver) {
// // todo: get the params
// log.info('Waiting to receive captcha token to bypass challenge...')
// const token = await captchaSolver({
// url,
// sitekey,
// type: captchaType
// })
// log.debug(`Token received: ${token}`);
// // todo: send the token
// }
// } else {
// throw new Error('Captcha detected but no automatic solver is configured.');
// }
} else {
if (!selectorFound)
{
throw new Error('No challenge selectors found, unable to proceed')
throw new Error('No challenge selectors found, unable to proceed.')
} else {
// reload the page to make sure we get the real response
// do not use page.reload() to avoid #162 #143

View File

@ -9,6 +9,7 @@ const app = require("../app");
const version: string = require('../../package.json').version
const googleUrl = "https://www.google.com";
const cfUrl = "https://pirateiro.com/torrents/?search=s";
const cfCaptchaUrl = "https://idope.se"
describe("Test '/' path", () => {
test("GET method should return OK ", async () => {
@ -88,7 +89,7 @@ describe("Test '/v1' path", () => {
expect(solution.userAgent).toContain("Firefox/")
});
test("Cmd 'request.get' should return OK with Cloudflare", async () => {
test("Cmd 'request.get' should return OK with Cloudflare JS", async () => {
const payload = {
"cmd": "request.get",
"url": cfUrl
@ -117,6 +118,24 @@ describe("Test '/v1' path", () => {
expect(cfCookie.length).toBeGreaterThan(30)
});
// Verifies that a 'request.get' command against a CAPTCHA-protected site is reported
// as an API-level error instead of being solved.
// NOTE(review): presumably depends on the live site behind `cfCaptchaUrl` actually
// serving a Cloudflare CAPTCHA at test time — confirm; this may be flaky.
test("Cmd 'request.get' should return fail with Cloudflare CAPTCHA", async () => {
const payload = {
"cmd": "request.get",
"url": cfCaptchaUrl
}
const response: Response = await request(app).post("/v1").send(payload);
// the failure is carried in the JSON body; the HTTP status is still expected to be 200
expect(response.statusCode).toBe(200);
const apiResponse: V1ResponseSolution = response.body;
expect(apiResponse.status).toBe("error");
// the message must match the provider error text exactly, including the "Cloudflare Error: " prefix
expect(apiResponse.message).toBe("Cloudflare Error: FlareSolverr can not resolve CAPTCHA challenges. Since the captcha doesn't always appear, you may have better luck with the next request.");
// timestamps must be populated and ordered (end strictly after start)
expect(apiResponse.startTimestamp).toBeGreaterThan(1000);
expect(apiResponse.endTimestamp).toBeGreaterThan(apiResponse.startTimestamp);
expect(apiResponse.version).toBe(version);
// solution is filled but not useful
expect(apiResponse.solution.url).toContain(cfCaptchaUrl)
});
test("Cmd 'request.get' should return timeout", async () => {
const payload = {
"cmd": "request.get",