Replace Chrome with Firefox and minor improvements

This commit is contained in:
ngosang 2020-06-21 20:45:42 +02:00
parent a38064aa70
commit 5ed7c09160
5 changed files with 66 additions and 44 deletions

View File

@ -8,7 +8,7 @@ WORKDIR /home/node/flaresolverr
COPY package*.json ./ COPY package*.json ./
USER node USER node
RUN npm install RUN PUPPETEER_PRODUCT=firefox npm install
COPY --chown=node:node . . COPY --chown=node:node . .
ENV LOG_LEVEL=info ENV LOG_LEVEL=info

View File

@ -10,15 +10,18 @@ See the known issues section.
FlareSolverr starts a proxy server and it waits for user requests in idle state using few resources. FlareSolverr starts a proxy server and it waits for user requests in idle state using few resources.
When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the
[stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth) [stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)
to create an headless browser (Chrome). It opens the URL with user parameters and waits until the Cloudflare to create an headless browser (Firefox). It opens the URL with user parameters and waits until the Cloudflare
challenge is solved (or timeout). The HTML code and the cookies are sent back to the user and those cookies can challenge is solved (or timeout). The HTML code and the cookies are sent back to the user and those cookies can
be used to bypass Cloudflare using other HTTP clients. be used to bypass Cloudflare using other HTTP clients.
NOTE: Web browsers consume a lot of memory. If you are running FlareSolverr on a machine with little RAM,
do not make many requests at once. A new browser is launched for each request.
### Installation ### Installation
It requires NodeJS. It requires NodeJS.
Run `npm install` to install FlareSolverr dependencies. Run `PUPPETEER_PRODUCT=firefox npm install` to install FlareSolverr dependencies.
### Usage ### Usage
@ -30,13 +33,16 @@ curl -L -X POST 'http://localhost:8191/v1' \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
--data-raw '{ --data-raw '{
"url":"http://www.google.com/", "url":"http://www.google.com/",
"userAgent": "Mozilla/5.0 (X11; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0" "userAgent": "Mozilla/5.0 (X11; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0",
"maxTimeout": 60000
}' }'
``` ```
Parameter | Notes Parameter | Notes
|--|--| |--|--|
url | Mandatory url | Mandatory
userAgent | Optional. Will be used by the headless browser userAgent | Optional. Will be used by the headless browser
maxTimeout | Optional. Max timeout to solve the challenge, in milliseconds (default: 60000)
cookies | Optional. Will be used by the headless browser. Follow this format: https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies
Example response: Example response:
```json ```json
@ -106,8 +112,10 @@ I hope this will be fixed soon in the [puppeteer stealth plugin](https://github.
TODO: TODO:
* Fix remaining issues in the code (see TODOs) * Fix remaining issues in the code (see TODOs)
* Make the maxTimeout configurable by the user * Make the maxTimeout more accurate (count the time to open the first page)
* Add support for more HTTP methods (POST, PUT, DELETE ...) * Add support for more HTTP methods (POST, PUT, DELETE ...)
* Add support for user HTTP headers * Add support for user HTTP headers
* Hide sensitive information in logs * Hide sensitive information in logs
* Reduce Docker image size * Reduce Docker image size
* Docker image for ARM architecture
* Install instructions for Windows

View File

@ -1,8 +1,12 @@
const os = require('os');
const path = require('path');
const fs = require('fs');
const { v1: uuidv1 } = require('uuid');
const log = require('console-log-level')( const log = require('console-log-level')(
{ {
level: process.env.LOG_LEVEL || 'info', level: process.env.LOG_LEVEL || 'info',
prefix: function (level) { prefix: function (level) {
return reqCounter.toString() + " " + new Date().toISOString() + " " + level.toUpperCase(); return new Date().toISOString() + " " + level.toUpperCase() + " REQ-" + reqCounter;
} }
}); });
const puppeteer = require('puppeteer-extra'); const puppeteer = require('puppeteer-extra');
@ -13,19 +17,17 @@ const version = pjson.version;
const serverPort = process.env.PORT || 8191; const serverPort = process.env.PORT || 8191;
const serverHost = process.env.HOST || '0.0.0.0'; const serverHost = process.env.HOST || '0.0.0.0';
const logHtml = process.env.LOG_HTML || false; const logHtml = process.env.LOG_HTML || false;
let reqCounter = 0;
// setting "user-agent-override" evasion is not working for us because it can't be changed // setting "user-agent-override" evasion is not working for us because it can't be changed
// in each request. we set the user-agent in the browser args instead // in each request. we set the user-agent in the browser args instead
puppeteer.use(StealthPlugin()); puppeteer.use(StealthPlugin());
// Help logging
var reqCounter = 0;
http.createServer(function(req, res) { http.createServer(function(req, res) {
reqCounter++; reqCounter++;
const startTimestamp = Date.now(); const startTimestamp = Date.now();
log.info('Incoming request: ' + req.method + " " + req.url); log.info('Incoming request: ' + req.method + " " + req.url);
var body = []; let body = [];
req.on('data', function(chunk) { req.on('data', function(chunk) {
body.push(chunk); body.push(chunk);
}).on('end', function() { }).on('end', function() {
@ -67,6 +69,16 @@ function errorResponse(errorMsg, res, startTimestamp) {
res.end(); res.end();
} }
/**
 * Creates a fresh, uniquely named Firefox profile directory under the OS
 * temporary directory and writes a `prefs.js` into it that overrides the
 * browser's User-Agent.
 *
 * The profile is written synchronously so that it is guaranteed to exist on
 * disk by the time the caller hands the directory to the browser launcher;
 * any filesystem error is thrown to the caller instead of being ignored.
 *
 * @param {string} userAgent - User-Agent string to set as `general.useragent.override`.
 * @returns {string} Absolute path of the created profile directory.
 * @throws {Error} If the directory or the `prefs.js` file cannot be created.
 */
function prepareBrowserProfile(userAgent) {
    const userDataDir = path.join(os.tmpdir(), 'puppeteer_firefox_profile_' + uuidv1());
    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(userDataDir, { recursive: true });

    // The value comes from the incoming request: escape the characters that
    // would otherwise terminate the quoted pref string or break the line, so a
    // crafted User-Agent cannot corrupt the file or inject additional prefs.
    const escapedUserAgent = String(userAgent)
        .replace(/\\/g, '\\\\')
        .replace(/"/g, '\\"')
        .replace(/\r/g, '\\r')
        .replace(/\n/g, '\\n');
    const prefs = `user_pref("general.useragent.override", "${escapedUserAgent}");`;

    fs.writeFileSync(path.join(userDataDir, 'prefs.js'), prefs);
    return userDataDir;
}
function validateIncomingRequest(params, req, res, startTimestamp) { function validateIncomingRequest(params, req, res, startTimestamp) {
log.info('Params: ' + JSON.stringify(params)); log.info('Params: ' + JSON.stringify(params));
@ -89,25 +101,19 @@ function validateIncomingRequest(params, req, res, startTimestamp) {
} }
function processRequest(params, req, res, startTimestamp) { function processRequest(params, req, res, startTimestamp) {
puppeteerArgs = [ let puppeteerOptions = {
'--no-sandbox', product: 'firefox',
'--disable-setuid-sandbox', headless: true
'--disable-infobars', };
'--window-position=0,0',
'--ignore-certifcate-errors',
'--ignore-certifcate-errors-spki-list'
];
const reqUserAgent = params["userAgent"]; const reqUserAgent = params["userAgent"];
if (reqUserAgent) { if (reqUserAgent) {
log.debug('Using custom User-Agent: ' + reqUserAgent); log.debug('Using custom User-Agent: ' + reqUserAgent);
puppeteerArgs.push('--user-agent=' + reqUserAgent); // TODO: remove the profile after closing the browser
puppeteerOptions['userDataDir'] = prepareBrowserProfile(reqUserAgent);
} }
log.debug('Launching headless browser...'); log.debug('Launching headless browser...');
puppeteer.launch({ puppeteer.launch(puppeteerOptions).then(async browser => {
headless: true,
args: puppeteerArgs
}).then(async browser => {
try { try {
await resolveCallenge(params, browser, res, startTimestamp); await resolveCallenge(params, browser, res, startTimestamp);
} catch (error) { } catch (error) {
@ -117,7 +123,7 @@ function processRequest(params, req, res, startTimestamp) {
} }
}).catch(error => { }).catch(error => {
errorResponse(error.message, res, startTimestamp); errorResponse(error.message, res, startTimestamp);
});; });
} }
async function resolveCallenge(params, browser, res, startTimestamp) { async function resolveCallenge(params, browser, res, startTimestamp) {
@ -125,38 +131,40 @@ async function resolveCallenge(params, browser, res, startTimestamp) {
const userAgent = await page.evaluate(() => navigator.userAgent); const userAgent = await page.evaluate(() => navigator.userAgent);
log.debug("User-Agent: " + userAgent); log.debug("User-Agent: " + userAgent);
const reqUrl = params["url"]; const reqUrl = params["url"];
const reqMaxTimeout = params["maxTimeout"] || 60000;
const reqCookies = params["cookies"]; const reqCookies = params["cookies"];
if (reqCookies) { if (reqCookies) {
log.debug('Applying cookies'); log.debug('Using custom cookies');
await page.setCookie(...reqCookies); await page.setCookie(...reqCookies);
} }
log.debug("Navegating to... " + reqUrl); log.debug("Navegating to... " + reqUrl);
await page.goto(reqUrl, {waitUntil: 'networkidle0'}); await page.goto(reqUrl, {waitUntil: 'domcontentloaded'});
// detect cloudflare // detect cloudflare
const cloudflareRay = await page.$('.ray_id'); const cloudflareRay = await page.$('.ray_id');
if (cloudflareRay) { if (cloudflareRay) {
log.debug('Waiting for Cloudflare challenge...'); log.debug('Waiting for Cloudflare challenge...');
// page.waitForNavigation and page.waitFor don't work well because Cloudflare refresh the page while(Date.now() - startTimestamp < reqMaxTimeout) {
// await page.waitForNavigation({ waitUntil: 'networkidle0', timeout: 30000 })
// await page.waitFor(() => !document.querySelector('.ray_id'), {timeout: 30000});
// TODO: get maxTimeout from params
while(Date.now() - startTimestamp < 60000) {
await page.waitFor(1000); await page.waitFor(1000);
// TODO: catch exception timeout in waitForNavigation try {
await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 }); // catch exception timeout in waitForNavigation
await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 });
} catch (error) {}
const cloudflareRay = await page.$('.ray_id'); const cloudflareRay = await page.$('.ray_id');
if (!cloudflareRay) if (!cloudflareRay)
break; break;
// TODO: throw timeout exception when maxTimeout is exceded
} }
if (Date.now() - startTimestamp >= reqMaxTimeout) {
errorResponse("Maximum timeout reached. maxTimeout=" + reqMaxTimeout + " (ms)", res, startTimestamp);
return;
}
log.debug("Validating HTML code..."); log.debug("Validating HTML code...");
const html = await page.content(); const html = await page.content();
if (html.includes("captcha-bypass") || html.includes("__cf_chl_captcha_tk__")) { if (html.includes("captcha-bypass") || html.includes("__cf_chl_captcha_tk__")) {

17
package-lock.json generated
View File

@ -10,9 +10,9 @@
"integrity": "sha512-Q1y515GcOdTHgagaVFhHnIFQ38ygs/kmxdNpvpou+raI9UO3YZcHDngBSYKQklcKlvA7iuQlmIKbzvmxcOE9CQ==" "integrity": "sha512-Q1y515GcOdTHgagaVFhHnIFQ38ygs/kmxdNpvpou+raI9UO3YZcHDngBSYKQklcKlvA7iuQlmIKbzvmxcOE9CQ=="
}, },
"@types/node": { "@types/node": {
"version": "14.0.11", "version": "14.0.13",
"resolved": "https://registry.npmjs.org/@types/node/-/node-14.0.11.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-14.0.13.tgz",
"integrity": "sha512-lCvvI24L21ZVeIiyIUHZ5Oflv1hhHQ5E1S25IRlKIXaRkVgmXpJMI3wUJkmym2bTbCe+WoIibQnMVAU3FguaOg==" "integrity": "sha512-rouEWBImiRaSJsVA+ITTFM6ZxibuAlTuNOCyxVbwreu6k6+ujs7DfnU9o+PShFhET78pMBl3eH+AGSI5eOTkPA=="
}, },
"@types/puppeteer": { "@types/puppeteer": {
"version": "3.0.0", "version": "3.0.0",
@ -133,9 +133,9 @@
} }
}, },
"extract-zip": { "extract-zip": {
"version": "2.0.0", "version": "2.0.1",
"resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.0.tgz", "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz",
"integrity": "sha512-i42GQ498yibjdvIhivUsRslx608whtGoFIhF26Z7O4MYncBxp8CwalOs1lnHy21A9sIohWO2+uiE4SRtC9JXDg==", "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==",
"requires": { "requires": {
"@types/yauzl": "^2.9.1", "@types/yauzl": "^2.9.1",
"debug": "^4.1.1", "debug": "^4.1.1",
@ -491,6 +491,11 @@
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8="
}, },
"uuid": {
"version": "8.1.0",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-8.1.0.tgz",
"integrity": "sha512-CI18flHDznR0lq54xBycOVmphdCYnQLKn8abKn7PXUiKUGdEd+/l9LWNJmugXel4hXq7S+RMNl34ecyC9TntWg=="
},
"wrappy": { "wrappy": {
"version": "1.0.2", "version": "1.0.2",
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",

View File

@ -16,6 +16,7 @@
"console-log-level": "^1.4.1", "console-log-level": "^1.4.1",
"puppeteer": "^3.3.0", "puppeteer": "^3.3.0",
"puppeteer-extra": "^3.1.9", "puppeteer-extra": "^3.1.9",
"puppeteer-extra-plugin-stealth": "^2.4.9" "puppeteer-extra-plugin-stealth": "^2.4.9",
"uuid": "^8.1.0"
} }
} }