Replace Chrome with Firefox and minor improvements

This commit is contained in:
ngosang 2020-06-21 20:45:42 +02:00
parent a38064aa70
commit 5ed7c09160
5 changed files with 66 additions and 44 deletions

View File

@ -8,7 +8,7 @@ WORKDIR /home/node/flaresolverr
COPY package*.json ./
USER node
RUN npm install
RUN PUPPETEER_PRODUCT=firefox npm install
COPY --chown=node:node . .
ENV LOG_LEVEL=info

View File

@ -10,15 +10,18 @@ See the known issues section.
FlareSolverr starts a proxy server and it waits for user requests in idle state using few resources.
When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the
[stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)
to create a headless browser (Chrome). It opens the URL with user parameters and waits until the Cloudflare
to create a headless browser (Firefox). It opens the URL with user parameters and waits until the Cloudflare
challenge is solved (or timeout). The HTML code and the cookies are sent back to the user and those cookies can
be used to bypass Cloudflare using other HTTP clients.
NOTE: Web browsers consume a lot of memory. If you are running FlareSolverr on a machine with little RAM,
do not make many requests at once. With each request a new browser is launched.
### Installation
It requires NodeJS.
Run `npm install` to install FlareSolverr dependencies.
Run `PUPPETEER_PRODUCT=firefox npm install` to install FlareSolverr dependencies.
### Usage
@ -30,13 +33,16 @@ curl -L -X POST 'http://localhost:8191/v1' \
-H 'Content-Type: application/json' \
--data-raw '{
"url":"http://www.google.com/",
"userAgent": "Mozilla/5.0 (X11; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0"
"userAgent": "Mozilla/5.0 (X11; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0",
"maxTimeout": 60000
}'
```
Parameter | Notes
|--|--|
url | Mandatory
userAgent | Optional. Will be used by the headless browser
maxTimeout | Optional. Max timeout to solve the challenge, in milliseconds. Defaults to 60000
cookies | Optional. Will be used by the headless browser. Follow this format https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies
Example response:
```json
@ -106,8 +112,10 @@ I hope this will be fixed soon in the [puppeteer stealth plugin](https://github.
TODO:
* Fix remaining issues in the code (see TODOs)
* Make the maxTimeout configurable by the user
* Make the maxTimeout more accurate (count the time to open the first page)
* Add support for more HTTP methods (POST, PUT, DELETE ...)
* Add support for user HTTP headers
* Hide sensitive information in logs
* Reduce Docker image size
* Docker image for ARM architecture
* Install instructions for Windows

View File

@ -1,8 +1,12 @@
const os = require('os');
const path = require('path');
const fs = require('fs');
const { v1: uuidv1 } = require('uuid');
const log = require('console-log-level')(
{
level: process.env.LOG_LEVEL || 'info',
prefix: function (level) {
return reqCounter.toString() + " " + new Date().toISOString() + " " + level.toUpperCase();
return new Date().toISOString() + " " + level.toUpperCase() + " REQ-" + reqCounter;
}
});
const puppeteer = require('puppeteer-extra');
@ -13,19 +17,17 @@ const version = pjson.version;
const serverPort = process.env.PORT || 8191;
const serverHost = process.env.HOST || '0.0.0.0';
const logHtml = process.env.LOG_HTML || false;
let reqCounter = 0;
// setting "user-agent-override" evasion is not working for us because it can't be changed
// in each request. we set the user-agent in the browser args instead
puppeteer.use(StealthPlugin());
// Help logging
var reqCounter = 0;
http.createServer(function(req, res) {
reqCounter++;
const startTimestamp = Date.now();
log.info('Incoming request: ' + req.method + " " + req.url);
var body = [];
let body = [];
req.on('data', function(chunk) {
body.push(chunk);
}).on('end', function() {
@ -67,6 +69,16 @@ function errorResponse(errorMsg, res, startTimestamp) {
res.end();
}
/**
 * Creates a throw-away Firefox profile directory whose prefs.js overrides the
 * browser User-Agent.
 *
 * The profile is written synchronously so that prefs.js is complete on disk
 * before the caller hands the directory to the browser; an asynchronous write
 * could lose the race against the browser start-up and the override would be
 * silently ignored.
 *
 * @param {string} userAgent - Value for the "general.useragent.override" pref.
 * @returns {string} Path of the newly created profile directory.
 * @throws {Error} If the directory or prefs.js cannot be created.
 */
function prepareBrowserProfile(userAgent) {
  const userDataDir = path.join(os.tmpdir(), '/puppeteer_firefox_profile_' + uuidv1());
  // "recursive: true" makes mkdirSync a no-op when the directory already exists.
  fs.mkdirSync(userDataDir, { recursive: true });
  // The value comes from the request, so escape the characters that would
  // terminate or corrupt the quoted string inside prefs.js.
  const escapedUserAgent = String(userAgent)
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\r/g, '\\r')
    .replace(/\n/g, '\\n');
  const prefs = `user_pref("general.useragent.override", "${escapedUserAgent}");`;
  fs.writeFileSync(path.join(userDataDir, 'prefs.js'), prefs);
  return userDataDir;
}
function validateIncomingRequest(params, req, res, startTimestamp) {
log.info('Params: ' + JSON.stringify(params));
@ -89,25 +101,19 @@ function validateIncomingRequest(params, req, res, startTimestamp) {
}
function processRequest(params, req, res, startTimestamp) {
puppeteerArgs = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-infobars',
'--window-position=0,0',
'--ignore-certifcate-errors',
'--ignore-certifcate-errors-spki-list'
];
let puppeteerOptions = {
product: 'firefox',
headless: true
};
const reqUserAgent = params["userAgent"];
if (reqUserAgent) {
log.debug('Using custom User-Agent: ' + reqUserAgent);
puppeteerArgs.push('--user-agent=' + reqUserAgent);
// TODO: remove the profile after closing the browser
puppeteerOptions['userDataDir'] = prepareBrowserProfile(reqUserAgent);
}
log.debug('Launching headless browser...');
puppeteer.launch({
headless: true,
args: puppeteerArgs
}).then(async browser => {
puppeteer.launch(puppeteerOptions).then(async browser => {
try {
await resolveCallenge(params, browser, res, startTimestamp);
} catch (error) {
@ -117,7 +123,7 @@ function processRequest(params, req, res, startTimestamp) {
}
}).catch(error => {
errorResponse(error.message, res, startTimestamp);
});;
});
}
async function resolveCallenge(params, browser, res, startTimestamp) {
@ -125,36 +131,38 @@ async function resolveCallenge(params, browser, res, startTimestamp) {
const userAgent = await page.evaluate(() => navigator.userAgent);
log.debug("User-Agent: " + userAgent);
const reqUrl = params["url"];
const reqMaxTimeout = params["maxTimeout"] || 60000;
const reqCookies = params["cookies"];
if (reqCookies) {
log.debug('Applying cookies');
log.debug('Using custom cookies');
await page.setCookie(...reqCookies);
}
log.debug("Navegating to... " + reqUrl);
await page.goto(reqUrl, {waitUntil: 'networkidle0'});
await page.goto(reqUrl, {waitUntil: 'domcontentloaded'});
// detect cloudflare
const cloudflareRay = await page.$('.ray_id');
if (cloudflareRay) {
log.debug('Waiting for Cloudflare challenge...');
// page.waitForNavigation and page.waitFor don't work well because Cloudflare refresh the page
// await page.waitForNavigation({ waitUntil: 'networkidle0', timeout: 30000 })
// await page.waitFor(() => !document.querySelector('.ray_id'), {timeout: 30000});
// TODO: get maxTimeout from params
while(Date.now() - startTimestamp < 60000) {
while(Date.now() - startTimestamp < reqMaxTimeout) {
await page.waitFor(1000);
// TODO: catch exception timeout in waitForNavigation
try {
// catch exception timeout in waitForNavigation
await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 });
} catch (error) {}
const cloudflareRay = await page.$('.ray_id');
if (!cloudflareRay)
break;
}
// TODO: throw timeout exception when maxTimeout is exceded
if (Date.now() - startTimestamp >= reqMaxTimeout) {
errorResponse("Maximum timeout reached. maxTimeout=" + reqMaxTimeout + " (ms)", res, startTimestamp);
return;
}
log.debug("Validating HTML code...");

17
package-lock.json generated
View File

@ -10,9 +10,9 @@
"integrity": "sha512-Q1y515GcOdTHgagaVFhHnIFQ38ygs/kmxdNpvpou+raI9UO3YZcHDngBSYKQklcKlvA7iuQlmIKbzvmxcOE9CQ=="
},
"@types/node": {
"version": "14.0.11",
"resolved": "https://registry.npmjs.org/@types/node/-/node-14.0.11.tgz",
"integrity": "sha512-lCvvI24L21ZVeIiyIUHZ5Oflv1hhHQ5E1S25IRlKIXaRkVgmXpJMI3wUJkmym2bTbCe+WoIibQnMVAU3FguaOg=="
"version": "14.0.13",
"resolved": "https://registry.npmjs.org/@types/node/-/node-14.0.13.tgz",
"integrity": "sha512-rouEWBImiRaSJsVA+ITTFM6ZxibuAlTuNOCyxVbwreu6k6+ujs7DfnU9o+PShFhET78pMBl3eH+AGSI5eOTkPA=="
},
"@types/puppeteer": {
"version": "3.0.0",
@ -133,9 +133,9 @@
}
},
"extract-zip": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.0.tgz",
"integrity": "sha512-i42GQ498yibjdvIhivUsRslx608whtGoFIhF26Z7O4MYncBxp8CwalOs1lnHy21A9sIohWO2+uiE4SRtC9JXDg==",
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz",
"integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==",
"requires": {
"@types/yauzl": "^2.9.1",
"debug": "^4.1.1",
@ -491,6 +491,11 @@
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8="
},
"uuid": {
"version": "8.1.0",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-8.1.0.tgz",
"integrity": "sha512-CI18flHDznR0lq54xBycOVmphdCYnQLKn8abKn7PXUiKUGdEd+/l9LWNJmugXel4hXq7S+RMNl34ecyC9TntWg=="
},
"wrappy": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",

View File

@ -16,6 +16,7 @@
"console-log-level": "^1.4.1",
"puppeteer": "^3.3.0",
"puppeteer-extra": "^3.1.9",
"puppeteer-extra-plugin-stealth": "^2.4.9"
"puppeteer-extra-plugin-stealth": "^2.4.9",
"uuid": "^8.1.0"
}
}