diff --git a/Dockerfile b/Dockerfile index c62a149..69e5081 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ WORKDIR /home/node/flaresolverr COPY package*.json ./ USER node -RUN npm install +RUN PUPPETEER_PRODUCT=firefox npm install COPY --chown=node:node . . ENV LOG_LEVEL=info diff --git a/README.md b/README.md index 1fd9bd1..e659457 100644 --- a/README.md +++ b/README.md @@ -10,15 +10,18 @@ See the known issues section. FlareSolverr starts a proxy server and it waits for user requests in idle state using few resources. When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the [stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth) -to create an headless browser (Chrome). It opens the URL with user parameters and waits until the Cloudflare +to create a headless browser (Firefox). It opens the URL with user parameters and waits until the Cloudflare challenge is solved (or timeout). The HTML code and the cookies are sent back to the user and those cookies can be used to bypass Cloudflare using other HTTP clients. +NOTE: Web browsers consume a lot of memory. If you are running FlareSolverr on a machine with little RAM, +do not make many requests at once. With each request a new browser is launched. + ### Installation It requires NodeJS. -Run `npm install` to install FlareSolverr dependencies. +Run `PUPPETEER_PRODUCT=firefox npm install` to install FlareSolverr dependencies. ### Usage @@ -30,13 +33,16 @@ curl -L -X POST 'http://localhost:8191/v1' \ -H 'Content-Type: application/json' \ --data-raw '{ "url":"http://www.google.com/", - "userAgent": "Mozilla/5.0 (X11; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0" + "userAgent": "Mozilla/5.0 (X11; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0", + "maxTimeout": 60000 }' ``` Parameter | Notes |--|--| url | Mandatory userAgent | Optional. Will be used by the headless browser +maxTimeout | Optional.
Max timeout to solve the challenge +cookies | Optional. Will be used by the headless browser. Follow this format https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies Example response: ```json @@ -106,8 +112,10 @@ I hope this will be fixed soon in the [puppeteer stealth plugin](https://github. TODO: * Fix remaining issues in the code (see TODOs) -* Make the maxTimeout configurable by the user +* Make the maxTimeout more accurate (count the time to open the first page) * Add support for more HTTP methods (POST, PUT, DELETE ...) * Add support for user HTTP headers * Hide sensitive information in logs * Reduce Docker image size +* Docker image for ARM architecture +* Install instructions for Windows diff --git a/index.js b/index.js index 65c2519..bf53770 100755 --- a/index.js +++ b/index.js @@ -1,8 +1,12 @@ +const os = require('os'); +const path = require('path'); +const fs = require('fs'); +const { v1: uuidv1 } = require('uuid'); const log = require('console-log-level')( { level: process.env.LOG_LEVEL || 'info', prefix: function (level) { - return reqCounter.toString() + " " + new Date().toISOString() + " " + level.toUpperCase(); + return new Date().toISOString() + " " + level.toUpperCase() + " REQ-" + reqCounter; } }); const puppeteer = require('puppeteer-extra'); @@ -13,19 +17,17 @@ const version = pjson.version; const serverPort = process.env.PORT || 8191; const serverHost = process.env.HOST || '0.0.0.0'; const logHtml = process.env.LOG_HTML || false; +let reqCounter = 0; // setting "user-agent-override" evasion is not working for us because it can't be changed // in each request. 
we set the user-agent in the browser args instead puppeteer.use(StealthPlugin()); -// Help logging -var reqCounter = 0; - http.createServer(function(req, res) { reqCounter++; const startTimestamp = Date.now(); log.info('Incoming request: ' + req.method + " " + req.url); - var body = []; + let body = []; req.on('data', function(chunk) { body.push(chunk); }).on('end', function() { @@ -67,6 +69,16 @@ function errorResponse(errorMsg, res, startTimestamp) { res.end(); } +function prepareBrowserProfile(userAgent) { + const userDataDir = path.join(os.tmpdir(), '/puppeteer_firefox_profile_' + uuidv1()); + if (!fs.existsSync(userDataDir)) { + fs.mkdirSync(userDataDir, { recursive: true }) + } + const prefs = `user_pref("general.useragent.override", "${userAgent}");`; + fs.writeFile(path.join(userDataDir, 'prefs.js'), prefs, () => {}); + return userDataDir; +} + function validateIncomingRequest(params, req, res, startTimestamp) { log.info('Params: ' + JSON.stringify(params)); @@ -89,25 +101,19 @@ function validateIncomingRequest(params, req, res, startTimestamp) { } function processRequest(params, req, res, startTimestamp) { - puppeteerArgs = [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-infobars', - '--window-position=0,0', - '--ignore-certifcate-errors', - '--ignore-certifcate-errors-spki-list' - ]; + let puppeteerOptions = { + product: 'firefox', + headless: true + }; const reqUserAgent = params["userAgent"]; if (reqUserAgent) { log.debug('Using custom User-Agent: ' + reqUserAgent); - puppeteerArgs.push('--user-agent=' + reqUserAgent); + // TODO: remove the profile after closing the browser + puppeteerOptions['userDataDir'] = prepareBrowserProfile(reqUserAgent); } log.debug('Launching headless browser...'); - puppeteer.launch({ - headless: true, - args: puppeteerArgs - }).then(async browser => { + puppeteer.launch(puppeteerOptions).then(async browser => { try { await resolveCallenge(params, browser, res, startTimestamp); } catch (error) { @@ -117,7 +123,7 
@@ function processRequest(params, req, res, startTimestamp) { } }).catch(error => { errorResponse(error.message, res, startTimestamp); - });; + }); } async function resolveCallenge(params, browser, res, startTimestamp) { @@ -125,38 +131,40 @@ async function resolveCallenge(params, browser, res, startTimestamp) { const userAgent = await page.evaluate(() => navigator.userAgent); log.debug("User-Agent: " + userAgent); const reqUrl = params["url"]; + const reqMaxTimeout = params["maxTimeout"] || 60000; const reqCookies = params["cookies"]; if (reqCookies) { - log.debug('Applying cookies'); + log.debug('Using custom cookies'); await page.setCookie(...reqCookies); } log.debug("Navegating to... " + reqUrl); - await page.goto(reqUrl, {waitUntil: 'networkidle0'}); + await page.goto(reqUrl, {waitUntil: 'domcontentloaded'}); // detect cloudflare const cloudflareRay = await page.$('.ray_id'); if (cloudflareRay) { log.debug('Waiting for Cloudflare challenge...'); - // page.waitForNavigation and page.waitFor don't work well because Cloudflare refresh the page - // await page.waitForNavigation({ waitUntil: 'networkidle0', timeout: 30000 }) - // await page.waitFor(() => !document.querySelector('.ray_id'), {timeout: 30000}); - - // TODO: get maxTimeout from params - while(Date.now() - startTimestamp < 60000) { + while(Date.now() - startTimestamp < reqMaxTimeout) { await page.waitFor(1000); - // TODO: catch exception timeout in waitForNavigation - await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 }); + try { + // catch exception timeout in waitForNavigation + await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 }); + } catch (error) {} + const cloudflareRay = await page.$('.ray_id'); if (!cloudflareRay) break; - - // TODO: throw timeout exception when maxTimeout is exceded } - + + if (Date.now() - startTimestamp >= reqMaxTimeout) { + errorResponse("Maximum timeout reached. 
maxTimeout=" + reqMaxTimeout + " (ms)", res, startTimestamp); + return; + } + log.debug("Validating HTML code..."); const html = await page.content(); if (html.includes("captcha-bypass") || html.includes("__cf_chl_captcha_tk__")) { diff --git a/package-lock.json b/package-lock.json index 8d0f0b8..f93e198 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,9 +10,9 @@ "integrity": "sha512-Q1y515GcOdTHgagaVFhHnIFQ38ygs/kmxdNpvpou+raI9UO3YZcHDngBSYKQklcKlvA7iuQlmIKbzvmxcOE9CQ==" }, "@types/node": { - "version": "14.0.11", - "resolved": "https://registry.npmjs.org/@types/node/-/node-14.0.11.tgz", - "integrity": "sha512-lCvvI24L21ZVeIiyIUHZ5Oflv1hhHQ5E1S25IRlKIXaRkVgmXpJMI3wUJkmym2bTbCe+WoIibQnMVAU3FguaOg==" + "version": "14.0.13", + "resolved": "https://registry.npmjs.org/@types/node/-/node-14.0.13.tgz", + "integrity": "sha512-rouEWBImiRaSJsVA+ITTFM6ZxibuAlTuNOCyxVbwreu6k6+ujs7DfnU9o+PShFhET78pMBl3eH+AGSI5eOTkPA==" }, "@types/puppeteer": { "version": "3.0.0", @@ -133,9 +133,9 @@ } }, "extract-zip": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.0.tgz", - "integrity": "sha512-i42GQ498yibjdvIhivUsRslx608whtGoFIhF26Z7O4MYncBxp8CwalOs1lnHy21A9sIohWO2+uiE4SRtC9JXDg==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", + "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==", "requires": { "@types/yauzl": "^2.9.1", "debug": "^4.1.1", @@ -491,6 +491,11 @@ "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" }, + "uuid": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.1.0.tgz", + "integrity": "sha512-CI18flHDznR0lq54xBycOVmphdCYnQLKn8abKn7PXUiKUGdEd+/l9LWNJmugXel4hXq7S+RMNl34ecyC9TntWg==" + }, "wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", diff --git 
a/package.json b/package.json index 76b9fd4..219490a 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "console-log-level": "^1.4.1", "puppeteer": "^3.3.0", "puppeteer-extra": "^3.1.9", - "puppeteer-extra-plugin-stealth": "^2.4.9" + "puppeteer-extra-plugin-stealth": "^2.4.9", + "uuid": "^8.1.0" } }