From cfd158462fec8586b85042728cf70a63af2e50ea Mon Sep 17 00:00:00 2001 From: ngosang Date: Mon, 18 Oct 2021 00:02:30 +0200 Subject: [PATCH] Add proxy support --- README.md | 1 + src/controllers/v1.ts | 8 +++- src/services/sessions.ts | 49 ++++++++++++++-------- src/services/solver.ts | 18 ++++---- src/tests/app.test.ts | 89 +++++++++++++++++++++++++++++++++++++++- 5 files changed, 138 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index d8dfd40..5e70ab9 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,7 @@ session | Optional. Will send the request from and existing browser instance. If maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format. returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. +proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. Authorization (username/password) is not supported. 
Example response from running the `curl` above: diff --git a/src/controllers/v1.ts b/src/controllers/v1.ts index c733fd2..88c3b0c 100644 --- a/src/controllers/v1.ts +++ b/src/controllers/v1.ts @@ -12,11 +12,17 @@ interface V1Routes { [key: string]: (params: V1RequestBase, response: V1ResponseBase) => Promise } +export interface Proxy { + url?: string + username?: string + password?: string +} + export interface V1RequestBase { cmd: string cookies?: SetCookie[], maxTimeout?: number - proxy?: any// TODO: use interface not any + proxy?: Proxy session: string headers?: Headers // deprecated v2, not used userAgent?: string // deprecated v2, not used diff --git a/src/services/sessions.ts b/src/services/sessions.ts index 60291c2..4d22cb6 100644 --- a/src/services/sessions.ts +++ b/src/services/sessions.ts @@ -6,6 +6,7 @@ import {LaunchOptions, SetCookie, Browser} from 'puppeteer' import log from './log' import {deleteFolderRecursive, sleep} from './utils' +import {Proxy} from "../controllers/v1"; const puppeteer = require('puppeteer'); @@ -34,14 +35,39 @@ function userDataDirFromId(id: string): string { return path.join(os.tmpdir(), `/puppeteer_profile_${id}`) } -function prepareBrowserProfile(id: string): string { - // TODO: maybe pass SessionCreateOptions for loading later? 
+function prepareBrowserProfile(id: string, proxy: Proxy): string { const userDataDir = userDataDirFromId(id) if (!fs.existsSync(userDataDir)) { fs.mkdirSync(userDataDir, { recursive: true }) } + // proxy.url format => http://<host>:<port> + if (proxy && proxy.url) { + let [host, port] = proxy.url.replace(/https?:\/\//g, '').split(':'); + + let prefs = ` + user_pref("browser.newtabpage.enabled", false); + user_pref("browser.startup.homepage", "about:blank"); + user_pref("browser.tabs.warnOnClose", false); + user_pref("toolkit.telemetry.reportingpolicy.firstRun", false); + user_pref("trailhead.firstrun.branches", "nofirstrun-empty"); + user_pref("browser.aboutwelcome.enabled", false); + user_pref("network.proxy.ftp", "${host}"); + user_pref("network.proxy.ftp_port", ${port}); + user_pref("network.proxy.http", "${host}"); + user_pref("network.proxy.http_port", ${port}); + user_pref("network.proxy.share_proxy_settings", true); + user_pref("network.proxy.socks", "${host}"); + user_pref("network.proxy.socks_port", ${port}); + user_pref("network.proxy.ssl", "${host}"); + user_pref("network.proxy.ssl_port", ${port}); + user_pref("network.proxy.type", 1); + ` + + fs.writeFileSync(path.join(userDataDir, './prefs.js'), prefs); + } + return userDataDir } @@ -75,26 +101,15 @@ export async function create(session: string, options: SessionCreateOptions): Pr // todo: cookies can't be set in the session, you need to open the page first - // todo: these args are only supported in chrome - let args = [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage' // issue #45 - ]; - if (options.proxy && options.proxy.url) { - args.push(`--proxy-server=${options.proxy.url}`); - } - + const args: string[] = []; const puppeteerOptions: LaunchOptions = { product: 'firefox', headless: process.env.HEADLESS !== 'false', args } - if (!options.oneTimeSession) { - log.debug('Creating userDataDir for session.') - puppeteerOptions.userDataDir = prepareBrowserProfile(sessionId) - } + 
log.debug('Creating userDataDir for session.') + puppeteerOptions.userDataDir = prepareBrowserProfile(sessionId, options.proxy) // todo: fix native package with firefox // if we are running inside executable binary, change browser path @@ -151,7 +166,7 @@ export async function destroy(id: string): Promise{ const userDataDirPath = userDataDirFromId(id) try { // for some reason this keeps an error from being thrown in Windows, figures - await sleep(5000) + await sleep(100) deleteFolderRecursive(userDataDirPath) } catch (e) { console.error(e) diff --git a/src/services/solver.ts b/src/services/solver.ts index c2acfd1..ada736a 100644 --- a/src/services/solver.ts +++ b/src/services/solver.ts @@ -47,17 +47,19 @@ async function resolveChallenge(params: V1Request, session: SessionsCacheItem): // the user-agent is changed just for linux arm build await page.setUserAgent(sessions.getUserAgent()) - // todo: review + // set the proxy if (params.proxy) { - log.debug("Apply proxy"); - if (params.proxy.username) { - await page.authenticate({ - username: params.proxy.username, - password: params.proxy.password - }); - } + log.debug(`Using proxy: ${params.proxy.url}`); + // todo: credentials are not working + // if (params.proxy.username) { + // await page.authenticate({ + // username: params.proxy.username, + // password: params.proxy.password + // }); + // } } + // go to the page log.debug(`Navigating to... 
${params.url}`) let response: Response = await gotoPage(params, page); diff --git a/src/tests/app.test.ts b/src/tests/app.test.ts index a89af2f..e34962c 100644 --- a/src/tests/app.test.ts +++ b/src/tests/app.test.ts @@ -194,7 +194,81 @@ describe("Test '/v1' path", () => { expect(solution.userAgent).toBe(null) }); - test("Cmd 'request.get' should return timeout", async () => { + test("Cmd 'request.get' should return OK with 'proxy' param", async () => { + /* + To configure TinyProxy in local: + * sudo vim /etc/tinyproxy/tinyproxy.conf + * edit => LogFile "/tmp/tinyproxy.log" + * edit => Syslog Off + * sudo tinyproxy -d + * sudo tail -f /tmp/tinyproxy.log + */ + const payload = { + "cmd": "request.get", + "url": googleUrl, + "proxy": { + "url": "http://127.0.0.1:8888" + } + } + const response: Response = await request(app).post("/v1").send(payload); + expect(response.statusCode).toBe(200); + + const apiResponse: V1ResponseSolution = response.body; + expect(apiResponse.status).toBe("ok"); + + const solution = apiResponse.solution; + expect(solution.url).toContain(googleUrl) + expect(solution.status).toBe(200); + }); + + // todo: credentials are not working + // test("Cmd 'request.get' should return OK with 'proxy' param with credentials", async () => { + // /* + // To configure TinyProxy in local: + // * sudo vim /etc/tinyproxy/tinyproxy.conf + // * edit => LogFile "/tmp/tinyproxy.log" + // * edit => Syslog Off + // * add => BasicAuth testuser testpass + // * sudo tinyproxy -d + // * sudo tail -f /tmp/tinyproxy.log + // */ + // const payload = { + // "cmd": "request.get", + // "url": googleUrl, + // "proxy": { + // "url": "http://127.0.0.1:8888", + // "username": "testuser", + // "password": "testpass" + // } + // } + // const response: Response = await request(app).post("/v1").send(payload); + // expect(response.statusCode).toBe(200); + // + // const apiResponse: V1ResponseSolution = response.body; + // expect(apiResponse.status).toBe("ok"); + // + // const solution 
= apiResponse.solution; + // expect(solution.url).toContain(googleUrl) + // expect(solution.status).toContain(200) + // }); + + test("Cmd 'request.get' should fail with wrong 'proxy' param", async () => { + const payload = { + "cmd": "request.get", + "url": googleUrl, + "proxy": { + "url": "http://127.0.0.1:43210" + } + } + const response: Response = await request(app).post("/v1").send(payload); + expect(response.statusCode).toBe(500); + + const apiResponse: V1ResponseSolution = response.body; + expect(apiResponse.status).toBe("error"); + expect(apiResponse.message).toBe("Error: Unable to process browser request. Error: Error: NS_ERROR_PROXY_CONNECTION_REFUSED at https://www.google.com"); + }); + + test("Cmd 'request.get' should return fail with timeout", async () => { const payload = { "cmd": "request.get", "url": googleUrl, @@ -211,6 +285,19 @@ describe("Test '/v1' path", () => { expect(apiResponse.version).toBe(version); }); + test("Cmd 'request.get' should return fail with bad domain", async () => { + const payload = { + "cmd": "request.get", + "url": "https://www.google.combad", + } + const response: Response = await request(app).post("/v1").send(payload); + expect(response.statusCode).toBe(500); + + const apiResponse: V1ResponseBase = response.body; + expect(apiResponse.status).toBe("error"); + expect(apiResponse.message).toBe("Error: Unable to process browser request. Error: Error: NS_ERROR_UNKNOWN_HOST at https://www.google.combad"); + }); + test("Cmd 'request.get' should accept deprecated params", async () => { const payload = { "cmd": "request.get",