Add proxy support

This commit is contained in:
ngosang 2021-10-18 00:02:30 +02:00
parent ccfe21c15a
commit cfd158462f
5 changed files with 138 additions and 27 deletions

View File

@ -140,6 +140,7 @@ session | Optional. Will send the request from an existing browser instance. If
maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds.
cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format.
returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed.
proxy | Optional, default disabled. E.g.: `"proxy": {"url": "http://127.0.0.1:8888"}`. Proxy authentication (username/password) is not supported.
Example response from running the `curl` above:

View File

@ -12,11 +12,17 @@ interface V1Routes {
[key: string]: (params: V1RequestBase, response: V1ResponseBase) => Promise<void>
}
/**
 * Proxy settings accepted in the `proxy` field of a v1 request.
 */
export interface Proxy {
// Address of the proxy, e.g. "http://127.0.0.1:8888".
url?: string
// Proxy credentials. NOTE(review): accepted by the type, but other notes in
// this change say proxy credentials are not working yet -- confirm before relying on them.
username?: string
password?: string
}
export interface V1RequestBase {
cmd: string
cookies?: SetCookie[],
maxTimeout?: number
proxy?: any// TODO: use interface not any
proxy?: Proxy
session: string
headers?: Headers // deprecated v2, not used
userAgent?: string // deprecated v2, not used

View File

@ -6,6 +6,7 @@ import {LaunchOptions, SetCookie, Browser} from 'puppeteer'
import log from './log'
import {deleteFolderRecursive, sleep} from './utils'
import {Proxy} from "../controllers/v1";
const puppeteer = require('puppeteer');
@ -34,14 +35,39 @@ function userDataDirFromId(id: string): string {
return path.join(os.tmpdir(), `/puppeteer_profile_${id}`)
}
function prepareBrowserProfile(id: string): string {
// TODO: maybe pass SessionCreateOptions for loading later?
/**
 * Extracts the host and port from a proxy address.
 *
 * Accepts `<scheme>://<host>:<port>` as well as a bare `<host>:<port>`.
 * Credentials, paths and trailing slashes in the address are ignored.
 * When the port is omitted it defaults to 80 for `http:` and 443 for `https:`.
 *
 * @param proxyUrl - Proxy address as supplied by the caller.
 * @returns The host name and numeric port of the proxy.
 * @throws Error if the address cannot be parsed or has no usable host/port.
 */
function parseProxyEndpoint(proxyUrl: string): { host: string; port: number } {
  // `new URL()` needs a scheme; a bare "host:port" would otherwise be read as "<scheme>:<path>".
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(proxyUrl)
  let parsed: URL
  try {
    parsed = new URL(hasScheme ? proxyUrl : `http://${proxyUrl}`)
  } catch (e) {
    throw new Error(`Invalid proxy URL: ${proxyUrl}`)
  }

  const host = parsed.hostname
  if (!host) {
    throw new Error(`Invalid proxy URL (missing host): ${proxyUrl}`)
  }

  // URL.port is '' when the port is absent or equals the scheme default.
  let port: number
  if (parsed.port !== '') {
    port = Number(parsed.port)
  } else if (parsed.protocol === 'http:') {
    port = 80
  } else if (parsed.protocol === 'https:') {
    port = 443
  } else {
    throw new Error(`Invalid proxy URL (missing port): ${proxyUrl}`)
  }

  return { host, port }
}

/**
 * Ensures the browser profile directory for a session exists and, when a proxy
 * is requested, writes a `prefs.js` file into it with the proxy preferences.
 *
 * @param id - Session identifier used to derive the profile directory.
 * @param proxy - Optional proxy settings; only `proxy.url` is read here.
 * @returns Path of the profile directory.
 * @throws Error if `proxy.url` is set but is not a valid proxy address.
 */
function prepareBrowserProfile(id: string, proxy?: Proxy): string {
  const userDataDir = userDataDirFromId(id)

  if (!fs.existsSync(userDataDir)) {
    fs.mkdirSync(userDataDir, { recursive: true })
  }

  if (proxy && proxy.url) {
    const { host, port } = parseProxyEndpoint(proxy.url)
    // JSON.stringify yields a double-quoted, escaped literal so the host can
    // never break out of the string inside prefs.js.
    const quotedHost = JSON.stringify(host)
    const prefLines = [
      '',
      'user_pref("browser.newtabpage.enabled", false);',
      'user_pref("browser.startup.homepage", "about:blank");',
      'user_pref("browser.tabs.warnOnClose", false);',
      'user_pref("toolkit.telemetry.reportingpolicy.firstRun", false);',
      'user_pref("trailhead.firstrun.branches", "nofirstrun-empty");',
      'user_pref("browser.aboutwelcome.enabled", false);',
      `user_pref("network.proxy.ftp", ${quotedHost});`,
      `user_pref("network.proxy.ftp_port", ${port});`,
      `user_pref("network.proxy.http", ${quotedHost});`,
      `user_pref("network.proxy.http_port", ${port});`,
      'user_pref("network.proxy.share_proxy_settings", true);',
      `user_pref("network.proxy.socks", ${quotedHost});`,
      `user_pref("network.proxy.socks_port", ${port});`,
      `user_pref("network.proxy.ssl", ${quotedHost});`,
      `user_pref("network.proxy.ssl_port", ${port});`,
      // 1 selects manual proxy configuration in Firefox.
      'user_pref("network.proxy.type", 1);',
      ''
    ]
    fs.writeFileSync(path.join(userDataDir, './prefs.js'), prefLines.join('\n'));
  }

  return userDataDir
}
@ -75,26 +101,15 @@ export async function create(session: string, options: SessionCreateOptions): Pr
// todo: cookies can't be set in the session, you need to open the page first
// todo: these args are only supported in chrome
let args = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage' // issue #45
];
if (options.proxy && options.proxy.url) {
args.push(`--proxy-server=${options.proxy.url}`);
}
const args: string[] = [];
const puppeteerOptions: LaunchOptions = {
product: 'firefox',
headless: process.env.HEADLESS !== 'false',
args
}
if (!options.oneTimeSession) {
log.debug('Creating userDataDir for session.')
puppeteerOptions.userDataDir = prepareBrowserProfile(sessionId)
}
log.debug('Creating userDataDir for session.')
puppeteerOptions.userDataDir = prepareBrowserProfile(sessionId, options.proxy)
// todo: fix native package with firefox
// if we are running inside executable binary, change browser path
@ -151,7 +166,7 @@ export async function destroy(id: string): Promise<boolean>{
const userDataDirPath = userDataDirFromId(id)
try {
// for some reason this keeps an error from being thrown in Windows, figures
await sleep(5000)
await sleep(100)
deleteFolderRecursive(userDataDirPath)
} catch (e) {
console.error(e)

View File

@ -47,17 +47,19 @@ async function resolveChallenge(params: V1Request, session: SessionsCacheItem):
// the user-agent is changed just for linux arm build
await page.setUserAgent(sessions.getUserAgent())
// todo: review
// set the proxy
if (params.proxy) {
log.debug("Apply proxy");
if (params.proxy.username) {
await page.authenticate({
username: params.proxy.username,
password: params.proxy.password
});
}
log.debug(`Using proxy: ${params.proxy.url}`);
// todo: credentials are not working
// if (params.proxy.username) {
// await page.authenticate({
// username: params.proxy.username,
// password: params.proxy.password
// });
// }
}
// go to the page
log.debug(`Navigating to... ${params.url}`)
let response: Response = await gotoPage(params, page);

View File

@ -194,7 +194,81 @@ describe("Test '/v1' path", () => {
expect(solution.userAgent).toBe(null)
});
test("Cmd 'request.get' should return timeout", async () => {
test("Cmd 'request.get' should return OK with 'proxy' param", async () => {
  /*
  This test sends the request through the proxy at the URL used below, so a
  proxy must be reachable there.

  To configure TinyProxy in local:
     * sudo vim /etc/tinyproxy/tinyproxy.conf
        * edit => LogFile "/tmp/tinyproxy.log"
        * edit => Syslog Off
     * sudo tinyproxy -d
     * sudo tail -f /tmp/tinyproxy.log
  */
  const proxiedRequest = {
    "cmd": "request.get",
    "url": googleUrl,
    "proxy": {
      "url": "http://127.0.0.1:8888"
    }
  }
  const httpResponse: Response = await request(app).post("/v1").send(proxiedRequest);
  expect(httpResponse.statusCode).toBe(200);

  // The API call itself must succeed...
  const body: V1ResponseSolution = httpResponse.body;
  expect(body.status).toBe("ok");

  // ...and the page fetched through the proxy must load correctly.
  const {solution} = body;
  expect(solution.url).toContain(googleUrl)
  expect(solution.status).toBe(200);
});
// todo: credentials are not working
// test("Cmd 'request.get' should return OK with 'proxy' param with credentials", async () => {
// /*
// To configure TinyProxy in local:
// * sudo vim /etc/tinyproxy/tinyproxy.conf
// * edit => LogFile "/tmp/tinyproxy.log"
// * edit => Syslog Off
// * add => BasicAuth testuser testpass
// * sudo tinyproxy -d
// * sudo tail -f /tmp/tinyproxy.log
// */
// const payload = {
// "cmd": "request.get",
// "url": googleUrl,
// "proxy": {
// "url": "http://127.0.0.1:8888",
// "username": "testuser",
// "password": "testpass"
// }
// }
// const response: Response = await request(app).post("/v1").send(payload);
// expect(response.statusCode).toBe(200);
//
// const apiResponse: V1ResponseSolution = response.body;
// expect(apiResponse.status).toBe("ok");
//
// const solution = apiResponse.solution;
// expect(solution.url).toContain(googleUrl)
// expect(solution.status).toContain(200)
// });
test("Cmd 'request.get' should fail with wrong 'proxy' param", async () => {
  // Presumably nothing listens on 127.0.0.1:43210, so the browser should
  // report a refused proxy connection.
  const httpResponse: Response = await request(app).post("/v1").send({
    "cmd": "request.get",
    "url": googleUrl,
    "proxy": {
      "url": "http://127.0.0.1:43210"
    }
  });
  expect(httpResponse.statusCode).toBe(500);

  const body: V1ResponseSolution = httpResponse.body;
  expect(body.status).toBe("error");
  expect(body.message).toBe("Error: Unable to process browser request. Error: Error: NS_ERROR_PROXY_CONNECTION_REFUSED at https://www.google.com");
});
test("Cmd 'request.get' should return fail with timeout", async () => {
const payload = {
"cmd": "request.get",
"url": googleUrl,
@ -211,6 +285,19 @@ describe("Test '/v1' path", () => {
expect(apiResponse.version).toBe(version);
});
test("Cmd 'request.get' should return fail with bad domain", async () => {
  // The host below should not resolve, so the request must end in an API error.
  const httpResponse: Response = await request(app).post("/v1").send({
    "cmd": "request.get",
    "url": "https://www.google.combad",
  });
  expect(httpResponse.statusCode).toBe(500);

  const body: V1ResponseBase = httpResponse.body;
  expect(body.status).toBe("error");
  expect(body.message).toBe("Error: Unable to process browser request. Error: Error: NS_ERROR_UNKNOWN_HOST at https://www.google.combad");
});
test("Cmd 'request.get' should accept deprecated params", async () => {
const payload = {
"cmd": "request.get",