mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-06-08 04:25:25 +00:00
Code clean up, remove returnRawHtml, download, headers params
This commit is contained in:
parent
a0e897067a
commit
a5b3e08e1f
18
README.md
18
README.md
@ -86,10 +86,7 @@ curl -L -X POST 'http://localhost:8191/v1' \
|
||||
--data-raw '{
|
||||
"cmd": "request.get",
|
||||
"url":"http://www.google.com/",
|
||||
"maxTimeout": 60000,
|
||||
"headers": {
|
||||
"X-Test": "Testing 123..."
|
||||
}
|
||||
"maxTimeout": 60000
|
||||
}'
|
||||
```
|
||||
|
||||
@ -140,11 +137,9 @@ Parameter | Notes
|
||||
|--|--|
|
||||
url | Mandatory
|
||||
session | Optional. Will send the request from an existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed.
|
||||
headers | Optional. To specify user headers.
|
||||
maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds.
|
||||
cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format.
|
||||
returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed.
|
||||
returnRawHtml | Optional, default false. The response data will be returned without JS processing. This is useful for JSON or plain text content.
|
||||
|
||||
Example response from running the `curl` above:
|
||||
|
||||
@ -211,16 +206,7 @@ This is the same as `request.get` but it takes one more param:
|
||||
|
||||
Parameter | Notes
|
||||
|--|--|
|
||||
postData | Must be a string. If you want to POST a form, don't forget to set the `Content-Type` header to `application/x-www-form-urlencoded` or the server might not understand your request.
|
||||
|
||||
### Download small files
|
||||
|
||||
If you need to access an image/pdf or small file, you should pass the `download` parameter to `request.get` setting it
|
||||
to `true`. Rather than access the html and return text it will return the buffer **base64** encoded which you will be
|
||||
able to decode and save the image/pdf.
|
||||
|
||||
This method isn't recommended for videos or anything larger, as that should be streamed back to the client and at the
|
||||
moment there is nothing set up to do so. If this is something you need, feel free to create an issue and/or submit a PR.
|
||||
postData | Must be a string. To POST a form, encode the body as `application/x-www-form-urlencoded` (e.g. `a=b&c=d`).
|
||||
|
||||
## Environment variables
|
||||
|
||||
|
@ -15,11 +15,11 @@ interface V1Routes {
|
||||
export interface V1RequestBase {
|
||||
cmd: string
|
||||
cookies?: SetCookie[],
|
||||
headers?: Headers
|
||||
maxTimeout?: number
|
||||
proxy?: any// TODO: use interface not any
|
||||
session: string
|
||||
userAgent?: string // deprecated, not used
|
||||
headers?: Headers // deprecated v2, not used
|
||||
userAgent?: string // deprecated v2, not used
|
||||
}
|
||||
|
||||
interface V1RequestSession extends V1RequestBase {
|
||||
@ -29,9 +29,9 @@ export interface V1Request extends V1RequestBase {
|
||||
url: string
|
||||
method?: HttpMethod
|
||||
postData?: string
|
||||
download?: boolean
|
||||
returnOnlyCookies?: boolean
|
||||
returnRawHtml?: boolean
|
||||
download?: boolean // deprecated v2, not used
|
||||
returnRawHtml?: boolean // deprecated v2, not used
|
||||
}
|
||||
|
||||
export interface V1ResponseBase {
|
||||
@ -59,7 +59,6 @@ export const routes: V1Routes = {
|
||||
const options: SessionCreateOptions = {
|
||||
oneTimeSession: false,
|
||||
cookies: params.cookies,
|
||||
headers: params.headers,
|
||||
maxTimeout: params.maxTimeout,
|
||||
proxy: params.proxy
|
||||
}
|
||||
@ -87,12 +86,15 @@ export const routes: V1Routes = {
|
||||
},
|
||||
'request.get': async (params: V1Request, response: V1ResponseSolution): Promise<void> => {
|
||||
params.method = 'GET'
|
||||
if (params.userAgent) {
|
||||
log.warn('Request parameter "userAgent" was removed in FlareSolverr v2.')
|
||||
}
|
||||
if (params.postData) {
|
||||
throw Error('Cannot use "postBody" when sending a GET request.')
|
||||
}
|
||||
if (params.returnRawHtml) {
|
||||
log.warn("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
|
||||
}
|
||||
if (params.download) {
|
||||
log.warn("Request parameter 'download' was removed in FlareSolverr v2.")
|
||||
}
|
||||
const result: ChallengeResolutionT = await browserRequest(params)
|
||||
|
||||
response.status = result.status;
|
||||
@ -104,12 +106,15 @@ export const routes: V1Routes = {
|
||||
},
|
||||
'request.post': async (params: V1Request, response: V1ResponseSolution): Promise<void> => {
|
||||
params.method = 'POST'
|
||||
if (params.userAgent) {
|
||||
log.warn('Request parameter "userAgent" was removed in FlareSolverr v2.')
|
||||
}
|
||||
if (!params.postData) {
|
||||
throw Error('Must send param "postBody" when sending a POST request.')
|
||||
}
|
||||
if (params.returnRawHtml) {
|
||||
log.warn("Request parameter 'returnRawHtml' was removed in FlareSolverr v2.")
|
||||
}
|
||||
if (params.download) {
|
||||
log.warn("Request parameter 'download' was removed in FlareSolverr v2.")
|
||||
}
|
||||
const result: ChallengeResolutionT = await browserRequest(params)
|
||||
|
||||
response.status = result.status;
|
||||
@ -133,8 +138,15 @@ export async function controllerV1(req: Request, res: Response): Promise<void> {
|
||||
try {
|
||||
const params: V1RequestBase = req.body
|
||||
if (!params.cmd) {
|
||||
throw Error("Parameter 'cmd' is mandatory.")
|
||||
throw Error("Request parameter 'cmd' is mandatory.")
|
||||
}
|
||||
if (params.headers) {
|
||||
log.warn("Request parameter 'headers' was removed in FlareSolverr v2.")
|
||||
}
|
||||
if (params.userAgent) {
|
||||
log.warn("Request parameter 'userAgent' was removed in FlareSolverr v2.")
|
||||
}
|
||||
|
||||
const route = routes[params.cmd]
|
||||
if (route) {
|
||||
await route(params, response)
|
||||
|
@ -2,22 +2,17 @@ import {v1 as UUIDv1} from 'uuid'
|
||||
import * as os from 'os'
|
||||
import * as path from 'path'
|
||||
import * as fs from 'fs'
|
||||
import {LaunchOptions, Headers, SetCookie, Browser} from 'puppeteer'
|
||||
import {LaunchOptions, SetCookie, Browser} from 'puppeteer'
|
||||
|
||||
import log from './log'
|
||||
import {deleteFolderRecursive, sleep, removeEmptyFields} from './utils'
|
||||
import {deleteFolderRecursive, sleep} from './utils'
|
||||
|
||||
const puppeteer = require('puppeteer');
|
||||
|
||||
interface SessionPageDefaults {
|
||||
headers?: Headers
|
||||
}
|
||||
|
||||
export interface SessionsCacheItem {
|
||||
sessionId: string
|
||||
browser: Browser
|
||||
userDataDir?: string
|
||||
defaults: SessionPageDefaults
|
||||
}
|
||||
|
||||
interface SessionsCache {
|
||||
@ -27,7 +22,6 @@ interface SessionsCache {
|
||||
export interface SessionCreateOptions {
|
||||
oneTimeSession: boolean
|
||||
cookies?: SetCookie[],
|
||||
headers?: Headers
|
||||
maxTimeout?: number
|
||||
proxy?: any// TODO: use interface not any
|
||||
}
|
||||
@ -79,6 +73,8 @@ export async function testWebBrowserInstallation(): Promise<void> {
|
||||
export async function create(session: string, options: SessionCreateOptions): Promise<SessionsCacheItem> {
|
||||
const sessionId = session || UUIDv1()
|
||||
|
||||
// todo: cookies can't be set in the session, you need to open the page first
|
||||
|
||||
// todo: these args are only supported in chrome
|
||||
let args = [
|
||||
'--no-sandbox',
|
||||
@ -126,18 +122,14 @@ export async function create(session: string, options: SessionCreateOptions): Pr
|
||||
}
|
||||
}
|
||||
|
||||
if (!browser) { throw Error(`Failed to launch browser 3 times in a row.`) }
|
||||
|
||||
if (options.cookies) {
|
||||
const page = await browser.newPage()
|
||||
await page.setCookie(...options.cookies)
|
||||
if (!browser) {
|
||||
throw Error(`Failed to launch browser 3 times in a row.`)
|
||||
}
|
||||
|
||||
sessionCache[sessionId] = {
|
||||
sessionId: sessionId,
|
||||
browser: browser,
|
||||
userDataDir: puppeteerOptions.userDataDir,
|
||||
defaults: removeEmptyFields(options) // todo: review
|
||||
userDataDir: puppeteerOptions.userDataDir
|
||||
}
|
||||
|
||||
return sessionCache[sessionId]
|
||||
|
@ -1,8 +1,8 @@
|
||||
import {Response, Headers, Page, Browser} from 'puppeteer'
|
||||
import {Response, Headers, Page} from 'puppeteer'
|
||||
const Timeout = require('await-timeout');
|
||||
|
||||
import log from './log'
|
||||
import {SessionsCacheItem} from "./sessions";
|
||||
import {SessionCreateOptions, SessionsCacheItem} from "./sessions";
|
||||
import {V1Request} from "../controllers/v1";
|
||||
import cloudflareProvider from '../providers/cloudflare';
|
||||
|
||||
@ -23,22 +23,11 @@ export interface ChallengeResolutionT {
|
||||
result: ChallengeResolutionResultT
|
||||
}
|
||||
|
||||
// interface OverrideResolvers {
|
||||
// method?: (request: Request) => HttpMethod,
|
||||
// postData?: (request: Request) => string,
|
||||
// headers?: (request: Request) => Headers
|
||||
// }
|
||||
//
|
||||
// type OverridesProps =
|
||||
// 'method' |
|
||||
// 'postData' |
|
||||
// 'headers'
|
||||
|
||||
async function resolveChallengeWithTimeout(params: V1Request, page: Page) {
|
||||
async function resolveChallengeWithTimeout(params: V1Request, session: SessionsCacheItem) {
|
||||
const maxTimeout = params.maxTimeout || 60000
|
||||
const timer = new Timeout();
|
||||
try {
|
||||
const promise = resolveChallenge(params, page);
|
||||
const promise = resolveChallenge(params, session);
|
||||
return await Promise.race([
|
||||
promise,
|
||||
timer.set(maxTimeout, `Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`)
|
||||
@ -48,167 +37,107 @@ async function resolveChallengeWithTimeout(params: V1Request, page: Page) {
|
||||
}
|
||||
}
|
||||
|
||||
async function resolveChallenge({ url, proxy, download, returnOnlyCookies, returnRawHtml }: V1Request,
|
||||
page: Page): Promise<ChallengeResolutionT | void> {
|
||||
|
||||
let status = 'ok'
|
||||
let message = ''
|
||||
|
||||
if (proxy) {
|
||||
log.debug("Apply proxy");
|
||||
if (proxy.username)
|
||||
await page.authenticate({ username: proxy.username, password: proxy.password });
|
||||
}
|
||||
|
||||
log.debug(`Navigating to... ${url}`)
|
||||
let response: Response = await page.goto(url, { waitUntil: 'domcontentloaded' })
|
||||
log.html(await page.content())
|
||||
|
||||
// Detect protection services and solve challenges
|
||||
async function resolveChallenge(params: V1Request, session: SessionsCacheItem): Promise<ChallengeResolutionT | void> {
|
||||
try {
|
||||
response = await cloudflareProvider(url, page, response);
|
||||
} catch (e) {
|
||||
status = "error";
|
||||
message = "Cloudflare " + e.toString();
|
||||
}
|
||||
let status = 'ok'
|
||||
let message = ''
|
||||
|
||||
const payload: ChallengeResolutionT = {
|
||||
status,
|
||||
message,
|
||||
result: {
|
||||
url: page.url(),
|
||||
status: response.status(),
|
||||
headers: response.headers(),
|
||||
response: null,
|
||||
cookies: await page.cookies(),
|
||||
userAgent: await page.evaluate(() => navigator.userAgent)
|
||||
const page: Page = await session.browser.newPage()
|
||||
|
||||
// the user-agent is changed just for linux arm build
|
||||
await page.setUserAgent(sessions.getUserAgent())
|
||||
|
||||
// todo: review
|
||||
if (params.proxy) {
|
||||
log.debug("Apply proxy");
|
||||
if (params.proxy.username) {
|
||||
await page.authenticate({
|
||||
username: params.proxy.username,
|
||||
password: params.proxy.password
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (returnOnlyCookies) {
|
||||
payload.result.headers = null;
|
||||
payload.result.userAgent = null;
|
||||
} else {
|
||||
if (download) {
|
||||
// for some reason we get an error unless we reload the page
|
||||
// has something to do with a stale buffer and this is the quickest
|
||||
// fix since I am short on time
|
||||
response = await page.goto(url, { waitUntil: 'domcontentloaded' })
|
||||
payload.result.response = (await response.buffer()).toString('base64')
|
||||
log.debug(`Navigating to... ${params.url}`)
|
||||
let response: Response = await page.goto(params.url, { waitUntil: 'domcontentloaded' })
|
||||
|
||||
// todo: review this functionality
|
||||
// } else if (returnRawHtml) {
|
||||
// payload.result.response = await response.text()
|
||||
// set cookies
|
||||
if (params.cookies) {
|
||||
for (const cookie of params.cookies) {
|
||||
// the other fields in the cookie can cause issues
|
||||
await page.setCookie({
|
||||
"name": cookie.name,
|
||||
"value": cookie.value
|
||||
})
|
||||
}
|
||||
// reload the page
|
||||
response = await page.goto(params.url, { waitUntil: 'domcontentloaded' })
|
||||
}
|
||||
|
||||
// log html in debug mode
|
||||
log.html(await page.content())
|
||||
|
||||
// Detect protection services and solve challenges
|
||||
try {
|
||||
response = await cloudflareProvider(params.url, page, response);
|
||||
} catch (e) {
|
||||
status = "error";
|
||||
message = "Cloudflare " + e.toString();
|
||||
}
|
||||
|
||||
const payload: ChallengeResolutionT = {
|
||||
status,
|
||||
message,
|
||||
result: {
|
||||
url: page.url(),
|
||||
status: response.status(),
|
||||
headers: response.headers(),
|
||||
response: null,
|
||||
cookies: await page.cookies(),
|
||||
userAgent: sessions.getUserAgent()
|
||||
}
|
||||
}
|
||||
|
||||
if (params.returnOnlyCookies) {
|
||||
payload.result.headers = null;
|
||||
payload.result.userAgent = null;
|
||||
} else {
|
||||
payload.result.response = await page.content()
|
||||
}
|
||||
|
||||
// make sure the page is closed because if it isn't and error will be thrown
|
||||
// when a user uses a temporary session, the browser make be quit before
|
||||
// the page is properly closed.
|
||||
await page.close()
|
||||
|
||||
return payload
|
||||
} catch (e) {
|
||||
log.error("Unexpected error: " + e);
|
||||
throw e;
|
||||
}
|
||||
|
||||
// Add final url in result
|
||||
payload.result.url = page.url();
|
||||
|
||||
// make sure the page is closed because if it isn't and error will be thrown
|
||||
// when a user uses a temporary session, the browser make be quit before
|
||||
// the page is properly closed.
|
||||
await page.close()
|
||||
|
||||
return payload
|
||||
}
|
||||
|
||||
function mergeSessionWithParams({ defaults }: SessionsCacheItem, params: V1Request): V1Request {
|
||||
const copy = { ...defaults, ...params }
|
||||
|
||||
// custom merging logic
|
||||
copy.headers = { ...defaults.headers || {}, ...params.headers || {} } || null
|
||||
|
||||
return copy
|
||||
}
|
||||
|
||||
async function setupPage(params: V1Request, browser: Browser): Promise<Page> {
|
||||
const page = await browser.newPage()
|
||||
|
||||
// merge session defaults with params
|
||||
const { method, postData, headers, cookies } = params
|
||||
|
||||
// the user-agent is changed just for linux arm build
|
||||
await page.setUserAgent(sessions.getUserAgent())
|
||||
|
||||
// todo: redo all functionality
|
||||
|
||||
// let overrideResolvers: OverrideResolvers = {}
|
||||
//
|
||||
// if (method !== 'GET') {
|
||||
// log.debug(`Setting method to ${method}`)
|
||||
// overrideResolvers.method = request => method
|
||||
// }
|
||||
//
|
||||
// if (postData) {
|
||||
// log.debug(`Setting body data to ${postData}`)
|
||||
// overrideResolvers.postData = request => postData
|
||||
// }
|
||||
//
|
||||
// if (headers) {
|
||||
// log.debug(`Adding custom headers: ${JSON.stringify(headers)}`)
|
||||
// overrideResolvers.headers = request => Object.assign(request.headers(), headers)
|
||||
// }
|
||||
//
|
||||
// if (cookies) {
|
||||
// log.debug(`Setting custom cookies: ${JSON.stringify(cookies)}`)
|
||||
// await page.setCookie(...cookies)
|
||||
// }
|
||||
//
|
||||
// // if any keys have been set on the object
|
||||
// if (Object.keys(overrideResolvers).length > 0) {
|
||||
// let callbackRunOnce = false
|
||||
// const callback = (request: Request) => {
|
||||
//
|
||||
// // avoid loading resources to speed up page load
|
||||
// if(request.resourceType() == 'stylesheet' || request.resourceType() == 'font' || request.resourceType() == 'image') {
|
||||
// request.abort()
|
||||
// return
|
||||
// }
|
||||
//
|
||||
// if (callbackRunOnce || !request.isNavigationRequest()) {
|
||||
// request.continue()
|
||||
// return
|
||||
// }
|
||||
//
|
||||
// callbackRunOnce = true
|
||||
// const overrides: Overrides = {}
|
||||
//
|
||||
// Object.keys(overrideResolvers).forEach((key: OverridesProps) => {
|
||||
// // @ts-ignore
|
||||
// overrides[key] = overrideResolvers[key](request)
|
||||
// });
|
||||
//
|
||||
// log.debug(`Overrides: ${JSON.stringify(overrides)}`)
|
||||
// request.continue(overrides)
|
||||
// }
|
||||
//
|
||||
// await page.setRequestInterception(true)
|
||||
// page.on('request', callback)
|
||||
// }
|
||||
|
||||
return page
|
||||
}
|
||||
|
||||
export async function browserRequest(params: V1Request): Promise<ChallengeResolutionT> {
|
||||
const oneTimeSession = params.session === undefined;
|
||||
|
||||
const options: SessionCreateOptions = {
|
||||
oneTimeSession: oneTimeSession,
|
||||
cookies: params.cookies,
|
||||
maxTimeout: params.maxTimeout,
|
||||
proxy: params.proxy
|
||||
}
|
||||
|
||||
const session: SessionsCacheItem = oneTimeSession
|
||||
? await sessions.create(null, {
|
||||
oneTimeSession: true
|
||||
})
|
||||
? await sessions.create(null, options)
|
||||
: sessions.get(params.session)
|
||||
|
||||
if (!session) {
|
||||
throw Error('This session does not exist. Use \'list_sessions\' to see all the existing sessions.')
|
||||
}
|
||||
|
||||
params = mergeSessionWithParams(session, params)
|
||||
|
||||
try {
|
||||
const page = await setupPage(params, session.browser)
|
||||
return await resolveChallengeWithTimeout(params, page)
|
||||
// const page = await setupPage(params, session.browser)
|
||||
return await resolveChallengeWithTimeout(params, session)
|
||||
} catch (error) {
|
||||
throw Error("Unable to process browser request. Error: " + error)
|
||||
} finally {
|
||||
|
@ -19,13 +19,3 @@ export function deleteFolderRecursive(path: string) {
|
||||
fs.rmdirSync(path)
|
||||
}
|
||||
}
|
||||
|
||||
export const removeEmptyFields = (o: Record<string, any>): typeof o => {
|
||||
const r: typeof o = {}
|
||||
for (const k in o) {
|
||||
if (o[k] !== undefined) {
|
||||
r[k] = o[k]
|
||||
}
|
||||
}
|
||||
return r
|
||||
}
|
@ -7,6 +7,7 @@ import {testWebBrowserInstallation} from "../services/sessions";
|
||||
const request = require("supertest");
|
||||
const app = require("../app");
|
||||
const version: string = require('../../package.json').version
|
||||
|
||||
const googleUrl = "https://www.google.com";
|
||||
const cfUrl = "https://pirateiro.com/torrents/?search=s";
|
||||
const cfCaptchaUrl = "https://idope.se"
|
||||
@ -136,6 +137,60 @@ describe("Test '/v1' path", () => {
|
||||
expect(apiResponse.solution.url).toContain(cfCaptchaUrl)
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with 'cookies' param", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"cookies": [
|
||||
{
|
||||
"name": "testcookie1",
|
||||
"value": "testvalue1"
|
||||
},
|
||||
{
|
||||
"name": "testcookie2",
|
||||
"value": "testvalue2"
|
||||
}
|
||||
]
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
expect(apiResponse.status).toBe("ok");
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(1)
|
||||
const cookie1: string = (solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "testcookie1";
|
||||
})[0].value
|
||||
expect(cookie1).toBe("testvalue1")
|
||||
const cookie2: string = (solution.cookies as any[]).filter(function(cookie) {
|
||||
return cookie.name == "testcookie2";
|
||||
})[0].value
|
||||
expect(cookie2).toBe("testvalue2")
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return OK with 'returnOnlyCookies' param", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
"url": googleUrl,
|
||||
"returnOnlyCookies": true
|
||||
}
|
||||
const response: Response = await request(app).post("/v1").send(payload);
|
||||
expect(response.statusCode).toBe(200);
|
||||
|
||||
const apiResponse: V1ResponseSolution = response.body;
|
||||
|
||||
const solution = apiResponse.solution;
|
||||
expect(solution.url).toContain(googleUrl)
|
||||
expect(solution.status).toBe(200);
|
||||
expect(solution.headers).toBe(null)
|
||||
expect(solution.response).toBe(null)
|
||||
expect(Object.keys(solution.cookies).length).toBeGreaterThan(0)
|
||||
expect(solution.userAgent).toBe(null)
|
||||
});
|
||||
|
||||
test("Cmd 'request.get' should return timeout", async () => {
|
||||
const payload = {
|
||||
"cmd": "request.get",
|
||||
@ -306,4 +361,5 @@ describe("Test '/v1' path", () => {
|
||||
expect(cfCookie2.length).toBeGreaterThan(30)
|
||||
expect(cfCookie2).toBe(cfCookie)
|
||||
});
|
||||
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user