mirror of
https://github.com/FlareSolverr/FlareSolverr.git
synced 2025-06-07 12:05:37 +00:00
Backport changes from Cloudproxy (#11)
This commit is contained in:
parent
5ed7c09160
commit
a422756ae6
15
.eslintrc.js
Normal file
15
.eslintrc.js
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
module.exports = {
|
||||||
|
env: {
|
||||||
|
browser: true,
|
||||||
|
commonjs: true,
|
||||||
|
es2020: true
|
||||||
|
},
|
||||||
|
extends: [
|
||||||
|
'standard'
|
||||||
|
],
|
||||||
|
parserOptions: {
|
||||||
|
ecmaVersion: 11
|
||||||
|
},
|
||||||
|
rules: {
|
||||||
|
}
|
||||||
|
}
|
5
.gitignore
vendored
5
.gitignore
vendored
@ -115,4 +115,9 @@ dist
|
|||||||
.yarn/install-state.gz
|
.yarn/install-state.gz
|
||||||
.pnp.*
|
.pnp.*
|
||||||
|
|
||||||
|
# IntelliJ IDEA
|
||||||
.idea/
|
.idea/
|
||||||
|
*.iml
|
||||||
|
|
||||||
|
# Project Development
|
||||||
|
testing/
|
||||||
|
38
Dockerfile
38
Dockerfile
@ -1,20 +1,30 @@
|
|||||||
FROM node:10-jessie
|
FROM --platform=${TARGETPLATFORM:-linux/amd64} node:15.2.1-alpine3.11
|
||||||
|
|
||||||
RUN apt-get update -y && \
|
# Print build information
|
||||||
apt-get install -y gconf-service libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget libgbm1 && \
|
ARG TARGETPLATFORM
|
||||||
apt-get clean && rm -rf /tmp/* /var/lib/apt/lists/* /var/tmp/*
|
ARG BUILDPLATFORM
|
||||||
RUN mkdir -p /home/node/flaresolverr && chown -R node:node /home/node/flaresolverr
|
RUN printf "I am running on ${BUILDPLATFORM:-linux/amd64}, building for ${TARGETPLATFORM:-linux/amd64}\n$(uname -a)\n"
|
||||||
WORKDIR /home/node/flaresolverr
|
|
||||||
|
|
||||||
COPY package*.json ./
|
# Install Chromium, dumb-init and remove all locales but en-US
|
||||||
|
RUN apk add --no-cache chromium dumb-init && \
|
||||||
|
find /usr/lib/chromium/locales -type f ! -name 'en-US.*' -delete
|
||||||
|
|
||||||
|
# Copy FlareSolverr code
|
||||||
USER node
|
USER node
|
||||||
RUN PUPPETEER_PRODUCT=firefox npm install
|
RUN mkdir -p /home/node/flaresolverr
|
||||||
COPY --chown=node:node . .
|
WORKDIR /home/node/flaresolverr
|
||||||
|
COPY --chown=node:node package.json package-lock.json tsconfig.json ./
|
||||||
|
COPY --chown=node:node src ./src/
|
||||||
|
|
||||||
ENV LOG_LEVEL=info
|
# Install package. Skip installing Chrome, we will use the installed package.
|
||||||
ENV LOG_HTML=false
|
ENV PUPPETEER_PRODUCT=chrome \
|
||||||
ENV PORT=8191
|
PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
|
||||||
ENV HOST=0.0.0.0
|
PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser
|
||||||
|
RUN npm install && \
|
||||||
|
npm run build && \
|
||||||
|
rm -rf src tsconfig.json && \
|
||||||
|
npm prune --production
|
||||||
|
|
||||||
EXPOSE 8191
|
EXPOSE 8191
|
||||||
CMD [ "node", "index.js" ]
|
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
|
||||||
|
CMD ["npm", "start"]
|
||||||
|
1
LICENSE
1
LICENSE
@ -1,6 +1,7 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2020 Diego Heras (ngosang)
|
Copyright (c) 2020 Diego Heras (ngosang)
|
||||||
|
Copyright (c) 2020 Noah Cardoza (NoahCardoza)
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
271
README.md
271
README.md
@ -1,102 +1,230 @@
|
|||||||
## FlareSolverr
|
# FlareSolverr
|
||||||
|
|
||||||
Proxy server to bypass Cloudflare protection
|
Proxy server to bypass Cloudflare protection
|
||||||
|
|
||||||
:warning: This project is in beta state. Some things may not work and the API can change at any time.
|
:warning: This project is in beta state. Some things may not work and the API can change at any time.
|
||||||
See the known issues section.
|
See the known issues section.
|
||||||
|
|
||||||
### How it works
|
## How it works
|
||||||
|
|
||||||
FlareSolverr starts a proxy server and it waits for user requests in idle state using few resources.
|
FlareSolverr starts a proxy server and it waits for user requests in an idle state using few resources.
|
||||||
When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the
|
When some request arrives, it uses [puppeteer](https://github.com/puppeteer/puppeteer) with the
|
||||||
[stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)
|
[stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)
|
||||||
to create an headless browser (Firefox). It opens the URL with user parameters and waits until the Cloudflare
|
to create a headless browser (Chrome). It opens the URL with user parameters and waits until the
|
||||||
challenge is solved (or timeout). The HTML code and the cookies are sent back to the user and those cookies can
|
Cloudflare challenge is solved (or timeout). The HTML code and the cookies are sent back to the
|
||||||
be used to bypass Cloudflare using other HTTP clients.
|
user and those cookies can be used to bypass Cloudflare using other HTTP clients.
|
||||||
|
|
||||||
NOTE: Web browsers consume a lot of memory. If you are running FlareSolverr on a machine with few RAM,
|
**NOTE**: Web browsers consume a lot of memory. If you are running FlareSolverr on a machine with little RAM,
|
||||||
do not make many requests at once. With each request a new browser is launched.
|
do not make many requests at once. With each request a new browser is launched.
|
||||||
|
(It is possible to use a permanent session. However, if you use sessions, you should make sure to close them as soon as you are done using them.)
|
||||||
|
|
||||||
### Installation
|
## Installation
|
||||||
|
|
||||||
It requires NodeJS.
|
It requires NodeJS.
|
||||||
|
|
||||||
Run `PUPPETEER_PRODUCT=firefox npm install` to install FlareSolverr dependencies.
|
Run `PUPPETEER_PRODUCT=chrome npm install` to install FlareSolverr dependencies.
|
||||||
|
|
||||||
### Usage
|
## Usage
|
||||||
|
|
||||||
Run `node index.js` to start FlareSolverr.
|
First run `npm run build`. Once the TypeScript is compiled, you can use `npm start` to start FlareSolverr.
|
||||||
|
|
||||||
Example request:
|
Example request:
|
||||||
```bash
|
```bash
|
||||||
curl -L -X POST 'http://localhost:8191/v1' \
|
curl -L -X POST 'http://localhost:8191/v1' \
|
||||||
-H 'Content-Type: application/json' \
|
-H 'Content-Type: application/json' \
|
||||||
--data-raw '{
|
--data-raw '{
|
||||||
"url":"http://www.google.com/",
|
"cmd": "request.get",
|
||||||
"userAgent": "Mozilla/5.0 (X11; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0",
|
"url":"http://www.google.com/",
|
||||||
"maxTimeout": 60000
|
"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
|
||||||
|
"maxTimeout": 60000,
|
||||||
|
"headers": {
|
||||||
|
"X-Test": "Testing 123..."
|
||||||
|
}
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Commands
|
||||||
|
|
||||||
|
#### + `sessions.create`
|
||||||
|
|
||||||
|
This will launch a new browser instance which will retain cookies until you destroy it
|
||||||
|
with `sessions.destroy`. This comes in handy so you don't have to keep solving challenges
|
||||||
|
over and over and you won't need to keep sending cookies for the browser to use.
|
||||||
|
|
||||||
|
This also speeds up the requests since it won't have to launch a new browser instance for
|
||||||
|
every request.
|
||||||
|
|
||||||
Parameter | Notes
|
Parameter | Notes
|
||||||
|--|--|
|
|--|--|
|
||||||
url | Mandatory
|
session | Optional. The session ID that you want to be assigned to the instance. If one isn't set a random UUID will be assigned.
|
||||||
userAgent | Optional. Will be used by the headless browser
|
userAgent | Optional. Will be used by the headless browser.
|
||||||
maxTimeout | Optional. Max timeout to solve the challenge
|
|
||||||
cookies | Optional. Will be used by the headless browser. Follow this format https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies
|
#### + `sessions.list`
|
||||||
|
|
||||||
|
Returns a list of all the active sessions. More for debugging if you are curious to see
|
||||||
|
how many sessions are running. You should always make sure to properly close each
|
||||||
|
session when you are done using them as too many may slow your computer down.
|
||||||
|
|
||||||
Example response:
|
Example response:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"status": "ok",
|
"sessions": [
|
||||||
"message": "",
|
"session_id_1",
|
||||||
"startTimestamp": 1591679463498,
|
"session_id_2",
|
||||||
"endTimestamp": 1591679472781,
|
"session_id_3..."
|
||||||
"version": "1.0.0",
|
]
|
||||||
"solution": {
|
|
||||||
"url": "https://www.google.com/?gws_rd=ssl",
|
|
||||||
"response": "<!DOCTYPE html><html ...",
|
|
||||||
"cookies": [
|
|
||||||
{
|
|
||||||
"name": "ANID",
|
|
||||||
"value": "AHWqTUnRRMcmD0SxIOLAhv88SiY555FZpb4jeYCaSNZPHtYyBuY85AmaZEqLFTHe",
|
|
||||||
"domain": ".google.com",
|
|
||||||
"path": "/",
|
|
||||||
"expires": 1625375465.915947,
|
|
||||||
"size": 68,
|
|
||||||
"httpOnly": true,
|
|
||||||
"secure": true,
|
|
||||||
"session": false,
|
|
||||||
"sameSite": "None"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "1P_JAR",
|
|
||||||
"value": "2020-6-9-5",
|
|
||||||
"domain": ".google.com",
|
|
||||||
"path": "/",
|
|
||||||
"expires": 1594271465,
|
|
||||||
"size": 16,
|
|
||||||
"httpOnly": false,
|
|
||||||
"secure": true,
|
|
||||||
"session": false
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"userAgent": " Mozilla/5.0 (X11; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Environment variables
|
#### + `sessions.destroy`
|
||||||
|
|
||||||
|
This will properly shut down a browser instance and remove all files associated with it
|
||||||
|
to free up resources for a new session. Whenever you no longer need to use a session you
|
||||||
|
should make sure to close it.
|
||||||
|
|
||||||
|
Parameter | Notes
|
||||||
|
|--|--|
|
||||||
|
session | The session ID that you want to be destroyed.
|
||||||
|
|
||||||
|
#### + `request.get`
|
||||||
|
|
||||||
|
Parameter | Notes
|
||||||
|
|--|--|
|
||||||
|
url | Mandatory
|
||||||
|
session | Optional. Will send the request from an existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed.
|
||||||
|
headers | Optional. To specify user headers.
|
||||||
|
maxTimeout | Optional. Max timeout to solve the challenge
|
||||||
|
cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format
|
||||||
|
|
||||||
|
Example response from running the `curl` above:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"solution": {
|
||||||
|
"url": "https://www.google.com/?gws_rd=ssl",
|
||||||
|
"status": 200,
|
||||||
|
"headers": {
|
||||||
|
"status": "200",
|
||||||
|
"date": "Thu, 16 Jul 2020 04:15:49 GMT",
|
||||||
|
"expires": "-1",
|
||||||
|
"cache-control": "private, max-age=0",
|
||||||
|
"content-type": "text/html; charset=UTF-8",
|
||||||
|
"strict-transport-security": "max-age=31536000",
|
||||||
|
"p3p": "CP=\"This is not a P3P policy! See g.co/p3phelp for more info.\"",
|
||||||
|
"content-encoding": "br",
|
||||||
|
"server": "gws",
|
||||||
|
"content-length": "61587",
|
||||||
|
"x-xss-protection": "0",
|
||||||
|
"x-frame-options": "SAMEORIGIN",
|
||||||
|
"set-cookie": "1P_JAR=2020-07-16-04; expires=Sat, 15-Aug-2020 04:15:49 GMT; path=/; domain=.google.com; Secure; SameSite=none\nNID=204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57g_oeQHyoV5DuvDhpWc4r9IcPoeIYmkr_ZTX_MNOU8IAbtXmVO7Bmq0adb-hpIHaTBIdBk3Ofifp4gO6vZleVuFYfj7ePkHeHdzGoX-en0FvKtd9iofX4O6RiAdEIAnpL7Wge4; expires=Fri, 15-Jan-2021 04:15:49 GMT; path=/; domain=.google.com; Secure; HttpOnly; SameSite=none",
|
||||||
|
"alt-svc": "h3-29=\":443\"; ma=2592000,h3-27=\":443\"; ma=2592000,h3-25=\":443\"; ma=2592000,h3-T050=\":443\"; ma=2592000,h3-Q050=\":443\"; ma=2592000,h3-Q046=\":443\"; ma=2592000,h3-Q043=\":443\"; ma=2592000,quic=\":443\"; ma=2592000; v=\"46,43\""
|
||||||
|
},
|
||||||
|
"response":"<!DOCTYPE html>...",
|
||||||
|
"cookies": [
|
||||||
|
{
|
||||||
|
"name": "NID",
|
||||||
|
"value": "204=QE3Ocq15XalczqjuDy52HeseG3zAZuJzID3R57g_oeQHyoV5DuvDhpWc4r9IcPoeIYmkr_ZTX_MNOU8IAbtXmVO7Bmq0adb-hpIHaTBIdBk3Ofifp4gO6vZleVuFYfj7ePkHeHdzGoX-en0FvKtd9iofX4O6RiAdEIAnpL7Wge4",
|
||||||
|
"domain": ".google.com",
|
||||||
|
"path": "/",
|
||||||
|
"expires": 1610684149.307722,
|
||||||
|
"size": 178,
|
||||||
|
"httpOnly": true,
|
||||||
|
"secure": true,
|
||||||
|
"session": false,
|
||||||
|
"sameSite": "None"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "1P_JAR",
|
||||||
|
"value": "2020-07-16-04",
|
||||||
|
"domain": ".google.com",
|
||||||
|
"path": "/",
|
||||||
|
"expires": 1597464949.307626,
|
||||||
|
"size": 19,
|
||||||
|
"httpOnly": false,
|
||||||
|
"secure": true,
|
||||||
|
"session": false,
|
||||||
|
"sameSite": "None"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
|
||||||
|
},
|
||||||
|
"status": "ok",
|
||||||
|
"message": "",
|
||||||
|
"startTimestamp": 1594872947467,
|
||||||
|
"endTimestamp": 1594872949617,
|
||||||
|
"version": "1.0.0"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### + `request.post`
|
||||||
|
|
||||||
|
This is the same as `request.get` but it takes one more param:
|
||||||
|
|
||||||
|
Parameter | Notes
|
||||||
|
|--|--|
|
||||||
|
postData | Must be a string. If you want to POST a form, don't forget to set the `Content-Type` header to `application/x-www-form-urlencoded` or the server might not understand your request.
|
||||||
|
|
||||||
|
## Downloading Images and PDFs (small files)
|
||||||
|
|
||||||
|
If you need to access an image/pdf or small file, you should pass the `download` parameter to
|
||||||
|
`request.get` setting it to `true`. Rather than access the html and return text it will
|
||||||
|
return the buffer **base64** encoded, which you can then decode and save as the image/pdf.
|
||||||
|
|
||||||
|
This method isn't recommended for videos or anything larger, as that should be streamed back to
|
||||||
|
the client and at the moment there is nothing set up to do so. If this is something you need, feel
|
||||||
|
free to create an issue and/or submit a PR.
|
||||||
|
|
||||||
|
## Environment variables
|
||||||
|
|
||||||
To set the environment vars in Linux run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
To set the environment vars in Linux run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell.
|
||||||
|
|
||||||
Name | Default value
|
Name | Default | Notes
|
||||||
|--|--|
|
|--|--|--|
|
||||||
LOG_LEVEL | info
|
LOG_LEVEL | info | Used to change the verbosity of the logging.
|
||||||
LOG_HTML | false
|
LOG_HTML | false | Used for debugging. If `true` all html that passes through the proxy will be logged to the console.
|
||||||
PORT | 8191
|
PORT | 8191 | Change this if you already have a process running on port `8191`.
|
||||||
HOST | 0.0.0.0
|
HOST | 0.0.0.0 | This shouldn't need to be messed with but if you insist, it's here!
|
||||||
|
CAPTCHA_SOLVER | None | This is used to select which captcha solving method is used when a captcha is encountered.
|
||||||
|
HEADLESS | true | This is used to debug the browser by not running it in headless mode.
|
||||||
|
|
||||||
### Docker
|
## Captcha Solvers
|
||||||
|
|
||||||
|
Sometimes CF not only gives mathematical computations and browser tests, but also requires
|
||||||
|
the user to solve a captcha. If this is the case, FlareSolverr will return the captcha page. But that's
|
||||||
|
not very helpful to you, is it?
|
||||||
|
|
||||||
|
FlareSolverr can be customized to solve the captchas automatically by setting the environment variable
|
||||||
|
`CAPTCHA_SOLVER` to the file name of one of the adapters inside the [/captcha](src/captcha) directory.
|
||||||
|
|
||||||
|
### [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester)
|
||||||
|
|
||||||
|
This method makes use of the [CaptchaHarvester](https://github.com/NoahCardoza/CaptchaHarvester) project which allows users to collect their own tokens from ReCaptcha V2/V3 and hCaptcha for free.
|
||||||
|
|
||||||
|
To use this method you must set these ENV variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CAPTCHA_SOLVER=harvester
|
||||||
|
HARVESTER_ENDPOINT=https://127.0.0.1:5000/token
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**: above I set `HARVESTER_ENDPOINT` to the default configuration
|
||||||
|
of the captcha harvester's server, but that could change if
|
||||||
|
you customize the command line flags. Simply put, `HARVESTER_ENDPOINT`
|
||||||
|
should be set to the URI of the route that returns a token in plain text when called.
|
||||||
|
|
||||||
|
### [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver)
|
||||||
|
|
||||||
|
This method makes use of the [hcaptcha-solver](https://github.com/JimmyLaurent/hcaptcha-solver) project which attempts to solve hcaptcha by randomly selecting images.
|
||||||
|
|
||||||
|
To use this solver you must first install it and then set it as the `CAPTCHA_SOLVER`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm i hcaptcha-solver
|
||||||
|
CAPTCHA_SOLVER=hcaptcha-solver
|
||||||
|
```
|
||||||
|
|
||||||
|
## Docker
|
||||||
|
|
||||||
You can edit environment variables in `./Dockerfile` and build your own image.
|
You can edit environment variables in `./Dockerfile` and build your own image.
|
||||||
|
|
||||||
@ -105,17 +233,22 @@ docker build -t flaresolverr:latest .
|
|||||||
docker run --restart=always --name flaresolverr -p 8191:8191 -d flaresolverr:latest
|
docker run --restart=always --name flaresolverr -p 8191:8191 -d flaresolverr:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
### Known issues / Roadmap
|
## TypeScript
|
||||||
|
|
||||||
The current implementation is not able to bypass Cloudflare because they are detecting the headless browser.
|
I'm quite new to TypeScript. If you spot any funny business or anything that is or isn't being
|
||||||
I hope this will be fixed soon in the [puppeteer stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)
|
used properly feel free to submit a PR or open an issue.
|
||||||
|
|
||||||
|
## Known issues / Roadmap
|
||||||
|
|
||||||
|
The current implementation seems to be working on the sites I have been testing it on. However, if you find it unable to access a site, open an issue and I'd be happy to investigate.
|
||||||
|
|
||||||
|
That being said, the project uses the [puppeteer stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth). If Cloudflare is able to detect the headless browser, it's more that project's domain to fix.
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
* Fix remaining issues in the code (see TODOs)
|
|
||||||
* Make the maxTimeout more accurate (count the time to open the first page)
|
* Fix remaining issues in the code (see TODOs in code)
|
||||||
* Add support for more HTTP methods (POST, PUT, DELETE ...)
|
* Make the maxTimeout more accurate (count the time to open the first page / maybe count the captcha solve time?)
|
||||||
* Add support for user HTTP headers
|
* Hide sensitive information in logs
|
||||||
* Hide sensitive information in logs
|
|
||||||
* Reduce Docker image size
|
* Reduce Docker image size
|
||||||
* Docker image for ARM architecture
|
* Docker image for ARM architecture
|
||||||
* Install instructions for Windows
|
* Install instructions for Windows
|
||||||
|
206
index.js
206
index.js
@ -1,206 +0,0 @@
|
|||||||
const os = require('os');
|
|
||||||
const path = require('path');
|
|
||||||
const fs = require('fs');
|
|
||||||
const { v1: uuidv1 } = require('uuid');
|
|
||||||
const log = require('console-log-level')(
|
|
||||||
{
|
|
||||||
level: process.env.LOG_LEVEL || 'info',
|
|
||||||
prefix: function (level) {
|
|
||||||
return new Date().toISOString() + " " + level.toUpperCase() + " REQ-" + reqCounter;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
const puppeteer = require('puppeteer-extra');
|
|
||||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
|
||||||
const http = require('http');
|
|
||||||
const pjson = require('./package.json');
|
|
||||||
const version = pjson.version;
|
|
||||||
const serverPort = process.env.PORT || 8191;
|
|
||||||
const serverHost = process.env.HOST || '0.0.0.0';
|
|
||||||
const logHtml = process.env.LOG_HTML || false;
|
|
||||||
let reqCounter = 0;
|
|
||||||
|
|
||||||
// setting "user-agent-override" evasion is not working for us because it can't be changed
|
|
||||||
// in each request. we set the user-agent in the browser args instead
|
|
||||||
puppeteer.use(StealthPlugin());
|
|
||||||
|
|
||||||
http.createServer(function(req, res) {
|
|
||||||
reqCounter++;
|
|
||||||
const startTimestamp = Date.now();
|
|
||||||
log.info('Incoming request: ' + req.method + " " + req.url);
|
|
||||||
let body = [];
|
|
||||||
req.on('data', function(chunk) {
|
|
||||||
body.push(chunk);
|
|
||||||
}).on('end', function() {
|
|
||||||
// parse params
|
|
||||||
body = Buffer.concat(body).toString();
|
|
||||||
var params = {};
|
|
||||||
try {
|
|
||||||
params = JSON.parse(body);
|
|
||||||
}
|
|
||||||
catch (err) {
|
|
||||||
errorResponse("Body must be in JSON format", res, startTimestamp);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// validate params
|
|
||||||
if (!validateIncomingRequest(params, req, res, startTimestamp))
|
|
||||||
return;
|
|
||||||
|
|
||||||
// process request
|
|
||||||
processRequest(params, req, res, startTimestamp);
|
|
||||||
});
|
|
||||||
}).listen(serverPort, serverHost, function() {
|
|
||||||
log.info("FlareSolverr v" + version + " listening on http://" + serverHost + ":" + serverPort);
|
|
||||||
})
|
|
||||||
|
|
||||||
function errorResponse(errorMsg, res, startTimestamp) {
|
|
||||||
log.error(errorMsg);
|
|
||||||
const response = {
|
|
||||||
status: "error",
|
|
||||||
message: errorMsg,
|
|
||||||
startTimestamp: startTimestamp,
|
|
||||||
endTimestamp: Date.now(),
|
|
||||||
version: version
|
|
||||||
}
|
|
||||||
res.writeHead(500, {
|
|
||||||
'Content-Type': 'application/json'
|
|
||||||
});
|
|
||||||
res.write(JSON.stringify(response));
|
|
||||||
res.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
function prepareBrowserProfile(userAgent) {
|
|
||||||
const userDataDir = path.join(os.tmpdir(), '/puppeteer_firefox_profile_' + uuidv1());
|
|
||||||
if (!fs.existsSync(userDataDir)) {
|
|
||||||
fs.mkdirSync(userDataDir, { recursive: true })
|
|
||||||
}
|
|
||||||
const prefs = `user_pref("general.useragent.override", "${userAgent}");`;
|
|
||||||
fs.writeFile(path.join(userDataDir, 'prefs.js'), prefs, () => {});
|
|
||||||
return userDataDir;
|
|
||||||
}
|
|
||||||
|
|
||||||
function validateIncomingRequest(params, req, res, startTimestamp) {
|
|
||||||
log.info('Params: ' + JSON.stringify(params));
|
|
||||||
|
|
||||||
if (req.method != 'POST') {
|
|
||||||
errorResponse("Only POST method is allowed", res, startTimestamp);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (req.url != '/v1') {
|
|
||||||
errorResponse("Only /v1 endpoint is allowed", res, startTimestamp);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!params['url']) {
|
|
||||||
errorResponse("Parameter 'url' is mandatory", res, startTimestamp);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
function processRequest(params, req, res, startTimestamp) {
|
|
||||||
let puppeteerOptions = {
|
|
||||||
product: 'firefox',
|
|
||||||
headless: true
|
|
||||||
};
|
|
||||||
const reqUserAgent = params["userAgent"];
|
|
||||||
if (reqUserAgent) {
|
|
||||||
log.debug('Using custom User-Agent: ' + reqUserAgent);
|
|
||||||
// TODO: remove the profile after closing the browser
|
|
||||||
puppeteerOptions['userDataDir'] = prepareBrowserProfile(reqUserAgent);
|
|
||||||
}
|
|
||||||
|
|
||||||
log.debug('Launching headless browser...');
|
|
||||||
puppeteer.launch(puppeteerOptions).then(async browser => {
|
|
||||||
try {
|
|
||||||
await resolveCallenge(params, browser, res, startTimestamp);
|
|
||||||
} catch (error) {
|
|
||||||
errorResponse(error.message, res, startTimestamp);
|
|
||||||
} finally {
|
|
||||||
await browser.close();
|
|
||||||
}
|
|
||||||
}).catch(error => {
|
|
||||||
errorResponse(error.message, res, startTimestamp);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
async function resolveCallenge(params, browser, res, startTimestamp) {
|
|
||||||
const page = await browser.newPage();
|
|
||||||
const userAgent = await page.evaluate(() => navigator.userAgent);
|
|
||||||
log.debug("User-Agent: " + userAgent);
|
|
||||||
const reqUrl = params["url"];
|
|
||||||
const reqMaxTimeout = params["maxTimeout"] || 60000;
|
|
||||||
const reqCookies = params["cookies"];
|
|
||||||
|
|
||||||
if (reqCookies) {
|
|
||||||
log.debug('Using custom cookies');
|
|
||||||
await page.setCookie(...reqCookies);
|
|
||||||
}
|
|
||||||
|
|
||||||
log.debug("Navegating to... " + reqUrl);
|
|
||||||
await page.goto(reqUrl, {waitUntil: 'domcontentloaded'});
|
|
||||||
|
|
||||||
// detect cloudflare
|
|
||||||
const cloudflareRay = await page.$('.ray_id');
|
|
||||||
if (cloudflareRay) {
|
|
||||||
log.debug('Waiting for Cloudflare challenge...');
|
|
||||||
|
|
||||||
while(Date.now() - startTimestamp < reqMaxTimeout) {
|
|
||||||
await page.waitFor(1000);
|
|
||||||
|
|
||||||
try {
|
|
||||||
// catch exception timeout in waitForNavigation
|
|
||||||
await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 });
|
|
||||||
} catch (error) {}
|
|
||||||
|
|
||||||
const cloudflareRay = await page.$('.ray_id');
|
|
||||||
if (!cloudflareRay)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Date.now() - startTimestamp >= reqMaxTimeout) {
|
|
||||||
errorResponse("Maximum timeout reached. maxTimeout=" + reqMaxTimeout + " (ms)", res, startTimestamp);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
log.debug("Validating HTML code...");
|
|
||||||
const html = await page.content();
|
|
||||||
if (html.includes("captcha-bypass") || html.includes("__cf_chl_captcha_tk__")) {
|
|
||||||
errorResponse("Chaptcha detected!", res, startTimestamp);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
log.debug("No challenge detected");
|
|
||||||
}
|
|
||||||
|
|
||||||
const url = await page.url();
|
|
||||||
log.debug("Response URL: " + url);
|
|
||||||
const cookies = await page.cookies();
|
|
||||||
log.debug("Response cookies: " + JSON.stringify(cookies));
|
|
||||||
const html = await page.content();
|
|
||||||
if (logHtml)
|
|
||||||
log.debug(html);
|
|
||||||
|
|
||||||
const endTimestamp = Date.now();
|
|
||||||
log.info("Successful response in " + (endTimestamp - startTimestamp) / 1000 + " s");
|
|
||||||
const response = {
|
|
||||||
status: "ok",
|
|
||||||
message: "",
|
|
||||||
startTimestamp: startTimestamp,
|
|
||||||
endTimestamp: endTimestamp,
|
|
||||||
version: version,
|
|
||||||
solution: {
|
|
||||||
url: url,
|
|
||||||
response: html,
|
|
||||||
cookies: cookies,
|
|
||||||
userAgent: userAgent
|
|
||||||
}
|
|
||||||
}
|
|
||||||
res.writeHead(200, {
|
|
||||||
'Content-Type': 'application/json'
|
|
||||||
});
|
|
||||||
res.write(JSON.stringify(response));
|
|
||||||
res.end();
|
|
||||||
}
|
|
2786
package-lock.json
generated
2786
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
40
package.json
40
package.json
@ -1,22 +1,46 @@
|
|||||||
{
|
{
|
||||||
"name": "flaresolverr",
|
"name": "flaresolverr",
|
||||||
"version": "1.0.0",
|
"version": "1.1.0",
|
||||||
"description": "Proxy server to bypass Cloudflare protection",
|
"description": "Proxy server to bypass Cloudflare protection.",
|
||||||
"main": "index.js",
|
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"start": "node ./dist/index.js",
|
||||||
|
"build": "tsc",
|
||||||
|
"dev": "nodemon -e ts --exec ts-node src/index.ts"
|
||||||
},
|
},
|
||||||
"author": "Diego Heras (ngosang)",
|
"author": "Diego Heras (ngosang)",
|
||||||
|
"contributors": [
|
||||||
|
{
|
||||||
|
"name": "Noah Cardoza",
|
||||||
|
"url": "https://github.com/NoahCardoza/CloudProxy.git"
|
||||||
|
}
|
||||||
|
],
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"repository": {
|
"repository": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/ngosang/FlareSolverr.git"
|
"url": "https://github.com/ngosang/FlareSolverr"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"console-log-level": "^1.4.1",
|
"console-log-level": "^1.4.1",
|
||||||
|
"got": "^11.5.1",
|
||||||
|
"hcaptcha-solver": "^1.0.2",
|
||||||
"puppeteer": "^3.3.0",
|
"puppeteer": "^3.3.0",
|
||||||
"puppeteer-extra": "^3.1.9",
|
"puppeteer-extra": "^3.1.15",
|
||||||
"puppeteer-extra-plugin-stealth": "^2.4.9",
|
"puppeteer-extra-plugin-stealth": "^2.6.5",
|
||||||
"uuid": "^8.1.0"
|
"uuid": "^8.2.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/node": "^14.0.23",
|
||||||
|
"@types/puppeteer": "^3.0.1",
|
||||||
|
"@types/uuid": "^8.0.0",
|
||||||
|
"eslint": "^7.5.0",
|
||||||
|
"eslint-config-airbnb-base": "^14.2.0",
|
||||||
|
"eslint-config-standard": "^14.1.1",
|
||||||
|
"eslint-plugin-import": "^2.22.0",
|
||||||
|
"eslint-plugin-node": "^11.1.0",
|
||||||
|
"eslint-plugin-promise": "^4.2.1",
|
||||||
|
"eslint-plugin-standard": "^4.0.1",
|
||||||
|
"nodemon": "^2.0.4",
|
||||||
|
"ts-node": "^8.10.2",
|
||||||
|
"typescript": "^3.9.7"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
31
src/captcha/harvester.ts
Normal file
31
src/captcha/harvester.ts
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
import got from 'got'
|
||||||
|
import { sleep } from '../utils'
|
||||||
|
|
||||||
|
/*
|
||||||
|
This method uses the captcha-harvester project:
|
||||||
|
https://github.com/NoahCardoza/CaptchaHarvester
|
||||||
|
|
||||||
|
While the function must take url/sitekey/type args,
|
||||||
|
they aren't used because the harvester server must
|
||||||
|
be preconfigured.
|
||||||
|
|
||||||
|
ENV:
|
||||||
|
HARVESTER_ENDPOINT: This must be the full path
|
||||||
|
to the /token endpoint of the harvester.
|
||||||
|
E.G. "https://127.0.0.1:5000/token"
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Polls the harvester endpoint until it hands back a captcha token.
 *
 * The endpoint is read once from the `HARVESTER_ENDPOINT` environment
 * variable. TLS certificate verification is disabled for the request.
 *
 * @returns the body of the first successful response from the endpoint
 * @throws Error when `HARVESTER_ENDPOINT` is not set
 * @throws the original request error for any failure other than an HTTP 418
 *         response, including errors that carry no HTTP response at all
 *         (connection refused, DNS failure, timeout, ...)
 */
export default async function solve(): Promise<string> {
  const endpoint = process.env.HARVESTER_ENDPOINT
  if (!endpoint) { throw Error('ENV variable `HARVESTER_ENDPOINT` must be set.') }

  while (true) {
    try {
      const reply = await got.get(endpoint, {
        https: { rejectUnauthorized: false }
      })
      return reply.body
    } catch (e) {
      // Only an HTTP 418 reply is retried (presumably the harvester's way of
      // saying "no token available yet" - confirm against the harvester docs).
      // Network-level errors have no `response`; read the status defensively so
      // the real error is rethrown instead of a TypeError on `undefined`.
      const status = (e as { response?: { statusCode?: number } }).response?.statusCode
      if (status !== 418) { throw e }
    }

    // Back off for three seconds before asking again.
    await sleep(3000)
  }
}
|
21
src/captcha/hcaptcha-solver.ts
Normal file
21
src/captcha/hcaptcha-solver.ts
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
const solveCaptcha = require('hcaptcha-solver');
|
||||||
|
import { SolverOptions } from '.'
|
||||||
|
/*
  This method uses the hcaptcha-solver project:
  https://github.com/JimmyLaurent/hcaptcha-solver

  TODO: allow user pass custom options to the solver.

  ENV:
    There are no other variables that must be set to get this to work
*/

/**
 * Asks hcaptcha-solver for a token for the given page.
 *
 * @param options only `url` is read
 * @returns the token, or null when the solver threw (the error is printed
 *          with console.error)
 */
export default async function solve({ url }: SolverOptions): Promise<string> {
  let token: string = null
  try {
    token = await solveCaptcha(url)
  } catch (err) {
    console.error(err)
  }
  return token
}
|
35
src/captcha/index.ts
Normal file
35
src/captcha/index.ts
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
/**
 * Captcha kinds known to the solvers. routes.ts looks a member up by the
 * value of the page's `cf_captcha_kind` input, so the member names are the
 * lookup keys.
 */
export enum CaptchaType {
  re = 'reCaptcha',
  h = 'hCaptcha'
}

/** Arguments handed to every captcha solver. */
export interface SolverOptions {
  /** address of the page showing the captcha */
  url: string
  /** value of the page's `data-sitekey` attribute */
  sitekey: string
  /** kind of captcha to solve */
  type: CaptchaType
}

/** A captcha solver: receives the options and resolves with a token. */
export type Solver = (options: SolverOptions) => Promise<string>

// solvers that have already been loaded, keyed by their CAPTCHA_SOLVER name
const captchaSolvers: Record<string, Solver> = {}
|
||||||
|
|
||||||
|
/**
 * Returns the captcha solver selected by the `CAPTCHA_SOLVER` environment
 * variable. The solver module is required from this directory on first use
 * and kept in `captchaSolvers` afterwards.
 *
 * @returns the solver, or null when `CAPTCHA_SOLVER` is not set
 * @throws Error when the solver module cannot be found or fails to load
 */
export default (): Solver => {
  const method = process.env.CAPTCHA_SOLVER

  if (!method) { return null }

  // own-property check: the `in` operator would also accept inherited keys
  // such as "constructor" and hand back something that is not a solver
  if (!Object.prototype.hasOwnProperty.call(captchaSolvers, method)) {
    try {
      captchaSolvers[method] = require('./' + method).default as Solver
    } catch (e) {
      if (e.code === 'MODULE_NOT_FOUND') {
        throw Error(`The solver '${method}' is not a valid captcha solving method.`)
      }
      console.error(e)
      throw Error(`An error occurred loading the solver '${method}'.`)
    }
  }

  return captchaSolvers[method]
}
|
106
src/index.ts
Normal file
106
src/index.ts
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
import log from './log'
|
||||||
|
import { createServer, IncomingMessage, ServerResponse } from 'http';
|
||||||
|
import { RequestContext } from './types'
|
||||||
|
import Router, { BaseAPICall } from './routes'
|
||||||
|
|
||||||
|
// application version, read from package.json and echoed in every response
const { version }: { version: string } = require('../package.json')
// PORT / HOST environment variables override the listen address
const serverPort: number = Number(process.env.PORT) || 8191
const serverHost: string = process.env.HOST || '0.0.0.0'
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Logs `errorMsg` and answers the request with HTTP 500 and a JSON error body.
 *
 * @param errorMsg message to log and to return to the client
 * @param res response object of the request being answered
 * @param startTimestamp time (ms) at which the request was received
 */
function errorResponse(errorMsg: string, res: ServerResponse, startTimestamp: number) {
  log.error(errorMsg)

  const body = JSON.stringify({
    status: 'error',
    message: errorMsg,
    startTimestamp,
    endTimestamp: Date.now(),
    version
  })

  res.writeHead(500, { 'Content-Type': 'application/json' })
  res.write(body)
  res.end()
}
|
||||||
|
|
||||||
|
/**
 * Logs the elapsed time and answers the request with HTTP 200 and a JSON body.
 *
 * @param successMsg optional message; logged and returned when present
 * @param extendedProperties extra fields merged over the base response
 *        (a field with the same name as a base field replaces it)
 * @param res response object of the request being answered
 * @param startTimestamp time (ms) at which the request was received
 */
function successResponse(successMsg: string, extendedProperties: object, res: ServerResponse, startTimestamp: number) {
  const endTimestamp = Date.now()
  const elapsedSeconds = (endTimestamp - startTimestamp) / 1000
  log.info(`Successful response in ${elapsedSeconds} s`)
  if (successMsg) { log.info(successMsg) }

  const payload = {
    status: 'ok',
    message: successMsg || '',
    startTimestamp,
    endTimestamp,
    version,
    ...(extendedProperties || {})
  }

  res.writeHead(200, { 'Content-Type': 'application/json' })
  res.write(JSON.stringify(payload))
  res.end()
}
|
||||||
|
|
||||||
|
/**
 * Checks that the request is a `POST` to `/v1` carrying a `cmd` parameter.
 * On failure an error response is sent through `ctx`.
 *
 * @param ctx context of the request being validated
 * @param params parsed JSON body; may be null when the body was the JSON
 *        literal `null`
 * @returns true when the request may be routed, false when it was rejected
 */
function validateIncomingRequest(ctx: RequestContext, params: BaseAPICall) {
  log.info(`Params: ${JSON.stringify(params)}`)

  if (ctx.req.method !== 'POST') {
    ctx.errorResponse('Only the POST method is allowed')
    return false
  }

  if (ctx.req.url !== '/v1') {
    ctx.errorResponse('Only /v1 endpoint is allowed')
    return false
  }

  // `params` is null for a body of `null`; without this guard reading
  // `params.cmd` throws inside the request's 'end' handler
  if (!params || !params.cmd) {
    ctx.errorResponse("Parameter 'cmd' is mandatory")
    return false
  }

  return true
}
|
||||||
|
|
||||||
|
/**
 * Handles one HTTP request: collects the body, parses it as JSON, validates
 * it and passes it to the router.
 */
function handleRequest(req: IncomingMessage, res: ServerResponse): void {
  // count the request for the log prefix
  log.incRequests()

  const startTimestamp = Date.now()
  log.info(`Incoming request: ${req.method} ${req.url}`)

  const chunks: any[] = []
  req.on('data', chunk => {
    chunks.push(chunk)
  })

  req.on('end', () => {
    // parse params
    const rawBody = Buffer.concat(chunks).toString()
    let params: BaseAPICall = null
    try {
      params = JSON.parse(rawBody)
    } catch (err) {
      errorResponse('Body must be in JSON format', res, startTimestamp)
      return
    }

    const ctx: RequestContext = {
      req,
      res,
      startTimestamp,
      errorResponse: (msg) => errorResponse(msg, res, startTimestamp),
      successResponse: (msg, extendedProperties) => successResponse(msg, extendedProperties, res, startTimestamp)
    }

    // validate params
    if (!validateIncomingRequest(ctx, params)) { return }

    // process request; a rejected route is reported as an error response
    Router(ctx, params).catch(e => {
      console.error(e)
      ctx.errorResponse(e.message)
    })
  })
}

createServer(handleRequest).listen(serverPort, serverHost, () => {
  log.info(`FlareSolverr v${version} listening on http://${serverHost}:${serverPort}`)
})
|
19
src/log.ts
Normal file
19
src/log.ts
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
// number of requests counted so far; shown in the log prefix as REQ-<n>
let requests = 0

/**
 * Interprets an environment variable as a boolean flag.
 * Unset, empty, "false" and "0" (case-insensitive, surrounding whitespace
 * ignored) mean false; every other value means true.
 */
function parseBooleanEnv(value: string | undefined): boolean {
  if (value === undefined || value === null) { return false }
  const normalized = String(value).trim().toLowerCase()
  return normalized !== '' && normalized !== 'false' && normalized !== '0'
}

// Boolean(process.env.LOG_HTML) was true for the string "false" as well
const LOG_HTML: boolean = parseBooleanEnv(process.env.LOG_HTML)
|
||||||
|
|
||||||
|
// level-based logger; every line is prefixed with timestamp, level and the
// current request counter
const levelLogger = require('console-log-level')({
  level: process.env.LOG_LEVEL || 'debug',
  prefix(level: string) {
    const timestamp = new Date().toISOString()
    return `${timestamp} ${level.toUpperCase()} REQ-${requests}`
  }
})

/**
 * Application logger: the console-log-level methods plus
 *  - incRequests(): bumps the request counter used in the prefix
 *  - html(html): logs page HTML at debug level, only when LOG_HTML is on
 */
export default {
  incRequests: () => { requests++ },
  html(html: string) {
    if (!LOG_HTML) { return }
    this.debug(html)
  },
  ...levelLogger
}
|
466
src/routes.ts
Normal file
466
src/routes.ts
Normal file
@ -0,0 +1,466 @@
|
|||||||
|
import { v1 as UUIDv1 } from 'uuid'
|
||||||
|
import sessions, { SessionsCacheItem } from './session'
|
||||||
|
import { RequestContext } from './types'
|
||||||
|
import log from './log'
|
||||||
|
import { Browser, SetCookie, Request, Page, Headers, HttpMethod, Overrides, Cookie } from 'puppeteer'
|
||||||
|
import { TimeoutError } from 'puppeteer/Errors'
|
||||||
|
import getCaptchaSolver, { CaptchaType } from './captcha'
|
||||||
|
|
||||||
|
/** Fields common to every API call. */
export interface BaseAPICall {
  /** name of the command to run; the key into `routes` */
  cmd: string
}

/** API call that addresses a session. */
interface BaseSessionsAPICall extends BaseAPICall {
  session?: string
}

/** Parameters of `sessions.create`. */
interface SessionsCreateAPICall extends BaseSessionsAPICall {
  userAgent?: string
  cookies?: SetCookie[]
  headers?: Headers
  maxTimeout?: number
  proxy?: any
}

/** Parameters of the `request.*` commands. */
interface BaseRequestAPICall extends BaseAPICall {
  url: string
  method?: HttpMethod
  postData?: string
  session?: string
  userAgent?: string
  maxTimeout?: number
  cookies?: SetCookie[]
  headers?: Headers
  proxy?: any // TODO: use interface not any
  download?: boolean
  returnOnlyCookies?: boolean
}

/** Command name -> handler. */
interface Routes {
  [key: string]: (ctx: RequestContext, params: BaseAPICall) => void | Promise<void>
}

/** The `solution` part of a successful request response. */
interface ChallengeResolutionResultT {
  url: string
  status: number
  headers?: Headers
  response: string
  cookies: object[]
  userAgent: string
}

/** Outcome of resolving a page, before it is turned into a response. */
interface ChallengeResolutionT {
  status?: string
  message: string
  result: ChallengeResolutionResultT
}

/** Per-field functions computing the overrides for an intercepted request. */
interface OverrideResolvers {
  method?: (request: Request) => HttpMethod
  postData?: (request: Request) => string
  headers?: (request: Request) => Headers
}

/** Keys of `OverrideResolvers`. */
type OverridesProps = 'method' | 'postData' | 'headers'
|
||||||
|
|
||||||
|
// selectors probed, in this order, to detect a challenge page
const CHALLENGE_SELECTORS = [
  '#trk_jschal_js',
  '.ray_id',
  '.attack-box'
]
// names of the textareas that receive the solved captcha token
const TOKEN_INPUT_NAMES = [
  'g-recaptcha-response',
  'h-captcha-response'
]
|
||||||
|
|
||||||
|
/**
 * Pauses every Document response of `page` (CDP Fetch domain, response
 * stage). When the page already holds a `cf_clearance` cookie the paused
 * request is failed with reason 'Aborted' and `callback` receives a payload
 * with the cookies and response headers; otherwise the request continues.
 *
 * @param page page whose document responses are intercepted
 * @param callback receives the payload once a `cf_clearance` cookie is found
 */
async function interceptResponse(page: Page, callback: (payload: ChallengeResolutionT) => any) {
  const client = await page.target().createCDPSession()
  await client.send('Fetch.enable', {
    patterns: [
      {
        urlPattern: '*',
        resourceType: 'Document',
        requestStage: 'Response'
      }
    ]
  })

  client.on('Fetch.requestPaused', async (e) => {
    log.debug('Fetch.requestPaused. Checking if the response has valid cookies')
    const headers = e.responseHeaders || []

    const cookies = await page.cookies()
    log.debug(cookies)

    const hasClearance = cookies.some((c: Cookie) => c.name === 'cf_clearance')
    if (!hasClearance) {
      log.debug('Continuing request. no valid cookies found')
      await client.send('Fetch.continueRequest', { requestId: e.requestId })
      return
    }

    log.debug('Aborting request and return cookies. valid cookies found')
    await client.send('Fetch.failRequest', { requestId: e.requestId, errorReason: 'Aborted' })

    const payload: ChallengeResolutionT = {
      status: 'ok',
      message: '',
      result: {
        url: page.url(),
        // the Fetch.requestPaused event carries the status code as
        // `responseStatusCode`; it has no `status` field
        status: e.responseStatusCode,
        // [{ name, value }, ...] -> { name: value, ... }
        headers: headers.reduce((a: any, x: { name: any; value: any }) => ({ ...a, [x.name]: x.value }), {}),
        response: null,
        cookies: cookies,
        userAgent: ''
      }
    }

    callback(payload)
  })
}
|
||||||
|
|
||||||
|
/**
 * Navigates `page` to `url`, waits for a detected Cloudflare challenge to
 * clear, optionally solves a captcha with the configured solver, and builds
 * the result payload. The page is closed before returning.
 *
 * @param ctx request context; error responses are sent through it
 * @param params request parameters (`url`, `maxTimeout` defaulting to 60000 ms,
 *        `proxy` credentials, `download`, `returnOnlyCookies`)
 * @param page page prepared by `setupPage`
 * @returns the payload, or nothing when an error response has already been sent
 */
async function resolveChallenge(ctx: RequestContext, { url, maxTimeout, proxy, download, returnOnlyCookies }: BaseRequestAPICall, page: Page): Promise<ChallengeResolutionT | void> {
  maxTimeout = maxTimeout || 60000
  let status = 'ok'
  let message = ''

  if (proxy) {
    log.debug("Apply proxy");
    if (proxy.username) {
      await page.authenticate({ username: proxy.username, password: proxy.password })
    }
  }

  log.debug(`Navigating to... ${url}`)
  let response = await page.goto(url, { waitUntil: 'domcontentloaded' })

  // look for challenge
  // responses without a `server` header must not crash the detection
  const serverHeader: string = response.headers().server || ''
  if (serverHeader.startsWith('cloudflare')) {
    log.info('Cloudflare detected')

    if (await page.$('.cf-error-code')) {
      await page.close()
      return ctx.errorResponse('Cloudflare has blocked this request (Code 1020 Detected).')
    }

    if (response.status() > 400) {
      // detect cloudflare wait 5s
      for (const selector of CHALLENGE_SELECTORS) {
        const cfChallengeElem = await page.$(selector)
        if (cfChallengeElem) {
          log.html(await page.content())
          log.debug('Waiting for Cloudflare challenge...')

          let interceptingResult: ChallengeResolutionT;
          if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers)
            await interceptResponse(page, async function(payload){
              interceptingResult = payload;
            });
          }

          // TODO: find out why these pages hang sometimes
          while (Date.now() - ctx.startTimestamp < maxTimeout) {
            await page.waitFor(1000)
            try {
              // catch exception timeout in waitForNavigation
              response = await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 5000 })
            } catch (error) { }

            if (returnOnlyCookies && interceptingResult) {
              await page.close();
              return interceptingResult;
            }

            try {
              // catch Execution context was destroyed
              const cfChallengeElem = await page.$(selector)
              if (!cfChallengeElem) { break }
              log.debug('Found challenge element again...')
            } catch (error)
            { }

            response = await page.reload({ waitUntil: 'domcontentloaded' })
            log.debug('Reloaded page...')
          }

          if (Date.now() - ctx.startTimestamp >= maxTimeout) {
            // close the page here too, like the other error paths do
            await page.close()
            ctx.errorResponse(`Maximum timeout reached. maxTimeout=${maxTimeout} (ms)`)
            return
          }

          log.debug('Validating HTML code...')
          break
        } else {
          log.debug(`No '${selector}' challenge element detected.`)
        }
      }
    }

    // it seems some captcha pages return 200 sometimes
    if (await page.$('input[name="cf_captcha_kind"]')) {
      const captchaSolver = getCaptchaSolver()
      if (captchaSolver) {
        const captchaStartTimestamp = Date.now()
        const challengeForm = await page.$('#challenge-form')
        if (challengeForm) {
          log.html(await page.content())
          const captchaTypeElm = await page.$('input[name="cf_captcha_kind"]')
          const cfCaptchaType: string = await captchaTypeElm.evaluate((e: any) => e.value)
          // the input value ("re" / "h") is the member name of CaptchaType
          const captchaType: CaptchaType = (CaptchaType as any)[cfCaptchaType]
          if (!captchaType) {
            await page.close()
            return ctx.errorResponse('Unknown captcha type!')
          }

          let sitekey = null
          if (captchaType !== CaptchaType.h && process.env.CAPTCHA_SOLVER !== 'hcaptcha-solver') {
            const sitekeyElem = await page.$('*[data-sitekey]')
            if (!sitekeyElem) {
              await page.close()
              return ctx.errorResponse('Could not find sitekey!')
            }
            sitekey = await sitekeyElem.evaluate((e) => e.getAttribute('data-sitekey'))
          }

          log.info('Waiting to receive captcha token to bypass challenge...')
          const token = await captchaSolver({
            url,
            sitekey,
            type: captchaType
          })

          if (!token) {
            await page.close()
            return ctx.errorResponse('Token solver failed to return a token.')
          }

          for (const name of TOKEN_INPUT_NAMES) {
            const input = await page.$(`textarea[name="${name}"]`)
            if (input) { await input.evaluate((e: HTMLTextAreaElement, token) => { e.value = token }, token) }
          }

          // ignore preset event listeners on the form
          await page.evaluate(() => {
            // use the listener's own argument, not the global `event`
            window.addEventListener('submit', (e) => { e.stopPropagation() }, true)
          })

          // it seems some sites obfuscate their challenge forms
          // TODO: look into how they do it and come up with a more solid solution
          try {
            // this element is added with js and we want to wait for all the js to load before submitting
            await page.waitForSelector('#challenge-form [type=submit]', { timeout: 5000 })
          } catch (err) {
            if (err instanceof TimeoutError) {
              log.debug(`No '#challenge-form [type=submit]' element detected.`)
            }
          }

          // calculates the time it took to solve the captcha
          const captchaSolveTotalTime = Date.now() - captchaStartTimestamp

          // generates a random wait time (10 to 29 seconds)
          const randomWaitTime = (Math.floor(Math.random() * 20) + 10) * 1000

          // waits, if any, time remaining to appear human but stay as fast as possible
          const timeLeft = randomWaitTime - captchaSolveTotalTime
          if (timeLeft > 0) { await page.waitFor(timeLeft) }

          let interceptingResult: ChallengeResolutionT;
          if (returnOnlyCookies) { //If we just want to get the cookies, intercept the response before we get the content/body (just cookies and headers)
            await interceptResponse(page, async function(payload){
              interceptingResult = payload;
            });
          }

          // submit captcha response
          // NOTE(review): the submit is not awaited; the navigation it
          // triggers is awaited on the next line instead
          challengeForm.evaluate((e: HTMLFormElement) => e.submit())
          response = await page.waitForNavigation({ waitUntil: 'domcontentloaded' })

          if (returnOnlyCookies && interceptingResult) {
            await page.close();
            return interceptingResult;
          }
        }
      } else {
        status = 'warning'
        message = 'Captcha detected but no automatic solver is configured.'
      }
    }
  }

  const payload: ChallengeResolutionT = {
    status,
    message,
    result: {
      url: page.url(),
      status: response.status(),
      headers: response.headers(),
      response: null,
      cookies: await page.cookies(),
      userAgent: await page.evaluate(() => navigator.userAgent)
    }
  }

  if (download) {
    // for some reason we get an error unless we reload the page
    // has something to do with a stale buffer and this is the quickest
    // fix since I am short on time
    response = await page.goto(url, { waitUntil: 'domcontentloaded' })
    payload.result.response = (await response.buffer()).toString('base64')
  } else {
    payload.result.response = await page.content()
  }

  // make sure the page is closed because if it isn't an error will be thrown
  // when a user uses a temporary session, the browser may be quit before
  // the page is properly closed.
  await page.close()

  return payload
}
|
||||||
|
|
||||||
|
/**
 * Combines the defaults stored on a session with the parameters of one
 * request. Request parameters win over session defaults; `headers` are merged
 * key by key instead of being replaced wholesale.
 *
 * @param session session whose `defaults` are used
 * @param params parameters of the current request
 * @returns a new parameter object; `headers` is always an object
 */
function mergeSessionWithParams({ defaults }: SessionsCacheItem, params: BaseRequestAPICall): BaseRequestAPICall {
  const sessionHeaders = defaults.headers || {}
  const requestHeaders = params.headers || {}

  // custom merging logic
  // NOTE(review): the merged object is truthy even when empty, so callers
  // testing `if (headers)` always see headers - confirm this is intended
  const merged = { ...defaults, ...params }
  merged.headers = { ...sessionHeaders, ...requestHeaders }

  return merged
}
|
||||||
|
|
||||||
|
/**
 * Opens a new page in `browser` and applies the request parameters to it:
 * user agent and cookies are set on the page directly; method, body and
 * headers are applied by intercepting the first navigation request.
 *
 * @param ctx request context (not read here)
 * @param params request parameters, already merged with the session defaults
 * @param browser browser in which the page is opened
 * @returns the prepared page
 */
async function setupPage(ctx: RequestContext, params: BaseRequestAPICall, browser: Browser): Promise<Page> {
  const page = await browser.newPage()

  // merge session defaults with params
  const { method, postData, userAgent, headers, cookies } = params

  const resolvers: OverrideResolvers = {}

  if (method !== 'GET') {
    log.debug(`Setting method to ${method}`)
    resolvers.method = () => method
  }

  if (postData) {
    log.debug(`Setting body data to ${postData}`)
    resolvers.postData = () => postData
  }

  if (userAgent) {
    log.debug(`Using custom UA: ${userAgent}`)
    await page.setUserAgent(userAgent)
  }

  if (headers) {
    log.debug(`Adding custom headers: ${JSON.stringify(headers, null, 2)}`)
    resolvers.headers = (request) => Object.assign(request.headers(), headers)
  }

  if (cookies) {
    log.debug(`Setting custom cookies: ${JSON.stringify(cookies, null, 2)}`)
    await page.setCookie(...cookies)
  }

  // nothing to override: no interception needed
  const resolverKeys = Object.keys(resolvers) as OverridesProps[]
  if (resolverKeys.length === 0) {
    return page
  }

  log.debug(resolvers)
  let alreadyApplied = false

  const onRequest = (request: Request) => {
    // overrides go onto the first navigation request only
    if (alreadyApplied || !request.isNavigationRequest()) {
      request.continue()
      return
    }
    alreadyApplied = true

    const overrides: Overrides = {}
    for (const key of Object.keys(resolvers) as OverridesProps[]) {
      // @ts-ignore
      overrides[key] = resolvers[key](request)
    }

    log.debug(overrides)
    request.continue(overrides)
  }

  await page.setRequestInterception(true)
  page.on('request', onRequest)

  return page
}
|
||||||
|
|
||||||
|
/**
 * Runs one browser request. Without `params.session` a one-time session is
 * created for the request and destroyed afterwards; otherwise the named
 * session is looked up and must exist.
 *
 * @param ctx request context used to send the response
 * @param params request parameters
 */
const browserRequest = async (ctx: RequestContext, params: BaseRequestAPICall) => {
  const oneTimeSession = params.session === undefined
  const sessionId = params.session || UUIDv1()
  const session = oneTimeSession
    ? await sessions.create(sessionId, {
      userAgent: params.userAgent,
      oneTimeSession
    })
    : sessions.get(sessionId)

  if (session === false) {
    return ctx.errorResponse('This session does not exist. Use \'list_sessions\' to see all the existing sessions.')
  }

  params = mergeSessionWithParams(session, params)

  try {
    const page = await setupPage(ctx, params, session.browser)
    const data = await resolveChallenge(ctx, params, page)

    // no data: resolveChallenge has already sent an error response
    if (data) {
      const { status } = data
      delete data.status
      ctx.successResponse(data.message, {
        ...(oneTimeSession ? {} : { session: sessionId }),
        ...(status ? { status } : {}),
        solution: data.result
      })
    }
  } catch (error) {
    log.error(error)
    return ctx.errorResponse("Unable to process browser request")
  } finally {
    if (oneTimeSession) {
      // await the cleanup so that a failure is logged here instead of
      // surfacing as an unhandled promise rejection
      try {
        await sessions.destroy(sessionId)
      } catch (destroyError) {
        log.error(destroyError)
      }
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Command handlers, keyed by the `cmd` value of the API call.
 * Each handler sends its own response through `ctx`.
 */
export const routes: Routes = {
  // creates a session; a random id is used when none is given
  'sessions.create': async (ctx, { session, ...options }: SessionsCreateAPICall) => {
    session = session || UUIDv1()
    const { browser } = await sessions.create(session, options)
    if (browser) { ctx.successResponse('Session created successfully.', { session }) }
  },
  // lists the ids of all sessions
  'sessions.list': (ctx) => {
    ctx.successResponse(null, { sessions: sessions.list() })
  },
  // closes a session
  'sessions.destroy': async (ctx, { session }: BaseSessionsAPICall) => {
    if (await sessions.destroy(session)) { return ctx.successResponse('The session has been removed.') }
    ctx.errorResponse('This session does not exist.')
  },
  // GET request through the browser
  'request.get': async (ctx, params: BaseRequestAPICall) => {
    params.method = 'GET'
    if (params.postData) {
      // the parameter is called "postData"; the message used to say "postBody"
      return ctx.errorResponse('Cannot use "postData" when sending a GET request.')
    }
    await browserRequest(ctx, params)
  },
  // POST request through the browser; requires a body
  'request.post': async (ctx, params: BaseRequestAPICall) => {
    params.method = 'POST'

    if (!params.postData) {
      return ctx.errorResponse('Must send param "postData" when sending a POST request.')
    }

    await browserRequest(ctx, params)
  },
  // GET request with `returnOnlyCookies` switched on
  'request.cookies': async (ctx, params: BaseRequestAPICall) => {
    params.returnOnlyCookies = true
    params.method = 'GET'
    if (params.postData) {
      return ctx.errorResponse('Cannot use "postData" when sending a GET request.')
    }
    await browserRequest(ctx, params)
  }
}
|
||||||
|
|
||||||
|
/**
 * Dispatches an API call to the handler registered for `params.cmd`.
 * Unknown commands get an error response.
 *
 * @param ctx request context used to send the response
 * @param params parsed API call
 */
export default async function Router(ctx: RequestContext, params: BaseAPICall): Promise<void> {
  // only own keys of `routes` are commands; a plain lookup would also find
  // inherited members such as "constructor" or "toString" and call them,
  // leaving the request without any response
  if (Object.prototype.hasOwnProperty.call(routes, params.cmd)) {
    const route = routes[params.cmd]
    if (route) { return await route(ctx, params) }
  }
  return ctx.errorResponse(`The command '${params.cmd}' is invalid.`)
}
|
139
src/session.ts
Normal file
139
src/session.ts
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
import * as os from 'os'
|
||||||
|
import * as path from 'path'
|
||||||
|
import * as fs from 'fs'
|
||||||
|
|
||||||
|
import puppeteer from 'puppeteer-extra'
|
||||||
|
import { LaunchOptions, Browser, Headers, SetCookie } from 'puppeteer'
|
||||||
|
|
||||||
|
import log from './log'
|
||||||
|
import { deleteFolderRecursive, sleep, removeEmptyFields } from './utils'
|
||||||
|
|
||||||
|
/** Per-session defaults that are merged into every request of the session. */
interface SessionPageDefaults {
  headers?: Headers
  userAgent?: string
  /** stored by `create` together with the other defaults */
  maxTimeout?: number
}

/** One cached session. */
export interface SessionsCacheItem {
  browser: Browser
  /** profile directory; only set for sessions that are not one-time */
  userDataDir?: string
  defaults: SessionPageDefaults
}

/** Session id -> cached session. */
interface SessionsCache {
  [key: string]: SessionsCacheItem
}

/** Options accepted by `create`. */
interface SessionCreateOptions {
  /** when true no profile directory is prepared for the session */
  oneTimeSession?: boolean
  userAgent?: string
  cookies?: SetCookie[]
  headers?: Headers
  maxTimeout?: number
  proxy?: any
}
|
||||||
|
|
||||||
|
const sessionCache: SessionsCache = {}
|
||||||
|
|
||||||
|
// setting "user-agent-override" evasion is not working for us because it can't be changed
|
||||||
|
// in each request. we set the user-agent in the browser args instead
|
||||||
|
puppeteer.use(require('puppeteer-extra-plugin-stealth')())
|
||||||
|
|
||||||
|
/**
 * Maps a session id to its browser profile directory inside the OS temp dir.
 *
 * The id comes from the API caller and the resulting directory is created
 * and later deleted recursively, so it must not be able to leave the temp
 * dir: every character outside `A-Z a-z 0-9 . _ -` is replaced by `%` plus
 * its four-digit hex code unit. Ids made of safe characters only (e.g. UUIDs)
 * map to the same path as before.
 *
 * @param id session id
 * @returns absolute path of the profile directory
 */
function userDataDirFromId(id: string): string {
  const safeId = String(id).replace(
    /[^A-Za-z0-9._-]/g,
    (ch) => '%' + ('000' + ch.charCodeAt(0).toString(16)).slice(-4)
  )
  return path.join(os.tmpdir(), `puppeteer_chrome_profile_${safeId}`)
}
|
||||||
|
|
||||||
|
/**
 * Makes sure the profile directory of session `id` exists.
 *
 * @param id session id
 * @returns path of the profile directory
 */
function prepareBrowserProfile(id: string): string {
  // TODO: maybe pass SessionCreateOptions for loading later?
  const profileDir = userDataDirFromId(id)
  const alreadyThere = fs.existsSync(profileDir)
  if (!alreadyThere) {
    fs.mkdirSync(profileDir, { recursive: true })
  }
  return profileDir
}
|
||||||
|
|
||||||
|
/** Session store: creates, lists, looks up and destroys browser sessions. */
export default {
  /**
   * Launches a browser for session `id` and caches it.
   *
   * @param id session id
   * @param options session options; `userAgent`, `headers` and `maxTimeout`
   *        are stored as the session defaults
   * @returns the cached session
   * @throws Error when the browser could not be launched
   */
  create: async (id: string, { cookies, oneTimeSession, userAgent, headers, maxTimeout, proxy }: SessionCreateOptions): Promise<SessionsCacheItem> => {
    const args = ['--no-sandbox', '--disable-setuid-sandbox']
    if (proxy && proxy.url) {
      args.push(`--proxy-server=${proxy.url}`)
    }

    const puppeteerOptions: LaunchOptions = {
      product: 'chrome',
      headless: process.env.HEADLESS !== 'false',
      args
    }

    if (!oneTimeSession) {
      log.debug('Creating userDataDir for session.')
      puppeteerOptions.userDataDir = prepareBrowserProfile(id)
    }

    log.debug('Launching headless browser...')

    // TODO: maybe access env variable?
    // TODO: sometimes browser instances are created and not connected to correctly.
    //       how do we handle/quit those instances inside Docker?
    const maxLaunchTries = 3
    let browser: Browser

    // exactly `maxLaunchTries` attempts (the previous countdown ran once more
    // than the error message reported)
    for (let attempt = 1; attempt <= maxLaunchTries; attempt++) {
      try {
        browser = await puppeteer.launch(puppeteerOptions)
        break
      } catch (e) {
        // only launch failures are retried; anything else is rethrown
        if (e.message !== 'Failed to launch the browser process!') { throw e }
        log.warn('Failed to open browser, trying again...')
      }
    }

    if (!browser) { throw Error(`Failed to launch browser ${maxLaunchTries} times in a row.`) }

    if (cookies) {
      const page = await browser.newPage()
      await page.setCookie(...cookies)
    }

    sessionCache[id] = {
      browser,
      userDataDir: puppeteerOptions.userDataDir,
      defaults: removeEmptyFields({
        userAgent,
        headers,
        maxTimeout
      })
    }

    return sessionCache[id]
  },

  /** @returns the ids of all cached sessions */
  list: (): string[] => Object.keys(sessionCache),

  // TODO: create a sessions.close that doesn't rm the userDataDir

  /**
   * Closes the browser of session `id`, removes the session from the cache
   * and deletes its profile directory, if it has one.
   *
   * @param id session id
   * @returns true when the session existed, false otherwise
   * @throws Error when the profile directory cannot be deleted
   */
  destroy: async (id: string): Promise<boolean> => {
    // unknown ids must yield `false`; destructuring the missing cache entry
    // directly would throw instead
    const session = sessionCache[id]
    if (!session || !session.browser) { return false }

    const { browser, userDataDir } = session
    await browser.close()
    delete sessionCache[id]

    if (userDataDir) {
      try {
        // for some reason this keeps an error from being thrown in Windows, figures
        await sleep(5000)
        deleteFolderRecursive(userDataDir)
      } catch (e) {
        console.error(e)
        throw Error(`Error deleting browser session folder. ${e.message}`)
      }
    }

    return true
  },

  /**
   * @param id session id
   * @returns the cached session, or false when there is none
   */
  get: (id: string): SessionsCacheItem | false => sessionCache[id] || false
}
|
9
src/types.ts
Normal file
9
src/types.ts
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
import { IncomingMessage, ServerResponse } from 'http';
|
||||||
|
|
||||||
|
/** Everything a command handler needs to answer one HTTP request. */
export interface RequestContext {
  /** the incoming request */
  req: IncomingMessage
  /** the response being written */
  res: ServerResponse
  /** time (ms) at which the request was received */
  startTimestamp: number
  /** sends an error response carrying `msg` */
  errorResponse: (msg: string) => void
  /** sends a success response carrying `msg` and the optional extra fields */
  successResponse: (msg: string, extendedProperties?: object) => void
}
|
31
src/utils.ts
Normal file
31
src/utils.ts
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
import * as fs from 'fs'
|
||||||
|
import * as Path from 'path'
|
||||||
|
import { promisify } from 'util'
|
||||||
|
|
||||||
|
// Promise-returning form of setTimeout (via util.promisify):
// `await sleep(ms)` resumes after `ms` milliseconds.
export const sleep = promisify(setTimeout)
|
||||||
|
|
||||||
|
// recursive fs.rmdir needs node version 12:
|
||||||
|
// https://github.com/ngosang/FlareSolverr/issues/5#issuecomment-655572712
|
||||||
|
/**
 * Deletes the directory `path` with everything below it.
 * Does nothing when `path` does not exist.
 *
 * @param path directory to remove
 */
export function deleteFolderRecursive(path: string) {
  if (!fs.existsSync(path)) { return }

  for (const entry of fs.readdirSync(path)) {
    const entryPath = Path.join(path, entry)
    const isDirectory = fs.lstatSync(entryPath).isDirectory()
    if (isDirectory) {
      // descend first so the directory is empty when it gets removed
      deleteFolderRecursive(entryPath)
    } else {
      fs.unlinkSync(entryPath)
    }
  }

  fs.rmdirSync(path)
}
|
||||||
|
|
||||||
|
/**
 * Copies `o` without the properties whose value is `undefined`.
 * Other falsy values (null, 0, '', false) are kept.
 *
 * @param o object to copy
 * @returns a new object; `o` itself is not modified
 */
export const removeEmptyFields = (o: Record<string, any>): typeof o => {
  const kept: typeof o = {}
  for (const key in o) {
    if (o[key] === undefined) { continue }
    kept[key] = o[key]
  }
  return kept
}
|
21
tsconfig.json
Normal file
21
tsconfig.json
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"moduleResolution": "node",
|
||||||
|
"target": "es2017",
|
||||||
|
"noImplicitAny": true,
|
||||||
|
"removeComments": true,
|
||||||
|
"preserveConstEnums": true,
|
||||||
|
"allowSyntheticDefaultImports": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"lib": [
|
||||||
|
"es2015", "dom"
|
||||||
|
],
|
||||||
|
"module": "commonjs",
|
||||||
|
"outDir": "dist",
|
||||||
|
"sourceMap": true
|
||||||
|
},
|
||||||
|
"include": [
|
||||||
|
"src", "node_modules/@types/puppeteer/index.d.ts"
|
||||||
|
],
|
||||||
|
"exclude": ["node_modules"]
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user