Compare commits

...

160 Commits

Author SHA1 Message Date
ilike2burnthing
3dd3e7559d
u_c: remove apparent c&p typo
https://github.com/ultrafunkamsterdam/undetected-chromedriver/pull/1933
2025-06-06 08:04:00 +01:00
ilike2burnthing
f21c1d51bc
Restore example service file. #1204 2025-06-04 23:13:00 +01:00
ilike2burnthing
957347f73a
Bump version 3.3.24 (#1505) 2025-06-04 19:02:06 +01:00
ilike2burnthing
c55080b0ec
Remove hidden character 2025-06-04 18:54:48 +01:00
ilike2burnthing
639bfca020
Bump version 3.3.23 (#1504) 2025-06-04 18:51:31 +01:00
ilike2burnthing
237694df76
Update base image to bookworm. resolves #1503 2025-06-04 18:44:17 +01:00
ilike2burnthing
6e5d6f1795
Bump version 3.3.22 (#1500) 2025-06-03 05:54:25 +01:00
ilike2burnthing
30804a86e5
Bump Chromium to v137 for build 2025-06-03 05:39:46 +01:00
ilike2burnthing
e0bdaf7745
Don't open devtools 2025-06-03 05:38:55 +01:00
ilike2burnthing
795365dbe4
Change from click to keys
credit to @sh4dowb - #1497
2025-06-03 05:38:06 +01:00
ilike2burnthing
ce5369dd41
Update bug_report.yml
fix accidental delete
2025-03-04 02:05:23 +00:00
ilike2burnthing
600b09d498
Update bug_report.yml 2025-03-04 02:04:29 +00:00
ilike2burnthing
d1f19405a1
Update README.md. closes #1267 2025-01-21 20:27:36 +00:00
ilike2burnthing
82a1366d34
Remove dead directory link from readme. resolve #1436 2025-01-20 17:37:14 +00:00
dependabot[bot]
a2fe9e7776
Bump waitress from 2.1.2 to 3.0.1 (#1418)
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-11-29 01:53:34 +00:00
ilike2burnthing
6cc628df9e
Disable search engine choice screen 2024-11-24 18:31:50 +00:00
Eduard Tykhoniuk
8b1851eeb1
Fix headless=false stalling 2024-11-24 18:30:35 +00:00
ilike2burnthing
54668a11e7
bug_report: lint fix again 2024-09-28 08:08:49 +01:00
ilike2burnthing
701d8fb4ff
bug_report: change to input 2024-09-28 08:07:56 +01:00
ilike2burnthing
39a265ccb8
bug_report: lint fix 2024-09-28 07:58:14 +01:00
ilike2burnthing
e32b247014
bug_report: default=0 2024-09-28 07:57:11 +01:00
ilike2burnthing
0d8fe8fe50
bug_report: no booleans allowed 2024-09-28 07:51:57 +01:00
ilike2burnthing
718da3a36f
bug_report: add 'no really' drop down
maybe this will help... 🙄
2024-09-28 07:50:11 +01:00
ilike2burnthing
a798561338
Bump requests version
*.0 was yanked
2024-07-30 02:38:13 +01:00
Bogdan
eb680efc90
Don't build docker images for PRs from forks (#1281) 2024-07-20 22:08:40 +03:00
ilike2burnthing
0f8f0bec25
revert and bump action version 2024-07-20 19:41:49 +01:00
ilike2burnthing
3d9bc5627b
Change to GITHUB_TOKEN for GHRC login 2024-07-20 14:21:34 +01:00
ilike2burnthing
dd7eaee2e3
Bump requirements
resolves Dependabot alerts
2024-07-12 17:11:40 +01:00
ilike2burnthing
031177bbdb
Bump version 3.3.21 (#1240) 2024-06-26 02:14:25 +01:00
Bogdan
a8644532a1
Escape values for generated form used in request.post (#1236)
and build docker images for PRs
2024-06-26 02:04:59 +01:00
ilike2burnthing
e96161c873
Add challenge selector to catch reloading page on non-English systems. resolves #1237 2024-06-25 22:32:06 +01:00
ilike2burnthing
5a1f25cd52
Bump version 3.3.20 (#1229) 2024-06-21 22:21:37 +01:00
tenettow
a2c0e4348e
Update Cloudflare challenge and checkbox selectors (#1224) 2024-06-21 22:07:03 +01:00
ilike2burnthing
2ecf88895b
Check not running in Docker before logging version_main error 2024-06-15 08:37:42 +01:00
ilike2burnthing
984368edb5
maxTimeout should always be int. resolves #1212 2024-06-15 05:41:45 +01:00
21hsmw
6c1d78cb84
Fix occasional headless issue on Linux when set to "false" (#1199)
* Fix occasional headless issue on Linux when set to "false"

- Add a variable containing the current platform
- Check if the platform is "nt" (Windows) before closing the driver

* Update CHANGELOG.md

---------

Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2024-05-24 17:33:46 +01:00
ilike2burnthing
5a2c61601e
Fix Chrome v124+ not closing on Windows. resolves #1161 (#1193) 2024-05-20 00:52:55 +01:00
ilike2burnthing
c304da2964
Update README.md 2024-04-22 23:30:52 +01:00
ilike2burnthing
b811412699
Fix LANG ENV for Linux. #1036 2024-04-20 03:41:53 +01:00
Ross Patterson
0bb8de144f
Add Compose V2 command to readme (#1154)
Co-authored-by: root@library.moodysalon.net <root@library.moodysalon.net>
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2024-04-18 05:07:06 +01:00
ilike2burnthing
38166dfaa0
Bump version 3.3.17 (#1147) 2024-04-09 20:31:21 +01:00
ilike2burnthing
8dea0ed017
Fix file descriptor leak in service on quit(). resolves #983
credit: @zkulis - https://github.com/ultrafunkamsterdam/undetected-chromedriver/pull/1812
2024-04-09 20:27:42 +01:00
francisco-lafe
20cd2944a7
Update README.md (#1127)
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2024-03-19 18:05:11 +00:00
ilike2burnthing
fd773e5909
Bump version 3.3.16 (#1105) 2024-02-28 21:38:53 +00:00
Justin Kromlinger
35c7bff3c8
Use headless configuration properly (#1104) 2024-02-28 20:36:38 +00:00
Raphaël
afdc1c7a8e
Add FreeBSD support (#1054) 2024-02-28 02:45:04 +00:00
ilike2burnthing
0bc7a4498c
Update README.md 2024-02-23 05:01:00 +00:00
Xewdy
c5a5f6d65e
Add platform specifiers to dependencies (#877) 2024-02-21 21:14:25 +00:00
ilike2burnthing
aaf29be8e1
Bump version 3.3.15 (#1088) 2024-02-20 23:48:23 +00:00
Tadas Gedgaudas
800866d033
Fix looping challenges. #1036 (#1065)
Co-authored-by: Tadas Gedgaudas <tg@infrahub.io>
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2024-02-20 23:41:02 +00:00
ilike2burnthing
043f18b231
Bump UC version to 3.5.5 2024-02-17 19:28:06 +00:00
ilike2burnthing
d21a332519
Hotfix 2 - bad Chromium build, instances failed to terminate (#1072) 2024-02-17 05:53:45 +00:00
ilike2burnthing
3ca6d08f41
Hotfix for Linux build - some Chrome files no longer exist (#1071) 2024-02-17 01:15:32 +00:00
ilike2burnthing
227bd7ac72
Update Chrome downloads (#1070) 2024-02-17 00:50:14 +00:00
ilike2burnthing
e6a08584c0
Update README.md
thanks @kimboslice99
2024-02-16 04:35:37 +00:00
ilike2burnthing
df06d13cf8
Update README.md 2024-01-12 23:38:18 +00:00
ilike2burnthing
993b8c41ac
Fix too many open files error. resolves #983 (#1033) 2024-01-07 21:22:14 +00:00
ilike2burnthing
a4d42d7834
Remove unnecessary comment 2023-12-17 00:33:06 +00:00
21hsmw
1c855b8af0
Fix looping challenges and invalid cookies. resolves #1006 (#1010)
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2023-12-15 22:11:58 +00:00
ilike2burnthing
745c69491f
Bump version 3.3.11 (#999) 2023-12-11 20:56:14 +00:00
txtsd
f7e316fd5a
updates: UC 3.5.4 & Selenium 4.15.2 (#970)
Co-authored-by: GaspardRuan <1039553124@qq.com>
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2023-12-11 20:51:16 +00:00
ilike2burnthing
16c8ab5f3d
Update README.md 2023-11-14 07:54:09 +00:00
ilike2burnthing
7af311b73c
Bump version 3.3.10 (#969) 2023-11-14 04:04:42 +00:00
ilike2burnthing
daec97532d
Update README.md 2023-11-14 04:00:01 +00:00
ilike2burnthing
8d7ed48f21
Add LANG ENV. resolves #951 2023-11-14 03:56:57 +00:00
ilike2burnthing
220f2599ae
Bump version 3.3.9 (#963) 2023-11-13 07:17:28 +00:00
ilike2burnthing
d772cf3f50
Fix for Docker build, capture TypeError. Fixes #962 2023-11-13 07:14:13 +00:00
ilike2burnthing
ab4365894b
Bump version 3.3.8 (#961) 2023-11-13 04:55:49 +00:00
ilike2burnthing
3fa9631559
Fix "OSError: [WinError 6] The handle is invalid" on exit 2023-11-13 04:28:19 +00:00
ilike2burnthing
04858c22fd
Support running Chrome 119 from source (#960) 2023-11-13 04:23:06 +00:00
Nabi KaramAliZadeh
5085ca6990
Fix headless=true for Chrome 117+. Fixes #910 (#921) 2023-11-13 04:03:56 +00:00
ilike2burnthing
cd4df1e061
Bump version 3.3.7 (#944) 2023-11-05 14:41:12 +00:00
ilike2burnthing
6c79783f7c
Bump version 3.3.6 (#905) 2023-09-15 20:40:56 +01:00
ilike2burnthing
4139e8d47c
Update checkbox selector, again 2023-09-15 20:37:26 +01:00
zax2002
1942eb5fdc
Typo in README (#901) 2023-09-14 07:41:59 +01:00
ilike2burnthing
401bf5be76
Bump version 3.3.5 (#902) 2023-09-13 10:28:02 +01:00
ilike2burnthing
d8ffdd3061
Change checkbox selector, support language other than English. resolves #891 2023-09-13 10:19:19 +01:00
ilike2burnthing
2d66590b08
Bump version 3.3.4 (#884) 2023-09-02 12:30:21 +01:00
Zachary Hampton
a217510dc7
Update checkbox selector (#882) 2023-09-02 12:24:25 +01:00
ilike2burnthing
553bd8ab4f
Bump version 3.3.3 (#879) 2023-08-31 20:02:17 +01:00
ilike2burnthing
1b197c3e53
Update undetected_chromedriver to v3.5.3 (#860) 2023-08-31 19:56:06 +01:00
ngosang
fd308f01be Bump version 3.3.2 2023-08-03 10:00:16 +02:00
ngosang
b5eef32615 Fix URL domain in Prometheus exporter 2023-08-03 09:02:46 +02:00
ngosang
644a843d89 Bump version 3.3.1 2023-08-03 08:13:01 +02:00
ngosang
82e1c94c6f Fix HEADLESS=false in Windows binary 2023-08-03 08:10:14 +02:00
ngosang
fbc71516f5 Fix for Cloudflare verify checkbox 2023-08-03 07:28:58 +02:00
ngosang
40bd1cba4c Fix Prometheus exporter for management and health endpoints 2023-08-03 06:36:31 +02:00
ngosang
d1588c1156 Remove misleading stack trace when a button is not found 2023-08-03 05:45:38 +02:00
ngosang
b4ad583baa Revert "Update base Docker image to Debian Bookworm"
This reverts commit 0edc50e27152a0d9497b3f4dd8c83cce7fb2735b.
2023-08-03 05:19:56 +02:00
ngosang
5d31e551cc Revert "Install Chromium 115 from Debian testing"
This reverts commit 2aa095ed5d8053242ef82ba2fbab51ddfd97623f.
2023-08-03 05:19:27 +02:00
ngosang
d92845f34f Bump version 3.3.0 2023-08-02 20:10:35 +02:00
ngosang
5d3b73ea9d Add more traces in build_package.py 2023-08-02 20:05:42 +02:00
ngosang
2aa095ed5d Install Chromium 115 from Debian testing 2023-08-02 19:30:39 +02:00
ngosang
687c8f75ae Update pyinstaller 5.13.0 2023-08-02 19:30:03 +02:00
ngosang
22ed3d324b Fix for new Cloudflare detection. Thanks @cedric-bour for #845 2023-08-02 19:29:44 +02:00
ngosang
5ba9ef03f3 Update Selenium 4.11.2 2023-08-02 19:23:08 +02:00
ngosang
d2e144ea12 Implement Prometheus metrics 2023-07-23 21:52:06 +02:00
ngosang
313fb2c14b Add support for proxy authentication username/password. Thanks @jacobprice808 2023-07-23 19:46:46 +02:00
ngosang
6d69f40b58 Update Chromium 115 in binary packages 2023-07-23 19:46:18 +02:00
ngosang
a1c36f60d2 Fix for Chrome / Chromium version > 114 2023-07-23 19:46:18 +02:00
ngosang
0edc50e271 Update base Docker image to Debian Bookworm 2023-07-23 19:46:18 +02:00
ngosang
f4a4baa57c Update Selenium 4.10.0 2023-07-23 19:46:18 +02:00
ngosang
f7e434c6e3 Simplify 'Verify you are human' resolver. Related #811 2023-07-23 19:46:15 +02:00
Maksim Kurnosenko
7728f2ab31
Update undetected_chromedriver to v3.5.0 (#803)
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2023-07-17 18:46:43 +01:00
Garfield69
c920bea4ca Update .gitignore 2023-07-17 13:50:11 +12:00
ilike2burnthing
a785f83034
Update CHANGELOG.md 2023-07-16 23:46:31 +01:00
ilike2burnthing
b42c22f5b1
Bump version 3.2.2 2023-07-16 23:46:25 +01:00
Tyler Hagstrom
9c62410a8b
Workaround for updated 'verify your are human' check (#816)
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2023-07-16 23:42:41 +01:00
ngosang
b8768ae17d Bump version 3.2.1 2023-06-10 19:27:55 +02:00
ngosang
9b2c602a1f Kill dead Chrome processes in Windows 2023-06-10 19:26:28 +02:00
ngosang
8316350b98 Fix Chrome GL erros in ASUSTOR NAS 2023-06-10 18:49:09 +02:00
ilike2burnthing
33307ce461
bug_report: add troubleshooting check 2023-06-04 04:08:25 +01:00
ngosang
cedb7bc54e Bump version 3.2.0 2023-05-23 23:13:31 +02:00
ngosang
6ecaf2362c Support cookies param in request cmd 2023-05-23 22:39:07 +02:00
ngosang
3c97c9603a Add integration tests for requests with proxy param 2023-05-23 22:13:52 +02:00
NyaMisty
efaa5f31b6
Support proxy in request and session cmds (#754) 2023-05-23 21:53:13 +02:00
ngosang
4db85a2d0f Update Python dependencies 2023-05-23 20:18:06 +02:00
ngosang
66b9db21e5 Fix Chromium exec permissions in Linux package 2023-05-21 21:09:06 +02:00
ngosang
ab0fe58d4a Bump version 3.1.2 2023-04-02 21:06:52 +02:00
ngosang
f68ddb7573 Bump Selenium dependency 2023-04-02 21:05:07 +02:00
ngosang
ac77110578 Remove redundant artifact from Windows binary package 2023-04-02 21:04:34 +02:00
Rawand Ahmed Shaswar
a9d1a2de2d
Fix headless mode in mac OS (#750) 2023-04-02 20:49:18 +02:00
ngosang
ab5f14d6c3 Bump version 3.1.1 2023-03-25 22:20:56 +01:00
ngosang
e0bf02fb8b Distribute binary executables in compressed package 2023-03-25 22:19:26 +01:00
ilike2burnthing
82a1cd835a
Add icon for binary executable (#739) 2023-03-25 21:35:37 +01:00
ngosang
7017715e21 Include information about supported architectures in the readme 2023-03-25 21:32:17 +01:00
ngosang
ae18559db1 Check Python version on start 2023-03-25 21:23:06 +01:00
ngosang
2680521008 Bump version 3.1.0 2023-03-20 23:17:05 +01:00
ngosang
2297bab185 Update changelog 2023-03-20 23:16:39 +01:00
ngosang
8d9bac9dd4 Build binaries for Linux x64 and Windows x64 2023-03-20 22:30:52 +01:00
ngosang
30ccf18e85 Several fixes in Sessions 2023-03-20 17:06:16 +01:00
ngosang
a15d041a0c Fix Waitress server error with asyncore_use_poll=true. Resolves #680 2023-03-20 16:27:25 +01:00
Martino Mensio
c6c74e7c9d Add Fairlane challenge selector (#723) 2023-03-20 16:27:24 +01:00
Artemiy Ryabinkov
49fd1aacfc
Sessions with auto-creation on fetch request and TTL (#736)
* Add support for sessions

* Add tests for sessions

* Missing return type

* Don't re-create an existing session

* Return success in case of session doesn't exists on destroy

* Create session if necessary on get request

* Add session TTL to the request.get method

When fetching some webpage with a predefined session id,
FlareSorverr is using existing instance of WebDriver.
That allows user to not manage cookies explicitly
and rely on WebDriver to maintain the session. However,
if session has been created long time ago, CloudFlare might
stop accepting the requests, so we want to recreate the session
time to time. From the user perspective the easiest way of doing it
is to define their expectation on the session duration.

These changes add an option to define Time-to-live (TTL) for the session
and FlareSorverr takes care about rotating the sessions.

* Update message for session destroy in tests

---------

Co-authored-by: Michel Roux <xefir@crystalyx.net>
2023-03-20 16:25:48 +01:00
Martino Mensio
f6879c70de
Add Fairlane challenge selector (#723) 2023-03-20 16:13:21 +01:00
ngosang
24f59a39cb Print platform information on start up 2023-03-20 15:07:12 +01:00
ngosang
4d16105176 Fix error trace: Crash Reports/pending No such file or directory 2023-03-20 15:07:12 +01:00
ngosang
5957b7b3bc Update dependencies 2023-03-20 15:07:12 +01:00
ngosang
8de16058d0 Attempt to fix Docker ARM32 build 2023-03-20 15:07:12 +01:00
bilditup1
5fc4f966a5
Update ddos-guard title (#692)
* update ddos-guard title (anidex.info)

* make page_title test case-insensitive

---------

Co-authored-by: bilditup1 <git@github.com>
2023-03-20 15:06:27 +01:00
ilike2burnthing
b903a5dd84
Bump version 3.0.4 2023-03-07 03:54:10 +00:00
ilike2burnthing
7e9d5f424f
Update changelog 2023-03-07 03:54:05 +00:00
ilike2burnthing
fc6d2d9095
Click on the Cloudflare's 'Verify you are human' button if necessary
Co-authored-by: furdarius <furdarius@users.noreply.github.com>

69e023b946 minus screenshot function
2023-03-07 03:51:17 +00:00
ilike2burnthing
aef9b2d4d6
Update changelog 2023-03-06 14:02:56 +00:00
ilike2burnthing
6dc279a9d3
Bump version 3.0.3 2023-03-06 13:59:20 +00:00
Artemiy Ryabinkov
96fcd21174
Update undetected_chromedriver version to 3.4.6 (#715)
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
2023-03-06 13:57:38 +00:00
ngosang
3a6e8e0f92 Update GitHub bug report template 2023-01-28 18:00:57 +01:00
ilike2burnthing
2d97f88276
Update README.md 2023-01-09 21:51:49 +00:00
ngosang
ac5c64319e Bump version 3.0.2 2023-01-08 20:48:20 +01:00
ngosang
c93834e2f0 Check Chrome / Chromium web browser is installed correctly 2023-01-08 20:46:11 +01:00
ngosang
e3b4200d94 Detect Cloudflare blocked access 2023-01-08 20:40:10 +01:00
ngosang
0941861f80 Update changelog 2023-01-06 18:49:05 +01:00
ngosang
8a10eb27a6 Bump version 3.0.1 2023-01-06 18:33:02 +01:00
ngosang
e9c08c84ef Update GitHub actions 2023-01-06 18:32:34 +01:00
ngosang
2aa1744476 Add more selectors to detect blocked access 2023-01-06 18:12:26 +01:00
ngosang
a89679a52d Disable Zygote sandbox in Chromium browser 2023-01-06 18:05:23 +01:00
ngosang
410ee7981f Apply undetected-chromedriver patches
* Hide Chrome window in Windows/NT
* Not use subprocess by default (independent process)
* Kill Chromium processes properly to avoid defunct/zombie processes
2023-01-06 17:50:52 +01:00
ngosang
e163019f28 Update undetected-chromedriver 2023-01-06 17:19:11 +01:00
ngosang
7d84f1b663 Kill Chromium processes properly to avoid defunct/zombie processes 2023-01-06 13:58:24 +01:00
ngosang
4807e9dbe2 Include procps (ps), curl and vim packages in the Docker image 2023-01-05 13:25:45 +01:00
32 changed files with 3484 additions and 2014 deletions

View File

@ -8,6 +8,13 @@ body:
options: options:
- label: I have checked the README - label: I have checked the README
required: true required: true
- type: checkboxes
attributes:
label: Have you followed our Troubleshooting?
description: Please follow our <a href="https://github.com/FlareSolverr/FlareSolverr/wiki/Troubleshooting">Troubleshooting</a>.
options:
- label: I have followed your Troubleshooting
required: true
- type: checkboxes - type: checkboxes
attributes: attributes:
label: Is there already an issue for your problem? label: Is there already an issue for your problem?
@ -22,6 +29,13 @@ body:
options: options:
- label: I have read the Discussions - label: I have read the Discussions
required: true required: true
- type: input
attributes:
label: Have you ACTUALLY checked all these?
description: Please do not waste our time and yours; these checks are there for a reason, it is not just so you can tick boxes for fun. If you type <b>YES</b> and it is clear you did not or have put in no effort, your issue will be closed and locked without comment. If you type <b>NO</b> but still open this issue, you will be permanently blocked for timewasting.
placeholder: YES or NO
validations:
required: true
- type: textarea - type: textarea
attributes: attributes:
label: Environment label: Environment
@ -32,7 +46,8 @@ body:
- Operating system: - Operating system:
- Are you using Docker: [yes/no] - Are you using Docker: [yes/no]
- FlareSolverr User-Agent (see log traces or / endpoint): - FlareSolverr User-Agent (see log traces or / endpoint):
- Are you using a proxy or VPN: [yes/no] - Are you using a VPN: [yes/no]
- Are you using a Proxy: [yes/no]
- Are you using Captcha Solver: [yes/no] - Are you using Captcha Solver: [yes/no]
- If using captcha solver, which one: - If using captcha solver, which one:
- URL to test this issue: - URL to test this issue:

View File

@ -6,12 +6,12 @@ on:
- "master" - "master"
jobs: jobs:
build: tag-release:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- -
name: Checkout name: Checkout
uses: actions/checkout@v2 uses: actions/checkout@v3
- -
name: Auto Tag name: Auto Tag
uses: Klemensas/action-autotag@stable uses: Klemensas/action-autotag@stable

View File

@ -4,50 +4,64 @@ on:
push: push:
tags: tags:
- 'v*.*.*' - 'v*.*.*'
pull_request:
branches:
- master
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs: jobs:
build: build-docker-images:
runs-on: ubuntu-latest if: ${{ !github.event.pull_request.head.repo.fork }}
runs-on: ubuntu-22.04
steps: steps:
- - name: Checkout
name: Checkout uses: actions/checkout@v4
uses: actions/checkout@v2
- - name: Downcase repo
name: Downcase repo
run: echo REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV run: echo REPOSITORY=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') >> $GITHUB_ENV
-
name: Docker meta - name: Docker meta
id: docker_meta id: docker_meta
uses: crazy-max/ghaction-docker-meta@v1 uses: docker/metadata-action@v5
with: with:
images: ${{ env.REPOSITORY }},ghcr.io/${{ env.REPOSITORY }} images: |
tag-sha: false ${{ env.REPOSITORY }},enable=${{ github.event_name != 'pull_request' }}
- ghcr.io/${{ env.REPOSITORY }}
name: Set up QEMU tags: |
uses: docker/setup-qemu-action@v1.0.1 type=semver,pattern={{version}},prefix=v
- type=ref,event=pr
name: Set up Docker Buildx flavor: |
uses: docker/setup-buildx-action@v1 latest=auto
-
name: Login to DockerHub - name: Set up QEMU
uses: docker/login-action@v1 uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
uses: docker/login-action@v3
if: github.event_name != 'pull_request'
with: with:
username: ${{ secrets.DOCKERHUB_USERNAME }} username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }} password: ${{ secrets.DOCKERHUB_TOKEN }}
-
name: Login to GitHub Container Registry - name: Login to GitHub Container Registry
uses: docker/login-action@v1 uses: docker/login-action@v3
with: with:
registry: ghcr.io registry: ghcr.io
username: ${{ github.repository_owner }} username: ${{ github.repository_owner }}
password: ${{ secrets.GH_PAT }} password: ${{ secrets.GH_PAT }}
-
name: Build and push - name: Build and push
uses: docker/build-push-action@v2 uses: docker/build-push-action@v6
with: with:
context: . context: .
file: ./Dockerfile file: ./Dockerfile
platforms: linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 platforms: linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8
push: ${{ github.event_name != 'pull_request' }} push: true
tags: ${{ steps.docker_meta.outputs.tags }} tags: ${{ steps.docker_meta.outputs.tags }}
labels: ${{ steps.docker_meta.outputs.labels }} labels: ${{ steps.docker_meta.outputs.labels }}

View File

@ -6,26 +6,15 @@ on:
- 'v*.*.*' - 'v*.*.*'
jobs: jobs:
build: create-release:
name: Create release name: Create release
runs-on: ubuntu-latest runs-on: ubuntu-22.04
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v2 uses: actions/checkout@v3
with: with:
fetch-depth: 0 # get all commits, branches and tags (required for the changelog) fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
- name: Setup Node
uses: actions/setup-node@v2
with:
node-version: '16'
- name: Build artifacts
run: |
npm install
npm run build
npm run package
- name: Build changelog - name: Build changelog
id: github_changelog id: github_changelog
run: | run: |
@ -47,9 +36,60 @@ jobs:
draft: false draft: false
prerelease: false prerelease: false
build-linux-package:
name: Build Linux binary
needs: create-release
runs-on: ubuntu-22.04
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Build artifacts
run: |
python -m pip install -r requirements.txt
python -m pip install pyinstaller==5.13.0
cd src
python build_package.py
- name: Upload release artifacts - name: Upload release artifacts
uses: alexellis/upload-assets@0.2.2 uses: alexellis/upload-assets@0.4.0
env: env:
GITHUB_TOKEN: ${{ secrets.GH_PAT }} GITHUB_TOKEN: ${{ secrets.GH_PAT }}
with: with:
asset_paths: '["./bin/*.zip"]' asset_paths: '["./dist/flaresolverr_*"]'
build-windows-package:
name: Build Windows binary
needs: create-release
runs-on: windows-2022
steps:
- name: Checkout code
uses: actions/checkout@v3
with:
fetch-depth: 0 # get all commits, branches and tags (required for the changelog)
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Build artifacts
run: |
python -m pip install -r requirements.txt
python -m pip install pyinstaller==5.13.0
cd src
python build_package.py
- name: Upload release artifacts
uses: alexellis/upload-assets@0.4.0
env:
GITHUB_TOKEN: ${{ secrets.GH_PAT }}
with:
asset_paths: '["./dist/flaresolverr_*"]'

4
.gitignore vendored
View File

@ -25,6 +25,7 @@ __pycache__/
build/ build/
develop-eggs/ develop-eggs/
dist/ dist/
dist_chrome/
downloads/ downloads/
eggs/ eggs/
.eggs/ .eggs/
@ -123,3 +124,6 @@ venv.bak/
.mypy_cache/ .mypy_cache/
.dmypy.json .dmypy.json
dmypy.json dmypy.json
# node
node_modules/

View File

@ -1,5 +1,196 @@
# Changelog # Changelog
## v3.3.24 (2025/06/04)
* Remove hidden character
## v3.3.23 (2025/06/04)
* Update base image to bookworm. Thanks @rwjack
## v3.3.22 (2025/06/03)
* Disable search engine choice screen
* Fix headless=false stalling. Thanks @MAKMED1337
* Change from click to keys. Thanks @sh4dowb
* Don't open devtools
* Bump Chromium to v137 for build
* Bump requirements
## v3.3.21 (2024/06/26)
* Add challenge selector to catch reloading page on non-English systems
* Escape values for generated form used in request.post. Thanks @mynameisbogdan
## v3.3.20 (2024/06/21)
* maxTimeout should always be int
* Check not running in Docker before logging version_main error
* Update Cloudflare challenge and checkbox selectors. Thanks @tenettow & @21hsmw
## v3.3.19 (2024/05/23)
* Fix occasional headless issue on Linux when set to "false". Thanks @21hsmw
## v3.3.18 (2024/05/20)
* Fix LANG ENV for Linux
* Fix Chrome v124+ not closing on Windows. Thanks @RileyXX
## v3.3.17 (2024/04/09)
* Fix file descriptor leak in service on quit(). Thanks @zkulis
## v3.3.16 (2024/02/28)
* Fix of the subprocess.STARTUPINFO() call. Thanks @ceconelo
* Add FreeBSD support. Thanks @Asthowen
* Use headless configuration properly. Thanks @hashworks
## v3.3.15 (2024/02/20)
* Fix looping challenges
## v3.3.14-hotfix2 (2024/02/17)
* Hotfix 2 - bad Chromium build, instances failed to terminate
## v3.3.14-hotfix (2024/02/17)
* Hotfix for Linux build - some Chrome files no longer exist
## v3.3.14 (2024/02/17)
* Update Chrome downloads. Thanks @opemvbs
## v3.3.13 (2024/01/07)
* Fix too many open files error
## v3.3.12 (2023/12/15)
* Fix looping challenges and invalid cookies
## v3.3.11 (2023/12/11)
* Update UC 3.5.4 & Selenium 4.15.2. Thanks @txtsd
## v3.3.10 (2023/11/14)
* Add LANG ENV - resolves issues with YGGtorrent
## v3.3.9 (2023/11/13)
* Fix for Docker build, capture TypeError
## v3.3.8 (2023/11/13)
* Fix headless=true for Chrome 117+. Thanks @NabiKAZ
* Support running Chrome 119 from source. Thanks @koleg and @Chris7X
* Fix "OSError: [WinError 6] The handle is invalid" on exit. Thanks @enesgorkemgenc
## v3.3.7 (2023/11/05)
* Bump to rebuild. Thanks @JoachimDorchies
## v3.3.6 (2023/09/15)
* Update checkbox selector, again
## v3.3.5 (2023/09/13)
* Change checkbox selector, support languages other than English
## v3.3.4 (2023/09/02)
* Update checkbox selector
## v3.3.3 (2023/08/31)
* Update undetected_chromedriver to v3.5.3
## v3.3.2 (2023/08/03)
* Fix URL domain in Prometheus exporter
## v3.3.1 (2023/08/03)
* Fix for Cloudflare verify checkbox
* Fix HEADLESS=false in Windows binary
* Fix Prometheus exporter for management and health endpoints
* Remove misleading stack trace when the verify checkbox is not found
* Revert "Update base Docker image to Debian Bookworm" #849
* Revert "Install Chromium 115 from Debian testing" #849
## v3.3.0 (2023/08/02)
* Fix for new Cloudflare detection. Thanks @cedric-bour for #845
* Add support for proxy authentication username/password. Thanks @jacobprice808 for #807
* Implement Prometheus metrics
* Fix Chromium Driver for Chrome / Chromium version > 114
* Use Chromium 115 in binary packages (Windows and Linux)
* Install Chromium 115 from Debian testing (Docker)
* Update base Docker image to Debian Bookworm
* Update Selenium 4.11.2
* Update pyinstaller 5.13.0
* Add more traces in build_package.py
## v3.2.2 (2023/07/16)
* Workaround for updated 'verify you are human' check
## v3.2.1 (2023/06/10)
* Kill dead Chrome processes in Windows
* Fix Chrome GL erros in ASUSTOR NAS
## v3.2.0 (2023/05/23)
* Support "proxy" param in requests and sessions
* Support "cookies" param in requests
* Fix Chromium exec permissions in Linux package
* Update Python dependencies
## v3.1.2 (2023/04/02)
* Fix headless mode in macOS
* Remove redundant artifact from Windows binary package
* Bump Selenium dependency
## v3.1.1 (2023/03/25)
* Distribute binary executables in compressed package
* Add icon for binary executable
* Include information about supported architectures in the readme
* Check Python version on start
## v3.1.0 (2023/03/20)
* Build binaries for Linux x64 and Windows x64
* Sessions with auto-creation on fetch request and TTL
* Fix error trace: Crash Reports/pending No such file or directory
* Fix Waitress server error with asyncore_use_poll=true
* Attempt to fix Docker ARM32 build
* Print platform information on start up
* Add Fairlane challenge selector
* Update DDOS-GUARD title
* Update dependencies
## v3.0.4 (2023/03/07)
* Click on the Cloudflare's 'Verify you are human' button if necessary
## v3.0.3 (2023/03/06)
* Update undetected_chromedriver version to 3.4.6
## v3.0.2 (2023/01/08)
* Detect Cloudflare blocked access
* Check Chrome / Chromium web browser is installed correctly
## v3.0.1 (2023/01/06)
* Kill Chromium processes properly to avoid defunct/zombie processes
* Update undetected-chromedriver
* Disable Zygote sandbox in Chromium browser
* Add more selectors to detect blocked access
* Include procps (ps), curl and vim packages in the Docker image
## v3.0.0 (2023/01/04) ## v3.0.0 (2023/01/04)
* This is the first release of FlareSolverr v3. There are some breaking changes * This is the first release of FlareSolverr v3. There are some breaking changes

View File

@ -1,4 +1,4 @@
FROM python:3.11-slim-bullseye as builder FROM python:3.11-slim-bookworm as builder
# Build dummy packages to skip installing them and their dependencies # Build dummy packages to skip installing them and their dependencies
RUN apt-get update \ RUN apt-get update \
@ -12,7 +12,7 @@ RUN apt-get update \
&& equivs-build adwaita-icon-theme \ && equivs-build adwaita-icon-theme \
&& mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb && mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb
FROM python:3.11-slim-bullseye FROM python:3.11-slim-bookworm
# Copy dummy packages # Copy dummy packages
COPY --from=builder /*.deb / COPY --from=builder /*.deb /
@ -29,7 +29,8 @@ RUN dpkg -i /libgl1-mesa-dri.deb \
&& dpkg -i /adwaita-icon-theme.deb \ && dpkg -i /adwaita-icon-theme.deb \
# Install dependencies # Install dependencies
&& apt-get update \ && apt-get update \
&& apt-get install -y --no-install-recommends chromium chromium-common chromium-driver xvfb \ && apt-get install -y --no-install-recommends chromium chromium-common chromium-driver xvfb dumb-init \
procps curl vim xauth \
# Remove temporary files and hardware decoding libraries # Remove temporary files and hardware decoding libraries
&& rm -rf /var/lib/apt/lists/* \ && rm -rf /var/lib/apt/lists/* \
&& rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \ && rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* \
@ -47,19 +48,31 @@ RUN pip install -r requirements.txt \
USER flaresolverr USER flaresolverr
RUN mkdir -p "/app/.config/chromium/Crash Reports/pending"
COPY src . COPY src .
COPY package.json ../ COPY package.json ../
EXPOSE 8191 EXPOSE 8191
EXPOSE 8192
# dumb-init avoids zombie chromium processes
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"] CMD ["/usr/local/bin/python", "-u", "/app/flaresolverr.py"]
# Local build # Local build
# docker build -t ngosang/flaresolverr:3.0.0 . # docker build -t ngosang/flaresolverr:3.3.24 .
# docker run -p 8191:8191 ngosang/flaresolverr:3.0.0 # docker run -p 8191:8191 ngosang/flaresolverr:3.3.24
# Multi-arch build # Multi-arch build
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes # docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
# docker buildx create --use # docker buildx create --use
# docker buildx build -t ngosang/flaresolverr:3.0.0 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 . # docker buildx build -t ngosang/flaresolverr:3.3.24 --platform linux/386,linux/amd64,linux/arm/v7,linux/arm64/v8 .
# add --push to publish in DockerHub # add --push to publish in DockerHub
# Test multi-arch build
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
# docker buildx create --use
# docker buildx build -t ngosang/flaresolverr:3.3.24 --platform linux/arm/v7 --load .
# docker run -p 8191:8191 --platform linux/arm/v7 ngosang/flaresolverr:3.3.24

147
README.md
View File

@ -45,7 +45,8 @@ Supported architectures are:
| ARM32 | linux/arm/v7 | | ARM32 | linux/arm/v7 |
| ARM64 | linux/arm64 | | ARM64 | linux/arm64 |
We provide a `docker-compose.yml` configuration file. Clone this repository and execute `docker-compose up -d` to start We provide a `docker-compose.yml` configuration file. Clone this repository and execute
`docker-compose up -d` _(Compose V1)_ or `docker compose up -d` _(Compose V2)_ to start
the container. the container.
If you prefer the `docker cli` execute the following command. If you prefer the `docker cli` execute the following command.
@ -64,38 +65,76 @@ Remember to restart the Docker daemon and the container after the update.
### Precompiled binaries ### Precompiled binaries
> **Warning**
> Precompiled binaries are only available for x64 architecture. For other architectures see Docker images.
This is the recommended way for Windows users. This is the recommended way for Windows users.
* Download the [FlareSolverr zip](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's assets. It is available for Windows and Linux. * Download the [FlareSolverr executable](https://github.com/FlareSolverr/FlareSolverr/releases) from the release's page. It is available for Windows x64 and Linux x64.
* Extract the zip file. FlareSolverr executable and firefox folder must be in the same directory.
* Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration. * Execute FlareSolverr binary. In the environment variables section you can find how to change the configuration.
### From source code ### From source code
This is the recommended way for macOS users and for developers. > **Warning**
* Install [Python 3.10](https://www.python.org/downloads/). > Installing from source code only works for x64 architecture. For other architectures see Docker images.
* Install [Chrome](https://www.google.com/intl/en_us/chrome/) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) web browser.
* (Only in Linux / macOS) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package. * Install [Python 3.11](https://www.python.org/downloads/).
* Install [Chrome](https://www.google.com/intl/en_us/chrome/) (all OS) or [Chromium](https://www.chromium.org/getting-involved/download-chromium/) (just Linux, it doesn't work in Windows) web browser.
* (Only in Linux) Install [Xvfb](https://en.wikipedia.org/wiki/Xvfb) package.
* (Only in macOS) Install [XQuartz](https://www.xquartz.org/) package.
* Clone this repository and open a shell in that path. * Clone this repository and open a shell in that path.
* Run `pip install -r requirements.txt` command to install FlareSolverr dependencies. * Run `pip install -r requirements.txt` command to install FlareSolverr dependencies.
* Run `python src/flaresolverr.py` command to start FlareSolverr. * Run `python src/flaresolverr.py` command to start FlareSolverr.
### From source code (FreeBSD/TrueNAS CORE)
* Run `pkg install chromium python311 py311-pip xorg-vfbserver` command to install the required dependencies.
* Clone this repository and open a shell in that path.
* Run `python3.11 -m pip install -r requirements.txt` command to install FlareSolverr dependencies.
* Run `python3.11 src/flaresolverr.py` command to start FlareSolverr.
### Systemd service ### Systemd service
We provide an example Systemd unit file `flaresolverr.service` as reference. You have to modify the file to suit your needs: paths, user and environment variables. We provide an example Systemd unit file `flaresolverr.service` as reference. You have to modify the file to suit your needs: paths, user and environment variables.
## Usage ## Usage
Example request: Example Bash request:
```bash ```bash
curl -L -X POST 'http://localhost:8191/v1' \ curl -L -X POST 'http://localhost:8191/v1' \
-H 'Content-Type: application/json' \ -H 'Content-Type: application/json' \
--data-raw '{ --data-raw '{
"cmd": "request.get", "cmd": "request.get",
"url":"http://www.google.com/", "url": "http://www.google.com/",
"maxTimeout": 60000 "maxTimeout": 60000
}' }'
``` ```
Example Python request:
```py
import requests
url = "http://localhost:8191/v1"
headers = {"Content-Type": "application/json"}
data = {
"cmd": "request.get",
"url": "http://www.google.com/",
"maxTimeout": 60000
}
response = requests.post(url, headers=headers, json=data)
print(response.text)
```
Example PowerShell request:
```ps1
$body = @{
cmd = "request.get"
url = "http://www.google.com/"
maxTimeout = 60000
} | ConvertTo-Json
irm -UseBasicParsing 'http://localhost:8191/v1' -Headers @{"Content-Type"="application/json"} -Method Post -Body $body
```
### Commands ### Commands
#### + `sessions.create` #### + `sessions.create`
@ -106,10 +145,10 @@ cookies for the browser to use.
This also speeds up the requests since it won't have to launch a new browser instance for every request. This also speeds up the requests since it won't have to launch a new browser instance for every request.
| Parameter | Notes | | Parameter | Notes |
|-----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |-----------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| session | Optional. The session ID that you want to be assigned to the instance. If isn't set a random UUID will be assigned. | | session | Optional. The session ID that you want to be assigned to the instance. If isn't set a random UUID will be assigned. |
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. | | proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is supported. Eg: `"proxy": {"url": "http://127.0.0.1:8888", "username": "testuser", "password": "testpass"}` |
#### + `sessions.list` #### + `sessions.list`
@ -140,16 +179,18 @@ session. When you no longer need to use a session you should make sure to close
#### + `request.get` #### + `request.get`
| Parameter | Notes | | Parameter | Notes |
|-------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| url | Mandatory | | url | Mandatory |
| session | Optional. Will send the request from and existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. | | session | Optional. Will send the request from and existing browser instance. If one is not sent it will create a temporary instance that will be destroyed immediately after the request is completed. |
| maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. | | session_ttl_minutes | Optional. FlareSolverr will automatically rotate expired sessions based on the TTL provided in minutes. |
| cookies | Optional. Will be used by the headless browser. Follow [this](https://github.com/puppeteer/puppeteer/blob/v3.3.0/docs/api.md#pagesetcookiecookies) format. | | maxTimeout | Optional, default value 60000. Max timeout to solve the challenge in milliseconds. |
| returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. | | cookies | Optional. Will be used by the headless browser. Eg: `"cookies": [{"name": "cookie1", "value": "value1"}, {"name": "cookie2", "value": "value2"}]`. |
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) | | returnOnlyCookies | Optional, default false. Only returns the cookies. Response data, headers and other parts of the response are removed. |
| proxy | Optional, default disabled. Eg: `"proxy": {"url": "http://127.0.0.1:8888"}`. You must include the proxy schema in the URL: `http://`, `socks4://` or `socks5://`. Authorization (username/password) is not supported. (When the `session` parameter is set, the proxy is ignored; a session specific proxy can be set in `sessions.create`.) |
:warning: If you want to use Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge. > **Warning**
> If you want to use Cloudflare clearance cookie in your scripts, make sure you use the FlareSolverr User-Agent too. If they don't match you will see the challenge.
Example response from running the `curl` above: Example response from running the `curl` above:
@ -220,33 +261,63 @@ This is the same as `request.get` but it takes one more param:
## Environment variables ## Environment variables
| Name | Default | Notes | | Name | Default | Notes |
|-----------------|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| |--------------------|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------|
| LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. | | LOG_LEVEL | info | Verbosity of the logging. Use `LOG_LEVEL=debug` for more information. |
| LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. | | LOG_HTML | false | Only for debugging. If `true` all HTML that passes through the proxy will be logged to the console in `debug` level. |
| CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. | | CAPTCHA_SOLVER | none | Captcha solving method. It is used when a captcha is encountered. See the Captcha Solvers section. |
| TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. | | TZ | UTC | Timezone used in the logs and the web browser. Example: `TZ=Europe/London`. |
| HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. | | LANG | none | Language used in the web browser. Example: `LANG=en_GB`. |
| BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. | | HEADLESS | true | Only for debugging. To run the web browser in headless mode or visible. |
| TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. | | BROWSER_TIMEOUT | 40000 | If you are experiencing errors/timeouts because your system is slow, you can try to increase this value. Remember to increase the `maxTimeout` parameter too. |
| PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. | | TEST_URL | https://www.google.com | FlareSolverr makes a request on start to make sure the web browser is working. You can change that URL if it is blocked in your country. |
| HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. | | PORT | 8191 | Listening port. You don't need to change this if you are running on Docker. |
| HOST | 0.0.0.0 | Listening interface. You don't need to change this if you are running on Docker. |
| PROMETHEUS_ENABLED | false | Enable Prometheus exporter. See the Prometheus section below. |
| PROMETHEUS_PORT | 8192 | Listening port for Prometheus exporter. See the Prometheus section below. |
Environment variables are set differently depending on the operating system. Some examples: Environment variables are set differently depending on the operating system. Some examples:
* Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command. * Docker: Take a look at the Docker section in this document. Environment variables can be set in the `docker-compose.yml` file or in the Docker CLI command.
* Linux: Run `export LOG_LEVEL=debug` and then start FlareSolverr in the same shell. * Linux: Run `export LOG_LEVEL=debug` and then run `flaresolverr` in the same shell.
* Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then start FlareSolverr in the same shell. * Windows: Open `cmd.exe`, run `set LOG_LEVEL=debug` and then run `flaresolverr.exe` in the same shell.
## Prometheus exporter
The Prometheus exporter for FlareSolverr is disabled by default. It can be enabled with the environment variable `PROMETHEUS_ENABLED`. If you are using Docker make sure you expose the `PROMETHEUS_PORT`.
Example metrics:
```shell
# HELP flaresolverr_request_total Total requests with result
# TYPE flaresolverr_request_total counter
flaresolverr_request_total{domain="nowsecure.nl",result="solved"} 1.0
# HELP flaresolverr_request_created Total requests with result
# TYPE flaresolverr_request_created gauge
flaresolverr_request_created{domain="nowsecure.nl",result="solved"} 1.690141657157109e+09
# HELP flaresolverr_request_duration Request duration in seconds
# TYPE flaresolverr_request_duration histogram
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="0.0"} 0.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="10.0"} 1.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="25.0"} 1.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="50.0"} 1.0
flaresolverr_request_duration_bucket{domain="nowsecure.nl",le="+Inf"} 1.0
flaresolverr_request_duration_count{domain="nowsecure.nl"} 1.0
flaresolverr_request_duration_sum{domain="nowsecure.nl"} 5.858
# HELP flaresolverr_request_duration_created Request duration in seconds
# TYPE flaresolverr_request_duration_created gauge
flaresolverr_request_duration_created{domain="nowsecure.nl"} 1.6901416571570296e+09
```
## Captcha Solvers ## Captcha Solvers
:warning: At this time none of the captcha solvers work. You can check the status in the open issues. Any help is welcome. > **Warning**
> At this time none of the captcha solvers work. You can check the status in the open issues. Any help is welcome.
Sometimes CloudFlare not only gives mathematical computations and browser tests, sometimes they also require the user to Sometimes CloudFlare not only gives mathematical computations and browser tests, sometimes they also require the user to
solve a captcha. solve a captcha.
If this is the case, FlareSolverr will return the error `Captcha detected but no automatic solver is configured.` If this is the case, FlareSolverr will return the error `Captcha detected but no automatic solver is configured.`
FlareSolverr can be customized to solve the captchas automatically by setting the environment variable `CAPTCHA_SOLVER` FlareSolverr can be customized to solve the CAPTCHA automatically by setting the environment variable `CAPTCHA_SOLVER`
to the file name of one of the adapters inside the [/captcha](src/captcha) directory. to the file name of one of the adapters inside the `/captcha` directory.
## Related projects ## Related projects

19
flaresolverr.service Normal file
View File

@ -0,0 +1,19 @@
[Unit]
Description=FlareSolverr
After=network.target
[Service]
SyslogIdentifier=flaresolverr
Restart=always
RestartSec=5
Type=simple
User=flaresolverr
Group=flaresolverr
Environment="LOG_LEVEL=info"
Environment="CAPTCHA_SOLVER=none"
WorkingDirectory=/opt/flaresolverr
ExecStart=/opt/flaresolverr/flaresolverr
TimeoutStopSec=30
[Install]
WantedBy=multi-user.target

View File

@ -1,6 +1,6 @@
{ {
"name": "flaresolverr", "name": "flaresolverr",
"version": "3.0.0", "version": "3.3.24",
"description": "Proxy server to bypass Cloudflare protection", "description": "Proxy server to bypass Cloudflare protection",
"author": "Diego Heras (ngosang / ngosang@hotmail.es)", "author": "Diego Heras (ngosang / ngosang@hotmail.es)",
"license": "MIT" "license": "MIT"

View File

@ -1,9 +1,13 @@
bottle==0.12.23 bottle==0.12.25
waitress==2.1.2 waitress==3.0.1
selenium==4.4.3 selenium==4.15.2
func-timeout==4.3.5 func-timeout==4.3.5
prometheus-client==0.17.1
# required by undetected_chromedriver # required by undetected_chromedriver
requests==2.28.1 requests==2.32.3
websockets==10.3 certifi==2024.07.04
# only required for linux websockets==11.0.3
xvfbwrapper==0.2.9 # only required for linux and macos
xvfbwrapper==0.2.9; platform_system != "Windows"
# only required for windows
pefile==2023.2.7; platform_system == "Windows"

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.8 KiB

View File

@ -0,0 +1,66 @@
import logging
import os
import urllib.parse
from bottle import request
from dtos import V1RequestBase, V1ResponseBase
from metrics import start_metrics_http_server, REQUEST_COUNTER, REQUEST_DURATION
PROMETHEUS_ENABLED = os.environ.get('PROMETHEUS_ENABLED', 'false').lower() == 'true'
PROMETHEUS_PORT = int(os.environ.get('PROMETHEUS_PORT', 8192))
def setup():
if PROMETHEUS_ENABLED:
start_metrics_http_server(PROMETHEUS_PORT)
def prometheus_plugin(callback):
"""
Bottle plugin to expose Prometheus metrics
http://bottlepy.org/docs/dev/plugindev.html
"""
def wrapper(*args, **kwargs):
actual_response = callback(*args, **kwargs)
if PROMETHEUS_ENABLED:
try:
export_metrics(actual_response)
except Exception as e:
logging.warning("Error exporting metrics: " + str(e))
return actual_response
def export_metrics(actual_response):
res = V1ResponseBase(actual_response)
if res.startTimestamp is None or res.endTimestamp is None:
# skip management and healthcheck endpoints
return
domain = "unknown"
if res.solution and res.solution.url:
domain = parse_domain_url(res.solution.url)
else:
# timeout error
req = V1RequestBase(request.json)
if req.url:
domain = parse_domain_url(req.url)
run_time = (res.endTimestamp - res.startTimestamp) / 1000
REQUEST_DURATION.labels(domain=domain).observe(run_time)
result = "unknown"
if res.message == "Challenge solved!":
result = "solved"
elif res.message == "Challenge not detected!":
result = "not_detected"
elif res.message.startswith("Error"):
result = "error"
REQUEST_COUNTER.labels(domain=domain, result=result).inc()
def parse_domain_url(url):
parsed_url = urllib.parse.urlparse(url)
return parsed_url.hostname
return wrapper

110
src/build_package.py Normal file
View File

@ -0,0 +1,110 @@
import os
import platform
import shutil
import subprocess
import sys
import zipfile
import requests
def clean_files():
try:
shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'build'))
except Exception:
pass
try:
shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist'))
except Exception:
pass
try:
shutil.rmtree(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome'))
except Exception:
pass
def download_chromium():
# https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Linux_x64/
revision = "1453032" if os.name == 'nt' else '1453031'
arch = 'Win_x64' if os.name == 'nt' else 'Linux_x64'
dl_file = 'chrome-win' if os.name == 'nt' else 'chrome-linux'
dl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist_chrome')
dl_path_folder = os.path.join(dl_path, dl_file)
dl_path_zip = dl_path_folder + '.zip'
# response = requests.get(
# f'https://commondatastorage.googleapis.com/chromium-browser-snapshots/{arch}/LAST_CHANGE',
# timeout=30)
# revision = response.text.strip()
print("Downloading revision: " + revision)
os.mkdir(dl_path)
with requests.get(
f'https://commondatastorage.googleapis.com/chromium-browser-snapshots/{arch}/{revision}/{dl_file}.zip',
stream=True) as r:
r.raise_for_status()
with open(dl_path_zip, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
print("File downloaded: " + dl_path_zip)
with zipfile.ZipFile(dl_path_zip, 'r') as zip_ref:
zip_ref.extractall(dl_path)
os.remove(dl_path_zip)
chrome_path = os.path.join(dl_path, "chrome")
shutil.move(dl_path_folder, chrome_path)
print("Extracted in: " + chrome_path)
if os.name != 'nt':
# Give executable permissions for *nix
# file * | grep executable | cut -d: -f1
print("Giving executable permissions...")
execs = ['chrome', 'chrome_crashpad_handler', 'chrome_sandbox', 'chrome-wrapper', 'xdg-mime', 'xdg-settings']
for exec_file in execs:
exec_path = os.path.join(chrome_path, exec_file)
os.chmod(exec_path, 0o755)
def run_pyinstaller():
sep = ';' if os.name == 'nt' else ':'
result = subprocess.run([sys.executable, "-m", "PyInstaller",
"--icon", "resources/flaresolverr_logo.ico",
"--add-data", f"package.json{sep}.",
"--add-data", f"{os.path.join('dist_chrome', 'chrome')}{sep}chrome",
os.path.join("src", "flaresolverr.py")],
cwd=os.pardir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if result.returncode != 0:
print(result.stderr.decode('utf-8'))
raise Exception("Error running pyInstaller")
def compress_package():
dist_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'dist')
package_folder = os.path.join(dist_folder, 'package')
shutil.move(os.path.join(dist_folder, 'flaresolverr'), os.path.join(package_folder, 'flaresolverr'))
print("Package folder: " + package_folder)
compr_format = 'zip' if os.name == 'nt' else 'gztar'
compr_file_name = 'flaresolverr_windows_x64' if os.name == 'nt' else 'flaresolverr_linux_x64'
compr_file_path = os.path.join(dist_folder, compr_file_name)
shutil.make_archive(compr_file_path, compr_format, package_folder)
print("Compressed file path: " + compr_file_path)
if __name__ == "__main__":
print("Building package...")
print("Platform: " + platform.platform())
print("Cleaning previous build...")
clean_files()
print("Downloading Chromium...")
download_chromium()
print("Building pyinstaller executable... ")
run_pyinstaller()
print("Compressing package... ")
compress_package()
# NOTE: python -m pip install pyinstaller

View File

@ -33,6 +33,7 @@ class V1RequestBase(object):
maxTimeout: int = None maxTimeout: int = None
proxy: dict = None proxy: dict = None
session: str = None session: str = None
session_ttl_minutes: int = None
headers: list = None # deprecated v2.0.0, not used headers: list = None # deprecated v2.0.0, not used
userAgent: str = None # deprecated v2.0.0, not used userAgent: str = None # deprecated v2.0.0, not used
@ -51,6 +52,8 @@ class V1ResponseBase(object):
# V1ResponseBase # V1ResponseBase
status: str = None status: str = None
message: str = None message: str = None
session: str = None
sessions: list[str] = None
startTimestamp: int = None startTimestamp: int = None
endTimestamp: int = None endTimestamp: int = None
version: str = None version: str = None

View File

@ -3,11 +3,13 @@ import logging
import os import os
import sys import sys
from bottle import run, response, Bottle, request import certifi
from bottle import run, response, Bottle, request, ServerAdapter
from bottle_plugins.error_plugin import error_plugin from bottle_plugins.error_plugin import error_plugin
from bottle_plugins.logger_plugin import logger_plugin from bottle_plugins.logger_plugin import logger_plugin
from dtos import IndexResponse, V1RequestBase from bottle_plugins import prometheus_plugin
from dtos import V1RequestBase
import flaresolverr_service import flaresolverr_service
import utils import utils
@ -23,10 +25,6 @@ class JSONErrorBottle(Bottle):
app = JSONErrorBottle() app = JSONErrorBottle()
# plugin order is important
app.install(logger_plugin)
app.install(error_plugin)
@app.route('/') @app.route('/')
def index(): def index():
@ -60,6 +58,22 @@ def controller_v1():
if __name__ == "__main__": if __name__ == "__main__":
# check python version
if sys.version_info < (3, 9):
raise Exception("The Python version is less than 3.9, a version equal to or higher is required.")
# fix for HEADLESS=false in Windows binary
# https://stackoverflow.com/a/27694505
if os.name == 'nt':
import multiprocessing
multiprocessing.freeze_support()
# fix ssl certificates for compiled binaries
# https://github.com/pyinstaller/pyinstaller/issues/7229
# https://stackoverflow.com/questions/55736855/how-to-change-the-cafile-argument-in-the-ssl-module-in-python3
os.environ["REQUESTS_CA_BUNDLE"] = certifi.where()
os.environ["SSL_CERT_FILE"] = certifi.where()
# validate configuration # validate configuration
log_level = os.environ.get('LOG_LEVEL', 'info').upper() log_level = os.environ.get('LOG_LEVEL', 'info').upper()
log_html = utils.get_config_log_html() log_html = utils.get_config_log_html()
@ -87,9 +101,25 @@ if __name__ == "__main__":
logging.info(f'FlareSolverr {utils.get_flaresolverr_version()}') logging.info(f'FlareSolverr {utils.get_flaresolverr_version()}')
logging.debug('Debug log enabled') logging.debug('Debug log enabled')
# Get current OS for global variable
utils.get_current_platform()
# test browser installation # test browser installation
flaresolverr_service.test_browser_installation() flaresolverr_service.test_browser_installation()
# start bootle plugins
# plugin order is important
app.install(logger_plugin)
app.install(error_plugin)
prometheus_plugin.setup()
app.install(prometheus_plugin.prometheus_plugin)
# start webserver # start webserver
# default server 'wsgiref' does not support concurrent requests # default server 'wsgiref' does not support concurrent requests
run(app, host=server_host, port=server_port, quiet=True, server='waitress') # https://github.com/FlareSolverr/FlareSolverr/issues/680
# https://github.com/Pylons/waitress/issues/31
class WaitressServerPoll(ServerAdapter):
def run(self, handler):
from waitress import serve
serve(handler, host=self.host, port=self.port, asyncore_use_poll=True)
run(app, host=server_host, port=server_port, quiet=True, server=WaitressServerPoll)

View File

@ -1,42 +1,79 @@
import logging import logging
import platform
import sys
import time import time
from urllib.parse import unquote from datetime import timedelta
from html import escape
from urllib.parse import unquote, quote
from func_timeout import func_timeout, FunctionTimedOut from func_timeout import FunctionTimedOut, func_timeout
from selenium.common import TimeoutException from selenium.common import TimeoutException
from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.expected_conditions import (
presence_of_element_located, staleness_of, title_is)
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located, staleness_of, title_is
from dtos import V1RequestBase, V1ResponseBase, ChallengeResolutionT, ChallengeResolutionResultT, IndexResponse, \
HealthResponse, STATUS_OK, STATUS_ERROR
import utils import utils
from dtos import (STATUS_ERROR, STATUS_OK, ChallengeResolutionResultT,
ChallengeResolutionT, HealthResponse, IndexResponse,
V1RequestBase, V1ResponseBase)
from sessions import SessionsStorage
ACCESS_DENIED_TITLES = [
# Cloudflare
'Access denied',
# Cloudflare http://bitturk.net/ Firefox
'Attention Required! | Cloudflare'
]
ACCESS_DENIED_SELECTORS = [ ACCESS_DENIED_SELECTORS = [
# Cloudflare # Cloudflare
'div.cf-error-title span.cf-code-label span' 'div.cf-error-title span.cf-code-label span',
# Cloudflare http://bitturk.net/ Firefox
'#cf-error-details div.cf-error-overview h1'
] ]
CHALLENGE_TITLE = [ CHALLENGE_TITLES = [
# Cloudflare # Cloudflare
'Just a moment...', 'Just a moment...',
# DDoS-GUARD # DDoS-GUARD
'DDOS-GUARD', 'DDoS-Guard'
] ]
CHALLENGE_SELECTORS = [ CHALLENGE_SELECTORS = [
# Cloudflare # Cloudflare
'#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', '#cf-challenge-running', '.ray_id', '.attack-box', '#cf-please-wait', '#challenge-spinner', '#trk_jschal_js', '#turnstile-wrapper', '.lds-ring',
# Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands # Custom CloudFlare for EbookParadijs, Film-Paleis, MuziekFabriek and Puur-Hollands
'td.info #js_info' 'td.info #js_info',
# Fairlane / pararius.com
'div.vc div.text-box h2'
] ]
SHORT_TIMEOUT = 10 SHORT_TIMEOUT = 1
SESSIONS_STORAGE = SessionsStorage()
def test_browser_installation(): def test_browser_installation():
logging.info("Testing web browser installation...") logging.info("Testing web browser installation...")
logging.info("Platform: " + platform.platform())
chrome_exe_path = utils.get_chrome_exe_path()
if chrome_exe_path is None:
logging.error("Chrome / Chromium web browser not installed!")
sys.exit(1)
else:
logging.info("Chrome / Chromium path: " + chrome_exe_path)
chrome_major_version = utils.get_chrome_major_version()
if chrome_major_version == '':
logging.error("Chrome / Chromium version not detected!")
sys.exit(1)
else:
logging.info("Chrome / Chromium major version: " + chrome_major_version)
logging.info("Launching web browser...")
user_agent = utils.get_user_agent() user_agent = utils.get_user_agent()
logging.info("FlareSolverr User-Agent: " + user_agent) logging.info("FlareSolverr User-Agent: " + user_agent)
logging.info("Test successful") logging.info("Test successful!")
def index_endpoint() -> IndexResponse: def index_endpoint() -> IndexResponse:
@ -84,17 +121,17 @@ def _controller_v1_handler(req: V1RequestBase) -> V1ResponseBase:
logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.") logging.warning("Request parameter 'userAgent' was removed in FlareSolverr v2.")
# set default values # set default values
if req.maxTimeout is None or req.maxTimeout < 1: if req.maxTimeout is None or int(req.maxTimeout) < 1:
req.maxTimeout = 60000 req.maxTimeout = 60000
# execute the command # execute the command
res: V1ResponseBase res: V1ResponseBase
if req.cmd == 'sessions.create': if req.cmd == 'sessions.create':
raise Exception("Not implemented yet.") res = _cmd_sessions_create(req)
elif req.cmd == 'sessions.list': elif req.cmd == 'sessions.list':
raise Exception("Not implemented yet.") res = _cmd_sessions_list(req)
elif req.cmd == 'sessions.destroy': elif req.cmd == 'sessions.destroy':
raise Exception("Not implemented yet.") res = _cmd_sessions_destroy(req)
elif req.cmd == 'request.get': elif req.cmd == 'request.get':
res = _cmd_request_get(req) res = _cmd_request_get(req)
elif req.cmd == 'request.post': elif req.cmd == 'request.post':
@ -141,19 +178,108 @@ def _cmd_request_post(req: V1RequestBase) -> V1ResponseBase:
return res return res
def _cmd_sessions_create(req: V1RequestBase) -> V1ResponseBase:
logging.debug("Creating new session...")
session, fresh = SESSIONS_STORAGE.create(session_id=req.session, proxy=req.proxy)
session_id = session.session_id
if not fresh:
return V1ResponseBase({
"status": STATUS_OK,
"message": "Session already exists.",
"session": session_id
})
return V1ResponseBase({
"status": STATUS_OK,
"message": "Session created successfully.",
"session": session_id
})
def _cmd_sessions_list(req: V1RequestBase) -> V1ResponseBase:
session_ids = SESSIONS_STORAGE.session_ids()
return V1ResponseBase({
"status": STATUS_OK,
"message": "",
"sessions": session_ids
})
def _cmd_sessions_destroy(req: V1RequestBase) -> V1ResponseBase:
session_id = req.session
existed = SESSIONS_STORAGE.destroy(session_id)
if not existed:
raise Exception("The session doesn't exist.")
return V1ResponseBase({
"status": STATUS_OK,
"message": "The session has been removed."
})
def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT: def _resolve_challenge(req: V1RequestBase, method: str) -> ChallengeResolutionT:
timeout = req.maxTimeout / 1000 timeout = int(req.maxTimeout) / 1000
driver = None driver = None
try: try:
driver = utils.get_webdriver() if req.session:
session_id = req.session
ttl = timedelta(minutes=req.session_ttl_minutes) if req.session_ttl_minutes else None
session, fresh = SESSIONS_STORAGE.get(session_id, ttl)
if fresh:
logging.debug(f"new session created to perform the request (session_id={session_id})")
else:
logging.debug(f"existing session is used to perform the request (session_id={session_id}, "
f"lifetime={str(session.lifetime())}, ttl={str(ttl)})")
driver = session.driver
else:
driver = utils.get_webdriver(req.proxy)
logging.debug('New instance of webdriver has been created to perform the request')
return func_timeout(timeout, _evil_logic, (req, driver, method)) return func_timeout(timeout, _evil_logic, (req, driver, method))
except FunctionTimedOut: except FunctionTimedOut:
raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.') raise Exception(f'Error solving the challenge. Timeout after {timeout} seconds.')
except Exception as e: except Exception as e:
raise Exception('Error solving the challenge. ' + str(e)) raise Exception('Error solving the challenge. ' + str(e).replace('\n', '\\n'))
finally: finally:
if driver is not None: if not req.session and driver is not None:
if utils.PLATFORM_VERSION == "nt":
driver.close()
driver.quit() driver.quit()
logging.debug('A used instance of webdriver has been destroyed')
def click_verify(driver: WebDriver):
try:
logging.debug("Try to find the Cloudflare verify checkbox...")
actions = ActionChains(driver)
actions.pause(5).send_keys(Keys.TAB).pause(1).send_keys(Keys.SPACE).perform()
logging.debug("Cloudflare verify checkbox found and clicked!")
except Exception:
logging.debug("Cloudflare verify checkbox not found on the page.")
finally:
driver.switch_to.default_content()
try:
logging.debug("Try to find the Cloudflare 'Verify you are human' button...")
button = driver.find_element(
by=By.XPATH,
value="//input[@type='button' and @value='Verify you are human']",
)
if button:
actions = ActionChains(driver)
actions.move_to_element_with_offset(button, 5, 7)
actions.click(button)
actions.perform()
logging.debug("The Cloudflare 'Verify you are human' button found and clicked!")
except Exception:
logging.debug("The Cloudflare 'Verify you are human' button not found on the page.")
time.sleep(2)
def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT: def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> ChallengeResolutionT:
@ -161,18 +287,37 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
res.status = STATUS_OK res.status = STATUS_OK
res.message = "" res.message = ""
# navigate to the page # navigate to the page
logging.debug(f'Navigating to... {req.url}') logging.debug(f'Navigating to... {req.url}')
if method == 'POST': if method == 'POST':
_post_request(req, driver) _post_request(req, driver)
else: else:
driver.get(req.url) driver.get(req.url)
if utils.get_config_log_html():
logging.debug(f"Response HTML:\n{driver.page_source}") # set cookies if required
if req.cookies is not None and len(req.cookies) > 0:
logging.debug(f'Setting cookies...')
for cookie in req.cookies:
driver.delete_cookie(cookie['name'])
driver.add_cookie(cookie)
# reload the page
if method == 'POST':
_post_request(req, driver)
else:
driver.get(req.url)
# wait for the page # wait for the page
if utils.get_config_log_html():
logging.debug(f"Response HTML:\n{driver.page_source}")
html_element = driver.find_element(By.TAG_NAME, "html") html_element = driver.find_element(By.TAG_NAME, "html")
page_title = driver.title
# find access denied titles
for title in ACCESS_DENIED_TITLES:
if title == page_title:
raise Exception('Cloudflare has blocked this request. '
'Probably your IP is banned for this site, check in your web browser.')
# find access denied selectors # find access denied selectors
for selector in ACCESS_DENIED_SELECTORS: for selector in ACCESS_DENIED_SELECTORS:
found_elements = driver.find_elements(By.CSS_SELECTOR, selector) found_elements = driver.find_elements(By.CSS_SELECTOR, selector)
@ -182,11 +327,10 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
# find challenge by title # find challenge by title
challenge_found = False challenge_found = False
page_title = driver.title for title in CHALLENGE_TITLES:
for title in CHALLENGE_TITLE: if title.lower() == page_title.lower():
if title == page_title:
challenge_found = True challenge_found = True
logging.info("Challenge detected. Title found: " + title) logging.info("Challenge detected. Title found: " + page_title)
break break
if not challenge_found: if not challenge_found:
# find challenge by selectors # find challenge by selectors
@ -197,17 +341,19 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
logging.info("Challenge detected. Selector found: " + selector) logging.info("Challenge detected. Selector found: " + selector)
break break
attempt = 0
if challenge_found: if challenge_found:
while True: while True:
try: try:
# wait until the title change attempt = attempt + 1
for title in CHALLENGE_TITLE: # wait until the title changes
logging.debug("Waiting for title: " + title) for title in CHALLENGE_TITLES:
logging.debug("Waiting for title (attempt " + str(attempt) + "): " + title)
WebDriverWait(driver, SHORT_TIMEOUT).until_not(title_is(title)) WebDriverWait(driver, SHORT_TIMEOUT).until_not(title_is(title))
# then wait until all the selectors disappear # then wait until all the selectors disappear
for selector in CHALLENGE_SELECTORS: for selector in CHALLENGE_SELECTORS:
logging.debug("Waiting for selector: " + selector) logging.debug("Waiting for selector (attempt " + str(attempt) + "): " + selector)
WebDriverWait(driver, SHORT_TIMEOUT).until_not( WebDriverWait(driver, SHORT_TIMEOUT).until_not(
presence_of_element_located((By.CSS_SELECTOR, selector))) presence_of_element_located((By.CSS_SELECTOR, selector)))
@ -216,6 +362,9 @@ def _evil_logic(req: V1RequestBase, driver: WebDriver, method: str) -> Challenge
except TimeoutException: except TimeoutException:
logging.debug("Timeout waiting for selector") logging.debug("Timeout waiting for selector")
click_verify(driver)
# update the html (cloudflare reloads the page every 5 s) # update the html (cloudflare reloads the page every 5 s)
html_element = driver.find_element(By.TAG_NAME, "html") html_element = driver.find_element(By.TAG_NAME, "html")
@ -265,7 +414,7 @@ def _post_request(req: V1RequestBase, driver: WebDriver):
value = unquote(parts[1]) value = unquote(parts[1])
except Exception: except Exception:
value = parts[1] value = parts[1]
post_form += f'<input type="text" name="{name}" value="{value}"><br>' post_form += f'<input type="text" name="{escape(quote(name))}" value="{escape(quote(value))}"><br>'
post_form += '</form>' post_form += '</form>'
html_content = f""" html_content = f"""
<!DOCTYPE html> <!DOCTYPE html>
@ -275,4 +424,4 @@ def _post_request(req: V1RequestBase, driver: WebDriver):
<script>document.getElementById('hackForm').submit();</script> <script>document.getElementById('hackForm').submit();</script>
</body> </body>
</html>""" </html>"""
driver.get("data:text/html;charset=utf-8," + html_content) driver.get("data:text/html;charset=utf-8,{html_content}".format(html_content=html_content))

32
src/metrics.py Normal file
View File

@ -0,0 +1,32 @@
import logging
from prometheus_client import Counter, Histogram, start_http_server
import time
REQUEST_COUNTER = Counter(
name='flaresolverr_request',
documentation='Total requests with result',
labelnames=['domain', 'result']
)
REQUEST_DURATION = Histogram(
name='flaresolverr_request_duration',
documentation='Request duration in seconds',
labelnames=['domain'],
buckets=[0, 10, 25, 50]
)
def serve(port):
start_http_server(port=port)
while True:
time.sleep(600)
def start_metrics_http_server(prometheus_port: int):
logging.info(f"Serving Prometheus exporter on http://0.0.0.0:{prometheus_port}/metrics")
from threading import Thread
Thread(
target=serve,
kwargs=dict(port=prometheus_port),
daemon=True,
).start()

84
src/sessions.py Normal file
View File

@ -0,0 +1,84 @@
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Optional, Tuple
from uuid import uuid1
from selenium.webdriver.chrome.webdriver import WebDriver
import utils
@dataclass
class Session:
session_id: str
driver: WebDriver
created_at: datetime
def lifetime(self) -> timedelta:
return datetime.now() - self.created_at
class SessionsStorage:
"""SessionsStorage creates, stores and process all the sessions"""
def __init__(self):
self.sessions = {}
def create(self, session_id: Optional[str] = None, proxy: Optional[dict] = None,
force_new: Optional[bool] = False) -> Tuple[Session, bool]:
"""create creates new instance of WebDriver if necessary,
assign defined (or newly generated) session_id to the instance
and returns the session object. If a new session has been created
second argument is set to True.
Note: The function is idempotent, so in case if session_id
already exists in the storage a new instance of WebDriver won't be created
and existing session will be returned. Second argument defines if
new session has been created (True) or an existing one was used (False).
"""
session_id = session_id or str(uuid1())
if force_new:
self.destroy(session_id)
if self.exists(session_id):
return self.sessions[session_id], False
driver = utils.get_webdriver(proxy)
created_at = datetime.now()
session = Session(session_id, driver, created_at)
self.sessions[session_id] = session
return session, True
def exists(self, session_id: str) -> bool:
return session_id in self.sessions
def destroy(self, session_id: str) -> bool:
"""destroy closes the driver instance and removes session from the storage.
The function is noop if session_id doesn't exist.
The function returns True if session was found and destroyed,
and False if session_id wasn't found.
"""
if not self.exists(session_id):
return False
session = self.sessions.pop(session_id)
if utils.PLATFORM_VERSION == "nt":
session.driver.close()
session.driver.quit()
return True
def get(self, session_id: str, ttl: Optional[timedelta] = None) -> Tuple[Session, bool]:
session, fresh = self.create(session_id)
if ttl is not None and not fresh and session.lifetime() > ttl:
logging.debug(f'session\'s lifetime has expired, so the session is recreated (session_id={session_id})')
session, fresh = self.create(session_id, force_new=True)
return session, fresh
def session_ids(self) -> list[str]:
return list(self.sessions.keys())

View File

@ -1,4 +1,5 @@
import unittest import unittest
from typing import Optional
from webtest import TestApp from webtest import TestApp
@ -7,7 +8,7 @@ import flaresolverr
import utils import utils
def _find_obj_by_key(key: str, value: str, _list: list) -> dict | None: def _find_obj_by_key(key: str, value: str, _list: list) -> Optional[dict]:
for obj in _list: for obj in _list:
if obj[key] == value: if obj[key] == value:
return obj return obj
@ -23,10 +24,13 @@ class TestFlareSolverr(unittest.TestCase):
cloudflare_url = "https://nowsecure.nl" cloudflare_url = "https://nowsecure.nl"
cloudflare_url_2 = "https://idope.se/torrent-list/harry/" cloudflare_url_2 = "https://idope.se/torrent-list/harry/"
ddos_guard_url = "https://anidex.info/" ddos_guard_url = "https://anidex.info/"
fairlane_url = "https://www.pararius.com/apartments/amsterdam"
custom_cloudflare_url = "https://www.muziekfabriek.org" custom_cloudflare_url = "https://www.muziekfabriek.org"
cloudflare_blocked_url = "https://cpasbiens3.fr/index.php?do=search&subaction=search" cloudflare_blocked_url = "https://cpasbiens3.fr/index.php?do=search&subaction=search"
app = TestApp(flaresolverr.app) app = TestApp(flaresolverr.app)
# wait until the server is ready
app.get('/')
def test_wrong_endpoint(self): def test_wrong_endpoint(self):
res = self.app.get('/wrong', status=404) res = self.app.get('/wrong', status=404)
@ -166,6 +170,32 @@ class TestFlareSolverr(unittest.TestCase):
self.assertIsNotNone(cf_cookie, "DDOS-Guard cookie not found") self.assertIsNotNone(cf_cookie, "DDOS-Guard cookie not found")
self.assertGreater(len(cf_cookie["value"]), 10) self.assertGreater(len(cf_cookie["value"]), 10)
def test_v1_endpoint_request_get_fairlane_js(self):
res = self.app.post_json('/v1', {
"cmd": "request.get",
"url": self.fairlane_url
})
self.assertEqual(res.status_code, 200)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_OK, body.status)
self.assertEqual("Challenge solved!", body.message)
self.assertGreater(body.startTimestamp, 10000)
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
self.assertEqual(utils.get_flaresolverr_version(), body.version)
solution = body.solution
self.assertIn(self.fairlane_url, solution.url)
self.assertEqual(solution.status, 200)
self.assertIs(len(solution.headers), 0)
self.assertIn("<title>Rental Apartments Amsterdam</title>", solution.response)
self.assertGreater(len(solution.cookies), 0)
self.assertIn("Chrome/", solution.userAgent)
cf_cookie = _find_obj_by_key("name", "fl_pass_v2_b", solution.cookies)
self.assertIsNotNone(cf_cookie, "Fairlane cookie not found")
self.assertGreater(len(cf_cookie["value"]), 50)
def test_v1_endpoint_request_get_custom_cloudflare_js(self): def test_v1_endpoint_request_get_custom_cloudflare_js(self):
res = self.app.post_json('/v1', { res = self.app.post_json('/v1', {
"cmd": "request.get", "cmd": "request.get",
@ -209,7 +239,45 @@ class TestFlareSolverr(unittest.TestCase):
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp) self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
self.assertEqual(utils.get_flaresolverr_version(), body.version) self.assertEqual(utils.get_flaresolverr_version(), body.version)
# todo: test Cmd 'request.get' should return OK with 'cookies' param def test_v1_endpoint_request_get_cookies_param(self):
res = self.app.post_json('/v1', {
"cmd": "request.get",
"url": self.google_url,
"cookies": [
{
"name": "testcookie1",
"value": "testvalue1"
},
{
"name": "testcookie2",
"value": "testvalue2"
}
]
})
self.assertEqual(res.status_code, 200)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_OK, body.status)
self.assertEqual("Challenge not detected!", body.message)
self.assertGreater(body.startTimestamp, 10000)
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
self.assertEqual(utils.get_flaresolverr_version(), body.version)
solution = body.solution
self.assertIn(self.google_url, solution.url)
self.assertEqual(solution.status, 200)
self.assertIs(len(solution.headers), 0)
self.assertIn("<title>Google</title>", solution.response)
self.assertGreater(len(solution.cookies), 1)
self.assertIn("Chrome/", solution.userAgent)
user_cookie1 = _find_obj_by_key("name", "testcookie1", solution.cookies)
self.assertIsNotNone(user_cookie1, "User cookie 1 not found")
self.assertEqual("testvalue1", user_cookie1["value"])
user_cookie2 = _find_obj_by_key("name", "testcookie2", solution.cookies)
self.assertIsNotNone(user_cookie2, "User cookie 2 not found")
self.assertEqual("testvalue2", user_cookie2["value"])
def test_v1_endpoint_request_get_returnOnlyCookies_param(self): def test_v1_endpoint_request_get_returnOnlyCookies_param(self):
res = self.app.post_json('/v1', { res = self.app.post_json('/v1', {
@ -234,10 +302,124 @@ class TestFlareSolverr(unittest.TestCase):
self.assertGreater(len(solution.cookies), 0) self.assertGreater(len(solution.cookies), 0)
self.assertIn("Chrome/", solution.userAgent) self.assertIn("Chrome/", solution.userAgent)
# todo: test Cmd 'request.get' should return OK with HTTP 'proxy' param def test_v1_endpoint_request_get_proxy_http_param(self):
# todo: test Cmd 'request.get' should return OK with HTTP 'proxy' param with credentials """
# todo: test Cmd 'request.get' should return OK with SOCKSv5 'proxy' param To configure TinyProxy in local:
# todo: test Cmd 'request.get' should fail with wrong 'proxy' param * sudo vim /etc/tinyproxy/tinyproxy.conf
* edit => LogFile "/tmp/tinyproxy.log"
* edit => Syslog Off
* sudo tinyproxy -d
* sudo tail -f /tmp/tinyproxy.log
"""
res = self.app.post_json('/v1', {
"cmd": "request.get",
"url": self.google_url,
"proxy": {
"url": self.proxy_url
}
})
self.assertEqual(res.status_code, 200)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_OK, body.status)
self.assertEqual("Challenge not detected!", body.message)
self.assertGreater(body.startTimestamp, 10000)
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
self.assertEqual(utils.get_flaresolverr_version(), body.version)
solution = body.solution
self.assertIn(self.google_url, solution.url)
self.assertEqual(solution.status, 200)
self.assertIs(len(solution.headers), 0)
self.assertIn("<title>Google</title>", solution.response)
self.assertGreater(len(solution.cookies), 0)
self.assertIn("Chrome/", solution.userAgent)
def test_v1_endpoint_request_get_proxy_http_param_with_credentials(self):
"""
To configure TinyProxy in local:
* sudo vim /etc/tinyproxy/tinyproxy.conf
* edit => LogFile "/tmp/tinyproxy.log"
* edit => Syslog Off
* add => BasicAuth testuser testpass
* sudo tinyproxy -d
* sudo tail -f /tmp/tinyproxy.log
"""
res = self.app.post_json('/v1', {
"cmd": "request.get",
"url": self.google_url,
"proxy": {
"url": self.proxy_url,
"username": "testuser",
"password": "testpass"
}
})
self.assertEqual(res.status_code, 200)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_OK, body.status)
self.assertEqual("Challenge not detected!", body.message)
self.assertGreater(body.startTimestamp, 10000)
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
self.assertEqual(utils.get_flaresolverr_version(), body.version)
solution = body.solution
self.assertIn(self.google_url, solution.url)
self.assertEqual(solution.status, 200)
self.assertIs(len(solution.headers), 0)
self.assertIn("<title>Google</title>", solution.response)
self.assertGreater(len(solution.cookies), 0)
self.assertIn("Chrome/", solution.userAgent)
def test_v1_endpoint_request_get_proxy_socks_param(self):
"""
To configure Dante in local:
* https://linuxhint.com/set-up-a-socks5-proxy-on-ubuntu-with-dante/
* sudo vim /etc/sockd.conf
* sudo systemctl restart sockd.service
* curl --socks5 socks5://127.0.0.1:1080 https://www.google.com
"""
res = self.app.post_json('/v1', {
"cmd": "request.get",
"url": self.google_url,
"proxy": {
"url": self.proxy_socks_url
}
})
self.assertEqual(res.status_code, 200)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_OK, body.status)
self.assertEqual("Challenge not detected!", body.message)
self.assertGreater(body.startTimestamp, 10000)
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
self.assertEqual(utils.get_flaresolverr_version(), body.version)
solution = body.solution
self.assertIn(self.google_url, solution.url)
self.assertEqual(solution.status, 200)
self.assertIs(len(solution.headers), 0)
self.assertIn("<title>Google</title>", solution.response)
self.assertGreater(len(solution.cookies), 0)
self.assertIn("Chrome/", solution.userAgent)
def test_v1_endpoint_request_get_proxy_wrong_param(self):
res = self.app.post_json('/v1', {
"cmd": "request.get",
"url": self.google_url,
"proxy": {
"url": "http://127.0.0.1:43210"
}
}, status=500)
self.assertEqual(res.status_code, 500)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_ERROR, body.status)
self.assertIn("Error: Error solving the challenge. Message: unknown error: net::ERR_PROXY_CONNECTION_FAILED",
body.message)
self.assertGreater(body.startTimestamp, 10000)
self.assertGreaterEqual(body.endTimestamp, body.startTimestamp)
self.assertEqual(utils.get_flaresolverr_version(), body.version)
def test_v1_endpoint_request_get_fail_timeout(self): def test_v1_endpoint_request_get_fail_timeout(self):
res = self.app.post_json('/v1', { res = self.app.post_json('/v1', {
@ -351,12 +533,99 @@ class TestFlareSolverr(unittest.TestCase):
self.assertEqual(STATUS_OK, body.status) self.assertEqual(STATUS_OK, body.status)
self.assertEqual("Challenge not detected!", body.message) self.assertEqual("Challenge not detected!", body.message)
# todo: test Cmd 'sessions.create' should return OK def test_v1_endpoint_sessions_create_without_session(self):
# todo: test Cmd 'sessions.create' should return OK with session res = self.app.post_json('/v1', {
# todo: test Cmd 'sessions.list' should return OK "cmd": "sessions.create"
# todo: test Cmd 'sessions.destroy' should return OK })
# todo: test Cmd 'sessions.destroy' should fail self.assertEqual(res.status_code, 200)
# todo: test Cmd 'request.get' should use session
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_OK, body.status)
self.assertEqual("Session created successfully.", body.message)
self.assertIsNotNone(body.session)
def test_v1_endpoint_sessions_create_with_session(self):
res = self.app.post_json('/v1', {
"cmd": "sessions.create",
"session": "test_create_session"
})
self.assertEqual(res.status_code, 200)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_OK, body.status)
self.assertEqual("Session created successfully.", body.message)
self.assertEqual(body.session, "test_create_session")
def test_v1_endpoint_sessions_create_with_proxy(self):
res = self.app.post_json('/v1', {
"cmd": "sessions.create",
"proxy": {
"url": self.proxy_url
}
})
self.assertEqual(res.status_code, 200)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_OK, body.status)
self.assertEqual("Session created successfully.", body.message)
self.assertIsNotNone(body.session)
def test_v1_endpoint_sessions_list(self):
self.app.post_json('/v1', {
"cmd": "sessions.create",
"session": "test_list_sessions"
})
res = self.app.post_json('/v1', {
"cmd": "sessions.list"
})
self.assertEqual(res.status_code, 200)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_OK, body.status)
self.assertEqual("", body.message)
self.assertGreaterEqual(len(body.sessions), 1)
self.assertIn("test_list_sessions", body.sessions)
def test_v1_endpoint_sessions_destroy_existing_session(self):
self.app.post_json('/v1', {
"cmd": "sessions.create",
"session": "test_destroy_sessions"
})
res = self.app.post_json('/v1', {
"cmd": "sessions.destroy",
"session": "test_destroy_sessions"
})
self.assertEqual(res.status_code, 200)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_OK, body.status)
self.assertEqual("The session has been removed.", body.message)
def test_v1_endpoint_sessions_destroy_non_existing_session(self):
res = self.app.post_json('/v1', {
"cmd": "sessions.destroy",
"session": "non_existing_session_name"
}, status=500)
self.assertEqual(res.status_code, 500)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_ERROR, body.status)
self.assertEqual("Error: The session doesn't exist.", body.message)
def test_v1_endpoint_request_get_with_session(self):
self.app.post_json('/v1', {
"cmd": "sessions.create",
"session": "test_request_sessions"
})
res = self.app.post_json('/v1', {
"cmd": "request.get",
"session": "test_request_sessions",
"url": self.google_url
})
self.assertEqual(res.status_code, 200)
body = V1ResponseBase(res.json)
self.assertEqual(STATUS_OK, body.status)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -39,6 +39,8 @@ def asset_cloudflare_solution(self, res, site_url, site_text):
class TestFlareSolverr(unittest.TestCase): class TestFlareSolverr(unittest.TestCase):
app = TestApp(flaresolverr.app) app = TestApp(flaresolverr.app)
# wait until the server is ready
app.get('/')
def test_v1_endpoint_request_get_cloudflare(self): def test_v1_endpoint_request_get_cloudflare(self):
sites_get = [ sites_get = [

View File

@ -1,7 +1,4 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import subprocess
""" """
@ -17,33 +14,39 @@ Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam) by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
""" """
from __future__ import annotations
__version__ = "3.1.5r4" __version__ = "3.5.5"
import json import json
import logging import logging
import os import os
import pathlib
import re import re
import shutil import shutil
import subprocess
import sys import sys
import tempfile import tempfile
import time import time
import inspect from weakref import finalize
import threading
import selenium.webdriver.chrome.service import selenium.webdriver.chrome.service
import selenium.webdriver.chrome.webdriver import selenium.webdriver.chrome.webdriver
import selenium.webdriver.common.service from selenium.webdriver.common.by import By
import selenium.webdriver.chromium.service
import selenium.webdriver.remote.command
import selenium.webdriver.remote.webdriver import selenium.webdriver.remote.webdriver
from .cdp import CDP from .cdp import CDP
from .dprocess import start_detached
from .options import ChromeOptions from .options import ChromeOptions
from .patcher import IS_POSIX from .patcher import IS_POSIX
from .patcher import Patcher from .patcher import Patcher
from .reactor import Reactor from .reactor import Reactor
from .dprocess import start_detached from .webelement import UCWebElement
from .webelement import WebElement
__all__ = ( __all__ = (
"Chrome", "Chrome",
@ -107,10 +110,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
browser_executable_path=None, browser_executable_path=None,
port=0, port=0,
enable_cdp_events=False, enable_cdp_events=False,
service_args=None, # service_args=None,
# service_creationflags=None,
desired_capabilities=None, desired_capabilities=None,
advanced_elements=False, advanced_elements=False,
service_log_path=None, # service_log_path=None,
keep_alive=True, keep_alive=True,
log_level=0, log_level=0,
headless=False, headless=False,
@ -119,8 +123,10 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
suppress_welcome=True, suppress_welcome=True,
use_subprocess=False, use_subprocess=False,
debug=False, debug=False,
no_sandbox=True,
windows_headless=False, windows_headless=False,
**kw user_multi_procs: bool = False,
**kw,
): ):
""" """
Creates a new instance of the chrome driver. Creates a new instance of the chrome driver.
@ -147,7 +153,9 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
If not specified, make sure the executable's folder is in $PATH If not specified, make sure the executable's folder is in $PATH
port: int, optional, default: 0 port: int, optional, default: 0
port you would like the service to run, if left as 0, a free port will be found. port to be used by the chromedriver executable, this is NOT the debugger port.
leave it at 0 unless you know what you are doing.
the default value of 0 automatically picks an available port.
enable_cdp_events: bool, default: False enable_cdp_events: bool, default: False
:: currently for chrome only :: currently for chrome only
@ -207,11 +215,12 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
now, in case you are nag-fetishist, or a diagnostics data feeder to google, you can set this to False. now, in case you are nag-fetishist, or a diagnostics data feeder to google, you can set this to False.
Note: if you don't handle the nag screen in time, the browser loses it's connection and throws an Exception. Note: if you don't handle the nag screen in time, the browser loses it's connection and throws an Exception.
use_subprocess: bool, optional , default: False, use_subprocess: bool, optional , default: True,
False (the default) makes sure Chrome will get it's own process (so no subprocess of chromedriver.exe or python False (the default) makes sure Chrome will get it's own process (so no subprocess of chromedriver.exe or python
This fixes a LOT of issues, like multithreaded run, but mst importantly. shutting corectly after This fixes a LOT of issues, like multithreaded run, but mst importantly. shutting corectly after
program exits or using .quit() program exits or using .quit()
you should be knowing what you're doing, and know how python works.
unfortunately, there is always an edge case in which one would like to write an single script with the only contents being: unfortunately, there is always an edge case in which one would like to write an single script with the only contents being:
--start script-- --start script--
@ -224,19 +233,35 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
in that case you can set this to `True`. The browser will start via subprocess, and will keep running most of times. in that case you can set this to `True`. The browser will start via subprocess, and will keep running most of times.
! setting it to True comes with NO support when being detected. ! ! setting it to True comes with NO support when being detected. !
no_sandbox: bool, optional, default=True
uses the --no-sandbox option, and additionally does suppress the "unsecure option" status bar
this option has a default of True since many people seem to run this as root (....) , and chrome does not start
when running as root without using --no-sandbox flag.
user_multi_procs:
set to true when you are using multithreads/multiprocessing
ensures not all processes are trying to modify a binary which is in use by another.
for this to work. YOU MUST HAVE AT LEAST 1 UNDETECTED_CHROMEDRIVER BINARY IN YOUR ROAMING DATA FOLDER.
this requirement can be easily satisfied, by just running this program "normal" and close/kill it.
""" """
finalize(self, self._ensure_close, self)
self.debug = debug self.debug = debug
patcher = Patcher( self.patcher = Patcher(
executable_path=driver_executable_path, executable_path=driver_executable_path,
force=patcher_force_close, force=patcher_force_close,
version_main=version_main, version_main=version_main,
user_multi_procs=user_multi_procs,
) )
patcher.auto() # self.patcher.auto(user_multiprocess = user_multi_num_procs)
self.patcher = patcher self.patcher.auto()
# self.patcher = patcher
if not options: if not options:
options = ChromeOptions() options = ChromeOptions()
try: try:
if hasattr(options, "_session") and options._session is not None: if hasattr(options, "_session") and options._session is not None:
# prevent reuse of options, # prevent reuse of options,
@ -248,11 +273,17 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
options._session = self options._session = self
debug_port = selenium.webdriver.common.service.utils.free_port()
debug_host = "127.0.0.1"
if not options.debugger_address: if not options.debugger_address:
debug_port = (
port
if port != 0
else selenium.webdriver.common.service.utils.free_port()
)
debug_host = "127.0.0.1"
options.debugger_address = "%s:%d" % (debug_host, debug_port) options.debugger_address = "%s:%d" % (debug_host, debug_port)
else:
debug_host, debug_port = options.debugger_address.split(":")
debug_port = int(debug_port)
if enable_cdp_events: if enable_cdp_events:
options.set_capability( options.set_capability(
@ -263,13 +294,17 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
options.add_argument("--remote-debugging-port=%s" % debug_port) options.add_argument("--remote-debugging-port=%s" % debug_port)
if user_data_dir: if user_data_dir:
options.add_argument('--user-data-dir=%s' % user_data_dir) options.add_argument("--user-data-dir=%s" % user_data_dir)
language, keep_user_data_dir = None, bool(user_data_dir) language, keep_user_data_dir = None, bool(user_data_dir)
# see if a custom user profile is specified in options # see if a custom user profile is specified in options
for arg in options.arguments: for arg in options.arguments:
if any([_ in arg for _ in ("--headless", "headless")]):
options.arguments.remove(arg)
options.headless = True
if "lang" in arg: if "lang" in arg:
m = re.search("(?:--)?lang(?:[ =])?(.*)", arg) m = re.search("(?:--)?lang(?:[ =])?(.*)", arg)
try: try:
@ -294,7 +329,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
) )
if not user_data_dir: if not user_data_dir:
# backward compatiblity # backward compatiblity
# check if an old uc.ChromeOptions is used, and extract the user data dir # check if an old uc.ChromeOptions is used, and extract the user data dir
@ -340,6 +374,18 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
browser_executable_path or find_chrome_executable() browser_executable_path or find_chrome_executable()
) )
if not options.binary_location or not \
pathlib.Path(options.binary_location).exists():
raise FileNotFoundError(
"\n---------------------\n"
"Could not determine browser executable."
"\n---------------------\n"
"Make sure your browser is installed in the default location (path).\n"
"If you are sure about the browser executable, you can specify it using\n"
"the `browser_executable_path='{}` parameter.\n\n"
.format("/path/to/browser/executable" if IS_POSIX else "c:/path/to/your/browser.exe")
)
self._delay = 3 self._delay = 3
self.user_data_dir = user_data_dir self.user_data_dir = user_data_dir
@ -347,20 +393,34 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if suppress_welcome: if suppress_welcome:
options.arguments.extend(["--no-default-browser-check", "--no-first-run"]) options.arguments.extend(["--no-default-browser-check", "--no-first-run"])
if headless or options.headless: if no_sandbox:
options.headless = True options.arguments.extend(["--no-sandbox", "--test-type"])
options.add_argument("--window-size=1920,1080")
options.add_argument("--start-maximized") if headless or getattr(options, 'headless', None):
options.add_argument("--no-sandbox") #workaround until a better checking is found
# fixes "could not connect to chrome" error when running try:
# on linux using privileged user like root (which i don't recommend) v_main = int(self.patcher.version_main) if self.patcher.version_main else 108
if v_main < 108:
options.add_argument("--headless=chrome")
elif v_main >= 108:
options.add_argument("--headless=new")
except:
logger.warning("could not detect version_main."
"therefore, we are assuming it is chrome 108 or higher")
options.add_argument("--headless=new")
options.add_argument("--window-size=1920,1080")
options.add_argument("--start-maximized")
options.add_argument("--no-sandbox")
# fixes "could not connect to chrome" error when running
# on linux using privileged user like root (which i don't recommend)
options.add_argument( options.add_argument(
"--log-level=%d" % log_level "--log-level=%d" % log_level
or divmod(logging.getLogger().getEffectiveLevel(), 10)[0] or divmod(logging.getLogger().getEffectiveLevel(), 10)[0]
) )
if hasattr(options, 'handle_prefs'): if hasattr(options, "handle_prefs"):
options.handle_prefs(user_data_dir) options.handle_prefs(user_data_dir)
# fix exit_type flag to prevent tab-restore nag # fix exit_type flag to prevent tab-restore nag
@ -376,6 +436,7 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
config["profile"]["exit_type"] = None config["profile"]["exit_type"] = None
fs.seek(0, 0) fs.seek(0, 0)
json.dump(config, fs) json.dump(config, fs)
fs.truncate() # the file might be shorter
logger.debug("fixed exit_type flag") logger.debug("fixed exit_type flag")
except Exception as e: except Exception as e:
logger.debug("did not find a bad exit_type flag ") logger.debug("did not find a bad exit_type flag ")
@ -390,8 +451,10 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
options.binary_location, *options.arguments options.binary_location, *options.arguments
) )
else: else:
startupinfo = subprocess.STARTUPINFO() startupinfo = None
if os.name == 'nt' and windows_headless: if os.name == 'nt' and windows_headless:
# STARTUPINFO() is Windows only
startupinfo = subprocess.STARTUPINFO()
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
browser = subprocess.Popen( browser = subprocess.Popen(
[options.binary_location, *options.arguments], [options.binary_location, *options.arguments],
@ -403,13 +466,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
) )
self.browser_pid = browser.pid self.browser_pid = browser.pid
service = selenium.webdriver.chromium.service.ChromiumService(
self.patcher.executable_path
)
super(Chrome, self).__init__( super(Chrome, self).__init__(
executable_path=patcher.executable_path, service=service,
port=port,
options=options, options=options,
service_args=service_args,
desired_capabilities=desired_capabilities,
service_log_path=service_log_path,
keep_alive=keep_alive, keep_alive=keep_alive,
) )
@ -425,35 +489,14 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
self.reactor = reactor self.reactor = reactor
if advanced_elements: if advanced_elements:
from .webelement import WebElement self._web_element_cls = UCWebElement
else:
self._web_element_cls = WebElement self._web_element_cls = WebElement
if options.headless: if headless or getattr(options, 'headless', None):
self._configure_headless() self._configure_headless()
def __getattribute__(self, item):
if not super().__getattribute__("debug"):
return super().__getattribute__(item)
else:
import inspect
original = super().__getattribute__(item)
if inspect.ismethod(original) and not inspect.isclass(original):
def newfunc(*args, **kwargs):
logger.debug(
"calling %s with args %s and kwargs %s\n"
% (original.__qualname__, args, kwargs)
)
return original(*args, **kwargs)
return newfunc
return original
def _configure_headless(self): def _configure_headless(self):
orig_get = self.get orig_get = self.get
logger.info("setting properties for headless") logger.info("setting properties for headless")
@ -464,19 +507,17 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
"Page.addScriptToEvaluateOnNewDocument", "Page.addScriptToEvaluateOnNewDocument",
{ {
"source": """ "source": """
Object.defineProperty(window, "navigator", {
Object.defineProperty(window, 'navigator', { value: new Proxy(navigator, {
value: new Proxy(navigator, { has: (target, key) => (key === "webdriver" ? false : key in target),
has: (target, key) => (key === 'webdriver' ? false : key in target), get: (target, key) =>
get: (target, key) => key === "webdriver"
key === 'webdriver' ? ? false
false : : typeof target[key] === "function"
typeof target[key] === 'function' ? ? target[key].bind(target)
target[key].bind(target) : : target[key],
target[key] }),
}) });
});
""" """
}, },
) )
@ -494,49 +535,139 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
"Page.addScriptToEvaluateOnNewDocument", "Page.addScriptToEvaluateOnNewDocument",
{ {
"source": """ "source": """
Object.defineProperty(navigator, 'maxTouchPoints', { Object.defineProperty(navigator, 'maxTouchPoints', {get: () => 1});
get: () => 1 Object.defineProperty(navigator.connection, 'rtt', {get: () => 100});
})"""
// https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/chrome-runtime.js
window.chrome = {
app: {
isInstalled: false,
InstallState: {
DISABLED: 'disabled',
INSTALLED: 'installed',
NOT_INSTALLED: 'not_installed'
},
RunningState: {
CANNOT_RUN: 'cannot_run',
READY_TO_RUN: 'ready_to_run',
RUNNING: 'running'
}
},
runtime: {
OnInstalledReason: {
CHROME_UPDATE: 'chrome_update',
INSTALL: 'install',
SHARED_MODULE_UPDATE: 'shared_module_update',
UPDATE: 'update'
},
OnRestartRequiredReason: {
APP_UPDATE: 'app_update',
OS_UPDATE: 'os_update',
PERIODIC: 'periodic'
},
PlatformArch: {
ARM: 'arm',
ARM64: 'arm64',
MIPS: 'mips',
MIPS64: 'mips64',
X86_32: 'x86-32',
X86_64: 'x86-64'
},
PlatformNaclArch: {
ARM: 'arm',
MIPS: 'mips',
MIPS64: 'mips64',
X86_32: 'x86-32',
X86_64: 'x86-64'
},
PlatformOs: {
ANDROID: 'android',
CROS: 'cros',
LINUX: 'linux',
MAC: 'mac',
OPENBSD: 'openbsd',
WIN: 'win'
},
RequestUpdateCheckStatus: {
NO_UPDATE: 'no_update',
THROTTLED: 'throttled',
UPDATE_AVAILABLE: 'update_available'
}
}
}
// https://github.com/microlinkhq/browserless/blob/master/packages/goto/src/evasions/navigator-permissions.js
if (!window.Notification) {
window.Notification = {
permission: 'denied'
}
}
const originalQuery = window.navigator.permissions.query
window.navigator.permissions.__proto__.query = parameters =>
parameters.name === 'notifications'
? Promise.resolve({ state: window.Notification.permission })
: originalQuery(parameters)
const oldCall = Function.prototype.call
function call() {
return oldCall.apply(this, arguments)
}
Function.prototype.call = call
const nativeToStringFunctionString = Error.toString().replace(/Error/g, 'toString')
const oldToString = Function.prototype.toString
function functionToString() {
if (this === window.navigator.permissions.query) {
return 'function query() { [native code] }'
}
if (this === functionToString) {
return nativeToStringFunctionString
}
return oldCall.call(oldToString, this)
}
// eslint-disable-next-line
Function.prototype.toString = functionToString
"""
}, },
) )
return orig_get(*args, **kwargs) return orig_get(*args, **kwargs)
self.get = get_wrapped self.get = get_wrapped
def __dir__(self): # def _get_cdc_props(self):
return object.__dir__(self) # return self.execute_script(
# """
def _get_cdc_props(self): # let objectToInspect = window,
return self.execute_script( # result = [];
""" # while(objectToInspect !== null)
let objectToInspect = window, # { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
result = []; # objectToInspect = Object.getPrototypeOf(objectToInspect); }
while(objectToInspect !== null) #
{ result = result.concat(Object.getOwnPropertyNames(objectToInspect)); # return result.filter(i => i.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig))
objectToInspect = Object.getPrototypeOf(objectToInspect); } # """
return result.filter(i => i.match(/.+_.+_(Array|Promise|Symbol)/ig)) # )
""" #
) # def _hook_remove_cdc_props(self):
# self.execute_cdp_cmd(
def _hook_remove_cdc_props(self): # "Page.addScriptToEvaluateOnNewDocument",
self.execute_cdp_cmd( # {
"Page.addScriptToEvaluateOnNewDocument", # "source": """
{ # let objectToInspect = window,
"source": """ # result = [];
let objectToInspect = window, # while(objectToInspect !== null)
result = []; # { result = result.concat(Object.getOwnPropertyNames(objectToInspect));
while(objectToInspect !== null) # objectToInspect = Object.getPrototypeOf(objectToInspect); }
{ result = result.concat(Object.getOwnPropertyNames(objectToInspect)); # result.forEach(p => p.match(/^([a-zA-Z]){27}(Array|Promise|Symbol)$/ig)
objectToInspect = Object.getPrototypeOf(objectToInspect); } # &&delete window[p]&&console.log('removed',p))
result.forEach(p => p.match(/.+_.+_(Array|Promise|Symbol)/ig) # """
&&delete window[p]&&console.log('removed',p)) # },
""" # )
},
)
def get(self, url): def get(self, url):
if self._get_cdc_props(): # if self._get_cdc_props():
self._hook_remove_cdc_props() # self._hook_remove_cdc_props()
return super().get(url) return super().get(url)
def add_cdp_listener(self, event_name, callback): def add_cdp_listener(self, event_name, callback):
@ -553,6 +684,11 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if self.reactor and isinstance(self.reactor, Reactor): if self.reactor and isinstance(self.reactor, Reactor):
self.reactor.handlers.clear() self.reactor.handlers.clear()
def window_new(self):
self.execute(
selenium.webdriver.remote.command.Command.NEW_WINDOW, {"type": "window"}
)
def tab_new(self, url: str): def tab_new(self, url: str):
""" """
this opens a url in a new tab. this opens a url in a new tab.
@ -592,29 +728,64 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
if not capabilities: if not capabilities:
capabilities = self.options.to_capabilities() capabilities = self.options.to_capabilities()
super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session( super(selenium.webdriver.chrome.webdriver.WebDriver, self).start_session(
capabilities, browser_profile capabilities
) )
# super(Chrome, self).start_session(capabilities, browser_profile) # super(Chrome, self).start_session(capabilities, browser_profile)
def find_elements_recursive(self, by, value):
"""
find elements in all frames
this is a generator function, which is needed
since if it would return a list of elements, they
will be stale on arrival.
using generator, when the element is returned we are in the correct frame
to use it directly
Args:
by: By
value: str
Returns: Generator[webelement.WebElement]
"""
def search_frame(f=None):
if not f:
# ensure we are on main content frame
self.switch_to.default_content()
else:
self.switch_to.frame(f)
for elem in self.find_elements(by, value):
yield elem
# switch back to main content, otherwise we will get StaleElementReferenceException
self.switch_to.default_content()
# search root frame
for elem in search_frame():
yield elem
# get iframes
frames = self.find_elements('css selector', 'iframe')
# search per frame
for f in frames:
for elem in search_frame(f):
yield elem
def quit(self): def quit(self):
logger.debug("closing webdriver") try:
if hasattr(self, "service") and getattr(self.service, "process", None): self.service.stop()
self.service.process.kill() self.service.process.kill()
try: self.command_executor.close()
if self.reactor and isinstance(self.reactor, Reactor): self.service.process.wait(5)
logger.debug("shutting down reactor") logger.debug("webdriver process ended")
self.reactor.event.set() except (AttributeError, RuntimeError, OSError):
except Exception: # noqa pass
try:
self.reactor.event.set()
logger.debug("shutting down reactor")
except AttributeError:
pass pass
try: try:
logger.debug("killing browser")
os.kill(self.browser_pid, 15) os.kill(self.browser_pid, 15)
logger.debug("gracefully closed browser")
except TimeoutError as e: except Exception as e: # noqa
logger.debug(e, exc_info=True)
except Exception: # noqa
pass pass
if ( if (
hasattr(self, "keep_user_data_dir") hasattr(self, "keep_user_data_dir")
and hasattr(self, "user_data_dir") and hasattr(self, "user_data_dir")
@ -622,7 +793,6 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
): ):
for _ in range(5): for _ in range(5):
try: try:
shutil.rmtree(self.user_data_dir, ignore_errors=False) shutil.rmtree(self.user_data_dir, ignore_errors=False)
except FileNotFoundError: except FileNotFoundError:
pass pass
@ -634,19 +804,34 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
else: else:
logger.debug("successfully removed %s" % self.user_data_dir) logger.debug("successfully removed %s" % self.user_data_dir)
break break
time.sleep(0.1)
try:
time.sleep(0.1)
except OSError:
pass
# dereference patcher, so patcher can start cleaning up as well. # dereference patcher, so patcher can start cleaning up as well.
# this must come last, otherwise it will throw 'in use' errors # this must come last, otherwise it will throw 'in use' errors
self.patcher = None self.patcher = None
def __del__(self): def __getattribute__(self, item):
try: if not super().__getattribute__("debug"):
super().quit() return super().__getattribute__(item)
# self.service.process.kill() else:
except: # noqa import inspect
pass
self.quit() original = super().__getattribute__(item)
if inspect.ismethod(original) and not inspect.isclass(original):
def newfunc(*args, **kwargs):
logger.debug(
"calling %s with args %s and kwargs %s\n"
% (original.__qualname__, args, kwargs)
)
return original(*args, **kwargs)
return newfunc
return original
def __enter__(self): def __enter__(self):
return self return self
@ -660,6 +845,27 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
def __hash__(self): def __hash__(self):
return hash(self.options.debugger_address) return hash(self.options.debugger_address)
def __dir__(self):
return object.__dir__(self)
def __del__(self):
try:
self.service.process.kill()
except: # noqa
pass
self.quit()
@classmethod
def _ensure_close(cls, self):
# needs to be a classmethod so finalize can find the reference
logger.info("ensuring close")
if (
hasattr(self, "service")
and hasattr(self.service, "process")
and hasattr(self.service.process, "kill")
):
self.service.process.kill()
def find_chrome_executable(): def find_chrome_executable():
""" """
@ -691,14 +897,16 @@ def find_chrome_executable():
) )
else: else:
for item in map( for item in map(
os.environ.get, ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA") os.environ.get,
("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA", "PROGRAMW6432"),
): ):
for subitem in ( if item is not None:
"Google/Chrome/Application", for subitem in (
"Google/Chrome Beta/Application", "Google/Chrome/Application",
"Google/Chrome Canary/Application", ):
): candidates.add(os.sep.join((item, subitem, "chrome.exe")))
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
for candidate in candidates: for candidate in candidates:
logger.debug('checking if %s exists and is executable' % candidate)
if os.path.exists(candidate) and os.access(candidate, os.X_OK): if os.path.exists(candidate) and os.access(candidate, os.X_OK):
logger.debug('found! using %s' % candidate)
return os.path.normpath(candidate) return os.path.normpath(candidate)

View File

@ -1,259 +0,0 @@
#!/usr/bin/env python3
# this module is part of undetected_chromedriver
"""
888 888 d8b
888 888 Y8P
888 888
.d8888b 88888b. 888d888 .d88b. 88888b.d88b. .d88b. .d88888 888d888 888 888 888 .d88b. 888d888
d88P" 888 "88b 888P" d88""88b 888 "888 "88b d8P Y8b d88" 888 888P" 888 888 888 d8P Y8b 888P"
888 888 888 888 888 888 888 888 888 88888888 888 888 888 888 Y88 88P 88888888 888
Y88b. 888 888 888 Y88..88P 888 888 888 Y8b. Y88b 888 888 888 Y8bd8P Y8b. 888
"Y8888P 888 888 888 "Y88P" 888 888 888 "Y8888 "Y88888 888 888 Y88P "Y8888 888 88888888
by UltrafunkAmsterdam (https://github.com/ultrafunkamsterdam)
"""
import io
import logging
import os
import random
import re
import string
import sys
import zipfile
from distutils.version import LooseVersion
from urllib.request import urlopen, urlretrieve
from selenium.webdriver import Chrome as _Chrome, ChromeOptions as _ChromeOptions
TARGET_VERSION = 0
logger = logging.getLogger("uc")
class Chrome:
def __new__(cls, *args, emulate_touch=False, **kwargs):
if not ChromeDriverManager.installed:
ChromeDriverManager(*args, **kwargs).install()
if not ChromeDriverManager.selenium_patched:
ChromeDriverManager(*args, **kwargs).patch_selenium_webdriver()
if not kwargs.get("executable_path"):
kwargs["executable_path"] = "./{}".format(
ChromeDriverManager(*args, **kwargs).executable_path
)
if not kwargs.get("options"):
kwargs["options"] = ChromeOptions()
instance = object.__new__(_Chrome)
instance.__init__(*args, **kwargs)
instance._orig_get = instance.get
def _get_wrapped(*args, **kwargs):
if instance.execute_script("return navigator.webdriver"):
instance.execute_cdp_cmd(
"Page.addScriptToEvaluateOnNewDocument",
{
"source": """
Object.defineProperty(window, 'navigator', {
value: new Proxy(navigator, {
has: (target, key) => (key === 'webdriver' ? false : key in target),
get: (target, key) =>
key === 'webdriver'
? undefined
: typeof target[key] === 'function'
? target[key].bind(target)
: target[key]
})
});
"""
},
)
return instance._orig_get(*args, **kwargs)
instance.get = _get_wrapped
instance.get = _get_wrapped
instance.get = _get_wrapped
original_user_agent_string = instance.execute_script(
"return navigator.userAgent"
)
instance.execute_cdp_cmd(
"Network.setUserAgentOverride",
{
"userAgent": original_user_agent_string.replace("Headless", ""),
},
)
if emulate_touch:
instance.execute_cdp_cmd(
"Page.addScriptToEvaluateOnNewDocument",
{
"source": """
Object.defineProperty(navigator, 'maxTouchPoints', {
get: () => 1
})"""
},
)
logger.info(f"starting undetected_chromedriver.Chrome({args}, {kwargs})")
return instance
class ChromeOptions:
def __new__(cls, *args, **kwargs):
if not ChromeDriverManager.installed:
ChromeDriverManager(*args, **kwargs).install()
if not ChromeDriverManager.selenium_patched:
ChromeDriverManager(*args, **kwargs).patch_selenium_webdriver()
instance = object.__new__(_ChromeOptions)
instance.__init__()
instance.add_argument("start-maximized")
instance.add_experimental_option("excludeSwitches", ["enable-automation"])
instance.add_argument("--disable-blink-features=AutomationControlled")
return instance
class ChromeDriverManager(object):
installed = False
selenium_patched = False
target_version = None
DL_BASE = "https://chromedriver.storage.googleapis.com/"
def __init__(self, executable_path=None, target_version=None, *args, **kwargs):
_platform = sys.platform
if TARGET_VERSION:
# use global if set
self.target_version = TARGET_VERSION
if target_version:
# use explicitly passed target
self.target_version = target_version # user override
if not self.target_version:
# none of the above (default) and just get current version
self.target_version = self.get_release_version_number().version[
0
] # only major version int
self._base = base_ = "chromedriver{}"
exe_name = self._base
if _platform in ("win32",):
exe_name = base_.format(".exe")
if _platform in ("linux",):
_platform += "64"
exe_name = exe_name.format("")
if _platform in ("darwin",):
_platform = "mac64"
exe_name = exe_name.format("")
self.platform = _platform
self.executable_path = executable_path or exe_name
self._exe_name = exe_name
def patch_selenium_webdriver(self_):
"""
Patches selenium package Chrome, ChromeOptions classes for current session
:return:
"""
import selenium.webdriver.chrome.service
import selenium.webdriver
selenium.webdriver.Chrome = Chrome
selenium.webdriver.ChromeOptions = ChromeOptions
logger.info("Selenium patched. Safe to import Chrome / ChromeOptions")
self_.__class__.selenium_patched = True
def install(self, patch_selenium=True):
"""
Initialize the patch
This will:
download chromedriver if not present
patch the downloaded chromedriver
patch selenium package if <patch_selenium> is True (default)
:param patch_selenium: patch selenium webdriver classes for Chrome and ChromeDriver (for current python session)
:return:
"""
if not os.path.exists(self.executable_path):
self.fetch_chromedriver()
if not self.__class__.installed:
if self.patch_binary():
self.__class__.installed = True
if patch_selenium:
self.patch_selenium_webdriver()
def get_release_version_number(self):
"""
Gets the latest major version available, or the latest major version of self.target_version if set explicitly.
:return: version string
"""
path = (
"LATEST_RELEASE"
if not self.target_version
else f"LATEST_RELEASE_{self.target_version}"
)
return LooseVersion(urlopen(self.__class__.DL_BASE + path).read().decode())
def fetch_chromedriver(self):
"""
Downloads ChromeDriver from source and unpacks the executable
:return: on success, name of the unpacked executable
"""
base_ = self._base
zip_name = base_.format(".zip")
ver = self.get_release_version_number().vstring
if os.path.exists(self.executable_path):
return self.executable_path
urlretrieve(
f"{self.__class__.DL_BASE}{ver}/{base_.format(f'_{self.platform}')}.zip",
filename=zip_name,
)
with zipfile.ZipFile(zip_name) as zf:
zf.extract(self._exe_name)
os.remove(zip_name)
if sys.platform != "win32":
os.chmod(self._exe_name, 0o755)
return self._exe_name
@staticmethod
def random_cdc():
cdc = random.choices(string.ascii_lowercase, k=26)
cdc[-6:-4] = map(str.upper, cdc[-6:-4])
cdc[2] = cdc[0]
cdc[3] = "_"
return "".join(cdc).encode()
def patch_binary(self):
"""
Patches the ChromeDriver binary
:return: False on failure, binary name on success
"""
linect = 0
replacement = self.random_cdc()
with io.open(self.executable_path, "r+b") as fh:
for line in iter(lambda: fh.readline(), b""):
if b"cdc_" in line:
fh.seek(-len(line), 1)
newline = re.sub(b"cdc_.{22}", replacement, line)
fh.write(newline)
linect += 1
return linect
def install(executable_path=None, target_version=None, *args, **kwargs):
ChromeDriverManager(executable_path, target_version, *args, **kwargs).install()

View File

@ -3,11 +3,11 @@
import json import json
import logging import logging
from collections.abc import Mapping, Sequence
import requests import requests
import websockets import websockets
log = logging.getLogger(__name__) log = logging.getLogger(__name__)

View File

@ -1,17 +1,17 @@
import asyncio import asyncio
import logging
import time
import traceback
from collections.abc import Mapping from collections.abc import Mapping
from collections.abc import Sequence from collections.abc import Sequence
from functools import wraps
import os
import logging
import threading
import time
import traceback
from typing import Any from typing import Any
from typing import Awaitable from typing import Awaitable
from typing import Callable from typing import Callable
from typing import List from typing import List
from typing import Optional from typing import Optional
from contextlib import ExitStack
import threading
from functools import wraps, partial
class Structure(dict): class Structure(dict):
@ -188,4 +188,6 @@ def test():
time.sleep(10) time.sleep(10)
if os.name == "nt":
driver.close()
driver.quit() driver.quit()

View File

@ -1,13 +1,13 @@
import atexit
import logging
import multiprocessing import multiprocessing
import os import os
import platform import platform
import sys import signal
from subprocess import PIPE from subprocess import PIPE
from subprocess import Popen from subprocess import Popen
import atexit import sys
import traceback
import logging
import signal
CREATE_NEW_PROCESS_GROUP = 0x00000200 CREATE_NEW_PROCESS_GROUP = 0x00000200
DETACHED_PROCESS = 0x00000008 DETACHED_PROCESS = 0x00000008
@ -27,24 +27,26 @@ def start_detached(executable, *args):
reader, writer = multiprocessing.Pipe(False) reader, writer = multiprocessing.Pipe(False)
# do not keep reference # do not keep reference
multiprocessing.Process( process = multiprocessing.Process(
target=_start_detached, target=_start_detached,
args=(executable, *args), args=(executable, *args),
kwargs={"writer": writer}, kwargs={"writer": writer},
daemon=True, daemon=True,
).start() )
process.start()
process.join()
# receive pid from pipe # receive pid from pipe
pid = reader.recv() pid = reader.recv()
REGISTERED.append(pid) REGISTERED.append(pid)
# close pipes # close pipes
writer.close() writer.close()
reader.close() reader.close()
process.close()
return pid return pid
def _start_detached(executable, *args, writer: multiprocessing.Pipe = None): def _start_detached(executable, *args, writer: multiprocessing.Pipe = None):
# configure launch # configure launch
kwargs = {} kwargs = {}
if platform.system() == "Windows": if platform.system() == "Windows":

View File

@ -39,10 +39,23 @@ class ChromeOptions(_ChromiumOptions):
value = ChromeOptions._undot_key(rest, value) value = ChromeOptions._undot_key(rest, value)
return {key: value} return {key: value}
@staticmethod
def _merge_nested(a, b):
"""
merges b into a
leaf values in a are overwritten with values from b
"""
for key in b:
if key in a:
if isinstance(a[key], dict) and isinstance(b[key], dict):
ChromeOptions._merge_nested(a[key], b[key])
continue
a[key] = b[key]
return a
def handle_prefs(self, user_data_dir): def handle_prefs(self, user_data_dir):
prefs = self.experimental_options.get("prefs") prefs = self.experimental_options.get("prefs")
if prefs: if prefs:
user_data_dir = user_data_dir or self._user_data_dir user_data_dir = user_data_dir or self._user_data_dir
default_path = os.path.join(user_data_dir, "Default") default_path = os.path.join(user_data_dir, "Default")
os.makedirs(default_path, exist_ok=True) os.makedirs(default_path, exist_ok=True)
@ -50,12 +63,14 @@ class ChromeOptions(_ChromiumOptions):
# undot prefs dict keys # undot prefs dict keys
undot_prefs = {} undot_prefs = {}
for key, value in prefs.items(): for key, value in prefs.items():
undot_prefs.update(self._undot_key(key, value)) undot_prefs = self._merge_nested(
undot_prefs, self._undot_key(key, value)
)
prefs_file = os.path.join(default_path, "Preferences") prefs_file = os.path.join(default_path, "Preferences")
if os.path.exists(prefs_file): if os.path.exists(prefs_file):
with open(prefs_file, encoding="latin1", mode="r") as f: with open(prefs_file, encoding="latin1", mode="r") as f:
undot_prefs.update(json.load(f)) undot_prefs = self._merge_nested(json.load(f), undot_prefs)
with open(prefs_file, encoding="latin1", mode="w") as f: with open(prefs_file, encoding="latin1", mode="w") as f:
json.dump(undot_prefs, f) json.dump(undot_prefs, f)

View File

@ -1,44 +1,39 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# this module is part of undetected_chromedriver # this module is part of undetected_chromedriver
from distutils.version import LooseVersion
import io import io
import json
import logging import logging
import os import os
import pathlib
import platform
import random import random
import re import re
import shutil
import string import string
import sys import sys
import time import time
from urllib.request import urlopen
from urllib.request import urlretrieve
import zipfile import zipfile
from distutils.version import LooseVersion from multiprocessing import Lock
from urllib.request import urlopen, urlretrieve
import secrets
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux")) IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2", "freebsd"))
class Patcher(object): class Patcher(object):
url_repo = "https://chromedriver.storage.googleapis.com" lock = Lock()
zip_name = "chromedriver_%s.zip"
exe_name = "chromedriver%s" exe_name = "chromedriver%s"
platform = sys.platform platform = sys.platform
if platform.endswith("win32"):
zip_name %= "win32"
exe_name %= ".exe"
if platform.endswith("linux"):
zip_name %= "linux64"
exe_name %= ""
if platform.endswith("darwin"):
zip_name %= "mac64"
exe_name %= ""
if platform.endswith("win32"): if platform.endswith("win32"):
d = "~/appdata/roaming/undetected_chromedriver" d = "~/appdata/roaming/undetected_chromedriver"
elif platform.startswith("linux"): elif "LAMBDA_TASK_ROOT" in os.environ:
d = "/tmp/undetected_chromedriver"
elif platform.startswith(("linux", "linux2")):
d = "~/.local/share/undetected_chromedriver" d = "~/.local/share/undetected_chromedriver"
elif platform.endswith("darwin"): elif platform.endswith("darwin"):
d = "~/Library/Application Support/undetected_chromedriver" d = "~/Library/Application Support/undetected_chromedriver"
@ -46,9 +41,14 @@ class Patcher(object):
d = "~/.undetected_chromedriver" d = "~/.undetected_chromedriver"
data_path = os.path.abspath(os.path.expanduser(d)) data_path = os.path.abspath(os.path.expanduser(d))
def __init__(self, executable_path=None, force=False, version_main: int = 0): def __init__(
self,
executable_path=None,
force=False,
version_main: int = 0,
user_multi_procs=False,
):
""" """
Args: Args:
executable_path: None = automatic executable_path: None = automatic
a full file path to the chromedriver executable a full file path to the chromedriver executable
@ -57,18 +57,39 @@ class Patcher(object):
version_main: 0 = auto version_main: 0 = auto
specify main chrome version (rounded, ex: 82) specify main chrome version (rounded, ex: 82)
""" """
self.force = force self.force = force
self.executable_path = None self._custom_exe_path = False
prefix = secrets.token_hex(8) prefix = "undetected"
self.user_multi_procs = user_multi_procs
try:
# Try to convert version_main into an integer
version_main_int = int(version_main)
# check if version_main_int is less than or equal to e.g 114
self.is_old_chromedriver = version_main and version_main_int <= 114
except (ValueError,TypeError):
# Check not running inside Docker
if not os.path.exists("/app/chromedriver"):
# If the conversion fails, log an error message
logging.info("version_main cannot be converted to an integer")
# Set self.is_old_chromedriver to False if the conversion fails
self.is_old_chromedriver = False
# Needs to be called before self.exe_name is accessed
self._set_platform_name()
if not os.path.exists(self.data_path): if not os.path.exists(self.data_path):
os.makedirs(self.data_path, exist_ok=True) os.makedirs(self.data_path, exist_ok=True)
if not executable_path: if not executable_path:
self.executable_path = os.path.join( if sys.platform.startswith("freebsd"):
self.data_path, "_".join([prefix, self.exe_name]) self.executable_path = os.path.join(
) self.data_path, self.exe_name
)
else:
self.executable_path = os.path.join(
self.data_path, "_".join([prefix, self.exe_name])
)
if not IS_POSIX: if not IS_POSIX:
if executable_path: if executable_path:
@ -78,20 +99,67 @@ class Patcher(object):
self.zip_path = os.path.join(self.data_path, prefix) self.zip_path = os.path.join(self.data_path, prefix)
if not executable_path: if not executable_path:
self.executable_path = os.path.abspath( if not self.user_multi_procs:
os.path.join(".", self.executable_path) self.executable_path = os.path.abspath(
) os.path.join(".", self.executable_path)
)
self._custom_exe_path = False
if executable_path: if executable_path:
self._custom_exe_path = True self._custom_exe_path = True
self.executable_path = executable_path self.executable_path = executable_path
# Set the correct repository to download the Chromedriver from
if self.is_old_chromedriver:
self.url_repo = "https://chromedriver.storage.googleapis.com"
else:
self.url_repo = "https://googlechromelabs.github.io/chrome-for-testing"
self.version_main = version_main self.version_main = version_main
self.version_full = None self.version_full = None
def auto(self, executable_path=None, force=False, version_main=None): def _set_platform_name(self):
"""""" """
Set the platform and exe name based on the platform undetected_chromedriver is running on
in order to download the correct chromedriver.
"""
if self.platform.endswith("win32"):
self.platform_name = "win32"
self.exe_name %= ".exe"
if self.platform.endswith(("linux", "linux2")):
self.platform_name = "linux64"
self.exe_name %= ""
if self.platform.endswith("darwin"):
if self.is_old_chromedriver:
self.platform_name = "mac64"
else:
self.platform_name = "mac-x64"
self.exe_name %= ""
if self.platform.startswith("freebsd"):
self.platform_name = "freebsd"
self.exe_name %= ""
def auto(self, executable_path=None, force=False, version_main=None, _=None):
"""
Args:
executable_path:
force:
version_main:
Returns:
"""
p = pathlib.Path(self.data_path)
if self.user_multi_procs:
with Lock():
files = list(p.rglob("*chromedriver*"))
most_recent = max(files, key=lambda f: f.stat().st_mtime)
files.remove(most_recent)
list(map(lambda f: f.unlink(), files))
if self.is_binary_patched(most_recent):
self.executable_path = str(most_recent)
return True
if executable_path: if executable_path:
self.executable_path = executable_path self.executable_path = executable_path
self._custom_exe_path = True self._custom_exe_path = True
@ -108,27 +176,104 @@ class Patcher(object):
if force is True: if force is True:
self.force = force self.force = force
try:
os.unlink(self.executable_path) if self.platform_name == "freebsd":
except PermissionError: chromedriver_path = shutil.which("chromedriver")
if self.force:
self.force_kill_instances(self.executable_path) if not os.path.isfile(chromedriver_path) or not os.access(chromedriver_path, os.X_OK):
return self.auto(force=not self.force) logging.error("Chromedriver not installed!")
return
version_path = os.path.join(os.path.dirname(self.executable_path), "version.txt")
process = os.popen(f'"{chromedriver_path}" --version')
chromedriver_version = process.read().split(' ')[1].split(' ')[0]
process.close()
current_version = None
if os.path.isfile(version_path) or os.access(version_path, os.X_OK):
with open(version_path, 'r') as f:
current_version = f.read()
if current_version != chromedriver_version:
logging.info("Copying chromedriver executable...")
shutil.copy(chromedriver_path, self.executable_path)
os.chmod(self.executable_path, 0o755)
with open(version_path, 'w') as f:
f.write(chromedriver_version)
logging.info("Chromedriver executable copied!")
else:
try: try:
if self.is_binary_patched(): os.unlink(self.executable_path)
# assumes already running AND patched
return True
except PermissionError: except PermissionError:
if self.force:
self.force_kill_instances(self.executable_path)
return self.auto(force=not self.force)
try:
if self.is_binary_patched():
# assumes already running AND patched
return True
except PermissionError:
pass
# return False
except FileNotFoundError:
pass pass
# return False
except FileNotFoundError: release = self.fetch_release_number()
self.version_main = release.version[0]
self.version_full = release
self.unzip_package(self.fetch_package())
return self.patch()
def driver_binary_in_use(self, path: str = None) -> bool:
"""
naive test to check if a found chromedriver binary is
currently in use
Args:
path: a string or PathLike object to the binary to check.
if not specified, we check use this object's executable_path
"""
if not path:
path = self.executable_path
p = pathlib.Path(path)
if not p.exists():
raise OSError("file does not exist: %s" % p)
try:
with open(p, mode="a+b") as fs:
exc = []
try:
fs.seek(0, 0)
except PermissionError as e:
exc.append(e) # since some systems apprently allow seeking
# we conduct another test
try:
fs.readline()
except PermissionError as e:
exc.append(e)
if exc:
return True
return False
# ok safe to assume this is in use
except Exception as e:
# logger.exception("whoops ", e)
pass pass
release = self.fetch_release_number() def cleanup_unused_files(self):
self.version_main = release.version[0] p = pathlib.Path(self.data_path)
self.version_full = release items = list(p.glob("*undetected*"))
self.unzip_package(self.fetch_package()) for item in items:
return self.patch() try:
item.unlink()
except:
pass
def patch(self): def patch(self):
self.patch_exe() self.patch_exe()
@ -140,12 +285,32 @@ class Patcher(object):
:return: version string :return: version string
:rtype: LooseVersion :rtype: LooseVersion
""" """
path = "/latest_release" # Endpoint for old versions of Chromedriver (114 and below)
if self.version_main: if self.is_old_chromedriver:
path += f"_{self.version_main}" path = f"/latest_release_{self.version_main}"
path = path.upper() path = path.upper()
logger.debug("getting release number from %s" % path)
return LooseVersion(urlopen(self.url_repo + path).read().decode())
# Endpoint for new versions of Chromedriver (115+)
if not self.version_main:
# Fetch the latest version
path = "/last-known-good-versions-with-downloads.json"
logger.debug("getting release number from %s" % path)
with urlopen(self.url_repo + path) as conn:
response = conn.read().decode()
last_versions = json.loads(response)
return LooseVersion(last_versions["channels"]["Stable"]["version"])
# Fetch the latest minor version of the major version provided
path = "/latest-versions-per-milestone-with-downloads.json"
logger.debug("getting release number from %s" % path) logger.debug("getting release number from %s" % path)
return LooseVersion(urlopen(self.url_repo + path).read().decode()) with urlopen(self.url_repo + path) as conn:
response = conn.read().decode()
major_versions = json.loads(response)
return LooseVersion(major_versions["milestones"][str(self.version_main)]["version"])
def parse_exe_version(self): def parse_exe_version(self):
with io.open(self.executable_path, "rb") as f: with io.open(self.executable_path, "rb") as f:
@ -160,10 +325,16 @@ class Patcher(object):
:return: path to downloaded file :return: path to downloaded file
""" """
u = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, self.zip_name) zip_name = f"chromedriver_{self.platform_name}.zip"
logger.debug("downloading from %s" % u) if self.is_old_chromedriver:
# return urlretrieve(u, filename=self.data_path)[0] download_url = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, zip_name)
return urlretrieve(u)[0] else:
zip_name = zip_name.replace("_", "-", 1)
download_url = "https://storage.googleapis.com/chrome-for-testing-public/%s/%s/%s"
download_url %= (self.version_full.vstring, self.platform_name, zip_name)
logger.debug("downloading from %s" % download_url)
return urlretrieve(download_url)[0]
def unzip_package(self, fp): def unzip_package(self, fp):
""" """
@ -171,6 +342,12 @@ class Patcher(object):
:return: path to unpacked executable :return: path to unpacked executable
""" """
exe_path = self.exe_name
if not self.is_old_chromedriver:
# The new chromedriver unzips into its own folder
zip_name = f"chromedriver-{self.platform_name}"
exe_path = os.path.join(zip_name, self.exe_name)
logger.debug("unzipping %s" % fp) logger.debug("unzipping %s" % fp)
try: try:
os.unlink(self.zip_path) os.unlink(self.zip_path)
@ -179,10 +356,10 @@ class Patcher(object):
os.makedirs(self.zip_path, mode=0o755, exist_ok=True) os.makedirs(self.zip_path, mode=0o755, exist_ok=True)
with zipfile.ZipFile(fp, mode="r") as zf: with zipfile.ZipFile(fp, mode="r") as zf:
zf.extract(self.exe_name, self.zip_path) zf.extractall(self.zip_path)
os.rename(os.path.join(self.zip_path, self.exe_name), self.executable_path) os.rename(os.path.join(self.zip_path, exe_path), self.executable_path)
os.remove(fp) os.remove(fp)
os.rmdir(self.zip_path) shutil.rmtree
os.chmod(self.executable_path, 0o755) os.chmod(self.executable_path, 0o755)
return self.executable_path return self.executable_path
@ -203,43 +380,46 @@ class Patcher(object):
@staticmethod @staticmethod
def gen_random_cdc(): def gen_random_cdc():
cdc = random.choices(string.ascii_lowercase, k=26) cdc = random.choices(string.ascii_letters, k=27)
cdc[-6:-4] = map(str.upper, cdc[-6:-4])
cdc[2] = cdc[0]
cdc[3] = "_"
return "".join(cdc).encode() return "".join(cdc).encode()
def is_binary_patched(self, executable_path=None): def is_binary_patched(self, executable_path=None):
"""simple check if executable is patched.
:return: False if not patched, else True
"""
executable_path = executable_path or self.executable_path executable_path = executable_path or self.executable_path
with io.open(executable_path, "rb") as fh: try:
for line in iter(lambda: fh.readline(), b""): with io.open(executable_path, "rb") as fh:
if b"cdc_" in line: return fh.read().find(b"undetected chromedriver") != -1
return False except FileNotFoundError:
else: return False
return True
def patch_exe(self): def patch_exe(self):
""" start = time.perf_counter()
Patches the ChromeDriver binary
:return: False on failure, binary name on success
"""
logger.info("patching driver executable %s" % self.executable_path) logger.info("patching driver executable %s" % self.executable_path)
linect = 0
replacement = self.gen_random_cdc()
with io.open(self.executable_path, "r+b") as fh: with io.open(self.executable_path, "r+b") as fh:
for line in iter(lambda: fh.readline(), b""): content = fh.read()
if b"cdc_" in line: # match_injected_codeblock = re.search(rb"{window.*;}", content)
fh.seek(-len(line), 1) match_injected_codeblock = re.search(rb"\{window\.cdc.*?;\}", content)
newline = re.sub(b"cdc_.{22}", replacement, line) if match_injected_codeblock:
fh.write(newline) target_bytes = match_injected_codeblock[0]
linect += 1 new_target_bytes = (
return linect b'{console.log("undetected chromedriver 1337!")}'.ljust(
len(target_bytes), b" "
)
)
new_content = content.replace(target_bytes, new_target_bytes)
if new_content == content:
logger.warning(
"something went wrong patching the driver binary. could not find injection code block"
)
else:
logger.debug(
"found block:\n%s\nreplacing with:\n%s"
% (target_bytes, new_target_bytes)
)
fh.seek(0)
fh.write(new_content)
logger.debug(
"patching took us {:.2f} seconds".format(time.perf_counter() - start)
)
def __repr__(self): def __repr__(self):
return "{0:s}({1:s})".format( return "{0:s}({1:s})".format(
@ -248,7 +428,6 @@ class Patcher(object):
) )
def __del__(self): def __del__(self):
if self._custom_exe_path: if self._custom_exe_path:
# if the driver binary is specified by user # if the driver binary is specified by user
# we assume it is important enough to not delete it # we assume it is important enough to not delete it
@ -256,21 +435,17 @@ class Patcher(object):
else: else:
timeout = 3 # stop trying after this many seconds timeout = 3 # stop trying after this many seconds
t = time.monotonic() t = time.monotonic()
while True: now = lambda: time.monotonic()
now = time.monotonic() while now() - t > timeout:
if now - t > timeout: # we don't want to wait until the end of time
# we don't want to wait until the end of time
logger.debug(
"could not unlink %s in time (%d seconds)"
% (self.executable_path, timeout)
)
break
try: try:
if self.user_multi_procs:
break
os.unlink(self.executable_path) os.unlink(self.executable_path)
logger.debug("successfully unlinked %s" % self.executable_path) logger.debug("successfully unlinked %s" % self.executable_path)
break break
except (OSError, RuntimeError, PermissionError): except (OSError, RuntimeError, PermissionError):
time.sleep(0.1) time.sleep(0.01)
continue continue
except FileNotFoundError: except FileNotFoundError:
break break

View File

@ -6,6 +6,7 @@ import json
import logging import logging
import threading import threading
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -63,9 +64,7 @@ class Reactor(threading.Thread):
break break
async def listen(self): async def listen(self):
while self.running: while self.running:
await self._wait_service_started() await self._wait_service_started()
await asyncio.sleep(1) await asyncio.sleep(1)
@ -74,9 +73,7 @@ class Reactor(threading.Thread):
log_entries = self.driver.get_log("performance") log_entries = self.driver.get_log("performance")
for entry in log_entries: for entry in log_entries:
try: try:
obj_serialized: str = entry.get("message") obj_serialized: str = entry.get("message")
obj = json.loads(obj_serialized) obj = json.loads(obj_serialized)
message = obj.get("message") message = obj.get("message")

View File

@ -1,4 +0,0 @@
# for backward compatibility
import sys
sys.modules[__name__] = sys.modules[__package__]

View File

@ -1,7 +1,30 @@
from typing import List
from selenium.webdriver.common.by import By
import selenium.webdriver.remote.webelement import selenium.webdriver.remote.webelement
class WebElement(selenium.webdriver.remote.webelement.WebElement): class WebElement(selenium.webdriver.remote.webelement.WebElement):
def click_safe(self):
super().click()
self._parent.reconnect(0.1)
def children(
self, tag=None, recursive=False
) -> List[selenium.webdriver.remote.webelement.WebElement]:
"""
returns direct child elements of current element
:param tag: str, if supplied, returns <tag> nodes only
"""
script = "return [... arguments[0].children]"
if tag:
script += ".filter( node => node.tagName === '%s')" % tag.upper()
if recursive:
return list(_recursive_children(self, tag))
return list(self._parent.execute_script(script, self))
class UCWebElement(WebElement):
""" """
Custom WebElement class which makes it easier to view elements when Custom WebElement class which makes it easier to view elements when
working in an interactive environment. working in an interactive environment.
@ -14,9 +37,13 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
""" """
def __init__(self, parent, id_):
super().__init__(parent, id_)
self._attrs = None
@property @property
def attrs(self): def attrs(self):
if not hasattr(self, "_attrs"): if not self._attrs:
self._attrs = self._parent.execute_script( self._attrs = self._parent.execute_script(
""" """
var items = {}; var items = {};
@ -35,3 +62,25 @@ class WebElement(selenium.webdriver.remote.webelement.WebElement):
if strattrs: if strattrs:
strattrs = " " + strattrs strattrs = " " + strattrs
return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>" return f"{self.__class__.__name__} <{self.tag_name}{strattrs}>"
def _recursive_children(element, tag: str = None, _results=None):
"""
returns all children of <element> recursively
:param element: `WebElement` object.
find children below this <element>
:param tag: str = None.
if provided, return only <tag> elements. example: 'a', or 'img'
:param _results: do not use!
"""
results = _results or set()
for element in element.children():
if tag:
if element.tag_name == tag:
results.add(element)
else:
results.add(element)
results |= _recursive_children(element, tag, results)
return results

View File

@ -1,13 +1,19 @@
import json import json
import logging import logging
import os import os
import platform
import re import re
import shutil import shutil
import sys
import tempfile
import urllib.parse
from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.chrome.webdriver import WebDriver
import undetected_chromedriver as uc import undetected_chromedriver as uc
FLARESOLVERR_VERSION = None FLARESOLVERR_VERSION = None
PLATFORM_VERSION = None
CHROME_EXE_PATH = None
CHROME_MAJOR_VERSION = None CHROME_MAJOR_VERSION = None
USER_AGENT = None USER_AGENT = None
XVFB_DISPLAY = None XVFB_DISPLAY = None
@ -28,24 +34,139 @@ def get_flaresolverr_version() -> str:
return FLARESOLVERR_VERSION return FLARESOLVERR_VERSION
package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'package.json') package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'package.json')
if not os.path.isfile(package_path):
package_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'package.json')
with open(package_path) as f: with open(package_path) as f:
FLARESOLVERR_VERSION = json.loads(f.read())['version'] FLARESOLVERR_VERSION = json.loads(f.read())['version']
return FLARESOLVERR_VERSION return FLARESOLVERR_VERSION
def get_current_platform() -> str:
global PLATFORM_VERSION
if PLATFORM_VERSION is not None:
return PLATFORM_VERSION
PLATFORM_VERSION = os.name
return PLATFORM_VERSION
def get_webdriver() -> WebDriver:
global PATCHED_DRIVER_PATH def create_proxy_extension(proxy: dict) -> str:
parsed_url = urllib.parse.urlparse(proxy['url'])
scheme = parsed_url.scheme
host = parsed_url.hostname
port = parsed_url.port
username = proxy['username']
password = proxy['password']
manifest_json = """
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background": {"scripts": ["background.js"]},
"minimum_chrome_version": "76.0.0"
}
"""
background_js = """
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "%s",
host: "%s",
port: %d
},
bypassList: ["localhost"]
}
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
return {
authCredentials: {
username: "%s",
password: "%s"
}
};
}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{ urls: ["<all_urls>"] },
['blocking']
);
""" % (
scheme,
host,
port,
username,
password
)
proxy_extension_dir = tempfile.mkdtemp()
with open(os.path.join(proxy_extension_dir, "manifest.json"), "w") as f:
f.write(manifest_json)
with open(os.path.join(proxy_extension_dir, "background.js"), "w") as f:
f.write(background_js)
return proxy_extension_dir
def get_webdriver(proxy: dict = None) -> WebDriver:
global PATCHED_DRIVER_PATH, USER_AGENT
logging.debug('Launching web browser...') logging.debug('Launching web browser...')
# undetected_chromedriver # undetected_chromedriver
options = uc.ChromeOptions() options = uc.ChromeOptions()
options.add_argument('--no-sandbox') options.add_argument('--no-sandbox')
options.add_argument('--window-size=1920,1080') options.add_argument('--window-size=1920,1080')
options.add_argument('--disable-search-engine-choice-screen')
# todo: this param shows a warning in chrome head-full # todo: this param shows a warning in chrome head-full
options.add_argument('--disable-setuid-sandbox') options.add_argument('--disable-setuid-sandbox')
options.add_argument('--disable-dev-shm-usage') options.add_argument('--disable-dev-shm-usage')
# this option removes the zygote sandbox (it seems that the resolution is a bit faster)
options.add_argument('--no-zygote')
# attempt to fix Docker ARM32 build
IS_ARMARCH = platform.machine().startswith(('arm', 'aarch'))
if IS_ARMARCH:
options.add_argument('--disable-gpu-sandbox')
options.add_argument('--disable-software-rasterizer')
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
# fix GL errors in ASUSTOR NAS
# https://github.com/FlareSolverr/FlareSolverr/issues/782
# https://github.com/microsoft/vscode/issues/127800#issuecomment-873342069
# https://peter.sh/experiments/chromium-command-line-switches/#use-gl
options.add_argument('--use-gl=swiftshader')
# note: headless mode is detected (options.headless = True) language = os.environ.get('LANG', None)
if language is not None:
options.add_argument('--accept-lang=%s' % language)
# Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
if USER_AGENT is not None:
options.add_argument('--user-agent=%s' % USER_AGENT)
proxy_extension_dir = None
if proxy and all(key in proxy for key in ['url', 'username', 'password']):
proxy_extension_dir = create_proxy_extension(proxy)
options.add_argument("--load-extension=%s" % os.path.abspath(proxy_extension_dir))
elif proxy and 'url' in proxy:
proxy_url = proxy['url']
logging.debug("Using webdriver proxy: %s", proxy_url)
options.add_argument('--proxy-server=%s' % proxy_url)
# note: headless mode is detected (headless = True)
# we launch the browser in head-full mode with the window hidden # we launch the browser in head-full mode with the window hidden
windows_headless = False windows_headless = False
if get_config_headless(): if get_config_headless():
@ -53,6 +174,8 @@ def get_webdriver() -> WebDriver:
windows_headless = True windows_headless = True
else: else:
start_xvfb_display() start_xvfb_display()
# For normal headless mode:
# options.add_argument('--headless')
# if we are inside the Docker container, we avoid downloading the driver # if we are inside the Docker container, we avoid downloading the driver
driver_exe_path = None driver_exe_path = None
@ -65,15 +188,27 @@ def get_webdriver() -> WebDriver:
if PATCHED_DRIVER_PATH is not None: if PATCHED_DRIVER_PATH is not None:
driver_exe_path = PATCHED_DRIVER_PATH driver_exe_path = PATCHED_DRIVER_PATH
# detect chrome path
browser_executable_path = get_chrome_exe_path()
# downloads and patches the chromedriver # downloads and patches the chromedriver
# if we don't set driver_executable_path it downloads, patches, and deletes the driver each time # if we don't set driver_executable_path it downloads, patches, and deletes the driver each time
driver = uc.Chrome(options=options, driver_executable_path=driver_exe_path, version_main=version_main, try:
windows_headless=windows_headless) driver = uc.Chrome(options=options, browser_executable_path=browser_executable_path,
driver_executable_path=driver_exe_path, version_main=version_main,
windows_headless=windows_headless, headless=get_config_headless())
except Exception as e:
logging.error("Error starting Chrome: %s" % e)
# save the patched driver to avoid re-downloads # save the patched driver to avoid re-downloads
if driver_exe_path is None: if driver_exe_path is None:
PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name) PATCHED_DRIVER_PATH = os.path.join(driver.patcher.data_path, driver.patcher.exe_name)
shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH) if PATCHED_DRIVER_PATH != driver.patcher.executable_path:
shutil.copy(driver.patcher.executable_path, PATCHED_DRIVER_PATH)
# clean up proxy extension directory
if proxy_extension_dir is not None:
shutil.rmtree(proxy_extension_dir)
# selenium vanilla # selenium vanilla
# options = webdriver.ChromeOptions() # options = webdriver.ChromeOptions()
@ -86,23 +221,45 @@ def get_webdriver() -> WebDriver:
return driver return driver
def get_chrome_exe_path() -> str:
global CHROME_EXE_PATH
if CHROME_EXE_PATH is not None:
return CHROME_EXE_PATH
# linux pyinstaller bundle
chrome_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome', "chrome")
if os.path.exists(chrome_path):
if not os.access(chrome_path, os.X_OK):
raise Exception(f'Chrome binary "{chrome_path}" is not executable. '
f'Please, extract the archive with "tar xzf <file.tar.gz>".')
CHROME_EXE_PATH = chrome_path
return CHROME_EXE_PATH
# windows pyinstaller bundle
chrome_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'chrome', "chrome.exe")
if os.path.exists(chrome_path):
CHROME_EXE_PATH = chrome_path
return CHROME_EXE_PATH
# system
CHROME_EXE_PATH = uc.find_chrome_executable()
return CHROME_EXE_PATH
def get_chrome_major_version() -> str: def get_chrome_major_version() -> str:
global CHROME_MAJOR_VERSION global CHROME_MAJOR_VERSION
if CHROME_MAJOR_VERSION is not None: if CHROME_MAJOR_VERSION is not None:
return CHROME_MAJOR_VERSION return CHROME_MAJOR_VERSION
if os.name == 'nt': if os.name == 'nt':
# Example: '104.0.5112.79'
try: try:
stream = os.popen( complete_version = extract_version_nt_executable(get_chrome_exe_path())
'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"')
output = stream.read()
# Example: '104.0.5112.79'
complete_version = extract_version_registry(output)
except Exception: except Exception:
# Example: '104.0.5112.79' try:
complete_version = extract_version_folder() complete_version = extract_version_nt_registry()
except Exception:
# Example: '104.0.5112.79'
complete_version = extract_version_nt_folder()
else: else:
chrome_path = uc.find_chrome_executable() chrome_path = get_chrome_exe_path()
process = os.popen(f'"{chrome_path}" --version') process = os.popen(f'"{chrome_path}" --version')
# Example 1: 'Chromium 104.0.5112.79 Arch Linux\n' # Example 1: 'Chromium 104.0.5112.79 Arch Linux\n'
# Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n' # Example 2: 'Google Chrome 104.0.5112.79 Arch Linux\n'
@ -110,24 +267,32 @@ def get_chrome_major_version() -> str:
process.close() process.close()
CHROME_MAJOR_VERSION = complete_version.split('.')[0].split(' ')[-1] CHROME_MAJOR_VERSION = complete_version.split('.')[0].split(' ')[-1]
logging.info(f"Chrome major version: {CHROME_MAJOR_VERSION}")
return CHROME_MAJOR_VERSION return CHROME_MAJOR_VERSION
def extract_version_registry(output) -> str: def extract_version_nt_executable(exe_path: str) -> str:
try: import pefile
google_version = '' pe = pefile.PE(exe_path, fast_load=True)
for letter in output[output.rindex('DisplayVersion REG_SZ') + 24:]: pe.parse_data_directories(
if letter != '\n': directories=[pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_RESOURCE"]]
google_version += letter )
else: return pe.FileInfo[0][0].StringTable[0].entries[b"FileVersion"].decode('utf-8')
break
return google_version.strip()
except TypeError:
return ''
def extract_version_folder() -> str: def extract_version_nt_registry() -> str:
stream = os.popen(
'reg query "HKLM\\SOFTWARE\\Wow6432Node\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\Google Chrome"')
output = stream.read()
google_version = ''
for letter in output[output.rindex('DisplayVersion REG_SZ') + 24:]:
if letter != '\n':
google_version += letter
else:
break
return google_version.strip()
def extract_version_nt_folder() -> str:
# Check if the Chrome folder exists in the x32 or x64 Program Files folders. # Check if the Chrome folder exists in the x32 or x64 Program Files folders.
for i in range(2): for i in range(2):
path = 'C:\\Program Files' + (' (x86)' if i else '') + '\\Google\\Chrome\\Application' path = 'C:\\Program Files' + (' (x86)' if i else '') + '\\Google\\Chrome\\Application'
@ -152,11 +317,15 @@ def get_user_agent(driver=None) -> str:
if driver is None: if driver is None:
driver = get_webdriver() driver = get_webdriver()
USER_AGENT = driver.execute_script("return navigator.userAgent") USER_AGENT = driver.execute_script("return navigator.userAgent")
# Fix for Chrome 117 | https://github.com/FlareSolverr/FlareSolverr/issues/910
USER_AGENT = re.sub('HEADLESS', '', USER_AGENT, flags=re.IGNORECASE)
return USER_AGENT return USER_AGENT
except Exception as e: except Exception as e:
raise Exception("Error getting browser User-Agent. " + str(e)) raise Exception("Error getting browser User-Agent. " + str(e))
finally: finally:
if driver is not None: if driver is not None:
if PLATFORM_VERSION == "nt":
driver.close()
driver.quit() driver.quit()