diff --git a/.github/workflows/proxy_integration_tests_javascript.yml b/.github/workflows/proxy_integration_tests_javascript.yml index 25c4aa1..de0089d 100644 --- a/.github/workflows/proxy_integration_tests_javascript.yml +++ b/.github/workflows/proxy_integration_tests_javascript.yml @@ -19,12 +19,12 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 (Node 24) with: persist-credentials: false - name: Set up Node - uses: actions/setup-node@v4 + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 (Node 24) with: node-version: "24" cache: npm diff --git a/.github/workflows/proxy_integration_tests_php.yml b/.github/workflows/proxy_integration_tests_php.yml index d20ced9..1f41742 100644 --- a/.github/workflows/proxy_integration_tests_php.yml +++ b/.github/workflows/proxy_integration_tests_php.yml @@ -19,7 +19,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 (Node 24) with: persist-credentials: false diff --git a/.github/workflows/proxy_integration_tests_python.yml b/.github/workflows/proxy_integration_tests_python.yml index 8f85aab..f2c8c49 100644 --- a/.github/workflows/proxy_integration_tests_python.yml +++ b/.github/workflows/proxy_integration_tests_python.yml @@ -19,22 +19,23 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 (Node 24) with: persist-credentials: false - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 (Node 24) with: - python-version: "3.x" + # Pin for reproducible dependency wheels (pycurl, etc.); adjust as needed. 
+ python-version: "3.12" - name: Install system dependencies (pycurl) run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev - - name: Install python-proxy-headers and example dependencies + - name: Install example dependencies run: | python -m pip install --upgrade pip - pip install python-proxy-headers requests urllib3 aiohttp httpx cloudscraper autoscraper pycurl + pip install -r python/requirements.txt - name: Require PROXY_URL Actions secret env: diff --git a/.github/workflows/proxy_integration_tests_ruby.yml b/.github/workflows/proxy_integration_tests_ruby.yml index e9f6ebf..648e3f9 100644 --- a/.github/workflows/proxy_integration_tests_ruby.yml +++ b/.github/workflows/proxy_integration_tests_ruby.yml @@ -19,12 +19,12 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 (Node 24) with: persist-credentials: false - name: Set up Ruby - uses: ruby/setup-ruby@2e007403fc1ec238429ecaa57af6f22f019cc135 # v1.234.0 + uses: ruby/setup-ruby@3ff19f5e2baf30647122352b96108b1fbe250c64 # v1.299.0 (Node 24) with: ruby-version: "3.3" bundler-cache: true diff --git a/README.md b/README.md index bc35894..93e4fdc 100644 --- a/README.md +++ b/README.md @@ -9,69 +9,56 @@ Example code for using proxy servers in different programming languages. Current ## Python Proxy Examples -### Using python-proxy-headers - -The [python-proxy-headers](https://github.com/proxymesh/python-proxy-headers) library enables sending custom headers to proxy servers and receiving proxy response headers. This is essential for services like [ProxyMesh](https://proxymesh.com) that use custom headers for country selection and IP assignment. 
- **Installation:** ```bash -pip install python-proxy-headers +pip install -r python/requirements.txt ``` -**Running Examples:** +`pycurl` needs libcurl and `curl-config` (for example Debian/Ubuntu: `libcurl4-openssl-dev`). The test runner skips `pycurl-*` examples when `pycurl` is not installed, and skips `scrapy-proxy` when `import scrapy` fails (for example a broken `cryptography` / `cffi` install). -All examples read proxy configuration from environment variables: +**Running Examples:** ```bash # Required: Set your proxy URL export PROXY_URL='http://user:pass@proxy.example.com:8080' -# Optional: Custom test URL (default: https://api.ipify.org?format=json) +# Optional: Target URL (default: https://api.ipify.org?format=json) export TEST_URL='https://httpbin.org/ip' -# Optional: Send a custom header to the proxy -export PROXY_HEADER='X-ProxyMesh-Country' -export PROXY_VALUE='US' - -# Optional: Read a specific header from the response +# Optional: Print one response header export RESPONSE_HEADER='X-ProxyMesh-IP' -# Run a single example -python python/requests-proxy-headers.py +# Single example +python python/requests-proxy.py -# Run all examples as tests +# All examples as tests python python/run_tests.py -# Run specific examples -python python/run_tests.py requests-proxy-headers httpx-proxy-headers +# Specific examples (substring match, like the JS runner) +python python/run_tests.py requests httpx ``` **Examples:** | Library | Example | Description | |---------|---------|-------------| -| [requests](https://docs.python-requests.org/) | [requests-proxy-headers.py](python/requests-proxy-headers.py) | Simple HTTP requests with proxy headers | -| [requests](https://docs.python-requests.org/) | [requests-proxy-headers-session.py](python/requests-proxy-headers-session.py) | Session-based requests for connection pooling | -| [urllib3](https://urllib3.readthedocs.io/) | [urllib3-proxy-headers.py](python/urllib3-proxy-headers.py) | Low-level HTTP client with proxy headers 
| -| [aiohttp](https://docs.aiohttp.org/) | [aiohttp-proxy-headers.py](python/aiohttp-proxy-headers.py) | Async HTTP client with proxy headers | -| [httpx](https://www.python-httpx.org/) | [httpx-proxy-headers.py](python/httpx-proxy-headers.py) | Modern HTTP client with proxy headers | -| [httpx](https://www.python-httpx.org/) | [httpx-async-proxy-headers.py](python/httpx-async-proxy-headers.py) | Async httpx with proxy headers | -| [pycurl](http://pycurl.io/) | [pycurl-proxy-headers.py](python/pycurl-proxy-headers.py) | libcurl bindings with proxy headers | -| [pycurl](http://pycurl.io/) | [pycurl-proxy-headers-lowlevel.py](python/pycurl-proxy-headers-lowlevel.py) | Low-level pycurl integration | -| [cloudscraper](https://github.com/venomous/cloudscraper) | [cloudscraper-proxy-headers.py](python/cloudscraper-proxy-headers.py) | Cloudflare bypass with proxy headers | -| [autoscraper](https://github.com/alirezamika/autoscraper) | [autoscraper-proxy-headers.py](python/autoscraper-proxy-headers.py) | Automatic web scraping with proxy headers | - -> **Note:** Most Python HTTP libraries do not expose custom headers on HTTPS `CONNECT` tunneling by default. These examples use [python-proxy-headers](https://github.com/proxymesh/python-proxy-headers) adapters to send proxy headers and read proxy response headers consistently. 
- -### Basic Proxy Examples - -* [requests-proxy.py](python/requests-proxy.py) - Basic proxy usage with requests -* [requests-random-proxy.py](python/requests-random-proxy.py) - Random proxy rotation +| [requests](https://docs.python-requests.org/) | [requests-proxy.py](python/requests-proxy.py) | Basic `GET` with `proxies=` | +| [requests](https://docs.python-requests.org/) | [requests-session-proxy.py](python/requests-session-proxy.py) | Session with pooled connections | +| [urllib3](https://urllib3.readthedocs.io/) | [urllib3-proxy.py](python/urllib3-proxy.py) | `ProxyManager` | +| [aiohttp](https://docs.aiohttp.org/) | [aiohttp-proxy.py](python/aiohttp-proxy.py) | Async client, `proxy=` on the request | +| [httpx](https://www.python-httpx.org/) | [httpx-proxy.py](python/httpx-proxy.py) | Sync client, `proxy=` on the client | +| [httpx](https://www.python-httpx.org/) | [httpx-async-proxy.py](python/httpx-async-proxy.py) | Async client | +| [pycurl](http://pycurl.io/) | [pycurl-proxy.py](python/pycurl-proxy.py) | libcurl via `setopt` (`PROXY`, `WRITEDATA`, etc.) | +| [cloudscraper](https://github.com/VeNoMouS/cloudscraper) | [cloudscraper-proxy.py](python/cloudscraper-proxy.py) | Requests-based scraper with `proxies` | +| [autoscraper](https://github.com/alirezamika/autoscraper) | [autoscraper-proxy.py](python/autoscraper-proxy.py) | Offline `html=` demo (matches upstream tests); the AutoScraper README shows `request_args` + `proxies` for live URLs | +| [Scrapy](https://scrapy.org/) | [scrapy-proxy.py](python/scrapy-proxy.py) | `scrapy runspider` with `meta['proxy']` | + +### Other Python scripts -### Scrapy +* [requests-random-proxy.py](python/requests-random-proxy.py) - Random proxy rotation -* [scrapy-proxy-headers.py](python/scrapy-proxy-headers.py) - Scrapy spider with proxy headers +> **Note:** Like the Ruby, JavaScript, and PHP examples here, these scripts use each library's normal proxy options only. 
Most of them do not send custom headers on the HTTPS `CONNECT` tunnel or surface proxy `CONNECT` response headers. For that, see [python-proxy-headers](https://github.com/proxymesh/python-proxy-headers) or [scrapy-proxy-headers](https://github.com/proxymesh/scrapy-proxy-headers). ## JavaScript / Node.js Proxy Examples diff --git a/python/aiohttp-proxy-headers.py b/python/aiohttp-proxy-headers.py deleted file mode 100755 index cde02c8..0000000 --- a/python/aiohttp-proxy-headers.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python3 -""" -aiohttp with proxy headers example. - -Configuration via environment variables: - PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 - TEST_URL - URL to request (default: https://api.ipify.org?format=json) - PROXY_HEADER - Header name to send to proxy (optional) - PROXY_VALUE - Header value to send to proxy (optional) - RESPONSE_HEADER - Header name to read from response (optional) - -See: https://github.com/proxymesh/python-proxy-headers -""" -import os -import sys -import asyncio -from python_proxy_headers import aiohttp_proxy - -# Get configuration from environment -proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') -if not proxy_url: - print("Error: Set PROXY_URL environment variable", file=sys.stderr) - sys.exit(1) - -test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') -proxy_header = os.environ.get('PROXY_HEADER') -proxy_value = os.environ.get('PROXY_VALUE') -response_header = os.environ.get('RESPONSE_HEADER') - -proxy_headers = {proxy_header: proxy_value} if proxy_header and proxy_value else None - - -async def main(): - async with aiohttp_proxy.ProxyClientSession() as session: - async with session.get( - test_url, - proxy=proxy_url, - proxy_headers=proxy_headers - ) as response: - body = await response.text() - print(f"Status: {response.status}") - print(f"Body: {body}") - if response_header: - print(f"{response_header}: 
{response.headers.get(response_header)}") - - -if __name__ == '__main__': - asyncio.run(main()) diff --git a/python/aiohttp-proxy.py b/python/aiohttp-proxy.py new file mode 100644 index 0000000..c22da3d --- /dev/null +++ b/python/aiohttp-proxy.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +""" +aiohttp with an HTTP proxy. + +Configuration via environment variables: + PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 + TEST_URL - URL to request (default: https://api.ipify.org?format=json) + RESPONSE_HEADER - Optional header name to print from the response + +Documentation: https://docs.aiohttp.org/en/stable/client_advanced.html#proxy-support +""" +import asyncio +import os +import sys + +import aiohttp + +proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') +if not proxy_url: + print('Error: Set PROXY_URL environment variable', file=sys.stderr) + sys.exit(1) + +test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') +response_header = os.environ.get('RESPONSE_HEADER') + + +async def main() -> None: + timeout = aiohttp.ClientTimeout(total=30) + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.get(test_url, proxy=proxy_url) as response: + body = await response.text() + print(f'Status: {response.status}') + print(f'Body: {body}') + if response_header: + print(f'{response_header}: {response.headers.get(response_header)}') + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/python/autoscraper-proxy-headers.py b/python/autoscraper-proxy-headers.py deleted file mode 100755 index 4df0d1b..0000000 --- a/python/autoscraper-proxy-headers.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 -""" -AutoScraper with proxy headers example. 
- -Configuration via environment variables: - PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 - TEST_URL - URL to request (default: https://api.ipify.org?format=json) - PROXY_HEADER - Header name to send to proxy (optional) - PROXY_VALUE - Header value to send to proxy (optional) - RESPONSE_HEADER - Header name to read from response (optional) - -See: https://github.com/proxymesh/python-proxy-headers -""" -import os -import sys -from python_proxy_headers.autoscraper_proxy import ProxyAutoScraper - -# Get configuration from environment -proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') -if not proxy_url: - print("Error: Set PROXY_URL environment variable", file=sys.stderr) - sys.exit(1) - -test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') -proxy_header = os.environ.get('PROXY_HEADER') -proxy_value = os.environ.get('PROXY_VALUE') -response_header = os.environ.get('RESPONSE_HEADER') - -proxy_headers = {proxy_header: proxy_value} if proxy_header and proxy_value else None - -# Create scraper and test via underlying session -scraper = ProxyAutoScraper(proxy_headers=proxy_headers) -session = scraper._get_session() -session.proxies = {'http': proxy_url, 'https': proxy_url} - -# Make request -response = session.get(test_url) - -# Output -print(f"Status: {response.status_code}") -print(f"Body: {response.text}") -if response_header: - print(f"{response_header}: {response.headers.get(response_header)}") - -scraper.close() diff --git a/python/autoscraper-proxy.py b/python/autoscraper-proxy.py new file mode 100644 index 0000000..8cb6661 --- /dev/null +++ b/python/autoscraper-proxy.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +""" +AutoScraper with a proxy (how to pass ``request_args``). + +The AutoScraper project tests ``build`` / ``get_result_similar`` with **inline HTML** +only — see ``tests/unit/test_build.py`` and ``tests/integration/`` in +https://github.com/alirezamika/autoscraper — not with live URLs. 
That keeps tests +deterministic. This script does the same for the integration runner. + +**Using a proxy with a real URL** matches the library README:: + + scraper.build(url, wanted_list, request_args={'proxies': proxies, 'timeout': 30}) + scraper.get_result_similar(url, request_args={'proxies': proxies, 'timeout': 30}) + +``PROXY_URL`` is required here so this example fits the same env as the other scripts; +this demo does not open a network connection — it only exercises AutoScraper on +embedded HTML. + +Configuration via environment variables: + PROXY_URL - Required by the test runner (same as other examples), e.g. + http://user:pass@proxy:8080 + +Documentation: https://github.com/alirezamika/autoscraper +""" +import os +import sys + +from autoscraper import AutoScraper + +# Same idea as upstream tests/unit/test_build.py — fixed HTML, no HTTP. +SAMPLE_HTML = """ +
 <h1>AutoScraper proxy example</h1><p>Paragraph one.</p>
+ +""" +PLACEHOLDER_URL = 'https://example.invalid/autoscraper-proxy-demo' + + +def main() -> None: + scraper = AutoScraper() + wanted_list = ['AutoScraper proxy example'] + learned = scraper.build( + html=SAMPLE_HTML, + url=PLACEHOLDER_URL, + wanted_list=wanted_list, + ) + similar = scraper.get_result_similar(html=SAMPLE_HTML, url=PLACEHOLDER_URL) + print(f'AutoScraper build: {learned}') + print(f'AutoScraper get_result_similar: {similar}') + if not learned: + sys.exit(1) + + +if __name__ == '__main__': + if not (os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY')): + print('Error: Set PROXY_URL environment variable', file=sys.stderr) + sys.exit(1) + main() diff --git a/python/cloudscraper-proxy-headers.py b/python/cloudscraper-proxy-headers.py deleted file mode 100755 index ef3e9ea..0000000 --- a/python/cloudscraper-proxy-headers.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python3 -""" -CloudScraper with proxy headers example. - -Configuration via environment variables: - PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 - TEST_URL - URL to request (default: https://api.ipify.org?format=json) - PROXY_HEADER - Header name to send to proxy (optional) - PROXY_VALUE - Header value to send to proxy (optional) - RESPONSE_HEADER - Header name to read from response (optional) - -See: https://github.com/proxymesh/python-proxy-headers -""" -import os -import sys -from python_proxy_headers.cloudscraper_proxy import create_scraper - -# Get configuration from environment -proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') -if not proxy_url: - print("Error: Set PROXY_URL environment variable", file=sys.stderr) - sys.exit(1) - -test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') -proxy_header = os.environ.get('PROXY_HEADER') -proxy_value = os.environ.get('PROXY_VALUE') -response_header = os.environ.get('RESPONSE_HEADER') - -proxy_headers = {proxy_header: proxy_value} if proxy_header and proxy_value 
else None - -# Create scraper with proxy headers -scraper = create_scraper(proxy_headers=proxy_headers, browser='chrome') -scraper.proxies = {'http': proxy_url, 'https': proxy_url} - -# Make request -response = scraper.get(test_url) - -# Output -print(f"Status: {response.status_code}") -print(f"Body: {response.text}") -if response_header: - print(f"{response_header}: {response.headers.get(response_header)}") diff --git a/python/cloudscraper-proxy.py b/python/cloudscraper-proxy.py new file mode 100644 index 0000000..a4fc5bd --- /dev/null +++ b/python/cloudscraper-proxy.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +""" +cloudscraper with an HTTP proxy. + +Configuration via environment variables: + PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 + TEST_URL - URL to request (default: https://api.ipify.org?format=json) + RESPONSE_HEADER - Optional header name to print from the response + +cloudscraper builds on requests; set ``proxies`` on the scraper like a Session. + +Documentation: https://github.com/VeNoMouS/cloudscraper +""" +import os +import sys + +import cloudscraper + +proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') +if not proxy_url: + print('Error: Set PROXY_URL environment variable', file=sys.stderr) + sys.exit(1) + +test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') +response_header = os.environ.get('RESPONSE_HEADER') + +proxies = {'http': proxy_url, 'https': proxy_url} +scraper = cloudscraper.create_scraper(browser='chrome') +scraper.proxies = proxies + +response = scraper.get(test_url, timeout=30) + +print(f'Status: {response.status_code}') +print(f'Body: {response.text}') +if response_header: + print(f'{response_header}: {response.headers.get(response_header)}') diff --git a/python/httpx-async-proxy-headers.py b/python/httpx-async-proxy-headers.py deleted file mode 100755 index 5553c15..0000000 --- a/python/httpx-async-proxy-headers.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env 
python3 -""" -httpx async with proxy headers example. - -Configuration via environment variables: - PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 - TEST_URL - URL to request (default: https://api.ipify.org?format=json) - PROXY_HEADER - Header name to send to proxy (optional) - PROXY_VALUE - Header value to send to proxy (optional) - RESPONSE_HEADER - Header name to read from response (optional) - -See: https://github.com/proxymesh/python-proxy-headers -""" -import os -import sys -import asyncio -import httpx -from python_proxy_headers.httpx_proxy import AsyncHTTPProxyTransport - -# Get configuration from environment -proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') -if not proxy_url: - print("Error: Set PROXY_URL environment variable", file=sys.stderr) - sys.exit(1) - -test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') -proxy_header = os.environ.get('PROXY_HEADER') -proxy_value = os.environ.get('PROXY_VALUE') -response_header = os.environ.get('RESPONSE_HEADER') - -proxy_headers = {proxy_header: proxy_value} if proxy_header and proxy_value else None - - -async def main(): - # Create proxy with optional headers - if proxy_headers: - proxy = httpx.Proxy(proxy_url, headers=proxy_headers) - else: - proxy = proxy_url - - transport = AsyncHTTPProxyTransport(proxy=proxy) - - async with httpx.AsyncClient(mounts={'http://': transport, 'https://': transport}) as client: - response = await client.get(test_url) - - print(f"Status: {response.status_code}") - print(f"Body: {response.text}") - if response_header: - print(f"{response_header}: {response.headers.get(response_header)}") - - -if __name__ == '__main__': - asyncio.run(main()) diff --git a/python/httpx-async-proxy.py b/python/httpx-async-proxy.py new file mode 100644 index 0000000..e4b866d --- /dev/null +++ b/python/httpx-async-proxy.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +""" +httpx (async) with an HTTP proxy. 
+ +Configuration via environment variables: + PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 + TEST_URL - URL to request (default: https://api.ipify.org?format=json) + RESPONSE_HEADER - Optional header name to print from the response + +Documentation: https://www.python-httpx.org/async/ +""" +import asyncio +import os +import sys + +import httpx + +proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') +if not proxy_url: + print('Error: Set PROXY_URL environment variable', file=sys.stderr) + sys.exit(1) + +test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') +response_header = os.environ.get('RESPONSE_HEADER') + + +async def main() -> None: + async with httpx.AsyncClient(proxy=proxy_url, timeout=30.0) as client: + response = await client.get(test_url) + + print(f'Status: {response.status_code}') + print(f'Body: {response.text}') + if response_header: + print(f'{response_header}: {response.headers.get(response_header)}') + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/python/httpx-proxy-headers.py b/python/httpx-proxy-headers.py deleted file mode 100755 index 5952448..0000000 --- a/python/httpx-proxy-headers.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 -""" -httpx with proxy headers example. 
- -Configuration via environment variables: - PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 - TEST_URL - URL to request (default: https://api.ipify.org?format=json) - PROXY_HEADER - Header name to send to proxy (optional) - PROXY_VALUE - Header value to send to proxy (optional) - RESPONSE_HEADER - Header name to read from response (optional) - -See: https://github.com/proxymesh/python-proxy-headers -""" -import os -import sys -import httpx -from python_proxy_headers import httpx_proxy - -# Get configuration from environment -proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') -if not proxy_url: - print("Error: Set PROXY_URL environment variable", file=sys.stderr) - sys.exit(1) - -test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') -proxy_header = os.environ.get('PROXY_HEADER') -proxy_value = os.environ.get('PROXY_VALUE') -response_header = os.environ.get('RESPONSE_HEADER') - -proxy_headers = {proxy_header: proxy_value} if proxy_header and proxy_value else None - -# Create proxy with optional headers -if proxy_headers: - proxy = httpx.Proxy(proxy_url, headers=proxy_headers) -else: - proxy = proxy_url - -# Make request -response = httpx_proxy.get(test_url, proxy=proxy) - -# Output -print(f"Status: {response.status_code}") -print(f"Body: {response.text}") -if response_header: - print(f"{response_header}: {response.headers.get(response_header)}") diff --git a/python/httpx-proxy.py b/python/httpx-proxy.py new file mode 100644 index 0000000..df586ed --- /dev/null +++ b/python/httpx-proxy.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +""" +httpx (sync) with an HTTP proxy. 
+ +Configuration via environment variables: + PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 + TEST_URL - URL to request (default: https://api.ipify.org?format=json) + RESPONSE_HEADER - Optional header name to print from the response + +Documentation: https://www.python-httpx.org/advanced/proxies/ +""" +import os +import sys + +import httpx + +proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') +if not proxy_url: + print('Error: Set PROXY_URL environment variable', file=sys.stderr) + sys.exit(1) + +test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') +response_header = os.environ.get('RESPONSE_HEADER') + +with httpx.Client(proxy=proxy_url, timeout=30.0) as client: + response = client.get(test_url) + +print(f'Status: {response.status_code}') +print(f'Body: {response.text}') +if response_header: + print(f'{response_header}: {response.headers.get(response_header)}') diff --git a/python/pycurl-proxy-headers-lowlevel.py b/python/pycurl-proxy-headers-lowlevel.py deleted file mode 100755 index c957927..0000000 --- a/python/pycurl-proxy-headers-lowlevel.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 -""" -PycURL with proxy headers - low-level example. 
- -Configuration via environment variables: - PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 - TEST_URL - URL to request (default: https://api.ipify.org?format=json) - PROXY_HEADER - Header name to send to proxy (optional) - PROXY_VALUE - Header value to send to proxy (optional) - RESPONSE_HEADER - Header name to read from response (optional) - -See: https://github.com/proxymesh/python-proxy-headers -""" -import os -import sys -import pycurl -from io import BytesIO -from python_proxy_headers.pycurl_proxy import set_proxy_headers, HeaderCapture - -# Get configuration from environment -proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') -if not proxy_url: - print("Error: Set PROXY_URL environment variable", file=sys.stderr) - sys.exit(1) - -test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') -proxy_header = os.environ.get('PROXY_HEADER') -proxy_value = os.environ.get('PROXY_VALUE') -response_header = os.environ.get('RESPONSE_HEADER') - -proxy_headers = {proxy_header: proxy_value} if proxy_header and proxy_value else None - -# Create pycurl handle -c = pycurl.Curl() -buffer = BytesIO() - -c.setopt(pycurl.URL, test_url) -c.setopt(pycurl.PROXY, proxy_url) -c.setopt(pycurl.WRITEDATA, buffer) - -# Add proxy headers if configured -if proxy_headers: - set_proxy_headers(c, proxy_headers) - -# Capture response headers -capture = HeaderCapture(c) - -# Perform request -c.perform() - -# Output -print(f"Status: {c.getinfo(pycurl.RESPONSE_CODE)}") -print(f"Body: {buffer.getvalue().decode('utf-8')}") -if response_header: - print(f"{response_header}: {capture.proxy_headers.get(response_header)}") - -c.close() diff --git a/python/pycurl-proxy-headers.py b/python/pycurl-proxy-headers.py deleted file mode 100755 index ce05d9e..0000000 --- a/python/pycurl-proxy-headers.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 -""" -PycURL with proxy headers example. 
- -Configuration via environment variables: - PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 - TEST_URL - URL to request (default: https://api.ipify.org?format=json) - PROXY_HEADER - Header name to send to proxy (optional) - PROXY_VALUE - Header value to send to proxy (optional) - RESPONSE_HEADER - Header name to read from response (optional) - -See: https://github.com/proxymesh/python-proxy-headers -""" -import os -import sys -from python_proxy_headers.pycurl_proxy import get - -# Get configuration from environment -proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') -if not proxy_url: - print("Error: Set PROXY_URL environment variable", file=sys.stderr) - sys.exit(1) - -test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') -proxy_header = os.environ.get('PROXY_HEADER') -proxy_value = os.environ.get('PROXY_VALUE') -response_header = os.environ.get('RESPONSE_HEADER') - -proxy_headers = {proxy_header: proxy_value} if proxy_header and proxy_value else None - -# Make request -response = get(test_url, proxy=proxy_url, proxy_headers=proxy_headers) - -# Output -print(f"Status: {response.status_code}") -print(f"Body: {response.text}") -if response_header: - print(f"{response_header}: {response.proxy_headers.get(response_header)}") diff --git a/python/pycurl-proxy.py b/python/pycurl-proxy.py new file mode 100644 index 0000000..8ae75c1 --- /dev/null +++ b/python/pycurl-proxy.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +""" +pycurl (libcurl) with an HTTP proxy. + +Configuration via environment variables: + PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 + TEST_URL - URL to request (default: https://api.ipify.org?format=json) + +Requires libcurl development headers to install the ``pycurl`` package. Options are +set with :meth:`pycurl.Curl.setopt` like any libcurl binding. 
+ +Documentation: https://pycurl.io/docs/latest/curlobject.html +""" +import os +import sys +from io import BytesIO + +import pycurl + +proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') +if not proxy_url: + print('Error: Set PROXY_URL environment variable', file=sys.stderr) + sys.exit(1) + +test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') + +buffer = BytesIO() +c = pycurl.Curl() +try: + c.setopt(pycurl.URL, test_url) + c.setopt(pycurl.PROXY, proxy_url) + c.setopt(pycurl.FOLLOWLOCATION, 1) + c.setopt(pycurl.TIMEOUT, 30) + c.setopt(pycurl.SSL_VERIFYPEER, 1) + c.setopt(pycurl.WRITEDATA, buffer) + c.perform() + status = c.getinfo(pycurl.RESPONSE_CODE) +except pycurl.error as exc: + errno, msg = exc.args + print(f'Error: {msg}', file=sys.stderr) + sys.exit(1) +finally: + c.close() + +print(f'Status: {status}') +print(f'Body: {buffer.getvalue().decode("utf-8", errors="replace")}') diff --git a/python/requests-proxy-headers-session.py b/python/requests-proxy-headers-session.py deleted file mode 100755 index 4a12171..0000000 --- a/python/requests-proxy-headers-session.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python3 -""" -Requests with proxy headers - Session example. 
- -Configuration via environment variables: - PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 - TEST_URL - URL to request (default: https://api.ipify.org?format=json) - PROXY_HEADER - Header name to send to proxy (optional) - PROXY_VALUE - Header value to send to proxy (optional) - RESPONSE_HEADER - Header name to read from response (optional) - -See: https://github.com/proxymesh/python-proxy-headers -""" -import os -import sys -from python_proxy_headers.requests_adapter import ProxySession - -# Get configuration from environment -proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') -if not proxy_url: - print("Error: Set PROXY_URL environment variable", file=sys.stderr) - sys.exit(1) - -test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') -proxy_header = os.environ.get('PROXY_HEADER') -proxy_value = os.environ.get('PROXY_VALUE') -response_header = os.environ.get('RESPONSE_HEADER') - -proxies = {'http': proxy_url, 'https': proxy_url} -proxy_headers = {proxy_header: proxy_value} if proxy_header and proxy_value else None - -# Make requests using session -with ProxySession(proxy_headers=proxy_headers) as session: - session.proxies = proxies - - response = session.get(test_url) - print(f"Status: {response.status_code}") - print(f"Body: {response.text}") - if response_header: - print(f"{response_header}: {response.headers.get(response_header)}") diff --git a/python/requests-proxy-headers.py b/python/requests-proxy-headers.py deleted file mode 100755 index 35e56a9..0000000 --- a/python/requests-proxy-headers.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python3 -""" -Requests with proxy headers example. 
- -Configuration via environment variables: - PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 - TEST_URL - URL to request (default: https://api.ipify.org?format=json) - PROXY_HEADER - Header name to send to proxy (optional) - PROXY_VALUE - Header value to send to proxy (optional) - RESPONSE_HEADER - Header name to read from response (optional) - -See: https://github.com/proxymesh/python-proxy-headers -""" -import os -import sys -from python_proxy_headers import requests_adapter - -# Get configuration from environment -proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') -if not proxy_url: - print("Error: Set PROXY_URL environment variable", file=sys.stderr) - sys.exit(1) - -test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') -proxy_header = os.environ.get('PROXY_HEADER') -proxy_value = os.environ.get('PROXY_VALUE') -response_header = os.environ.get('RESPONSE_HEADER') - -proxies = {'http': proxy_url, 'https': proxy_url} -proxy_headers = {proxy_header: proxy_value} if proxy_header and proxy_value else None - -# Make request -response = requests_adapter.get(test_url, proxies=proxies, proxy_headers=proxy_headers) - -# Output -print(f"Status: {response.status_code}") -print(f"Body: {response.text}") -if response_header: - print(f"{response_header}: {response.headers.get(response_header)}") diff --git a/python/requests-proxy.py b/python/requests-proxy.py index fb1301f..1f4e918 100755 --- a/python/requests-proxy.py +++ b/python/requests-proxy.py @@ -3,8 +3,11 @@ Basic requests with proxy example. 
Configuration via environment variables: - PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 - TEST_URL - URL to request (default: https://api.ipify.org?format=json) + PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 + TEST_URL - URL to request (default: https://api.ipify.org?format=json) + RESPONSE_HEADER - Optional header name to print from the response + +Documentation: https://docs.python-requests.org/en/latest/user/advanced/#proxies """ import os import sys @@ -17,12 +20,13 @@ sys.exit(1) test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') +response_header = os.environ.get('RESPONSE_HEADER') proxies = {'http': proxy_url, 'https': proxy_url} -# Make request -response = requests.get(test_url, proxies=proxies) +response = requests.get(test_url, proxies=proxies, timeout=30) -# Output print(f"Status: {response.status_code}") print(f"Body: {response.text}") +if response_header: + print(f"{response_header}: {response.headers.get(response_header)}") diff --git a/python/requests-session-proxy.py b/python/requests-session-proxy.py new file mode 100644 index 0000000..4d7a9ac --- /dev/null +++ b/python/requests-session-proxy.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +""" +Requests Session with an HTTP proxy. + +Configuration via environment variables: + PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 + TEST_URL - URL to request (default: https://api.ipify.org?format=json) + RESPONSE_HEADER - Optional header name to print from the response + +Uses a :class:`requests.Session` for connection pooling. Same proxy options as +``requests.get(..., proxies=...)``. 
+ +Documentation: https://docs.python-requests.org/en/latest/user/advanced/#proxies +""" +import os +import sys + +import requests + +proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') +if not proxy_url: + print('Error: Set PROXY_URL environment variable', file=sys.stderr) + sys.exit(1) + +test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') +response_header = os.environ.get('RESPONSE_HEADER') + +proxies = {'http': proxy_url, 'https': proxy_url} + +with requests.Session() as session: + session.proxies.update(proxies) + response = session.get(test_url, timeout=30) + +print(f'Status: {response.status_code}') +print(f'Body: {response.text}') +if response_header: + print(f'{response_header}: {response.headers.get(response_header)}') diff --git a/python/requirements.txt b/python/requirements.txt new file mode 100644 index 0000000..b744ad5 --- /dev/null +++ b/python/requirements.txt @@ -0,0 +1,13 @@ +# Third-party libraries used by the Python proxy examples (see run_tests.py). +# Install: pip install -r python/requirements.txt +requests>=2.28.0 +urllib3>=2.0.0 +aiohttp>=3.9.0 +httpx>=0.27.0 +pycurl>=7.45.0; platform_system != "Windows" +cloudscraper>=1.2.71 +autoscraper>=1.1.0 +# autoscraper 1.1.x fails build() with beautifulsoup4 4.13+ (empty learned list); keep 4.12.x +beautifulsoup4>=4.12.0,<4.13 +lxml>=5.0.0 +scrapy>=2.11.0 diff --git a/python/run_tests.py b/python/run_tests.py index 8479b87..ebb2651 100755 --- a/python/run_tests.py +++ b/python/run_tests.py @@ -1,12 +1,11 @@ #!/usr/bin/env python3 """ -Run all Python proxy header examples as tests. +Run all Python proxy examples as tests. 
Configuration via environment variables: PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 TEST_URL - URL to request (default: https://api.ipify.org?format=json) - PROXY_HEADER - Header name to send to proxy (optional) - PROXY_VALUE - Header value to send to proxy (optional) + RESPONSE_HEADER - Optional header name for examples that print response headers Usage: python run_tests.py # Run all examples @@ -17,103 +16,145 @@ import sys import subprocess from pathlib import Path +from typing import List # Examples to test (filename without .py) EXAMPLES = [ - 'requests-proxy-headers', - 'requests-proxy-headers-session', - 'urllib3-proxy-headers', - 'aiohttp-proxy-headers', - 'httpx-proxy-headers', - 'httpx-async-proxy-headers', - 'pycurl-proxy-headers', - 'pycurl-proxy-headers-lowlevel', - 'cloudscraper-proxy-headers', - 'autoscraper-proxy-headers', + 'requests-proxy', + 'requests-session-proxy', + 'urllib3-proxy', + 'aiohttp-proxy', + 'httpx-proxy', + 'httpx-async-proxy', + 'pycurl-proxy', + 'cloudscraper-proxy', + 'autoscraper-proxy', + 'scrapy-proxy', ] +def _scrapy_import_ok() -> bool: + """Check Scrapy in a subprocess so a broken cryptography/cffi stack cannot abort this runner.""" + try: + r = subprocess.run( + [sys.executable, '-c', 'import scrapy'], + capture_output=True, + timeout=30, + ) + return r.returncode == 0 + except (subprocess.TimeoutExpired, OSError): + return False + + +def _available_examples() -> List[str]: + """Skip examples when optional native / heavy dependencies are missing or broken.""" + try: + import pycurl # noqa: F401 + + has_pycurl = True + except ImportError: + has_pycurl = False + + has_scrapy = _scrapy_import_ok() + + out: List[str] = [] + for e in EXAMPLES: + if 'pycurl' in e and not has_pycurl: + continue + if e == 'scrapy-proxy' and not has_scrapy: + continue + out.append(e) + return out + + def run_example(name: str) -> bool: """Run an example script and return True if successful.""" script_dir = 
Path(__file__).parent - script_path = script_dir / f"{name}.py" - + script_path = script_dir / f'{name}.py' + if not script_path.exists(): - print(f" Script not found: {script_path}") + print(f' Script not found: {script_path}') return False - + + if name == 'scrapy-proxy': + cmd = [sys.executable, '-m', 'scrapy', 'runspider', str(script_path)] + timeout = 90 + else: + cmd = [sys.executable, str(script_path)] + timeout = 30 + try: result = subprocess.run( - [sys.executable, str(script_path)], + cmd, + cwd=str(script_dir), capture_output=True, text=True, - timeout=30 + timeout=timeout, ) - + if result.returncode == 0: return True - else: - print(f" Exit code: {result.returncode}") - if result.stderr: - print(f" Error: {result.stderr.strip()}") - return False - + print(f' Exit code: {result.returncode}') + if result.stderr: + print(f' Error: {result.stderr.strip()}') + return False + except subprocess.TimeoutExpired: - print(" Timeout after 30s") + print(f' Timeout after {timeout}s') return False except Exception as e: - print(f" Exception: {e}") + print(f' Exception: {e}') return False def main(): args = sys.argv[1:] - - # Handle -l/--list + if '-l' in args or '--list' in args: - print("Available examples:") - for name in EXAMPLES: - print(f" {name}") + print('Available examples:') + for name in _available_examples(): + print(f' {name}') sys.exit(0) - - # Handle -h/--help + if '-h' in args or '--help' in args: print(__doc__) sys.exit(0) - - # Check for PROXY_URL + if not os.environ.get('PROXY_URL') and not os.environ.get('HTTPS_PROXY'): - print("Error: Set PROXY_URL environment variable", file=sys.stderr) - print("\nExample:", file=sys.stderr) + print('Error: Set PROXY_URL environment variable', file=sys.stderr) + print('\nExample:', file=sys.stderr) print(" export PROXY_URL='http://user:pass@proxy:8080'", file=sys.stderr) sys.exit(1) - - # Determine which examples to run + + available = _available_examples() if args: - examples = args + examples = [e for e in 
available if any(a in e for a in args)] + if not examples: + print('No matching examples.', file=sys.stderr) + sys.exit(1) else: - examples = EXAMPLES - - print(f"\n{'='*50}") - print("Running Python Proxy Header Examples") - print(f"{'='*50}\n") - + examples = available + + print(f"\n{'=' * 50}") + print('Running Python Proxy Examples') + print(f"{'=' * 50}\n") + passed = 0 failed = 0 - + for name in examples: - print(f"[TEST] {name}...", end=" ", flush=True) + print(f'[TEST] {name}... ', end='', flush=True) if run_example(name): - print("PASS") + print('PASS') passed += 1 else: - print("FAIL") + print('FAIL') failed += 1 - - print(f"\n{'='*50}") - print(f"Results: {passed} passed, {failed} failed") - print(f"{'='*50}\n") - + + print(f"\n{'=' * 50}") + print(f'Results: {passed} passed, {failed} failed') + print(f"{'=' * 50}\n") + sys.exit(0 if failed == 0 else 1) diff --git a/python/scrapy-proxy-headers.py b/python/scrapy-proxy-headers.py deleted file mode 100755 index 213b715..0000000 --- a/python/scrapy-proxy-headers.py +++ /dev/null @@ -1,12 +0,0 @@ -# See https://github.com/proxymesh/scrapy-proxy-headers - -# In your Scrapy `settings.py`, add the following: -DOWNLOAD_HANDLERS = { - "https": "scrapy_proxy_headers.HTTP11ProxyDownloadHandler" -} - -# add to your request procesing method -request.meta["proxy_headers"] = {"X-ProxyMesh-Country": "US"} - -# then when you get a response -response.headers["X-ProxyMesh-IP"] \ No newline at end of file diff --git a/python/scrapy-proxy.py b/python/scrapy-proxy.py new file mode 100644 index 0000000..e06a3f1 --- /dev/null +++ b/python/scrapy-proxy.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +""" +Scrapy with an HTTP proxy (runspider). 
+ +Configuration via environment variables: + PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 + TEST_URL - URL to request (default: https://api.ipify.org?format=json) + RESPONSE_HEADER - Optional header name to print from the response + +Uses ``meta['proxy']`` on the request. For custom headers on the proxy tunnel +(ProxyMesh-style), see the scrapy-proxy-headers package linked from the README. + +Documentation: https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#module-scrapy.downloadermiddlewares.httpproxy +""" +import os +import sys + +import scrapy +from scrapy.crawler import CrawlerProcess + + +class ProxiedIpifySpider(scrapy.Spider): + name = 'proxied_ipify' + custom_settings = { + 'LOG_LEVEL': 'WARNING', + 'ROBOTSTXT_OBEY': False, + } + + def start_requests(self): + proxy_url = os.environ['PROXY_URL'] + test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') + yield scrapy.Request(test_url, meta={'proxy': proxy_url}, dont_filter=True) + + def parse(self, response): + rh = os.environ.get('RESPONSE_HEADER') + print(f'Status: {response.status}') + print(f'Body: {response.text}') + if rh: + key = rh.encode('utf-8') + raw = response.headers.get(key) + val = raw.decode('utf-8') if raw else None + print(f'{rh}: {val}') + + +def main() -> None: + if not (os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY')): + print('Error: Set PROXY_URL environment variable', file=sys.stderr) + sys.exit(1) + if not os.environ.get('PROXY_URL'): + os.environ['PROXY_URL'] = os.environ['HTTPS_PROXY'] + + process = CrawlerProcess() + process.crawl(ProxiedIpifySpider) + process.start() + + +if __name__ == '__main__': + main() diff --git a/python/urllib3-proxy-headers.py b/python/urllib3-proxy-headers.py deleted file mode 100755 index 654ac40..0000000 --- a/python/urllib3-proxy-headers.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python3 -""" -urllib3 with proxy headers example. 
- -Configuration via environment variables: - PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 - TEST_URL - URL to request (default: https://api.ipify.org?format=json) - PROXY_HEADER - Header name to send to proxy (optional) - PROXY_VALUE - Header value to send to proxy (optional) - RESPONSE_HEADER - Header name to read from response (optional) - -See: https://github.com/proxymesh/python-proxy-headers -""" -import os -import sys -from python_proxy_headers import urllib3_proxy_manager - -# Get configuration from environment -proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') -if not proxy_url: - print("Error: Set PROXY_URL environment variable", file=sys.stderr) - sys.exit(1) - -test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') -proxy_header = os.environ.get('PROXY_HEADER') -proxy_value = os.environ.get('PROXY_VALUE') -response_header = os.environ.get('RESPONSE_HEADER') - -proxy_headers = {proxy_header: proxy_value} if proxy_header and proxy_value else None - -# Create proxy manager and make request -proxy = urllib3_proxy_manager.ProxyHeaderManager(proxy_url, proxy_headers=proxy_headers) -response = proxy.request('GET', test_url) - -# Output -print(f"Status: {response.status}") -print(f"Body: {response.data.decode('utf-8')}") -if response_header: - print(f"{response_header}: {response.headers.get(response_header)}") diff --git a/python/urllib3-proxy.py b/python/urllib3-proxy.py new file mode 100644 index 0000000..b6a4efb --- /dev/null +++ b/python/urllib3-proxy.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +""" +urllib3 with an HTTP proxy. + +Configuration via environment variables: + PROXY_URL - Proxy URL (required), e.g., http://user:pass@proxy:8080 + TEST_URL - URL to request (default: https://api.ipify.org?format=json) + RESPONSE_HEADER - Optional header name to print from the response + +urllib3's :class:`urllib3.ProxyManager` routes traffic through the proxy. 
It does +not support sending custom headers on the HTTPS CONNECT request or reading proxy +CONNECT response headers (see python-proxy-headers for that). + +Default urllib3 retries repeat HTTPS CONNECT through the proxy; some providers +return errors such as ``407 too many failures`` when that happens, so retries +are disabled here. + +Documentation: https://urllib3.readthedocs.io/en/stable/reference/urllib3.poolmanager.html +""" +import os +import sys +from urllib.parse import urlparse, urlunparse + +import urllib3 + +proxy_url = os.environ.get('PROXY_URL') or os.environ.get('HTTPS_PROXY') +if not proxy_url: + print('Error: Set PROXY_URL environment variable', file=sys.stderr) + sys.exit(1) + +test_url = os.environ.get('TEST_URL', 'https://api.ipify.org?format=json') +response_header = os.environ.get('RESPONSE_HEADER') + +parsed = urlparse(proxy_url) +pool_kw = {'retries': False} +if parsed.username is not None: + # Some stacks omit CONNECT credentials unless they are sent as Proxy-Authorization. + user = parsed.username + password = parsed.password or '' + pool_kw['proxy_headers'] = urllib3.util.make_headers( + proxy_basic_auth=f'{user}:{password}' + ) + host = parsed.hostname or '' + if parsed.port: + host = f'{host}:{parsed.port}' + proxy_for_pool = urlunparse((parsed.scheme, host, '', '', '', '')) +else: + proxy_for_pool = proxy_url + +http = urllib3.ProxyManager(proxy_for_pool, **pool_kw) +response = http.request('GET', test_url, timeout=urllib3.Timeout(30)) + +body = response.data.decode('utf-8', errors='replace') +print(f'Status: {response.status}') +print(f'Body: {body}') +if response_header: + print(f'{response_header}: {response.headers.get(response_header)}')