from __future__ import annotations
import base64
from pathlib import Path
from typing import Any, BinaryIO
class BaseTask:
    """Common base for every Anticaptcha task type.

    Each subclass models one captcha family (ReCAPTCHA, hCaptcha, etc.) and
    turns its parameters into the dictionary format expected by the
    Anticaptcha API. This class is not meant to be used directly: instantiate
    one of the concrete task classes and pass it to
    :meth:`AnticaptchaClient.create_task`.
    """

    # API task type identifier; concrete subclasses override it.
    type: str | None = None

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Serialize the task into a dictionary for the Anticaptcha API request.

        :param result: Extra key/value pairs to carry over into the payload.
        :returns: Dictionary with the given pairs plus the ``type`` field.
        """
        result["type"] = self.type
        return result

    def __repr__(self) -> str:
        """Return ``<ClassName key=value ...>`` for debugging.

        Only instance attributes that are public (no leading underscore) and
        not ``None`` are listed. When there is nothing to list the result is
        just ``<ClassName>``, without a dangling space.
        """
        shown = [
            f"{name}={value!r}"
            for name, value in self.__dict__.items()
            if not name.startswith("_") and value is not None
        ]
        # Joining the class name together with the fields avoids emitting a
        # trailing space when the field list is empty.
        return f"<{' '.join([self.__class__.__name__, *shown])}>"
class UserAgentMixin(BaseTask):
    """Mixin contributing a ``user_agent`` parameter to a task.

    Proxy-enabled task variants need it so the captcha solver can emulate
    the same browser.

    :param user_agent: Browser User-Agent string to use when solving.
        Required keyword; omitting it raises :class:`KeyError`.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Pop the keyword so it is not forwarded to the next ``__init__``
        # in the MRO.
        user_agent = kwargs.pop("user_agent")
        self.userAgent: str = user_agent
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Extend the parent payload with the ``userAgent`` field.

        :returns: Dictionary produced by the parent classes plus ``userAgent``.
        """
        payload = super().serialize(**result)
        payload.update(userAgent=self.userAgent)
        return payload
class CookieMixin(BaseTask):
    """Mixin contributing an optional ``cookies`` parameter to a task.

    Supply cookies when the target page needs them for the captcha to render
    correctly.

    :param cookies: Cookie string to include with the request (optional,
        defaults to an empty string).
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Pop the keyword so it is not forwarded to the next ``__init__``
        # in the MRO.
        cookie_string = kwargs.pop("cookies", "")
        self.cookies: str = cookie_string
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Extend the parent payload with ``cookies`` when any were given.

        :returns: Dictionary produced by the parent classes; the ``cookies``
            key is present only if the cookie string is non-empty.
        """
        payload = super().serialize(**result)
        if not self.cookies:
            return payload
        payload["cookies"] = self.cookies
        return payload
class ProxyMixin(BaseTask):
    """Mixin contributing proxy parameters to a task.

    It is used through proxy-enabled task variants such as
    :class:`NoCaptchaTask` when the captcha has to be solved through a
    specific proxy. The keyword arguments can be built conveniently with
    :meth:`Proxy.to_kwargs`.

    All five keywords are required; a missing one raises :class:`KeyError`.

    :param proxy_type: Proxy protocol — ``"http"``, ``"socks4"``, or ``"socks5"``.
    :param proxy_address: Proxy server hostname or IP address.
    :param proxy_port: Proxy server port.
    :param proxy_login: Username for proxy authentication (empty string if none).
    :param proxy_password: Password for proxy authentication (empty string if none).
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Take the proxy keywords out of ``kwargs`` so that only the remaining
        # ones travel further up the MRO.
        take = kwargs.pop
        self.proxyType: str = take("proxy_type")
        self.proxyAddress: str = take("proxy_address")
        self.proxyPort: int = take("proxy_port")
        self.proxyLogin: str = take("proxy_login")
        self.proxyPassword: str = take("proxy_password")
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Extend the parent payload with the proxy settings.

        :returns: Dictionary produced by the parent classes plus
            ``proxyType``, ``proxyAddress`` and ``proxyPort``; the credentials
            are added only when a proxy login is set.
        """
        payload = super().serialize(**result)
        payload.update(
            proxyType=self.proxyType,
            proxyAddress=self.proxyAddress,
            proxyPort=self.proxyPort,
        )
        if self.proxyLogin:
            # Login and password are sent together, and only when a login exists.
            payload.update(
                proxyLogin=self.proxyLogin,
                proxyPassword=self.proxyPassword,
            )
        return payload
class NoCaptchaTaskProxylessTask(BaseTask):
    """Solve a Google ReCAPTCHA v2 challenge without a proxy.

    This is the most common task type; the solver accesses the target page
    directly from Anticaptcha's servers.

    Once the job has completed, fetch the token with
    :meth:`Job.get_solution_response`.

    :param website_url: Full URL of the page where the captcha appears.
    :param website_key: The ``data-sitekey`` value from the ReCAPTCHA element.
    :param website_s_token: Optional ``data-s`` token for Google Search captchas.
    :param is_invisible: Set to ``True`` for invisible ReCAPTCHA. The system
        auto-detects this, so the parameter is optional.
    :param recaptcha_data_s_value: Value of the ``data-s`` parameter if present.

    Example::

        task = NoCaptchaTaskProxylessTask(
            website_url="https://example.com",
            website_key="6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-",
        )
    """

    type = "NoCaptchaTaskProxyless"
    websiteURL = None
    websiteKey = None
    websiteSToken = None
    recaptchaDataSValue = None

    def __init__(
        self,
        website_url: str,
        website_key: str,
        website_s_token: str | None = None,
        is_invisible: bool | None = None,
        recaptcha_data_s_value: str | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        self.websiteURL, self.websiteKey = website_url, website_key
        self.websiteSToken = website_s_token
        self.recaptchaDataSValue = recaptcha_data_s_value
        self.isInvisible = is_invisible
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Build the API payload for this task.

        :returns: Dictionary with ``websiteURL`` and ``websiteKey`` plus every
            optional field whose value is not ``None``.
        """
        payload = super().serialize(**result)
        payload["websiteURL"] = self.websiteURL
        payload["websiteKey"] = self.websiteKey
        optional_fields = (
            ("websiteSToken", self.websiteSToken),
            ("isInvisible", self.isInvisible),
            ("recaptchaDataSValue", self.recaptchaDataSValue),
        )
        # ``None`` means "not provided"; falsy values such as ``False`` are kept.
        payload.update(
            (key, value) for key, value in optional_fields if value is not None
        )
        return payload
class RecaptchaV2TaskProxyless(NoCaptchaTaskProxylessTask):
    """Proxyless Google ReCAPTCHA v2 task under the newer API type name.

    Behaves exactly like :class:`NoCaptchaTaskProxylessTask`; the only
    difference is the updated ``RecaptchaV2TaskProxyless`` type identifier.
    """

    type = "RecaptchaV2TaskProxyless"
class NoCaptchaTask(ProxyMixin, UserAgentMixin, CookieMixin, NoCaptchaTaskProxylessTask):
    """Solve a Google ReCAPTCHA v2 challenge through a proxy.

    Takes everything :class:`NoCaptchaTaskProxylessTask` takes and, in
    addition, requires proxy and user-agent parameters. :class:`Proxy` offers
    a convenient way to build the proxy keyword arguments::

        proxy = Proxy.parse_url("socks5://user:pass@host:port")
        task = NoCaptchaTask(url, site_key, user_agent=UA, **proxy.to_kwargs())

    :param user_agent: Browser User-Agent string.
    :param cookies: Optional cookie string (default: ``""``).
    :param proxy_type: Proxy protocol (``"http"``, ``"socks4"``, ``"socks5"``).
    :param proxy_address: Proxy server hostname or IP.
    :param proxy_port: Proxy server port.
    :param proxy_login: Proxy username (empty string if none).
    :param proxy_password: Proxy password (empty string if none).
    """

    type = "NoCaptchaTask"
class RecaptchaV2Task(NoCaptchaTask):
    """Proxy-enabled Google ReCAPTCHA v2 task under the newer API type name.

    Behaves exactly like :class:`NoCaptchaTask`; the only difference is the
    updated ``RecaptchaV2Task`` type identifier.
    """

    type = "RecaptchaV2Task"
class FunCaptchaProxylessTask(BaseTask):
    """Solve an Arkose Labs FunCaptcha challenge without a proxy.

    Once the job has completed, fetch the token with
    :meth:`Job.get_token_response`.

    :param website_url: Full URL of the page where the captcha appears.
    :param website_key: The FunCaptcha public key (e.g.
        ``"DE0B0BB7-1EE4-4D70-1853-31B835D4506B"``).
    :param subdomain: Custom FunCaptcha API subdomain, if the site uses one
        (e.g. ``"mysite-api.arkoselabs.com"``).
    :param data: Additional data blob required by some FunCaptcha implementations.
    """

    type = "FunCaptchaTaskProxyless"
    websiteURL = None
    websiteKey = None
    funcaptchaApiJSSubdomain = None
    data = None

    def __init__(
        self,
        website_url: str,
        website_key: str,
        subdomain: str | None = None,
        data: str | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        self.websiteURL, self.websiteKey = website_url, website_key
        self.funcaptchaApiJSSubdomain = subdomain
        self.data = data
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Build the API payload for this task.

        The site key is sent under the ``websitePublicKey`` field.

        :returns: Dictionary with ``websiteURL`` and ``websitePublicKey`` plus
            ``funcaptchaApiJSSubdomain`` and ``data`` when they are non-empty.
        """
        payload = super().serialize(**result)
        payload["websiteURL"] = self.websiteURL
        payload["websitePublicKey"] = self.websiteKey
        optional_fields = (
            ("funcaptchaApiJSSubdomain", self.funcaptchaApiJSSubdomain),
            ("data", self.data),
        )
        # Empty values (``None`` or ``""``) are left out of the request.
        payload.update((key, value) for key, value in optional_fields if value)
        return payload
class FunCaptchaTask(ProxyMixin, UserAgentMixin, CookieMixin, FunCaptchaProxylessTask):
    """Solve an Arkose Labs FunCaptcha challenge through a proxy.

    Takes everything :class:`FunCaptchaProxylessTask` takes and, in addition,
    requires proxy and user-agent parameters and accepts optional cookies.
    """

    type = "FunCaptchaTask"
class ImageToTextTask(BaseTask):
    """Solve a classic image-based captcha by extracting text from an image.

    The image is base64-encoded automatically. It may be given as a file
    path, as raw bytes-like data, or as an open binary file object.

    Once the job has completed, fetch the text with
    :meth:`Job.get_captcha_text`.

    :param image: Captcha image as a file path (``str`` or ``Path``), raw
        ``bytes`` / ``bytearray`` / ``memoryview``, or a binary file-like
        object exposing ``read()``.
    :param phrase: ``True`` if the answer contains multiple words.
    :param case: ``True`` if the answer is case-sensitive.
    :param numeric: ``0`` — no requirements, ``1`` — numbers only,
        ``2`` — letters only.
    :param math: ``True`` if the captcha is a math expression to solve.
    :param min_length: Minimum number of characters in the answer.
    :param max_length: Maximum number of characters in the answer.
    :param comment: Hint text shown to the worker (e.g. ``"Enter red letters"``).
    :param website_url: URL of the page where the captcha was found (optional,
        used for context).

    Example::

        task = ImageToTextTask("captcha.png")
        task = ImageToTextTask(open("captcha.png", "rb").read())
    """

    type = "ImageToTextTask"
    _body = None
    phrase = None
    case = None
    numeric = None
    math = None
    minLength = None
    maxLength = None
    comment = None
    websiteUrl = None

    def __init__(
        self,
        image: str | Path | bytes | bytearray | memoryview | BinaryIO,
        phrase: bool | None = None,
        case: bool | None = None,
        numeric: int | None = None,
        math: bool | None = None,
        min_length: int | None = None,
        max_length: int | None = None,
        comment: str | None = None,
        website_url: str | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        if isinstance(image, (str, Path)):
            # A path: read the file contents from disk.
            with open(image, "rb") as handle:
                raw = handle.read()
        elif isinstance(image, (bytes, bytearray, memoryview)):
            # Any bytes-like buffer is already the image content;
            # ``base64.b64encode`` accepts all of these directly.
            raw = image
        else:
            # Anything else is treated as an open binary file-like object.
            raw = image.read()
        self._body = base64.b64encode(raw).decode("utf-8")
        self.phrase = phrase
        self.case = case
        self.numeric = numeric
        self.math = math
        self.minLength = min_length
        self.maxLength = max_length
        self.comment = comment
        self.websiteUrl = website_url
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Build the API payload for this task.

        :returns: Dictionary with the base64 image under ``body`` plus every
            optional field whose value is not ``None``.
        """
        payload = super().serialize(**result)
        payload["body"] = self._body
        optional_fields = (
            ("phrase", self.phrase),
            ("case", self.case),
            ("numeric", self.numeric),
            ("math", self.math),
            ("minLength", self.minLength),
            ("maxLength", self.maxLength),
            ("comment", self.comment),
            ("websiteUrl", self.websiteUrl),
        )
        # ``None`` means "not provided"; falsy values such as ``0`` or
        # ``False`` are meaningful and therefore kept.
        payload.update(
            (key, value) for key, value in optional_fields if value is not None
        )
        return payload
class RecaptchaV3TaskProxyless(BaseTask):
    """Solve a Google ReCAPTCHA v3 challenge (score-based, proxyless only).

    ReCAPTCHA v3 yields a score (0.0–1.0) instead of a visual challenge, so
    both the minimum acceptable score and the page action must be specified.

    Once the job has completed, fetch the token with
    :meth:`Job.get_solution_response`.

    :param website_url: Full URL of the page where the captcha appears.
    :param website_key: The ``data-sitekey`` value from the ReCAPTCHA element.
    :param min_score: Minimum score threshold (e.g. ``0.3``, ``0.7``, ``0.9``).
    :param page_action: The action value from ``grecaptcha.execute(key, {action: ...})``.
    :param is_enterprise: Set to ``True`` if the site uses the Enterprise version
        of ReCAPTCHA v3.
    """

    type = "RecaptchaV3TaskProxyless"
    websiteURL = None
    websiteKey = None
    minScore = None
    pageAction = None
    isEnterprise = False

    def __init__(
        self,
        website_url: str,
        website_key: str,
        min_score: float,
        page_action: str,
        is_enterprise: bool = False,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        self.websiteURL, self.websiteKey = website_url, website_key
        self.minScore, self.pageAction = min_score, page_action
        self.isEnterprise = is_enterprise
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Build the API payload for this task.

        :returns: Dictionary with ``websiteURL``, ``websiteKey``, ``minScore``,
            ``pageAction`` and ``isEnterprise``; all five are always included.
        """
        payload = super().serialize(**result)
        payload.update(
            websiteURL=self.websiteURL,
            websiteKey=self.websiteKey,
            minScore=self.minScore,
            pageAction=self.pageAction,
            isEnterprise=self.isEnterprise,
        )
        return payload
class HCaptchaTaskProxyless(BaseTask):
    """Solve an hCaptcha challenge without a proxy.

    Once the job has completed, fetch the token with
    :meth:`Job.get_solution_response`.

    :param website_url: Full URL of the page where the captcha appears.
    :param website_key: The ``data-sitekey`` value from the hCaptcha element.
    """

    type = "HCaptchaTaskProxyless"
    websiteURL = None
    websiteKey = None

    def __init__(self, website_url: str, website_key: str, *args: Any, **kwargs: Any) -> None:
        self.websiteURL, self.websiteKey = website_url, website_key
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Build the API payload for this task.

        :returns: Dictionary with the ``websiteURL`` and ``websiteKey`` fields
            added to the parent payload.
        """
        payload = super().serialize(**result)
        payload.update(websiteURL=self.websiteURL, websiteKey=self.websiteKey)
        return payload
class HCaptchaTask(ProxyMixin, UserAgentMixin, CookieMixin, HCaptchaTaskProxyless):
    """Solve an hCaptcha challenge through a proxy.

    Takes everything :class:`HCaptchaTaskProxyless` takes and, in addition,
    requires proxy and user-agent parameters and accepts optional cookies.
    """

    type = "HCaptchaTask"
class RecaptchaV2EnterpriseTaskProxyless(BaseTask):
    """Solve a Google ReCAPTCHA v2 Enterprise challenge without a proxy.

    Use this for sites that run the Enterprise version of ReCAPTCHA v2.

    Once the job has completed, fetch the token with
    :meth:`Job.get_solution_response`.

    :param website_url: Full URL of the page where the captcha appears.
    :param website_key: The ``data-sitekey`` value from the ReCAPTCHA element.
    :param enterprise_payload: Optional dictionary with Enterprise-specific
        parameters (e.g. ``{"s": "...", "action": "..."}``). Defaults to
        ``None``.
    :param api_domain: Custom API domain if the site uses a non-standard
        ReCAPTCHA endpoint (e.g. ``"recaptcha.net"``). Defaults to ``None``.
    """

    type = "RecaptchaV2EnterpriseTaskProxyless"
    websiteURL = None
    websiteKey = None
    enterprisePayload = None
    apiDomain = None

    def __init__(
        self,
        website_url: str,
        website_key: str,
        # Both parameters are documented as optional, so they default to
        # ``None``; existing callers that pass them explicitly are unaffected.
        enterprise_payload: dict[str, Any] | None = None,
        api_domain: str | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        self.websiteURL = website_url
        self.websiteKey = website_key
        self.enterprisePayload = enterprise_payload
        self.apiDomain = api_domain
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Build the API payload for this task.

        :returns: Dictionary with ``websiteURL`` and ``websiteKey`` plus
            ``enterprisePayload`` and ``apiDomain`` when they are non-empty.
        """
        data = super().serialize(**result)
        data["websiteURL"] = self.websiteURL
        data["websiteKey"] = self.websiteKey
        # Empty values (``None``, ``{}`` or ``""``) are left out of the request.
        if self.enterprisePayload:
            data["enterprisePayload"] = self.enterprisePayload
        if self.apiDomain:
            data["apiDomain"] = self.apiDomain
        return data
class RecaptchaV2EnterpriseTask(ProxyMixin, UserAgentMixin, CookieMixin, RecaptchaV2EnterpriseTaskProxyless):
    """Solve a Google ReCAPTCHA v2 Enterprise challenge through a proxy.

    Takes everything :class:`RecaptchaV2EnterpriseTaskProxyless` takes and, in
    addition, requires proxy and user-agent parameters and accepts optional
    cookies.
    """

    type = "RecaptchaV2EnterpriseTask"
class GeeTestTaskProxyless(BaseTask):
    """Solve a GeeTest (slide / click) captcha without a proxy.

    Once the job has completed, call :meth:`Job.get_solution` to obtain the
    full solution dictionary containing ``challenge``, ``validate``, and
    ``seccode``.

    :param website_url: Full URL of the page where the captcha appears.
    :param gt: The ``gt`` parameter value from the GeeTest script.
    :param challenge: The ``challenge`` token obtained from the GeeTest API.
    :param subdomain: Custom GeeTest API subdomain, if the site uses one.
    :param lib: Custom ``getLib`` parameter value, if required.
    """

    type = "GeeTestTaskProxyless"
    websiteURL = None
    gt = None
    challenge = None
    geetestApiServerSubdomain = None
    geetestGetLib = None

    def __init__(
        self,
        website_url: str,
        gt: str,
        challenge: str,
        subdomain: str | None = None,
        lib: str | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        self.websiteURL = website_url
        self.gt, self.challenge = gt, challenge
        self.geetestApiServerSubdomain = subdomain
        self.geetestGetLib = lib
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Build the API payload for this task.

        :returns: Dictionary with ``websiteURL``, ``gt`` and ``challenge`` plus
            ``geetestApiServerSubdomain`` and ``geetestGetLib`` when they are
            non-empty.
        """
        payload = super().serialize(**result)
        payload.update(
            websiteURL=self.websiteURL,
            gt=self.gt,
            challenge=self.challenge,
        )
        optional_fields = (
            ("geetestApiServerSubdomain", self.geetestApiServerSubdomain),
            ("geetestGetLib", self.geetestGetLib),
        )
        # Empty values (``None`` or ``""``) are left out of the request.
        payload.update((key, value) for key, value in optional_fields if value)
        return payload
class GeeTestTask(ProxyMixin, UserAgentMixin, GeeTestTaskProxyless):
    """Solve a GeeTest captcha through a proxy.

    Takes everything :class:`GeeTestTaskProxyless` takes and, in addition,
    requires proxy and user-agent parameters.
    """

    type = "GeeTestTask"
class AntiGateTaskProxyless(BaseTask):
    """Solve a custom AntiGate task using a predefined template.

    AntiGate tasks rely on templates to automate complex browser-based
    actions. The available templates are listed at
    https://anti-captcha.com/antigate.

    Once the job has completed, call :meth:`Job.get_solution` to obtain the
    full solution dictionary.

    :param website_url: Full URL of the page to process.
    :param template_name: Name of the AntiGate template
        (e.g. ``"Sign up on MailChimp"``).
    :param variables: Dictionary of template variables (keys and values
        depend on the template).
    """

    type = "AntiGateTask"
    websiteURL = None
    templateName = None
    variables = None

    def __init__(
        self,
        website_url: str,
        template_name: str,
        variables: dict[str, Any],
        *args: Any,
        **kwargs: Any,
    ) -> None:
        self.websiteURL, self.templateName = website_url, template_name
        self.variables = variables
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Build the API payload for this task.

        :returns: Dictionary with the ``websiteURL``, ``templateName`` and
            ``variables`` fields added to the parent payload.
        """
        payload = super().serialize(**result)
        payload.update(
            websiteURL=self.websiteURL,
            templateName=self.templateName,
            variables=self.variables,
        )
        return payload
class AntiGateTask(ProxyMixin, AntiGateTaskProxyless):
    """Solve a custom AntiGate task through a proxy.

    Takes everything :class:`AntiGateTaskProxyless` takes and, in addition,
    requires proxy parameters.
    """