Source code for python_anticaptcha.tasks

from __future__ import annotations

import base64
from pathlib import Path
from typing import Any, BinaryIO


[docs] class BaseTask: """Base class for all Anticaptcha task types. Each subclass represents a specific captcha type (ReCAPTCHA, hCaptcha, etc.) and serializes its parameters into the format expected by the Anticaptcha API. You do not use this class directly — instead, instantiate one of the concrete task classes and pass it to :meth:`AnticaptchaClient.create_task`. """ type: str | None = None
[docs] def serialize(self, **result: Any) -> dict[str, Any]: """Serialize the task into a dictionary for the Anticaptcha API request. :returns: Dictionary with task parameters including the ``type`` field. """ result["type"] = self.type return result
def __repr__(self) -> str: attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_") and v is not None} fields = " ".join(f"{k}={v!r}" for k, v in attrs.items()) return f"<{self.__class__.__name__} {fields}>"
[docs] class UserAgentMixin(BaseTask): """Mixin that adds a ``user_agent`` parameter to a task. Required by proxy-enabled task variants so the captcha solver can emulate the same browser. :param user_agent: Browser User-Agent string to use when solving. """ def __init__(self, *args: Any, **kwargs: Any) -> None: self.userAgent: str = kwargs.pop("user_agent") super().__init__(*args, **kwargs)
[docs] def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) data["userAgent"] = self.userAgent return data
[docs] class CookieMixin(BaseTask): """Mixin that adds an optional ``cookies`` parameter to a task. Pass cookies when the target page requires them for the captcha to render correctly. :param cookies: Cookie string to include with the request (optional). """ def __init__(self, *args: Any, **kwargs: Any) -> None: self.cookies: str = kwargs.pop("cookies", "") super().__init__(*args, **kwargs)
[docs] def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) if self.cookies: data["cookies"] = self.cookies return data
[docs] class ProxyMixin(BaseTask): """Mixin that adds proxy parameters to a task. Use this (via proxy-enabled task variants like :class:`NoCaptchaTask`) when the captcha must be solved through a specific proxy. You can build the keyword arguments conveniently with :meth:`Proxy.to_kwargs`. :param proxy_type: Proxy protocol — ``"http"``, ``"socks4"``, or ``"socks5"``. :param proxy_address: Proxy server hostname or IP address. :param proxy_port: Proxy server port. :param proxy_login: Username for proxy authentication (empty string if none). :param proxy_password: Password for proxy authentication (empty string if none). """ def __init__(self, *args: Any, **kwargs: Any) -> None: self.proxyType: str = kwargs.pop("proxy_type") self.proxyAddress: str = kwargs.pop("proxy_address") self.proxyPort: int = kwargs.pop("proxy_port") self.proxyLogin: str = kwargs.pop("proxy_login") self.proxyPassword: str = kwargs.pop("proxy_password") super().__init__(*args, **kwargs)
[docs] def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) data["proxyType"] = self.proxyType data["proxyAddress"] = self.proxyAddress data["proxyPort"] = self.proxyPort if self.proxyLogin: data["proxyLogin"] = self.proxyLogin data["proxyPassword"] = self.proxyPassword return data
[docs] class NoCaptchaTaskProxylessTask(BaseTask): """Solve a Google ReCAPTCHA v2 challenge without a proxy. This is the most common task type. The solver will access the target page directly from Anticaptcha's servers. After the job completes, retrieve the token with :meth:`Job.get_solution_response`. :param website_url: Full URL of the page where the captcha appears. :param website_key: The ``data-sitekey`` value from the ReCAPTCHA element. :param website_s_token: Optional ``data-s`` token for Google Search captchas. :param is_invisible: Set to ``True`` for invisible ReCAPTCHA. The system auto-detects this, so the parameter is optional. :param recaptcha_data_s_value: Value of the ``data-s`` parameter if present. Example:: task = NoCaptchaTaskProxylessTask( website_url="https://example.com", website_key="6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-", ) """ type = "NoCaptchaTaskProxyless" websiteURL = None websiteKey = None websiteSToken = None recaptchaDataSValue = None def __init__( self, website_url: str, website_key: str, website_s_token: str | None = None, is_invisible: bool | None = None, recaptcha_data_s_value: str | None = None, *args: Any, **kwargs: Any, ) -> None: self.websiteURL = website_url self.websiteKey = website_key self.websiteSToken = website_s_token self.recaptchaDataSValue = recaptcha_data_s_value self.isInvisible = is_invisible super().__init__(*args, **kwargs)
[docs] def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) data["websiteURL"] = self.websiteURL data["websiteKey"] = self.websiteKey if self.websiteSToken is not None: data["websiteSToken"] = self.websiteSToken if self.isInvisible is not None: data["isInvisible"] = self.isInvisible if self.recaptchaDataSValue is not None: data["recaptchaDataSValue"] = self.recaptchaDataSValue return data
[docs] class RecaptchaV2TaskProxyless(NoCaptchaTaskProxylessTask): """Solve a Google ReCAPTCHA v2 challenge without a proxy (newer API type name). Identical to :class:`NoCaptchaTaskProxylessTask` but uses the updated ``RecaptchaV2TaskProxyless`` type identifier. """ type = "RecaptchaV2TaskProxyless"
[docs] class NoCaptchaTask(ProxyMixin, UserAgentMixin, CookieMixin, NoCaptchaTaskProxylessTask): """Solve a Google ReCAPTCHA v2 challenge through a proxy. Same as :class:`NoCaptchaTaskProxylessTask` but additionally requires proxy and user-agent parameters. Use :class:`Proxy` to build the proxy keyword arguments conveniently:: proxy = Proxy.parse_url("socks5://user:pass@host:port") task = NoCaptchaTask(url, site_key, user_agent=UA, **proxy.to_kwargs()) :param user_agent: Browser User-Agent string. :param cookies: Optional cookie string (default: ``""``). :param proxy_type: Proxy protocol (``"http"``, ``"socks4"``, ``"socks5"``). :param proxy_address: Proxy server hostname or IP. :param proxy_port: Proxy server port. :param proxy_login: Proxy username (empty string if none). :param proxy_password: Proxy password (empty string if none). """ type = "NoCaptchaTask"
[docs] class RecaptchaV2Task(NoCaptchaTask): """Solve a Google ReCAPTCHA v2 challenge through a proxy (newer API type name). Identical to :class:`NoCaptchaTask` but uses the updated ``RecaptchaV2Task`` type identifier. """ type = "RecaptchaV2Task"
[docs] class FunCaptchaProxylessTask(BaseTask): """Solve an Arkose Labs FunCaptcha challenge without a proxy. After the job completes, retrieve the token with :meth:`Job.get_token_response`. :param website_url: Full URL of the page where the captcha appears. :param website_key: The FunCaptcha public key (e.g. ``"DE0B0BB7-1EE4-4D70-1853-31B835D4506B"``). :param subdomain: Custom FunCaptcha API subdomain, if the site uses one (e.g. ``"mysite-api.arkoselabs.com"``). :param data: Additional data blob required by some FunCaptcha implementations. """ type = "FunCaptchaTaskProxyless" websiteURL = None websiteKey = None funcaptchaApiJSSubdomain = None data = None def __init__( self, website_url: str, website_key: str, subdomain: str | None = None, data: str | None = None, *args: Any, **kwargs: Any, ) -> None: self.websiteURL = website_url self.websiteKey = website_key self.funcaptchaApiJSSubdomain = subdomain self.data = data super().__init__(*args, **kwargs)
[docs] def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) data["websiteURL"] = self.websiteURL data["websitePublicKey"] = self.websiteKey if self.funcaptchaApiJSSubdomain: data["funcaptchaApiJSSubdomain"] = self.funcaptchaApiJSSubdomain if self.data: data["data"] = self.data return data
[docs] class FunCaptchaTask(ProxyMixin, UserAgentMixin, CookieMixin, FunCaptchaProxylessTask): """Solve an Arkose Labs FunCaptcha challenge through a proxy. Same as :class:`FunCaptchaProxylessTask` but additionally requires proxy, user-agent, and optional cookie parameters. """ type = "FunCaptchaTask"
[docs] class ImageToTextTask(BaseTask): """Solve a classic image-based captcha by extracting text from an image. The image is automatically base64-encoded. You can pass a file path, raw ``bytes``, or an open binary file object. After the job completes, retrieve the text with :meth:`Job.get_captcha_text`. :param image: Captcha image as a file path (``str`` or ``Path``), raw ``bytes``, or a binary file-like object. :param phrase: ``True`` if the answer contains multiple words. :param case: ``True`` if the answer is case-sensitive. :param numeric: ``0`` — no requirements, ``1`` — numbers only, ``2`` — letters only. :param math: ``True`` if the captcha is a math expression to solve. :param min_length: Minimum number of characters in the answer. :param max_length: Maximum number of characters in the answer. :param comment: Hint text shown to the worker (e.g. ``"Enter red letters"``). :param website_url: URL of the page where the captcha was found (optional, used for context). Example:: task = ImageToTextTask("captcha.png") task = ImageToTextTask(open("captcha.png", "rb").read()) """ type = "ImageToTextTask" _body = None phrase = None case = None numeric = None math = None minLength = None maxLength = None comment = None websiteUrl = None def __init__( self, image: str | Path | bytes | BinaryIO, phrase: bool | None = None, case: bool | None = None, numeric: int | None = None, math: bool | None = None, min_length: int | None = None, max_length: int | None = None, comment: str | None = None, website_url: str | None = None, *args: Any, **kwargs: Any, ) -> None: if isinstance(image, (str, Path)): with open(image, "rb") as f: self._body = base64.b64encode(f.read()).decode("utf-8") elif isinstance(image, bytes): self._body = base64.b64encode(image).decode("utf-8") else: self._body = base64.b64encode(image.read()).decode("utf-8") self.phrase = phrase self.case = case self.numeric = numeric self.math = math self.minLength = min_length self.maxLength = max_length self.comment = comment self.websiteUrl = website_url super().__init__(*args, **kwargs)
[docs] def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) data["body"] = self._body if self.phrase is not None: data["phrase"] = self.phrase if self.case is not None: data["case"] = self.case if self.numeric is not None: data["numeric"] = self.numeric if self.math is not None: data["math"] = self.math if self.minLength is not None: data["minLength"] = self.minLength if self.maxLength is not None: data["maxLength"] = self.maxLength if self.comment is not None: data["comment"] = self.comment if self.websiteUrl is not None: data["websiteUrl"] = self.websiteUrl return data
[docs] class RecaptchaV3TaskProxyless(BaseTask): """Solve a Google ReCAPTCHA v3 challenge (score-based, proxyless only). ReCAPTCHA v3 returns a score (0.0–1.0) rather than a visual challenge. You must specify the minimum acceptable score and the page action. After the job completes, retrieve the token with :meth:`Job.get_solution_response`. :param website_url: Full URL of the page where the captcha appears. :param website_key: The ``data-sitekey`` value from the ReCAPTCHA element. :param min_score: Minimum score threshold (e.g. ``0.3``, ``0.7``, ``0.9``). :param page_action: The action value from ``grecaptcha.execute(key, {action: ...})``. :param is_enterprise: Set to ``True`` if the site uses the Enterprise version of ReCAPTCHA v3. """ type = "RecaptchaV3TaskProxyless" websiteURL = None websiteKey = None minScore = None pageAction = None isEnterprise = False def __init__( self, website_url: str, website_key: str, min_score: float, page_action: str, is_enterprise: bool = False, *args: Any, **kwargs: Any, ) -> None: self.websiteURL = website_url self.websiteKey = website_key self.minScore = min_score self.pageAction = page_action self.isEnterprise = is_enterprise super().__init__(*args, **kwargs)
[docs] def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) data["websiteURL"] = self.websiteURL data["websiteKey"] = self.websiteKey data["minScore"] = self.minScore data["pageAction"] = self.pageAction data["isEnterprise"] = self.isEnterprise return data
[docs] class HCaptchaTaskProxyless(BaseTask): """Solve an hCaptcha challenge without a proxy. After the job completes, retrieve the token with :meth:`Job.get_solution_response`. :param website_url: Full URL of the page where the captcha appears. :param website_key: The ``data-sitekey`` value from the hCaptcha element. """ type = "HCaptchaTaskProxyless" websiteURL = None websiteKey = None def __init__(self, website_url: str, website_key: str, *args: Any, **kwargs: Any) -> None: self.websiteURL = website_url self.websiteKey = website_key super().__init__(*args, **kwargs)
[docs] def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) data["websiteURL"] = self.websiteURL data["websiteKey"] = self.websiteKey return data
[docs] class HCaptchaTask(ProxyMixin, UserAgentMixin, CookieMixin, HCaptchaTaskProxyless): """Solve an hCaptcha challenge through a proxy. Same as :class:`HCaptchaTaskProxyless` but additionally requires proxy, user-agent, and optional cookie parameters. """ type = "HCaptchaTask"
[docs] class RecaptchaV2EnterpriseTaskProxyless(BaseTask): """Solve a Google ReCAPTCHA v2 Enterprise challenge without a proxy. Use this for sites that use the Enterprise version of ReCAPTCHA v2. After the job completes, retrieve the token with :meth:`Job.get_solution_response`. :param website_url: Full URL of the page where the captcha appears. :param website_key: The ``data-sitekey`` value from the ReCAPTCHA element. :param enterprise_payload: Optional dictionary with Enterprise-specific parameters (e.g. ``{"s": "...", "action": "..."}``) or ``None``. :param api_domain: Custom API domain if the site uses a non-standard ReCAPTCHA endpoint (e.g. ``"recaptcha.net"``) or ``None``. """ type = "RecaptchaV2EnterpriseTaskProxyless" websiteURL = None websiteKey = None enterprisePayload = None apiDomain = None def __init__( self, website_url: str, website_key: str, enterprise_payload: dict[str, Any] | None, api_domain: str | None, *args: Any, **kwargs: Any, ) -> None: self.websiteURL = website_url self.websiteKey = website_key self.enterprisePayload = enterprise_payload self.apiDomain = api_domain super().__init__(*args, **kwargs)
[docs] def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) data["websiteURL"] = self.websiteURL data["websiteKey"] = self.websiteKey if self.enterprisePayload: data["enterprisePayload"] = self.enterprisePayload if self.apiDomain: data["apiDomain"] = self.apiDomain return data
[docs] class RecaptchaV2EnterpriseTask(ProxyMixin, UserAgentMixin, CookieMixin, RecaptchaV2EnterpriseTaskProxyless): """Solve a Google ReCAPTCHA v2 Enterprise challenge through a proxy. Same as :class:`RecaptchaV2EnterpriseTaskProxyless` but additionally requires proxy, user-agent, and optional cookie parameters. """ type = "RecaptchaV2EnterpriseTask"
[docs] class GeeTestTaskProxyless(BaseTask): """Solve a GeeTest (slide / click) captcha without a proxy. After the job completes, use :meth:`Job.get_solution` to get the full solution dictionary containing ``challenge``, ``validate``, and ``seccode``. :param website_url: Full URL of the page where the captcha appears. :param gt: The ``gt`` parameter value from the GeeTest script. :param challenge: The ``challenge`` token obtained from the GeeTest API. :param subdomain: Custom GeeTest API subdomain, if the site uses one. :param lib: Custom ``getLib`` parameter value, if required. """ type = "GeeTestTaskProxyless" websiteURL = None gt = None challenge = None geetestApiServerSubdomain = None geetestGetLib = None def __init__( self, website_url: str, gt: str, challenge: str, subdomain: str | None = None, lib: str | None = None, *args: Any, **kwargs: Any, ) -> None: self.websiteURL = website_url self.gt = gt self.challenge = challenge self.geetestApiServerSubdomain = subdomain self.geetestGetLib = lib super().__init__(*args, **kwargs)
[docs] def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) data["websiteURL"] = self.websiteURL data["gt"] = self.gt data["challenge"] = self.challenge if self.geetestApiServerSubdomain: data["geetestApiServerSubdomain"] = self.geetestApiServerSubdomain if self.geetestGetLib: data["geetestGetLib"] = self.geetestGetLib return data
[docs] class GeeTestTask(ProxyMixin, UserAgentMixin, GeeTestTaskProxyless): """Solve a GeeTest captcha through a proxy. Same as :class:`GeeTestTaskProxyless` but additionally requires proxy and user-agent parameters. """ type = "GeeTestTask"
[docs] class AntiGateTaskProxyless(BaseTask): """Solve a custom AntiGate task using a predefined template. AntiGate tasks use templates to automate complex browser-based actions. Browse available templates at https://anti-captcha.com/antigate. After the job completes, use :meth:`Job.get_solution` to get the full solution dictionary. :param website_url: Full URL of the page to process. :param template_name: Name of the AntiGate template (e.g. ``"Sign up on MailChimp"``). :param variables: Dictionary of template variables (keys and values depend on the template). """ type = "AntiGateTask" websiteURL = None templateName = None variables = None def __init__( self, website_url: str, template_name: str, variables: dict[str, Any], *args: Any, **kwargs: Any, ) -> None: self.websiteURL = website_url self.templateName = template_name self.variables = variables super().__init__(*args, **kwargs)
[docs] def serialize(self, **result: Any) -> dict[str, Any]: data = super().serialize(**result) data["websiteURL"] = self.websiteURL data["templateName"] = self.templateName data["variables"] = self.variables return data
[docs] class AntiGateTask(ProxyMixin, AntiGateTaskProxyless): """Solve a custom AntiGate task through a proxy. Same as :class:`AntiGateTaskProxyless` but additionally requires proxy parameters. """