diff --git a/src/askui/tools/playwright/agent_os.py b/src/askui/tools/playwright/agent_os.py index 5f46e837..db199f70 100644 --- a/src/askui/tools/playwright/agent_os.py +++ b/src/askui/tools/playwright/agent_os.py @@ -2,6 +2,7 @@ import io import subprocess +from pathlib import Path from typing import Literal from PIL import Image @@ -9,6 +10,7 @@ Browser, BrowserContext, BrowserType, + Download, Page, Playwright, ViewportSize, @@ -22,6 +24,29 @@ from ..agent_os import AgentOs, Display, DisplaySize, InputEvent, ModifierKey, PcKey +def _to_unique_path(path: Path) -> Path: + """Return ``path`` or, if it already exists, a counter-suffixed variant. + + For example, if ``report.pdf`` exists, returns ``report (1).pdf``; if that + exists too, ``report (2).pdf``, and so on. This keeps existing files from + being overwritten. + + Args: + path (Path): The desired target path. + + Returns: + Path: A path that does not currently exist on disk. + """ + if not path.exists(): + return path + counter = 1 + while True: + candidate = path.with_name(f"{path.stem} ({counter}){path.suffix}") + if not candidate.exists(): + return candidate + counter += 1 + + class PlaywrightAgentOs(AgentOs): """Playwright-based implementation of `AgentOs`. @@ -45,6 +70,11 @@ class PlaywrightAgentOs(AgentOs): Defaults to `True`. install_dependencies (bool, optional): Whether to install system dependencies (requires root permissions). Defaults to `False`. + download_dir (str | Path | None, optional): Directory into which files + downloaded by the browser are automatically copied once they finish. + When ``None``, downloads are left in Playwright's temporary location + (and deleted when the browser closes). The directory is created if it + does not exist. Defaults to `None`. 
""" _REPORTER_ROLE_NAME: str = "PlaywrightAgentOS" @@ -58,6 +88,7 @@ def __init__( slow_mo: int = 0, install_browser: bool = True, install_dependencies: bool = False, + download_dir: str | Path | None = None, ) -> None: self._browser_type = browser_type self._headless = headless @@ -65,6 +96,7 @@ def __init__( self._slow_mo = slow_mo self._install_browser = install_browser self._install_dependencies = install_dependencies + self._download_dir = Path(download_dir) if download_dir is not None else None # Playwright objects self._playwright: Playwright | None = None @@ -77,6 +109,9 @@ def __init__( self._listening = False self._event_queue: list[InputEvent] = [] + # Files copied into `download_dir`, in the order they finished + self._downloaded_files: list[Path] = [] + def _install_playwright_browser(self) -> None: """Install Playwright browser if requested.""" if not self._install_browser: @@ -162,6 +197,7 @@ def connect(self) -> None: ) self._page = self._context.new_page() + self._page.on("download", self._on_download) # Navigate to a blank page to ensure we have a working page self._page.goto("data:text/html,

Starting...

def _on_download(self, download: Download) -> None:
    """Copy a finished download into `download_dir`.

    Registered as the page's ``download`` event handler. When `download_dir`
    is configured, the file is saved there under its suggested filename
    (auto-renamed on collision); otherwise the download is left untouched in
    Playwright's temporary location. Failures are reported but never
    propagated, so a failed download cannot break the automation run.

    The configured directory is ``~``-expanded and made absolute before use,
    so the paths recorded in `downloaded_files` are absolute.

    Args:
        download (Download): The Playwright download to persist.
    """
    if self._download_dir is None:
        return
    # Use only the filename component to avoid path traversal from a
    # server-suggested name such as "../../etc/passwd".
    suggested_name = Path(download.suggested_filename).name
    # `Path("").name` is "" and `Path("..").name` is "..": neither is a
    # usable file name, so fall back to a neutral default.
    if suggested_name in ("", ".", ".."):
        suggested_name = "download"
    try:
        # Everything that touches the filesystem stays inside the `try`:
        # home expansion, directory creation and the existence probes done
        # by `_to_unique_path` can all raise.
        download_dir = self._download_dir.expanduser().absolute()
        download_dir.mkdir(parents=True, exist_ok=True)
        target = _to_unique_path(download_dir / suggested_name)
        download.save_as(target)
    except Exception as e:  # noqa: BLE001 - never let a download break the run
        self._reporter.add_message(
            self._REPORTER_ROLE_NAME,
            f"Failed to save download '{suggested_name}': {e}",
        )
        return
    self._downloaded_files.append(target)
    self._reporter.add_message(
        self._REPORTER_ROLE_NAME,
        f"Downloaded file saved to {target}",
    )
class WebSaveScreenshotTool(PlaywrightBaseTool):
    """
    Tool for saving screenshots of the currently active web page to disk.

    This tool captures a screenshot of the current browser page and saves
    it to a specified location on the filesystem. The screenshot is saved as a PNG
    image file. The directory structure will be created automatically if it doesn't
    exist. Paths that would resolve outside of `base_dir` are rejected.

    Args:
        base_dir (str): The base directory path where screenshots will be saved.
            All screenshot paths will be relative to this directory.

    Example:
        ```python
        from askui import WebVisionAgent
        from askui.tools.store.web import WebSaveScreenshotTool

        with WebVisionAgent() as agent:
            agent.act(
                "Take a screenshot and save it as demo/demo.png",
                tools=[WebSaveScreenshotTool(base_dir="/path/to/screenshots")]
            )
        ```

    Example:
        ```python
        from askui import WebVisionAgent
        from askui.tools.store.web import WebSaveScreenshotTool

        with WebVisionAgent(
            act_tools=[WebSaveScreenshotTool(base_dir="/path/to/screenshots")]
        ) as agent:
            agent.act("Take a screenshot and save it as demo/demo.png")
        ```
    """

    def __init__(self, base_dir: str) -> None:
        super().__init__(
            name="save_screenshot_tool",
            description=(
                "Saves a screenshot of the currently active web page "
                "to disk as a PNG image file. The screenshot is captured from the "
                "currently active browser page. The directory structure for the "
                "specified path will be created automatically if it doesn't exist. "
                "The PNG extension is automatically appended to the provided path."
            ),
            input_schema={
                "type": "object",
                "properties": {
                    "image_path": {
                        "type": "string",
                        "description": (
                            "The relative path where the screenshot should be saved, "
                            "without the PNG extension. The path is relative to the "
                            "base directory specified during tool initialization. "
                            "For example, if base_dir is '/screenshots' and "
                            "image_path is 'test/my_screenshot', the file will be "
                            "saved as '/screenshots/test/my_screenshot.png'. "
                            "Subdirectories will be created automatically if needed."
                        ),
                    },
                },
                "required": ["image_path"],
            },
        )
        self._base_dir = base_dir
        self.is_cacheable = True

    def __call__(self, image_path: str) -> str:
        """
        Save a screenshot of the current web page to disk.

        Args:
            image_path (str): The relative path where the screenshot should be saved,
                without the PNG extension. The path is relative to the base directory
                specified during tool initialization.

        Returns:
            str: A confirmation message indicating where the screenshot was saved,
                including the full absolute path.

        Raises:
            ValueError: If `image_path` resolves to a location outside of the
                base directory (e.g. an absolute path or one containing `..`).
        """
        base_dir = Path(self._base_dir).resolve()
        absolute_image_path = (base_dir / f"{image_path}.png").resolve()
        # `image_path` is chosen by the model. Joining an absolute path
        # discards `base_dir`, and `..` segments climb out of it, so verify
        # containment on the fully resolved path before writing anything.
        if not absolute_image_path.is_relative_to(base_dir):
            error_msg = (
                f"image_path '{image_path}' resolves outside of the base "
                f"directory '{base_dir}'"
            )
            raise ValueError(error_msg)
        absolute_image_path.parent.mkdir(parents=True, exist_ok=True)

        image = self.agent_os.screenshot()
        image.save(absolute_image_path, format="PNG")
        return f"Screenshot of the current web page saved to {absolute_image_path}."
def _trigger_download(agent_os: PlaywrightAgentOs) -> None:
    """Load the download page in the agent's browser page and click its link.

    Blocks for a fixed two seconds afterwards so the ``download`` event can
    fire and the file can be written before the caller inspects the results.

    Args:
        agent_os (PlaywrightAgentOs): A connected agent OS whose page is used.
    """
    active_page = agent_os._page
    assert active_page is not None
    active_page.set_content(_DOWNLOAD_PAGE)
    active_page.click("#dl")
    # Give the download event time to fire and the file to be written.
    active_page.wait_for_timeout(2000)
class TestToUniquePath:
    """Unit tests for the collision-avoiding `_to_unique_path` helper."""

    def test_returns_path_unchanged_when_free(self, tmp_path: Path) -> None:
        # Nothing exists at the requested location, so it is used as-is.
        wanted = tmp_path / "report.pdf"
        result = _to_unique_path(wanted)
        assert result == wanted

    def test_appends_counter_when_path_exists(self, tmp_path: Path) -> None:
        # A single collision yields the " (1)" variant.
        wanted = tmp_path / "report.pdf"
        wanted.write_text("first", encoding="utf-8")
        expected = tmp_path / "report (1).pdf"
        assert _to_unique_path(wanted) == expected

    def test_increments_counter_until_free(self, tmp_path: Path) -> None:
        # Occupy the base name and the first two counter variants.
        occupied = (
            ("report.pdf", "a"),
            ("report (1).pdf", "b"),
            ("report (2).pdf", "c"),
        )
        for file_name, content in occupied:
            (tmp_path / file_name).write_text(content, encoding="utf-8")
        expected = tmp_path / "report (3).pdf"
        assert _to_unique_path(tmp_path / "report.pdf") == expected

    def test_handles_name_without_suffix(self, tmp_path: Path) -> None:
        # Without an extension the counter is simply appended to the name.
        wanted = tmp_path / "archive"
        wanted.write_text("x", encoding="utf-8")
        expected = tmp_path / "archive (1)"
        assert _to_unique_path(wanted) == expected