From a1dfaf51e2400f4698bbd076c56473b0ba6303bf Mon Sep 17 00:00:00 2001
From: Maximilian Bosch <maximilian@mbosch.me>
Date: Mon, 17 Mar 2025 17:27:59 +0000
Subject: [PATCH 1/6] nixos/test-driver: integrate Python `unittest` assertions

Replaces / Closes #345948

I tried to integrate `pytest` assertions because I like the reporting,
but I only managed to get the very basic thing and even that was messing
around a lot with its internals.

The approach in #345948 shifts too much maintenance effort to us, so
it's not really desirable either.

After discussing with Benoit on Ocean Sprint about this, we decided that
it's probably the best compromise to integrate `unittest`: it also
provides good diffs when needed, but the downside is that existing tests
don't benefit from it.

This patch essentially does the following things:

* Add a new global `t` that is an instance of a `unittest.TestCase`
  class. I decided to just go for `t` given that e.g.
  `tester.assertEqual` (or any other longer name) seems quite verbose.

* Use a special class for errors that get special treatment:
  * The traceback is minimized to only include frames from the
    testScript: in this case I don't really care about anything else and
    IMHO that's just visual noise.

    This is not the case for other exceptions since these may indicate a
    bug and then people should be able to send the full traceback to the
    maintainers.
  * Display the error, but with `!!!` as prefix to make sure it's
    easier to spot in between other logs.

This looks e.g. like

    !!! Traceback (most recent call last):
    !!!   File "<string>", line 7, in <module>
    !!!     foo()
    !!!   File "<string>", line 5, in foo
    !!!     t.assertEqual({"foo":[1,2,{"foo":"bar"}]},{"foo":[1,2,{"bar":"foo"}],"bar":[1,2,3,4,"foo"]})
    !!!
    !!! NixOSAssertionError: {'foo': [1, 2, {'foo': 'bar'}]} != {'foo': [1, 2, {'bar': 'foo'}], 'bar': [1, 2, 3, 4, 'foo']}
    !!! - {'foo': [1, 2, {'foo': 'bar'}]}
    !!! + {'bar': [1, 2, 3, 4, 'foo'], 'foo': [1, 2, {'bar': 'foo'}]}
    cleanup
    kill machine (pid 9)
    qemu-system-x86_64: terminating on signal 15 from pid 6 (/nix/store/wz0j2zi02rvnjiz37nn28h3gfdq61svz-python3-3.12.9/bin/python3.12)
    kill vlan (pid 7)
    (finished: cleanup, in 0.00 seconds)

Co-authored-by: bew <bew@users.noreply.github.com>
---
 .../writing-nixos-tests.section.md            |  5 ++-
 .../lib/test-driver/src/test_driver/driver.py | 38 ++++++++++++++++++-
 .../lib/test-driver/src/test_driver/logger.py | 19 ++++++++++
 nixos/lib/test-script-prepend.py              |  2 +
 4 files changed, 61 insertions(+), 3 deletions(-)
diff --git a/nixos/doc/manual/development/writing-nixos-tests.section.md b/nixos/doc/manual/development/writing-nixos-tests.section.md
index bd588e2ba80b..5b08975e5ea4 100644
--- a/nixos/doc/manual/development/writing-nixos-tests.section.md
+++ b/nixos/doc/manual/development/writing-nixos-tests.section.md
@@ -121,8 +121,7 @@ and checks that the output is more-or-less correct:
 ```py
 machine.start()
 machine.wait_for_unit("default.target")
-if not "Linux" in machine.succeed("uname"):
-  raise Exception("Wrong OS")
+t.assertIn("Linux", machine.succeed("uname"), "Wrong OS")
 ```
 
 The first line is technically unnecessary; machines are implicitly started
@@ -134,6 +133,8 @@ starting them in parallel:
 start_all()
 ```
 
+Under the variable `t`, all assertions from [`unittest.TestCase`](https://docs.python.org/3/library/unittest.html) are available.
+
 If the hostname of a node contains characters that can't be used in a
 Python variable name, those characters will be replaced with
 underscores in the variable name, so `nodes.machine-a` will be exposed
diff --git a/nixos/lib/test-driver/src/test_driver/driver.py b/nixos/lib/test-driver/src/test_driver/driver.py
index 6061c1bc09b8..8a878e9a7558 100644
--- a/nixos/lib/test-driver/src/test_driver/driver.py
+++ b/nixos/lib/test-driver/src/test_driver/driver.py
@@ -1,12 +1,15 @@
 import os
 import re
 import signal
+import sys
 import tempfile
 import threading
+import traceback
 from collections.abc import Callable, Iterator
 from contextlib import AbstractContextManager, contextmanager
 from pathlib import Path
 from typing import Any
+from unittest import TestCase
 
 from test_driver.logger import AbstractLogger
 from test_driver.machine import Machine, NixStartScript, retry
@@ -38,6 +41,14 @@ def pythonize_name(name: str) -> str:
     return re.sub(r"^[^A-z_]|[^A-z0-9_]", "_", name)
 
 
+class NixOSAssertionError(AssertionError):
+    pass
+
+
+class Tester(TestCase):
+    failureException = NixOSAssertionError
+
+
 class Driver:
     """A handle to the driver that sets up the environment
     and runs the tests"""
@@ -140,6 +151,7 @@ class Driver:
             serial_stdout_on=self.serial_stdout_on,
             polling_condition=self.polling_condition,
             Machine=Machine,  # for typing
+            t=Tester(),
         )
         machine_symbols = {pythonize_name(m.name): m for m in self.machines}
         # If there's exactly one machine, make it available under the name
@@ -163,7 +175,31 @@ class Driver:
         """Run the test script"""
         with self.logger.nested("run the VM test script"):
             symbols = self.test_symbols()  # call eagerly
-            exec(self.tests, symbols, None)
+            try:
+                exec(self.tests, symbols, None)
+            except NixOSAssertionError:
+                exc_type, exc, tb = sys.exc_info()
+                filtered = [
+                    frame
+                    for frame in traceback.extract_tb(tb)
+                    if frame.filename == "<string>"
+                ]
+
+                self.logger.log_test_error("Traceback (most recent call last):")
+                code = self.tests.splitlines()
+                for frame, line in zip(filtered, traceback.format_list(filtered)):
+                    self.logger.log_test_error(line.rstrip())
+                    if lineno := frame.lineno:
+                        self.logger.log_test_error(
+                            f"    {code[lineno - 1].strip()}",
+                        )
+
+                self.logger.log_test_error("") # blank line for readability
+                exc_prefix = exc_type.__name__ if exc_type is not None else "Error"
+                for line in f"{exc_prefix}: {exc}".splitlines():
+                    self.logger.log_test_error(line)
+
+                sys.exit(1)
 
     def run_tests(self) -> None:
         """Run the test script (for non-interactive test runs)"""
diff --git a/nixos/lib/test-driver/src/test_driver/logger.py b/nixos/lib/test-driver/src/test_driver/logger.py
index 564d39f4f055..fa195080fa2b 100644
--- a/nixos/lib/test-driver/src/test_driver/logger.py
+++ b/nixos/lib/test-driver/src/test_driver/logger.py
@@ -44,6 +44,10 @@ class AbstractLogger(ABC):
     def error(self, *args, **kwargs) -> None:  # type: ignore
         pass
 
+    @abstractmethod
+    def log_test_error(self, *args, **kwargs) -> None:  # type:ignore
+        pass
+
     @abstractmethod
     def log_serial(self, message: str, machine: str) -> None:
         pass
@@ -97,6 +101,9 @@ class JunitXMLLogger(AbstractLogger):
         self.tests[self.currentSubtest].stderr += args[0] + os.linesep
         self.tests[self.currentSubtest].failure = True
 
+    def log_test_error(self, *args, **kwargs) -> None:  # type: ignore
+        self.error(*args, **kwargs)
+
     def log_serial(self, message: str, machine: str) -> None:
         if not self._print_serial_logs:
             return
@@ -156,6 +163,10 @@ class CompositeLogger(AbstractLogger):
         for logger in self.logger_list:
             logger.warning(*args, **kwargs)
 
+    def log_test_error(self, *args, **kwargs) -> None:  # type: ignore
+        for logger in self.logger_list:
+            logger.log_test_error(*args, **kwargs)
+
     def error(self, *args, **kwargs) -> None:  # type: ignore
         for logger in self.logger_list:
             logger.error(*args, **kwargs)
@@ -222,6 +233,11 @@ class TerminalLogger(AbstractLogger):
 
         self._eprint(Style.DIM + f"{machine} # {message}" + Style.RESET_ALL)
 
+    def log_test_error(self, *args, **kwargs) -> None:  # type: ignore
+        prefix = Fore.RED + "!!! " + Style.RESET_ALL
+        # NOTE: using `warning` instead of `error` to ensure it does not exit after printing the first log
+        self.warning(f"{prefix}{args[0]}", *args[1:], **kwargs)
+
 
 class XMLLogger(AbstractLogger):
     def __init__(self, outfile: str) -> None:
@@ -261,6 +277,9 @@ class XMLLogger(AbstractLogger):
     def error(self, *args, **kwargs) -> None:  # type: ignore
         self.log(*args, **kwargs)
 
+    def log_test_error(self, *args, **kwargs) -> None:  # type: ignore
+        self.log(*args, **kwargs)
+
     def log(self, message: str, attributes: dict[str, str] = {}) -> None:
         self.drain_log_queue()
         self.log_line(message, attributes)
diff --git a/nixos/lib/test-script-prepend.py b/nixos/lib/test-script-prepend.py
index 9d2efdf97303..31dad14ef8dd 100644
--- a/nixos/lib/test-script-prepend.py
+++ b/nixos/lib/test-script-prepend.py
@@ -8,6 +8,7 @@ from test_driver.logger import AbstractLogger
 from typing import Callable, Iterator, ContextManager, Optional, List, Dict, Any, Union
 from typing_extensions import Protocol
 from pathlib import Path
+from unittest import TestCase
 
 
 class RetryProtocol(Protocol):
@@ -51,3 +52,4 @@ join_all: Callable[[], None]
 serial_stdout_off: Callable[[], None]
 serial_stdout_on: Callable[[], None]
 polling_condition: PollingConditionProtocol
+t: TestCase

From 11ff96a6795889a3d59b2ec0e9d587b9cf15ed5c Mon Sep 17 00:00:00 2001
From: Maximilian Bosch <maximilian@mbosch.me>
Date: Tue, 18 Mar 2025 11:02:47 +0000
Subject: [PATCH 2/6] nixos/test-driver: use
 RequestedAssertionFailed/TestScriptError in Machine class

I think it's reasonable to also have this kind of visual distinction
here between test failures and actual errors from the test framework.

A failing `machine.require_unit_state` now lookgs like this for
instance:

    !!! Traceback (most recent call last):
    !!!   File "<string>", line 3, in <module>
    !!!     machine.require_unit_state("postgresql","active")
    !!!
    !!! RequestedAssertionFailed: Expected unit 'postgresql' to to be in state 'active' but it is in state 'inactive'

Co-authored-by: Benoit de Chezelles <bew@users.noreply.github.com>
---
 nixos/lib/test-driver/src/pyproject.toml      |  2 +-
 .../lib/test-driver/src/test_driver/driver.py | 34 +++++++++++--------
 .../lib/test-driver/src/test_driver/errors.py | 23 +++++++++++++
 .../test-driver/src/test_driver/machine.py    | 31 ++++++++++-------
 4 files changed, 62 insertions(+), 28 deletions(-)
 create mode 100644 nixos/lib/test-driver/src/test_driver/errors.py

diff --git a/nixos/lib/test-driver/src/pyproject.toml b/nixos/lib/test-driver/src/pyproject.toml
index ac83eed268d9..fa4e6a2de127 100644
--- a/nixos/lib/test-driver/src/pyproject.toml
+++ b/nixos/lib/test-driver/src/pyproject.toml
@@ -21,7 +21,7 @@ target-version = "py312"
 line-length = 88
 
 lint.select = ["E", "F", "I", "U", "N"]
-lint.ignore = ["E501"]
+lint.ignore = ["E501", "N818"]
 
 # xxx: we can import https://pypi.org/project/types-colorama/ here
 [[tool.mypy.overrides]]
diff --git a/nixos/lib/test-driver/src/test_driver/driver.py b/nixos/lib/test-driver/src/test_driver/driver.py
index 8a878e9a7558..6837a0f694dc 100644
--- a/nixos/lib/test-driver/src/test_driver/driver.py
+++ b/nixos/lib/test-driver/src/test_driver/driver.py
@@ -11,6 +11,7 @@ from pathlib import Path
 from typing import Any
 from unittest import TestCase
 
+from test_driver.errors import RequestedAssertionFailed, TestScriptError
 from test_driver.logger import AbstractLogger
 from test_driver.machine import Machine, NixStartScript, retry
 from test_driver.polling_condition import PollingCondition
@@ -19,6 +20,18 @@ from test_driver.vlan import VLan
 SENTINEL = object()
 
 
+class AssertionTester(TestCase):
+    """
+    Subclass of `unittest.TestCase` which is used in the
+    `testScript` to perform assertions.
+
+    It throws a custom exception whose parent class
+    gets special treatment in the logs.
+    """
+
+    failureException = RequestedAssertionFailed
+
+
 def get_tmp_dir() -> Path:
     """Returns a temporary directory that is defined by TMPDIR, TEMP, TMP or CWD
     Raises an exception in case the retrieved temporary directory is not writeable
@@ -41,14 +54,6 @@ def pythonize_name(name: str) -> str:
     return re.sub(r"^[^A-z_]|[^A-z0-9_]", "_", name)
 
 
-class NixOSAssertionError(AssertionError):
-    pass
-
-
-class Tester(TestCase):
-    failureException = NixOSAssertionError
-
-
 class Driver:
     """A handle to the driver that sets up the environment
     and runs the tests"""
@@ -126,7 +131,7 @@ class Driver:
             try:
                 yield
             except Exception as e:
-                self.logger.error(f'Test "{name}" failed with error: "{e}"')
+                self.logger.log_test_error(f'Test "{name}" failed with error: "{e}"')
                 raise e
 
     def test_symbols(self) -> dict[str, Any]:
@@ -151,7 +156,7 @@ class Driver:
             serial_stdout_on=self.serial_stdout_on,
             polling_condition=self.polling_condition,
             Machine=Machine,  # for typing
-            t=Tester(),
+            t=AssertionTester(),
         )
         machine_symbols = {pythonize_name(m.name): m for m in self.machines}
         # If there's exactly one machine, make it available under the name
@@ -177,7 +182,7 @@ class Driver:
             symbols = self.test_symbols()  # call eagerly
             try:
                 exec(self.tests, symbols, None)
-            except NixOSAssertionError:
+            except TestScriptError:
                 exc_type, exc, tb = sys.exc_info()
                 filtered = [
                     frame
@@ -186,15 +191,14 @@ class Driver:
                 ]
 
                 self.logger.log_test_error("Traceback (most recent call last):")
+
                 code = self.tests.splitlines()
                 for frame, line in zip(filtered, traceback.format_list(filtered)):
                     self.logger.log_test_error(line.rstrip())
                     if lineno := frame.lineno:
-                        self.logger.log_test_error(
-                            f"    {code[lineno - 1].strip()}",
-                        )
+                        self.logger.log_test_error(f"    {code[lineno - 1].strip()}")
 
-                self.logger.log_test_error("") # blank line for readability
+                self.logger.log_test_error("")  # blank line for readability
                 exc_prefix = exc_type.__name__ if exc_type is not None else "Error"
                 for line in f"{exc_prefix}: {exc}".splitlines():
                     self.logger.log_test_error(line)
diff --git a/nixos/lib/test-driver/src/test_driver/errors.py b/nixos/lib/test-driver/src/test_driver/errors.py
new file mode 100644
index 000000000000..0b52151d5c2c
--- /dev/null
+++ b/nixos/lib/test-driver/src/test_driver/errors.py
@@ -0,0 +1,23 @@
+class TestScriptError(Exception):
+    """
+    The base error class to indicate that the test script failed.
+    This (and its subclasses) get special treatment, i.e. only stack
+    frames from `testScript` are printed and the error gets prefixed
+    with `!!!` to make it easier to spot between other log-lines.
+
+    This class is used for errors that aren't an actual test failure,
+    but also not a bug in the driver, e.g. failing OCR.
+    """
+
+
+class RequestedAssertionFailed(TestScriptError):
+    """
+    Subclass of `TestScriptError` that gets special treatment.
+
+    Exception raised when a requested assertion fails,
+    e.g. `machine.succeed(...)` or `t.assertEqual(...)`.
+
+    This is separate from the base error class, to have a dedicated class name
+    that better represents this kind of failures.
+    (better readability in test output)
+    """
diff --git a/nixos/lib/test-driver/src/test_driver/machine.py b/nixos/lib/test-driver/src/test_driver/machine.py
index 4faa37726508..1a1d2075973f 100644
--- a/nixos/lib/test-driver/src/test_driver/machine.py
+++ b/nixos/lib/test-driver/src/test_driver/machine.py
@@ -18,6 +18,7 @@ from pathlib import Path
 from queue import Queue
 from typing import Any
 
+from test_driver.errors import RequestedAssertionFailed, TestScriptError
 from test_driver.logger import AbstractLogger
 
 from .qmp import QMPSession
@@ -128,7 +129,7 @@ def _preprocess_screenshot(screenshot_path: str, negate: bool = False) -> str:
     )
 
     if ret.returncode != 0:
-        raise Exception(
+        raise TestScriptError(
             f"Image processing failed with exit code {ret.returncode}, stdout: {ret.stdout.decode()}, stderr: {ret.stderr.decode()}"
         )
 
@@ -139,7 +140,7 @@ def _perform_ocr_on_screenshot(
     screenshot_path: str, model_ids: Iterable[int]
 ) -> list[str]:
     if shutil.which("tesseract") is None:
-        raise Exception("OCR requested but enableOCR is false")
+        raise TestScriptError("OCR requested but enableOCR is false")
 
     processed_image = _preprocess_screenshot(screenshot_path, negate=False)
     processed_negative = _preprocess_screenshot(screenshot_path, negate=True)
@@ -162,7 +163,7 @@ def _perform_ocr_on_screenshot(
                 capture_output=True,
             )
             if ret.returncode != 0:
-                raise Exception(f"OCR failed with exit code {ret.returncode}")
+                raise TestScriptError(f"OCR failed with exit code {ret.returncode}")
             model_results.append(ret.stdout.decode("utf-8"))
 
     return model_results
@@ -179,7 +180,9 @@ def retry(fn: Callable, timeout: int = 900) -> None:
         time.sleep(1)
 
     if not fn(True):
-        raise Exception(f"action timed out after {timeout} seconds")
+        raise RequestedAssertionFailed(
+            f"action timed out after {timeout} tries with one-second pause in-between"
+        )
 
 
 class StartCommand:
@@ -402,14 +405,14 @@ class Machine:
         def check_active(_last_try: bool) -> bool:
             state = self.get_unit_property(unit, "ActiveState", user)
             if state == "failed":
-                raise Exception(f'unit "{unit}" reached state "{state}"')
+                raise RequestedAssertionFailed(f'unit "{unit}" reached state "{state}"')
 
             if state == "inactive":
                 status, jobs = self.systemctl("list-jobs --full 2>&1", user)
                 if "No jobs" in jobs:
                     info = self.get_unit_info(unit, user)
                     if info["ActiveState"] == state:
-                        raise Exception(
+                        raise RequestedAssertionFailed(
                             f'unit "{unit}" is inactive and there are no pending jobs'
                         )
 
@@ -424,7 +427,7 @@ class Machine:
     def get_unit_info(self, unit: str, user: str | None = None) -> dict[str, str]:
         status, lines = self.systemctl(f'--no-pager show "{unit}"', user)
         if status != 0:
-            raise Exception(
+            raise RequestedAssertionFailed(
                 f'retrieving systemctl info for unit "{unit}"'
                 + ("" if user is None else f' under user "{user}"')
                 + f" failed with exit code {status}"
@@ -454,7 +457,7 @@ class Machine:
             user,
         )
         if status != 0:
-            raise Exception(
+            raise RequestedAssertionFailed(
                 f'retrieving systemctl property "{property}" for unit "{unit}"'
                 + ("" if user is None else f' under user "{user}"')
                 + f" failed with exit code {status}"
@@ -502,7 +505,7 @@ class Machine:
             info = self.get_unit_info(unit)
             state = info["ActiveState"]
             if state != require_state:
-                raise Exception(
+                raise RequestedAssertionFailed(
                     f"Expected unit '{unit}' to to be in state "
                     f"'{require_state}' but it is in state '{state}'"
                 )
@@ -656,7 +659,9 @@ class Machine:
                 (status, out) = self.execute(command, timeout=timeout)
                 if status != 0:
                     self.log(f"output: {out}")
-                    raise Exception(f"command `{command}` failed (exit code {status})")
+                    raise RequestedAssertionFailed(
+                        f"command `{command}` failed (exit code {status})"
+                    )
                 output += out
         return output
 
@@ -670,7 +675,9 @@ class Machine:
             with self.nested(f"must fail: {command}"):
                 (status, out) = self.execute(command, timeout=timeout)
                 if status == 0:
-                    raise Exception(f"command `{command}` unexpectedly succeeded")
+                    raise RequestedAssertionFailed(
+                        f"command `{command}` unexpectedly succeeded"
+                    )
                 output += out
         return output
 
@@ -915,7 +922,7 @@ class Machine:
             ret = subprocess.run(f"pnmtopng '{tmp}' > '{filename}'", shell=True)
             os.unlink(tmp)
             if ret.returncode != 0:
-                raise Exception("Cannot convert screenshot")
+                raise TestScriptError("Cannot convert screenshot")
 
     def copy_from_host_via_shell(self, source: str, target: str) -> None:
         """Copy a file from the host into the guest by piping it over the

From cc3d409adca4d8479faf64b2d9cfd67d523a56ec Mon Sep 17 00:00:00 2001
From: Maximilian Bosch <maximilian@mbosch.me>
Date: Thu, 20 Mar 2025 12:39:53 +0000
Subject: [PATCH 3/6] nixos/test-driver: log associated machine for
 `self.nested`

When doing `machine.succeed(...)` or something similar, it's now clear
that the command `...` was issued on `machine`.

Essentially, this results in the following diff in the log:

    -(finished: waiting for unit default.target, in 13.47 seconds)
    +machine: (finished: waiting for unit default.target, in 13.47 seconds)
    (finished: subtest: foobar text lorem ipsum, in 13.47 seconds)
---
 nixos/lib/test-driver/src/test_driver/logger.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nixos/lib/test-driver/src/test_driver/logger.py b/nixos/lib/test-driver/src/test_driver/logger.py
index fa195080fa2b..a218d234fe3f 100644
--- a/nixos/lib/test-driver/src/test_driver/logger.py
+++ b/nixos/lib/test-driver/src/test_driver/logger.py
@@ -213,7 +213,7 @@ class TerminalLogger(AbstractLogger):
         tic = time.time()
         yield
         toc = time.time()
-        self.log(f"(finished: {message}, in {toc - tic:.2f} seconds)")
+        self.log(f"(finished: {message}, in {toc - tic:.2f} seconds)", attributes)
 
     def info(self, *args, **kwargs) -> None:  # type: ignore
         self.log(*args, **kwargs)

From d587d569e0d6bb4c8dbdb6ddbd6e8e0d8c97bbd7 Mon Sep 17 00:00:00 2001
From: Maximilian Bosch <maximilian@mbosch.me>
Date: Fri, 21 Mar 2025 11:38:01 +0000
Subject: [PATCH 4/6] nixos/test-driver: restructure error classes

After a discussion with tfc, we agreed that we need a distinction
between errors where the user isn't at fault (e.g. OCR failing - now
called `MachineError`) and errors where the test actually failed (now
called `RequestedAssertionFailed`).

Both get special treatment from the error handler, i.e. a `!!!` prefix
to make it easier to spot visually.

However, only `RequestedAssertionFailed` gets the shortening of the
traceback, `MachineError` exceptions may be something to report and
maintainers usually want to see the full trace.

Suggested-by: Jacek Galowicz <jacek@galowicz.de>
---
 .../lib/test-driver/src/test_driver/driver.py |  8 ++++--
 .../lib/test-driver/src/test_driver/errors.py | 27 +++++++++----------
 .../test-driver/src/test_driver/machine.py    | 10 +++----
 3 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/nixos/lib/test-driver/src/test_driver/driver.py b/nixos/lib/test-driver/src/test_driver/driver.py
index 6837a0f694dc..56f29ffe24f1 100644
--- a/nixos/lib/test-driver/src/test_driver/driver.py
+++ b/nixos/lib/test-driver/src/test_driver/driver.py
@@ -11,7 +11,7 @@ from pathlib import Path
 from typing import Any
 from unittest import TestCase
 
-from test_driver.errors import RequestedAssertionFailed, TestScriptError
+from test_driver.errors import MachineError, RequestedAssertionFailed
 from test_driver.logger import AbstractLogger
 from test_driver.machine import Machine, NixStartScript, retry
 from test_driver.polling_condition import PollingCondition
@@ -182,7 +182,11 @@ class Driver:
             symbols = self.test_symbols()  # call eagerly
             try:
                 exec(self.tests, symbols, None)
-            except TestScriptError:
+            except MachineError:
+                for line in traceback.format_exc().splitlines():
+                    self.logger.log_test_error(line)
+                sys.exit(1)
+            except RequestedAssertionFailed:
                 exc_type, exc, tb = sys.exc_info()
                 filtered = [
                     frame
diff --git a/nixos/lib/test-driver/src/test_driver/errors.py b/nixos/lib/test-driver/src/test_driver/errors.py
index 0b52151d5c2c..449fef0d30c7 100644
--- a/nixos/lib/test-driver/src/test_driver/errors.py
+++ b/nixos/lib/test-driver/src/test_driver/errors.py
@@ -1,23 +1,20 @@
-class TestScriptError(Exception):
+class MachineError(Exception):
     """
-    The base error class to indicate that the test script failed.
-    This (and its subclasses) get special treatment, i.e. only stack
-    frames from `testScript` are printed and the error gets prefixed
-    with `!!!` to make it easier to spot between other log-lines.
+    Exception that indicates an error that is NOT the user's fault,
+    i.e. something went wrong without the test being necessarily invalid,
+    such as failing OCR.
 
-    This class is used for errors that aren't an actual test failure,
-    but also not a bug in the driver, e.g. failing OCR.
+    To make it easier to spot, this exception (and its subclasses)
+    get a `!!!` prefix in the log output.
     """
 
 
-class RequestedAssertionFailed(TestScriptError):
+class RequestedAssertionFailed(AssertionError):
     """
-    Subclass of `TestScriptError` that gets special treatment.
+    Special assertion that gets thrown on an assertion error,
+    e.g. a failing `t.assertEqual(...)` or `machine.succeed(...)`.
 
-    Exception raised when a requested assertion fails,
-    e.g. `machine.succeed(...)` or `t.assertEqual(...)`.
-
-    This is separate from the base error class, to have a dedicated class name
-    that better represents this kind of failures.
-    (better readability in test output)
+    This gets special treatment in error reporting: i.e. it gets
+    `!!!` as prefix just as `MachineError`, but all stack frames that are
+    not from `testScript` also get removed.
     """
diff --git a/nixos/lib/test-driver/src/test_driver/machine.py b/nixos/lib/test-driver/src/test_driver/machine.py
index 1a1d2075973f..322387392864 100644
--- a/nixos/lib/test-driver/src/test_driver/machine.py
+++ b/nixos/lib/test-driver/src/test_driver/machine.py
@@ -18,7 +18,7 @@ from pathlib import Path
 from queue import Queue
 from typing import Any
 
-from test_driver.errors import RequestedAssertionFailed, TestScriptError
+from test_driver.errors import MachineError, RequestedAssertionFailed
 from test_driver.logger import AbstractLogger
 
 from .qmp import QMPSession
@@ -129,7 +129,7 @@ def _preprocess_screenshot(screenshot_path: str, negate: bool = False) -> str:
     )
 
     if ret.returncode != 0:
-        raise TestScriptError(
+        raise MachineError(
             f"Image processing failed with exit code {ret.returncode}, stdout: {ret.stdout.decode()}, stderr: {ret.stderr.decode()}"
         )
 
@@ -140,7 +140,7 @@ def _perform_ocr_on_screenshot(
     screenshot_path: str, model_ids: Iterable[int]
 ) -> list[str]:
     if shutil.which("tesseract") is None:
-        raise TestScriptError("OCR requested but enableOCR is false")
+        raise MachineError("OCR requested but enableOCR is false")
 
     processed_image = _preprocess_screenshot(screenshot_path, negate=False)
     processed_negative = _preprocess_screenshot(screenshot_path, negate=True)
@@ -163,7 +163,7 @@ def _perform_ocr_on_screenshot(
                 capture_output=True,
             )
             if ret.returncode != 0:
-                raise TestScriptError(f"OCR failed with exit code {ret.returncode}")
+                raise MachineError(f"OCR failed with exit code {ret.returncode}")
             model_results.append(ret.stdout.decode("utf-8"))
 
     return model_results
@@ -922,7 +922,7 @@ class Machine:
             ret = subprocess.run(f"pnmtopng '{tmp}' > '{filename}'", shell=True)
             os.unlink(tmp)
             if ret.returncode != 0:
-                raise TestScriptError("Cannot convert screenshot")
+                raise MachineError("Cannot convert screenshot")
 
     def copy_from_host_via_shell(self, source: str, target: str) -> None:
         """Copy a file from the host into the guest by piping it over the

From e2b3517f598471dde1efad1b97ab1a418c6678c9 Mon Sep 17 00:00:00 2001
From: Maximilian Bosch <maximilian@mbosch.me>
Date: Fri, 21 Mar 2025 12:34:59 +0000
Subject: [PATCH 5/6] nixos/test-driver: use ipython via ptpython

Closes #180089

I realized that the previous commits relying on `sys.exit` for dealing
with `MachineError`/`RequestedAssertionFailed` exit the interactive
session which is kinda bad.

This patch uses the ipython driver: it seems to have equivalent features
such as auto-completion and doesn't stop on SystemExit being raised.

This also fixes other places where this happened such as things calling
`log.error` on the CompositeLogger.
---
 nixos/lib/test-driver/default.nix                 | 1 +
 nixos/lib/test-driver/src/test_driver/__init__.py | 9 ++++-----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/nixos/lib/test-driver/default.nix b/nixos/lib/test-driver/default.nix
index f22744806d48..91db5d8be3c2 100644
--- a/nixos/lib/test-driver/default.nix
+++ b/nixos/lib/test-driver/default.nix
@@ -31,6 +31,7 @@ python3Packages.buildPythonApplication {
       colorama
       junit-xml
       ptpython
+      ipython
     ]
     ++ extraPythonPackages python3Packages;
 
diff --git a/nixos/lib/test-driver/src/test_driver/__init__.py b/nixos/lib/test-driver/src/test_driver/__init__.py
index 1c0793aa75a5..26d8391017e8 100755
--- a/nixos/lib/test-driver/src/test_driver/__init__.py
+++ b/nixos/lib/test-driver/src/test_driver/__init__.py
@@ -3,7 +3,7 @@ import os
 import time
 from pathlib import Path
 
-import ptpython.repl
+import ptpython.ipython
 
 from test_driver.driver import Driver
 from test_driver.logger import (
@@ -136,11 +136,10 @@ def main() -> None:
         if args.interactive:
             history_dir = os.getcwd()
             history_path = os.path.join(history_dir, ".nixos-test-history")
-            ptpython.repl.embed(
-                driver.test_symbols(),
-                {},
+            ptpython.ipython.embed(
+                user_ns=driver.test_symbols(),
                 history_filename=history_path,
-            )
+            )  # type:ignore
         else:
             tic = time.time()
             driver.run_tests()

From deff22bcc8ba821efda81d567301a34a3d51b999 Mon Sep 17 00:00:00 2001
From: Maximilian Bosch <maximilian@mbosch.me>
Date: Sat, 22 Mar 2025 19:13:48 +0100
Subject: [PATCH 6/6] nixos/test-driver: improve wording on comments about new
 error handling

Co-authored-by: Benoit de Chezelles <bew@users.noreply.github.com>
---
 nixos/lib/test-driver/src/test_driver/driver.py | 2 ++
 nixos/lib/test-driver/src/test_driver/errors.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/nixos/lib/test-driver/src/test_driver/driver.py b/nixos/lib/test-driver/src/test_driver/driver.py
index 56f29ffe24f1..49b6692bf422 100644
--- a/nixos/lib/test-driver/src/test_driver/driver.py
+++ b/nixos/lib/test-driver/src/test_driver/driver.py
@@ -188,6 +188,8 @@ class Driver:
                 sys.exit(1)
             except RequestedAssertionFailed:
                 exc_type, exc, tb = sys.exc_info()
+                # We manually print the stack frames, keeping only the ones from the test script
+                # (note: because the script is not a real file, the frame filename is `<string>`)
                 filtered = [
                     frame
                     for frame in traceback.extract_tb(tb)
diff --git a/nixos/lib/test-driver/src/test_driver/errors.py b/nixos/lib/test-driver/src/test_driver/errors.py
index 449fef0d30c7..fe072b5185c9 100644
--- a/nixos/lib/test-driver/src/test_driver/errors.py
+++ b/nixos/lib/test-driver/src/test_driver/errors.py
@@ -15,6 +15,6 @@ class RequestedAssertionFailed(AssertionError):
     e.g. a failing `t.assertEqual(...)` or `machine.succeed(...)`.
 
     This gets special treatment in error reporting: i.e. it gets
-    `!!!` as prefix just as `MachineError`, but all stack frames that are
-    not from `testScript` also get removed.
+    `!!!` as prefix just as `MachineError`, but only stack frames coming
+    from `testScript` will show up in logs.
     """