From c0ab4e936cb316bb0ff75598e0ee8f231bdae6c8 Mon Sep 17 00:00:00 2001 From: Marcelo Trylesinski Date: Thu, 25 Jun 2026 17:00:52 +0200 Subject: [PATCH 1/2] Stop flagging snake_case is_error results as tool errors in OTel span The tools/call result match arm treated {"is_error": True} as a tool error, but serialize_server_result validates with by_name=False (alias-only), so the snake_case key is dropped and the client receives a success. The span then contradicted the wire response. Only CallToolResult and the camelCase {"isError": True} dict survive serialization as errors; drop the snake_case arm and assert the result stays a success. --- src/mcp/server/_otel.py | 3 ++- tests/server/test_otel.py | 11 +++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mcp/server/_otel.py b/src/mcp/server/_otel.py index ac4863205..28dd842a6 100644 --- a/src/mcp/server/_otel.py +++ b/src/mcp/server/_otel.py @@ -59,8 +59,9 @@ async def __call__(self, ctx: ServerRequestContext[Any, Any], call_next: CallNex span.set_status(StatusCode.ERROR, str(e)) raise if ctx.method == "tools/call": + # Only shapes that survive wire serialization (alias-only) are real tool errors. 
match result: - case CallToolResult(is_error=True) | {"isError": True} | {"is_error": True}: + case CallToolResult(is_error=True) | {"isError": True}: span.set_attribute("error.type", "tool_error") span.set_status(StatusCode.ERROR) case _: diff --git a/tests/server/test_otel.py b/tests/server/test_otel.py index d4505c25f..1da8567b3 100644 --- a/tests/server/test_otel.py +++ b/tests/server/test_otel.py @@ -92,7 +92,9 @@ async def err_tool(ctx: Ctx, params: CallToolRequestParams) -> CallToolResult: @pytest.mark.anyio -async def test_tool_error_snake_case_dict_result_sets_error_type(server: SrvT, spans: SpanCapture): +async def test_snake_case_dict_result_is_not_a_tool_error(server: SrvT, spans: SpanCapture): + # `is_error` is alias-only on the wire, so serialization drops it; the result reaches the + # client as a success and the span must not contradict that. async def err_tool(ctx: Ctx, params: CallToolRequestParams) -> dict[str, Any]: return {"content": [], "is_error": True} @@ -100,11 +102,12 @@ async def err_tool(ctx: Ctx, params: CallToolRequestParams) -> dict[str, Any]: server.middleware.append(OpenTelemetryMiddleware()) async with connected_runner(server) as (client, _): spans.clear() - await client.send_raw_request("tools/call", {"name": "mytool", "arguments": {}}) + result = await client.send_raw_request("tools/call", {"name": "mytool", "arguments": {}}) + assert result == {"content": []} [span] = [s for s in spans.finished() if s.kind == SpanKind.SERVER] assert span.attributes is not None - assert span.attributes["error.type"] == "tool_error" - assert span.status.status_code == StatusCode.ERROR + assert "error.type" not in span.attributes + assert span.status.status_code == StatusCode.UNSET @pytest.mark.anyio From 3eda836f872d0abcbeae536d0a16282f788e360c Mon Sep 17 00:00:00 2001 From: Marcelo Trylesinski Date: Thu, 25 Jun 2026 17:21:41 +0200 Subject: [PATCH 2/2] Document literal-bool limitation of tools/call error detection Expand the comment to note 
raw-dict isError is matched as a literal bool; non-bool coercible values that serialize to an error are left undetected, since no hand-written predicate matches pydantic's wire coercion. --- src/mcp/server/_otel.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mcp/server/_otel.py b/src/mcp/server/_otel.py index 28dd842a6..37e9d2855 100644 --- a/src/mcp/server/_otel.py +++ b/src/mcp/server/_otel.py @@ -59,7 +59,10 @@ async def __call__(self, ctx: ServerRequestContext[Any, Any], call_next: CallNex span.set_status(StatusCode.ERROR, str(e)) raise if ctx.method == "tools/call": - # Only shapes that survive wire serialization (alias-only) are real tool errors. + # Tool errors are detected pre-serialization, so only shapes that reach the wire as an error + # count: the model, or the camelCase alias (`is_error` is dropped by the alias-only wire + # validation). A raw-dict `isError` is matched as a literal bool only - non-bool coercible + # values (1, "true") would serialize to an error but are rare enough to leave undetected. match result: case CallToolResult(is_error=True) | {"isError": True}: span.set_attribute("error.type", "tool_error")