Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
security: add path sanitization to all download_* MCP tools
Addresses critical CWE-22 path traversal vulnerability identified by
Gemini Code Assist in PR review. All 8 download_* functions now pass
output_path through _sanitize_output_path() which:

- Rejects '..' traversal sequences
- Strips absolute paths (drive letters, leading slashes)
- Confines writes to ALLOWED_DOWNLOAD_DIR (default: ./downloads/)
- Blocks writes to sensitive directories (.ssh, .gnupg, System32, etc.)
- Configurable via NOTEBOOKLM_DOWNLOAD_DIR env var
  • Loading branch information
sonar5JR committed Mar 6, 2026
commit 127c76d67f98365d687f29b6f0391020bda8ee5d
100 changes: 92 additions & 8 deletions mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import asyncio
import json
import logging
import os
import sys
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Any

from mcp.server.fastmcp import FastMCP
Expand All @@ -23,6 +25,80 @@
# Global client reference
_client: NotebookLMClient | None = None

# ─── Safe download directory ─────────────────────────────────────────────────
# All artifact downloads are confined to this directory to prevent
# path-traversal attacks via prompt injection. Override with the
# NOTEBOOKLM_DOWNLOAD_DIR environment variable.
ALLOWED_DOWNLOAD_DIR = Path(
os.environ.get("NOTEBOOKLM_DOWNLOAD_DIR", "./downloads")
).resolve()

# Sensitive directory patterns that must NEVER be written to
_SENSITIVE_DIRS = {
".ssh", ".gnupg", ".config", ".aws", ".azure", ".kube",
"AppData", "System32", "SysWOW64", "Program Files",
"Program Files (x86)", "Windows",
}


def _sanitize_output_path(output_path: str) -> Path:
"""Sanitize an output_path to prevent path-traversal attacks.

Security guarantees:
1. Rejects paths containing '..' traversal sequences
2. Blocks writes to sensitive system directories
3. Confines all writes to ALLOWED_DOWNLOAD_DIR
4. Auto-creates the target directory if needed

Raises:
ValueError: If the path is unsafe or escapes the allowed directory.
"""
# Step 1: Reject any raw traversal sequences before resolution
if ".." in output_path:
raise ValueError(
f"Path traversal detected: output_path must not contain '..'. "
f"Got: {output_path!r}"
)

# Step 2: Strip leading slashes / drive letters to force relative interpretation
# e.g. "C:\\Windows\\evil.exe" -> "Windows\\evil.exe"
# e.g. "/etc/passwd" -> "etc/passwd"
cleaned = output_path
# Remove Windows drive prefix like C:\ or D:\
if len(cleaned) >= 2 and cleaned[1] == ":":
cleaned = cleaned[2:]
# Strip leading slashes/backslashes
cleaned = cleaned.lstrip("/").lstrip("\\")

if not cleaned:
raise ValueError("output_path must not be empty after sanitization.")

# Step 3: Resolve within the allowed directory
safe_path = (ALLOWED_DOWNLOAD_DIR / cleaned).resolve()

# Step 4: Verify the resolved path is still within ALLOWED_DOWNLOAD_DIR
try:
safe_path.relative_to(ALLOWED_DOWNLOAD_DIR)
except ValueError:
raise ValueError(
f"Path escapes the allowed download directory. "
f"Resolved: {safe_path}, Allowed: {ALLOWED_DOWNLOAD_DIR}"
)

# Step 5: Check against sensitive directory names
for part in safe_path.parts:
if part in _SENSITIVE_DIRS:
raise ValueError(
f"Refusing to write to sensitive directory: {part!r} "
f"in path {safe_path}"
)

# Step 6: Ensure target directory exists
safe_path.parent.mkdir(parents=True, exist_ok=True)

logger.info("Sanitized output path: %s -> %s", output_path, safe_path)
return safe_path


async def get_client() -> NotebookLMClient:
"""Get or create the NotebookLM client singleton."""
Expand Down Expand Up @@ -813,8 +889,9 @@ async def download_audio(

Returns the output file path.
"""
safe_path = _sanitize_output_path(output_path)
client = await get_client()
path = await client.artifacts.download_audio(notebook_id, output_path, artifact_id=artifact_id)
path = await client.artifacts.download_audio(notebook_id, str(safe_path), artifact_id=artifact_id)
return json.dumps({"downloaded": str(path)})

Comment on lines +880 to +896

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The download_audio tool accepts an output_path argument without any sanitization. This allows an attacker to perform a path traversal attack via prompt injection, potentially overwriting sensitive files on the user's machine (e.g., ~/.bashrc, ~/.ssh/authorized_keys). Please sanitize the output_path to ensure it remains within a safe directory and does not contain directory traversal sequences like ...

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 127c76d — added

_sanitize_output_path()
to all 8 download tools. It rejects .. traversal, strips absolute paths/drive letters, confines writes to a configurable ALLOWED_DOWNLOAD_DIR (default ./downloads/), and blocks sensitive directories (.ssh, .gnupg, System32, etc.). Thanks for catching this — it was a critical oversight.


Expand All @@ -831,8 +908,9 @@ async def download_video(

Returns the output file path.
"""
safe_path = _sanitize_output_path(output_path)
client = await get_client()
path = await client.artifacts.download_video(notebook_id, output_path, artifact_id=artifact_id)
path = await client.artifacts.download_video(notebook_id, str(safe_path), artifact_id=artifact_id)
return json.dumps({"downloaded": str(path)})

Comment on lines +899 to +915

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The download_video tool accepts an output_path argument without any sanitization. This allows an attacker to perform a path traversal attack via prompt injection, potentially overwriting sensitive files on the user's machine. Please sanitize the output_path to ensure it remains within a safe directory.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed together with all download tools in 127c76d.


Expand All @@ -853,9 +931,10 @@ async def download_quiz(

Returns the output file path.
"""
safe_path = _sanitize_output_path(output_path)
client = await get_client()
path = await client.artifacts.download_quiz(
notebook_id, output_path, output_format=output_format, artifact_id=artifact_id
notebook_id, str(safe_path), output_format=output_format, artifact_id=artifact_id
)
return json.dumps({"downloaded": str(path)})

Expand All @@ -877,9 +956,10 @@ async def download_flashcards(

Returns the output file path.
"""
safe_path = _sanitize_output_path(output_path)
client = await get_client()
path = await client.artifacts.download_flashcards(
notebook_id, output_path, output_format=output_format, artifact_id=artifact_id
notebook_id, str(safe_path), output_format=output_format, artifact_id=artifact_id
)
return json.dumps({"downloaded": str(path)})

Expand All @@ -897,9 +977,10 @@ async def download_slide_deck(

Returns the output file path.
"""
safe_path = _sanitize_output_path(output_path)
client = await get_client()
path = await client.artifacts.download_slide_deck(
notebook_id, output_path, artifact_id=artifact_id
notebook_id, str(safe_path), artifact_id=artifact_id
)
return json.dumps({"downloaded": str(path)})

Expand All @@ -917,9 +998,10 @@ async def download_infographic(

Returns the output file path.
"""
safe_path = _sanitize_output_path(output_path)
client = await get_client()
path = await client.artifacts.download_infographic(
notebook_id, output_path, artifact_id=artifact_id
notebook_id, str(safe_path), artifact_id=artifact_id
)
return json.dumps({"downloaded": str(path)})

Expand All @@ -936,8 +1018,9 @@ async def download_mind_map(

Returns the output file path.
"""
safe_path = _sanitize_output_path(output_path)
client = await get_client()
path = await client.artifacts.download_mind_map(notebook_id, output_path)
path = await client.artifacts.download_mind_map(notebook_id, str(safe_path))
return json.dumps({"downloaded": str(path)})


Comment on lines +1010 to +1026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The download_mind_map tool accepts an output_path argument without any sanitization. This allows an attacker to perform a path traversal attack via prompt injection, potentially overwriting sensitive files on the user's machine. Please sanitize the output_path to ensure it remains within a safe directory.

Expand All @@ -954,9 +1037,10 @@ async def download_data_table(

Returns the output file path.
"""
safe_path = _sanitize_output_path(output_path)
client = await get_client()
path = await client.artifacts.download_data_table(
notebook_id, output_path, artifact_id=artifact_id
notebook_id, str(safe_path), artifact_id=artifact_id
)
return json.dumps({"downloaded": str(path)})

Comment on lines +1028 to +1046

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The download_data_table tool accepts an output_path argument without any sanitization. This allows an attacker to perform a path traversal attack via prompt injection, potentially overwriting sensitive files on the user's machine. Please sanitize the output_path to ensure it remains within a safe directory.

Expand Down
Loading