Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
3183445
告警成功率阈值从硬编码50%移入配置文件,以支持自定义;相关日志调整
Apr 7, 2026
e8426e7
提高redis依赖兼容性:最大版本支持从<4.0提升到<6.0
Apr 7, 2026
f471fbf
提高redis依赖兼容性:最大版本支持从<4.0提升到<6.0
Apr 7, 2026
b1c5afa
文件下载爬虫第一次提交
Apr 7, 2026
5c0e78a
文件下载爬虫第二次提交,修复bug
Apr 7, 2026
edaeee4
文件下载爬虫第三次提交,修复细节bug
Apr 7, 2026
744f172
文件下载爬虫第四次提交,文档细节调整
Apr 7, 2026
0bfd88d
文件下载爬虫支持任务字段透传组装最终item;on_task_all_done方法签名修改。
Apr 8, 2026
e25915c
解决“文件下载爬虫支持任务字段透传组装最终item”的bug——第一次提交
Apr 8, 2026
5f4ddb4
解决“文件下载爬虫支持任务字段透传组装最终item”的bug——第一次提交
Apr 8, 2026
7162763
解决“文件下载爬虫支持任务字段透传组装最终item”的bug——第二次提交——修复竞态条件bug,新增skipped_count字段。
Apr 8, 2026
6c04fc0
解决“文件下载爬虫支持任务字段透传组装最终item”的bug——第四次提交——添加run_id标识,避免小概率情况下跨批次请求造成统计错误。
Apr 8, 2026
517042a
第五次提交:修复方法签名不一致问题,文档更新。
Apr 8, 2026
b1db0c9
第六次提交:修复方法签名不一致问题;选择mysql作为缓存时,按 redis_key分表,减少跨业务串扰。
Apr 8, 2026
b7a0835
Merge pull request #4 from gyj126/file-spider
gyj126 Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
第六次提交:修复方法签名不一致问题;选择mysql作为缓存时,按 redis_key分表,减少跨业务串扰。
  • Loading branch information
gaoyunjian committed Apr 8, 2026
commit b1db0c9bf0b0ece98f3850d20f5e045bb4a76df0
42 changes: 41 additions & 1 deletion docs/usage/FileSpider.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,46 @@ class MyFileSpider(feapder.FileSpider):

## 4. 使用示例

### 启动方式(单进程 / master-worker 分离)

FileSpider 支持两种启动方式:

1. 单进程:`spider.start()`,适合本地调试
2. 分离运行:master 仅负责派发任务,worker 仅负责下载处理,适合生产部署

```python
from feapder import ArgumentParser

if __name__ == "__main__":
    # Build the spider: tasks come from `task_table`; `redis_key` namespaces
    # the spider's queue/dedup state; `task_keys` are the task columns made
    # available to the spider (presumably passed through to the final item —
    # confirm against FileSpider docs).
    spider = MyFileSpider(
        redis_key="my_file_spider",
        task_table="file_task",
        task_keys=["id", "file_urls"],
    )

    # feapder's ArgumentParser extends argparse with a `function=` kwarg:
    # the callable bound to whichever flag is passed gets invoked by
    # parser.start(). This lets master and worker run as separate processes.
    parser = ArgumentParser(description="MyFileSpider 文件下载爬虫")
    parser.add_argument(
        "--start_master",
        action="store_true",
        help="添加任务",
        function=spider.start_monitor_task,  # master: only dispatches tasks
    )
    parser.add_argument(
        "--start_worker",
        action="store_true",
        help="启动爬虫",
        function=spider.start,  # worker: only consumes/downloads
    )
    parser.start()  # dispatch to the function bound to the supplied flag
```

命令行启动:

```bash
uv run my_file_spider.py --start_master
uv run my_file_spider.py --start_worker
```

### 场景一:保存到本地磁盘

最简单的用法,下载文件保存到本地:
Expand Down Expand Up @@ -330,7 +370,7 @@ FileSpider 提供两级去重:
|------|--------|------|----------|
| 不去重 | `None`(默认) | - | 每次都重新下载 |
| Redis 去重 | `"redis"` | Redis Hash | 分布式共享,多进程安全 |
| MySQL 去重 | `"mysql"` | MySQL 表(自动建表) | 持久化,长期缓存 |
| MySQL 去重 | `"mysql"` | MySQL 表(按 `redis_key` 自动分表) | 持久化,隔离不同业务 |
| 自定义去重 | `FileDedup` 实例 | 用户自定义 | 特殊需求 |

### 自定义去重
Expand Down
5 changes: 4 additions & 1 deletion feapder/core/spiders/file_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import hashlib
import os
import re
import warnings
from urllib.parse import urlparse, unquote

Expand Down Expand Up @@ -125,7 +126,9 @@ def __init__(
elif file_dedup == "mysql":
if file_dedup_expire is not None:
log.warning("file_dedup_expire仅在file_dedup='redis'时生效")
self._file_dedup = MysqlFileDedup()
redis_namespace = re.sub(r"[^0-9a-zA-Z_]+", "_", self._redis_key).strip("_")
dedup_table = f"file_dedup_{redis_namespace}" if redis_namespace else "file_dedup_default"
self._file_dedup = MysqlFileDedup(table=dedup_table)
elif isinstance(file_dedup, FileDedup):
self._file_dedup = file_dedup
elif file_dedup is not None:
Expand Down
11 changes: 5 additions & 6 deletions feapder/templates/file_spider_template.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,9 @@ if __name__ == "__main__":

parser.start()

# 直接启动
# spider.start() # 启动爬虫
# spider.start_monitor_task() # 添加任务
# 启动方式一:单进程(调试方便)
# spider.start()

# 通过命令行启动
# python ${file_name} --start_master # 添加任务
# python ${file_name} --start_worker # 启动爬虫
# 启动方式二:分离 master/worker(生产推荐)
# uv run ${file_name} --start_master # 仅负责派发任务
# uv run ${file_name} --start_worker # 仅负责消费下载
24 changes: 21 additions & 3 deletions tests/file-spider/test_oss_result_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from urllib.parse import urlparse, unquote

import feapder
from feapder import ArgumentParser
from feapder.network.item import Item
from feapder.utils.log import log

Expand Down Expand Up @@ -65,10 +66,13 @@ def process_file(self, task_id, url, file_path, response):
# self.oss_client.put_object(file_path, response.content)
return f"https://my-bucket.oss.aliyuncs.com/{file_path}"

def on_task_all_done(self, task, result, success_count, fail_count, total_count):
def on_task_all_done(self, task, result, success_count, fail_count, skipped_count, dup_count, total_count):
# result 与 get_download_urls 返回的列表严格位置对应
# 例: ["https://oss.com/a.jpg", "https://oss.com/b.jpg", None, "https://oss.com/d.jpg"]
log.info(f"任务{task.id} 完成 成功={success_count} 失败={fail_count}")
log.info(
f"任务{task.id} 完成 成功={success_count} 失败={fail_count} "
f"跳过={skipped_count} 去重={dup_count}"
)

# 组装结果 Item 写入结果表
item = FileResultItem()
Expand All @@ -89,4 +93,18 @@ def on_task_all_done(self, task, result, success_count, fail_count, total_count)
task_table="file_task",
task_keys=["id", "file_urls"],
)
spider.start_monitor_task()

# CLI dispatch via feapder's ArgumentParser: each flag is bound to an entry
# point through the non-standard `function=` kwarg, and parser.start() calls
# the one matching the flag given on the command line. This allows running
# master (task dispatch) and worker (download/consume) as separate processes.
parser = ArgumentParser(description="OssResultSpider 文件下载爬虫")
parser.add_argument(
    "--start_master",
    action="store_true",
    help="添加任务",
    function=spider.start_monitor_task,  # master: enqueue tasks only
)
parser.add_argument(
    "--start_worker",
    action="store_true",
    help="启动爬虫",
    function=spider.start,  # worker: download and process only
)
parser.start()  # run the function bound to whichever flag was passed