|
|
- import os
- import requests
- import re
- from db_manager import DBManager
-
def sanitize_filename(filename):
    """Return *filename* cleaned up for use as a file name.

    The characters backslash, slash, asterisk, question mark, colon, double
    quote, angle brackets and pipe are removed; carriage returns, newlines and
    tabs become spaces; surrounding whitespace is trimmed; and the result is
    capped at 200 characters.

    Args:
        filename: Raw name to clean. ``None`` is treated as an empty name.

    Returns:
        The cleaned name. It may be an empty string, so callers that need a
        non-empty name must supply their own fallback.
    """
    # A missing name (e.g. a NULL database column) would make re.sub raise
    # TypeError; report it as "nothing usable" instead.
    if filename is None:
        return ""

    # Drop characters that Windows and macOS do not allow in file names.
    filename = re.sub(r'[\\/*?:"<>|]', "", filename)
    # Turn line breaks and tabs into plain spaces.
    filename = re.sub(r'[\r\n\t]', " ", filename)
    # Cap at 200 characters to leave headroom for a suffix added by the
    # caller. Trim again afterwards: the cut can land right after a space,
    # and a trailing space in a file name is awkward on some platforms.
    return filename.strip()[:200].rstrip()
-
def download_images(output_dir="product_images"):
    """Download the image of every product row that has an image URL.

    Queries ``douyin_products`` for rows whose ``img_url`` is neither NULL nor
    empty and saves each image into *output_dir* as
    ``<sanitized product name><extension>``. Files that already exist are
    skipped. Progress and a final summary are printed; database and download
    errors are reported with ``print`` rather than raised.

    Args:
        output_dir: Directory that receives the images; created when missing.

    Returns:
        None.
    """
    if not os.path.exists(output_dir):
        # exist_ok guards against the directory appearing between the check
        # above and this call.
        os.makedirs(output_dir, exist_ok=True)
        print(f"已创建输出目录: {output_dir}")

    db = DBManager()
    conn = db.get_connection()
    if not conn:
        print("无法连接到数据库。")
        return

    try:
        with conn.cursor() as cursor:
            # Select every product that has an image URL.
            sql = "SELECT id, name, img_url FROM douyin_products WHERE img_url IS NOT NULL AND img_url != ''"
            cursor.execute(sql)
            products = cursor.fetchall()

        if not products:
            print("数据库中没有找到包含图片链接的商品。")
            return

        print(f"共发现 {len(products)} 个带有图片链接的商品,开始下载...")

        success_count = 0
        fail_count = 0

        # Rows are indexed by column name below, so the cursor is presumably
        # configured to return dict-like rows — verify against DBManager.
        for product in products:
            img_url = product['img_url']

            # A NULL name must not abort the whole batch; fall back to an
            # id-based name whenever nothing usable is left after cleaning.
            safe_name = sanitize_filename(product['name'] or "")
            if not safe_name:
                safe_name = f"product_{product['id']}"

            ext = _guess_image_extension(img_url)
            filepath = os.path.join(output_dir, f"{safe_name}{ext}")

            # Skip files that are already present so re-runs are cheap.
            # NOTE(review): products whose cleaned name and extension coincide
            # map to the same path, so all but the first are reported as
            # skipped — confirm that this is intended.
            if os.path.exists(filepath):
                print(f" [跳过] 已存在: {safe_name}{ext}")
                continue

            try:
                _stream_image_to_file(img_url, filepath)
            except Exception as e:
                fail_count += 1
                print(f" [失败] 无法下载图片 ({img_url}): {e}")
            else:
                success_count += 1
                print(f" [成功] 保存为: {safe_name}{ext}")

        print(f"\n下载完成!成功: {success_count} 张,失败: {fail_count} 张。图片保存在 '{output_dir}' 目录中。")

    except Exception as e:
        print(f"查询数据库或下载过程中发生错误: {e}")
    finally:
        # Release the connection acquired above.
        # NOTE(review): assumes get_connection() hands out a connection owned
        # by the caller — confirm against DBManager.
        try:
            conn.close()
        except Exception as e:
            print(f"关闭数据库连接时发生错误: {e}")


# Extensions that are kept as-is; any other URL is saved as ".jpg".
_KNOWN_IMAGE_EXTENSIONS = ('.png', '.jpeg', '.gif', '.webp')


def _guess_image_extension(img_url):
    """Return the file extension to use for *img_url*, defaulting to ``.jpg``."""
    # Ignore the query string and fragment so that ".../a.png?x=1" is still
    # recognised as a PNG.
    url_path = img_url.split('?', 1)[0].split('#', 1)[0]
    ext = os.path.splitext(url_path)[1]
    if ext.lower() in _KNOWN_IMAGE_EXTENSIONS:
        return ext
    return ".jpg"


def _stream_image_to_file(img_url, filepath):
    """Download *img_url* to *filepath* without leaving a partial file behind."""
    # Write to a temporary sibling first: a half-written file at the final
    # path would be mistaken for a finished download on the next run.
    partial_path = f"{filepath}.part"
    try:
        # The context manager closes the streamed response (and releases its
        # connection) even when the download fails midway.
        with requests.get(img_url, stream=True, timeout=10) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, filepath)
    finally:
        # After a successful os.replace the temporary file no longer exists;
        # it is only still around when something above failed.
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                pass  # best-effort cleanup; the original error matters more
-
# Script entry point: download into the default output directory when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    download_images()
|