ai客服
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

92 lines
3.2 KiB

1 month ago
  1. import os
  2. import requests
  3. import re
  4. from db_manager import DBManager
  5. def sanitize_filename(filename):
  6. """
  7. """
  8. # 替换 Windows 和 Mac 系统中不允许作为文件名的字符
  9. filename = re.sub(r'[\\/*?:"<>|]', "", filename)
  10. # 替换换行符等不可见字符
  11. filename = re.sub(r'[\r\n\t]', " ", filename)
  12. # 去除首尾空格
  13. filename = filename.strip()
  14. # 限制长度 (Mac 允许最多 255 字节),考虑到可能添加的后缀,限制为 200 个字符
  15. return filename[:200]
  16. def download_images(output_dir="product_images"):
  17. """
  18. """
  19. if not os.path.exists(output_dir):
  20. os.makedirs(output_dir)
  21. print(f"已创建输出目录: {output_dir}")
  22. db = DBManager()
  23. conn = db.get_connection()
  24. if not conn:
  25. print("无法连接到数据库。")
  26. return
  27. try:
  28. with conn.cursor() as cursor:
  29. # 查询所有有图片 URL 的商品
  30. sql = "SELECT id, name, img_url FROM douyin_products WHERE img_url IS NOT NULL AND img_url != ''"
  31. cursor.execute(sql)
  32. products = cursor.fetchall()
  33. if not products:
  34. print("数据库中没有找到包含图片链接的商品。")
  35. return
  36. print(f"共发现 {len(products)} 个带有图片链接的商品,开始下载...")
  37. success_count = 0
  38. fail_count = 0
  39. for product in products:
  40. name = product['name']
  41. img_url = product['img_url']
  42. # 清理文件名
  43. safe_name = sanitize_filename(name)
  44. if not safe_name:
  45. safe_name = f"product_{product['id']}" # 兜底命名
  46. # 确定文件后缀 (简单判断,如果没有扩展名默认用 .jpg)
  47. ext = ".jpg"
  48. if img_url.lower().endswith(('.png', '.jpeg', '.gif', '.webp')):
  49. ext = os.path.splitext(img_url)[1]
  50. filepath = os.path.join(output_dir, f"{safe_name}{ext}")
  51. # 如果文件已存在则跳过 (可选)
  52. if os.path.exists(filepath):
  53. print(f" [跳过] 已存在: {safe_name}{ext}")
  54. continue
  55. try:
  56. # print(f" -> 正在下载: {safe_name}")
  57. response = requests.get(img_url, stream=True, timeout=10)
  58. response.raise_for_status()
  59. with open(filepath, 'wb') as f:
  60. for chunk in response.iter_content(chunk_size=8192):
  61. f.write(chunk)
  62. success_count += 1
  63. print(f" [成功] 保存为: {safe_name}{ext}")
  64. except Exception as e:
  65. fail_count += 1
  66. print(f" [失败] 无法下载图片 ({img_url}): {e}")
  67. print(f"\n下载完成!成功: {success_count} 张,失败: {fail_count} 张。图片保存在 '{output_dir}' 目录中。")
  68. except Exception as e:
  69. print(f"查询数据库或下载过程中发生错误: {e}")
  70. if __name__ == "__main__":
  71. download_images()