• 104阅读
  • 1回复

[应用软件]知乎回答下载工具 图片/视频/gif动图均可附Python源码 [复制链接]

上一主题 下一主题
离线清风入梦

 
发帖
1412

只看楼主 道具中心使用道具 楼主  发表于: 07-12
对于经常换壁纸的我来说还是蛮有吸引力的,而且里面的壁纸(小姐姐)都挺好看的,于是就有了把它们保存下来的想法。
但是一看回答数竟有几千个,这要是全都看完得花多少时间精力啊,显然咱不是这种闲人,懒人自有懒办法,既然是壁纸类问题,那么我们能不能就只看图片不看文字呢?
  import os
  import re
  import threading
  import time
  import tkinter
  import traceback
  from concurrent.futures import ThreadPoolExecutor
  from threading import Thread
  from tkinter import *
  from tkinter import filedialog
  from tkinter import messagebox

  import requests
  import threadpool
  import win32clipboard as wc
  def async_(f):
      """Decorator: run every call of *f* in its own freshly started thread.

      The decorated callable returns immediately with the started ``Thread``
      (the original returned ``None``), so callers may ``join()`` it if they
      need to wait; the return value of *f* itself is discarded.

      The function was originally named ``async``.  That word is a reserved
      keyword since Python 3.7, so ``def async`` no longer parses; the
      implementation therefore lives under ``async_`` and the historical name
      is still registered in the module namespace below.
      """
      from functools import wraps

      @wraps(f)  # keep the wrapped function's name and docstring
      def wrapper(*args, **kwargs):
          thr = Thread(target=f, args=args, kwargs=kwargs)
          thr.start()
          return thr
      return wrapper


  # Backward compatibility: keep the old name reachable (``@async`` on
  # Python <= 3.6, ``getattr(module, 'async')`` everywhere).
  globals()['async'] = async_
  class zhihu(object):
      """Tkinter window that downloads the images and videos of a Zhihu question.

      Workflow: the user enters a question ID or link, presses the check button
      (``wenti_jc`` resolves the ID, answer count and title), then presses the
      download button (``get_offset`` walks the answers 20 at a time and
      ``get_urls`` downloads the media of each page in a background thread).

      NOTE(review): as in the original code, widgets and message boxes are
      touched from worker threads.  Tkinter is not guaranteed to be thread-safe
      on every build - confirm on the target platform.
      """

      # Endpoint returning the answers of a question (one page per request).
      ANSWERS_API = 'https://www.zhihu.com/api/v4/questions/{}/answers'
      # Endpoint returning the playlist of a video.
      VIDEO_API = 'https://lens.zhihu.com/api/v4/videos/{}'
      # Value of the ``include`` query parameter sent with every page request.
      ANSWER_INCLUDE = 'data[*].is_normal,admin_closed_comment,reward_info,is_collapsed,annotation_action,annotation_detail,collapse_reason,is_sticky,collapsed_by,suggest_edit,comment_count,can_comment,content,editable_content,voteup_count,reshipment_settings,comment_permission,created_time,updated_time,review_info,relevant_info,question,excerpt,relationship.is_authorized,is_author,voting,is_thanked,is_nothelp,is_labeled;data[*].mark_infos[*].url;data[*].author.follower_count,badge[*].topics'
      PAGE_SIZE = 20        # answers requested per page
      PAGE_DELAY = 3        # seconds to wait between two page requests
      POOL_SIZE = 20        # parallel media downloads per page
      REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection hangs forever
      # Image extensions kept as-is; anything else is saved as ``.jpg``.
      IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp')
      # Placeholder author names shared by many users -> name of the counter
      # attribute used to number them so their files do not overwrite each other.
      _PLACEHOLDER_COUNTERS = {
          '知乎用户': 'zh_id',
          '「已注销」': 'zx_id',
          '匿名用户': 'nm_id',
          '[已重置]': 'cz_id',
      }

      def __init__(self):
          """Create the main window and all widgets (they are placed by ``gui_arrang``)."""
          self.id = None          # question ID; set by a successful wenti_jc()
          self.totals = 0         # number of answers of the checked question
          self.title = None       # title of the checked question
          self.max = 0            # number of pages of the running download
          self.file_path = None   # target directory of the running download
          self.headers = {
              'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
          }
          self.zh_id = 1
          self.zx_id = 1
          self.nm_id = 1
          self.cz_id = 1
          self.stop_num = 0       # pages finished so far
          # Page threads run concurrently; this guards the counters above.
          self._lock = threading.Lock()
          # Main window that holds every other widget.
          self.root = tkinter.Tk()
          self.root.title("知乎问题图片/视频下载器")
          width = 440
          height = 500
          # Centre the fixed-size window on the screen.
          screenwidth = self.root.winfo_screenwidth()
          screenheight = self.root.winfo_screenheight()
          alignstr = '%dx%d+%d+%d' % (width, height, (screenwidth - width) / 2, (screenheight - height) / 2)
          self.root.geometry(alignstr)
          self.root.resizable(width=False, height=False)
          # Status line shown while a download is running.
          self.var = StringVar()
          self.label_tips = tkinter.Label(self.root, textvariable=self.var, justify=CENTER, font=("微软雅黑", 12, "bold"), fg='red')
          # Progress text next to the download button.
          self.ok_var = StringVar()
          self.label_ok = tkinter.Label(self.root, textvariable=self.ok_var, justify=CENTER, font=("微软雅黑", 10, "bold"), fg='Firebrick')
          self.input_var = StringVar()
          self.label_wtid = tkinter.Label(self.root, text='请输入问题ID或问题链接:')
          self.input_wtid = tkinter.Entry(self.root, textvariable=self.input_var, width=37)
          self.button_wtzt = tkinter.Button(self.root, text="粘贴", width=6, command=self.getCopyTxet)
          self.wt_file_var = StringVar()
          self.wt_file_var.set('D:/ZhiHu')
          self.label_wt_file = tkinter.Label(self.root, text='请选择保存目录:')
          self.input_wt_file = tkinter.Entry(self.root, textvariable=self.wt_file_var, state=DISABLED, width=37)
          self.button_wt_file = tkinter.Button(self.root, text="更改", width=6, command=self.thread_browse_folder)
          self.button_wtjc = tkinter.Button(self.root, text="检测", width=8, command=self.thread_wenti_jc, font=("微软雅黑", 12, "bold"))
          self.Monitor_button = tkinter.Button(self.root, text="下载", width=8, command=self.thread_get_offset,
                                               font=("微软雅黑", 12, "bold"))
          self.label_tips1 = tkinter.Label(self.root, text='使用说明:')
          self.label_tips2 = tkinter.Label(self.root, text='1.下载前请先检测问题,以免下错资源')
          self.label_tips3 = tkinter.Label(self.root, text='2.问题资源为实时下载,你可随时在下载文件夹查看')

      def gui_arrang(self):
          """Place every widget at its fixed position inside the window."""
          self.label_tips.place(x=52, y=28)
          self.label_wtid.place(x=55, y=82)
          self.input_wtid.place(x=55, y=114)
          self.button_wtzt.place(x=329, y=110)
          self.label_wt_file.place(x=55, y=146)
          self.input_wt_file.place(x=55, y=178)
          self.button_wt_file.place(x=329, y=174)
          self.button_wtjc.place(x=60, y=220)
          self.Monitor_button.place(x=164, y=220)
          self.label_ok.place(x=300, y=233)
          self.label_tips1.place(x=55, y=334)
          self.label_tips2.place(x=55, y=360)
          self.label_tips3.place(x=55, y=386)

      # ------------------------------------------------------------------
      # Pure helpers
      # ------------------------------------------------------------------
      @staticmethod
      def _extract_question_id(text):
          """Return the numeric question ID in *text* (bare ID or link), or ``None``."""
          text = text.strip()
          if text.isdigit():
              return text
          # Matches both ".../question/123/answer/456" and ".../question/123?x=y".
          match = re.search(r'question/(\d+)', text)
          return match.group(1) if match else None

      @staticmethod
      def _safe_name(text):
          """Return *text* with characters that are illegal in Windows file names replaced.

          Question titles usually end in '?', which would make ``os.makedirs``
          fail on Windows if used unchanged.
          """
          cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', text).strip(' .')
          return cleaned or '_'

      @classmethod
      def _image_suffix(cls, img_url):
          """Return the file extension to use for *img_url* (``.jpg`` when unknown)."""
          path = img_url.split('?', 1)[0].split('#', 1)[0]
          suffix = os.path.splitext(path)[1].lower()
          return suffix if suffix in cls.IMAGE_SUFFIXES else '.jpg'

      def _unique_author(self, name):
          """Append a running number to placeholder author names; return others unchanged."""
          counter = self._PLACEHOLDER_COUNTERS.get(name)
          if counter is None:
              return name
          with self._lock:
              number = getattr(self, counter)
              setattr(self, counter, number + 1)
          return '{}{}'.format(name, number)

      def _run_in_background(self, target):
          """Start *target* in a daemon thread so the GUI stays responsive."""
          worker = threading.Thread(target=target, daemon=True)
          worker.start()
          return worker

      # ------------------------------------------------------------------
      # Button callbacks
      # ------------------------------------------------------------------
      def getCopyTxet(self):
          """Paste the current clipboard text into the question input field."""
          try:
              wc.OpenClipboard()
              try:
                  copytxet = wc.GetClipboardData()
              finally:
                  # Always release the clipboard, even when reading it failed.
                  wc.CloseClipboard()
              self.input_var.set(str(copytxet))
          except Exception:
              # Best effort: an empty or non-text clipboard simply pastes nothing.
              pass

      def thread_wenti_jc(self):
          """Run ``wenti_jc`` in a background thread."""
          self._run_in_background(self.wenti_jc)

      def thread_get_offset(self):
          """Run ``get_offset`` in a background thread."""
          self._run_in_background(self.get_offset)

      def thread_browse_folder(self):
          """Run ``browse_folder`` in a background thread."""
          self._run_in_background(self.browse_folder)

      def browse_folder(self):
          """Ask for a directory and, unless cancelled, make it the save location."""
          save_address = filedialog.askdirectory()
          if save_address:
              self.wt_file_var.set(save_address)

      def wenti_jc(self):
          """Validate the entered question ID/link and remember ID, title and answer count.

          On success ``self.id``, ``self.title`` and ``self.totals`` are updated
          and the question title is shown; otherwise an error box is shown and
          the previously stored question (if any) is left untouched.
          """
          wt_data = self.input_wtid.get().strip()
          if not wt_data:
              tkinter.messagebox.showerror('错误提示', '请先输入问题ID或链接')
              return
          # The dialogs speak of an "ID" for bare numbers and of a "链接" otherwise.
          kind = 'ID' if wt_data.isdigit() else '链接'
          error_title = '问题{}输入错误'.format(kind)
          error_text = '请检查你的问题{}并重新输入'.format(kind)
          question_id = self._extract_question_id(wt_data)
          if question_id is None:
              tkinter.messagebox.showerror(error_title, error_text)
              return
          try:
              r = requests.get(self.ANSWERS_API.format(question_id), headers=self.headers,
                               timeout=self.REQUEST_TIMEOUT)
              if r.status_code != 200:
                  tkinter.messagebox.showerror(error_title, error_text)
                  return
              payload = r.json()
              totals = int(payload['paging']['totals'])
              title = payload['data'][0]['question']['title']
          except Exception:
              # Network failure, unexpected JSON layout, question without answers...
              print(traceback.format_exc())
              tkinter.messagebox.showerror('错误提示', '抱歉,出现未知错误,请稍后再试')
              return
          # Store the three values together only after everything was parsed.
          self.totals = totals
          self.title = title
          self.id = int(question_id)
          tkinter.messagebox.showinfo('问题{}正确'.format(kind), '你本次要下载的问题为“%s”' % self.title)

      # ------------------------------------------------------------------
      # Download
      # ------------------------------------------------------------------
      def get_offset(self):
          """Download every answer page of the checked question.

          Creates ``<save dir>/<question title>`` and starts one ``get_urls``
          call per page of ``PAGE_SIZE`` answers, ``PAGE_DELAY`` seconds apart.
          """
          if self.id is None:
              tkinter.messagebox.showerror('错误提示', '请先检测问题ID或链接是否正确')
              return
          self.var.set('正在下载,请稍等... (共{}个回答)'.format(self.totals))
          self.Monitor_button.config(state=DISABLED)
          self.file_path = os.path.join(self.input_wt_file.get(), self._safe_name(self.title))
          try:
              os.makedirs(self.file_path, exist_ok=True)
          except OSError:
              print(traceback.format_exc())
              tkinter.messagebox.showerror('错误提示', '无法创建保存目录,请更换保存目录后重试')
              self.var.set('')
              self.Monitor_button.config(state=NORMAL)
              return
          # Ceiling division: a partially filled last page still counts.
          self.max = (self.totals + self.PAGE_SIZE - 1) // self.PAGE_SIZE
          if self.max == 0:
              # Nothing to fetch; do not leave the button disabled.
              self._finish_download()
              return
          for page in range(self.max):
              self.get_urls(page * self.PAGE_SIZE)
              if page + 1 < self.max:
                  time.sleep(self.PAGE_DELAY)

      def get_urls(self, offset):
          """Start a thread that downloads the answer page beginning at *offset*.

          Returns immediately; one thread is started per call.
          """
          worker = threading.Thread(target=self._download_page, args=(offset,))
          worker.start()

      def _download_page(self, offset):
          """Fetch one page of answers and download all media found in it."""
          try:
              params = {
                  'include': self.ANSWER_INCLUDE,
                  'limit': self.PAGE_SIZE,
                  'offset': offset,
                  'sort_by': 'updated',
              }
              response = requests.get(self.ANSWERS_API.format(self.id), headers=self.headers,
                                      params=params, timeout=self.REQUEST_TIMEOUT)
              if self.stop_num == 0:
                  self.ok_var.set('已完成 1%')
              answers = response.json()['data']
              # Leaving the ``with`` block waits for every queued download.
              with ThreadPoolExecutor(max_workers=self.POOL_SIZE) as pool:
                  for answer in answers:
                      self._queue_answer(pool, answer)
          except Exception:
              # A failed page must not abort the whole download.
              print(traceback.format_exc())
          finally:
              # Count the page even when it failed, otherwise the download would
              # never be reported as finished and the button would stay disabled.
              self._page_done()

      def _queue_answer(self, pool, answer):
          """Submit one download job to *pool* for every image/video of *answer*."""
          content = answer['content']
          author = self._safe_name(self._unique_author(answer['author']['name']))
          day = time.strftime("%Y-%m-%d", time.localtime(int(answer['updated_time'])))
          img_urls = re.findall(r'<noscript><img src="(.*?)"', content, re.S)
          for index, img_url in enumerate(img_urls, 1):
              pool.submit(self.save_img, img_url, '{}({})_{}'.format(author, day, index))
          video_links = re.findall(r'"z-ico-video"></span>(.*?)</span>', content, re.S)
          for index, link in enumerate(video_links, 1):
              # The span text is expected to contain ".../video/<id>".
              match = re.search(r'/video/(\d+)', link)
              if match:
                  pool.submit(self.save_video, match.group(1),
                              '{}({})_video_{}'.format(author, day, index))

      def _page_done(self):
          """Record one finished page, update the progress text, finish after the last page."""
          with self._lock:
              self.stop_num += 1
              done, total = self.stop_num, self.max
              finished = done >= total
              if finished:
                  # Reset the per-download counters for the next run.
                  self.zh_id = 1
                  self.zx_id = 1
                  self.nm_id = 1
                  self.cz_id = 1
                  self.stop_num = 0
          if total:
              self.ok_var.set('已完成 {}%'.format(round(done * 100 / total)))
          if finished:
              self._finish_download()

      def _finish_download(self):
          """Tell the user the download is complete and return the GUI to its idle state."""
          tkinter.messagebox.showinfo('下载完成', '你的问题资源已全部下载完毕')
          self.var.set('')
          self.Monitor_button.config(state=NORMAL)
          self.ok_var.set('')

      def save_img(self, img_url, img_name):
          """Download *img_url* into the download directory as *img_name* plus its extension.

          Failures are printed and otherwise ignored so that one broken image
          does not stop the remaining downloads.
          """
          try:
              img = requests.get(img_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
              if img.status_code != 200:
                  return
              target = os.path.join(self.file_path, img_name + self._image_suffix(img_url))
              with open(target, "wb") as f:
                  f.write(img.content)
          except Exception:
              print(traceback.format_exc())

      def save_video(self, video_id, video_name):
          """Download the video *video_id* into the download directory as ``<video_name>.mp4``.

          Failures are printed and otherwise ignored.
          """
          try:
              info = requests.get(self.VIDEO_API.format(video_id), headers=self.headers,
                                  timeout=self.REQUEST_TIMEOUT).json()
              playlist = info['playlist']
              # 'LD' is the entry the original code used; fall back to whatever
              # entry exists when it is missing.
              stream = playlist.get('LD') or next(iter(playlist.values()))
              video = requests.get(stream['play_url'], headers=self.headers,
                                   timeout=self.REQUEST_TIMEOUT)
              if video.status_code != 200:
                  return
              with open(os.path.join(self.file_path, video_name + '.mp4'), "wb") as f:
                  f.write(video.content)
          except Exception:
              print(traceback.format_exc())
  def main():
      """Build the downloader window, lay out its widgets and run the Tk event loop."""
      app = zhihu()        # creates the Tk root window and every widget
      app.gui_arrang()     # positions the widgets
      tkinter.mainloop()   # blocks until the window is closed


  if __name__ == '__main__':
      main()
本部分内容设定了隐藏,需要回复后才能看到
离线爱小心娅

发帖
339

只看楼主 沙发  发表于: 09-11
实力VIP会员获取器
快速回复
限80 字节
批量上传需要先选择文件,再选择上传
 
上一个 下一个