import urllib.request as urllib2 import threading import random import urllib.parse import json import time import datetime
//设置自己的cookie cookie=''
//分带post内容与不带参数版本
def fetch(url,refer): headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Host': 'www.miui.com', 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36', 'Connection': 'keep-alive', 'Accept-Encoding': 'gzip,deflate', 'Cookie': cookie, 'Referer': refer, 'Pragma': 'no-cache', 'Upgrade-Insecure-Requests': '1', 'Content-Type': 'application/x-www-form-urlencoded', 'Cache-Control':'no-cache' } req = urllib2.Request(url, None, headers) response = urllib2.urlopen(req) page_source = response.read() return page_source def fetch2(url,refer,post): headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Host': 'www.miui.com', 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36', 'Connection': 'keep-alive', 'Accept-Encoding': 'gzip,deflate', 'Cookie': cookie, 'Referer': refer, 'Pragma': 'no-cache', 'Upgrade-Insecure-Requests': '1', 'Content-Type': 'application/x-www-form-urlencoded', 'Cache-Control':'no-cache' } data = urllib.parse.urlencode(post) req = urllib2.Request(url, data.encode("utf-8"),headers) response = urllib2.urlopen(req) page_source = response.read() return page_source
def mainfunc(): //在外部引用re会报错,函数内部无法使用re.complie 不知道为啥 import re //在gid=14的板块下随机获取一个页面 url='http://www.miui.com/forum.php?gid=14&page='+str(random.randint(1,24)) html = fetch(url,'http://www.miui.com/index.html') res=html.decode('utf-8') //获取其中所有的帖子标题 pattern = re.compile(r'<a href="(.*?.html)" onclick=.*?>') threadlist = pattern.findall(res) //筛选,把不是帖子的和不是第一页的去掉,其实没啥用 for i in threadlist: if i.find('thread-')=='-1': threadlist.remove(i) if i.find('-1-1.html')=='-1': threadlist.remove(i) //list去重 threadlist = list(set(threadlist)) //从list中随机取一个进行恢复 refer= 'http://www.miui.com/'+threadlist[random.randint(0,len(threadlist)-1)] //获取帖子内容 html = fetch(refer,'http://www.miui.com/index.html') res=html.decode('utf-8') //找到帖子的回复网址 pattern = re.compile(r'action=\"(.*?)\"') threadpage = pattern.findall(res) reply = threadpage[1]; //reply中把所有的&转义成了&,手动拼回来 replylist=reply.split("&") reply="" for i in replylist: reply=reply+i+"&" //构造请求网址 url='http://www.miui.com/'+reply+'inajax=1' //获取用户的表单hash验证值 pattern = re.compile(r'<input type="hidden" name="formhash" value="(.*?)" \/>') temp = pattern.findall(res) hash=temp[0] hash=hash+":"+hash[::-1] //获取帖子主题 pattern = re.compile(r'<meta name="description" content="(.*?)"\/>') temp = pattern.findall(res) content=temp[0] content=urllib.parse.quote(content) //调用图灵机器人APi获取回复内容 tulingurl='http://www.tuling123.com/openapi/api?key=0344bd2d86f33bda5654b941f757137a&info='+content message=fetch(tulingurl,'') message=message.decode('UTF-8','strict') message=json.loads(message) messge=message["text"] //生成UNIX时间戳 dtime = datetime.datetime.now() ans_time = time.mktime(dtime.timetuple()) //构造回帖附带的参数 post={'message': messge, 'posttime': int(ans_time), 'formhash': hash, 'usesig': '1','subject': ''} //发送回帖请求 re=fetch2(url,refer,post); print(re.decode('UTF-8','strict')) //设置定时器,35秒执行一次,小米论坛每小时最多回复100贴,建议间隔设置不要少于35秒 global timer timer = threading.Timer(35, mainfunc) timer.start()
//调用主函数 mainfunc() |
文章评论