分享一个简单的rss阅读工具

互联网 17-5-3
#!/usr/bin/env python
# -*- coding:UTF-8 -*-
"""A minimal RSS/Atom feed reader (Python 2).

Downloads a feed, parses it with BeautifulSoup's XML parser and prints the
title, link, summary and publication date of every entry.

Usage: run the script with the feed URL as its first command-line argument.
"""

import re
from lxml import etree
from bs4 import BeautifulSoup as sp
import requests
import urllib2
import StringIO

import sys
# Python 2 only: make UTF-8 the implicit str<->unicode codec so that
# formatting and printing non-ASCII feed text does not raise UnicodeError.
reload(sys)
sys.setdefaultencoding("utf-8")

# Sent with every HTTP request made by urlread().
headers={'User-Agent' : 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}


def urlread(url):
    """Download *url* and return the response body as a byte string.

    ``requests`` is tried first and its body is re-encoded as UTF-8.  If
    that attempt fails for any ordinary reason the download is retried
    with ``urllib2`` and the raw bytes are returned as served.

    Raises whatever ``urllib2`` raises when the fallback fails as well.
    """
    try:
        req = requests.get(url, headers=headers)
        req.encoding = "utf-8"
        return req.text.encode("utf-8")
    except Exception:
        # ``Exception`` (not a bare except) keeps the best-effort fallback
        # while letting KeyboardInterrupt / SystemExit propagate.
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            # The body is already bytes; re-encoding it would only force
            # an implicit decode that fails on non-UTF-8 feeds.
            return response.read()
        finally:
            response.close()


class Item(object):
    """One feed entry: title, link, publication date and plain-text summary."""

    def __init__(self, title, link, date, description):
        # ``or ""`` tolerates fields that were missing from the feed.
        self.title = (title or "").strip()
        self.link = (link or "").strip()
        self.pubDate = (date or "").strip()
        self.description = self.filter(description).strip()
        # Backward-compatible alias for the original misspelled attribute.
        self.decription = self.description

    def filter(self, description):
        """Return *description* as a single line of plain text.

        Markup tags and line breaks are removed, ``&nbsp;`` becomes a
        space, and the result is cut to 240 characters plus ``...``.
        A missing (``None``/empty) description yields ``""``.
        """
        if not description:
            return ""
        # ``[^>]*`` (unlike ``.*?``) also matches tags that span a newline.
        description = re.sub(r"<[^>]*>", '', description)
        description = re.sub(r"[\r\n]", '', description)
        description = description.replace("&nbsp;", " ")
        if len(description) > 240:
            description = description[:240] + '...'
        return description

    def __str__(self):
        return "%s\n%s\n%s\n<%s>\n" % (
            self.title,
            self.link,
            self.description,
            self.pubDate,
        )

    __repr__ = __str__


class BSParser(object):
    """Parse an RSS or Atom feed and yield its entries as ``Item`` objects."""

    def __init__(self, url):
        """Download the feed at *url* and parse it."""
        self.reset(urlread(url))

    def reset(self, xml=None):
        """Replace the parsed document with *xml* (or an empty one)."""
        if xml is None:
            self.soup = sp("<xml> </xml>")
        else:
            self.soup = sp(xml, "xml")

    def callback(self, method, obj, tags):
        """Call ``obj.<method>(tag)`` for each tag until one gives a result.

        *method* is matched case-insensitively (``"FIND"`` -> ``find``).
        Returns the first truthy result, otherwise the last result
        obtained (``None`` when every call failed or *tags* is empty).
        """
        rst = None
        attr = method.lower()
        for tag in tags:
            try:
                rst = getattr(obj, attr)(tag)
            except Exception:
                # Best effort: a failing lookup just moves on to the next tag.
                continue
            if rst:
                break
        return rst

    def _text(self, obj, tags):
        """Return the text of the first matching child tag, or ``""``."""
        node = self.callback("FIND", obj, tags)
        if node is None:
            return ""
        return node.text

    def getfields(self, tags=("item", "entry")):
        """Return all entry elements (RSS ``item`` or Atom ``entry``)."""
        return self.callback(method="FIND_ALL",
                             obj=self.soup,
                             tags=tags)

    def gettitle(self, obj, tags=("title",)):
        """Return the entry title, or ``""`` when it has none."""
        return self._text(obj, tags)

    def getlink(self, obj, tags=("link",)):
        """Return the entry link, or ``""`` when it has none.

        The element text is used when present; otherwise the ``href``
        attribute is used (Atom writes ``<link href="..."/>``).
        """
        node = self.callback("FIND", obj, tags)
        if node is None:
            return ""
        return node.text or node.get("href") or ""

    def getdate(self, obj, tags=("pubDate", "published", "updated")):
        """Return the entry date, or ``""`` when it has none."""
        return self._text(obj, tags)

    def getdescription(self, obj, tags=("description", "content", "summary")):
        """Return the entry body/summary markup, or ``""`` when it has none."""
        return self._text(obj, tags)

    def run(self):
        """Yield one ``Item`` per entry found in the feed."""
        for item in self.getfields() or ():
            title = self.gettitle(item)
            link = self.getlink(item)
            date = self.getdate(item)
            description = self.getdescription(item)
            yield Item(title, link, date, description)


def test(url=None):
    """Print every entry of the feed at *url*.

    When *url* is omitted it is taken from the first command-line
    argument; the script exits with a usage message if there is none.
    """
    if url is None:
        if len(sys.argv) < 2:
            sys.exit("usage: %s <feed-url>" % sys.argv[0])
        url = sys.argv[1]
    parser = BSParser(url)
    for item in parser.run():
        # Parenthesised single argument: valid as a Python 2 print statement.
        print(item)


if __name__ == "__main__":
    test()

以上就是分享一个简单的rss阅读工具的详细内容,更多内容请关注技术你好其它相关文章!

来源链接:
免责声明:
1.资讯内容不构成投资建议,投资者应独立决策并自行承担风险
2.本文版权归属原作所有,仅代表作者本人观点,不代表本站的观点或立场
上一篇:php获取远程图片并下载保存到本地的方法分析 下一篇:使用FeedTools解析RSS代码示例

相关资讯