Python模拟163登陆获取邮件列表

利用cookielib和urllib2模块模拟登陆163的例子有很多，近期看了《python模拟登陆163邮箱并获取通讯录》一文，受到启发，试着对收件箱、发件箱等进行了分析，并列出了所有邮件列表及状态，包括发件人、收件人、主题、发信时间、已读未读等状态。

让客户满意是我们工作的目标，不断超越客户的期望值来自于我们对这个行业的热爱。我们立志把好的技术通过有效、简单的方式提供给客户，将通过不懈努力成为客户在信息化领域值得信任、有价值的长期合作伙伴，公司提供的服务项目有：域名与空间、虚拟主机、营销软件、网站建设、桐城网站维护、网站推广。

1、参考代码：http://hi.baidu.com/fc_lamp/blog/item/2466d1096fcc532de8248839.html

 
 
 
  
  
  #-*- coding:UTF-8 -*- 
  
  
  import urllib,urllib2,cookielib 
  
  
  import xml.etree.ElementTree as etree #xml解析类 
  
  
  
  
  
  class Login163(object):
      """Tiny 163.com webmail client: log in, then read the address book.

      Written for Python 2 (``urllib2`` / ``cookielib``).  Creating an
      instance installs a global urllib2 opener bound to this object's
      cookie jar, so later ``urllib2.urlopen`` calls share its cookies.
      """

      # Browser-like User-Agent sent with every request.
      header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
      username = ''
      passwd = ''
      cookie = None  # cookie jar, created in __init__
      cookiefile = './cookies.dat'  # file the cookie jar is saved to
      user = ''  # text of the username before '@', set by login()

      def __init__(self, username, passwd):
          """Store the credentials and install a cookie-aware urllib2 opener."""
          self.username = username
          self.passwd = passwd
          self.cookie = cookielib.LWPCookieJar()
          opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
          urllib2.install_opener(opener)

      def _read(self, request):
          """Send *request*, return the response body, always close the response."""
          response = urllib2.urlopen(request)
          try:
              return response.read()
          finally:
              response.close()

      @staticmethod
      def _find_sid(cookies):
          """Return the value of the first cookie whose text contains 'sid'.

          Each cookie is converted with ``str()``, which for cookielib cookies
          looks like ``<Cookie NAME=VALUE for DOMAIN/PATH>``.  Returns None
          when no cookie matches.
          """
          for cookie in cookies:
              text = str(cookie)
              if 'sid' in text:
                  # Second whitespace-separated token is NAME=VALUE; split only
                  # on the first '=' so a value containing '=' stays intact.
                  return text.split()[1].split('=', 1)[1]
          return None

      @staticmethod
      def _parse_contacts(xml_text):
          """Extract e-mail addresses from the address-book XML response.

          Layout walked: the first ``array`` child of the root, its first
          child, that element's last child (the contact list), then every
          field of every contact whose ``name`` attribute is ``EMAIL;PREF``.
          Returns a list of ``{'email': text}`` dicts; an unexpected or empty
          structure yields an empty list.
          """
          outer = etree.fromstring(xml_text).find('array')
          # Compare with None explicitly: an Element without children is falsy.
          if outer is None or len(outer) == 0:
              return []
          members = list(outer[0])  # list(elem) replaces deprecated getchildren()
          if not members:
              return []
          contacts = []
          for contact in members[-1]:
              for field in contact:
                  # .get() tolerates fields that carry no 'name' attribute.
                  if field.attrib.get('name') == 'EMAIL;PREF':
                      contacts.append({'email': field.text})
          return contacts

      def login(self):
          """Post the credentials to the 163 login page.

          Returns True when the response contains the success marker,
          otherwise a failure-message string (callers distinguish the two by
          type).  ``self.user`` is set and the cookie jar is saved to
          ``cookiefile`` in both cases.
          """
          postdata = urllib.urlencode({
              'username': self.username,
              'password': self.passwd,
              'type': 1
          })
          req = urllib2.Request(
              url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1',
              data=postdata,
              headers=self.header
          )
          result = self._read(req)
          self.user = self.username.split('@')[0]
          self.cookie.save(self.cookiefile)

          if '登录成功，正在跳转...' in result:
              return True
          return '%s 登陆163邮箱失败。'%(self.user)

      def address_list(self):
          """Fetch the address book; return a list of ``{'email': ...}`` dicts.

          Intended to be called after a successful login().

          Raises:
              ValueError: no cookie containing 'sid' is present in the jar.
          """
          # Request the mail entry point; the session id is then looked up in
          # the cookie jar.
          auth = urllib2.Request(
              url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1',
              headers=self.header
          )
          self._read(auth)
          sid = self._find_sid(self.cookie)
          self.cookie.save(self.cookiefile)
          if sid is None:
              raise ValueError('no sid cookie found; call login() first')

          url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username
          # The 'var' parameter is required; without it the reply only carries
          # status information such as S_OK.
          postdata = urllib.urlencode({
              'func': 'global:sequential',
              'showAd': 'false',
              'sid': sid,
              'uid': self.username,
              'userType': 'browser',
              'var': ''
          })
          req = urllib2.Request(
              url=url,
              data=postdata,
              headers=self.header
          )
          # The reply is XML; reduce it to a plain list of dicts.
          return self._parse_contacts(self._read(req))
  
  
           
  
  
  # Demo: log in, then print the e-mail address of every contact.
  print("Requesting......\n\n")
  login = Login163('xxxx@163.com','xxxxx')
  flag = login.login()
  # login() returns True on success and a failure-message string otherwise.
  if not isinstance(flag, bool):
      print(flag)
  else:
      print("Successful landing,Resolved contacts......\n\n")
      res = login.address_list()
      for contact in res:
          print(contact['email'])

#p#

2、分析收件箱、发件箱等网址

在参考代码中，获取通讯录的url为

url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username。通过对邮箱地址的分析，发现收件箱、发件箱等的url为 url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username，其中func=mbox:listMessages。收件箱、发件箱的具体区分在下面的postdata中，具体为：

（1）收件箱

 
 
 
  
  
  # Request parameters shown for the inbox listing.
  # NOTE(review): the surrounding text says the folders differ only in `fid`,
  # yet no `fid` appears here and `var` is empty -- the content of `var` was
  # presumably lost when this listing was published; TODO confirm.
  postdata = {
      'func': 'global:sequential',
      'showAd': 'false',
      'sid': 'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',
      'uid': self.username,
      'userType': 'browser',
      'var': '',
  }

（2）发件箱

 
 
 
  
  
  # Request parameters shown for the sent-mail folder listing.
  # NOTE(review): the surrounding text says the folders differ only in `fid`,
  # yet no `fid` appears here and `var` is empty -- the content of `var` was
  # presumably lost when this listing was published; TODO confirm.
  postdata = {
      'func': 'global:sequential',
      'showAd': 'false',
      'sid': 'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr',
      'uid': self.username,
      'userType': 'browser',
      'var': '',
  }

可以看出，两段代码的不同之处就是fid的取值不同（fid应当位于var参数中；上面两段清单里var显示为空，其内容似乎在排版时丢失），其中收件箱为1，发件箱为3，草稿箱为2。

#p#

3、xml解析

利用ElementTree 类来进行xml到字典的转换。在获取通讯录的实例中，主要使用了这一方法。本例子（具体代码见后文）在收取邮件列表时，并没有用这一方法，仍然使用的是字符串的处理方法。但这里还是列一下ElementTree 类对xml的处理。如（参考地址：http://hi.baidu.com/fc_lamp/blog/item/8ed2d53ada4586f714cecb3d.html）：

解决方法：

 
 
 
  
  
  #-*- coding:UTF-8 -*- 
  
  
  
  
  
  import xml.etree.ElementTree as etree #xml解析类 
  
  
  def xml2json(xml):
      """Extract e-mail addresses from an address-book XML string.

      Layout walked: the first ``array`` child of the root, its first child,
      that element's last child (the contact list), then every field of every
      contact whose ``name`` attribute is ``EMAIL;PREF``.

      Args:
          xml: XML text; for a file, parse it with ``etree.parse(source)``
              instead.

      Returns:
          A list of ``{'email': text}`` dicts.  An unexpected or empty
          structure yields an empty list.
      """
      outer = etree.fromstring(xml).find('array')
      # Compare with None explicitly: an Element without children is falsy.
      if outer is None or len(outer) == 0:
          return []
      # list(elem) replaces getchildren(), which is deprecated and was removed
      # in Python 3.9.
      members = list(outer[0])
      if not members:
          return []
      contacts = []
      for contact in members[-1]:
          for field in contact:
              # .get() tolerates fields that carry no 'name' attribute.
              if field.attrib.get('name') == 'EMAIL;PREF':
                  contacts.append({'email': field.text})
      return contacts

#p#

4、收件箱邮件列表

本例子只列出了收件箱邮件列表，如果需要，可根据以上介绍调整fid值，列出发件箱、草稿箱等的邮件列表。程序在Windows XP、py2.6环境下调试通过，运行后，会在当前目录下生成三个文件：inboxlistfile.txt记录收件箱邮件列表，addfile.txt记录通讯录，cookies.dat记录cookies。具体代码如下：

 
 
 
  
  
  #-*- coding:UTF-8 -*- 
  
  
  import urllib,urllib2,cookielib 
  
  
  import xml.etree.ElementTree as etree #xml解析类 
  
  
  
  
  
  class Login163: 
  
  
     #伪装browser 
  
  
      header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'} 
  
  
      username = ''
  
  
      passwd = ''
  
  
      cookie = None #cookie对象 
  
  
      cookiefile = './cookies.dat' #cookie临时存放地 
  
  
      user = ''
  
  
       
  
  
      def __init__(self,username,passwd): 
  
  
          self.username = username 
  
  
          self.passwd = passwd 
  
  
          #cookie设置 
  
  
          self.cookie = cookielib.LWPCookieJar() #自定义cookie存放 
  
  
          opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie)) 
  
  
          urllib2.install_opener(opener) 
  
  
  
  
  
     #登陆     
  
  
      def login(self):        
  
  
  
  
  
          #请求参数设置 
  
  
          postdata = { 
  
  
              'username':self.username, 
  
  
              'password':self.passwd, 
  
  
              'type':1
  
  
              } 
  
  
          postdata = urllib.urlencode(postdata) 
  
  
  
  
  
          #发起请求 
  
  
          req = urllib2.Request( 
  
  
                  url='http://reg.163.com/logins.jsp?type=1&product=mail163&url=http://entry.mail.163.com/coremail/fcg/ntesdoor2?lightweight%3D1%26verifycookie%3D1%26language%3D-1%26style%3D1', 
  
  
                  data= postdata,#请求数据 
  
  
                  headers = self.header #请求头 
  
  
              ) 
  
  
  
  
  
          result = urllib2.urlopen(req).read() 
  
  
          result = str(result) 
  
  
          #print result 
  
  
          self.user = self.username.split('@')[0] 
  
  
  
  
  
          self.cookie.save(self.cookiefile)#保存cookie 
  
  
           
  
  
          if '登录成功，正在跳转...' in result: 
  
  
              #print("%s 你已成功登陆163邮箱。---------n" %(user)) 
  
  
              flag = True
  
  
          else: 
  
  
              flag = '%s 登陆163邮箱失败。'%(self.user) 
  
  
              
  
  
          return flag 
  
  
  
  
  
     #获取通讯录 
  
  
      def address_list(self): 
  
  
  
  
  
          #获取认证sid 
  
  
          auth = urllib2.Request( 
  
  
                  url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1', 
  
  
                  headers = self.header 
  
  
              ) 
  
  
          auth = urllib2.urlopen(auth).read() 
  
  
  
  
  
          #authstr=str(auth) 
  
  
          #print authstr 
  
  
           
  
  
          for i,sid in enumerate(self.cookie): 
  
  
              sid = str(sid) 
  
  
              #print 'sid:%s' %sid 
  
  
              if 'sid' in sid: 
  
  
                  sid = sid.split()[1].split('=')[1] 
  
  
                  break
  
  
          self.cookie.save(self.cookiefile) 
  
  
           
  
  
          #请求地址 
  
  
          url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=global:sequential&showAd=false&userType=browser&uid='+self.username 
  
  
          #参数设定(var 变量是必需要的,不然就只能看到:S_OK这类信息) 
  
  
          #这里参数也是在firebug下查看的。 
  
  
          postdata = { 
  
  
              'func':'global:sequential', 
  
  
              'showAd':'false', 
  
  
              'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr', 
  
  
              'uid':self.username, 
  
  
              'userType':'browser', 
  
  
              'var':''
  
  
              } 
  
  
          postdata = urllib.urlencode(postdata) 
  
  
           
  
  
          #组装请求 
  
  
          req = urllib2.Request( 
  
  
              url = url, 
  
  
              data = postdata, 
  
  
              headers = self.header 
  
  
              ) 
  
  
          res = urllib2.urlopen(req).read() 
  
  
  
  
  
          #print str(res) 
  
  
           
  
  
          #解析XML，转换成json 
  
  
          #说明：由于这样请求后163给出的是xml格式的数据， 
  
  
          #为了返回的数据能方便使用最好是转为JSON 
  
  
          json = [] 
  
  
          tree = etree.fromstring(res) 
  
  
  
  
  
           
  
  
           
  
  
          obj = None
  
  
          for child in tree: 
  
  
              if child.tag == 'array': 
  
  
                  obj = child             
  
  
                  break
  
  
          #这里多参考一下，etree元素的方法属性等，包括attrib,text,tag,getchildren()等 
  
  
          obj = obj[0].getchildren().pop() 
  
  
          for child in obj: 
  
  
              for x in child: 
  
  
                  attr = x.attrib 
  
  
                  if attr['name']== 'EMAIL;PREF': 
  
  
                      value = {'email':x.text} 
  
  
                      json.append(value) 
  
  
          return json 
  
  
  #获取收件箱 
  
  
      def minbox(self): 
  
  
          #获取认证sid 
  
  
          auth = urllib2.Request( 
  
  
                  url='http://entry.mail.163.com/coremail/fcg/ntesdoor2?username='+self.user+'&lightweight=1&verifycookie=1&language=-1&style=1', 
  
  
                  headers = self.header 
  
  
              ) 
  
  
          auth = urllib2.urlopen(auth).read() 
  
  
  
  
  
          #authstr=str(auth) 
  
  
          #print authstr 
  
  
           
  
  
          for i,sid in enumerate(self.cookie): 
  
  
              sid = str(sid) 
  
  
              #print 'sid:%s' %sid 
  
  
              if 'sid' in sid: 
  
  
                  sid = sid.split()[1].split('=')[1] 
  
  
                  break
  
  
          self.cookie.save(self.cookiefile) 
  
  
           
  
  
           
  
  
          url = 'http://twebmail.mail.163.com/js4/s?sid='+sid+'&func=mbox:listMessages&showAd=false&userType=browser&uid='+self.username 
  
  
           
  
  
          postdata = { 
  
  
              'func':'global:sequential', 
  
  
              'showAd':'false', 
  
  
              'sid':'qACVwiwOfuumHPdcYqOOUTAjEXNbBeAr', 
  
  
              'uid':self.username, 
  
  
              'userType':'browser', 
  
  
              'var':''
  
  
              } 
  
  
          postdata = urllib.urlencode(postdata) 
  
  
           
  
  
          #组装请求 
  
  
          req = urllib2.Request( 
  
  
              url = url, 
  
  
              data = postdata, 
  
  
              headers = self.header 
  
  
              ) 
  
  
          res = urllib2.urlopen(req).read() 
  
  
  
  
  
          liststr=str(res).split('