Part of Slepp's Projects: Pastebin, TURL, Imagebin, Filebin
Feedback -- English French German Japanese
Create Upload Newest Tools Donate
Sign In | Create Account

Stuff
Tuesday, November 25th, 2008 at 11:30:52pm MST 

  1. #!/usr/bin/env python
  2. # -*- coding: UTF-8 -*-
  3. #Author: Roy L Zuo (roylzuo at gmail dot com)
  4. #Last Change: Wed Nov 26 12:37:24 2008 EST
  5. #Description: 根据yingjiesheng搜索关键字结果,群发简历,并保存已投
  6. #             职位具体信息至指定文件夹
  7. import urllib2, re, os, shelve, time
  8.  
  9. searches = [['linux', 'python'], ['unix','python'],['linux','金融'],
  10.             ['unix','金融'],['linux','finance'], ['unix','finance'],
  11.             ]
  12.  
  13. savepath = '%s/workspace/career/buster' %os.environ['HOME']
  14.  
  15. def getLatestJobs(keywords):
  16.     '''搜索最新工作列表,与以投列表对照,并返回未投工作之链接'''
  17.     #TODO: compare with saved pages
  18.     url0 = "http://s.yingjiesheng.com/result.jsp?keyword=%s&period=3&sort=&jobtype=1" %'+'.join(keywords)
  19.     url = url0+"&start=0"
  20.     page = urllib2.urlopen(url).read()
  21.     match = re.search("共找到(.*)条记录",page)
  22.     if not match:      return
  23.     results = re.findall('<h3 class="title"><a href="([^"]*)" target="_blank">.*?</a></h3>',page)
  24.     for i in range(int(match.group(1))/10):
  25.         nurl=url0+"&start=%d0" %(i+1)
  26.         npage = urllib2.urlopen(nurl).read()
  27.         results.extend(re.findall('<h3 class="title"><a href="([^"]*)" target="_blank">.*?</a></h3>',npage))
  28.     return results
  29.  
  30. def getEmailAddress(url, savepath):
  31.     '''查找页面,看是否有email地址,返回email地址'''
  32.     page = urllib2.urlopen(url).read()
  33.     match = re.search("(\w+(?:[-+.]\w+)*@\w+(?:[-.]\w+)*\.\w+(?:[-.]\w+)*)",page)
  34.     if not match:       return
  35.     #保存
  36.     savedir = '%s/%s' %(savepath,time.strftime("%y-%m-%d"))
  37.     if not os.path.exists(savedir):      os.mkdir(savedir)
  38.     file = open("%s/%s" %(savedir,url.split("/")[-1]),'w')
  39.     file.write(page)
  40.     file.close()
  41.     return match.group(1)
  42.  
  43. if __name__=='__main__':
  44.     import sys
  45.     #import socket
  46.     #sys.path.append("%s/workspace/python/lib" %os.environ['HOME'])
  47.     #from threadmanager import WorkerManager
  48.     #socket.setdefaulttimeout(10)
  49.  
  50.     joblist=[]
  51.     #wm = WorkerManager(30)
  52.     for item in searches:
  53.         #wm.add_job(getLatestJobs, item)
  54.     #wm.wait_for_complete()
  55.     #joblist = wm.get_result()
  56.         links = getLatestJobs(item)
  57.         if links is not None:
  58.             joblist += getLatestJobs(item)
  59.     joblist=list(set(joblist))
  60.  
  61.     submitted = shelve.open("%s/submitted" %savepath)
  62.     emails=[]
  63.     for url in joblist:
  64.         if submitted.has_key(url):      continue
  65.         #print url
  66.         e = getEmailAddress(url, savepath)
  67.         #print e
  68.         if e:
  69.             emails.append(e)
  70.             submitted[url]=e
  71.     emails=list(set(emails))
  72.     submitted.close()
  73.  
  74.     sender="Le Zuo (Roy) <lzuo@graduate.hku.hk>"
  75.     attachment="/home/roylez/workspace/career/doc/resume.pdf"
  76.     subject="应聘"
  77.     mutt = "mutt -s'%s' -e'set from=\"%s\"' -a'%s' %s <$HOME/doc/letter.txt"
  78.     subemails = shelve.open("%s/emails" %savepath)
  79.     for e in emails:
  80.         #使用mutt发送简历,内容为文件模板内容,自动添加附件
  81.         if subemails.has_key(e):      continue
  82.         print "Submitting to %s ..." %e
  83.         os.system(mutt %(subject,sender,attachment,e))
  84.         subemails[e]=''

advertising

Update the Post

Either update this post and resubmit it with changes, or make a new post.

You may also comment on this post.

update paste below
details of the post (optional)

Note: Only the paste content is required, though the following information can be useful to others.

Save name / title?

(space separated, optional)



Please note that information posted here will expire by default in one month. If you do not want it to expire, please set the expiry time above. If it is set to expire, web search engines will not be allowed to index it prior to it expiring. Items that are not marked to expire will be indexable by search engines. Be careful with your passwords. All illegal activities will be reported and any information will be handed over to the authorities, so be good.

fantasy-obligation
fantasy-obligation