Advertisement
maxcuban

Untitled

Jan 10th, 2014
164
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.60 KB | None | 0 0
  1. from django.template.loader import get_template
  2. from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger
  3. from django.shortcuts import render_to_response
  4. from bs4 import BeautifulSoup
  5. import urllib2, sys
  6. import urlparse
  7. import re
  8. from listing.models import jobLinks
  9.  
  10.  
  11. def businessghana():
  12.     site = "http://www.businessghana.com/portal/jobs"
  13.     hdr = {'User-Agent' : 'Mozilla/5.0'}
  14.     req = urllib2.Request(site, headers=hdr)
  15.     jobpass = urllib2.urlopen(req)
  16.     soup = BeautifulSoup(jobpass)
  17.     for tag in soup.find_all('a', href = True):
  18.         tag['href'] = urlparse.urljoin('http://www.businessghana.com/portal/', tag['href'])
  19.     return map(str, soup.find_all('a', href = re.compile('.getJobInfo')))
  20.  
  21. def tonaton():
  22.     site = "http://tonaton.com/en/job-vacancies-in-ghana"
  23.     hdr = {'User-Agent' : 'Mozilla/5.0'}
  24.     req = urllib2.Request(site, headers=hdr)
  25.     jobpass = urllib2.urlopen(req)
  26.     soup = BeautifulSoup(jobpass)
  27.     result = []
  28.     # next two lines make all the links in the soup absolute    
  29.     for tag in soup.find_all('a', href=True):
  30.         tag['href'] = urlparse.urljoin('http://www.tonaton.com', tag['href'])
  31.     # assign all 'h2' tags to 'jobs'. The 'h2'tag contains the required links  
  32.     jobs = soup.find_all('h2')
  33.     # Loop through the 'h2' tags and extract all the links
  34.     for h2 in soup.find_all('h2'):
  35.         n = h2.next_element
  36.         if n.name == 'a':  result.append(str(n))
  37.     return result
  38.  
  39. def jobscomgh():
  40.     site = "http://jobs.com.gh"
  41.     hdr = {'User-Agent' : 'Mozilla/5.0'}
  42.     req = urllib2.Request(site, headers=hdr)
  43.     jobpass = urllib2.urlopen(req)
  44.     soup = BeautifulSoup(jobpass)
  45.     return map(str, soup.find_all('a', href = re.compile('.display-job')))
  46.  
# NOTE(review): these three calls run at import time, so merely importing
# this module performs three blocking network requests, and the scraped
# results never refresh until the process restarts. Consider fetching
# lazily (e.g. inside the view) — left as-is here because other code may
# rely on these module-level names. TODO confirm before changing.
businessghana_links = businessghana()
tonaton_links = tonaton()
jobscomgh_links = jobscomgh()
  50.  
  51. def all_links():
  52.     return (businessghana_links + tonaton_links + jobscomgh_links)
  53.  
  54.                                      
  55. def display_links(request):
  56.     name = all_links()
  57.     paginator = Paginator(name, 25)
  58.     page = request.GET.get('page')
  59.     try:
  60.         name = paginator.page(page)
  61.     except PageNotAnInteger:
  62.         name = paginator.page(1)
  63.     except EmptyPage:
  64.         name = paginator.page(paginator.num_pages)
  65.        
  66.     return render_to_response('jobs.html', {'name' : name})    
  67.    
  68. def save_new_links(all_links):
  69.     current_links = joblinks.objects.all()
  70.     for i in all_links:
  71.         if i not in current_links:
  72.             joblink.objects.create(url=i)
  73.  
  74. def this_week_links(all_links):
  75.     return joblinks.objects.all
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement