Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from django.template.loader import get_template
- from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger
- from django.shortcuts import render_to_response
- from bs4 import BeautifulSoup
- import urllib2, sys
- import urlparse
- import re
- from listing.models import jobLinks
def businessghana():
    """Scrape the BusinessGhana jobs portal and return job-detail links.

    Returns a list of '<a ...>' tag strings whose href matches the
    getJobInfo detail pages, after every href has been made absolute.
    """
    site = "http://www.businessghana.com/portal/jobs"
    hdr = {'User-Agent': 'Mozilla/5.0'}  # some sites reject the default urllib2 UA
    req = urllib2.Request(site, headers=hdr)
    jobpass = urllib2.urlopen(req)
    try:
        # Explicit parser keeps output deterministic across bs4 installs;
        # the constructor consumes the whole response, so it is safe to
        # close it immediately afterwards (the original leaked it).
        soup = BeautifulSoup(jobpass, 'html.parser')
    finally:
        jobpass.close()
    # Make every link absolute so the hrefs work outside the portal page.
    for tag in soup.find_all('a', href=True):
        tag['href'] = urlparse.urljoin('http://www.businessghana.com/portal/', tag['href'])
    # Raw-string literal: the original pattern '.getJobInfo' let the
    # leading '.' match any character, which was not the intent.
    return map(str, soup.find_all('a', href=re.compile(r'getJobInfo')))
def tonaton():
    """Scrape tonaton.com's job-vacancies page and return job links.

    Returns a list of '<a ...>' tag strings: the first element inside
    each <h2> heading, when that element is a link. All hrefs are made
    absolute first.
    """
    site = "http://tonaton.com/en/job-vacancies-in-ghana"
    hdr = {'User-Agent': 'Mozilla/5.0'}  # some sites reject the default urllib2 UA
    req = urllib2.Request(site, headers=hdr)
    jobpass = urllib2.urlopen(req)
    try:
        # Explicit parser for deterministic parsing; close the response
        # once consumed (the original leaked it).
        soup = BeautifulSoup(jobpass, 'html.parser')
    finally:
        jobpass.close()
    # Make all the links in the soup absolute.
    for tag in soup.find_all('a', href=True):
        tag['href'] = urlparse.urljoin('http://www.tonaton.com', tag['href'])
    # The required job links sit first inside each <h2> tag.
    # (The original also bound an unused 'jobs' variable to a duplicate
    # find_all('h2') call; removed.)
    result = []
    for h2 in soup.find_all('h2'):
        first = h2.next_element
        if first.name == 'a':
            result.append(str(first))
    return result
def jobscomgh():
    """Scrape the jobs.com.gh front page and return job-detail links.

    Returns a list of '<a ...>' tag strings whose href matches the
    display-job detail pages. NOTE(review): unlike the other scrapers,
    hrefs are NOT made absolute here — confirm whether that is intended.
    """
    site = "http://jobs.com.gh"
    hdr = {'User-Agent': 'Mozilla/5.0'}  # some sites reject the default urllib2 UA
    req = urllib2.Request(site, headers=hdr)
    jobpass = urllib2.urlopen(req)
    try:
        # Explicit parser for determinism; close the response once
        # consumed (the original leaked it).
        soup = BeautifulSoup(jobpass, 'html.parser')
    finally:
        jobpass.close()
    # Raw-string literal: the original pattern '.display-job' let the
    # leading '.' match any character, which was not the intent.
    return map(str, soup.find_all('a', href=re.compile(r'display-job')))
# NOTE(review): these three scrapes run at import time, so merely importing
# this module performs live network I/O, and the cached link lists stay
# frozen until the process restarts — consider fetching lazily in the view.
businessghana_links = businessghana()
tonaton_links = tonaton()
jobscomgh_links = jobscomgh()
def all_links():
    """Return the job links scraped from all three sources as one list."""
    combined = []
    for source_links in (businessghana_links, tonaton_links, jobscomgh_links):
        combined.extend(source_links)
    return combined
def display_links(request):
    """Render the scraped job links, paginated 25 per page.

    Reads the requested page number from the 'page' query parameter;
    falls back to the first page when it is not an integer and to the
    last page when it is out of range.
    """
    paginator = Paginator(all_links(), 25)
    requested = request.GET.get('page')
    try:
        page_obj = paginator.page(requested)
    except PageNotAnInteger:
        # Non-numeric (or missing) page parameter: show the first page.
        page_obj = paginator.page(1)
    except EmptyPage:
        # Page number past the end: show the last page instead.
        page_obj = paginator.page(paginator.num_pages)
    return render_to_response('jobs.html', {'name': page_obj})
def save_new_links(all_links):
    """Store every link URL that is not already in the jobLinks table.

    Fixes two defects in the original: it referenced the undefined
    names 'joblinks'/'joblink' (the model is imported as 'jobLinks'),
    and it tested a URL string for membership in a queryset of model
    instances — which is never true, so every link was re-created.
    """
    # Compare URL strings against URL strings, not model instances.
    existing = set(jobLinks.objects.values_list('url', flat=True))
    for link in all_links:
        if link not in existing:
            jobLinks.objects.create(url=link)
            existing.add(link)  # also dedupe within this batch
def this_week_links(all_links):
    """Return all stored job links as a queryset.

    Fixes two defects in the original: the undefined name 'joblinks'
    (the model is imported as 'jobLinks') and the missing call — it
    returned the bound method 'objects.all' instead of invoking it.
    NOTE(review): despite the name, no date filtering is applied, and
    the 'all_links' parameter is unused (kept for interface
    compatibility) — confirm the intended semantics.
    """
    return jobLinks.objects.all()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement