Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from django.template.loader import get_template
- from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger
- from django.shortcuts import render_to_response
- from bs4 import BeautifulSoup
- import urllib2, sys
- import urlparse
- import re
- from listing.models import jobLinks
def businessghana():
    """Scrape the BusinessGhana jobs portal and return job-detail links.

    Returns a list of '<a ...>' tag strings whose href matches the
    getJobInfo detail pages, after every href has been made absolute.
    """
    site = "http://www.businessghana.com/portal/jobs"
    hdr = {'User-Agent': 'Mozilla/5.0'}  # some sites reject the default urllib2 UA
    req = urllib2.Request(site, headers=hdr)
    jobpass = urllib2.urlopen(req)
    try:
        # Explicit parser keeps output deterministic across bs4 installs;
        # the constructor consumes the whole response, so it is safe to
        # close it immediately afterwards (the original leaked it).
        soup = BeautifulSoup(jobpass, 'html.parser')
    finally:
        jobpass.close()
    # Make every link absolute so the hrefs work outside the portal page.
    for tag in soup.find_all('a', href=True):
        tag['href'] = urlparse.urljoin('http://www.businessghana.com/portal/', tag['href'])
    # Raw-string literal: the original pattern '.getJobInfo' let the
    # leading '.' match any character, which was not the intent.
    return map(str, soup.find_all('a', href=re.compile(r'getJobInfo')))
def tonaton():
    """Scrape tonaton.com's job-vacancies page and return job links.

    Returns a list of '<a ...>' tag strings: the first element inside
    each <h2> heading, when that element is a link. All hrefs are made
    absolute first.
    """
    site = "http://tonaton.com/en/job-vacancies-in-ghana"
    hdr = {'User-Agent': 'Mozilla/5.0'}  # some sites reject the default urllib2 UA
    req = urllib2.Request(site, headers=hdr)
    jobpass = urllib2.urlopen(req)
    try:
        # Explicit parser for deterministic parsing; close the response
        # once consumed (the original leaked it).
        soup = BeautifulSoup(jobpass, 'html.parser')
    finally:
        jobpass.close()
    # Make all the links in the soup absolute.
    for tag in soup.find_all('a', href=True):
        tag['href'] = urlparse.urljoin('http://www.tonaton.com', tag['href'])
    # The required job links sit first inside each <h2> tag.
    # (The original also bound an unused 'jobs' variable to a duplicate
    # find_all('h2') call; removed.)
    result = []
    for h2 in soup.find_all('h2'):
        first = h2.next_element
        if first.name == 'a':
            result.append(str(first))
    return result
def jobscomgh():
    """Scrape the jobs.com.gh front page and return job-detail links.

    Returns a list of '<a ...>' tag strings whose href matches the
    display-job detail pages. NOTE(review): unlike the other scrapers,
    hrefs are NOT made absolute here — confirm whether that is intended.
    """
    site = "http://jobs.com.gh"
    hdr = {'User-Agent': 'Mozilla/5.0'}  # some sites reject the default urllib2 UA
    req = urllib2.Request(site, headers=hdr)
    jobpass = urllib2.urlopen(req)
    try:
        # Explicit parser for determinism; close the response once
        # consumed (the original leaked it).
        soup = BeautifulSoup(jobpass, 'html.parser')
    finally:
        jobpass.close()
    # Raw-string literal: the original pattern '.display-job' let the
    # leading '.' match any character, which was not the intent.
    return map(str, soup.find_all('a', href=re.compile(r'display-job')))
# NOTE(review): these three scrapes run at import time, so merely importing
# this module performs live network I/O, and the cached link lists stay
# frozen until the process restarts — consider fetching lazily in the view.
businessghana_links = businessghana()
tonaton_links = tonaton()
jobscomgh_links = jobscomgh()
def all_links():
    """Return the job links scraped from all three sources as one list."""
    combined = []
    for source_links in (businessghana_links, tonaton_links, jobscomgh_links):
        combined.extend(source_links)
    return combined
def display_links(request):
    """Render the scraped job links, paginated 25 per page.

    Reads the requested page number from the 'page' query parameter;
    falls back to the first page when it is not an integer and to the
    last page when it is out of range.
    """
    paginator = Paginator(all_links(), 25)
    requested = request.GET.get('page')
    try:
        page_obj = paginator.page(requested)
    except PageNotAnInteger:
        # Non-numeric (or missing) page parameter: show the first page.
        page_obj = paginator.page(1)
    except EmptyPage:
        # Page number past the end: show the last page instead.
        page_obj = paginator.page(paginator.num_pages)
    return render_to_response('jobs.html', {'name': page_obj})
def save_new_links(all_links):
    """Store every link URL that is not already in the jobLinks table.

    Fixes two defects in the original: it referenced the undefined
    names 'joblinks'/'joblink' (the model is imported as 'jobLinks'),
    and it tested a URL string for membership in a queryset of model
    instances — which is never true, so every link was re-created.
    """
    # Compare URL strings against URL strings, not model instances.
    existing = set(jobLinks.objects.values_list('url', flat=True))
    for link in all_links:
        if link not in existing:
            jobLinks.objects.create(url=link)
            existing.add(link)  # also dedupe within this batch
def this_week_links(all_links):
    """Return all stored job links as a queryset.

    Fixes two defects in the original: the undefined name 'joblinks'
    (the model is imported as 'jobLinks') and the missing call — it
    returned the bound method 'objects.all' instead of invoking it.
    NOTE(review): despite the name, no date filtering is applied, and
    the 'all_links' parameter is unused (kept for interface
    compatibility) — confirm the intended semantics.
    """
    return jobLinks.objects.all()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement