# Untitled

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

import cPickle
import hashlib
import json
import os
import re
import urllib2
import time
from BeautifulSoup import BeautifulSoup
import feedparser

# Local pickle file holding the guid of the newest entry already handled.
STATE_FILE = 'state.pkl'
# Feed that is polled for new articles.
FEED_SOURCE = 'http://www.ifanr.com/feed/user/pure'
# Endpoint that receives each converted article as a JSON POST.
FEED_DEST = 'http://10010app.cn/OpenApi_test/content/'
# Provider key: hashed into the request signature and returned as 'pKey'.
PROVIDER_KEY = '509b56941bbb5813fc2f7898'
# Provider identity attached to every uploaded article.
PROVIDER_ID = '24'
PROVIDER_NAME = u'爱范儿'
# Secret hashed into the request signature together with PROVIDER_KEY.
# NOTE(review): key and secret are hard-coded in source -- consider reading
# them from the environment or an untracked config file.
SECRET = '3'
# Content category attached to every uploaded article.
TYPE_ID = '1216'
TYPE_NAME = u'科技'


def sign_request():
    """Build the authentication fields for one request.

    The signature is the hex MD5 digest of the current local time
    (formatted ``YYYYmmddHHMMSS``), ``PROVIDER_KEY`` and ``SECRET``
    concatenated in that order.

    Returns:
        dict with the timestamp under 'tm', the signature under 'code'
        and the provider key under 'pKey'.
    """
    now = time.strftime('%Y%m%d%H%M%S')
    digest = hashlib.md5(now + PROVIDER_KEY + SECRET).hexdigest()
    return {
        'tm': now,
        'code': digest,
        'pKey': PROVIDER_KEY,
    }

def load_state():
    """Return the value stored in ``STATE_FILE`` by the previous run.

    If the file cannot be opened for reading (typically on the first run)
    it is created holding the sentinel ``'-1'`` and that sentinel is
    returned; it is not expected to equal any feed entry guid.

    Returns:
        The unpickled state, or ``'-1'`` when no state file existed.

    Raises:
        IOError: if the state file can be neither read nor created.
    """
    try:
        state_file = open(STATE_FILE, 'rb')
    except IOError:
        # Seed the file so that later runs find it; the handle is closed
        # even if pickling fails.
        with open(STATE_FILE, 'wb') as new_file:
            cPickle.dump('-1', new_file)
        return '-1'

    # NOTE(review): unpickling can execute arbitrary code; this is only
    # safe as long as STATE_FILE is written exclusively by this script.
    with state_file:
        return cPickle.load(state_file)


def save_state(state):
    """Pickle *state* into ``STATE_FILE``, replacing its previous contents.

    Args:
        state: any picklable value.

    Raises:
        IOError: if the state file cannot be opened for writing.
    """
    # The context manager closes the file even when pickling raises.
    with open(STATE_FILE, 'wb') as state_file:
        cPickle.dump(state, state_file)


def content_processor(content_html):
    """Convert HTML content parts into a list of name/value fields.

    Args:
        content_html: iterable of dict-like parts; the 'value' of every
            part that has one is concatenated and parsed as HTML.

    Returns:
        A list of ``{'name': ..., 'value': ...}`` dicts, in this order:
        'content_all' (the whole text), 'content_header' (the first line,
        or its first 400 characters when that line has 500 characters or
        more), 'content_left' (everything after the header), followed by
        one 'IMG#<n>' item per image holding its ``src``.  In the text,
        image n is represented by the marker ``<!--IMG#n-->``.
    """
    html_ = ''.join([part['value'] for part in content_html
                     if 'value' in part])
    soup = BeautifulSoup(html_)

    # Only <img> tags that carry a src are collected; indexing a tag
    # without one would raise KeyError.  Each tag is replaced directly
    # instead of being searched for again by its src value.
    image_tags = soup('img', src=True)
    images = [tag['src'] for tag in image_tags]
    for idx, tag in enumerate(image_tags):
        tag.replaceWith('#[IMG#%s]#' % idx)

    text = ''.join([node.strip(' ') for node in soup(text=True)])
    # Turn the temporary image placeholders into HTML comment markers.
    text = text.replace('#[', '<!--').replace(']#', '-->')
    # Collapse doubled newlines once, then put a literal backslash-r
    # backslash-n sequence in front of every remaining line break.
    text = text.replace('\n\n', '\n').replace('\n', '\\r\\n\n')
    full_text = text.split('\n')

    first_line = full_text[0]
    remainder = '\n'.join(full_text[1:])
    if len(first_line) < 500:
        content_header = first_line
        content_left = remainder
    else:
        # An overlong first line is cut; its tail leads the remainder.
        content_header = first_line[:400]
        content_left = '\n'.join([first_line[400:], remainder])

    contents = [
        {'name': 'content_all', 'value': '\n'.join(full_text)},
        {'name': 'content_header', 'value': content_header},
        {'name': 'content_left', 'value': content_left},
    ]
    contents.extend({'name': 'IMG#%d' % idx, 'value': src}
                    for idx, src in enumerate(images))
    return contents


def entry_to_dict(entry):
    """Convert one parsed feed entry into the dict handed to upload().

    Args:
        entry: a feed entry exposing ``title``, ``description``,
            ``content``, ``link`` and optionally ``tags``.

    Returns:
        dict with the keys 'contentName', 'description', 'tags' (the
        'term' of every tag that has one), 'ext' (the result of
        content_processor) and 'providerPid' (the trailing run of digits
        in the entry link).

    Raises:
        AttributeError: if the entry link does not end in a digit.
    """
    title = entry.title
    summary = entry.description

    if 'tags' in entry:
        terms = [tag['term'] for tag in entry.tags if 'term' in tag]
    else:
        terms = []

    ext = content_processor(entry.content)
    post_id = re.search(r'\d+$', entry.link).group()

    return {
        'contentName': title,
        'description': summary,
        'tags': terms,
        'ext': ext,
        'providerPid': post_id,
    }


def upload(data):
    """POST one converted article to ``FEED_DEST`` as signed JSON.

    The fixed category, provider, price and platform fields are added to
    *data* in place.  The article is then wrapped as ``{'data': data}``
    together with the fields returned by sign_request() and sent as the
    request body.  The payload, the response headers and the response
    body are printed.

    Args:
        data: dict describing the article (as built by entry_to_dict).

    Raises:
        urllib2.URLError: if the request fails (includes HTTPError).
    """
    data['typeId'] = TYPE_ID
    data['typeName'] = TYPE_NAME
    data['providerId'] = PROVIDER_ID
    data['providerName'] = PROVIDER_NAME
    data['price'] = 0
    data['platform'] = 'all'

    # Send the signed envelope.  It used to be assembled and then
    # discarded, with the bare article posted instead, so the signature
    # never reached the server.
    # NOTE(review): confirm the envelope layout against the API spec.
    final_data = {'data': data}
    final_data.update(sign_request())
    payload = json.dumps(final_data)
    print(payload)

    req = urllib2.Request(FEED_DEST, payload,
                          {'Content-Type': 'application/json'})
    response = urllib2.urlopen(req)
    try:
        print(response.info())
        print(response.read())
    finally:
        # Release the connection even if reading the response fails.
        response.close()


def main():
    """Iterate the main feed and send out all new entries.

    Entries are walked in feed order and uploaded until one is reached
    whose guid equals the stored state.  The guid of the first entry is
    then stored as the new state and printed next to the previous one.
    If the feed has no entries, nothing is uploaded and the stored state
    is left untouched.

    NOTE(review): this assumes the feed lists the newest entry first.
    """
    last_article = load_state()
    main_feed = feedparser.parse(FEED_SOURCE)
    entries = main_feed.entries
    if not entries:
        # Nothing to index; without this guard entries[0] raises
        # IndexError (presumably also when the feed could not be
        # fetched or parsed).
        print('no entries found in %s; state left unchanged' % FEED_SOURCE)
        return

    current_latest = entries[0]['guid']
    for entry in entries:
        if entry['guid'] == last_article:
            break
        upload(entry_to_dict(entry))
    # State is only advanced once every new entry has been uploaded.
    save_state(current_latest)
    print('%s %s' % (current_latest, last_article))


if __name__ == '__main__':
    # sign_request() formats the local time, so pin the process to a fixed
    # zone.  In POSIX TZ syntax 'CST-08' names the zone CST and places it
    # eight hours east of UTC (the sign is the reverse of the usual one).
    os.environ['TZ'] = 'CST-08'
    # Apply the new TZ value to the time module (time.tzset is Unix-only).
    time.tzset()
    main()