# View difference between Paste ID: wxytW1V0 and 4d2wdDUf
# (Pastebin diff-view residue: "SHOW: | | - or go back to the newest paste.")
1
import argparse
import json
import threading
import time

import requests

# Python 2 names the module ``Queue``; Python 3 names it ``queue``.
try:
    from Queue import Queue
except:
    from queue import Queue
10
11
# Command-line interface.  ``args`` is read as a module-level global by
# do_hack() and by the final status message.
parser = argparse.ArgumentParser(
    description="Look up every DNI listed in FILE against the AFIP "
                "sr-padron service and append the records found, one JSON "
                "document per line, to FILE.data.")
parser.add_argument(
    "file",
    help="input file; each line is '|'-separated and its second field "
         "is the DNI to look up")
parser.add_argument(
    "-l", "--limit", default=0, type=int,
    help="stop after queueing this many DNIs; 0 means no limit (default: 0)")
parser.add_argument(
    "-t", "--threads", default=8, type=int,
    help="number of worker threads (default: 8)")
args = parser.parse_args()

# Reject values that would otherwise fail silently: a negative limit stops
# after the very first DNI, and fewer than one thread means nothing is ever
# taken off the queue.
if args.limit < 0:
    parser.error("--limit must be 0 (no limit) or a positive integer")
if args.threads < 1:
    parser.error("--threads must be at least 1")
16
17
def _read_last_dni(path, chunk_size=4096):
    """Return the ``numeroDocumento`` of the last JSON line stored in *path*.

    The file is opened in binary mode because text-mode files on Python 3
    reject non-zero end-relative seeks.  The read window is doubled until it
    contains the whole last line, so records longer than *chunk_size* are
    handled too.

    Returns ``None`` when the file does not exist, cannot be read, or holds
    no data.  Raises ``ValueError`` when the last line is not a JSON object
    with a ``numeroDocumento`` key.
    """
    try:
        with open(path, "rb") as fh:
            fh.seek(0, 2)  # whence=2: relative to the end of the file
            size = fh.tell()
            window = chunk_size
            while True:
                start = max(0, size - window)
                fh.seek(start)
                tail = fh.read().rstrip(b"\r\n")
                # Stop once the window reaches the start of the file or
                # contains the newline that precedes the last line.
                if start == 0 or b"\n" in tail:
                    break
                window *= 2
    except IOError:
        return None

    if not tail.strip():
        return None

    last_line = tail.rsplit(b"\n", 1)[-1]
    try:
        return json.loads(last_line.decode("utf-8"))["numeroDocumento"]
    except (KeyError, TypeError) as err:
        raise ValueError("last record has no numeroDocumento: %r" % (err,))


def do_hack():
    """Look up every DNI listed in ``args.file`` and store the records found.

    Each input line is split on ``'|'`` and its second field is parsed as an
    integer DNI.  Worker threads query ``/sr-padron/v2/personas/<dni>`` and,
    for every CUIL in the reply, ``/sr-padron/v2/persona/<cuil>``; each
    successful reply's ``data`` is appended as one JSON line to
    ``args.file + '.data'``.

    Resuming: if the output file already holds records, input lines are
    skipped up to and including the one whose DNI matches the last stored
    record.  NOTE(review): workers finish out of order, so the last stored
    record is only an approximation of how far the previous run got.

    Reads the module-level ``args`` (``file``, ``limit``, ``threads``).
    Returns ``None`` once all queued DNIs have been processed and every
    worker thread has exited.
    """
    out_path = args.file + '.data'
    num_workers = max(1, args.threads)
    max_attempts = 5        # tries per DNI before it is given up on
    retry_delay = 1.0       # seconds to wait between tries
    request_timeout = 30    # seconds; without it a stalled request hangs forever
    stop = object()         # sentinel that tells a worker to exit

    # Work out the resume point before the output file is opened for append.
    last_dni = None
    try:
        last_dni = _read_last_dni(out_path)
    except ValueError:
        print("file corrupted: %s, restarting"%(args.file))
    print ('last dni: %s'%(last_dni))

    queue = Queue(maxsize=num_workers)
    write_lock = threading.Lock()

    with open(args.file, "r") as f, open(out_path, "a") as out:

        def find_dni(session, dni):
            """Fetch all records for *dni* and append them to the output."""
            r = session.get(
                'https://aws.afip.gov.ar/sr-padron/v2/personas/%s'%(dni),
                timeout=request_timeout)
            resp = r.json()
            if not resp['success']:
                return False

            # Collect first and write at the end, so a failure half-way
            # through does not leave duplicates behind when it is retried.
            found = []
            for cuil in resp['data']:
                r = session.get(
                    'https://aws.afip.gov.ar/sr-padron/v2/persona/%s'%(cuil),
                    timeout=request_timeout)
                cresp = r.json()
                if cresp['success']:
                    found.append((cuil, json.dumps(cresp['data']) + '\n'))

            with write_lock:
                for cuil, record in found:
                    print('found: %s ↔ %s'%(dni, cuil))
                    out.write(record)
                # Flush whole lines so an interrupted run leaves a file the
                # resume logic can still parse.
                out.flush()
            return True

        def worker():
            """Process DNIs from the queue until the stop sentinel arrives."""
            session = requests.Session()
            while True:
                dni = queue.get()
                try:
                    if dni is stop:
                        return
                    print("looking for dni: %s"%(dni))
                    for attempt in range(1, max_attempts + 1):
                        try:
                            find_dni(session, dni)
                            break
                        except Exception as err:
                            # ``Exception`` rather than a bare except, so
                            # Ctrl-C / SystemExit are not swallowed.
                            print("error for dni %s: %r" % (dni, err))
                            if attempt < max_attempts:
                                print("caught exception, try again")
                                time.sleep(retry_delay)
                    else:
                        print("giving up on dni %s after %d attempts"
                              % (dni, max_attempts))
                finally:
                    queue.task_done()

        workers = [threading.Thread(target=worker) for _ in range(num_workers)]
        for t in workers:
            t.start()

        try:
            dropped = 0
            count = 0
            # Compare as text: the stored value may be a string or a number.
            resume_key = None if last_dni is None else '%s' % (last_dni,)

            for line in f:
                try:
                    dni = int(line.split('|')[1])
                except (IndexError, ValueError):
                    if line.strip():
                        print("skipping malformed line: %r" % (line,))
                    continue

                if resume_key is not None:
                    if '%s' % (dni,) != resume_key:
                        dropped += 1
                        continue
                    # Found the last stored DNI: skip it as well and start
                    # queueing from the next line.
                    resume_key = None
                    print('dropped: %d'%(dropped))
                    continue

                queue.put(dni, block=True)

                count += 1
                if args.limit and count >= args.limit:
                    break

            if resume_key is not None:
                print("last dni %s not found in %s, nothing queued"
                      % (resume_key, args.file))
        finally:
            # Always release the workers, otherwise the non-daemon threads
            # keep the process alive forever.
            for _ in workers:
                queue.put(stop)
            for t in workers:
                t.join()
86
# Script entry: run the scrape, then report which input file was processed.
do_hack()
finished_message = 'all done %s' % args.file
print(finished_message)