Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import time

# Wall-clock timestamp captured at start-up; only the (commented-out) timing
# print at the end of the script refers to it.
start_time = time.time()

# Running maxima for up to three numeric key components. Slot 0 holds the
# largest plain numeric key; slots 0..2 are also used for the first three
# groups of a phone number. -1 means "nothing seen yet".
max_length = [-1] * 3

# Set while reading the input: True when the most recently examined key looks
# like a six-digit postcode. print_values() reads it to zero-pad keys.
isPostcode = False
def chart_to_int(char):
    """Map a single character to its radix-sort bucket index.

    Decimal digits map to their numeric value (0..9), ASCII letters map
    case-insensitively to 10..35, and every other character maps to the
    catch-all bucket 36.

    The previous range test ``abs(ord(c) - ord('a')) <= 26`` also matched the
    punctuation characters between ``'Z'`` and ``'a'`` in ASCII and sent them
    to digit buckets 4..9; those now go to bucket 36 like all other symbols.
    """
    text = str(char)
    if text.isdecimal():
        return int(text)
    lowered = text.lower()
    # Guard on length: lower() can expand some characters to two code points.
    if len(lowered) == 1 and 'a' <= lowered <= 'z':
        return ord(lowered) - ord('a') + 10
    return 36
def radix_sort(strings, sort_by=0):
    """Return the rows of ``strings`` ordered by the text in column ``sort_by``.

    This is a least-significant-character-first radix sort that uses
    ``chart_to_int`` to pick a bucket for every character. The sort is stable.

    Args:
        strings: sequence of rows (indexable sequences); not modified.
        sort_by: index of the column whose ``str()`` value is the sort key.

    Returns:
        A list with the same rows in sorted order (empty for empty input).

    Fixes over the previous version: the number of passes is derived from the
    longest key in column ``sort_by`` (it used to be the length of
    ``strings[0][0]`` regardless of ``sort_by``), keys shorter than the
    longest one no longer raise IndexError, and empty input is accepted.
    """
    rows = list(strings)
    if not rows:
        return rows

    keys_width = max(len(str(row[sort_by])) for row in rows)
    for pos in range(keys_width - 1, -1, -1):
        # Bucket 0 is reserved for keys that have no character at ``pos`` so
        # that a shorter key sorts before any longer key sharing its prefix;
        # the buckets returned by chart_to_int are shifted up by one.
        bins = [[] for _ in range(38)]
        for row in rows:
            key = str(row[sort_by])
            bucket = chart_to_int(key[pos]) + 1 if pos < len(key) else 0
            bins[bucket].append(row)
        rows = [row for bucket_rows in bins for row in bucket_rows]
    return rows
def count_sort(arr, max_el):
    """Stable counting sort of rows by the integer value of their first field.

    Args:
        arr: sequence of rows; ``int(row[0])`` is the non-negative sort key
            and ``row[1]`` is the payload carried along.
        max_el: expected largest key. Keys larger than this are tolerated;
            the counting table is grown to fit them.

    Returns:
        A new list of ``[key_as_int, payload, 0]`` rows in ascending key order.

    Fixes over the previous version: the payload is read from ``arr`` (it was
    read from the module-level ``data`` list, so the function only worked when
    called with that exact list), and a key above ``max_el`` no longer raises
    IndexError.
    """
    keys = [int(row[0]) for row in arr]
    table_size = max(max_el, max(keys, default=-1)) + 1

    counts = [0] * table_size
    for key in keys:
        counts[key] += 1
    # Prefix sums: counts[k] becomes the number of rows with key <= k.
    for k in range(1, table_size):
        counts[k] += counts[k - 1]

    output_arr = [None] * len(arr)
    # Walk backwards so rows with equal keys keep their input order.
    for j in range(len(arr) - 1, -1, -1):
        key = keys[j]
        counts[key] -= 1
        output_arr[counts[key]] = [key, arr[j][1], 0]
    return output_arr
def is_phone_number(string):
    """Return True when ``string`` starts with ``'+'``.

    An empty string yields False (indexing ``string[0]`` used to raise
    IndexError for it).
    """
    return string.startswith('+')
def is_md5(string):
    """Return True when ``string`` is exactly 32 items long.

    Only the length is checked; the characters themselves are not inspected.
    """
    expected_length = 32
    return len(string) == expected_length
def is_car_number(string):
    """Return True when ``string`` has the shape ``L DDD LL``.

    That is: exactly eight characters, made of a letter, a whitespace
    character, three digits, a whitespace character and two letters.
    """
    if len(string) != 8:
        return False
    # One predicate per position, in order.
    shape = (
        str.isalpha,
        str.isspace,
        str.isdigit,
        str.isdigit,
        str.isdigit,
        str.isspace,
        str.isalpha,
        str.isalpha,
    )
    return all(check(str(ch)) for check, ch in zip(shape, string))
def is_small_num(string):
    """Return True when ``string`` is all decimal digits and its value is at most 65535."""
    return str(string).isdecimal() and int(string) <= 65535
def is_big_number(string):
    """Return True when ``string`` is all decimal digits and its value exceeds 65535."""
    return str(string).isdecimal() and int(string) > 65535
- #
- # def is_postcode(string):
- # if len(string) >= 8:
- # return string[7] == ' '
def is_postcode(string):
    """Return True when ``str(string)`` consists of exactly six decimal digits."""
    text = str(string)
    if len(text) != 6:
        return False
    return text.isdecimal()
def is_date(string):
    """Return True when ``string`` contains a ``'.'``.

    The presence of a dot is the only thing checked.
    """
    has_dot = '.' in string
    return has_dot
def print_values():
    """Write every row of the module-level ``data`` list to ``output.txt``.

    Each record is written as ``key<TAB>value``, using the first two fields of
    the row. When the module-level ``isPostcode`` flag is set, the key is
    left-padded with zeros to six characters.

    Fixes over the previous version:
    * rows are no longer unpacked into exactly three names, so rows that carry
      only ``[key, value]`` (no derived sort key was appended) do not raise
      ValueError;
    * a value without a trailing newline (typically the last line of an input
      file that does not end in a newline) gets one, so that after reordering
      it cannot run into the next record.
    """
    with open('output.txt', 'w') as f:
        for row in data:
            key = row[0]
            value = str(row[1])
            if isPostcode:
                key = str(key).zfill(6)
            if not value.endswith('\n'):
                value += '\n'
            f.write(f"{key}\t{value}")
def process_data(values):
    """Append a derived sort key to every row of ``values`` (in place).

    The kind of key is chosen from the first field of the first row:

    * decimal number -> ``int`` key, or, when the numbers are "big", the
      digits left-padded with zeros to the width of ``max_length[0]``;
    * contains ``'.'`` -> ``D.M.Y`` re-ordered to zero-padded ``YYYY.MM.DD``;
    * starts with ``'+'`` -> the first three ``'-'``-separated groups, each
      zero-padded to the width of the matching ``max_length`` slot;
    * anything else -> rows are left untouched.

    Args:
        values: list of rows (lists); field 0 of each row is the raw key.

    Returns:
        The same ``values`` list.

    Fixes over the previous version: an empty list is returned unchanged
    instead of raising IndexError; an unused local was removed; and the
    numeric branch now also consults ``max_length[0]``. The caller picks the
    sort from the largest key, so a small first key combined with a big
    maximum used to yield ``int`` keys for a sort that expects equal-width
    strings.
    """
    if not values:
        return values

    first = values[0][0]
    if str(first).isdecimal():
        if is_big_number(first) or is_big_number(max_length[0]):
            width = len(str(max_length[0]))
            for row in values:
                row.append(str(row[0]).zfill(width))
        else:
            for row in values:
                row.append(int(row[0]))
    elif is_date(first):
        for row in values:
            parts = str(row[0]).split('.')
            row.append(parts[2].zfill(4) + '.' + parts[1].zfill(2) + '.' + parts[0].zfill(2))
    elif is_phone_number(first):
        widths = [len(str(limit)) for limit in max_length]
        for row in values:
            # [1:-1] drops the leading '+' and the final character of the key.
            # NOTE(review): only the first three groups are used below, so the
            # dropped final character presumably belongs to a later group --
            # confirm against the real phone format.
            groups = row[0][1:-1].split('-')
            row.append(
                str(groups[0]).zfill(widths[0])
                + '-' + str(groups[1]).zfill(widths[1])
                + '-' + str(groups[2]).zfill(widths[2])
            )
    return values
# ---------------------------------------------------------------------------
# Script body: read input.txt, detect the kind of key, sort, write output.txt.
# ---------------------------------------------------------------------------

# Rows read from the input; each row is the list of delimiter-separated fields
# of one line, with the key in field 0.
data = []

# Read through a context manager so the file handle is always closed.
with open('input.txt') as source:
    file_lines = source.readlines()

for line in file_lines:
    # The field delimiter is a tab when the line has one, otherwise a space.
    # Lines with neither (including blank lines) are skipped.
    if '\t' in line:
        splt = line.split('\t')
    elif ' ' in line:
        splt = line.split(' ')
    else:
        continue
    data.append(splt)
    isPostcode = is_postcode(splt[0])
    # Track the largest plain numeric key.
    if splt[0].isdecimal() and int(splt[0]) > max_length[0]:
        max_length[0] = int(splt[0])

# With no usable rows there is nothing to classify or sort; an empty output
# file is still written below (this used to raise IndexError).
if data:
    first = str(data[0][0])
    if first.isdecimal():
        # For numeric keys the largest key decides which sort is used.
        first = str(max_length[0])
        if isPostcode:
            first = first.zfill(6)

    if is_phone_number(first):
        for datum in data:
            # Drop the leading '+', treat any whitespace separator like '-',
            # and ignore the last group.
            normalized = ''.join('-' if ch.isspace() else ch for ch in datum[0][1:])
            vals = [int(part) for part in normalized.split('-')[:-1]]
            for slot in range(3):
                if vals[slot] > max_length[slot]:
                    max_length[slot] = vals[slot]
        # TODO: phone numbers only get their sort key computed here; the rows
        # are not reordered yet (the counting-sort attempt did not work).
        data = process_data(data)
    elif is_postcode(first):
        isPostcode = True
        data = process_data(data)
        data = count_sort(data, max_length[0])
    elif is_small_num(first):
        data = process_data(data)
        data = count_sort(data, max_length[0])
    elif is_big_number(first):
        data = process_data(data)
        data = radix_sort(data, 2)
    elif is_date(first):
        data = process_data(data)
        data = radix_sort(data, 2)
    else:
        data = process_data(data)
        data = radix_sort(data)

print_values()
- # print("--- %s seconds ---" % (time.time() - start_time))
- # 0
- # 00
- # 000
- # 0000
- # ...
- # up to 20
- # Numbers, Dates, Phone numbers
- #
- #
- #
- #
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement