Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
@memory_profile
def scrape_recent_links_to_json() -> None:
    """Pull the latest design jobs from the API and store the new ones locally.

    Steps, in order:

    1. Request jobs using ``design_jobs_search_parameters`` and save the raw
       response to the "before check" JSON file.
    2. Load the full local job database and the "after check" file of
       recently saved jobs.
    3. For every fetched job that ``job_is_new`` reports as new, annotate it
       (save timestamp, relevancy reasons, blacklist flag, apply flags) and
       append it to the full local job list.
    4. Report the counters and persist both the full database and the
       annotated API response (as the new "after check" file).

    NOTE(review): this block was reconstructed from a paste whose indentation
    was lost; the nesting marked with NOTE(review) below should be confirmed
    against the real source file.
    """
    p(
        "Getting new jobs from API to JSON at time:\n",
        correct_datetime()['_time'],
        sep="",
    )

    api_response = request_jobs_from_website(
        design_jobs_search_parameters
    ).json()
    # Saving results to "before check" database to minimize API requests.
    write_json_file(
        api_response,
        "database/jobs_recent_saved_before_check_design.json",
    )
    fetched_jobs = api_response["results"]

    all_jobs_database = read_json_file("all_design_jobs")
    recent_jobs_database = read_json_file(
        "database/jobs_recent_saved_after_check_design.json"
    )
    stored_jobs = all_jobs_database["jobs"]
    recently_saved_jobs = recent_jobs_database["results"]

    new_job_count = 0
    appliable_job_count = 0

    for job in fetched_jobs:
        # Novelty is checked against the recently saved batch only
        # (original note: "checking with only last 10").
        if not job_is_new(job, recently_saved_jobs):
            continue

        job["saved_to_json_on_datetime"] = str(datetime.datetime.now())

        job_is_relevant, relevancy_reasons = is_job_relevant(job)
        job["relevancy"] = relevancy_reasons
        job["company_blacklisted"] = is_company_in_blacklist(job)

        # Default; flipped below only for relevant, non-blacklisted jobs
        # for which the days-passed check returns a truthy value.
        job["can_apply_now"] = False
        if job_is_relevant and not job["company_blacklisted"]:
            days_check = Job(
                job['id']
            ).days_passed_since_last_contact_or_new_job(
                required_days_to_be_passed=10
            )
            if days_check:
                appliable_job_count += 1
                job["can_apply_now"] = True
                # NOTE(review): assumed to belong inside this branch, so the
                # key is only stored when the check passed -- confirm.
                job["sufficient_days_passed"] = days_check

        job["has_applied"] = False  # doc.id#3
        stored_jobs.append(job)
        new_job_count += 1

    if new_job_count:
        p(f"Added {new_job_count} new jobs to database.")
        if appliable_job_count:
            p(f"Added {appliable_job_count} relevant jobs to database.")
        # NOTE(review): both writes are assumed to happen only when at least
        # one job was added -- confirm against the original indentation.
        write_json_file(all_jobs_database)
        # The fetched job dicts were annotated in place above, so the
        # response written here carries the annotations of the new jobs.
        write_json_file(
            api_response,
            "database/jobs_recent_saved_after_check_design.json",
        )

    # Drop this function's reference to the full database dict and run a
    # garbage-collection pass before returning.
    del all_jobs_database
    gc.collect()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement