Skip to content

Commit

Permalink
Update runner
Browse files Browse the repository at this point in the history
  • Loading branch information
wkobiela committed Jan 7, 2024
1 parent a266ac2 commit c9ebca0
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 93 deletions.
64 changes: 64 additions & 0 deletions deprecated/justjoinit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import requests
from modules.base_logger import log
from modules.common import updateExcel

class JustJoinIt():
    """Collect job offers from the justjoin.it offers API into ``jobs_dict``.

    ``jobs_dict`` maps an offer URL to a dict with the keys ``"Title"``,
    ``"Company"``, ``"Salary"`` and ``"Location"``; each value is a
    one-element list.
    """

    def __init__(self):
        # Offer URL -> {"Title": [...], "Company": [...], "Salary": [...], "Location": [...]}
        self.jobs_dict = {}

    @staticmethod
    def _as_tuple(value):
        """Return *value* as a tuple of accepted values.

        A bare string becomes a one-element tuple so that later membership
        tests compare whole values instead of doing a substring search.
        """
        if value is None:
            return ()
        if isinstance(value, str):
            return (value,)
        return tuple(value)

    def updateJobsDict(self):
        """Download the offer list from the justjoin.it API.

        Despite its name this method does not modify ``jobs_dict``; it only
        fetches the data that ``prepareJobsDict`` consumes.

        Returns:
            The ``requests`` response on success, or ``None`` when the request
            fails or the server answers with an HTTP error status.
        """
        url = 'https://justjoin.it/api/offers'
        headers = {
            "content-type": "application/json, text/plain",
            "User-Agent": (
                "Mozilla/5.0 (X11; Linux x86_64; rv:57.0) "
                "Gecko/20100101 Firefox/57.0"
            ),
            "Host": "justjoin.it",
            "Referer": "justjoin.it",
        }
        try:
            response = requests.get(url, headers=headers, timeout=120)
            # An error page is not an offer list; treat it like a failed request.
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Exception {e} on updateJobsDict.")
            return None
        return response

    def prepareJobsDict(self, response, role, lvl, city):
        """Filter the downloaded offers and store the matches in ``jobs_dict``.

        An offer is kept when its ``marker_icon`` is one of *role*, its
        ``experience_level`` is one of *lvl*, it is either remote or located
        in one of *city*, and its ``display_offer`` flag is not ``False``.

        Args:
            response: Object whose ``json()`` method returns a list of offer
                dicts, or ``None`` (the failure value of ``updateJobsDict``),
                in which case nothing is added.
            role: Accepted ``marker_icon`` value, or a collection of them.
            lvl: Accepted ``experience_level`` value, or a collection of them.
            city: Accepted city name, or a collection of city names.
        """
        if response is None:
            return

        roles = self._as_tuple(role)
        levels = self._as_tuple(lvl)
        cities = self._as_tuple(city)

        for offer_dict in response.json():
            if offer_dict.get("marker_icon") not in roles:
                continue
            if offer_dict.get("experience_level") not in levels:
                continue
            # Exact comparison: `x not in ("remote")` was a substring test
            # against the string "remote" and raised TypeError for None.
            is_remote = offer_dict.get("workplace_type") == "remote"
            if not is_remote and offer_dict.get("city") not in cities:
                continue
            if offer_dict.get("display_offer") is False:
                continue

            url = f'https://justjoin.it/offers/{offer_dict["id"]}'
            self.jobs_dict[url] = {
                "Title": [offer_dict.get("title")],
                "Company": [offer_dict.get("company_name")],
                "Salary": [offer_dict.get("employment_types")],
                "Location": [offer_dict.get("city")],
            }

def run(sheetname, role, lvl, city):
    """Fetch justjoin.it offers, filter them and hand the result to ``updateExcel``.

    Args:
        sheetname: Passed through to ``updateExcel`` as its first argument.
        role: Forwarded to ``JustJoinIt.prepareJobsDict``.
        lvl: Forwarded to ``JustJoinIt.prepareJobsDict``.
        city: Forwarded to ``JustJoinIt.prepareJobsDict``.
    """
    log.info("Starting JustJoinIt scrapper.")
    just = JustJoinIt()
    resp = just.updateJobsDict()
    if resp is None:
        # updateJobsDict returns None when the download failed; there is
        # nothing to parse, so stop instead of crashing on `None.json()`.
        log.error("JustJoinIt offers could not be downloaded; skipping update.")
        return
    just.prepareJobsDict(resp, role, lvl, city)
    updateExcel(sheetname, just.jobs_dict)
    log.info("Finished JustJoinIt scrapper.")
14 changes: 8 additions & 6 deletions runner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from scrappers import justjoinit_new
from scrappers import justjoinit
from scrappers import nofluffjobs
from scrappers import bulldogjob
from modules import setup, common
Expand All @@ -25,6 +25,7 @@
}

justjoinit_settings = {
"site": "JustjoinIt",
"role": ["testing"],
"lvl": ["mid", "junior"],
"city": "Gdańsk"
Expand All @@ -39,12 +40,13 @@
# role=bulldogjob_settings['role'],
# lvl=bulldogjob_settings['lvl'],
# city=bulldogjob_settings['city'])
# JUSTJOINIT_URL = common.createLinks(site=justjoinit_settings['site'],
# role=justjoinit_settings['role'],
# lvl=justjoinit_settings['lvl'],
# city=justjoinit_settings['city'])

# Run setup and scrappers
# setup.run(EXCEL_NAME, NOFLUFFJOBS_SHEET, BULLDOGJOB_SHEET, JUSTJOINIT_SHEET)
setup.run(EXCEL_NAME, NOFLUFFJOBS_SHEET, BULLDOGJOB_SHEET, JUSTJOINIT_SHEET)
# nofluffjobs.run(NOFLUFFJOBS_SHEET, NOFLUFFJOBS_URL)
# bulldogjob.run(BULLDOGJOB_SHEET, BULLDOGJOB_URL)
justjoinit_new.run(JUSTJOINIT_SHEET,
role=justjoinit_settings['role'],
lvl=justjoinit_settings['lvl'],
city=justjoinit_settings['city'])
justjoinit.run(JUSTJOINIT_SHEET, "https://justjoin.it/gdansk/testing/remote_yes")
70 changes: 21 additions & 49 deletions scrappers/justjoinit.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,36 @@
import re
import requests
from bs4 import BeautifulSoup
from modules.base_logger import log
from modules.common import updateExcel
from modules.common import getDomainName, updateExcel

class JustJoinIt():
def __init__(self):
self.jobs_dict = {}

def updateJobsDict(self):
url = 'https://justjoin.it/api/offers'
def updateJobsDict(self, url):
domainName = getDomainName(url)
try:
headers = {
"content-type": "application/json, text/plain",
"User-Agent": (
"Mozilla/5.0 (X11; Linux x86_64; rv:57.0) "
"Gecko/20100101 Firefox/57.0"
),
"Host": "justjoin.it",
"Referer": "justjoin.it",
}
response = requests.get(url, headers=headers, timeout=120)
return response
page = requests.get(url, timeout=120)
page_soup = BeautifulSoup(page.content, "html.parser")
job_links_list = page_soup.find_all("div", {"class": "css-1iq2gw3"})

for job in job_links_list:
job_link = "https://"+domainName+job.find('a', class_='css-4lqp8g')['href']
job_title = job.find('h2').text
job_company = job.find('div', class_=re.compile("css-ldh1c9", re.I)).text
job_salary = job.find('div', class_=re.compile("css-1b2ga3v", re.I)).text
job_location = job.find('div', class_=re.compile("css-68pppj", re.I)).text
self.jobs_dict[job_link] = {"Title": [job_title],
"Company": [job_company],
"Salary": [job_salary],
"Location": [job_location]}
except Exception as e:
print(f"Exception {e} on updateJobsDict.")
return None

def prepareJobsDict(self, response, role, lvl, city):
marker_list = []
city_list = []
exp_list = []

for offer_dict in response.json():
url = f'https://justjoin.it/offers/{offer_dict["id"]}'

if offer_dict.get("marker_icon") not in role:
continue
if offer_dict.get("experience_level") not in lvl:
continue
if (offer_dict.get("workplace_type") not in ("remote") and
not (offer_dict.get("workplace_type") not in ("remote") and offer_dict.get("city") in city)):
continue
if offer_dict.get("display_offer") is False:
continue

job_title = offer_dict.get("title")
job_company = offer_dict.get("company_name")
job_salary = offer_dict.get("employment_types")
job_location = offer_dict.get("city")

self.jobs_dict[url] = {"Title": [job_title],
"Company": [job_company],
"Salary": [job_salary],
"Location": [job_location]}
marker_list.append(offer_dict.get("marker_icon"))
city_list.append(offer_dict.get("city"))
exp_list.append(offer_dict.get("experience_level"))

def run(sheetname, role, lvl, city):
def run(sheetname, url):
log.info("Starting JustJointIt scrapper.")
just = JustJoinIt()
resp = just.updateJobsDict()
just.prepareJobsDict(resp, role, lvl, city)
just.updateJobsDict(url)
updateExcel(sheetname, just.jobs_dict)
log.info("Finished JustJoinIt scrapper.")
38 changes: 0 additions & 38 deletions scrappers/justjoinit_new.py

This file was deleted.

0 comments on commit c9ebca0

Please sign in to comment.