Skip to content

Commit

Permalink
First update on justjoinit scrapper
Browse files Browse the repository at this point in the history
  • Loading branch information
wkobiela committed Jan 7, 2024
1 parent 9f50292 commit a266ac2
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 16 deletions.
32 changes: 16 additions & 16 deletions runner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from scrappers import justjoinit
from scrappers import justjoinit_new
from scrappers import nofluffjobs
from scrappers import bulldogjob
from modules import setup, common
Expand Down Expand Up @@ -31,20 +31,20 @@
}

# Create links
NOFLUFFJOBS_URL = common.createLinks(site=nofluffjobs_settings['site'],
role=nofluffjobs_settings['role'],
lvl=nofluffjobs_settings['lvl'],
city=nofluffjobs_settings['city'])
BULLDOGJOB_URL = common.createLinks(site=bulldogjob_settings['site'],
role=bulldogjob_settings['role'],
lvl=bulldogjob_settings['lvl'],
city=bulldogjob_settings['city'])
# NOFLUFFJOBS_URL = common.createLinks(site=nofluffjobs_settings['site'],
# role=nofluffjobs_settings['role'],
# lvl=nofluffjobs_settings['lvl'],
# city=nofluffjobs_settings['city'])
# BULLDOGJOB_URL = common.createLinks(site=bulldogjob_settings['site'],
# role=bulldogjob_settings['role'],
# lvl=bulldogjob_settings['lvl'],
# city=bulldogjob_settings['city'])

# Run setup and scrappers
setup.run(EXCEL_NAME, NOFLUFFJOBS_SHEET, BULLDOGJOB_SHEET, JUSTJOINIT_SHEET)
nofluffjobs.run(NOFLUFFJOBS_SHEET, NOFLUFFJOBS_URL)
bulldogjob.run(BULLDOGJOB_SHEET, BULLDOGJOB_URL)
# justjoinit.run(JUSTJOINIT_SHEET,
# role=justjoinit_settings['role'],
# lvl=justjoinit_settings['lvl'],
# city=justjoinit_settings['city'])
# setup.run(EXCEL_NAME, NOFLUFFJOBS_SHEET, BULLDOGJOB_SHEET, JUSTJOINIT_SHEET)
# nofluffjobs.run(NOFLUFFJOBS_SHEET, NOFLUFFJOBS_URL)
# bulldogjob.run(BULLDOGJOB_SHEET, BULLDOGJOB_URL)
justjoinit_new.run(JUSTJOINIT_SHEET,
role=justjoinit_settings['role'],
lvl=justjoinit_settings['lvl'],
city=justjoinit_settings['city'])
38 changes: 38 additions & 0 deletions scrappers/justjoinit_new.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import re
import requests
from bs4 import BeautifulSoup
from modules.base_logger import log
from modules.common import getDomainName, updateExcel

class JustJoinIt():
    """Collect job offers from one listing page into ``jobs_dict``.

    ``jobs_dict`` maps each offer's absolute link to a dict with the keys
    ``"Title"``, ``"Company"``, ``"Salary"`` and ``"Location"``; every value
    is a one-element list holding the text scraped from the offer card.
    """

    # CSS class names used to locate offer cards and the fields inside them.
    # NOTE(review): these look like build-generated names and will presumably
    # change when the site is redeployed - confirm they are still current.
    _CARD_CLASS = "css-1iq2gw3"
    _LINK_CLASS = "css-4lqp8g"
    # Compiled once here instead of on every loop iteration.
    _COMPANY_CLASS = re.compile("css-ldh1c9", re.I)
    _SALARY_CLASS = re.compile("css-1b2ga3v", re.I)
    _LOCATION_CLASS = re.compile("css-68pppj", re.I)

    def __init__(self):
        self.jobs_dict = {}

    @classmethod
    def _parse_job(cls, job, domain_name):
        """Extract one offer from a card element.

        Args:
            job: Parsed card element exposing ``find`` (a BeautifulSoup tag).
            domain_name: Host name prepended to the card's relative link.

        Returns:
            A ``(link, details)`` tuple, or ``None`` when the card lacks the
            link or any of the title/company/salary/location elements.
        """
        anchor = job.find('a', class_=cls._LINK_CLASS)
        href = anchor.get('href') if anchor is not None else None
        fields = (
            job.find('h2'),
            job.find('div', class_=cls._COMPANY_CLASS),
            job.find('div', class_=cls._SALARY_CLASS),
            job.find('div', class_=cls._LOCATION_CLASS),
        )
        # An incomplete card is reported to the caller instead of raising, so
        # one malformed card cannot abort the rest of the page.
        if not href or any(field is None for field in fields):
            return None

        job_title, job_company, job_salary, job_location = (
            field.text for field in fields
        )
        job_link = "https://" + domain_name + href
        return job_link, {"Title": [job_title],
                          "Company": [job_company],
                          "Salary": [job_salary],
                          "Location": [job_location]}

    def updateJobsDict(self, url):
        """Fetch ``url`` and add every complete offer card to ``jobs_dict``.

        A failed request (connection error, timeout, HTTP error status) is
        logged and leaves ``jobs_dict`` unchanged. Cards with missing
        elements are logged and skipped; the remaining cards are still added.

        Args:
            url: Address of the listing page to scrape.

        Returns:
            None. Results are stored in ``self.jobs_dict``.
        """
        domainName = getDomainName(url)
        try:
            page = requests.get(url, timeout=120)
            # Treat 4xx/5xx responses as failures rather than parsing an
            # error page that contains no offers.
            page.raise_for_status()
        except requests.RequestException as e:
            log.error("Exception %s on updateJobsDict.", e)
            return None

        page_soup = BeautifulSoup(page.content, "html.parser")
        for job in page_soup.find_all("div", {"class": self._CARD_CLASS}):
            parsed = self._parse_job(job, domainName)
            if parsed is None:
                log.warning("Skipping offer card with missing fields on %s.", url)
                continue
            job_link, details = parsed
            self.jobs_dict[job_link] = details
        return None


def run(sheetname, url):
    """Scrape the listing page at ``url`` and write the offers to Excel.

    Creates a ``JustJoinIt`` scraper, fills its ``jobs_dict`` from ``url`` and
    passes the result to ``updateExcel`` together with ``sheetname``.

    Args:
        sheetname: Sheet identifier forwarded to ``updateExcel``.
        url: Address of the listing page to scrape.
    """
    # NOTE(review): runner.py in this commit calls
    # run(sheet, role=..., lvl=..., city=...), which does not match this
    # (sheetname, url) signature - confirm which side should change.
    log.info("Starting JustJoinIt scrapper.")
    just = JustJoinIt()
    just.updateJobsDict(url)
    updateExcel(sheetname, just.jobs_dict)
    log.info("Finished JustJoinIt scrapper.")

0 comments on commit a266ac2

Please sign in to comment.