Skip to content

Commit

Permalink
Fix nofluffjobs scrapper
Browse files Browse the repository at this point in the history
  • Loading branch information
wkobiela committed Jan 7, 2024
1 parent c9ebca0 commit 4521272
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 22 deletions.
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# jobScrapper

### JustJoinIt scrapper temporarily disabled, API is unreachable - no further information, if it will be back.

## Description
Simple Python project that should make it easier to stay up to date with job offers. Websites like BulldogJob, Nofluffjobs or JustJoinIt have this nasty feature - job offers that are "refreshed" are bumped to the top of the page, so it is easy to lose track and even apply to the same job twice.

Expand Down Expand Up @@ -66,13 +64,13 @@ bulldogjob_settings = {
### JustJoinIt

To set up the JustJoinIt scrapper, insert 3 MAIN parameters.
- role (list of strings) from available: `'testing', 'net', 'architecture', 'ruby', 'php', 'mobile', 'other', 'analytics', 'erp', 'go', 'admin', 'scala', 'pm', 'support', 'data', 'java', 'security', 'game', 'python', 'ux', 'c', 'javascript', 'devops', 'html'`
- lvl (list of strings) from available: `'junior', 'mid', 'senior'`
- role (single string) from available: `'testing', 'net', 'architecture', 'ruby', 'php', 'mobile', 'other', 'analytics', 'erp', 'go', 'admin', 'scala', 'pm', 'support', 'data', 'java', 'security', 'game', 'python', 'ux', 'c', 'javascript', 'devops', 'html'`
- lvl (single string, levels separated by a dot) from available: `'junior', 'mid', 'senior', 'c-level'`
- city (string) - always searches for remote offers, plus optionally offers in the city of your choosing
```
justjoinit_settings = {
"role": ["testing"],
"lvl": ["mid", "junior"],
"lvl": "mid.senior",
"city": "Gdańsk"
}
```
Expand Down
3 changes: 3 additions & 0 deletions modules/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from modules.base_logger import log
from unidecode import unidecode

now = datetime.now()

Expand Down Expand Up @@ -101,5 +102,7 @@ def createLinks(**kwargs):
generated_link = f"https://bulldogjob.pl/companies/jobs/s/role,{role}/experienceLevel,{lvl}/city,{city}"
elif site == "NoFluffJobs":
generated_link = f"https://nofluffjobs.com/pl/praca-zdalna/{role}?criteria=city%3D{city}%20%20seniority%3D{lvl}"
elif site == "JustjoinIt":
generated_link = f"https://justjoin.it/{unidecode(city).lower()}/{role}/experience-level_{lvl}/remote_yes"
log.info("Generated link: %s", generated_link)
return(generated_link)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ pandas==2.1.4; python_version > '3.8'
pytest==7.4.4
Requests==2.31.0
pytest-html==4.1.1
unidecode==1.3.7
32 changes: 16 additions & 16 deletions runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,27 +26,27 @@

justjoinit_settings = {
"site": "JustjoinIt",
"role": ["testing"],
"lvl": ["mid", "junior"],
"role": "testing",
"lvl": "mid.senior",
"city": "Gdańsk"
}

# Create links
# NOFLUFFJOBS_URL = common.createLinks(site=nofluffjobs_settings['site'],
# role=nofluffjobs_settings['role'],
# lvl=nofluffjobs_settings['lvl'],
# city=nofluffjobs_settings['city'])
# BULLDOGJOB_URL = common.createLinks(site=bulldogjob_settings['site'],
# role=bulldogjob_settings['role'],
# lvl=bulldogjob_settings['lvl'],
# city=bulldogjob_settings['city'])
# JUSTJOINIT_URL = common.createLinks(site=justjoinit_settings['site'],
# role=justjoinit_settings['role'],
# lvl=justjoinit_settings['lvl'],
# city=justjoinit_settings['city'])
NOFLUFFJOBS_URL = common.createLinks(site=nofluffjobs_settings['site'],
role=nofluffjobs_settings['role'],
lvl=nofluffjobs_settings['lvl'],
city=nofluffjobs_settings['city'])
BULLDOGJOB_URL = common.createLinks(site=bulldogjob_settings['site'],
role=bulldogjob_settings['role'],
lvl=bulldogjob_settings['lvl'],
city=bulldogjob_settings['city'])
JUSTJOINIT_URL = common.createLinks(site=justjoinit_settings['site'],
role=justjoinit_settings['role'],
lvl=justjoinit_settings['lvl'],
city=justjoinit_settings['city'])

# Run setup and scrappers
setup.run(EXCEL_NAME, NOFLUFFJOBS_SHEET, BULLDOGJOB_SHEET, JUSTJOINIT_SHEET)
# nofluffjobs.run(NOFLUFFJOBS_SHEET, NOFLUFFJOBS_URL)
# bulldogjob.run(BULLDOGJOB_SHEET, BULLDOGJOB_URL)
justjoinit.run(JUSTJOINIT_SHEET, "https://justjoin.it/gdansk/testing/remote_yes")
bulldogjob.run(BULLDOGJOB_SHEET, BULLDOGJOB_URL)
# justjoinit.run(JUSTJOINIT_SHEET, JUSTJOINIT_URL)
2 changes: 1 addition & 1 deletion scrappers/nofluffjobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def updateJobsDict(self, url):
for job in job_links_list:
job_link = "https://"+domainName+job['href']
job_title = job.find('h3').text
job_company = job.find('span', class_=re.compile("company", re.I)).text
job_company = job.find('h4').text
job_salary = job.find('span', class_=re.compile("badgy salary", re.I)).text
job_location = job.find('div', class_=re.compile("tw-flex tw-items-center ng-star-inserted", re.I)).text

Expand Down

0 comments on commit 4521272

Please sign in to comment.