FREE way to scrape Linkedin job postings and export as csv!
I wrote a script that will scrape job listings from Linkedin and export as a csv...for free!
Copy the code from below or from here and paste it into your browser console. Here's a video to show you how to do that. Then hit enter. It will take a few minutes to run. When it's done, it will download a csv file with all the job listings.
A couple of important things: you need to be logged in, and you need to start from page 1.
And the script is pretty slow. If you need something super speedy, I can get you access to a custom scraper I built that is lightning fast. If you'd like that, or if you need anything else scraped, send me an email: adrian@thewebscrapingguy.com
function createCSV(jsonData, fileName) {
  // Convert an array of flat JSON objects into CSV text and trigger a
  // browser download of the result.
  //
  // jsonData - array of objects; the keys of the FIRST object define the
  //            column order for every row.
  // fileName - file name suggested for the download.
  if (!Array.isArray(jsonData) || jsonData.length === 0) {
    // Previously jsonData[0] threw on an empty scrape; warn instead.
    console.warn('createCSV: nothing to export')
    return
  }
  // Quote a single cell per RFC 4180: wrap in quotes when the value
  // contains a comma, a quote, or a line break, and double any embedded
  // quotes. (The original only quoted on commas, which produced malformed
  // CSV for values containing quotes or newlines, and never escaped the
  // header row.)
  const escapeCell = (value) => {
    if (typeof value === 'number') return String(value)
    const text = value == null ? '' : String(value)
    return /[",\n\r]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text
  }
  const headers = Object.keys(jsonData[0])
  const csvData = [headers.map(escapeCell).join(',')]
  jsonData.forEach((item) => {
    // Emit columns in header order so every row lines up even if an
    // object's own key order differs or a key is missing (the original
    // for...in loop could misalign columns in that case).
    csvData.push(headers.map((key) => escapeCell(item[key])).join(','))
  })
  // Hand the CSV to the browser as a Blob and click a synthetic link to
  // start the download, then clean up the temporary URL.
  const csvBlob = new Blob([csvData.join('\n')], {
    type: 'text/csv;charset=utf-8',
  })
  const csvUrl = URL.createObjectURL(csvBlob)
  const link = document.createElement('a')
  link.href = csvUrl
  link.target = '_blank'
  link.download = fileName
  document.body.appendChild(link)
  link.click()
  document.body.removeChild(link)
  URL.revokeObjectURL(csvUrl)
}
async function scrollDown() {
  // Scroll the LinkedIn job-results pane to the bottom so every
  // lazy-loaded job card is rendered before scraping.
  const wrapper = document.querySelector('.jobs-search-results-list')
  if (!wrapper) {
    // Results pane not found (layout change or wrong page) — nothing to
    // scroll. Previously this crashed with a TypeError on null.
    console.warn('scrollDown: .jobs-search-results-list not found')
    return
  }
  await new Promise((resolve) => {
    let totalHeight = 0
    const distance = 1000
    const timer = setInterval(() => {
      const scrollHeightBefore = wrapper.scrollHeight
      wrapper.scrollBy(0, distance)
      totalHeight += distance
      if (totalHeight >= scrollHeightBefore) {
        totalHeight = 0
        // NOTE(review): scrollHeight is re-read in the same tick, so
        // content that loads asynchronously is only noticed on a later
        // 300 ms interval iteration — confirm this keeps up with slow
        // connections.
        const scrollHeightAfter = wrapper.scrollHeight
        if (scrollHeightAfter > scrollHeightBefore) {
          // More content appeared — keep scrolling.
          return
        }
        // No growth: assume we've reached the bottom.
        clearInterval(timer)
        resolve()
      }
    }, 300)
  })
}
function getRidOfUnnecessaryLinesAndSpaces(text) {
  // Collapse a multi-line string into a single line: trim each line,
  // drop the empty ones, and join the remainder with single spaces.
  // Nullish input passes through as undefined.
  if (text == null) return undefined
  const kept = []
  for (const rawLine of text.split('\n')) {
    const trimmedLine = rawLine.trim()
    if (trimmedLine !== '') kept.push(trimmedLine)
  }
  return kept.join(' ')
}
function jsonify(jobDiv) {
  // Build a flat record for the currently-selected job. Most fields are
  // read from the right-hand detail pane (document-level selectors),
  // which is only populated AFTER jobDiv has been clicked; only the link
  // and location come from the job card itself.
  const title = document.querySelector(
    'h2.job-details-jobs-unified-top-card__job-title',
  )
  const link = jobDiv.querySelector('a.ember-view')
  const href = link?.getAttribute('href')
  // Guard: some cards have no <ul>; previously this threw a TypeError.
  const jobLocation = jobDiv.querySelector('ul')?.textContent
  const primaryDescription = document.querySelector(
    '.job-details-jobs-unified-top-card__primary-description div',
  )
  // Child indices 0/3/5 mirror LinkedIn's current markup — verify after
  // any layout change.
  const company = primaryDescription?.children?.[0]?.textContent
  const companyLink = primaryDescription?.children?.[0]?.getAttribute('href')
  const posted = primaryDescription?.children?.[3]?.textContent
  const applicants = primaryDescription?.children?.[5]?.textContent
  const details = document.querySelectorAll(
    '.job-details-jobs-unified-top-card__job-insight',
  )
  // LinkedIn separates insight segments with '·' (middle dot). The
  // original source contained the mojibake '路' (GBK misreading of the
  // UTF-8 bytes for '·'); restored here.
  // Salary is optional, so detect it before assigning the other slots.
  const splitFirstLine = details?.[0]?.textContent?.split('·')
  const salary = splitFirstLine?.find((line) => line?.includes('$'))
  const employmentType = salary ? splitFirstLine?.[1] : splitFirstLine?.[0]
  const level = salary ? splitFirstLine?.[2] : splitFirstLine?.[1]
  const splitCrap = details?.[1]?.textContent?.split('·')
  const companySize = splitCrap?.[0]
  const companyIndustry = splitCrap?.[1]
  // Guard: the detail pane may not have rendered yet; previously a
  // missing description element crashed the whole scrape.
  const description = document.querySelector(
    '.jobs-description-content__text',
  )?.textContent
  return {
    title: title?.textContent?.trim() || '',
    company: company?.trim() || '',
    // Only prefix the host when a relative href was found; previously a
    // missing link produced the literal "https://www.linkedin.comnull".
    link: href ? `https://www.linkedin.com${href}` : '',
    jobLocation: jobLocation?.trim() || '',
    companyLink: companyLink || '',
    posted: getRidOfUnnecessaryLinesAndSpaces(posted?.trim()) || '',
    applicants: applicants?.trim() || '',
    salary: salary?.trim() || '',
    employmentType: employmentType?.trim() || '',
    level: level?.trim() || '',
    companySize: companySize?.trim() || '',
    companyIndustry: companyIndustry?.trim() || '',
    description: description?.trim() || '',
  }
}
async function scrapeLinkedinJobs() {
  // Walk every page of LinkedIn job-search results: click each job card
  // so its detail pane loads, collect the parsed records via jsonify(),
  // then export everything with createCSV().
  // IMPORTANT: the user must be logged in and must start on page 1,
  // because pagination advances by searching for currentPage + 1.
  const allJobs = []
  let currentPage = 1
  let hasNextPage = true
  while (hasNextPage) {
    console.log(
      `If you need jobs scraped, or any other data, email me: adrian@thewebscrapingguy.com`,
    )
    // Give the newly-loaded page time to render.
    await new Promise((resolve) => setTimeout(resolve, 3000))
    await scrollDown()
    // Click every job card so jsonify() can read the detail pane.
    const jobDivs = document.querySelectorAll('.job-card-container')
    for (const jobDiv of jobDivs) {
      jobDiv.click()
      // Wait for the detail pane to update before scraping it.
      await new Promise((resolve) => setTimeout(resolve, 1000))
      allJobs.push(jsonify(jobDiv))
    }
    // Find the pagination button for the next page and click it. If no
    // button matches currentPage + 1, we are on the last page and stop.
    const pages = document.querySelectorAll(
      '.artdeco-pagination__pages--number li button',
    )
    hasNextPage = false
    for (const page of pages) {
      // Guard: buttons without an aria-label previously crashed the loop
      // with a TypeError on .replace(null).
      const pageNumber = page.getAttribute('aria-label')?.replace(/\D/g, '')
      if (Number(pageNumber) === currentPage + 1) {
        console.log('advance to next page')
        hasNextPage = true
        currentPage++
        page.click()
        break
      }
    }
  }
  console.log('allJobs', allJobs)
  createCSV(allJobs, `linkedinJobs-${new Date().getTime()}.csv`)
  // Original contained the mojibake '馃帀' for the party-popper emoji.
  console.log(`Congrats! 🎉 You scraped ${allJobs.length} jobs!`)
  console.log(
    `If you need jobs scraped, or any other data, email me: adrian@thewebscrapingguy.com`,
  )
}
// Kick off the scrape. Top-level await is valid when this script is
// pasted into the browser devtools console.
await scrapeLinkedinJobs()