JavaScript Techniques
Discover advanced JavaScript techniques to capture links, images, stylesheets, emails, and various types of URLs on web pages. Enhance your web scraping and analysis capabilities with these powerful methods.
Dump All URLs
// Print the href of every <a> element on the page.
// getElementsByTagName returns an HTMLCollection; iterate it with for...of so
// only the elements are visited. for...in would also enumerate non-element
// keys such as "length", "item" and "namedItem", logging undefined for each.
const urls = document.getElementsByTagName('a');
for (const anchor of urls) {
  console.log(anchor.href);
}
Capture All Links
// Spread document.links into an array and print the href of each entry.
[...document.links].forEach((link) => {
  console.log(link.href);
});
Capture All Links (Alternative)
// Select every <a> element, spread the result into an array, and print each href.
[...document.querySelectorAll("a")].forEach(({ href }) => {
  console.log(href);
});
Capture All Links (Alternative)
// Convert the selected <a> elements to an array with Array.from, then print each href.
Array.from(document.querySelectorAll("a")).forEach((anchor) => {
  console.log(anchor.href);
});
Capture All Links (Alternative)
// Borrow Array.prototype.forEach to walk the selected <a> elements directly,
// without first copying them into an array.
Array.prototype.forEach.call(document.querySelectorAll("a"), (anchor) => {
  console.log(anchor.href);
});
Capture All Links (Alternative)
// Collect every anchor's href, de-duplicate through a Set, convert back to an
// array with Array.from, and print everything as one comma-separated line.
console.log(
  Array.from(
    new Set([...document.querySelectorAll("a")].map(({ href }) => href)),
  ).join(", "),
);
Capture All Links (Alternative)
// Collect every anchor's href, de-duplicate through a Set, spread the Set back
// into an array, and print everything as one comma-separated line.
console.log(
  [
    ...new Set([...document.querySelectorAll("a")].map(({ href }) => href)),
  ].join(", "),
);
Capture All Links (Alternative)
// Apply Array.prototype.forEach to the selected <a> elements, destructuring
// href from each element before printing it.
Array.prototype.forEach.call(document.querySelectorAll("a"), ({ href }) => {
  console.log(href);
});
Capture All Links (Alternative)
// Iterate the selected <a> elements with for...of and print each href.
const links = document.querySelectorAll("a");
for (const { href } of links) {
  console.log(href);
}
Capture All Links (Alternative)
// Copy the selected <a> elements into an array and print each href.
// forEach (not map) is used because the callback runs only for its side
// effect; map would build and then discard an array of undefined values.
const linksArray = Array.from(document.querySelectorAll("a"));
linksArray.forEach((link) => {
  console.log(link.href);
});
Capture All Links (Alternative)
// Walk the <a> elements by index with a classic counting loop and print each href.
const links = document.getElementsByTagName("a");
for (let index = 0; index < links.length; index += 1) {
  const anchor = links[index];
  console.log(anchor.href);
}
Capture All Links (Alternative)
// Fold the anchors into an array of hrefs with reduce, printing each href as
// it is visited. linksHref ends up holding every href in document order.
const linksArray = Array.from(document.querySelectorAll("a"));
const linksHref = linksArray.reduce((collected, anchor) => {
  console.log(anchor.href);
  return [...collected, anchor.href];
}, []);
Capture All Links (Alternative)
// Map each anchor to its href, printing it on the way. linksHref ends up
// holding every href in document order.
const linksArray = [...document.querySelectorAll("a")];
const linksHref = linksArray.map(({ href }) => {
  console.log(href);
  return href;
});
Capture All Links (Alternative)
// Gather the href of every entry in document.links.
const urls = [...document.links].map(({ href }) => href);

// Open the URLs in a new tab with clickable links.
// window.open() returns null when the browser blocks the popup, so guard
// against that instead of failing with a TypeError on newTab.document.
const newTab = window.open();
if (newTab === null) {
  console.error("Could not open a new tab (popup blocked?).");
} else {
  const listing = newTab.document.createDocumentFragment();
  urls.forEach((url) => {
    // Build DOM nodes rather than writing an HTML string: a URL containing
    // quotes or angle brackets (possible in e.g. javascript: URLs) then
    // cannot break out of the href attribute or inject markup.
    const anchor = newTab.document.createElement("a");
    anchor.href = url;
    anchor.target = "_blank";
    anchor.rel = "noopener";
    anchor.textContent = url;
    listing.append(anchor, newTab.document.createElement("br"));
  });
  newTab.document.body.append(listing);
}
Capture All Links (Alternative)
// Convert document.links to an array with Array.from and print each href.
Array.from(document.links).forEach((link) => {
  console.log(link.href);
});
Capture All Links (Alternative)
// Use Array.from's mapping argument to extract each href, then print them.
// console.log is wrapped in an arrow function because forEach calls its
// callback with (value, index, array); passing console.log directly would
// print the index and the whole array next to every URL.
Array.from(document.links, ({ href }) => href).forEach((href) => {
  console.log(href);
});
Capture Links Using getElementsByTagName
// Spread the collection returned by getElementsByTagName into an array and
// print each anchor's href.
[...document.getElementsByTagName('a')].forEach(({ href }) => {
  console.log(href);
});
Capture Links Using getElementsByTagName (Alternative)
// Map the getElementsByTagName result to hrefs via Array.from's second
// argument, then print each resulting URL string.
Array.from(
  document.getElementsByTagName('a'),
  (anchor) => anchor.href,
).forEach((href) => {
  console.log(href);
});
Capture Links Using a for Loop
// Copy the <a> elements into an array and print each href with for...of.
// const replaces var so neither binding is function-scoped or reassignable;
// the loop variable is named for what it holds (an element, not a URL string).
const urls = Array.from(document.getElementsByTagName('a'));
for (const anchor of urls) {
  console.log(anchor.href);
}
Capture Emails Using Regular Expression
// Scan the page's serialized HTML for email-like strings and print each match.
// The top-level-domain class is [A-Za-z]: inside a character class "|" is a
// literal pipe, not alternation, so "[A-Z|a-z]" would wrongly accept pipes.
const regex = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
const html = document.documentElement.innerHTML;
let match;
// With the g flag, exec resumes from regex.lastIndex on every call and
// returns null once no further match exists, which ends the loop.
while ((match = regex.exec(html)) !== null) {
  console.log(match[0]);
}
Capture All Images
// Convert document.images to an array and print the src of each image.
Array.from(document.images).forEach((image) => {
  console.log(image.src);
});
Capture Stylesheets
// Print the URL of every external stylesheet attached to the document.
// Inline <style> sheets have a null href, so they are skipped instead of
// being logged as "null".
Array.from(document.styleSheets)
  .filter(({ href }) => href !== null)
  .forEach(({ href }) => {
    console.log(href);
  });
Capture Internal Links
// Print only the links whose hostname equals the current page's hostname.
Array.from(document.links)
  .filter((link) => link.hostname === location.hostname)
  .forEach((link) => {
    console.log(link.href);
  });
Capture External Links
// Print links that point at a different host than the current page.
// Schemes without a host (mailto:, tel:, javascript:) report an empty
// hostname, so they are excluded rather than misreported as external sites.
Array.from(document.links)
  .filter(({ hostname }) => hostname !== "" && hostname !== location.hostname)
  .forEach(({ href }) => {
    console.log(href);
  });
Capture Unique URLs
// Build a Set of hrefs so each distinct URL is kept once, then print them
// in insertion order.
let uniqueURLs = new Set(Array.from(document.links, (link) => link.href));
for (const url of uniqueURLs) {
  console.log(url);
}
Capture PDF Links
// Print every link whose path ends in ".pdf".
// The check uses pathname rather than the full href so that a query string
// or fragment (file.pdf?download=1, file.pdf#page=2) does not hide the
// extension, and lower-cases it so ".PDF" is matched as well.
Array.from(document.links)
  .filter(({ pathname }) => pathname.toLowerCase().endsWith('.pdf'))
  .forEach(({ href }) => {
    console.log(href);
  });
Capture Download Links
// Print the href of every <a> element that carries a download attribute.
Array.from(document.querySelectorAll('a[download]')).forEach((anchor) => {
  console.log(anchor.href);
});
Capture Mailto Links
// Print the href of every <a> whose href attribute starts with "mailto:".
Array.from(document.querySelectorAll('a[href^="mailto:"]')).forEach((anchor) => {
  console.log(anchor.href);
});
Capture Tel Links
// Print the href of every <a> whose href attribute starts with "tel:".
Array.from(document.querySelectorAll('a[href^="tel:"]')).forEach((anchor) => {
  console.log(anchor.href);
});