/**
* @file This file checks all links on the WOT-terms site and reports broken links. It also creates a GitHub issue with the broken links.
* The script should be run from the root of the WOT-terms repository.
* Environment: NodeJS
* Usage:
* $ node findBrokenLinks.js
* @author Kor Dwarshuis
* @version 1.0.0
* @since 2023-09-04
* @see https://www.npmjs.com/package/broken-link-checker
* @see https://github.com/stevenvachon/broken-link-checker
*/
require('dotenv').config();
const { Octokit } = require('@octokit/core');
const fs = require('fs');
const { SiteChecker } = require('broken-link-checker');
const { URL } = require('url');
const path = require('path');
/**********/
/* CONFIG */
const siteUrl = 'https://weboftrust.github.io/WOT-terms';
const baseUrl = 'https://weboftrust.github.io';
const outputDirectory = path.join(__dirname, '../logs');
const outputFileName = 'brokenLinks.md';
// const excludedSubdirectories = ['/WOT-terms/slack/'];
const githubToken = process.env.GITHUB_ISSUE_AUTH_TOKEN;
/* END CONFIG */
/**************/
const outputFilePath = path.join(outputDirectory, outputFileName);
let brokenLinks = {};
let fileContent = '';
console.log('Start Link checking...');
const siteChecker = new SiteChecker({
excludeExternalLinks: true,
maxSocketsPerHost: 10
}, {
link: (result) => {
// Log every URL that is checked
console.log(`Checking link: ${result.url.resolved}`);
// Additionally, log if a link is broken
if (result.broken) {
// brokenLinks.push({
// url: result.url.resolved,
// brokenReason: result.brokenReason
// });
const urlObj = new URL(result.url.original, baseUrl);
const baseObj = new URL(result.base.original, baseUrl);
const href = urlObj.href;
if (!brokenLinks[href]) {
brokenLinks[href] = [];
}
if (!brokenLinks[href].includes(baseObj.href)) {
brokenLinks[href].push(baseObj.href);
}
console.log(`Broken link found: ${result.url.resolved} (${result.brokenReason}). Found on page: ${baseObj.href}`);
}
},
end: () => {
console.log("Finished checking site.");
console.log('Checking done! Writing to file...');
// Get ISO8601 timestamp
const getISO8601Timestamp = () => {
const now = new Date();
return now.toISOString();
};
const timestamp = getISO8601Timestamp();
const numberOfBrokenLinks = Object.keys(brokenLinks).length;
console.log('numberOfBrokenLinks: ', numberOfBrokenLinks);
// Format the output for the Markdown file
fileContent = `# Broken Links Report\n\nCreated: ${timestamp}\n\n`;
fileContent += `Total Broken Links Found: ${numberOfBrokenLinks}\n\n`;
let counter = 1; // Initialize counter variable outside the loop
for (const [brokenLink, foundOnPages] of Object.entries(brokenLinks)) {
let markdownBrokenLink = `[${brokenLink}](${brokenLink})`;
let pagesMarkdown = foundOnPages.map(page => `- [${page}](${page})`).join('\n');
pagesMarkdown += '\n\n';
fileContent += `## Broken Link #${counter}:\n${markdownBrokenLink}\n\nFound on Pages:\n\n${pagesMarkdown}\n`;
counter++; // Increment counter for the next broken link
}
// Check if directory exists, if not then create it
if (!fs.existsSync(outputDirectory)) {
fs.mkdirSync(outputDirectory, { recursive: true });
}
fs.writeFile(outputFilePath, fileContent, async (err) => {
if (err) {
console.error('Error writing to file:', err);
} else {
console.log(`Broken links and count written to ${outputFilePath}`);
}
});
console.log('Creating GitHub issue...');
// TODO: Create GitHub should not be inside the file write callback
// Create GitHub issue using Octokit
const issueData = {
title: 'Broken Links Report',
body: "Created: " + timestamp + "\n\n" + "Number of broken internal links: " + numberOfBrokenLinks + "\n\n" + "<a href='https://github.com/WebOfTrust/WOT-terms/blob/main/logs/brokenLinks.md'>See full list of broken internal links</a>.",
};
const octokit = new Octokit({
auth: githubToken
});
octokit.request('POST /repos/WebOfTrust/WOT-terms/issues', {
owner: 'WebOfTrust',
repo: 'WOT-terms',
title: issueData.title,
body: issueData.body,
// labels: [
// 'bug'
// ],
headers: {
'X-GitHub-Api-Version': '2022-11-28'
}
});
console.log('GitHub issue created.');
}
});
siteChecker.enqueue(siteUrl);