Source: maintenance/findBrokenLinks.js

  1. /**
  2. * @file This file checks all links on the WOT-terms site and reports broken links. It also creates a GitHub issue with the broken links.
  3. * The script should be run from the root of the WOT-terms repository.
  4. * Environment: NodeJS
  5. * Usage:
  6. * $ node maintenance/findBrokenLinks.js
  7. * @author Kor Dwarshuis
  8. * @version 1.0.0
  9. * @since 2023-09-04
  10. * @see https://www.npmjs.com/package/broken-link-checker
  11. * @see https://github.com/stevenvachon/broken-link-checker
  12. */
  13. require('dotenv').config();
  14. const { Octokit } = require('@octokit/core');
  15. const fs = require('fs');
  16. const { SiteChecker } = require('broken-link-checker');
  17. const { URL } = require('url');
  18. const path = require('path');
  19. /**********/
  20. /* CONFIG */
  21. const siteUrl = 'https://weboftrust.github.io/WOT-terms';
  22. const baseUrl = 'https://weboftrust.github.io';
  23. const outputDirectory = path.join(__dirname, '../logs');
  24. const outputFileName = 'brokenLinks.md';
  25. // const excludedSubdirectories = ['/WOT-terms/slack/'];
  26. const githubToken = process.env.GITHUB_ISSUE_AUTH_TOKEN;
  27. /* END CONFIG */
  28. /**************/
  29. const outputFilePath = path.join(outputDirectory, outputFileName);
  30. let brokenLinks = {};
  31. let fileContent = '';
  32. console.log('Start Link checking...');
  33. const siteChecker = new SiteChecker({
  34. excludeExternalLinks: true,
  35. maxSocketsPerHost: 10
  36. }, {
  37. link: (result) => {
  38. // Log every URL that is checked
  39. console.log(`Checking link: ${result.url.resolved}`);
  40. // Additionally, log if a link is broken
  41. if (result.broken) {
  42. // brokenLinks.push({
  43. // url: result.url.resolved,
  44. // brokenReason: result.brokenReason
  45. // });
  46. const urlObj = new URL(result.url.original, baseUrl);
  47. const baseObj = new URL(result.base.original, baseUrl);
  48. const href = urlObj.href;
  49. if (!brokenLinks[href]) {
  50. brokenLinks[href] = [];
  51. }
  52. if (!brokenLinks[href].includes(baseObj.href)) {
  53. brokenLinks[href].push(baseObj.href);
  54. }
  55. console.log(`Broken link found: ${result.url.resolved} (${result.brokenReason}). Found on page: ${baseObj.href}`);
  56. }
  57. },
  58. end: () => {
  59. console.log("Finished checking site.");
  60. console.log('Checking done! Writing to file...');
  61. // Get ISO8601 timestamp
  62. const getISO8601Timestamp = () => {
  63. const now = new Date();
  64. return now.toISOString();
  65. };
  66. const timestamp = getISO8601Timestamp();
  67. const numberOfBrokenLinks = Object.keys(brokenLinks).length;
  68. console.log('numberOfBrokenLinks: ', numberOfBrokenLinks);
  69. // Format the output for the Markdown file
  70. fileContent = `# Broken Links Report\n\nCreated: ${timestamp}\n\n`;
  71. fileContent += `Total Broken Links Found: ${numberOfBrokenLinks}\n\n`;
  72. let counter = 1; // Initialize counter variable outside the loop
  73. for (const [brokenLink, foundOnPages] of Object.entries(brokenLinks)) {
  74. let markdownBrokenLink = `[${brokenLink}](${brokenLink})`;
  75. let pagesMarkdown = foundOnPages.map(page => `- [${page}](${page})`).join('\n');
  76. pagesMarkdown += '\n\n';
  77. fileContent += `## Broken Link #${counter}:\n${markdownBrokenLink}\n\nFound on Pages:\n\n${pagesMarkdown}\n`;
  78. counter++; // Increment counter for the next broken link
  79. }
  80. // Check if directory exists, if not then create it
  81. if (!fs.existsSync(outputDirectory)) {
  82. fs.mkdirSync(outputDirectory, { recursive: true });
  83. }
  84. fs.writeFile(outputFilePath, fileContent, async (err) => {
  85. if (err) {
  86. console.error('Error writing to file:', err);
  87. } else {
  88. console.log(`Broken links and count written to ${outputFilePath}`);
  89. }
  90. });
  91. console.log('Creating GitHub issue...');
  92. // TODO: Create GitHub should not be inside the file write callback
  93. // Create GitHub issue using Octokit
  94. const issueData = {
  95. title: 'Broken Links Report',
  96. body: "Created: " + timestamp + "\n\n" + "Number of broken internal links: " + numberOfBrokenLinks + "\n\n" + "<a href='https://github.com/WebOfTrust/WOT-terms/blob/main/logs/brokenLinks.md'>See full list of broken internal links</a>.",
  97. };
  98. const octokit = new Octokit({
  99. auth: githubToken
  100. });
  101. octokit.request('POST /repos/WebOfTrust/WOT-terms/issues', {
  102. owner: 'WebOfTrust',
  103. repo: 'WOT-terms',
  104. title: issueData.title,
  105. body: issueData.body,
  106. // labels: [
  107. // 'bug'
  108. // ],
  109. headers: {
  110. 'X-GitHub-Api-Version': '2022-11-28'
  111. }
  112. });
  113. console.log('GitHub issue created.');
  114. }
  115. });
  116. siteChecker.enqueue(siteUrl);