#!/usr/bin/env node

/* eslint-disable @typescript-eslint/no-var-requires */

/**
 * PostHog Link Checker - Post-Build Validation
 *
 * This script validates internal links and anchor links against the ACTUAL
 * built site structure. It must run after `gatsby build` to access the
 * generated site data.
 *
 * This script:
 * - Reads the generated sitemap.xml to get actual pages
 * - Validates links against real page URLs (not just file paths)
 * - Checks anchor links against actual built page content
 * - Works with dynamic pages, templates, and plugin-generated content
 *
 * Usage:
 *   gatsby build && npm run check-links-post-build
 *   gatsby build && node scripts/check-links-post-build.js
 *
 * To output results to a file, provide a directory path as an argument:
 *   node scripts/check-links-post-build.js link-check-results
 *   node scripts/check-links-post-build.js .
 *   node scripts/check-links-post-build.js ../output
 *
 * The script always exits with code 0, even when broken links are found, so CI
 * workflows can continue; broken links are reported in the console output and
 * in the optional JSON results file.
 */
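
// Note: the `npm run check-links-post-build` usage above assumes a package.json script
// entry along these lines (illustrative, not copied from the repo):
//   "scripts": { "check-links-post-build": "node scripts/check-links-post-build.js" }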

const fs = require('fs')
const path = require('path')
const { JSDOM } = require('jsdom')
const GithubSlugger = require('github-slugger')

// ============================================================================
// CONFIGURATION
// ============================================================================

const CONFIG = {
    MAX_FILE_SIZE: 5 * 1024 * 1024, // 5MB
    CACHE_SIZE_LIMIT: 1000,
    BATCH_SIZE: 50,
    EXCLUDED_EXTENSIONS: ['.css', '.js', '.json', '.xml', '.svg', '.png', '.jpg', '.jpeg', '.gif', '.woff', '.woff2'],
    EXCLUDE_PATTERNS: [
        '/community/', // powered by Strapi
        '/teams/', // powered by Strapi
        '/careers/', // powered by Ashby
    ],
    SITEMAP_PATH: path.join(process.cwd(), 'public', 'sitemap', 'sitemap-0.xml'),
    CONTENTS_DIR: 'contents',
    // Special case URLs that should be considered valid even if not in sitemap
    SPECIAL_CASE_URLS: [
        '/startups', // Dynamic route in sitemap as /startups/[...slug]
    ],
}

// Global cache for anchor links
const anchorCache = new Map()

// ============================================================================
// UTILITY FUNCTIONS
// ============================================================================

// Utility function to convert file paths or internal links to full PostHog URLs
function convertToPostHogUrl(pathOrUrl) {
    const baseUrl = 'https://posthog.com'

    // Case 1: File path (starts with 'contents/')
    if (pathOrUrl.startsWith('contents/')) {
        // Remove 'contents/' prefix and file extension
        let urlPath = pathOrUrl.replace(/^contents\//, '')
        urlPath = urlPath.replace(/\.(md|mdx)$/, '')
        return `${baseUrl}/${urlPath}`
    }

    // Case 2: Internal link (starts with '/')
    if (pathOrUrl.startsWith('/')) {
        return `${baseUrl}${pathOrUrl}`
    }

    // Case 3: Already a full URL or relative path
    if (pathOrUrl.startsWith('http')) {
        return pathOrUrl
    }

    // Default: treat as internal path
    return `${baseUrl}/${pathOrUrl}`
}
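
// Illustrative examples of the mapping above (for reference, not executed):
//   convertToPostHogUrl('contents/docs/foo/bar.mdx') -> 'https://posthog.com/docs/foo/bar'
//   convertToPostHogUrl('/docs/foo#install')         -> 'https://posthog.com/docs/foo#install'
//   convertToPostHogUrl('https://example.com/x')     -> 'https://example.com/x' (unchanged)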

// Parse vercel.json to extract redirect patterns
function parseVercelRedirects() {
    try {
        const vercelConfig = JSON.parse(fs.readFileSync('vercel.json', 'utf8'))
        const redirects = vercelConfig.redirects || []
        const rewrites = vercelConfig.rewrites || []

        // Combine redirects and rewrites (rewrites also act as redirects for link validation)
        return [...redirects, ...rewrites]
    } catch (error) {
        console.warn('Warning: Could not parse vercel.json:', error.message)
        return []
    }
}
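
// For reference, vercel.json redirect entries generally look like the following
// (illustrative values, not copied from this repo's config):
//   { "source": "/docs/old-page/:path*", "destination": "/docs/new-page/:path*", "permanent": true }
// Only the `source` field is used by isRedirectSource() below.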

// Check if a URL contains a redirect source
function isRedirectSource(url, redirects) {
    return redirects.some((redirect) => {
        // If source contains :path*, check if URL contains the part before :path*
        if (redirect.source.includes(':path*')) {
            const pathPrefix = redirect.source.split('/:path*')[0]
            return url.includes(pathPrefix)
        }
        // Otherwise, check if URL contains the full source
        return url.includes(redirect.source)
    })
}
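
// Illustrative example, using the hypothetical redirect entry shown above:
//   isRedirectSource('/docs/old-page/foo', [{ source: '/docs/old-page/:path*' }]) -> true
//   isRedirectSource('/docs/other', [{ source: '/docs/old-page/:path*' }])        -> false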

// ============================================================================
// SITEMAP FUNCTIONS
// ============================================================================

// Get all pages from the sitemap
function getSitemapPages() {
    if (!fs.existsSync(CONFIG.SITEMAP_PATH)) {
        console.error('Error: Sitemap not found at', CONFIG.SITEMAP_PATH)
        console.error('Please run "gatsby build" first to generate the sitemap.')
        process.exit(1)
    }

    const sitemapContent = fs.readFileSync(CONFIG.SITEMAP_PATH, 'utf8')
    const dom = new JSDOM(sitemapContent, { contentType: 'text/xml' })
    const urlNodes = dom.window.document.querySelectorAll('url loc')

    const pages = []
    urlNodes.forEach((node) => {
        const url = node.textContent.trim()
        if (url.startsWith('https://posthog.com/')) {
            pages.push(url.replace('https://posthog.com', ''))
        }
    })

    return pages
}
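
// A sitemap entry such as <url><loc>https://posthog.com/docs/foo</loc></url> (illustrative)
// yields the site-relative path '/docs/foo', so `pages` looks like ['/docs/foo', '/blog/bar', ...].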

// Check if URL exists in sitemap
function urlExistsInSitemap(url, pages) {
    // Remove trailing slash for comparison
    const normalizedUrl = url.replace(/\/$/, '')
    return pages.includes(normalizedUrl) || pages.includes(normalizedUrl + '/')
}
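
// Example: '/docs/foo' and '/docs/foo/' are treated as the same page, so the check passes
// if the sitemap contains either form (illustrative path).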

// ============================================================================
// ANCHOR VALIDATION FUNCTIONS
// ============================================================================

// Better file path resolution for Gatsby's output structure
function getHtmlFilePath(url) {
    const publicDir = path.join(process.cwd(), 'public')
    let htmlPath

    // Remove leading slash
    const cleanUrl = url.replace(/^\//, '')

    if (cleanUrl === '') {
        // Root page
        htmlPath = path.join(publicDir, 'index.html')
    } else if (cleanUrl.endsWith('/')) {
        // Directory with trailing slash
        htmlPath = path.join(publicDir, cleanUrl, 'index.html')
    } else if (cleanUrl.includes('.')) {
        // File with extension
        htmlPath = path.join(publicDir, cleanUrl)
    } else {
        // Directory without trailing slash - try both variants
        const withIndex = path.join(publicDir, cleanUrl, 'index.html')
        const withHtml = path.join(publicDir, cleanUrl + '.html')

        if (fs.existsSync(withIndex)) {
            htmlPath = withIndex
        } else if (fs.existsSync(withHtml)) {
            htmlPath = withHtml
        } else {
            htmlPath = withIndex // Default to index.html variant
        }
    }

    return htmlPath
}
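
// Illustrative mappings, assuming Gatsby's default output layout under public/:
//   '/'          -> public/index.html
//   '/docs/foo/' -> public/docs/foo/index.html
//   '/docs/foo'  -> public/docs/foo/index.html (or public/docs/foo.html if that exists)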

// More efficient function that extracts ALL anchors and caches them
function extractAnchorsFromHtml(htmlPath) {
    // Check cache first
    if (anchorCache.has(htmlPath)) {
        return anchorCache.get(htmlPath)
    }

    // Manage cache size
    if (anchorCache.size >= CONFIG.CACHE_SIZE_LIMIT) {
        // Remove oldest entry
        const firstKey = anchorCache.keys().next().value
        anchorCache.delete(firstKey)
    }

    try {
        const stats = fs.statSync(htmlPath)

        // Skip very large files
        if (stats.size > CONFIG.MAX_FILE_SIZE) {
            console.warn(`Skipping large file: ${htmlPath} (${stats.size} bytes)`)
            const emptySet = new Set()
            anchorCache.set(htmlPath, emptySet)
            return emptySet
        }

        // Skip excluded file extensions
        const ext = path.extname(htmlPath).toLowerCase()
        if (CONFIG.EXCLUDED_EXTENSIONS.includes(ext)) {
            const emptySet = new Set()
            anchorCache.set(htmlPath, emptySet)
            return emptySet
        }

        const anchors = new Set()

        // For all files, process synchronously to avoid Promise issues
        const htmlContent = fs.readFileSync(htmlPath, 'utf8')

        // Simple string matching for very large content
        if (htmlContent.length > 500000) {
            const idMatches = htmlContent.match(/id="([^"]+)"/g) || []
            const nameMatches = htmlContent.match(/name="([^"]+)"/g) || []

            idMatches.forEach((match) => {
                const id = match.match(/id="([^"]+)"/)[1]
                anchors.add(id)
            })

            nameMatches.forEach((match) => {
                const name = match.match(/name="([^"]+)"/)[1]
                anchors.add(name)
            })
        } else {
            // Use DOM parsing for smaller files
            const dom = new JSDOM(htmlContent)
            const document = dom.window.document

            // Get all elements with ID
            const elementsWithId = document.querySelectorAll('[id]')
            elementsWithId.forEach((element) => {
                anchors.add(element.id)
            })

            // Get all elements with name
            const elementsWithName = document.querySelectorAll('[name]')
            elementsWithName.forEach((element) => {
                anchors.add(element.name)
            })

            // Process headings with slugger
            const headings = document.querySelectorAll('h1, h2, h3, h4, h5, h6')
            const slugger = new GithubSlugger()

            headings.forEach((heading) => {
                const slug = slugger.slug(heading.textContent)
                anchors.add(slug)
            })
        }

        // Cache the results
        anchorCache.set(htmlPath, anchors)
        return anchors
    } catch (error) {
        console.warn(`Warning: Could not extract anchors from ${htmlPath}:`, error.message)
        const emptySet = new Set()
        anchorCache.set(htmlPath, emptySet)
        return emptySet
    }
}
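
// github-slugger produces GitHub-style heading anchors, e.g.
//   new GithubSlugger().slug('Getting started') -> 'getting-started'
//   repeated headings are de-duplicated: 'FAQ', 'FAQ' -> 'faq', 'faq-1'
// (illustrative values; the built pages are assumed to use the same slug scheme)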

// ============================================================================
// LINK VALIDATION FUNCTIONS
// ============================================================================

// Check if internal URL exists in sitemap
function validateInternalUrl(url, pages) {
    const [baseUrl] = url.split('#')

    // Check special case URLs that should be considered valid
    if (CONFIG.SPECIAL_CASE_URLS.includes(baseUrl)) {
        return true
    }

    return urlExistsInSitemap(baseUrl, pages)
}

// Check if anchor exists in HTML file
function validateAnchor(url, pages) {
    const [baseUrl, anchor] = url.split('#')

    // No anchor to validate
    if (!anchor) {
        return true
    }

    // First ensure the base URL exists
    if (!urlExistsInSitemap(baseUrl, pages)) {
        return false
    }

    // Get the HTML file path and check if it exists
    const htmlPath = getHtmlFilePath(baseUrl)
    if (!fs.existsSync(htmlPath)) {
        return false
    }

    // Extract all anchors and check if our anchor exists
    const anchors = extractAnchorsFromHtml(htmlPath)
    return anchors.has(anchor)
}
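
// Illustrative flow: validateAnchor('/docs/foo#install', pages) checks that '/docs/foo' is in
// the sitemap, resolves it to an HTML file such as public/docs/foo/index.html, and then looks
// for 'install' among that page's extracted ids, names, and heading slugs.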

// ============================================================================
// MARKDOWN PROCESSING FUNCTIONS
// ============================================================================

// Extract all internal links from a markdown file
function extractInternalLinks(filePath) {
    const content = fs.readFileSync(filePath, 'utf8')
    const links = []
    const linkRegex = /\[([^\]]*)\]\(([^)]+)\)/g

    let match
    let lineNumber = 1
    let currentIndex = 0

    while ((match = linkRegex.exec(content)) !== null) {
        const linkText = match[1]
        const linkUrl = match[2]

        // Count lines up to this match
        while (currentIndex < match.index) {
            if (content[currentIndex] === '\n') {
                lineNumber++
            }
            currentIndex++
        }

        // Only check internal links
        if (linkUrl.startsWith('/') && !linkUrl.startsWith('//')) {
            const beforeMatch = content.substring(Math.max(0, match.index - 75), match.index)
            const afterMatch = content.substring(
                match.index + match[0].length,
                Math.min(content.length, match.index + match[0].length + 75)
            )
            const context = beforeMatch + match[0] + afterMatch

            links.push({
                text: linkText,
                url: linkUrl,
                line: lineNumber,
                context: context.replace(/\n/g, ' '),
            })
        }
    }

    return links
}
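
// Illustrative example: the markdown link `[feature flags](/docs/feature-flags#bootstrapping)`
// produces { text: 'feature flags', url: '/docs/feature-flags#bootstrapping', line: <n>,
// context: '...±75 characters around the link...' }. External and protocol-relative (//) links
// are skipped here.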

// Find all markdown files in a directory recursively
function findMarkdownFiles(dir) {
    const markdownFiles = []

    function walkDirectory(currentDir) {
        const files = fs.readdirSync(currentDir)

        for (const file of files) {
            const filePath = path.join(currentDir, file)
            const stat = fs.statSync(filePath)

            if (stat.isDirectory()) {
                walkDirectory(filePath)
            } else if (file.endsWith('.md') || file.endsWith('.mdx')) {
                markdownFiles.push(filePath)
            }
        }
    }

    walkDirectory(dir)
    return markdownFiles
}

// Process files in batches for memory management
function processFilesInBatches(files, batchSize = CONFIG.BATCH_SIZE) {
    const results = []

    for (let i = 0; i < files.length; i += batchSize) {
        const batch = files.slice(i, i + batchSize)
        console.log(
            `Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(files.length / batchSize)} (${
                batch.length
            } files)`
        )

        const batchResults = batch.map((file) => ({
            file,
            links: extractInternalLinks(file),
        }))

        results.push(...batchResults)

        // Clear some memory between batches
        if (global.gc) {
            global.gc()
        }
    }

    return results
}
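
// Note: global.gc is only defined when Node is started with the --expose-gc flag
// (e.g. node --expose-gc scripts/check-links-post-build.js); otherwise the explicit GC hint
// above is a no-op and batching alone limits memory growth.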

// ============================================================================
// LINK PROCESSING FUNCTIONS
// ============================================================================

// Process a single link and return validation result
function processLink(link, file, pages, redirects) {
    // Strip query parameters from URL for checking
    const urlWithoutQuery = link.url.split('?')[0]

    // Check if link should be excluded by pattern
    const shouldExclude = CONFIG.EXCLUDE_PATTERNS.some((pattern) => urlWithoutQuery.includes(pattern))
    if (shouldExclude) {
        return { type: 'excluded' }
    }

    // Check if link has file extension that should be ignored
    const hasExcludedExtension = CONFIG.EXCLUDED_EXTENSIONS.some((ext) => urlWithoutQuery.endsWith(ext))
    if (hasExcludedExtension) {
        return { type: 'excluded' }
    }

    // Check if this link is a redirect source
    if (isRedirectSource(urlWithoutQuery, redirects)) {
        return { type: 'redirected' }
    }

    // First check if the internal URL is valid
    if (!validateInternalUrl(urlWithoutQuery, pages)) {
        return {
            type: 'broken',
            brokenItem: {
                file: file,
                link: link.url,
                text: link.text,
                line: link.line,
                context: link.context,
                type: 'page',
            },
        }
    } else if (urlWithoutQuery.includes('#')) {
        // If URL is valid but has anchor, check if anchor exists
        if (!validateAnchor(urlWithoutQuery, pages)) {
            return {
                type: 'broken_anchor',
                brokenItem: {
                    file: file,
                    link: link.url,
                    text: link.text,
                    line: link.line,
                    context: link.context,
                    type: 'anchor',
                },
            }
        }
    }

    return { type: 'valid' }
}
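
// processLink() returns one of: { type: 'excluded' }, { type: 'redirected' }, { type: 'valid' },
// or { type: 'broken' | 'broken_anchor', brokenItem: { ... } }, which processAllLinks() below
// switches on to build the report.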

// Process all links and categorize them
function processAllLinks(fileResults, pages, redirects) {
    const brokenLinks = []
    const brokenAnchors = []
    let totalLinks = 0
    let anchorLinksChecked = 0
    let redirectedLinks = 0
    let excludedLinks = 0

    for (const result of fileResults) {
        const { file, links } = result
        totalLinks += links.length

        for (const link of links) {
            // Check whether the link contains an anchor
            const urlWithoutQuery = link.url.split('?')[0]
            const hasAnchor = urlWithoutQuery.includes('#')

            const processResult = processLink(link, file, pages, redirects)

            switch (processResult.type) {
                case 'excluded':
                    excludedLinks++
                    break
                case 'redirected':
                    redirectedLinks++
                    break
                case 'broken':
                    brokenLinks.push(processResult.brokenItem)
                    // Count anchor links for all non-excluded, non-redirected links
                    if (hasAnchor) {
                        anchorLinksChecked++
                    }
                    break
                case 'broken_anchor':
                    brokenAnchors.push(processResult.brokenItem)
                    anchorLinksChecked++
                    break
                case 'valid':
                    // Count anchor links for all non-excluded, non-redirected links
                    if (hasAnchor) {
                        anchorLinksChecked++
                    }
                    break
            }
        }
    }

    return {
        brokenLinks,
        brokenAnchors,
        stats: {
            totalLinks,
            excludedLinks,
            redirectedLinks,
            anchorLinksChecked,
        },
    }
}

// ============================================================================
// RESULTS AND OUTPUT FUNCTIONS
// ============================================================================

// Create the results object
function createResultsObject(brokenLinks, brokenAnchors, stats, markdownFiles, redirects, pages) {
    return {
        timestamp: new Date().toISOString(),
        summary: {
            totalLinks: stats.totalLinks,
            excludedLinks: stats.excludedLinks,
            redirectedLinks: stats.redirectedLinks,
            brokenLinks: brokenLinks.length,
            anchorLinksChecked: stats.anchorLinksChecked,
            brokenAnchors: brokenAnchors.length,
            htmlFilesCached: anchorCache.size,
            markdownFiles: markdownFiles.length,
            redirectPatterns: redirects.length,
            pagesInSitemap: pages.length,
        },
        excludePatterns: CONFIG.EXCLUDE_PATTERNS,
        excludeFileExtensions: CONFIG.EXCLUDED_EXTENSIONS,
        brokenLinks: brokenLinks.map((broken) => ({
            type: broken.type,
            file: path.relative(process.cwd(), broken.file),
            page: convertToPostHogUrl(broken.file),
            brokenLink: broken.link,
            brokenUrl: convertToPostHogUrl(broken.link),
            line: broken.line,
            text: broken.text,
            context: broken.context,
        })),
        brokenAnchors: brokenAnchors.map((anchor) => ({
            type: anchor.type,
            file: path.relative(process.cwd(), anchor.file),
            page: convertToPostHogUrl(anchor.file),
            brokenLink: anchor.link,
            brokenUrl: convertToPostHogUrl(anchor.link),
            line: anchor.line,
            text: anchor.text,
            context: anchor.context,
        })),
    }
}
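
// Truncated, illustrative shape of the resulting JSON:
//   {
//     "timestamp": "...",
//     "summary": { "totalLinks": 0, "brokenLinks": 0, "brokenAnchors": 0, ... },
//     "brokenLinks": [{ "type": "page", "file": "...", "brokenLink": "...", "line": 0, ... }],
//     "brokenAnchors": [{ "type": "anchor", ... }]
//   }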

// Write results to JSON file (optional)
function writeResultsToFile(results, outputPath) {
    if (!outputPath) {
        // No output path provided, skip writing to file
        return
    }

    const resultsDir = path.join(process.cwd(), outputPath)
    if (!fs.existsSync(resultsDir)) {
        fs.mkdirSync(resultsDir, { recursive: true })
    }

    const timestamp = new Date().toISOString().replace(/[:.]/g, '-')
    const resultsFile = path.join(resultsDir, `link-check-${timestamp}.json`)

    try {
        fs.writeFileSync(resultsFile, JSON.stringify(results, null, 2))
        console.log(`\nResults saved to: ${resultsFile}`)
    } catch (error) {
        console.error(`Error saving results: ${error.message}`)
    }
}
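
// Example output filename (illustrative): link-check-2024-01-01T12-00-00-000Z.json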

// Display broken links
function displayBrokenLinks(brokenLinks) {
    if (brokenLinks.length === 0) return

    console.log('\nBroken links found:\n')

    brokenLinks.forEach((broken) => {
        console.log(`Error type: ${broken.type}`)
        console.log(`File: ${path.relative(process.cwd(), broken.file)}`)
        console.log(`Page: ${convertToPostHogUrl(broken.file)}`)
        console.log(`Broken link: ${broken.link}`)
        console.log(`Broken URL: ${convertToPostHogUrl(broken.link)}`)
        console.log(`Line #: ${broken.line}`)
        if (broken.text) {
            console.log(`Hyperlinked text: ${broken.text}`)
        }
        console.log(`Context: ${broken.context}`)
        console.log('-'.repeat(80))
    })
}

// Display broken anchor links
function displayBrokenAnchors(brokenAnchors) {
    if (brokenAnchors.length === 0) return

    console.log('\nBroken anchor links:\n')

    brokenAnchors.forEach((anchor) => {
        console.log(`Error type: ${anchor.type}`)
        console.log(`File: ${path.relative(process.cwd(), anchor.file)}`)
        console.log(`Page: ${convertToPostHogUrl(anchor.file)}`)
        console.log(`Broken link: ${anchor.link}`)
        console.log(`Broken URL: ${convertToPostHogUrl(anchor.link)}`)
        console.log(`Line #: ${anchor.line}`)
        if (anchor.text) {
            console.log(`Hyperlinked text: ${anchor.text}`)
        }
        console.log(`Context: ${anchor.context}`)
        console.log('-'.repeat(80))
    })
}

// Display summary statistics
function displaySummaryStats(stats, brokenLinks, brokenAnchors, markdownFilesCount) {
    console.log(`\nScanned ${markdownFilesCount} markdown files`)
    console.log(`Processed ${stats.totalLinks} internal links`)
    console.log(`Found ${stats.excludedLinks} excluded links (skipped)`)
    console.log(`Found ${stats.redirectedLinks} redirected links (skipped)`)
    console.log(`Checked ${stats.anchorLinksChecked} anchor links`)
    console.log(`Found ${brokenLinks.length} broken links`)
    console.log(`Found ${brokenAnchors.length} broken anchor links`)
    console.log(`Cached ${anchorCache.size} HTML files for anchor checking`)
}

// ============================================================================
// MAIN FUNCTION
// ============================================================================

function checkLinks(outputPath) {
    console.log('Starting post-build link validation...')

    // Initialize data sources
    const redirects = parseVercelRedirects()
    console.log(`Found ${redirects.length} redirect/rewrite patterns`)

    const pages = getSitemapPages()
    console.log(`Found ${pages.length} pages in sitemap`)

    const markdownFiles = findMarkdownFiles(CONFIG.CONTENTS_DIR)
    console.log(`Found ${markdownFiles.length} markdown files`)

    // Process files and extract links
    const fileResults = processFilesInBatches(markdownFiles)

    // Process and validate all links
    const { brokenLinks, brokenAnchors, stats } = processAllLinks(fileResults, pages, redirects)

    // Sort results alphabetically by file
    brokenLinks.sort((a, b) => a.file.localeCompare(b.file))
    brokenAnchors.sort((a, b) => a.file.localeCompare(b.file))

    // Display broken links
    displayBrokenLinks(brokenLinks)
    displayBrokenAnchors(brokenAnchors)

    if (brokenLinks.length === 0 && brokenAnchors.length === 0) {
        console.log('\nNo broken links found! 🎉')
    }

    // Display summary stats at the end
    displaySummaryStats(stats, brokenLinks, brokenAnchors, markdownFiles.length)

    // Create and save results at the end
    const results = createResultsObject(brokenLinks, brokenAnchors, stats, markdownFiles, redirects, pages)
    writeResultsToFile(results, outputPath)

    return brokenLinks.length
}

// ============================================================================
// SCRIPT EXECUTION
// ============================================================================

// Parse command line arguments
const args = process.argv.slice(2)
const outputPath = args.length > 0 ? args[0] : null

if (outputPath) {
    console.log(`Results will be saved to: ${outputPath}`)
} else {
    console.log('No output path provided. Results will only be displayed in console.')
}

// Run the script
const brokenCount = checkLinks(outputPath)

// brokenCount tracks broken PAGE links only (not broken anchors). Broken links are
// reported above but do not fail the build, so the workflow can continue while
// issues are still surfaced.
if (brokenCount > 0) {
    console.log('\nCheck the output above ☝️')
}

process.exit(0) // Always exit successfully