From 04b3a25fa9d19eb6509615858220a306b60cafa4 Mon Sep 17 00:00:00 2001 From: Eli Kinsey Date: Mon, 27 Oct 2025 12:26:52 -0700 Subject: [PATCH] Optional GitHub sourcing (#13380) * source github nodes optionally * fetch references from strapi * fix type --------- Co-authored-by: Cory Watilo --- gatsby/createSchemaCustomization.ts | 88 ++++ gatsby/sourceNodes.ts | 596 +++++++++++++--------------- 2 files changed, 355 insertions(+), 329 deletions(-) diff --git a/gatsby/createSchemaCustomization.ts b/gatsby/createSchemaCustomization.ts index d46679226..fc16aa6ac 100644 --- a/gatsby/createSchemaCustomization.ts +++ b/gatsby/createSchemaCustomization.ts @@ -271,6 +271,94 @@ export const createSchemaCustomization: GatsbyNode['createSchemaCustomization'] mdx: Mdx @link(by: "frontmatter.templateId", from: "pipelineId") introSnippet: String installationSnippet: String + inputs_schema: [PostHogPipelineInputSchema] + name: String + slug: String + type: String + category: [String] + description: String + icon_url: String + status: String + } + type PostHogPipelineInputSchema { + key: String + type: String + label: String + secret: Boolean + required: Boolean + description: String + } + type SdkReferences implements Node { + info: SdkReferencesInfo + referenceId: String + hogRef: String + id: String + categories: [String] + classes: [SdkReferencesClass] + types: [SdkReferencesType] + version: String + } + type SdkReferencesInfo { + description: String + id: String + specUrl: String + slugPrefix: String + title: String + version: String + } + type SdkReferencesClass { + description: String + functions: [SdkReferencesFunction] + id: String + title: String + } + type SdkReferencesFunction { + category: String + description: String + details: String + examples: [SdkReferencesFunctionExample] + id: String + params: [SdkReferencesFunctionParam] + path: String + releaseTag: String + showDocs: Boolean + returnType: SdkReferencesFunctionReturnType + title: String + } + type SdkReferencesFunctionExample { + code: String + name: String + id: String + } + type SdkReferencesFunctionParam { + description: String + isOptional: Boolean + name: String + type: String + } + type SdkReferencesFunctionReturnType { + id: String + name: String + } + type SdkReferencesType { + example: String + id: String + name: String + path: String + properties: [SdkReferencesTypeProperty] + } + type SdkReferencesTypeProperty { + description: String + name: String + type: String + } + type GitHubStats implements Node { + owner: String + repo: String + stars: Int + forks: Int + commits: Int + contributors: Int } type ProductDataProductsAddons { legacy_product: Boolean diff --git a/gatsby/sourceNodes.ts b/gatsby/sourceNodes.ts index 4a1f22b7b..002f530e6 100644 --- a/gatsby/sourceNodes.ts +++ b/gatsby/sourceNodes.ts @@ -75,231 +75,6 @@ export const sourceNodes: GatsbyNode['sourceNodes'] = async ({ actions, createCo components: JSON.stringify(spec.components), }) - // PostHog SDK references - - // Paths to the SDK references folders - const SDK_REFERENCES_FOLDER_PATHS = [ - { - id: 'posthog-python', - repo: 'posthog/posthog-python', - repo_branch: 'master', - folder_path: 'references', - }, - { - id: 'posthog-js', - repo: 'posthog/posthog-js', - repo_branch: 'main', - folder_path: 'packages/browser/references', - }, - { - id: 'posthog-node', - repo: 'posthog/posthog-js', - repo_branch: 'main', - folder_path: 'packages/node/references', - }, - { - id: 'posthog-react-native', - repo: 'posthog/posthog-js', - repo_branch: 'main', - folder_path: 'packages/react-native/references', - }, - ] - - const referencesData = {} - function parseReferencesVersion(version: string): string { - return version.split('-').pop()?.replace('.json', '') || '' - } - - // Fetch all folder contents in a batch - const folderPromises = SDK_REFERENCES_FOLDER_PATHS.map(async (folderPath) => { - const url = `https://api.github.com/repos/${folderPath.repo}/contents/${folderPath.folder_path}?ref=${folderPath.repo_branch}` - const response = await fetch(url) - const data = await response.json() - return { folderPath, data } - }) - - const folderResults = await Promise.allSettled(folderPromises).then((results) => - results - .map((result) => { - if (result.status === 'fulfilled' && result.value?.data && Array.isArray(result.value.data)) { - return { folderPath: result.value.folderPath, data: result.value.data } - } - console.error('Failed to fetch SDK reference files') - return null - }) - .filter((result) => result !== null) - ) - - for (const { folderPath, data } of folderResults) { - referencesData[folderPath.id] = {} - - const filePromises = data - .filter((item) => item.type === 'file') - .map(async (item) => { - const fileUrl = `https://api.github.com/repos/${folderPath.repo}/contents/${folderPath.folder_path}/${item.name}?ref=${folderPath.repo_branch}` - const fileResponse = await fetch(fileUrl) - const fileData = await fileResponse.json() - const version = parseReferencesVersion(item.name) - return { version, content: fileData.content } - }) - - const fileResults = await Promise.allSettled(filePromises).then((results) => - results.map((result) => { - if (result.status === 'fulfilled' && result.value?.version && result.value?.content) { - return { version: result.value.version, content: result.value.content } - } - console.error(`Failed to fetch SDK reference file in ${folderPath.repo}/${folderPath.folder_path}`) - return null - }) - ) - - for (const { version, content } of fileResults.filter((result) => result !== null)) { - if (version) { - referencesData[folderPath.id][version] = content - } - } - } - - for (const id in referencesData) { - // Create version-specific nodes - for (const version in referencesData[id]) { - // Decode the base64 data first - const versionDataBase64 = referencesData[id][version] - let versionData - - try { - const decodedString = Buffer.from(versionDataBase64, 'base64').toString('utf-8') - versionData = JSON.parse(decodedString) - } catch (error) { - console.error(`Failed to decode version ${version} for ${id}:`, error) - continue - } - versionData.id = `${id}-${version}` - const versionNode = { - parent: null, - children: [], - internal: { - type: `SdkReferences`, - contentDigest: createContentDigest(versionData), - }, - referenceId: id, - version: version, - ...versionData, // Spread the decoded JSON data - } - createNode(versionNode) - } - } - - const postHogIssues = await fetch( - 'https://api.github.com/repos/posthog/posthog/issues?sort=comments&per_page=5' - ).then((res) => res.json()) - postHogIssues.forEach((issue) => { - const { html_url, title, number, user, comments, reactions, labels, body, updated_at } = issue - const data = { - url: html_url, - title, - number, - comments, - user: { - username: user?.login, - avatar: user?.avatar_url, - url: user?.html_url, - }, - reactions, - labels, - body, - updated_at, - } - if (data.reactions) { - data.reactions.plus1 = data.reactions['+1'] - data.reactions.minus1 = data.reactions['-1'] - } - const node = { - id: createNodeId(`posthog-issue-${title}`), - parent: null, - children: [], - internal: { - type: `PostHogIssue`, - contentDigest: createContentDigest(data), - }, - ...data, - } - createNode(node) - }) - - const postHogPulls = await fetch( - 'https://api.github.com/repos/posthog/posthog/pulls?sort=popularity&per_page=5' - ).then((res) => res.json()) - postHogPulls.forEach((issue) => { - const { html_url, title, number, user, labels, body, updated_at } = issue - const data = { - url: html_url, - title, - number, - user: { - username: user?.login, - avatar: user?.avatar_url, - url: user?.html_url, - }, - labels, - body, - updated_at, - } - - const node = { - id: createNodeId(`posthog-pull-${title}`), - parent: null, - children: [], - internal: { - type: `PostHogPull`, - contentDigest: createContentDigest(data), - }, - ...data, - } - createNode(node) - }) - - const createGitHubStatsNode = async (owner, repo) => { - const repoStats = await fetch(`https://api.github.com/repos/${owner}/${repo}`).then((res) => res.json()) - const contributors = await fetch(`https://api.github.com/repos/${owner}/${repo}/contributors?per_page=1`).then( - (res) => { - const link = parseLinkHeader(res.headers.get('link')) - const number = link?.last?.page - return number && Number(number) - } - ) - const commits = await fetch(`https://api.github.com/repos/${owner}/${repo}/commits?per_page=1`).then((res) => { - const link = parseLinkHeader(res.headers.get('link')) - const number = link?.last?.page - return number && Number(number) - }) - const { stargazers_count, forks_count } = repoStats - - const data = { - owner, - repo, - stars: stargazers_count, - forks: forks_count, - commits, - contributors, - } - - const node = { - id: createNodeId(`github-stats-${repo}`), - parent: null, - children: [], - internal: { - type: `GitHubStats`, - contentDigest: createContentDigest(data), - }, - ...data, - } - createNode(node) - } - - await createGitHubStatsNode('posthog', 'posthog') - await createGitHubStatsNode('posthog', 'posthog.com') - const createProductDataNode = async () => { const url = `${process.env.BILLING_SERVICE_URL + '/api/products-v2'}` const headers = { @@ -329,26 +104,6 @@ export const sourceNodes: GatsbyNode['sourceNodes'] = async ({ actions, createCo } await createProductDataNode() - const integrations = await fetch( - 'https://raw.githubusercontent.com/PostHog/integrations-repository/main/integrations.json' - ).then((res) => res.json()) - integrations.forEach((integration) => { - const { name, url, ...other } = integration - const node = { - id: createNodeId(`integration-${name}`), - parent: null, - children: [], - internal: { - type: `Integration`, - contentDigest: createContentDigest(integration), - }, - url: url.replace('https://posthog.com', ''), - name, - ...other, - } - createNode(node) - }) - const createRoadmapItems = async (page = 1) => { const roadmapQuery = qs.stringify( { @@ -798,107 +553,290 @@ export const sourceNodes: GatsbyNode['sourceNodes'] = async ({ actions, createCo }) } - const extractIntroSection = (markdown: string): string => { - const headingMatch = markdown.match(/^#{1,2}\s+/m) + async function sourceGithubNodes() { + if (!process.env.GITHUB_API_KEY) return - if (headingMatch) { - const headingIndex = markdown.indexOf(headingMatch[0]) - return markdown.substring(0, headingIndex).trim() - } + const githubHeaders: HeadersInit = { Authorization: `token ${process.env.GITHUB_API_KEY}` } - return markdown - } + const postHogIssues = await fetch( + 'https://api.github.com/repos/posthog/posthog/issues?sort=comments&per_page=5', + { + headers: githubHeaders, + } + ).then((res) => res.json()) + postHogIssues.forEach((issue) => { + const { html_url, title, number, user, comments, reactions, labels, body, updated_at } = issue + const data = { + url: html_url, + title, + number, + comments, + user: { + username: user?.login, + avatar: user?.avatar_url, + url: user?.html_url, + }, + reactions, + labels, + body, + updated_at, + } + if (data.reactions) { + data.reactions.plus1 = data.reactions['+1'] + data.reactions.minus1 = data.reactions['-1'] + } + const node = { + id: createNodeId(`posthog-issue-${title}`), + parent: null, + children: [], + internal: { + type: `PostHogIssue`, + contentDigest: createContentDigest(data), + }, + ...data, + } + createNode(node) + }) - const extractGettingStartedSection = (markdown: string): string => { - const gettingStartedMatch = markdown.match(/^#{1,2}\s+Getting started\s*$/im) - - if (gettingStartedMatch) { - const startIndex = markdown.indexOf(gettingStartedMatch[0]) - const afterHeading = markdown.substring(startIndex + gettingStartedMatch[0].length) - - const nextHeadingMatch = afterHeading.match(/^#+\s+/m) - - if (nextHeadingMatch) { - const endIndex = afterHeading.indexOf(nextHeadingMatch[0]) - return '## Installation\n\n' + afterHeading.substring(0, endIndex).trim() + const postHogPulls = await fetch( + 'https://api.github.com/repos/posthog/posthog/pulls?sort=popularity&per_page=5', + { + headers: githubHeaders, + } + ).then((res) => res.json()) + postHogPulls.forEach((issue) => { + const { html_url, title, number, user, labels, body, updated_at } = issue + const data = { + url: html_url, + title, + number, + user: { + username: user?.login, + avatar: user?.avatar_url, + url: user?.html_url, + }, + labels, + body, + updated_at, } - return '## Installation\n\n' + afterHeading.trim() + const node = { + id: createNodeId(`posthog-pull-${title}`), + parent: null, + children: [], + internal: { + type: `PostHogPull`, + contentDigest: createContentDigest(data), + }, + ...data, + } + createNode(node) + }) + + const createGitHubStatsNode = async (owner, repo) => { + const repoStats = await fetch(`https://api.github.com/repos/${owner}/${repo}`, { + headers: githubHeaders, + }).then((res) => res.json()) + const contributors = await fetch(`https://api.github.com/repos/${owner}/${repo}/contributors?per_page=1`, { + headers: githubHeaders, + }).then((res) => { + const link = parseLinkHeader(res.headers.get('link')) + const number = link?.last?.page + return number && Number(number) + }) + const commits = await fetch(`https://api.github.com/repos/${owner}/${repo}/commits?per_page=1`, { + headers: githubHeaders, + }).then((res) => { + const link = parseLinkHeader(res.headers.get('link')) + const number = link?.last?.page + return number && Number(number) + }) + const { stargazers_count, forks_count } = repoStats + + const data = { + owner, + repo, + stars: stargazers_count, + forks: forks_count, + commits, + contributors, + } + + const node = { + id: createNodeId(`github-stats-${repo}`), + parent: null, + children: [], + internal: { + type: `GitHubStats`, + contentDigest: createContentDigest(data), + }, + ...data, + } + createNode(node) } - return '' - } + await createGitHubStatsNode('posthog', 'posthog') + await createGitHubStatsNode('posthog', 'posthog.com') - const fetchPostHogPipelines = async ( - type: 'transformation' | 'destination' | 'source_webhook', - generateSlug: (pipeline: any) => string - ) => { - const { results } = await fetch( - `https://us.posthog.com/api/public_hog_function_templates?type=${type}&limit=350` + const integrations = await fetch( + 'https://raw.githubusercontent.com/PostHog/integrations-repository/main/integrations.json', + { headers: githubHeaders } ).then((res) => res.json()) - await Promise.all( - results.map(async (pipeline) => { - let additionalData = {} + integrations.forEach((integration) => { + const { name, url, ...other } = integration + const node = { + id: createNodeId(`integration-${name}`), + parent: null, + children: [], + internal: { + type: `Integration`, + contentDigest: createContentDigest(integration), + }, + url: url.replace('https://posthog.com', ''), + name, + ...other, + } + createNode(node) + }) - if (pipeline.id.startsWith('segment-')) { - const cleanMarkdown = (markdown: string) => { - return markdown - .replaceAll(/^---[\s\S]*?---/g, '') // Remove frontmatter - .replaceAll(/{%\s*.*?\s*%}/g, '') // Remove {% ... %} - .replaceAll(/{:.*?}/g, '') // Remove {: ... } - .replaceAll(/{{.*?}}/g, '') // Remove {{ ... }} - .replaceAll('Segment', 'PostHog') - .replaceAll('Connections > Catalog', 'Data pipelines') - .replaceAll('Catalog', 'Data pipelines') - .replaceAll(' (Actions)', '') - .replaceAll('segmentio', 'posthog') - .replaceAll(/\[([^\]]+)\]\(https?:\/\/[^\/]*segment\.com[^)]*\)(\s*\{:.*?\})?/g, '$1') // Remove segment.com links completely, keeping only the link text - .replaceAll(/> \w+ ""/g, '') - .replaceAll(/^.*Both of these destinations receive data from PostHog.*$/gm, '') // Remove banner regarding the Actions-framework - .replaceAll( - /^.*(?:maintains this destination|maintained by|contact.*support|support.*team).*$/gm, + const extractIntroSection = (markdown: string): string => { + const headingMatch = markdown.match(/^#{1,2}\s+/m) + + if (headingMatch) { + const headingIndex = markdown.indexOf(headingMatch[0]) + return markdown.substring(0, headingIndex).trim() + } + + return markdown + } + + const extractGettingStartedSection = (markdown: string): string => { + const gettingStartedMatch = markdown.match(/^#{1,2}\s+Getting started\s*$/im) + + if (gettingStartedMatch) { + const startIndex = markdown.indexOf(gettingStartedMatch[0]) + const afterHeading = markdown.substring(startIndex + gettingStartedMatch[0].length) + + const nextHeadingMatch = afterHeading.match(/^#+\s+/m) + + if (nextHeadingMatch) { + const endIndex = afterHeading.indexOf(nextHeadingMatch[0]) + return '## Installation\n\n' + afterHeading.substring(0, endIndex).trim() + } + + return '## Installation\n\n' + afterHeading.trim() + } + + return '' + } + + const fetchPostHogPipelines = async ( + type: 'transformation' | 'destination' | 'source_webhook', + generateSlug: (pipeline: any) => string + ) => { + const { results } = await fetch( + `https://us.posthog.com/api/public_hog_function_templates?type=${type}&limit=350` + ).then((res) => res.json()) + await Promise.all( + results.map(async (pipeline) => { + let additionalData = {} + + if (pipeline.id.startsWith('segment-')) { + const cleanMarkdown = (markdown: string) => { + return markdown + .replaceAll(/^---[\s\S]*?---/g, '') // Remove frontmatter + .replaceAll(/{%\s*.*?\s*%}/g, '') // Remove {% ... %} + .replaceAll(/{:.*?}/g, '') // Remove {: ... } + .replaceAll(/{{.*?}}/g, '') // Remove {{ ... }} + .replaceAll('Segment', 'PostHog') + .replaceAll('Connections > Catalog', 'Data pipelines') + .replaceAll('Catalog', 'Data pipelines') + .replaceAll(' (Actions)', '') + .replaceAll('segmentio', 'posthog') + .replaceAll(/\[([^\]]+)\]\(https?:\/\/[^\/]*segment\.com[^)]*\)(\s*\{:.*?\})?/g, '$1') // Remove segment.com links completely, keeping only the link text + .replaceAll(/> \w+ ""/g, '') + .replaceAll(/^.*Both of these destinations receive data from PostHog.*$/gm, '') // Remove banner regarding the Actions-framework + .replaceAll( + /^.*(?:maintains this destination|maintained by|contact.*support|support.*team).*$/gm, + '' + ) // Remove lines about other companies maintaining destinations or contact support + .trim() + } + + const response = await fetch( + `https://raw.githubusercontent.com/posthog/segment-docs/refs/heads/develop/src/connections/destinations/catalog/${pipeline.id.replace( + 'segment-', '' - ) // Remove lines about other companies maintaining destinations or contact support - .trim() + )}/index.md`, + { headers: githubHeaders } + ) + let markdown = await response.text() + if (response.status !== 200) markdown = '' + markdown = cleanMarkdown(markdown) + + additionalData = { + introSnippet: extractIntroSection(markdown), + installationSnippet: extractGettingStartedSection(markdown), + } } - const response = await fetch( - `https://raw.githubusercontent.com/posthog/segment-docs/refs/heads/develop/src/connections/destinations/catalog/${pipeline.id.replace( - 'segment-', - '' - )}/index.md` - ) - let markdown = await response.text() - if (response.status !== 200) markdown = '' - markdown = cleanMarkdown(markdown) - - additionalData = { - introSnippet: extractIntroSection(markdown), - installationSnippet: extractGettingStartedSection(markdown), + const slug = generateSlug(pipeline) + const node = { + id: createNodeId(`posthog-pipeline-${pipeline.id}`), + internal: { + type: 'PostHogPipeline', + contentDigest: createContentDigest({ pipeline }), + }, + pipelineId: pipeline.id, + slug, + type, + ...pipeline, + ...additionalData, } - } + createNode(node) + }) + ) + } - const slug = generateSlug(pipeline) - const node = { - id: createNodeId(`posthog-pipeline-${pipeline.id}`), - internal: { - type: 'PostHogPipeline', - contentDigest: createContentDigest({ pipeline }), - }, - pipelineId: pipeline.id, - slug, - type, - ...pipeline, - ...additionalData, - } - createNode(node) - }) - ) + await fetchPostHogPipelines('transformation', (pipeline) => pipeline.id.replace('plugin-', '')) + await fetchPostHogPipelines('destination', (pipeline) => pipeline.id.replace('template-', '')) + await fetchPostHogPipelines('source_webhook', (pipeline) => pipeline.id.replace('template-', '')) } - await fetchPostHogPipelines('transformation', (pipeline) => pipeline.id.replace('plugin-', '')) - await fetchPostHogPipelines('destination', (pipeline) => pipeline.id.replace('template-', '')) - await fetchPostHogPipelines('source_webhook', (pipeline) => pipeline.id.replace('template-', '')) + await sourceGithubNodes() + + const fetchReferences = async (page = 1) => { + const referenceQuery = qs.stringify( + { + pagination: { + page, + pageSize: 100, + }, + }, + { + encodeValuesOnly: true, + } + ) + const referencesURL = `${process.env.GATSBY_SQUEAK_API_HOST}/api/sdk-references?${referenceQuery}` + const { data, meta } = await fetch(referencesURL).then((res) => res.json()) + for (const reference of data) { + const data = reference?.attributes?.data + if (!data) continue + const versionNode = { + parent: null, + children: [], + internal: { + type: `SdkReferences`, + contentDigest: createContentDigest(data), + }, + ...data, + } + createNode(versionNode) + } + if (meta?.pagination?.pageCount > meta?.pagination?.page) await fetchReferences(page + 1) + } + + await fetchReferences() const fetchEvents = async (page = 1) => { const eventsQuery = qs.stringify(