Optional GitHub sourcing (#13380)

* source github nodes optionally

* fetch references from strapi

* fix type

---------

Co-authored-by: Cory Watilo <corywatilo@gmail.com>
This commit is contained in:
Eli Kinsey
2025-10-27 12:26:52 -07:00
committed by GitHub
parent 1fb32e759c
commit 04b3a25fa9
2 changed files with 355 additions and 329 deletions

View File

@@ -271,6 +271,94 @@ export const createSchemaCustomization: GatsbyNode['createSchemaCustomization']
mdx: Mdx @link(by: "frontmatter.templateId", from: "pipelineId")
introSnippet: String
installationSnippet: String
inputs_schema: [PostHogPipelineInputSchema]
name: String
slug: String
type: String
category: [String]
description: String
icon_url: String
status: String
}
type PostHogPipelineInputSchema {
key: String
type: String
label: String
secret: Boolean
required: Boolean
description: String
}
type SdkReferences implements Node {
info: SdkReferencesInfo
referenceId: String
hogRef: String
id: String
categories: [String]
classes: [SdkReferencesClass]
types: [SdkReferencesType]
version: String
}
type SdkReferencesInfo {
description: String
id: String
specUrl: String
slugPrefix: String
title: String
version: String
}
type SdkReferencesClass {
description: String
functions: [SdkReferencesFunction]
id: String
title: String
}
type SdkReferencesFunction {
category: String
description: String
details: String
examples: [SdkReferencesFunctionExample]
id: String
params: [SdkReferencesFunctionParam]
path: String
releaseTag: String
showDocs: Boolean
returnType: SdkReferencesFunctionReturnType
title: String
}
type SdkReferencesFunctionExample {
code: String
name: String
id: String
}
type SdkReferencesFunctionParam {
description: String
isOptional: Boolean
name: String
type: String
}
type SdkReferencesFunctionReturnType {
id: String
name: String
}
type SdkReferencesType {
example: String
id: String
name: String
path: String
properties: [SdkReferencesTypeProperty]
}
type SdkReferencesTypeProperty {
description: String
name: String
type: String
}
type GitHubStats implements Node {
owner: String
repo: String
stars: Int
forks: Int
commits: Int
contributors: Int
}
type ProductDataProductsAddons {
legacy_product: Boolean

View File

@@ -75,231 +75,6 @@ export const sourceNodes: GatsbyNode['sourceNodes'] = async ({ actions, createCo
components: JSON.stringify(spec.components),
})
// PostHog SDK references
// Paths to the SDK references folders
const SDK_REFERENCES_FOLDER_PATHS = [
{
id: 'posthog-python',
repo: 'posthog/posthog-python',
repo_branch: 'master',
folder_path: 'references',
},
{
id: 'posthog-js',
repo: 'posthog/posthog-js',
repo_branch: 'main',
folder_path: 'packages/browser/references',
},
{
id: 'posthog-node',
repo: 'posthog/posthog-js',
repo_branch: 'main',
folder_path: 'packages/node/references',
},
{
id: 'posthog-react-native',
repo: 'posthog/posthog-js',
repo_branch: 'main',
folder_path: 'packages/react-native/references',
},
]
const referencesData = {}
function parseReferencesVersion(version: string): string {
return version.split('-').pop()?.replace('.json', '') || ''
}
// Fetch all folder contents in a batch
const folderPromises = SDK_REFERENCES_FOLDER_PATHS.map(async (folderPath) => {
const url = `https://api.github.com/repos/${folderPath.repo}/contents/${folderPath.folder_path}?ref=${folderPath.repo_branch}`
const response = await fetch(url)
const data = await response.json()
return { folderPath, data }
})
const folderResults = await Promise.allSettled(folderPromises).then((results) =>
results
.map((result) => {
if (result.status === 'fulfilled' && result.value?.data && Array.isArray(result.value.data)) {
return { folderPath: result.value.folderPath, data: result.value.data }
}
console.error('Failed to fetch SDK reference files')
return null
})
.filter((result) => result !== null)
)
for (const { folderPath, data } of folderResults) {
referencesData[folderPath.id] = {}
const filePromises = data
.filter((item) => item.type === 'file')
.map(async (item) => {
const fileUrl = `https://api.github.com/repos/${folderPath.repo}/contents/${folderPath.folder_path}/${item.name}?ref=${folderPath.repo_branch}`
const fileResponse = await fetch(fileUrl)
const fileData = await fileResponse.json()
const version = parseReferencesVersion(item.name)
return { version, content: fileData.content }
})
const fileResults = await Promise.allSettled(filePromises).then((results) =>
results.map((result) => {
if (result.status === 'fulfilled' && result.value?.version && result.value?.content) {
return { version: result.value.version, content: result.value.content }
}
console.error(`Failed to fetch SDK reference file in ${folderPath.repo}/${folderPath.folder_path}`)
return null
})
)
for (const { version, content } of fileResults.filter((result) => result !== null)) {
if (version) {
referencesData[folderPath.id][version] = content
}
}
}
for (const id in referencesData) {
// Create version-specific nodes
for (const version in referencesData[id]) {
// Decode the base64 data first
const versionDataBase64 = referencesData[id][version]
let versionData
try {
const decodedString = Buffer.from(versionDataBase64, 'base64').toString('utf-8')
versionData = JSON.parse(decodedString)
} catch (error) {
console.error(`Failed to decode version ${version} for ${id}:`, error)
continue
}
versionData.id = `${id}-${version}`
const versionNode = {
parent: null,
children: [],
internal: {
type: `SdkReferences`,
contentDigest: createContentDigest(versionData),
},
referenceId: id,
version: version,
...versionData, // Spread the decoded JSON data
}
createNode(versionNode)
}
}
const postHogIssues = await fetch(
'https://api.github.com/repos/posthog/posthog/issues?sort=comments&per_page=5'
).then((res) => res.json())
postHogIssues.forEach((issue) => {
const { html_url, title, number, user, comments, reactions, labels, body, updated_at } = issue
const data = {
url: html_url,
title,
number,
comments,
user: {
username: user?.login,
avatar: user?.avatar_url,
url: user?.html_url,
},
reactions,
labels,
body,
updated_at,
}
if (data.reactions) {
data.reactions.plus1 = data.reactions['+1']
data.reactions.minus1 = data.reactions['-1']
}
const node = {
id: createNodeId(`posthog-issue-${title}`),
parent: null,
children: [],
internal: {
type: `PostHogIssue`,
contentDigest: createContentDigest(data),
},
...data,
}
createNode(node)
})
const postHogPulls = await fetch(
'https://api.github.com/repos/posthog/posthog/pulls?sort=popularity&per_page=5'
).then((res) => res.json())
postHogPulls.forEach((issue) => {
const { html_url, title, number, user, labels, body, updated_at } = issue
const data = {
url: html_url,
title,
number,
user: {
username: user?.login,
avatar: user?.avatar_url,
url: user?.html_url,
},
labels,
body,
updated_at,
}
const node = {
id: createNodeId(`posthog-pull-${title}`),
parent: null,
children: [],
internal: {
type: `PostHogPull`,
contentDigest: createContentDigest(data),
},
...data,
}
createNode(node)
})
const createGitHubStatsNode = async (owner, repo) => {
const repoStats = await fetch(`https://api.github.com/repos/${owner}/${repo}`).then((res) => res.json())
const contributors = await fetch(`https://api.github.com/repos/${owner}/${repo}/contributors?per_page=1`).then(
(res) => {
const link = parseLinkHeader(res.headers.get('link'))
const number = link?.last?.page
return number && Number(number)
}
)
const commits = await fetch(`https://api.github.com/repos/${owner}/${repo}/commits?per_page=1`).then((res) => {
const link = parseLinkHeader(res.headers.get('link'))
const number = link?.last?.page
return number && Number(number)
})
const { stargazers_count, forks_count } = repoStats
const data = {
owner,
repo,
stars: stargazers_count,
forks: forks_count,
commits,
contributors,
}
const node = {
id: createNodeId(`github-stats-${repo}`),
parent: null,
children: [],
internal: {
type: `GitHubStats`,
contentDigest: createContentDigest(data),
},
...data,
}
createNode(node)
}
await createGitHubStatsNode('posthog', 'posthog')
await createGitHubStatsNode('posthog', 'posthog.com')
const createProductDataNode = async () => {
const url = `${process.env.BILLING_SERVICE_URL + '/api/products-v2'}`
const headers = {
@@ -329,26 +104,6 @@ export const sourceNodes: GatsbyNode['sourceNodes'] = async ({ actions, createCo
}
await createProductDataNode()
const integrations = await fetch(
'https://raw.githubusercontent.com/PostHog/integrations-repository/main/integrations.json'
).then((res) => res.json())
integrations.forEach((integration) => {
const { name, url, ...other } = integration
const node = {
id: createNodeId(`integration-${name}`),
parent: null,
children: [],
internal: {
type: `Integration`,
contentDigest: createContentDigest(integration),
},
url: url.replace('https://posthog.com', ''),
name,
...other,
}
createNode(node)
})
const createRoadmapItems = async (page = 1) => {
const roadmapQuery = qs.stringify(
{
@@ -798,107 +553,290 @@ export const sourceNodes: GatsbyNode['sourceNodes'] = async ({ actions, createCo
})
}
const extractIntroSection = (markdown: string): string => {
const headingMatch = markdown.match(/^#{1,2}\s+/m)
async function sourceGithubNodes() {
if (!process.env.GITHUB_API_KEY) return
if (headingMatch) {
const headingIndex = markdown.indexOf(headingMatch[0])
return markdown.substring(0, headingIndex).trim()
}
const githubHeaders: HeadersInit = { Authorization: `token ${process.env.GITHUB_API_KEY}` }
return markdown
}
const postHogIssues = await fetch(
'https://api.github.com/repos/posthog/posthog/issues?sort=comments&per_page=5',
{
headers: githubHeaders,
}
).then((res) => res.json())
postHogIssues.forEach((issue) => {
const { html_url, title, number, user, comments, reactions, labels, body, updated_at } = issue
const data = {
url: html_url,
title,
number,
comments,
user: {
username: user?.login,
avatar: user?.avatar_url,
url: user?.html_url,
},
reactions,
labels,
body,
updated_at,
}
if (data.reactions) {
data.reactions.plus1 = data.reactions['+1']
data.reactions.minus1 = data.reactions['-1']
}
const node = {
id: createNodeId(`posthog-issue-${title}`),
parent: null,
children: [],
internal: {
type: `PostHogIssue`,
contentDigest: createContentDigest(data),
},
...data,
}
createNode(node)
})
const extractGettingStartedSection = (markdown: string): string => {
const gettingStartedMatch = markdown.match(/^#{1,2}\s+Getting started\s*$/im)
if (gettingStartedMatch) {
const startIndex = markdown.indexOf(gettingStartedMatch[0])
const afterHeading = markdown.substring(startIndex + gettingStartedMatch[0].length)
const nextHeadingMatch = afterHeading.match(/^#+\s+/m)
if (nextHeadingMatch) {
const endIndex = afterHeading.indexOf(nextHeadingMatch[0])
return '## Installation\n\n' + afterHeading.substring(0, endIndex).trim()
const postHogPulls = await fetch(
'https://api.github.com/repos/posthog/posthog/pulls?sort=popularity&per_page=5',
{
headers: githubHeaders,
}
).then((res) => res.json())
postHogPulls.forEach((issue) => {
const { html_url, title, number, user, labels, body, updated_at } = issue
const data = {
url: html_url,
title,
number,
user: {
username: user?.login,
avatar: user?.avatar_url,
url: user?.html_url,
},
labels,
body,
updated_at,
}
return '## Installation\n\n' + afterHeading.trim()
const node = {
id: createNodeId(`posthog-pull-${title}`),
parent: null,
children: [],
internal: {
type: `PostHogPull`,
contentDigest: createContentDigest(data),
},
...data,
}
createNode(node)
})
const createGitHubStatsNode = async (owner, repo) => {
const repoStats = await fetch(`https://api.github.com/repos/${owner}/${repo}`, {
headers: githubHeaders,
}).then((res) => res.json())
const contributors = await fetch(`https://api.github.com/repos/${owner}/${repo}/contributors?per_page=1`, {
headers: githubHeaders,
}).then((res) => {
const link = parseLinkHeader(res.headers.get('link'))
const number = link?.last?.page
return number && Number(number)
})
const commits = await fetch(`https://api.github.com/repos/${owner}/${repo}/commits?per_page=1`, {
headers: githubHeaders,
}).then((res) => {
const link = parseLinkHeader(res.headers.get('link'))
const number = link?.last?.page
return number && Number(number)
})
const { stargazers_count, forks_count } = repoStats
const data = {
owner,
repo,
stars: stargazers_count,
forks: forks_count,
commits,
contributors,
}
const node = {
id: createNodeId(`github-stats-${repo}`),
parent: null,
children: [],
internal: {
type: `GitHubStats`,
contentDigest: createContentDigest(data),
},
...data,
}
createNode(node)
}
return ''
}
await createGitHubStatsNode('posthog', 'posthog')
await createGitHubStatsNode('posthog', 'posthog.com')
const fetchPostHogPipelines = async (
type: 'transformation' | 'destination' | 'source_webhook',
generateSlug: (pipeline: any) => string
) => {
const { results } = await fetch(
`https://us.posthog.com/api/public_hog_function_templates?type=${type}&limit=350`
const integrations = await fetch(
'https://raw.githubusercontent.com/PostHog/integrations-repository/main/integrations.json',
{ headers: githubHeaders }
).then((res) => res.json())
await Promise.all(
results.map(async (pipeline) => {
let additionalData = {}
integrations.forEach((integration) => {
const { name, url, ...other } = integration
const node = {
id: createNodeId(`integration-${name}`),
parent: null,
children: [],
internal: {
type: `Integration`,
contentDigest: createContentDigest(integration),
},
url: url.replace('https://posthog.com', ''),
name,
...other,
}
createNode(node)
})
if (pipeline.id.startsWith('segment-')) {
const cleanMarkdown = (markdown: string) => {
return markdown
.replaceAll(/^---[\s\S]*?---/g, '') // Remove frontmatter
.replaceAll(/{%\s*.*?\s*%}/g, '') // Remove {% ... %}
.replaceAll(/{:.*?}/g, '') // Remove {: ... }
.replaceAll(/{{.*?}}/g, '') // Remove {{ ... }}
.replaceAll('Segment', 'PostHog')
.replaceAll('Connections > Catalog', 'Data pipelines')
.replaceAll('Catalog', 'Data pipelines')
.replaceAll(' (Actions)', '')
.replaceAll('segmentio', 'posthog')
.replaceAll(/\[([^\]]+)\]\(https?:\/\/[^\/]*segment\.com[^)]*\)(\s*\{:.*?\})?/g, '$1') // Remove segment.com links completely, keeping only the link text
.replaceAll(/> \w+ ""/g, '')
.replaceAll(/^.*Both of these destinations receive data from PostHog.*$/gm, '') // Remove banner regarding the Actions-framework
.replaceAll(
/^.*(?:maintains this destination|maintained by|contact.*support|support.*team).*$/gm,
const extractIntroSection = (markdown: string): string => {
const headingMatch = markdown.match(/^#{1,2}\s+/m)
if (headingMatch) {
const headingIndex = markdown.indexOf(headingMatch[0])
return markdown.substring(0, headingIndex).trim()
}
return markdown
}
const extractGettingStartedSection = (markdown: string): string => {
const gettingStartedMatch = markdown.match(/^#{1,2}\s+Getting started\s*$/im)
if (gettingStartedMatch) {
const startIndex = markdown.indexOf(gettingStartedMatch[0])
const afterHeading = markdown.substring(startIndex + gettingStartedMatch[0].length)
const nextHeadingMatch = afterHeading.match(/^#+\s+/m)
if (nextHeadingMatch) {
const endIndex = afterHeading.indexOf(nextHeadingMatch[0])
return '## Installation\n\n' + afterHeading.substring(0, endIndex).trim()
}
return '## Installation\n\n' + afterHeading.trim()
}
return ''
}
const fetchPostHogPipelines = async (
type: 'transformation' | 'destination' | 'source_webhook',
generateSlug: (pipeline: any) => string
) => {
const { results } = await fetch(
`https://us.posthog.com/api/public_hog_function_templates?type=${type}&limit=350`
).then((res) => res.json())
await Promise.all(
results.map(async (pipeline) => {
let additionalData = {}
if (pipeline.id.startsWith('segment-')) {
const cleanMarkdown = (markdown: string) => {
return markdown
.replaceAll(/^---[\s\S]*?---/g, '') // Remove frontmatter
.replaceAll(/{%\s*.*?\s*%}/g, '') // Remove {% ... %}
.replaceAll(/{:.*?}/g, '') // Remove {: ... }
.replaceAll(/{{.*?}}/g, '') // Remove {{ ... }}
.replaceAll('Segment', 'PostHog')
.replaceAll('Connections > Catalog', 'Data pipelines')
.replaceAll('Catalog', 'Data pipelines')
.replaceAll(' (Actions)', '')
.replaceAll('segmentio', 'posthog')
.replaceAll(/\[([^\]]+)\]\(https?:\/\/[^\/]*segment\.com[^)]*\)(\s*\{:.*?\})?/g, '$1') // Remove segment.com links completely, keeping only the link text
.replaceAll(/> \w+ ""/g, '')
.replaceAll(/^.*Both of these destinations receive data from PostHog.*$/gm, '') // Remove banner regarding the Actions-framework
.replaceAll(
/^.*(?:maintains this destination|maintained by|contact.*support|support.*team).*$/gm,
''
) // Remove lines about other companies maintaining destinations or contact support
.trim()
}
const response = await fetch(
`https://raw.githubusercontent.com/posthog/segment-docs/refs/heads/develop/src/connections/destinations/catalog/${pipeline.id.replace(
'segment-',
''
) // Remove lines about other companies maintaining destinations or contact support
.trim()
)}/index.md`,
{ headers: githubHeaders }
)
let markdown = await response.text()
if (response.status !== 200) markdown = ''
markdown = cleanMarkdown(markdown)
additionalData = {
introSnippet: extractIntroSection(markdown),
installationSnippet: extractGettingStartedSection(markdown),
}
}
const response = await fetch(
`https://raw.githubusercontent.com/posthog/segment-docs/refs/heads/develop/src/connections/destinations/catalog/${pipeline.id.replace(
'segment-',
''
)}/index.md`
)
let markdown = await response.text()
if (response.status !== 200) markdown = ''
markdown = cleanMarkdown(markdown)
additionalData = {
introSnippet: extractIntroSection(markdown),
installationSnippet: extractGettingStartedSection(markdown),
const slug = generateSlug(pipeline)
const node = {
id: createNodeId(`posthog-pipeline-${pipeline.id}`),
internal: {
type: 'PostHogPipeline',
contentDigest: createContentDigest({ pipeline }),
},
pipelineId: pipeline.id,
slug,
type,
...pipeline,
...additionalData,
}
}
createNode(node)
})
)
}
const slug = generateSlug(pipeline)
const node = {
id: createNodeId(`posthog-pipeline-${pipeline.id}`),
internal: {
type: 'PostHogPipeline',
contentDigest: createContentDigest({ pipeline }),
},
pipelineId: pipeline.id,
slug,
type,
...pipeline,
...additionalData,
}
createNode(node)
})
)
await fetchPostHogPipelines('transformation', (pipeline) => pipeline.id.replace('plugin-', ''))
await fetchPostHogPipelines('destination', (pipeline) => pipeline.id.replace('template-', ''))
await fetchPostHogPipelines('source_webhook', (pipeline) => pipeline.id.replace('template-', ''))
}
await fetchPostHogPipelines('transformation', (pipeline) => pipeline.id.replace('plugin-', ''))
await fetchPostHogPipelines('destination', (pipeline) => pipeline.id.replace('template-', ''))
await fetchPostHogPipelines('source_webhook', (pipeline) => pipeline.id.replace('template-', ''))
await sourceGithubNodes()
const fetchReferences = async (page = 1) => {
const referenceQuery = qs.stringify(
{
pagination: {
page,
pageSize: 100,
},
},
{
encodeValuesOnly: true,
}
)
const referencesURL = `${process.env.GATSBY_SQUEAK_API_HOST}/api/sdk-references?${referenceQuery}`
const { data, meta } = await fetch(referencesURL).then((res) => res.json())
for (const reference of data) {
const data = reference?.attributes?.data
if (!data) continue
const versionNode = {
parent: null,
children: [],
internal: {
type: `SdkReferences`,
contentDigest: createContentDigest(data),
},
...data,
}
createNode(versionNode)
}
if (meta?.pagination?.pageCount > meta?.pagination?.page) await fetchReferences(page + 1)
}
await fetchReferences()
const fetchEvents = async (page = 1) => {
const eventsQuery = qs.stringify(