diff --git a/.gitignore b/.gitignore index 280942b..8dc8ae7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ node_modules .DS_Store tmp -build \ No newline at end of file +build +save diff --git a/index.js b/index.js index 4ed2aed..8614adc 100644 --- a/index.js +++ b/index.js @@ -1,24 +1,11 @@ -const translate = require('./translate.js'); const express = require('express'); const app = express(); -app.get('/', (req, res) => { - res.send('Hello World!'); -}); - -app.get('/translate', (req, res) => { - const { text, lang } = req.query; - translate(text, lang) - .then((result) => { - res.send(result); - }) - .catch((err) => { - res.send(err); - }); -}); +app.use(express.static('public')); +app.use(express.static('save')); app.listen(process.env.PORT || 3000, (port) => { console.log(`Example app listening on port ${port}!`); -} \ No newline at end of file +}); \ No newline at end of file diff --git a/public/index.html b/public/index.html index e69de29..81fec58 100644 --- a/public/index.html +++ b/public/index.html @@ -0,0 +1,348 @@ + + + + + + + + + +
+
+ +
+

Loading...
+ +
+
+ +
+
+
+
{{item.path}}
+
+
+ +
+
+
+
#{{block.title}}
+
{{block.content}}
+
+
+
#{{block[targetLanguageTitle][0]}}
+
{{block[targetLanguageContent][0]}}
+
+
+ +
+
+
+
+
+ +
+

Grab a repo from GitHub, and translate its docs

+
+

Translate repo:

+ + + + + + + +
+
+ +
+
+
+ + + + \ No newline at end of file diff --git a/translate.js b/translate.js index ca21284..90d2218 100644 --- a/translate.js +++ b/translate.js @@ -14,14 +14,6 @@ const octokit = new Octokit({ auth: process.env["GITHUB_PERSONAL_ACCESS_TOKEN"] }) - - -async function main() { - const chatCompletion = await openai.chat.completions.create({ - messages: [{ role: 'user', content: 'Say this is a test' }], - model: 'gpt-3.5-turbo', - }); -} async function getLastFileVersion(owner, repoName, path) { let docFiles = []; @@ -75,11 +67,15 @@ async function listDocFiles(files, owner, repoName, path) { async function loadFiles(owner, repoName, files) { for (let file of files) { - const response = await fetch(file.download_url); - const text = await response.text(); - - file.raw = text; - file.token = encode(text).length; + // do not reload file if already loaded + if (!file.raw) { + const response = await fetch(file.download_url); + const text = await response.text(); + + file.raw = text; + file.token = encode(text).length; + } + } } @@ -109,7 +105,7 @@ function parseMdStrToTree(file) { // a function that take a document hierarchical object and return a md file as a string function parseTreeToMdStr(doc, code='') { let str = ''; - + console.log(JSON.stringify(doc, null, 2)); for (let block of doc) { if (code) { str += '#'.repeat(block.level) + ' ' + block[`title_${code}`][0] + '\n'; @@ -132,8 +128,9 @@ function sleep(ms) { async function translateTextTree(textTree, language, temp=0) { let text = parseTreeToMdStr(textTree); + let messages = []; try { - let messages = [ + messages = [ { "role": "system", "content": `Your are task with translating a technical documentation in ${language}. The user will provide the documentation in a markdown format. Translate it to ${language}. Only output the translated documentation in Markdown, do not add or remove content. Do not try to translate function/api endpoint name, only translate the documentation. 
If the documentation contain codeblocks, only translate commentaries, do not translate variablenames / function names. Try to output it in a {language} that is easy to read, do not try to translate all expressions verbatim, make it so it feel professional. If {language} usually use the enlish word for a thing, keep it in English. Notes: it's a computer doc, build mean 'compile', watch mean 'looking at file that change', ... Keep the same structure as the original documentation, and retain ALL the links / images.`, @@ -145,11 +142,11 @@ async function translateTextTree(textTree, language, temp=0) { ]; const chatCompletion = await openai.chat.completions.create({ messages: messages, - model: 'gpt-3.5-turbo', + model: 'gpt-3.5-turbo-1106', temperature: temp }); - console.log(chatCompletion.choices[0].message.content) + // console.log(chatCompletion.choices[0].message.content) let translationTree = parseMdStrToTree(chatCompletion.choices[0].message.content); @@ -194,10 +191,12 @@ async function translateTextTree(textTree, language, temp=0) { }); } + console.log("=======\nRejectd", JSON.stringify(messages, null, 2)); + chatCompletion = await openai.chat.completions.create({ messages: messages, - model: 'gpt-3.5-turbo-16k', + model: 'gpt-3.5-turbo-1106', temperature: temp }); translationTree = parseMdStrToTree(chatCompletion.choices[0].message.content); @@ -213,6 +212,7 @@ async function translateTextTree(textTree, language, temp=0) { catch (e) { console.log(e); console.log('sleeping'); + console.log("=======\eerror on ", JSON.stringify(messages, null, 2)); await sleep(30000) return translateTextTree(textTree, language, temp); } @@ -243,6 +243,14 @@ async function translateFile(file, language, code) { } let translationTree = await translateTextTree(blocks, language, 0); + + if (translationTree.length != blocks.length) { + if (!file.translationError) { + file.translationError = {}; + } + file.translationError[code] = true; + + } for (let j = 0; j < blocks.length; j++) { 
if (!blocks[j]["title_" + code]) { @@ -251,9 +259,11 @@ async function translateFile(file, language, code) { if (!blocks[j]["content_" + code]) { blocks[j]["content_" + code] = [] } - blocks[j]["title_" + code].push(translationTree[j].title); - blocks[j]["content_" + code].push(translationTree[j].content); + + blocks[j]["title_" + code].push(translationTree[j]?.title || '\n'); + blocks[j]["content_" + code].push(translationTree[j]?.content || '\n'); } + } } @@ -264,6 +274,7 @@ async function translateFiles(files, language, code, savepath) { for (let file of files) { started_jobs++; let fn = async function() { + console.log("translating:", file.name); await translateFile(file, language, code); await fs.writeFile(savepath, JSON.stringify(files, null, 2), 'utf8'); finished_jobs++; @@ -281,7 +292,9 @@ async function translateFiles(files, language, code, savepath) { async function correctLinkInFile(file, languageCode, docDir) { for (let block of file.doc) { // find all markdown local link in block.content, BUT NOT IMAGES - console.log(block, block[`content_${languageCode}`][0], `content_${languageCode}`); + if (!block[`content_${languageCode}`]) { + continue + } let matchesTranslated = block[`content_${languageCode}`][0].match(/\[.*\]\(.*\)/g); @@ -333,6 +346,11 @@ async function buildOutputMd(files, owner, repoName, repoDocDir, languageCode) { let targetDir = `${__dirname}/build/${owner}/${repoName}/${repoDocDir}/${languageCode}`; for (let file of files) { + // check if file is translated in target language + if (!file.doc || file.doc[0][`content_${languageCode}`] === undefined) { + continue + } + await correctLinkInFile(file, languageCode); let translatedMd = parseTreeToMdStr(file.doc, languageCode); @@ -362,7 +380,7 @@ async function printFiles(files, owner, repoName, repoDocDir) { let dir = ''; for (let i = 0; i < dirs.length - 1; i++) { dir += dirs[i] + '/'; - try { + try {xw await fs.access(dir); } catch (error) { await fs.mkdir(dir); @@ -376,25 +394,37 @@ async 
function translateDoc(owner, repoName, repoDocDir, language, code) { // const files = await listDocFiles(owner, repoName, repoDocDir); let files = []; - let savepath = `${__dirname}/tmp/save-${owner}-${repoName}.json`; + let savepath = `${__dirname}/save/${owner}/${repoName}.json`; try { await fs.access(savepath); files = require(savepath) } catch { - + // create the save oath + let dirs = savepath.split('/'); + let dir = ''; + for (let i = 0; i < dirs.length - 1; i++) { + dir += dirs[i] + '/'; + try { + await fs.access(dir); + } catch (error) { + await fs.mkdir(dir); + } + } } await listDocFiles(files, owner, repoName, repoDocDir); await loadFiles(owner, repoName, files); - await printFiles(files, owner, repoName, repoDocDir); + // await printFiles(files, owner, repoName, repoDocDir); await translateFiles(files, language, code, savepath); - await buildOutputMd(files, owner, repoName, repoDocDir, code) + // await buildOutputMd(files, owner, repoName, repoDocDir, code) } - +// translateDoc('nodejs', 'node', 'doc', 'French', 'fr'); translateDoc('run-llama', 'llama_index', 'docs', 'French', 'fr') +// translateDoc('run-llama', 'llama_index', 'docs', 'Simplified Chinese(zh_cn)', 'zh_cn') + module.exports = translateDoc; \ No newline at end of file