diff --git a/cli.js b/cli.js index 2756a2c..3c44424 100755 --- a/cli.js +++ b/cli.js @@ -1,7 +1,8 @@ #!/usr/bin/env node const commander = require('commander'); -const supportedLanguages = require('./supportedLanguages.json'); + +const supportedLanguages = require('./src/supportedLanguages.json'); const { version } = require('./package.json'); @@ -25,7 +26,7 @@ const program = commander .option('-o, --outputPath ',`The directory to output the translated files to, defaults to "./build"`, './build') - .option('-s, --savePath ',`The directory to read the source files from, defaults to "./save"`, `./save`) + .option('-s, --savePath ',`The directory to read the source files from, defaults to "./save"`, `${__dirname}/save`) .option('-gt, --githubtoken ', 'The GitHub token PAT to use for authentication, default to the value of the GITHUB_PERSONAL_ACCESS_TOKEN environment variable') .option('-ok, --openaikey ', 'The OpenAI key to use for authentication, default to the value of the OPENAI_API_KEY environment variable') @@ -53,7 +54,8 @@ if (options.language.indexOf('all') !== -1) { } async function run() { - const translate = require('./translate'); + const translate = require('./src/translate'); + const build = require('./src/build'); let loadFile = true; @@ -62,7 +64,7 @@ async function run() { case 'translate': for (let langCode of options.language) { console.log("Translating to " + langCode); - await translate.translateDoc({ + await translate({ repoOwner: program.args[1], repoName: program.args[2], repoDocDir: options.docPath, @@ -80,7 +82,7 @@ async function run() { for (let langCode of options.language) { console.log("Updating translation in " + langCode); - await translate.translateDoc({ + await translate({ repoOwner: program.args[1], repoName: program.args[2], repoDocDir: options.docPath, @@ -113,7 +115,7 @@ async function run() { console.log("Building translation Md " + langCode + " to " + options.outputPath); - translate.buildDoc({ + build({ repoOwner: 
program.args[1], repoName: program.args[2], languageCode: langCode, @@ -124,52 +126,52 @@ async function run() { ); } break; - case 'run': - // TODO: clean up this code - // both translate and build - loadFile = true; - for (let langCode of options.language) { - console.log("Translating to " + langCode); - await translate.translateDoc({ + case 'run': + // TODO: clean up this code + // both translate and build + loadFile = true; + for (let langCode of options.language) { + console.log("Translating to " + langCode); + await translate({ + repoOwner: program.args[1], + repoName: program.args[2], + repoDocDir: options.docPath, + language : supportedLanguages[langCode], + languageCode: langCode, + savePath: options.savePath, + loadFile: loadFile + }); + loadFile = false; + } + + for (let langCode of options.language) { + let outPath = '' + let prefixToRemove = '' + + if (options.mode == 'docusaurus') { + console.log('Using docusaurus mode') + outPath = options.outputPath + `/i18n/${langCode}/docusaurus-plugin-content-docs/current/` + prefixToRemove = options.docPath + } else { + console.log('Using manual mode') + outPath = options.outputPath + `/${langCode}` + prefixToRemove = '' + } + + console.log("Building translation Md " + langCode + " to " + options.outputPath); + + + + build({ repoOwner: program.args[1], repoName: program.args[2], - repoDocDir: options.docPath, - language : supportedLanguages[langCode], languageCode: langCode, savePath: options.savePath, - loadFile: loadFile + outputPath: outPath, + prefixToRemove: prefixToRemove }); - loadFile=false; - } - - for (let langCode of options.language) { - let outPath = '' - let prefixToRemove = '' - - if (options.mode == 'docusaurus') { - console.log('Using docusaurus mode') - outPath = options.outputPath + `/i18n/${langCode}/docusaurus-plugin-content-docs/current/` - prefixToRemove = options.docPath - } else { - console.log('Using manual mode') - outPath = options.outputPath + `/${langCode}` - prefixToRemove = '' - } 
- - console.log("Building translation Md " + langCode + " to " + options.outputPath); - - - - translate.buildDoc({ - repoOwner: program.args[1], - repoName: program.args[2], - languageCode: langCode, - savePath: options.savePath, - outputPath: outPath, - prefixToRemove: prefixToRemove - }); - } - break; + } + break; } } diff --git a/save_file_datastructure.md b/save_file_datastructure.md new file mode 100644 index 0000000..085cc6d --- /dev/null +++ b/save_file_datastructure.md @@ -0,0 +1,24 @@ +The JSON data structure represents a file in a GitHub repository. Each object in the array has the following properties: + +- `name`: The name of the file. +- `path`: The path of the file in the repository. +- `sha`: The SHA-1 hash of the file. +- `size`: The size of the file in bytes. +- `url`: The API URL to access the file. +- `html_url`: The URL to view the file on GitHub. +- `git_url`: The Git URL to access the file. +- `download_url`: The URL to download the file. +- `type`: The type of the file (usually "file"). +- `_links`: An object containing links to access the file. + - `self`: The API URL to access the file. + - `git`: The Git URL to access the file. + - `html`: The URL to view the file on GitHub. +- `raw`: The raw content of the file. +- `token`: A token associated with the file (purpose may vary). +- `doc`: An array of objects representing sections of the document. Each object has the following properties: + - `title`: The title of the section. + - `level`: The level of the section (e.g., 0 for top-level sections). + - `content`: The content of the section. 
/**
 * Re-points the markdown links of a translated file at the targets used by the original text.
 *
 * For every block of `file.doc` that carries a `content_${languageCode}` translation, the links
 * found in the first translation alternative are paired, in order, with the links of the original
 * `content`. Each translated link keeps its translated label but takes the original URL. Local
 * URLs (starting with `./`, `../` or a single `/`) are additionally re-rooted under
 * `/${docDir}/${languageCode}/`. Images (`![alt](src)`) are never touched.
 *
 * When the original and the translation do not contain the same number of links, the block is
 * left unchanged and flagged with `likelyLinkError_${languageCode} = true`; the file is flagged
 * with `likelyLinkError = true`.
 *
 * @param {Object} file - The file object whose `doc` blocks are corrected in place.
 * @param {string} languageCode - The language code for the translation.
 * @param {string} [docDir] - The directory of the documentation in the repository. When omitted,
 *   local links are re-rooted under `/${languageCode}/` only.
 *
 * @returns {Promise<void>} A promise that resolves when the links in the file are corrected.
 */
async function correctLinkInFile(file, languageCode, docDir) {
  // A markdown link that is not an image: no "!" directly in front of the "[".
  // Negated character classes keep two links on the same line from being merged into one match.
  const linkPattern = /(?<!!)\[([^\]]*)\]\(([^)]*)\)/g;
  // "./x", "../x" and "/x" are local; "//host/x" is protocol-relative and therefore not local.
  const isLocalUrl = (url) => /^(?:\.{1,2}\/|\/(?!\/))/.test(url);

  const contentKey = `content_${languageCode}`;
  const errorKey = `likelyLinkError_${languageCode}`;

  // Build "/docDir/languageCode/" without ever emitting "undefined" or doubled slashes.
  const rootSegments = [docDir, languageCode]
    .filter(Boolean)
    .map((segment) => String(segment).replace(/^\/+|\/+$/g, ''))
    .filter(Boolean);
  const localRoot = rootSegments.length > 0 ? `/${rootSegments.join('/')}/` : '/';

  for (const block of file.doc ?? []) {
    const translations = block[contentKey];
    if (!translations || typeof translations[0] !== 'string') {
      continue;
    }

    const original = typeof block.content === 'string' ? block.content : '';
    const originalLinks = [...original.matchAll(linkPattern)];
    const translatedLinks = [...translations[0].matchAll(linkPattern)];

    if (originalLinks.length !== translatedLinks.length) {
      // Links cannot be paired one-to-one: report instead of guessing.
      file.likelyLinkError = true;
      block[errorKey] = true;
      continue;
    }
    if (originalLinks.length === 0) {
      continue;
    }

    // A replacer function is used so "$" in labels or URLs is never treated as a replacement pattern.
    let position = 0;
    translations[0] = translations[0].replace(linkPattern, (match, label) => {
      const originalUrl = originalLinks[position][2];
      position += 1;
      const target = isLocalUrl(originalUrl)
        ? localRoot + originalUrl.replace(/^(?:\.\/|\/)/, '')
        : originalUrl;
      return `[${label}](${target})`;
    });
  }
}
/**
 * Builds the output markdown files for the translated documentation.
 *
 * For each file whose first `doc` block has a `content_${languageCode}` translation, the links are
 * corrected, the translated markdown is rendered with `mdUtils.parseTreeToMdStr`, a disclaimer is
 * inserted right before the first heading (or at the top when there is no heading) and the result
 * is written to `${targetDir}/<file path without prefixToRemove>`. Files without a translation are
 * logged and skipped.
 *
 * NOTE(review): `translationDisclaimer` is loaded at the top of this file from
 * `./supportedLanguages.json` (language names); the disclaimer sentences live in
 * `./supportedLanguagesWarning.json` — confirm which one was intended.
 * NOTE(review): `correctLinkInFile` is called without its `docDir` argument — confirm whether the
 * documentation directory should be forwarded here.
 *
 * @param {Array} files - The files to be processed.
 * @param {string} languageCode - The language code for the translation.
 * @param {string} targetDir - The directory where the output files should be saved.
 * @param {string} prefixToRemove - The prefix to remove from the file paths.
 *
 * @returns {Promise<void>} A promise that resolves when the output markdown files are built.
 */
async function buildOutputMd(files, languageCode, targetDir, prefixToRemove) {
  for (const file of files) {
    // A file counts as translated once its first block carries a translation.
    // Optional chaining also covers a missing or empty `doc`.
    if (file.doc?.[0]?.[`content_${languageCode}`] === undefined) {
      console.log('failed to find translation', file.path);
      continue;
    }

    await correctLinkInFile(file, languageCode);

    let translatedMd = mdUtils.parseTreeToMdStr(file.doc, languageCode);

    // Strip the prefix only when it really is a prefix, then drop the leading "/" left behind.
    const withoutPrefix = prefixToRemove && file.path.startsWith(prefixToRemove)
      ? file.path.slice(prefixToRemove.length)
      : file.path;
    const outputFile = `${targetDir}/${withoutPrefix.replace(/^\/+/, '')}`;

    // Create every missing parent directory in one call.
    const lastSlash = outputFile.lastIndexOf('/');
    if (lastSlash > 0) {
      await fs.mkdir(outputFile.slice(0, lastSlash), { recursive: true });
    }

    // Insert the disclaimer by position: a textual replace of "# " could hit an earlier
    // occurrence inside a sentence, and "$" in the disclaimer must stay literal.
    const disclaimer = translationDisclaimer[languageCode];
    if (disclaimer !== undefined) {
      const banner = `\`\`\`${disclaimer}\`\`\`\n\n`;
      const headingIndex = translatedMd.search(/^#{1,6} /m);
      translatedMd = headingIndex === -1
        ? banner + translatedMd
        : translatedMd.slice(0, headingIndex) + banner + translatedMd.slice(headingIndex);
    }

    await fs.writeFile(outputFile, translatedMd, 'utf8');
  }
}
/**
 * Builds the translated documentation of a given repository.
 *
 * Reads the save file `${savePath}/${repoOwner}/${repoName}.json`, makes sure the output directory
 * exists and hands the saved files to `buildOutputMd`.
 *
 * The save file is read with `fs.readFile` rather than `require`, so the path is resolved the same
 * way as every other file-system call in this function and a stale module cache is never used.
 *
 * @param {Object} options - The options for the build.
 * @param {string} options.repoOwner - The owner of the repository.
 * @param {string} options.repoName - The name of the repository.
 * @param {string} options.languageCode - The language code for the translation.
 * @param {string} options.savePath - The path where the translated documentation was saved.
 * @param {string} options.outputPath - The path where the output should be written.
 * @param {string} options.prefixToRemove - The prefix to remove from the file paths.
 *
 * @returns {Promise<void>} A promise that resolves when the build is complete.
 *
 * @throws {Error} If no save file is found, if it cannot be read, or if it is not valid JSON.
 */
async function build(options) {
  const {
    repoOwner,
    repoName,
    languageCode,
    savePath,
    outputPath,
    prefixToRemove,
  } = options;

  const saveFile = `${savePath}/${repoOwner}/${repoName}.json`;

  let rawSave;
  try {
    rawSave = await fs.readFile(saveFile, 'utf8');
  } catch (error) {
    if (error.code === 'ENOENT') {
      throw new Error('No save file found, please run translate first', { cause: error });
    }
    // Permission problems and the like are not "missing file": surface them unchanged.
    throw error;
  }

  let files;
  try {
    files = JSON.parse(rawSave);
  } catch (error) {
    throw new Error(`Save file ${saveFile} is not valid JSON: ${error.message}`, { cause: error });
  }

  // Creates the output directory and any missing parent in one call.
  await fs.mkdir(outputPath, { recursive: true });

  await buildOutputMd(files, languageCode, outputPath, prefixToRemove);
}
/**
 * Retrieves the latest version of specified file types from a given GitHub repository.
 *
 * Walks the repository contents recursively starting at `path` and collects every file whose
 * extension is listed in `filesExtentions`. Files located under an `i18n` directory, or under a
 * directory named after one of the supported language codes, are treated as existing translations
 * and left out; such directories are not descended into.
 *
 * @param {string} owner - The owner of the repository.
 * @param {string} repoName - The name of the repository.
 * @param {string} path - The path in the repository to retrieve the files from.
 * @param {Array} filesExtentions - An array of file extensions to consider (default is ['.md', '.mdx']).
 *
 * @returns {Promise<Array>} A promise that resolves to an array of file objects, as returned by the
 *   GitHub contents API, for the matching files that are not translations.
 */
async function getLastFileVersion(owner, repoName, path, filesExtentions = ['.md', '.mdx']) {
  const response = await octokit.request('GET /repos/{owner}/{repo}/contents/{path}', {
    owner: owner,
    repo: repoName,
    path: path,
    headers: {
      'X-GitHub-Api-Version': '2022-11-28'
    }
  });

  // The contents API answers with an array for a directory and a single object for a file.
  const entries = Array.isArray(response.data) ? response.data : [response.data];

  // Directory names that mark translated content.
  const translationDirs = new Set(['i18n', ...Object.keys(suportedLanguages)]);

  let docFiles = [];
  for (const entry of entries) {
    if (entry.type === 'dir') {
      if (translationDirs.has(entry.name)) {
        // Everything below would be discarded anyway; skipping saves API requests.
        continue;
      }
      // Forward the extension list so nested directories use the same filter.
      const nested = await getLastFileVersion(owner, repoName, entry.path, filesExtentions);
      docFiles = docFiles.concat(nested);
      continue;
    }
    if (entry.type !== 'file') {
      continue;
    }

    const dotIndex = entry.name.lastIndexOf('.');
    const extension = dotIndex === -1 ? '' : entry.name.slice(dotIndex);
    if (!filesExtentions.includes(extension)) {
      continue;
    }

    // Check the directories of the full path (the file name itself is excluded), so a
    // translation directory is recognised wherever it sits in the path, including at its start.
    const parentDirs = entry.path.split('/').slice(0, -1);
    if (parentDirs.some((dir) => translationDirs.has(dir))) {
      continue;
    }

    docFiles.push(entry);
  }

  return docFiles;
}
/**
 * Lists the documentation files from a given GitHub repository and updates the file list with any changes.
 *
 * The latest listing is merged into `files` in place:
 * - a path that is not known yet is appended;
 * - a known path whose `sha` changed is replaced by the new file object, and the previous object
 *   is kept on its `history` property;
 * - a known path with the same `sha` is left untouched.
 *
 * @param {Array} files - An array of file objects representing the current state of the files.
 * @param {string} owner - The owner of the repository.
 * @param {string} repoName - The name of the repository.
 * @param {string} path - The path in the repository to list the files from.
 * @param {Array} [filesExtentions] - Optional list of file extensions to consider; when omitted,
 *   the default of `getLastFileVersion` applies.
 *
 * @returns {Promise<void>} A promise that resolves when the file list has been updated. The updated list is directly modified in the `files` parameter.
 */
async function listDocumentationFiles(files, owner, repoName, path, filesExtentions) {
  const latestFiles = await getLastFileVersion(owner, repoName, path, filesExtentions);

  // Index the known files by path once instead of rescanning the list for every new file.
  // The first occurrence of a path wins.
  const indexByPath = new Map();
  files.forEach((knownFile, index) => {
    if (!indexByPath.has(knownFile.path)) {
      indexByPath.set(knownFile.path, index);
    }
  });

  for (const latest of latestFiles) {
    const index = indexByPath.get(latest.path);

    if (index === undefined) {
      // add a new file
      indexByPath.set(latest.path, files.length);
      files.push(latest);
      continue;
    }

    const known = files[index];
    if (known.sha !== latest.sha) {
      // TODO: handle files updates here, so we don't have to retranslate them
      latest.history = known;
      files[index] = latest;
    }
  }
}
/**
 * Downloads the documentation files from a given GitHub repository.
 *
 * Files that already have a non-empty `raw` property are not downloaded again. For every other
 * file the content is fetched from `download_url`, stored in `raw`, and the number of tokens
 * produced by `encode` is stored in `token`.
 *
 * @param {string} owner - The owner of the repository.
 * @param {string} repoName - The name of the repository.
 * @param {Array} files - An array of file objects to be downloaded. Each object should have a `download_url` property.
 *
 * @returns {Promise<void>} A promise that resolves when all files have been downloaded.
 *
 * @throws {Error} If a download answers with a non-success HTTP status.
 */
async function downloadDocumentationFiles(owner, repoName, files) {
  for (const file of files) {
    // do not reload file if already loaded
    if (file.raw) {
      continue;
    }

    const response = await fetch(file.download_url);
    if (!response.ok) {
      // Without this check the body of an error page would be stored and translated as documentation.
      throw new Error(`Failed to download ${file.download_url}: ${response.status} ${response.statusText}`);
    }

    const text = await response.text();
    file.raw = text;
    file.token = encode(text).length;
  }
}

/**
 * Splits a markdown string into a flat list of sections.
 *
 * The list always starts with a level-0 section holding whatever precedes the first heading.
 * Every ATX heading (`#` to `######` followed by a space) opens a new section; all other lines are
 * appended, with a trailing newline, to the current section's `content`. Lines inside fenced code
 * blocks (``` or ~~~) are never treated as headings, so `# comment` lines in code stay in the code.
 *
 * @param {string} file - The markdown text.
 *
 * @returns {Array<{title: string, level: number, content: string}>} The sections, in document order.
 */
function parseMdStrToTree(file) {
  let section = { title: '', level: 0, content: '' };
  const doc = [section];
  // Marker ("```", "~~~~", ...) of the fence currently open, or null outside code blocks.
  let openFence = null;

  for (const line of file.split('\n')) {
    const fence = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
    if (fence) {
      const [, marker, rest] = fence;
      if (openFence === null) {
        // The info string of a backtick fence cannot contain backticks; that would be inline code.
        if (!(marker[0] === '`' && rest.includes('`'))) {
          openFence = marker;
        }
      } else if (marker[0] === openFence[0] && marker.length >= openFence.length && rest.trim() === '') {
        openFence = null;
      }
    }

    const heading = openFence === null ? line.match(/^(#{1,6}) /) : null;
    if (heading) {
      const level = heading[1].length;
      section = {
        title: line.substring(level + 1),
        level: level,
        content: ''
      };
      doc.push(section);
    } else {
      section.content += line + '\n';
    }
  }

  return doc;
}

/**
 * Renders a list of sections back into a markdown string.
 *
 * When `code` is given and a section has a `title_${code}` or `content_${code}` translation, the
 * first alternative of each is used, falling back to the original title/content for whichever of
 * the two is missing. Otherwise the original title and content are used, followed by a newline.
 * Level-0 sections have no heading line.
 *
 * @param {Array} doc - The sections to render.
 * @param {string} [code=''] - The language code of the translation to render, or '' for the original.
 *
 * @returns {string} The markdown text.
 */
function parseTreeToMdStr(doc, code = '') {
  let str = '';

  for (const block of doc) {
    const headingPrefix = block.level > 0 ? '#'.repeat(block.level) + ' ' : null;
    const translatedTitle = code ? block[`title_${code}`] : undefined;
    const translatedContent = code ? block[`content_${code}`] : undefined;

    if (translatedTitle || translatedContent) {
      if (headingPrefix !== null) {
        str += headingPrefix + (translatedTitle?.[0] ?? block.title) + '\n';
      }
      str += translatedContent?.[0] ?? block.content;
    } else {
      if (headingPrefix !== null) {
        str += headingPrefix + block.title + '\n';
      }
      str += block.content + '\n';
    }
  }

  return str;
}
"Korean", + "pt": "Portuguese", + "it": "Italian", + "ar": "Arabic", + "tr": "Turkish", + "pl": "Polish", + "nl": "Dutch", + "vi": "Vietnamese", + "th": "Thai", + "pe": "Persian", + "ro": "Romanian", + "sv": "Swedish", + "hu": "Hungarian", + "cs": "Czech", + "el": "Greek", + "da": "Danish", + "fi": "Finnish", + "he": "Hebrew", + "no": "Norwegian", + "hi": "Hindi", + "zh_tw": "Traditional Chinese", + "in": "Indonesian", + "sl": "Slovak", + "se": "Serbian", + "sk": "Slovenian", + "uk": "Ukrainian", + "bg": "Bulgarian", + "hr": "Croatian", + "lt": "Lithuanian", + "lv": "Latvian", + "et": "Estonian", + "cat": "Catalan" +} \ No newline at end of file diff --git a/src/supportedLanguagesWarning.json b/src/supportedLanguagesWarning.json new file mode 100644 index 0000000..9a8ac25 --- /dev/null +++ b/src/supportedLanguagesWarning.json @@ -0,0 +1,40 @@ +{ + "zh-Hans": "此文档已自动翻译,可能包含错误。如有更改建议,请毫不犹豫地提交 Pull Request。", + "es": "Esta documentación ha sido traducida automáticamente y puede contener errores. No dudes en abrir una Pull Request para sugerir cambios.", + "ru": "Эта документация была автоматически переведена и может содержать ошибки. Не стесняйтесь открывать Pull Request для предложения изменений.", + "de": "Diese Dokumentation wurde automatisch übersetzt und kann Fehler enthalten. Zögern Sie nicht, einen Pull Request zu öffnen, um Änderungen vorzuschlagen.", + "fr": "Cette documentation a été traduite automatiquement et peut contenir des erreurs. N'hésitez pas à ouvrir une Pull Request pour suggérer des modifications.", + "jp": "このドキュメントは自動的に翻訳されており、誤りを含んでいる可能性があります。変更を提案するためにプルリクエストを開くことを躊躇しないでください。", + "ko": "이 문서는 자동 번역되었으며 오류가 포함될 수 있습니다. 변경 사항을 제안하려면 Pull Request를 열어 주저하지 마십시오.", + "pt": "Esta documentação foi traduzida automaticamente e pode conter erros. Não hesite em abrir um Pull Request para sugerir alterações.", + "it": "Questa documentazione è stata tradotta automaticamente e può contenere errori. 
Non esitare ad aprire una Pull Request per suggerire modifiche.", + "ar": "تمت ترجمة هذه الوثيقة تلقائيًا وقد تحتوي على أخطاء. لا تتردد في فتح طلب سحب لاقتراح تغييرات.", + "tr": "Bu belge otomatik olarak çevrilmiştir ve hatalar içerebilir. Değişiklik önermek için bir Pull Request açmaktan çekinmeyin.", + "pl": "Ta dokumentacja została przetłumaczona automatycznie i może zawierać błędy. Nie wahaj się otworzyć Pull Request, aby zaproponować zmiany.", + "nl": "Deze documentatie is automatisch vertaald en kan fouten bevatten. Aarzel niet om een Pull Request te openen om wijzigingen voor te stellen.", + "vi": "Tài liệu này đã được dịch tự động và có thể chứa lỗi. Đừng ngần ngại mở một Pull Request để đề xuất thay đổi.", + "th": "เอกสารนี้ได้รับการแปลโดยอัตโนมัติและอาจมีข้อผิดพลาด อย่าลังเลที่จะเปิด Pull Request เพื่อแนะนำการเปลี่ยนแปลง.", + "pe": "این مستند به طور خودکار ترجمه شده و ممکن است حاوی اشتباهات باشد. در صورت پیشنهاد تغییرات، دریغ نکنید از باز کردن یک Pull Request.", + "ro": "Această documentație a fost tradusă automat și poate conține erori. Nu ezitați să deschideți un Pull Request pentru a sugera modificări.", + "sv": "Denna dokumentation har översatts automatiskt och kan innehålla fel. Tveka inte att öppna en Pull Request för att föreslå ändringar.", + "hu": "Ezt a dokumentációt automatikusan fordították le, és tartalmazhat hibákat. Ne habozzon nyitni egy Pull Requestet a változtatások javasolására.", + "cs": "Tato dokumentace byla automaticky přeložena a může obsahovat chyby. Neváhejte otevřít Pull Request pro navrhování změn.", + "el": "Αυτό το έγγραφο έχει μεταφραστεί αυτόματα και μπορεί να περιέχει λάθη. Μη διστάσετε να ανοίξετε ένα Pull Request για να προτείνετε αλλαγές.", + "da": "Denne dokumentation er blevet automatisk oversat og kan indeholde fejl. Tøv ikke med at åbne en Pull Request for at foreslå ændringer.", + "fi": "Tämä dokumentaatio on käännetty automaattisesti ja se saattaa sisältää virheitä. 
Älä epäröi avata Pull Requestia ehdottaaksesi muutoksia.", + "he": "התיעוד הזה תורגם באופן אוטומטי ועשוי להכיל טעויות. אל תהסס לפתוח בקשת משיכה כדי להציע שינויים.", + "no": "Denne dokumentasjonen har blitt automatisk oversatt og kan inneholde feil. Ikke nøl med å åpne en Pull Request for å foreslå endringer.", + "hi": "इस दस्तावेज़ का अनुवाद स्वचालित रूप से किया गया है और इसमें त्रुटियाँ हो सकती हैं। परिवर्तन सुझाने के लिए पुल रिक्वेस्ट खोलने में संकोच न करें।", + "zh_tw": "此文件已自動翻譯,可能包含錯誤。如有更改建議,請毫不猶豫地提交 Pull Request。", + "in": "Dokumentasi ini telah diterjemahkan secara otomatis dan mungkin mengandung kesalahan. Jangan ragu untuk membuka Pull Request untuk mengusulkan perubahan.", + "sl": "Táto dokumentácia bola automaticky preložená a môže obsahovať chyby. Neváhajte otvoriť Pull Request na navrhnutie zmien.", + "se": "Ova dokumentacija je automatski prevedena i može sadržati greške. Ne oklevajte da otvorite Pull Request za predlaganje izmena.", + "sk": "Ta dokumentacija je bila samodejno prevedena in lahko vsebuje napake. Ne oklevajte odpreti Pull Request za predlaganje sprememb.", + "uk": "Ця документація була автоматично перекладена і може містити помилки. Не соромтеся відкривати Pull Request, щоб запропонувати зміни.", + "bg": "Тази документация е преведена автоматично и може да съдържа грешки. Не се колебайте да отворите Pull Request, за да предложите промени.", + "hr": "Ova dokumentacija je automatski prevedena i može sadržavati greške. Ne ustručavajte se otvoriti Pull Request za predlaganje promjena.", + "lt": "Ši dokumentacija buvo automatiškai išversta ir gali turėti klaidų. Nedvejodami atidarykite Pull Request, jei norite pasiūlyti pakeitimus.", + "lv": "Šis dokuments ir automātiski tulkots un var saturēt kļūdas. Nevilciniet atvērt Pull Request, lai ierosinātu izmaiņas.", + "et": "See dokumentatsioon on tõlgitud automaatselt ja võib sisaldada vigu. 
/**
 * Wait a little
 *
 * @param {number} ms - The number of milliseconds to wait.
 *
 * @returns {Promise<void>} A promise that resolves when the time has elapsed.
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Detects a repeated pattern at the end of a string.
 *
 * Candidate patterns are the suffixes of the string, shortest first. The first suffix that occurs
 * four times in a row at the end of the string is returned.
 *
 * @param {string} str - The string in which to detect a repeated pattern.
 *
 * @returns {string} The detected pattern, or an empty string when there is none.
 */
function detectRepeatedPattern(str) {
  // Four repetitions have to fit in the string, so longer candidates can never match.
  const longestCandidate = Math.floor(str.length / 4);
  for (let size = 1; size <= longestCandidate; size++) {
    const pattern = str.slice(-size);
    if (str.endsWith(pattern.repeat(4))) {
      return pattern;
    }
  }
  return '';
}

/**
 * Removes repeated patterns from the end of a string. This tend to happen with bad LLM that will
 * repeat the same token many times.
 *
 * @param {string} str - The string from which to remove repeated patterns.
 *
 * @returns {string} The string with every trailing repetition of the detected pattern collapsed
 *   into a single instance, or the original string when no pattern is detected.
 */
function removeRepeatedPattern(str) {
  const pattern = detectRepeatedPattern(str);
  if (pattern === '') {
    return str;
  }

  let trimmed = str;
  while (trimmed.endsWith(pattern)) {
    trimmed = trimmed.substring(0, trimmed.length - pattern.length);
  }
  return trimmed + pattern;
}

/**
 * Counts the markdown links and images (`[label](target)`) in a text.
 */
function countMarkdownLinks(markdown) {
  return (markdown.match(/\[[^\]]*\]\([^)]*\)/g) ?? []).length;
}

/**
 * Picks the correction request to send back to the model, or null when the translation has the
 * expected number of sections and links.
 */
function reviewTranslation(sectionCount, expectedSections, linkCount, expectedLinks) {
  if (sectionCount > expectedSections) {
    return "The translation is seems too long, did you add too much content? redo it correctly. Only output the translation, no other contexts";
  }
  if (sectionCount < expectedSections) {
    return "The translation is seems too miss some paragraph, did you forget some content? redo it correctly. Only output the translation, no other contexts";
  }
  if (linkCount < expectedLinks) {
    return "The translation is missing some Links!. Redo it correctly. Only output the translation, no other contexts";
  }
  if (linkCount > expectedLinks) {
    return "The translation have EXTRA Links!. Redo it correctly. Only output the translation, no other contexts";
  }
  return null;
}

/**
 * Translates a tree of text into a specified language.
 *
 * The sections are rendered to markdown, sent to the chat model and the answer is parsed back into
 * sections. When the answer does not have the same number of sections or the same number of links
 * as the original, the model is told what is wrong and asked again, at most twice. Errors raised
 * while talking to the model are logged, and the best result obtained so far (possibly an empty
 * array) is returned.
 *
 * @param {Array} textTree - An array of text blocks to be translated. Each block should be an object with `title`, `level` and `content` properties.
 * @param {string} language - The language for the translation.
 *
 * @returns {Promise<Array>} A promise that resolves to an array of translated text blocks. Each block is an object with `title`, `level` and `content` properties.
 */
async function translateTextTree(textTree, language) {
  // TODO: broke this function into a pipeline
  const text = mdUtils.parseTreeToMdStr(textTree);
  const expectedLinks = countMarkdownLinks(text);
  let translationTree = [];

  try {
    const messages = [
      {
        "role": "system",
        "content": `
You are a expert technical writer. Your goal is to translate a technical documentation in ${language}. The user will provide the documentation in a markdown format. Translate it to ${language}.

Guidelines:
- Only output the translated documentation in Markdown, do not add or remove content.
- Do not try to translate function/api endpoint name, only translate the documentation.
- If the documentation contain codeblocks, only translate commentaries, do not translate variable names / function names.
- Try to output it in ${language} that is easy to read, do not try to translate all expressions verbatim, make it so it feel professional.
- If ${language} usually use the english word for a thing, keep it in English.
- Module names should be kept in English, add best effort translation. Always keep the original name in parenthesis, e.g. "聊天引擎 (ChatEngine)"

Additional Notes:
- it's a computer doc, build mean 'compile', watch mean 'looking at file that change',
- Keep the same structure as the original documentation, and retain ALL the links / images.
`,
      },
      {
        "role": "user",
        "content": `The markdown to translate (remember do not add extra content or remove content, just translate verbatim).\n Do not expend on content, only translate the documentation. The proposed text to translate may only contain a title, in this case only translate the title. Original in English:\n\n ${text} "\n\nTranslation in ${language}:\n\n`
      }
    ];

    let answer = removeRepeatedPattern((await llm.chat(messages)).message.content);
    translationTree = mdUtils.parseMdStrToTree(answer);

    // Not a good translation, try again. Each retry adds two messages, so the length check
    // allows at most two retries.
    while (messages.length < 5) {
      // TODO: add more checks here
      const feedback = reviewTranslation(
        translationTree.length,
        textTree.length,
        countMarkdownLinks(answer),
        expectedLinks
      );
      if (feedback === null) {
        break;
      }

      messages.push(
        { "role": "assistant", "content": answer },
        { "role": "user", "content": feedback }
      );

      answer = removeRepeatedPattern((await llm.chat(messages)).message.content);
      translationTree = mdUtils.parseMdStrToTree(answer);
    }
  }
  catch (error) {
    console.log(error);
  }

  return translationTree;
}

/**
 * Translates a single file into a specified language.
 *
 * The file is parsed into `file.doc` when that has not been done yet. Blocks are then processed
 * from the last to the first; a block that already has a `title_<code>` or `content_<code>`
 * property is skipped. Each remaining block is translated together with its chain of enclosing
 * headings, and the result is appended to the `title_<code>` / `content_<code>` arrays of every
 * block of that chain. When the translation does not have the expected number of sections,
 * `file.translationError[code]` is set to true.
 *
 * @param {Object} file - The file object to be translated. It should have a `raw` property containing the content to be translated, and a `name` property for logging purposes.
 * @param {string} language - The language for the translation.
 * @param {string} code - The language code for the translation.
 *
 * @returns {Promise<void>} A promise that resolves when the file has been translated.
 */
async function translateAFile(file, language, code) {
  console.log(`translating file: ${file.name} to ${suportedLanguages[code]} (${code})`);
  if (!file.doc) {
    file.doc = mdUtils.parseMdStrToTree(file.raw);
  }

  const titleKey = "title_" + code;
  const contentKey = "content_" + code;

  // translate from bottom up to avoid solo title.
  for (let i = file.doc.length - 1; i >= 0; i--) {
    const block = file.doc[i];
    if (block[titleKey] || block[contentKey]) {
      // do not retranslate block!
      continue;
    }

    // Walk upwards and prepend each heading that encloses the block.
    const blocks = [block];
    let level = block.level;
    for (let j = i - 1; j >= 0; j--) {
      if (file.doc[j].level < level) {
        level = file.doc[j].level;
        blocks.unshift(file.doc[j]);
      }
    }

    const translationTree = await translateTextTree(blocks, language);

    if (translationTree.length != blocks.length) {
      if (!file.translationError) {
        file.translationError = {};
      }
      file.translationError[code] = true;
    }

    for (let j = 0; j < blocks.length; j++) {
      if (!blocks[j][titleKey]) {
        blocks[j][titleKey] = [];
      }
      if (!blocks[j][contentKey]) {
        blocks[j][contentKey] = [];
      }

      blocks[j][titleKey].push(translationTree[j]?.title || '\n');
      blocks[j][contentKey].push(translationTree[j]?.content || '\n');
    }
  }
}
+ */ +async function translateFiles(files, language, code, savepath) { + let finished_jobs = 0; + let started_jobs = 0; + for (let file of files) { + started_jobs++; + let fn = async function() { + await translateAFile(file, language, code); + await fs.writeFile(savepath, JSON.stringify(files, null, 2), 'utf8'); + finished_jobs++; + } + fn(); + } + let previously_display_finished_job = 0; + // wait that all jobs finished + while (finished_jobs < started_jobs) { + await sleep(100); + if (previously_display_finished_job != finished_jobs) { + console.log(finished_jobs + '/' + started_jobs); + previously_display_finished_job = finished_jobs; + } + } +} + +/** + * Translates the documentation files from a given GitHub repository. + * + * @param {Object} options - The options for the translation. + * @param {string} options.repoOwner - The owner of the repository. + * @param {string} options.repoName - The name of the repository. + * @param {string} options.repoDocDir - The directory of the documentation in the repository. + * @param {string} options.language - The language for the translation. + * @param {string} options.languageCode - The language code for the translation. + * @param {string} options.savePath - The path where the translated documentation should be saved. + * @param {boolean} [options.loadFile=true] - Whether to load the files from GitHub (default is true). + * + * @returns {Promise} A promise that resolves when the translation is complete. + */ +async function translate(options) { + + const repoOwner = options.repoOwner; + const repoName = options.repoName; + const repoDocDir = options.repoDocDir; + const language = options.language; + const languageCode = options.languageCode; + const savePath = options.savePath; + const loadFile = options.loadFile || (options.loadFile === undefined) ? 
true : false; // default to true + + let files = []; + let savepath = `${savePath}/${repoOwner}/${repoName}.json`; + try { + await fs.access(savepath); + files = require(savepath); + } catch { + console.log("Creating new save file!"); + // create the save oath + let dirs = savepath.split('/'); + let dir = ''; + for (let i = 0; i < dirs.length - 1; i++) { + dir += dirs[i] + '/'; + try { + await fs.access(dir); + } catch (error) { + await fs.mkdir(dir); + } + } + } + + if (loadFile) { + console.log("Loading files from Github"); + await githubUtils.listDocumentationFiles(files, repoOwner, repoName, repoDocDir, ['.md', '.mdx']); + await githubUtils.downloadDocumentationFiles(repoOwner, repoName, files); + } + + await translateFiles(files, language, languageCode, savepath); +} + + + +module.exports = translate; \ No newline at end of file diff --git a/supportedLanguages.json b/supportedLanguages.json deleted file mode 100644 index 21fa84b..0000000 --- a/supportedLanguages.json +++ /dev/null @@ -1,100 +0,0 @@ -{ - "af": "Afrikaans", - "am": "Amharic", - "ar": "Arabic", - "az": "Azerbaijani", - "bg": "Bulgarian", - "bn": "Bengali", - "bs": "Bosnian", - "ceb": "Cebuano", - "co": "Corsican", - "cs": "Czech", - "cy": "Welsh", - "da": "Danish", - "de": "German", - "el": "Greek", - "en": "English", - "eo": "Esperanto", - "es": "Spanish", - "et": "Estonian", - "eu": "Basque", - "fa": "Persian", - "fi": "Finnish", - "fr": "French", - "fy": "Frisian", - "ga": "Irish", - "gd": "Scots Gaelic", - "gl": "Galician", - "gu": "Gujarati", - "ha": "Hausa", - "haw": "Hawaiian", - "he": "Hebrew", - "hi": "Hindi", - "hr": "Croatian", - "ht": "Haitian Creole", - "hu": "Hungarian", - "hy": "Armenian", - "id": "Indonesian", - "it": "Italian", - "iw": "Hebrew", - "ja": "Japanese", - "jw": "Javanese", - "ka": "Georgian", - "kk": "Kazakh", - "kn": "Kannada", - "ko": "Korean", - "ku": "Kurdish (Kurmanji)", - "ky": "Kyrgyz", - "la": "Latin", - "lb": "Luxembourgish", - "lo": "Lao", - "lt": 
"Lithuanian", - "lv": "Latvian", - "mg": "Malagasy", - "mi": "Maori", - "mk": "Macedonian", - "ml": "Malayalam", - "mn": "Mongolian", - "mr": "Marathi", - "ms": "Malay", - "mt": "Maltese", - "my": "Myanmar (Burmese)", - "ne": "Nepali", - "nl": "Dutch", - "no": "Norwegian", - "ny": "Chichewa", - "pi": "Punjabi", - "pl": "Polish", - "ps": "Pashto", - "pt": "Portuguese", - "ro": "Romanian", - "ru": "Russian", - "sd": "Sindhi", - "si": "Sinhala", - "sk": "Slovak", - "sl": "Slovenian", - "sm": "Samoan", - "sn": "Shona", - "so": "Somali", - "sq": "Albanian", - "st": "Sesotho", - "su": "Sundanese", - "sv": "Swedish", - "sw": "Swahili", - "ta": "Tamil", - "te": "Telugu", - "tg": "Tajik", - "th": "Thai", - "tl": "Filipino", - "tr": "Turkish", - "uk": "Ukrainian", - "ur": "Urdu", - "uz": "Uzbek", - "vi": "Vietnamese", - "xh": "Xhosa", - "yi": "Yiddish", - "yo": "Yoruba", - "zh-Hans": "Simplified Chinese", - "zh_tw": "Traditional Chinese", - "zu": "Zulu" - } \ No newline at end of file diff --git a/translate.js b/translate.js deleted file mode 100644 index 148d703..0000000 --- a/translate.js +++ /dev/null @@ -1,540 +0,0 @@ -const fs = require('fs/promises'); - -const { OpenAI } = require("llamaindex"); - -const llm = new OpenAI({ model: "gpt-3.5-turbo-1106", temperature: 0, apiKey: process.env["OPENAI_API_KEY"], maxRetries: 5}); - - -const {Octokit} = require("@octokit/core"); - -const {encode} = require("gpt-tokenizer"); - - -const octokit = new Octokit({ - auth: process.env["GITHUB_PERSONAL_ACCESS_TOKEN"] -}) - -let suportedLanguages = require('./supportedLanguages.json'); - -async function getLastFileVersion(owner, repoName, path) { - let docFiles = []; - const response = await octokit.request('GET /repos/{owner}/{repo}/contents/{path}', { - owner: owner, - repo: repoName, - path: path, - headers: { - 'X-GitHub-Api-Version': '2022-11-28' - } - }) - for (const file of response.data) { - if (file.type === 'dir') { - const files = await getLastFileVersion(owner, repoName, 
file.path); - docFiles = docFiles.concat(files); - } - if (file.type === 'file' && (file.name.endsWith('.md') || file.name.endsWith('.mdx'))) { - // check if file is a translation - if (file.name.indexOf('/i18n/') !== -1) { - continue; - } - // check if file path contain one of supportedLanguages.keys() - let isLikelyTranslation = false; - - for (let langCode of Object.keys(suportedLanguages)) { - if (file.path.indexOf(`/${langCode}/`) !== -1) { - isLikelyTranslation = true; - break; - } - } - if (isLikelyTranslation) { - continue; - } - docFiles.push(file); - } - } - - return docFiles; -} - -async function listDocFiles(files, owner, repoName, path) { - let newFiles = await getLastFileVersion(owner, repoName, path); - console.log(newFiles); - - for (let file of newFiles) { - let fileHandledFlag = false; - for (let [index, oldFile] of files.entries()) { - if (oldFile.path === file.path && oldFile.sha !== file.sha) { - // TODO: handle files updates here, so we don't have to retranslate them - fileHandledFlag = true; - files[index] = file; - file.history = oldFile; - break; - } - else if (oldFile.path === file.path) { - fileHandledFlag = true; - break; - } - } - - if (!fileHandledFlag) { - // add a new file - files.push(file); - } - - } -} - -async function loadFiles(owner, repoName, files) { - - for (let file of files) { - // do not reload file if already loaded - if (!file.raw) { - const response = await fetch(file.download_url); - const text = await response.text(); - - file.raw = text; - file.token = encode(text).length; - } - - } -} - -// a function that takes a md file as a string and return a document hierarchical object -function parseMdStrToTree(file) { - const lines = file.split('\n'); - const doc = []; - let section = { title: '', level: 0, content: '' }; - doc.push(section); - for (let line of lines) { - const match = line.match(/^(#{1,6}) /); - if (match) { - const level = match[1].length; - const title = line.substring(level + 1); - section = { - title: 
title, - level: level, - content: '' - }; - doc.push(section); - } else { - section.content += line + '\n'; - } - } - return doc; -} - -// a function that take a document hierarchical object and return a md file as a string -function parseTreeToMdStr(doc, code='') { - let str = ''; - for (let block of doc) { - if (code) { - if (block.level > 0) { - str += '#'.repeat(block.level) + ' ' + block[`title_${code}`][0] + '\n'; - } - str += block[`content_${code}`][0] + '\n'; - } - else { - str += '#'.repeat(block.level) + ' ' + block[`title`] + '\n'; - str += block[`content`] + '\n'; - } - - } - return str; -} - -// a sleep function as async -function sleep(ms) { - return new Promise(resolve => setTimeout(resolve, ms)); -} - -// GPT have a tendency to loop on a word / token, here we match the last word of the string with the last word of the string - 1, and return the number of time it match -function detectRepeatedPattern(str) { - let pattern = ''; - for (let i = str.length - 1; i >= 0; i--) { - pattern = str[i] + pattern; - const patternRepeated = pattern + pattern + pattern + pattern; - if (str.endsWith(patternRepeated)) { - return pattern; - } - } - return ''; -} - -function removeRepeatedPattern(str) { - const pattern = detectRepeatedPattern(str); - if (pattern !== '') { - while (str.endsWith(pattern)) { - str = str.substring(0, str.length - pattern.length); - } - return str + pattern; - } - - return str; -} -async function translateTextTree(textTree, language) { - - let text = parseTreeToMdStr(textTree); - let messages = []; - - - try { - - messages = [ - { - "role": "system", - "content": ` -You are a expert technical writer. Your goal is to translate a technical documentation in ${language}. The user will provide the documentation in a markdown format. Translate it to ${language}. - -Guidelines: -- Only output the translated documentation in Markdown, do not add or remove content. 
-- Do not try to translate function/api endpoint name, only translate the documentation. -- If the documentation contain codeblocks, only translate commentaries, do not translate variable names / function names. -- Try to output it in ${language} that is easy to read, do not try to translate all expressions verbatim, make it so it feel professional. -- If ${language} usually use the english word for a thing, keep it in English. -- Module names should be kept in English, add best effort translation. Always keep the original name in parenthesis, e.g. "聊天引擎 (ChatEngine)" - -Additional Notes: -- it's a computer doc, build mean 'compile', watch mean 'looking at file that change', -- Keep the same structure as the original documentation, and retain ALL the links / images. -`, - }, - { - "role": "user", - "content": `The markdown to translate (remember do not add extra content or remove content, just translate verbatim).\n Do not expend on content, only translate the documentation. The proposed text to translate may only contain a title, in this case only translate the title. 
Original in English:\n\n ${text} "\n\nTranslation in ${language}:\n\n` - } - ]; - - - let chatCompletion = await llm.chat(messages); - - - // console.log(chatCompletion.choices[0].message.content) - - - let translationTree = parseMdStrToTree(removeRepeatedPattern(chatCompletion.message.content)); - - let linkmatchesTranslated =chatCompletion.message.content.match(/\[.*\]\(.*\)/g); - - let matchesNonTranslated = text.match(/\[.*\]\(.*\)/g); - - // Not a good translation, try again - while ( - linkmatchesTranslated != matchesNonTranslated - && translationTree.length != textTree.length - && messages.length < 5) { - messages.push({ - "role": "assistant", - "content" : removeRepeatedPattern(chatCompletion.message.content) - }); - - // TODO: add more checks here - if (translationTree.length > textTree.length) { - messages.push({ - "role": "user", - "content" : "The translation is seems too long, did you add too much content? redo it correctly. Only output the translation, no other contexts" - }); - } - else if (translationTree.length < textTree.length) { - messages.push({ - "role": "user", - "content" : "The translation is seems too short, did you forget some content? redo it correctly. Only output the translation, no other contexts" - }); - } - else if (linkmatchesTranslated < matchesNonTranslated) { - messages.push({ - "role": "user", - "content" : "The translation is missing some Links!. Redo it correctly. Only output the translation, no other contexts" - }); - } - else if (linkmatchesTranslated > matchesNonTranslated) { - messages.push({ - "role": "user", - "content" : "The translation have EXTRA Links!. Redo it correctly. 
Only output the translation, no other contexts" - }); - } - - console.log(messages); - chatCompletion = await llm.chat(messages); - translationTree = parseMdStrToTree(removeRepeatedPattern(chatCompletion.message.content)); - linkmatchesTranslated = chatCompletion.message.content.match(/\[.*\]\(.*\)/g); - - //console.log(chatCompletion.message.content) - } - - - return translationTree; - } - catch (e) { - console.log(e); - console.log('sleeping'); - console.log("=======\eerror on ", JSON.stringify(messages, null, 2)); - await sleep(30000) - return translateTextTree(textTree, language); - } - -} - - -async function translateAFile(file, language, code) { - console.log(`translating file: ${file.name} to ${suportedLanguages[code]} (${code})`); - if (!file.doc) { - file.doc = parseMdStrToTree(file.raw); - } - // translate from bottom up to avoid solo title. - for (let i = file.doc.length - 1; i >= 0; i--) { - let block = file.doc[i]; - if (block["title_" + code] || block["content_" + code]) { - // do not retranslate block! 
- continue; - } - - let blocks = [block]; - let level = block.level; - - for (j = i - 1; j >= 0; j--) { - if (file.doc[j].level < level) { - level = file.doc[j].level; - blocks.unshift(file.doc[j]); - } - } - - let translationTree = await translateTextTree(blocks, language); - - if (translationTree.length != blocks.length) { - if (!file.translationError) { - file.translationError = {}; - } - file.translationError[code] = true; - - } - - for (let j = 0; j < blocks.length; j++) { - if (!blocks[j]["title_" + code]) { - blocks[j]["title_" + code] = [] - } - if (!blocks[j]["content_" + code]) { - blocks[j]["content_" + code] = [] - } - - blocks[j]["title_" + code].push(translationTree[j]?.title || '\n'); - blocks[j]["content_" + code].push(translationTree[j]?.content || '\n'); - } - - } - -} - -async function translateFiles(files, language, code, savepath) { - let finished_jobs = 0; - let started_jobs = 0; - for (let file of files) { - started_jobs++; - let fn = async function() { - await translateAFile(file, language, code); - await fs.writeFile(savepath, JSON.stringify(files, null, 2), 'utf8'); - finished_jobs++; - } - fn(); - } - let previously_display_finished_job = 0; - // wait that all jobs finished - while (finished_jobs < started_jobs) { - await sleep(100); - if (previously_display_finished_job != finished_jobs) { - console.log(finished_jobs + '/' + started_jobs); - previously_display_finished_job = finished_jobs; - } - } -} - - -async function correctLinkInFile(file, languageCode, docDir) { - for (let block of file.doc) { - // find all markdown local link in block.content, BUT NOT IMAGES - if (!block[`content_${languageCode}`]) { - continue - } - - let matchesTranslated = block[`content_${languageCode}`][0].match(/\[.*\]\(.*\)/g); - - let matchesNonTranslated = block[`content`].match(/\[.*\]\(.*\)/g); - - if (!matchesTranslated?.length != matchesNonTranslated?.length) { - file.likelyLinkError = true; - block[`likelyLinkError_${languageCode}`]= true; - continue; 
- } - - if (matchesNonTranslated?.length && matchesTranslated?.length) { - - for (let [index, nonTranslatedMatch] of matchesNonTranslated.entries()) { - let translatedMatch = matchesTranslated[index]; - - // replace translatedMatch with nonTranslatedMatch url with /{languageCode}/ append - let newUrl = nonTranslatedMatch.match(/\(.*\)/)[0]; - - // check if url is a local link (start with . or .. or /) - if (newUrl.match(/^\(.*\)$/)) { - // MAKE THE URL RELATIVE TO `/${docDir}/${languageCode}/` - newUrl = newUrl.substring(1, newUrl.length - 1); - newUrl = `(/${docDir}/${languageCode}/${newUrl})`; - } - - - - let urlText = translatedMatch.match(/\[.*\]/)[0]; - urlText = urlText.substring(1, urlText.length - 1); - urlText = `[${urlText}]`; - - // console.log('Replace by', urlText + newUrl, 'match', translatedMatch); - - // console.log( block[`content_${languageCode}`]) - block[`content_${languageCode}`][0] = block[`content_${languageCode}`][0].replace(translatedMatch, urlText + newUrl); - - - } - - } - - - } - -} - -async function buildOutputMd(files, languageCode, targetDir, prefixToRemove) { - - for (let file of files) { - // check if file is translated in target language - if (!file.doc || file.doc[0][`content_${languageCode}`] === undefined) { - console.log('failed to find translation', file.path); - continue - } - - await correctLinkInFile(file, languageCode); - - let translatedMd = parseTreeToMdStr(file.doc, languageCode); - let filePath = file.path.replace(prefixToRemove, ''); - let path = `${targetDir}/${filePath}`; - - // check if every directory in path exists and create if not - let dirs = path.split('/'); - let dir = ''; - for (let i = 0; i < dirs.length - 1; i++) { - dir += dirs[i] + '/'; - try { - await fs.access(dir); - } catch (error) { - await fs.mkdir(dir); - } - } - console.log(path); - await fs.writeFile(path, translatedMd, 'utf8'); - } -} - -async function printFiles(files, owner, repoName, repoDocDir) { - let targetDir = 
`${__dirname}/build/${owner}/${repoName}/${repoDocDir}`; - for (let file of files) { - - let path = file.path.replace(repoDocDir, targetDir); - // check if every directory in path exists and create if not - let dirs = path.split('/'); - let dir = ''; - for (let i = 0; i < dirs.length - 1; i++) { - dir += dirs[i] + '/'; - try { - await fs.access(dir); - } catch (error) { - await fs.mkdir(dir); - } - } - await fs.writeFile(path, file.raw, 'utf8'); - } -} - -async function translateDoc(options) { - - const repoOwner = options.repoOwner; - const repoName = options.repoName; - const repoDocDir = options.repoDocDir; - const language = options.language; - const languageCode = options.languageCode; - const savePath = options.savePath; - const loadFile = options.loadFile || (options.loadFile === undefined) ? true : false; // default to true - - // const files = await listDocFiles(owner, repoName, repoDocDir); - let files = []; - let savepath = `${savePath}/${repoOwner}/${repoName}.json`; - try { - await fs.access(savepath); - console.log("Loading files from save file successfully") - files = require(savepath); - } catch { - // create the save oath - let dirs = savepath.split('/'); - let dir = ''; - for (let i = 0; i < dirs.length - 1; i++) { - dir += dirs[i] + '/'; - try { - await fs.access(dir); - } catch (error) { - await fs.mkdir(dir); - } - } - } - - if (loadFile) { - console.log("Loading files from Github"); - await listDocFiles(files, repoOwner, repoName, repoDocDir); - await loadFiles(repoOwner, repoName, files); - } - - await translateFiles(files, language, languageCode, savepath); -} - - -async function buildDoc(options) { - const repoOwner = options.repoOwner; - const repoName = options.repoName; - const languageCode = options.languageCode; - const savePath = options.savePath; - const outputPath = options.outputPath; - const prefixToRemove = options.prefixToRemove; - - - - // const files = await listDocFiles(owner, repoName, repoDocDir); - let files = []; - let 
savepath = `${savePath}/${repoOwner}/${repoName}.json`; - try { - await fs.access(savepath); - files = require(savepath); - console.log("Loading files from save file successfully") - } catch { - throw new Error('No save file found, please run translate first'); - } - - let outPath = outputPath; - try { - await fs.access(outPath); - } catch { - // create the out path - let dirs = outPath.split('/'); - let dir = ''; - for (let i = 0; i < dirs.length - 1; i++) { - dir += dirs[i] + '/'; - try { - await fs.access(dir); - } catch (error) { - await fs.mkdir(dir); - } - } - } - - await buildOutputMd(files, languageCode, outputPath, prefixToRemove); -} - -module.exports = -{ - translateDoc: translateDoc, - buildDoc: buildDoc -} \ No newline at end of file