mirror of
https://github.com/run-llama/automatic-doc-translate.git
synced 2026-06-30 21:37:56 -04:00
better version
This commit is contained in:
+2
-1
@@ -1,4 +1,5 @@
|
||||
node_modules
|
||||
.DS_Store
|
||||
tmp
|
||||
build
|
||||
build
|
||||
save
|
||||
|
||||
@@ -1,24 +1,11 @@
|
||||
const translate = require('./translate.js');
|
||||
|
||||
const express = require('express');
|
||||
|
||||
const app = express();
|
||||
|
||||
app.get('/', (req, res) => {
|
||||
res.send('Hello World!');
|
||||
});
|
||||
|
||||
app.get('/translate', (req, res) => {
|
||||
const { text, lang } = req.query;
|
||||
translate(text, lang)
|
||||
.then((result) => {
|
||||
res.send(result);
|
||||
})
|
||||
.catch((err) => {
|
||||
res.send(err);
|
||||
});
|
||||
});
|
||||
app.use(express.static('public'));
|
||||
app.use(express.static('save'));
|
||||
|
||||
app.listen(process.env.PORT || 3000, (port) => {
|
||||
console.log(`Example app listening on port ${port}!`);
|
||||
}
|
||||
});
|
||||
@@ -0,0 +1,348 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<script src="https://unpkg.com/vue@3/dist/vue.global.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
|
||||
<style>
|
||||
|
||||
body {
|
||||
margin: 0px;
|
||||
font-family: 'Roboto', sans-serif;
|
||||
font-size: 14px;
|
||||
font-weight: normal;
|
||||
line-height: 1.5;
|
||||
color: #333;
|
||||
background-color: #f4f4f4;
|
||||
}
|
||||
|
||||
#app {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
height: 100vh; /* This will make #app take up the full height of the viewport */
|
||||
}
|
||||
|
||||
.navbar {
|
||||
background-color: #333;
|
||||
color: white;
|
||||
padding: 15px;
|
||||
font-size: 14px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.file-explorer {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
justify-content: flex-start;
|
||||
align-items: flex-start;
|
||||
align-content: flex-start;
|
||||
flex-wrap: nowrap;
|
||||
overflow-y: auto; /* This will add a scrollbar if the content is too long */
|
||||
flex-grow: 1; /* This will make .file-explorer take up the remaining space in #app */
|
||||
}
|
||||
|
||||
.file-list {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
overflow-y: auto;
|
||||
flex-wrap: wrap;
|
||||
flex-basis: 300px;
|
||||
border-right: 1px solid #eee;
|
||||
}
|
||||
|
||||
.file-item {
|
||||
padding: 4px;
|
||||
margin: 0px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.file-name {
|
||||
font-size: 14px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.file-overview {
|
||||
flex-grow: 1;
|
||||
flex-shrink: 1;
|
||||
flex-basis: 0;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.file-block {
|
||||
flex-grow: 1;
|
||||
flex-shrink: 1;
|
||||
flex-basis: 0;
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
flex-wrap: wrap;
|
||||
justify-content: space-between;
|
||||
}
|
||||
|
||||
.initial, .translation {
|
||||
flex: 1;
|
||||
padding: 10px;
|
||||
box-sizing: border-box;
|
||||
border: 1px solid #eee;
|
||||
}
|
||||
|
||||
.file-overview .title {
|
||||
font-size: 14px;
|
||||
font-weight: bold;
|
||||
padding : 0px;
|
||||
margin: 0px;
|
||||
}
|
||||
|
||||
.file-overview .content {
|
||||
font-size: 12px;
|
||||
padding : 0px;
|
||||
margin: 0px;
|
||||
}
|
||||
|
||||
|
||||
pre {
|
||||
white-space: pre-wrap; /* CSS 2.1 */
|
||||
white-space: -moz-pre-wrap; /* For Mozilla */
|
||||
white-space: -pre-wrap; /* For Opera 4-6 */
|
||||
white-space: -o-pre-wrap; /* For Opera 7 */
|
||||
word-wrap: break-word; /* For IE 5.5+ and modern browsers */
|
||||
overflow-wrap: anywhere; /* This will break the word at any character to prevent overflow */
|
||||
}
|
||||
|
||||
.home-repo {
|
||||
display: flex;
|
||||
flex-direction: row;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
align-content: center;
|
||||
flex-wrap: nowrap;
|
||||
overflow-y: auto; /* This will add a scrollbar if the content is too long */
|
||||
flex-grow: 1; /* This will make .file-explorer take up the remaining space in #app */
|
||||
}
|
||||
|
||||
.home-repo-input {
|
||||
padding: 10px;
|
||||
font-size: 14px;
|
||||
font-weight: bold;
|
||||
border: 1px solid #eee;
|
||||
border-radius: 5px;
|
||||
margin-right: 10px;
|
||||
}
|
||||
|
||||
.home-repo-button {
|
||||
padding: 10px;
|
||||
font-size: 14px;
|
||||
font-weight: bold;
|
||||
border: 1px solid #eee;
|
||||
border-radius: 5px;
|
||||
background-color: #eee;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.home-repo-button:hover {
|
||||
background-color: #ddd;
|
||||
}
|
||||
|
||||
.home-repo-input:focus {
|
||||
outline: none;
|
||||
}
|
||||
|
||||
.home-repo-input:hover {
|
||||
border: 1px solid #ddd;
|
||||
}
|
||||
|
||||
.home-repo-input:active {
|
||||
border: 1px solid #ddd;
|
||||
}
|
||||
|
||||
.home-repo-language-select {
|
||||
padding: 10px;
|
||||
font-size: 14px;
|
||||
font-weight: bold;
|
||||
border: 1px solid #eee;
|
||||
border-radius: 5px;
|
||||
margin-right: 10px;
|
||||
}
|
||||
|
||||
.home-repo-language-select:focus {
|
||||
outline: none;
|
||||
}
|
||||
|
||||
.home-repo-language-select:hover {
|
||||
border: 1px solid #ddd;
|
||||
}
|
||||
|
||||
.home-repo-language-select:active {
|
||||
border: 1px solid #ddd;
|
||||
}
|
||||
|
||||
|
||||
|
||||
.spinner {
|
||||
border: 16px solid #f3f3f3;
|
||||
border-top: 16px solid #3498db;
|
||||
border-radius: 50%;
|
||||
width: 80px;
|
||||
height: 80px;
|
||||
animation: spin 2s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
0% { transform: rotate(0deg); }
|
||||
100% { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
.loading-page {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
align-content: center;
|
||||
flex-wrap: nowrap;
|
||||
overflow-y: auto;
|
||||
flex-grow: 1;
|
||||
background-color: white;
|
||||
padding: 20px;
|
||||
border-radius: 10px;
|
||||
box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
.not-translated {
|
||||
color: rgb(232, 125, 125);
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div id="app">
|
||||
<div v-if="loading" class="loading-page">
|
||||
<!-- Loading page -->
|
||||
<div class="spinner"></div>
|
||||
<div><br/>Loading...</div>
|
||||
|
||||
</div>
|
||||
<div v-else-if="targetRepository">
|
||||
<div class="navbar">
|
||||
Translate your doc - {{targetRepository}} -> <select v-model="targetLanguage">
|
||||
<option value="fr">fr</option>
|
||||
<option value="zh_cn">zh_cn</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="file-explorer">
|
||||
<div class='file-list'>
|
||||
<div class='file-item' v-for="(item, index) in files" v-if="files">
|
||||
<div class='file-name' v-on:click="changeFile(index)" :class="fileStatusClass(item)">{{item.path}}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class='file-overview'>
|
||||
<!-- One row per documentation block of the selected file: the original text on the left, the editable translation for the selected language on the right. -->
<div class='file-block' v-for="block in file.doc" v-if="file">
  <div class="initial" contenteditable="false">
    <pre class="title" v-if="block.title.length"><span v-for="n in block.level" :key="n">#</span>{{block.title}}</pre>
    <pre class="content" v-if="block.content.length">{{block.content}}</pre>
  </div>
  <!-- Optional chaining: the title_<code>/content_<code> arrays only exist on a block once it has been translated into that language, so they may be undefined for the language currently selected. -->
  <div class="translation" v-if="block[targetLanguageTitle]?.length || block[targetLanguageContent]?.length">
    <pre class="title" v-if="block[targetLanguageTitle]?.length"><span v-for="n in block.level" :key="n">#</span><span contenteditable>{{block[targetLanguageTitle][0]}}</span></pre>
    <pre class="content" v-if="block[targetLanguageContent]?.length" contenteditable>{{block[targetLanguageContent][0]}}</pre>
  </div>
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div v-else>
|
||||
<div>
|
||||
|
||||
<div class="home-repo">
|
||||
<h1>Grab a repo from GitHub and translate its docs</h1>
|
||||
<div>
|
||||
<h2>Translate repo: </h2>
|
||||
<input class="home-repo-input" type="text" v-model=inputValue placeholder="user/repo"></input>
|
||||
|
||||
<label> to </label>
|
||||
|
||||
<select class="home-repo-language-select" v-model="targetLanguage">
|
||||
<option value="fr">fr</option>
|
||||
<option value="zh_cn">zh_cn</option>
|
||||
</select>
|
||||
|
||||
<button class="home-repo-button" @click="translate()">Go!</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
const { createApp, ref, onMounted, watch} = Vue
|
||||
const app = createApp({
|
||||
setup() {
|
||||
const files = ref(null);
|
||||
const file = ref(null); // Initialize file as null
|
||||
const targetRepository = ref(""); // "run-llama/llama_index";
|
||||
const loading = ref(false);
|
||||
const fileLoaded = ref(false);
|
||||
const inputValue = ref("");
|
||||
const targetLanguage = ref("fr");
|
||||
const targetLanguageTitle = ref("title_fr");
|
||||
const targetLanguageContent = ref("content_fr");
|
||||
|
||||
watch(targetLanguage, (newVal, oldVal) => {
|
||||
// Update targetLanguageTitle and targetLanguageContent based on newVal
|
||||
targetLanguageTitle.value = `title_${newVal}`;
|
||||
targetLanguageContent.value = `content_${newVal}`;
|
||||
});
|
||||
|
||||
// Show the entry at position `index` of the loaded file list in the overview pane.
const changeFile = (index) => {
  const selected = files.value[index];
  file.value = selected;
};
|
||||
|
||||
|
||||
|
||||
/**
 * Load the saved translation data for the repository typed into the form.
 *
 * Copies `inputValue` into `targetRepository`, shows the loading page and
 * fetches `<repository>.json` relative to the current page. On success the
 * parsed payload becomes `files`, its first entry becomes `file`, and
 * `fileLoaded` is set to true.
 *
 * On any failure (network error, non-2xx status, invalid JSON) the error is
 * logged and the state is reset so the home form is shown again; previously
 * the rejection was unhandled and the spinner stayed on screen forever.
 *
 * @returns {Promise<void>} resolves once the state refs have been updated.
 */
const translate = async () => {
  targetRepository.value = inputValue.value;
  console.log("translate", targetRepository.value);
  loading.value = true;
  try {
    const response = await fetch(targetRepository.value + ".json");
    // fetch only rejects on network errors; a 404 for an unknown repo must be
    // detected explicitly before trying to parse the body as JSON.
    if (!response.ok) {
      throw new Error(
        `Request for ${targetRepository.value}.json failed with status ${response.status}`
      );
    }
    const data = await response.json();
    files.value = data;
    // An empty list has no first entry; keep `file` null so the overview stays empty.
    file.value = data[0] ?? null;
    fileLoaded.value = true;
  } catch (error) {
    console.error("translate failed", error);
    files.value = null;
    file.value = null;
    fileLoaded.value = false;
    // Clearing the repository makes the template fall back to the home form.
    targetRepository.value = "";
  } finally {
    // Always leave the loading page, whether the request succeeded or not.
    loading.value = false;
  }
};
|
||||
|
||||
// CSS class for an entry in the file list: entries with a truthy `doc`
// property are "translated", every other entry is "not-translated".
const fileStatusClass = (item) => (item.doc ? "translated" : "not-translated");
|
||||
|
||||
|
||||
return {
|
||||
files,
|
||||
file,
|
||||
targetRepository,
|
||||
loading,
|
||||
fileLoaded,
|
||||
inputValue,
|
||||
targetLanguage,
|
||||
targetLanguageTitle,
|
||||
targetLanguageContent,
|
||||
changeFile,
|
||||
translate,
|
||||
fileStatusClass
|
||||
};
|
||||
}
|
||||
}).mount('#app')
|
||||
|
||||
|
||||
|
||||
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
+57
-27
@@ -14,14 +14,6 @@ const octokit = new Octokit({
|
||||
auth: process.env["GITHUB_PERSONAL_ACCESS_TOKEN"]
|
||||
})
|
||||
|
||||
|
||||
|
||||
async function main() {
|
||||
const chatCompletion = await openai.chat.completions.create({
|
||||
messages: [{ role: 'user', content: 'Say this is a test' }],
|
||||
model: 'gpt-3.5-turbo',
|
||||
});
|
||||
}
|
||||
|
||||
async function getLastFileVersion(owner, repoName, path) {
|
||||
let docFiles = [];
|
||||
@@ -75,11 +67,15 @@ async function listDocFiles(files, owner, repoName, path) {
|
||||
async function loadFiles(owner, repoName, files) {
|
||||
|
||||
for (let file of files) {
|
||||
const response = await fetch(file.download_url);
|
||||
const text = await response.text();
|
||||
|
||||
file.raw = text;
|
||||
file.token = encode(text).length;
|
||||
// do not reload file if already loaded
|
||||
if (!file.raw) {
|
||||
const response = await fetch(file.download_url);
|
||||
const text = await response.text();
|
||||
|
||||
file.raw = text;
|
||||
file.token = encode(text).length;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -109,7 +105,7 @@ function parseMdStrToTree(file) {
|
||||
// Takes a hierarchical document object and returns the Markdown file as a string
|
||||
function parseTreeToMdStr(doc, code='') {
|
||||
let str = '';
|
||||
|
||||
console.log(JSON.stringify(doc, null, 2));
|
||||
for (let block of doc) {
|
||||
if (code) {
|
||||
str += '#'.repeat(block.level) + ' ' + block[`title_${code}`][0] + '\n';
|
||||
@@ -132,8 +128,9 @@ function sleep(ms) {
|
||||
async function translateTextTree(textTree, language, temp=0) {
|
||||
|
||||
let text = parseTreeToMdStr(textTree);
|
||||
let messages = [];
|
||||
try {
|
||||
let messages = [
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": `Your are task with translating a technical documentation in ${language}. The user will provide the documentation in a markdown format. Translate it to ${language}. Only output the translated documentation in Markdown, do not add or remove content. Do not try to translate function/api endpoint name, only translate the documentation. If the documentation contain codeblocks, only translate commentaries, do not translate variablenames / function names. Try to output it in a {language} that is easy to read, do not try to translate all expressions verbatim, make it so it feel professional. If {language} usually use the enlish word for a thing, keep it in English. Notes: it's a computer doc, build mean 'compile', watch mean 'looking at file that change', ... Keep the same structure as the original documentation, and retain ALL the links / images.`,
|
||||
@@ -145,11 +142,11 @@ async function translateTextTree(textTree, language, temp=0) {
|
||||
];
|
||||
const chatCompletion = await openai.chat.completions.create({
|
||||
messages: messages,
|
||||
model: 'gpt-3.5-turbo',
|
||||
model: 'gpt-3.5-turbo-1106',
|
||||
temperature: temp
|
||||
});
|
||||
|
||||
console.log(chatCompletion.choices[0].message.content)
|
||||
// console.log(chatCompletion.choices[0].message.content)
|
||||
|
||||
|
||||
let translationTree = parseMdStrToTree(chatCompletion.choices[0].message.content);
|
||||
@@ -194,10 +191,12 @@ async function translateTextTree(textTree, language, temp=0) {
|
||||
});
|
||||
}
|
||||
|
||||
console.log("=======\nRejectd", JSON.stringify(messages, null, 2));
|
||||
|
||||
|
||||
chatCompletion = await openai.chat.completions.create({
|
||||
messages: messages,
|
||||
model: 'gpt-3.5-turbo-16k',
|
||||
model: 'gpt-3.5-turbo-1106',
|
||||
temperature: temp
|
||||
});
|
||||
translationTree = parseMdStrToTree(chatCompletion.choices[0].message.content);
|
||||
@@ -213,6 +212,7 @@ async function translateTextTree(textTree, language, temp=0) {
|
||||
catch (e) {
|
||||
console.log(e);
|
||||
console.log('sleeping');
|
||||
console.log("=======\eerror on ", JSON.stringify(messages, null, 2));
|
||||
await sleep(30000)
|
||||
return translateTextTree(textTree, language, temp);
|
||||
}
|
||||
@@ -243,6 +243,14 @@ async function translateFile(file, language, code) {
|
||||
}
|
||||
|
||||
let translationTree = await translateTextTree(blocks, language, 0);
|
||||
|
||||
if (translationTree.length != blocks.length) {
|
||||
if (!file.translationError) {
|
||||
file.translationError = {};
|
||||
}
|
||||
file.translationError[code] = true;
|
||||
|
||||
}
|
||||
|
||||
for (let j = 0; j < blocks.length; j++) {
|
||||
if (!blocks[j]["title_" + code]) {
|
||||
@@ -251,9 +259,11 @@ async function translateFile(file, language, code) {
|
||||
if (!blocks[j]["content_" + code]) {
|
||||
blocks[j]["content_" + code] = []
|
||||
}
|
||||
blocks[j]["title_" + code].push(translationTree[j].title);
|
||||
blocks[j]["content_" + code].push(translationTree[j].content);
|
||||
|
||||
blocks[j]["title_" + code].push(translationTree[j]?.title || '\n');
|
||||
blocks[j]["content_" + code].push(translationTree[j]?.content || '\n');
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -264,6 +274,7 @@ async function translateFiles(files, language, code, savepath) {
|
||||
for (let file of files) {
|
||||
started_jobs++;
|
||||
let fn = async function() {
|
||||
console.log("translating:", file.name);
|
||||
await translateFile(file, language, code);
|
||||
await fs.writeFile(savepath, JSON.stringify(files, null, 2), 'utf8');
|
||||
finished_jobs++;
|
||||
@@ -281,7 +292,9 @@ async function translateFiles(files, language, code, savepath) {
|
||||
async function correctLinkInFile(file, languageCode, docDir) {
|
||||
for (let block of file.doc) {
|
||||
// find all markdown local link in block.content, BUT NOT IMAGES
|
||||
console.log(block, block[`content_${languageCode}`][0], `content_${languageCode}`);
|
||||
if (!block[`content_${languageCode}`]) {
|
||||
continue
|
||||
}
|
||||
|
||||
let matchesTranslated = block[`content_${languageCode}`][0].match(/\[.*\]\(.*\)/g);
|
||||
|
||||
@@ -333,6 +346,11 @@ async function buildOutputMd(files, owner, repoName, repoDocDir, languageCode) {
|
||||
let targetDir = `${__dirname}/build/${owner}/${repoName}/${repoDocDir}/${languageCode}`;
|
||||
|
||||
for (let file of files) {
|
||||
// check if file is translated in target language
|
||||
if (!file.doc || file.doc[0][`content_${languageCode}`] === undefined) {
|
||||
continue
|
||||
}
|
||||
|
||||
await correctLinkInFile(file, languageCode);
|
||||
|
||||
let translatedMd = parseTreeToMdStr(file.doc, languageCode);
|
||||
@@ -362,7 +380,7 @@ async function printFiles(files, owner, repoName, repoDocDir) {
|
||||
let dir = '';
|
||||
for (let i = 0; i < dirs.length - 1; i++) {
|
||||
dir += dirs[i] + '/';
|
||||
try {
|
||||
try {xw
|
||||
await fs.access(dir);
|
||||
} catch (error) {
|
||||
await fs.mkdir(dir);
|
||||
@@ -376,25 +394,37 @@ async function translateDoc(owner, repoName, repoDocDir, language, code) {
|
||||
|
||||
// const files = await listDocFiles(owner, repoName, repoDocDir);
|
||||
let files = [];
|
||||
let savepath = `${__dirname}/tmp/save-${owner}-${repoName}.json`;
|
||||
let savepath = `${__dirname}/save/${owner}/${repoName}.json`;
|
||||
try {
|
||||
await fs.access(savepath);
|
||||
files = require(savepath)
|
||||
} catch {
|
||||
|
||||
// create the save path
|
||||
let dirs = savepath.split('/');
|
||||
let dir = '';
|
||||
for (let i = 0; i < dirs.length - 1; i++) {
|
||||
dir += dirs[i] + '/';
|
||||
try {
|
||||
await fs.access(dir);
|
||||
} catch (error) {
|
||||
await fs.mkdir(dir);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await listDocFiles(files, owner, repoName, repoDocDir);
|
||||
|
||||
await loadFiles(owner, repoName, files);
|
||||
|
||||
await printFiles(files, owner, repoName, repoDocDir);
|
||||
// await printFiles(files, owner, repoName, repoDocDir);
|
||||
|
||||
await translateFiles(files, language, code, savepath);
|
||||
|
||||
await buildOutputMd(files, owner, repoName, repoDocDir, code)
|
||||
// await buildOutputMd(files, owner, repoName, repoDocDir, code)
|
||||
}
|
||||
|
||||
|
||||
// translateDoc('nodejs', 'node', 'doc', 'French', 'fr');
|
||||
translateDoc('run-llama', 'llama_index', 'docs', 'French', 'fr')
|
||||
// translateDoc('run-llama', 'llama_index', 'docs', 'Simplified Chinese(zh_cn)', 'zh_cn')
|
||||
|
||||
module.exports = translateDoc;
|
||||
Reference in New Issue
Block a user