better version

This commit is contained in:
Pierre
2023-12-02 18:58:54 +01:00
parent 1c2939c614
commit c4e865fd1c
4 changed files with 410 additions and 44 deletions
+2 -1
View File
@@ -1,4 +1,5 @@
node_modules
.DS_Store
tmp
build
build
save
+3 -16
View File
@@ -1,24 +1,11 @@
const translate = require('./translate.js');
const express = require('express');
const app = express();
app.get('/', (req, res) => {
res.send('Hello World!');
});
app.get('/translate', (req, res) => {
const { text, lang } = req.query;
translate(text, lang)
.then((result) => {
res.send(result);
})
.catch((err) => {
res.send(err);
});
});
app.use(express.static('public'));
app.use(express.static('save'));
app.listen(process.env.PORT || 3000, (port) => {
console.log(`Example app listening on port ${port}!`);
}
});
+348
View File
@@ -0,0 +1,348 @@
<!DOCTYPE html>
<html lang="en">
<head>
<script src="https://unpkg.com/vue@3/dist/vue.global.js"></script>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<style>
body {
margin: 0px;
font-family: 'Roboto', sans-serif;
font-size: 14px;
font-weight: normal;
line-height: 1.5;
color: #333;
background-color: #f4f4f4;
}
#app {
display: flex;
flex-direction: column;
height: 100vh; /* This will make #app take up the full height of the viewport */
}
.navbar {
background-color: #333;
color: white;
padding: 15px;
font-size: 14px;
font-weight: bold;
}
.file-explorer {
display: flex;
flex-direction: row;
justify-content: flex-start;
align-items: flex-start;
align-content: flex-start;
flex-wrap: nowrap;
overflow-y: auto; /* This will add a scrollbar if the content is too long */
flex-grow: 1; /* This will make .file-explorer take up the remaining space in #app */
}
.file-list {
display: flex;
flex-direction: column;
overflow-y: auto;
flex-wrap: wrap;
flex-basis: 300px;
border-right: 1px solid #eee;
}
.file-item {
padding: 4px;
margin: 0px;
cursor: pointer;
}
.file-name {
font-size: 14px;
font-weight: bold;
}
.file-overview {
flex-grow: 1;
flex-shrink: 1;
flex-basis: 0;
display: flex;
flex-direction: column;
flex-wrap: wrap;
}
.file-block {
flex-grow: 1;
flex-shrink: 1;
flex-basis: 0;
display: flex;
flex-direction: row;
flex-wrap: wrap;
justify-content: space-between;
}
.initial, .translation {
flex: 1;
padding: 10px;
box-sizing: border-box;
border: 1px solid #eee;
}
.file-overview .title {
font-size: 14px;
font-weight: bold;
padding : 0px;
margin: 0px;
}
.file-overview .content {
font-size: 12px;
padding : 0px;
margin: 0px;
}
pre {
white-space: pre-wrap; /* CSS 2.1 */
white-space: -moz-pre-wrap; /* For Mozilla */
white-space: -pre-wrap; /* For Opera 4-6 */
white-space: -o-pre-wrap; /* For Opera 7 */
word-wrap: break-word; /* For IE 5.5+ and modern browsers */
overflow-wrap: anywhere; /* This will break the word at any character to prevent overflow */
}
.home-repo {
display: flex;
flex-direction: row;
justify-content: center;
align-items: center;
align-content: center;
flex-wrap: nowrap;
overflow-y: auto; /* This will add a scrollbar if the content is too long */
flex-grow: 1; /* Let .home-repo take up the remaining space in its flex container */
}
.home-repo-input {
padding: 10px;
font-size: 14px;
font-weight: bold;
border: 1px solid #eee;
border-radius: 5px;
margin-right: 10px;
}
.home-repo-button {
padding: 10px;
font-size: 14px;
font-weight: bold;
border: 1px solid #eee;
border-radius: 5px;
background-color: #eee;
cursor: pointer;
}
.home-repo-button:hover {
background-color: #ddd;
}
.home-repo-input:focus {
outline: none;
}
.home-repo-input:hover {
border: 1px solid #ddd;
}
.home-repo-input:active {
border: 1px solid #ddd;
}
.home-repo-language-select {
padding: 10px;
font-size: 14px;
font-weight: bold;
border: 1px solid #eee;
border-radius: 5px;
margin-right: 10px;
}
.home-repo-language-select:focus {
outline: none;
}
.home-repo-language-select:hover {
border: 1px solid #ddd;
}
.home-repo-language-select:active {
border: 1px solid #ddd;
}
.spinner {
border: 16px solid #f3f3f3;
border-top: 16px solid #3498db;
border-radius: 50%;
width: 80px;
height: 80px;
animation: spin 2s linear infinite;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.loading-page {
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
align-content: center;
flex-wrap: nowrap;
overflow-y: auto;
flex-grow: 1;
background-color: white;
padding: 20px;
border-radius: 10px;
box-shadow: 0px 0px 10px rgba(0, 0, 0, 0.1);
}
.not-translated {
color: rgb(232, 125, 125);
}
</style>
</head>
<body>
<div id="app">
<div v-if="loading" class="loading-page">
<!-- Loading page -->
<div class="spinner"></div>
<div><br/>Loading...</div>
</div>
<div v-else-if="targetRepository">
<div class="navbar">
Translate your doc - {{targetRepository}} -> <select v-model="targetLanguage">
<option value="fr">fr</option>
<option value="zh_cn">zh_cn</option>
</select>
</div>
<div class="file-explorer">
<div class='file-list'>
<div class='file-item' v-for="(item, index) in files" v-if="files">
<div class='file-name' v-on:click="changeFile(index)" :class="fileStatusClass(item)">{{item.path}}</div>
</div>
</div>
<!-- Side-by-side view of the selected file: each source block next to its translation. -->
<div class='file-overview'>
  <!-- Render nothing until a file has been selected; kept on a wrapper so v-if and v-for do not share an element. -->
  <template v-if="file">
    <div class='file-block' v-for="block in file.doc">
      <div class="initial" contenteditable="false">
        <pre class="title" v-if="block.title.length"><span v-for="n in block.level" :key="n">#</span>{{block.title}}</pre>
        <pre class="content" v-if="block.content.length">{{block.content}}</pre>
      </div>
      <!-- Optional chaining: a block may have no entry at all for the selected language. -->
      <div class="translation" v-if="block[targetLanguageTitle]?.length || block[targetLanguageContent]?.length">
        <pre class="title" v-if="block[targetLanguageTitle]?.length"><span v-for="n in block.level" :key="n">#</span><span contenteditable>{{block[targetLanguageTitle][0]}}</span></pre>
        <pre class="content" v-if="block[targetLanguageContent]?.length" contenteditable>{{block[targetLanguageContent][0]}}</pre>
      </div>
    </div>
  </template>
</div>
</div>
</div>
<div v-else>
<div>
<div class="home-repo">
<h1>Grab a repo from GitHub and translate its docs</h1>
<div>
<h2>Translate repo: </h2>
<input class="home-repo-input" type="text" v-model=inputValue placeholder="user/repo"></input>
<label> to </label>
<select class="home-repo-language-select" v-model="targetLanguage">
<option value="fr">fr</option>
<option value="zh_cn">zh_cn</option>
</select>
<button class="home-repo-button" @click="translate()">Go!</button>
</div>
</div>
</div>
</div>
</div>
<script>
const { createApp, ref, onMounted, watch} = Vue
const app = createApp({
setup() {
// --- Reactive state exposed to the template ---
// File entries loaded for the repository; stays null until translate() stores the fetched data.
const files = ref(null);
const file = ref(null); // Entry currently shown in the overview pane; null until one is selected
const targetRepository = ref(""); // e.g. "run-llama/llama_index"; while empty the template shows the home page
// Truthy while data is being fetched; the template shows the loading page in that case.
const loading = ref(false);
// Set to true by translate() once the fetched data has been stored.
const fileLoaded = ref(false);
// Text typed into the "user/repo" input on the home page.
const inputValue = ref("");
// Language code selected in the <select> elements ("fr" or "zh_cn").
const targetLanguage = ref("fr");
// Property names the template reads on each block to find the translated title and content.
const targetLanguageTitle = ref("title_fr");
const targetLanguageContent = ref("content_fr");
// Keep the two property names in sync with the selected language code.
watch(targetLanguage, (newVal, oldVal) => {
// Update targetLanguageTitle and targetLanguageContent based on newVal
targetLanguageTitle.value = `title_${newVal}`;
targetLanguageContent.value = `content_${newVal}`;
});
// Select the entry at position `index` of the loaded file list as the file
// shown in the overview pane.
const changeFile = (index) => {
  const selected = files.value[index];
  file.value = selected;
};
/**
 * Load the saved translation data for the repository typed on the home page.
 *
 * Fetches `<repository>.json` (relative to the current page), stores the
 * parsed list in `files`, selects its first entry as the current `file`,
 * and flags `fileLoaded`. Blank input is ignored.
 *
 * Failures (network error, non-2xx response, invalid JSON) are logged and
 * `targetRepository` is cleared so the home page is shown again. `loading`
 * is always reset, so the spinner can never get stuck.
 *
 * @returns {Promise<void>}
 */
const translate = async () => {
  const repository = inputValue.value.trim();
  if (!repository) {
    // Nothing to load; avoid requesting ".json".
    return;
  }

  targetRepository.value = repository;
  console.log("translate", repository);
  loading.value = true;

  try {
    const response = await fetch(`${repository}.json`);
    if (!response.ok) {
      // fetch() only rejects on network errors, so HTTP errors are checked explicitly.
      throw new Error(`Could not load ${repository}.json (HTTP ${response.status})`);
    }
    const data = await response.json();
    files.value = data;
    file.value = data[0];
    fileLoaded.value = true;
  } catch (err) {
    console.error("translate failed", err);
    // Fall back to the home page so the user can try another repository.
    targetRepository.value = "";
  } finally {
    loading.value = false;
  }
};
// CSS class for an entry of the file list: "translated" when the entry
// carries a truthy `doc`, "not-translated" otherwise.
const fileStatusClass = (item) => (item.doc ? "translated" : "not-translated");
return {
files,
file,
targetRepository,
loading,
fileLoaded,
inputValue,
targetLanguage,
targetLanguageTitle,
targetLanguageContent,
changeFile,
translate,
fileStatusClass
};
}
}).mount('#app')
</script>
</body>
</html>
+57 -27
View File
@@ -14,14 +14,6 @@ const octokit = new Octokit({
auth: process.env["GITHUB_PERSONAL_ACCESS_TOKEN"]
})
async function main() {
const chatCompletion = await openai.chat.completions.create({
messages: [{ role: 'user', content: 'Say this is a test' }],
model: 'gpt-3.5-turbo',
});
}
async function getLastFileVersion(owner, repoName, path) {
let docFiles = [];
@@ -75,11 +67,15 @@ async function listDocFiles(files, owner, repoName, path) {
async function loadFiles(owner, repoName, files) {
for (let file of files) {
const response = await fetch(file.download_url);
const text = await response.text();
file.raw = text;
file.token = encode(text).length;
// do not reload file if already loaded
if (!file.raw) {
const response = await fetch(file.download_url);
const text = await response.text();
file.raw = text;
file.token = encode(text).length;
}
}
}
@@ -109,7 +105,7 @@ function parseMdStrToTree(file) {
// a function that take a document hierarchical object and return a md file as a string
function parseTreeToMdStr(doc, code='') {
let str = '';
console.log(JSON.stringify(doc, null, 2));
for (let block of doc) {
if (code) {
str += '#'.repeat(block.level) + ' ' + block[`title_${code}`][0] + '\n';
@@ -132,8 +128,9 @@ function sleep(ms) {
async function translateTextTree(textTree, language, temp=0) {
let text = parseTreeToMdStr(textTree);
let messages = [];
try {
let messages = [
messages = [
{
"role": "system",
"content": `Your are task with translating a technical documentation in ${language}. The user will provide the documentation in a markdown format. Translate it to ${language}. Only output the translated documentation in Markdown, do not add or remove content. Do not try to translate function/api endpoint name, only translate the documentation. If the documentation contain codeblocks, only translate commentaries, do not translate variablenames / function names. Try to output it in a {language} that is easy to read, do not try to translate all expressions verbatim, make it so it feel professional. If {language} usually use the enlish word for a thing, keep it in English. Notes: it's a computer doc, build mean 'compile', watch mean 'looking at file that change', ... Keep the same structure as the original documentation, and retain ALL the links / images.`,
@@ -145,11 +142,11 @@ async function translateTextTree(textTree, language, temp=0) {
];
const chatCompletion = await openai.chat.completions.create({
messages: messages,
model: 'gpt-3.5-turbo',
model: 'gpt-3.5-turbo-1106',
temperature: temp
});
console.log(chatCompletion.choices[0].message.content)
// console.log(chatCompletion.choices[0].message.content)
let translationTree = parseMdStrToTree(chatCompletion.choices[0].message.content);
@@ -194,10 +191,12 @@ async function translateTextTree(textTree, language, temp=0) {
});
}
console.log("=======\nRejectd", JSON.stringify(messages, null, 2));
chatCompletion = await openai.chat.completions.create({
messages: messages,
model: 'gpt-3.5-turbo-16k',
model: 'gpt-3.5-turbo-1106',
temperature: temp
});
translationTree = parseMdStrToTree(chatCompletion.choices[0].message.content);
@@ -213,6 +212,7 @@ async function translateTextTree(textTree, language, temp=0) {
catch (e) {
console.log(e);
console.log('sleeping');
console.log("=======\eerror on ", JSON.stringify(messages, null, 2));
await sleep(30000)
return translateTextTree(textTree, language, temp);
}
@@ -243,6 +243,14 @@ async function translateFile(file, language, code) {
}
let translationTree = await translateTextTree(blocks, language, 0);
if (translationTree.length != blocks.length) {
if (!file.translationError) {
file.translationError = {};
}
file.translationError[code] = true;
}
for (let j = 0; j < blocks.length; j++) {
if (!blocks[j]["title_" + code]) {
@@ -251,9 +259,11 @@ async function translateFile(file, language, code) {
if (!blocks[j]["content_" + code]) {
blocks[j]["content_" + code] = []
}
blocks[j]["title_" + code].push(translationTree[j].title);
blocks[j]["content_" + code].push(translationTree[j].content);
blocks[j]["title_" + code].push(translationTree[j]?.title || '\n');
blocks[j]["content_" + code].push(translationTree[j]?.content || '\n');
}
}
}
@@ -264,6 +274,7 @@ async function translateFiles(files, language, code, savepath) {
for (let file of files) {
started_jobs++;
let fn = async function() {
console.log("translating:", file.name);
await translateFile(file, language, code);
await fs.writeFile(savepath, JSON.stringify(files, null, 2), 'utf8');
finished_jobs++;
@@ -281,7 +292,9 @@ async function translateFiles(files, language, code, savepath) {
async function correctLinkInFile(file, languageCode, docDir) {
for (let block of file.doc) {
// find all markdown local link in block.content, BUT NOT IMAGES
console.log(block, block[`content_${languageCode}`][0], `content_${languageCode}`);
if (!block[`content_${languageCode}`]) {
continue
}
let matchesTranslated = block[`content_${languageCode}`][0].match(/\[.*\]\(.*\)/g);
@@ -333,6 +346,11 @@ async function buildOutputMd(files, owner, repoName, repoDocDir, languageCode) {
let targetDir = `${__dirname}/build/${owner}/${repoName}/${repoDocDir}/${languageCode}`;
for (let file of files) {
// check if file is translated in target language
if (!file.doc || file.doc[0][`content_${languageCode}`] === undefined) {
continue
}
await correctLinkInFile(file, languageCode);
let translatedMd = parseTreeToMdStr(file.doc, languageCode);
@@ -362,7 +380,7 @@ async function printFiles(files, owner, repoName, repoDocDir) {
let dir = '';
for (let i = 0; i < dirs.length - 1; i++) {
dir += dirs[i] + '/';
try {
try {xw
await fs.access(dir);
} catch (error) {
await fs.mkdir(dir);
@@ -376,25 +394,37 @@ async function translateDoc(owner, repoName, repoDocDir, language, code) {
// const files = await listDocFiles(owner, repoName, repoDocDir);
let files = [];
let savepath = `${__dirname}/tmp/save-${owner}-${repoName}.json`;
let savepath = `${__dirname}/save/${owner}/${repoName}.json`;
try {
await fs.access(savepath);
files = require(savepath)
} catch {
// create the save path
let dirs = savepath.split('/');
let dir = '';
for (let i = 0; i < dirs.length - 1; i++) {
dir += dirs[i] + '/';
try {
await fs.access(dir);
} catch (error) {
await fs.mkdir(dir);
}
}
}
await listDocFiles(files, owner, repoName, repoDocDir);
await loadFiles(owner, repoName, files);
await printFiles(files, owner, repoName, repoDocDir);
// await printFiles(files, owner, repoName, repoDocDir);
await translateFiles(files, language, code, savepath);
await buildOutputMd(files, owner, repoName, repoDocDir, code)
// await buildOutputMd(files, owner, repoName, repoDocDir, code)
}
// translateDoc('nodejs', 'node', 'doc', 'French', 'fr');
translateDoc('run-llama', 'llama_index', 'docs', 'French', 'fr')
// translateDoc('run-llama', 'llama_index', 'docs', 'Simplified Chinese(zh_cn)', 'zh_cn')
module.exports = translateDoc;