Add RAGFlow tarball build workflow and installation documentation

This commit is contained in:
John Doe
2026-03-14 20:16:04 -04:00
parent 0ae657e07c
commit a29258f432
2 changed files with 755 additions and 0 deletions
+312
View File
@@ -0,0 +1,312 @@
name: Build RAGFlow Tarball
on:
release:
types: [published]
workflow_dispatch:
inputs:
use_china_mirrors:
description: 'Use China mirrors for downloads'
required: false
default: false
type: boolean
env:
PYTHON_VERSION: '3.12'
NODE_VERSION: '20'
UV_VERSION: '0.9.16'
jobs:
build:
strategy:
fail-fast: false
matrix:
include:
- arch: amd64
runner: ubuntu-latest
uv_arch: x86_64
- arch: arm64
runner: ubuntu-24.04-arm
uv_arch: aarch64
runs-on: ${{ matrix.runner }}
steps:
- name: Checkout RAGFlow repository
uses: actions/checkout@v4
with:
repository: infiniflow/ragflow
submodules: true
fetch-depth: 0
- name: Get version
id: version
run: |
VERSION=$(git describe --tags --match=v* --first-parent --always)
VERSION="${VERSION#v}"
if [[ "${{ github.event_name }}" == "release" ]]; then
VERSION="${{ github.event.release.tag_name }}"
VERSION="${VERSION#v}"
fi
echo "version=$VERSION" >> $GITHUB_OUTPUT
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
libglib2.0-0 libglx-mesa0 libgl1 \
pkg-config libicu-dev libgdiplus \
default-jdk \
libatk-bridge2.0-0 \
libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev \
libjemalloc-dev \
gnupg unzip curl wget git vim less \
ghostscript pandoc texlive \
fonts-freefont-ttf fonts-noto-cjk \
postgresql-client \
unixodbc-dev
- name: Install NGINX
run: |
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://nginx.org/keys/nginx_signing.key | gpg --dearmor -o /etc/apt/keyrings/nginx-archive-keyring.gpg
echo "deb [signed-by=/etc/apt/keyrings/nginx-archive-keyring.gpg] https://nginx.org/packages/mainline/ubuntu/ noble nginx" | sudo tee /etc/apt/sources.list.d/nginx.list
sudo apt-get update
sudo apt-get install -y nginx
sudo apt-mark hold nginx
- name: Install MS SQL ODBC driver
run: |
curl https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add -
curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list
sudo apt-get update
if [[ "${{ matrix.arch }}" == "arm64" ]]; then
ACCEPT_EULA=Y sudo apt-get install -y msodbcsql18
else
ACCEPT_EULA=Y sudo apt-get install -y msodbcsql17
fi
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install uv
run: |
curl -LsSf "https://github.com/astral-sh/uv/releases/download/${{ env.UV_VERSION }}/uv-${{ matrix.uv_arch }}-unknown-linux-gnu.tar.gz" | tar xz
sudo cp "uv-${{ matrix.uv_arch }}-unknown-linux-gnu/uv" /usr/local/bin/
sudo cp "uv-${{ matrix.uv_arch }}-unknown-linux-gnu/uvx" /usr/local/bin/ 2>/dev/null || true
rm -rf "uv-${{ matrix.uv_arch }}-unknown-linux-gnu"
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: ${{ env.NODE_VERSION }}
- name: Setup Rust toolchain
uses: dtolnay/rust-toolchain@stable
- name: Cache Rust cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-
- name: Download dependencies
run: |
if [[ "${{ inputs.use_china_mirrors }}" == "true" ]]; then
python download_deps.py --china-mirrors
else
python download_deps.py
fi
env:
HF_ENDPOINT: ${{ inputs.use_china_mirrors == true && 'https://hf-mirror.com' || '' }}
- name: Install Python dependencies
run: |
uv sync --python ${{ env.PYTHON_VERSION }} --frozen
- name: Build frontend
run: |
cd web
npm install
npm run build
- name: Create version file
run: |
VERSION_INFO=$(git describe --tags --match=v* --first-parent --always)
echo "RAGFlow version: $VERSION_INFO"
echo "$VERSION_INFO" > VERSION
- name: Prepare tarball contents
run: |
mkdir -p staging/ragflow
# Copy application code
cp -r web staging/ragflow/
cp -r admin staging/ragflow/
cp -r api staging/ragflow/
cp -r conf staging/ragflow/
cp -r deepdoc staging/ragflow/
cp -r rag staging/ragflow/
cp -r agent staging/ragflow/
cp -r mcp staging/ragflow/
cp -r common staging/ragflow/
cp -r memory staging/ragflow/
cp -r bin staging/ragflow/
cp pyproject.toml uv.lock staging/ragflow/
cp VERSION staging/ragflow/
# Copy Python virtual environment
cp -r .venv staging/ragflow/
# Copy docker configuration
cp docker/service_conf.yaml.template staging/ragflow/conf/
cp docker/entrypoint.sh staging/ragflow/
chmod +x staging/ragflow/entrypoint.sh
# Copy built frontend
cp -r web/dist staging/ragflow/web/
# Copy downloaded models and resources
mkdir -p staging/ragflow/rag/res/deepdoc
cp -r huggingface.co/InfiniFlow/text_concat_xgb_v1.0 staging/ragflow/rag/res/deepdoc/ 2>/dev/null || true
cp -r huggingface.co/InfiniFlow/deepdoc staging/ragflow/rag/res/deepdoc/ 2>/dev/null || true
# Copy NLTK data
cp -r nltk_data staging/ragflow/
# Copy tika and tiktoken files
cp tika-server-standard-3.2.3.jar staging/ragflow/ 2>/dev/null || true
cp tika-server-standard-3.2.3.jar.md5 staging/ragflow/ 2>/dev/null || true
cp cl100k_base.tiktoken staging/ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 2>/dev/null || true
# Copy Chrome and ChromeDriver
mkdir -p staging/ragflow/opt/chrome
cp -r chrome-linux64/* staging/ragflow/opt/chrome/ 2>/dev/null || true
cp chromedriver staging/ragflow/usr/local/bin/ 2>/dev/null || true
# Create launch script
cat > staging/ragflow/launch.sh << 'LAUNCH_EOF'
#!/bin/bash
# RAGFlow Server Launch Script for Tarball Installation
# This file starts up the RAGFlow server
set -e
echo "[RAGFlow] Starting server..."
# Required environment variables
: "\${MYSQL_HOST:?MYSQL_HOST environment variable is required}"
: "\${MYSQL_PORT:?MYSQL_PORT environment variable is required}"
: "\${MYSQL_USER:?MYSQL_USER environment variable is required}"
: "\${MYSQL_PASSWORD:?MYSQL_PASSWORD environment variable is required}"
: "\${MINIO_HOST:?MINIO_HOST environment variable is required}"
: "\${MINIO_PORT:?MINIO_PORT environment variable is required}"
: "\${MINIO_USER:?MINIO_USER environment variable is required}"
: "\${MINIO_PASSWORD:?MINIO_PASSWORD environment variable is required}"
: "\${REDIS_HOST:?REDIS_HOST environment variable is required}"
: "\${REDIS_PORT:?REDIS_PORT environment variable is required}"
: "\${REDIS_PASSWORD:?REDIS_PASSWORD environment variable is required}"
# Set defaults for optional variables
export TIKA_SERVER_JAR="\${TIKA_SERVER_JAR:-file://$(pwd)/tika-server-standard-3.2.3.jar}"
export PYTHONPATH="\${PYTHONPATH:-$(pwd)}"
export PATH="$(pwd)/.venv/bin:\${PATH}"
# Activate virtual environment
source .venv/bin/activate
# Set jemalloc path for better memory management
JEMALLOC_PATH=$(pkg-config --variable=libdir jemalloc)/libjemalloc.so
export LD_PRELOAD="\${JEMALLOC_PATH}"
# Start backend services
echo "[RAGFlow] Starting task executor..."
python rag/svr/task_executor.py 1 &
echo "[RAGFlow] Starting RAGFlow server..."
python api/ragflow_server.py
LAUNCH_EOF
chmod +x staging/ragflow/launch.sh
# Create README
cat > staging/ragflow/README.txt << 'README_EOF'
RAGFlow Server - Tarball Installation
=====================================
Prerequisites:
- Python 3.12 or later
- MySQL 8.0+ (or MariaDB)
- MinIO (object storage)
- Redis/Valkey
- Elasticsearch 8.x or OpenSearch (optional, for search capabilities)
- NGINX (for reverse proxy)
Required Environment Variables:
- MYSQL_HOST: MySQL server host
- MYSQL_PORT: MySQL server port (default: 5455)
- MYSQL_USER: MySQL username
- MYSQL_PASSWORD: MySQL password
- MINIO_HOST: MinIO server host
- MINIO_PORT: MinIO server port (default: 9000)
- MINIO_USER: MinIO access key
- MINIO_PASSWORD: MinIO secret key
- REDIS_HOST: Redis server host
- REDIS_PORT: Redis server port (default: 6379)
- REDIS_PASSWORD: Redis password
Optional Environment Variables:
- TIKA_SERVER_JAR: Path to Tika server JAR (default: bundled)
- HF_ENDPOINT: HuggingFace mirror endpoint
- PYTHONPATH: Python module search path (default: current directory)
Quick Start:
1. Install dependencies: MySQL, MinIO, Redis
2. Set environment variables (see above)
3. Run: ./launch.sh
For more information, visit: https://ragflow.io/docs/launch_ragflow_from_source
README_EOF
- name: Create tarball
run: |
cd staging
tar -czvf ../ragflow-${{ steps.version.outputs.version }}-${{ matrix.arch }}.tar.gz ragflow
cd ..
echo "Tarball created: ragflow-${{ steps.version.outputs.version }}-${{ matrix.arch }}.tar.gz"
ls -la ragflow-*.tar.gz
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: ragflow-${{ matrix.arch }}
path: ragflow-${{ steps.version.outputs.version }}-${{ matrix.arch }}.tar.gz
retention-days: 7
release:
needs: build
runs-on: ubuntu-latest
if: github.event_name == 'release'
permissions:
contents: write
steps:
- name: Download all artifacts
uses: actions/download-artifact@v4
with:
path: artifacts
merge-multiple: true
- name: List artifacts
run: ls -la artifacts/
- name: Upload tarballs to release
uses: softprops/action-gh-release@v2
with:
files: artifacts/*.tar.gz
+443
View File
@@ -0,0 +1,443 @@
# RAGFlow Tarball Installation Guide
This guide explains how to build and install RAGFlow from a pre-built tarball archive.
## Overview
The tarball build process creates self-contained archives for RAGFlow that include:
- Pre-compiled Python virtual environment with all dependencies
- Pre-built frontend (web application)
- Required models and resources (HuggingFace models, NLTK data, Tika, etc.)
- Chrome/ChromeDriver for document processing
- Launch scripts for easy deployment
**This is a Docker-free installation method.** RAGFlow runs directly on your host system without containerization.
## Building the Tarball
### Prerequisites for Building
| Requirement | Version | Notes |
|-------------|---------|-------|
| Ubuntu | 24.04+ | Target platform |
| Python | 3.12 | Required for uv package manager |
| Node.js | 20.x | For frontend build |
| Rust | Stable | For C extensions |
**Note:** Docker is NOT required. The build process runs directly on the host system.
### Automated Build (GitHub Actions)
The repository includes a GitHub Actions workflow (`.github/workflows/build-ragflow-tarball.yml`) that automatically builds tarballs for both AMD64 and ARM64 architectures.
#### Triggering a Build
1. **Release Build**: Create a new release in GitHub - tarballs are automatically uploaded to the release
2. **Manual Build**: Use the "workflow_dispatch" trigger in GitHub Actions UI
#### Build Configuration Options
| Input | Description | Default |
|-------|-------------|---------|
| `use_china_mirrors` | Use China-accessible mirrors for downloads | `false` |
### Manual Build Process
If you need to build manually outside of GitHub Actions:
```bash
# 1. Clone the repository
git clone https://github.com/infiniflow/ragflow.git
cd ragflow
# 2. Install system dependencies
sudo apt-get update
sudo apt-get install -y \
libglib2.0-0 libglx-mesa0 libgl1 \
pkg-config libicu-dev libgdiplus \
default-jdk \
libatk-bridge2.0-0 \
libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev \
libjemalloc-dev \
gnupg unzip curl wget git vim less \
ghostscript pandoc texlive \
fonts-freefont-ttf fonts-noto-cjk \
postgresql-client \
unixodbc-dev
# 3. Install NGINX
curl -fsSL https://nginx.org/keys/nginx_signing.key | gpg --dearmor -o /etc/apt/keyrings/nginx-archive-keyring.gpg
echo "deb [signed-by=/etc/apt/keyrings/nginx-archive-keyring.gpg] https://nginx.org/packages/mainline/ubuntu/ noble nginx" | sudo tee /etc/apt/sources.list.d/nginx.list
sudo apt-get update
sudo apt-get install -y nginx
# 4. Install uv package manager
curl -LsSf https://github.com/astral-sh/uv/releases/download/0.9.16/uv-x86_64-unknown-linux-gnu.tar.gz | tar xz
sudo cp uv-x86_64-unknown-linux-gnu/uv /usr/local/bin/
# 5. Download dependencies (models, Chrome, Tika, etc.)
python download_deps.py
# 6. Install Python dependencies
uv sync --python 3.12 --frozen
# 7. Build frontend
cd web
npm install
npm run build
cd ..
# 8. Create version file
git describe --tags --match=v* --first-parent --always > VERSION
# 9. Create tarball staging directory
mkdir -p staging/ragflow
cp -r web admin api conf deepdoc rag agent mcp common memory bin staging/ragflow/
cp -r .venv staging/ragflow/
cp pyproject.toml uv.lock VERSION staging/ragflow/
cp docker/service_conf.yaml.template staging/ragflow/conf/
cp docker/entrypoint.sh staging/ragflow/
chmod +x staging/ragflow/entrypoint.sh
# 10. Copy resources
cp -r huggingface.co staging/ragflow/
cp -r nltk_data staging/ragflow/
cp tika-server-standard-3.2.3.jar staging/ragflow/
cp cl100k_base.tiktoken staging/ragflow/
# 11. Create tarball
cd staging
tar -czvf ../ragflow-$(cat ../VERSION)-$(uname -m).tar.gz ragflow
```
## Installation from Tarball
### Prerequisites
| Requirement | Version | Notes |
|-------------|---------|-------|
| MySQL | 8.0+ | Or MariaDB 10.5+ |
| MinIO | Latest | Object storage |
| Redis/Valkey | 7.x+ | Caching layer |
| Elasticsearch | 8.x | Optional, for search |
| NGINX | Latest | Reverse proxy |
**No Docker required!** All services can be installed directly on the host or accessed from remote servers.
### Step 1: Download and Extract
```bash
# Create installation directory
sudo mkdir -p /opt/ragflow
cd /opt/ragflow
# Download tarball (replace VERSION and ARCH)
wget https://github.com/your-org/ragflow/releases/download/vVERSION/ragflow-VERSION-ARCH.tar.gz
# Extract
sudo tar -xzvf ragflow-VERSION-ARCH.tar.gz
```
### Step 2: Set Up External Services
You can install these services directly on your host system OR use remote/managed services. Docker examples are provided for convenience only.
#### MySQL
**Option A: Install directly on host (recommended for production)**
```bash
# Ubuntu/Debian
sudo apt-get install mysql-server
# Create database and user
sudo mysql
CREATE USER 'ragflow'@'localhost' IDENTIFIED BY 'your-secure-password';
CREATE DATABASE ragflow CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
GRANT ALL PRIVILEGES ON ragflow.* TO 'ragflow'@'localhost';
FLUSH PRIVILEGES;
```
**Option B: Use remote/managed MySQL**
- AWS RDS, Google Cloud SQL, Azure Database for MySQL
- Self-hosted MySQL on another server
**Option C: Docker (development only)**
```bash
docker run -d --name mysql -p 3306:3306 \
-e MYSQL_ROOT_PASSWORD=your-secure-password \
-e MYSQL_DATABASE=ragflow \
mysql:8.0
```
#### MinIO
**Option A: Install directly on host**
```bash
# Download MinIO binary
wget https://dl.min.io/server/minio/release/linux-amd64/minio
chmod +x minio
sudo mv minio /usr/local/bin/
# Create data directory
sudo mkdir -p /var/lib/minio
# Start MinIO (create systemd service for production)
MINIO_ROOT_USER=ragflow MINIO_ROOT_PASSWORD=your-secure-password \
minio server /var/lib/minio --console-address ":9001"
```
**Option B: Use remote/managed object storage**
- AWS S3 (with S3-compatible API)
- Google Cloud Storage
- Azure Blob Storage
- Self-hosted MinIO on another server
**Option C: Docker (development only)**
```bash
docker run -d --name minio -p 9000:9000 -p 9001:9001 \
-v /var/lib/minio:/data \
-e MINIO_ROOT_USER=ragflow \
-e MINIO_ROOT_PASSWORD=your-secure-password \
quay.io/minio/minio server /data --console-address ":9001"
```
#### Redis
**Option A: Install directly on host (recommended)**
```bash
# Ubuntu/Debian
sudo apt-get install redis-server
# Configure password
sudo sed -i 's/# requirepass foobared/requirepass your-secure-password/' /etc/redis/redis.conf
sudo sed -i 's/maxmemory.*/maxmemory 128mb/' /etc/redis/redis.conf
sudo sed -i 's/# maxmemory-policy.*/maxmemory-policy allkeys-lru/' /etc/redis/redis.conf
# Restart Redis
sudo systemctl restart redis
```
**Option B: Use remote/managed Redis**
- AWS ElastiCache
- Google Cloud Memorystore
- Azure Cache for Redis
- Self-hosted Redis on another server
**Option C: Docker (development only)**
```bash
docker run -d --name redis -p 6379:6379 \
valkey/valkey:8 \
redis-server --requirepass your-secure-password --maxmemory 128mb --maxmemory-policy allkeys-lru
```
### Step 3: Configure Environment
Create an environment file:
```bash
cat > /opt/ragflow/.env << 'EOF'
# MySQL Configuration
MYSQL_HOST=localhost
MYSQL_PORT=3306
MYSQL_USER=ragflow
MYSQL_PASSWORD=your-secure-password
# MinIO Configuration
MINIO_HOST=localhost
MINIO_PORT=9000
MINIO_USER=ragflow
MINIO_PASSWORD=your-secure-password
# Redis Configuration
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_PASSWORD=your-secure-password
# Optional: Elasticsearch (if using)
ES_HOST=localhost
ES_PORT=9200
ES_PASSWORD=your-elastic-password
# Optional: HuggingFace mirror (China users)
# HF_ENDPOINT=https://hf-mirror.com
EOF
```
### Step 4: Configure RAGFlow
Edit the configuration file:
```bash
cp /opt/ragflow/conf/service_conf.yaml.template /opt/ragflow/conf/service_conf.yaml
vim /opt/ragflow/conf/service_conf.yaml
```
Update the following settings:
```yaml
# Database
mysql:
host: ${MYSQL_HOST}
port: ${MYSQL_PORT}
user: ${MYSQL_USER}
password: ${MYSQL_PASSWORD}
database: ragflow
# Object Storage
minio:
host: ${MINIO_HOST}
port: ${MINIO_PORT}
access_key: ${MINIO_USER}
secret_key: ${MINIO_PASSWORD}
# Redis
redis:
host: ${REDIS_HOST}
port: ${REDIS_PORT}
password: ${REDIS_PASSWORD}
```
### Step 5: Run RAGFlow
```bash
cd /opt/ragflow
source .env
./launch.sh
```
## Running as a Systemd Service
Create a systemd service file:
```bash
sudo cat > /etc/systemd/system/ragflow.service << 'EOF'
[Unit]
Description=RAGFlow Server
After=network.target mysql.service redis.service
[Service]
Type=simple
User=ragflow
WorkingDirectory=/opt/ragflow
EnvironmentFile=/opt/ragflow/.env
ExecStart=/opt/ragflow/launch.sh
Restart=on-failure
RestartSec=10
[Install]
WantedBy=multi-user.target
EOF
sudo systemctl daemon-reload
sudo systemctl enable ragflow
sudo systemctl start ragflow
```
## Architecture Support
The build process supports two architectures:
| Architecture | Runner | Notes |
|-------------|--------|-------|
| AMD64 | `ubuntu-latest` | Standard x86_64 servers |
| ARM64 | `ubuntu-24.04-arm` | ARM servers, Raspberry Pi, AWS Graviton |
## Troubleshooting
### Database Connection Issues
```bash
# Test MySQL connection
mysql -h $MYSQL_HOST -P $MYSQL_PORT -u $MYSQL_USER -p$MYSQL_PASSWORD -e "SELECT 1"
# Check MySQL logs (if installed locally)
sudo journalctl -u mysql
```
### MinIO Connection Issues
```bash
# Test MinIO connection
curl -I http://$MINIO_HOST:$MINIO_PORT/minio/health/live
# Check MinIO logs (if installed locally)
sudo journalctl -u minio
```
### Redis Connection Issues
```bash
# Test Redis connection
redis-cli -h $REDIS_HOST -p $REDIS_PORT -a $REDIS_PASSWORD ping
# Check Redis logs (if installed locally)
sudo journalctl -u redis
```
### Chrome/ChromeDriver Issues
The tarball includes Chrome and ChromeDriver for document processing. If you encounter issues:
```bash
# Verify Chrome is accessible
ls -la /opt/ragflow/opt/chrome/
# Verify ChromeDriver
ls -la /opt/ragflow/usr/local/bin/chromedriver
```
### Memory Issues
RAGFlow uses jemalloc for memory management. If you see memory-related errors:
```bash
# Verify jemalloc is installed
pkg-config --variable=libdir jemalloc
# Check available memory
free -h
```
## Environment Variables Reference
### Required Variables
| Variable | Description | Example |
|----------|-------------|---------|
| `MYSQL_HOST` | MySQL server host | `localhost` |
| `MYSQL_PORT` | MySQL server port | `3306` |
| `MYSQL_USER` | MySQL username | `ragflow` |
| `MYSQL_PASSWORD` | MySQL password | `secure-password` |
| `MINIO_HOST` | MinIO server host | `localhost` |
| `MINIO_PORT` | MinIO server port | `9000` |
| `MINIO_USER` | MinIO access key | `ragflow` |
| `MINIO_PASSWORD` | MinIO secret key | `secure-password` |
| `REDIS_HOST` | Redis server host | `localhost` |
| `REDIS_PORT` | Redis server port | `6379` |
| `REDIS_PASSWORD` | Redis password | `secure-password` |
### Optional Variables
| Variable | Description | Default |
|----------|-------------|---------|
| `TIKA_SERVER_JAR` | Path to Tika JAR | Bundled |
| `HF_ENDPOINT` | HuggingFace mirror | `https://huggingface.co` |
| `PYTHONPATH` | Python module path | `/opt/ragflow` |
## Why Tarball Instead of Docker?
| Aspect | Tarball Installation | Docker Installation |
|--------|---------------------|---------------------|
| Performance | Native performance | Slight overhead |
| Resource usage | Direct system access | Container isolation overhead |
| Deployment | Single tarball extraction | Multiple containers needed |
| Debugging | Direct access to processes | Container debugging required |
| Integration | Easy integration with host services | Network/port mapping needed |
| Updates | Replace tarball directory | Rebuild/restart containers |
| Best for | Production servers, bare metal | Development, testing, microservices |
## Additional Resources
- [RAGFlow Documentation](https://ragflow.io/docs/)
- [Launch from Source Guide](https://ragflow.io/docs/launch_ragflow_from_source)
- [GitHub Repository](https://github.com/infiniflow/ragflow)