Macmini m4 搭建AI环境实现本地全自动 AI 视频工厂系统架构

Macmini m4 搭建AI环境实现本地全自动 AI 视频工厂系统架构

Macmini m4 搭建AI环境实现本地全自动 AI 视频工厂系统架构

#AI,#brew,#FFmpeg,#huggingface.co,#Macos,#Miniforge,#ollama,#python,#SadTalker,#Shell,#stable-diffusion-webui,#TTS

在家里找不到工作的日子不安与痛苦,我想赚钱,就想着能不能批量化构建制造视频流
所以就有了这次实践,希望这个流可以为我赚钱吧。😭

参考

brew mac工具包
ollama 文本生成
Miniforge python隔离环境
SD 头像
TTS 配音
SadTalker 嘴形
FFmpeg 合成
v1-5-pruned-emaonly.safetensors 模型
Realistic_Vision_V6.0_NV_B1.safetensors 模型
anything-v5.safetensors 模型

批量创造思路

┌────────────┐     ┌────────────┐     ┌────────────┐     ┌────────────┐     ┌──────────────┐     ┌──────────────┐ 
│ 文本生成     │ →  │ 人脸生成     │ →   │ 配音生成    │ →   │ 嘴型合成    │  →  │ 音乐合成(失败) │  →  │ 合成输出       │ 
│ GPT 模型    │     │ SD 头像     │    │ TTS        │     │ SadTalker  │     │ ?            │     │ FFmpeg + 发布 │ 
└────────────┘     └────────────┘     └────────────┘     └────────────┘     └──────────────┘     └──────────────┘ 

Shell 终端操作安装 brew 环境

# Install Homebrew, then persist its shellenv setup into ~/.zshrc so future
# shells pick up the brew paths automatically.
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
echo 'eval "$(/opt/homebrew/bin/brew shellenv)"' >> ~/.zshrc

Shell 终端操作安装 Miniforge 环境

# GitHub repository slug used to build all release URLs below.
URI="conda-forge/miniforge"

# Resolve the newest release tag by scraping the releases page.
VERSION=$(curl -sL "https://github.com/$URI/releases" \
  | grep -Eo '/releases/tag/[^"]+' \
  | head -n 1 \
  | awk -F'/tag/' '{print $2}')
echo "Latest version: $VERSION"

# Detect host OS and CPU architecture.
OS=$(uname -s)
ARCH=$(uname -m)

# 映射平台到官方命名
# Map uname output to the platform/architecture names used by Miniforge
# release assets (Linux uses "aarch64" for ARM, macOS uses "arm64").
if [[ "$OS" == "Linux" || "$OS" == "Darwin" ]]; then
  if [[ "$OS" == "Linux" ]]; then
    PLATFORM="Linux"
  else
    PLATFORM="MacOSX"
  fi
  case "$ARCH" in
    arm64|aarch64)
      if [[ "$PLATFORM" == "Linux" ]]; then ARCH="aarch64"; else ARCH="arm64"; fi
      ;;
    x86_64)
      ARCH="x86_64"
      ;;
    *)
      echo "Unsupported architecture: $ARCH"
      # exit intentionally disabled: the script may be pasted into an interactive shell
      #exit 1
      ;;
  esac
else
  echo "Unsupported OS: $OS"
  #exit 1
fi

# Report the resolved platform/architecture pair.
printf 'Platform: %s\n' "$PLATFORM"
printf 'Architecture: %s\n' "$ARCH"

# Compose the installer and checksum download URLs.
TARGET_FILE="Miniforge3-$VERSION-$PLATFORM-$ARCH.sh"
SHA256_FILE="$TARGET_FILE.sha256"
BASE_URL="https://github.com/$URI/releases/download/$VERSION"
URI_DOWNLOAD="$BASE_URL/$TARGET_FILE"
URI_SHA256="$BASE_URL/$SHA256_FILE"
printf 'Download URL: %s\n' "$URI_DOWNLOAD"
printf 'SHA256 URL: %s\n' "$URI_SHA256"

# --- Download & integrity-check helpers -------------------------------------

# Re-download the checksum file so a stale copy never masks a bad installer.
fetch_sha256() {
  if [[ -f "/tmp/$SHA256_FILE" ]]; then
    echo "Removing old SHA256 file: /tmp/$SHA256_FILE"
    rm -fv "/tmp/$SHA256_FILE"
  fi
  echo "Downloading SHA256 file..."
  curl -L -C - --retry 3 --retry-delay 5 --progress-bar -o "/tmp/$SHA256_FILE" "$URI_SHA256"
}

# Verify /tmp/$TARGET_FILE against /tmp/$SHA256_FILE; returns 0 on match.
# NOTE: shasum depends on perl; some Linux systems need it installed manually.
verify_sha256() {
  echo "Verifying file integrity for /tmp/$TARGET_FILE..."
  cd /tmp || exit 1
  shasum -a 256 -c "$SHA256_FILE"
}

# If a cached installer exists, re-check it; drop it when the checksum fails
# so the fresh-download path below kicks in.
if [[ -f "/tmp/$TARGET_FILE" ]]; then
  echo "File already exists: /tmp/$TARGET_FILE"
  fetch_sha256
  if ! verify_sha256; then
    echo "SHA256 checksum failed. Removing file and retrying..."
    rm -fv "/tmp/$TARGET_FILE"
  else
    echo "File integrity verified successfully."
  fi
fi

# Fresh download: runs when there was no cached file or it was just purged.
if [[ ! -f "/tmp/$TARGET_FILE" ]]; then
  echo "Downloading file..."
  curl -L -C - --retry 3 --retry-delay 5 --progress-bar -o "/tmp/$TARGET_FILE" "$URI_DOWNLOAD"
  fetch_sha256
  if ! verify_sha256; then
    echo "Download failed: SHA256 checksum does not match."
    # exit intentionally disabled so an interactive paste does not kill the shell
    #exit 1
  else
    echo "File integrity verified successfully."
  fi
fi

# Create the installation prefix and make it traversable.
echo "Installing Miniforge..."
sudo mkdir -pv /opt/Mambaforge
sudo chmod -Rv a+x /opt/Mambaforge
OS=$(uname -s)

# Hand the prefix to the invoking user (group differs per OS).
if [[ "$OS" == "Darwin" ]]; then
  # macOS uses "admin" as the standard group for local administrators
  sudo chown -Rv "$(whoami):admin" /opt/Mambaforge
elif [[ "$OS" == "Linux" ]]; then
  # Linux conventionally has a per-user group named after the user
  sudo chown -Rv "$(whoami):$(whoami)" /opt/Mambaforge
else
  echo "Unsupported OS: $OS"
  #exit 1
fi

# Run the installer in batch mode (-b) into the prepared prefix.
bash "/tmp/$TARGET_FILE" -b -f -p /opt/Mambaforge

# Clean Miniforge caches and the installer package.
# NOTE: this appeared destructive on some Linux systems, so it stays disabled.
echo "Cleaning up..."
#/opt/Mambaforge/bin/conda clean -afy
#rm -fv "/tmp/$TARGET_FILE" "/tmp/$SHA256_FILE"

# Put Miniforge on PATH for the current session.
export PATH=/opt/Mambaforge/bin:$PATH
# Some Linux systems may also need Miniforge's lib directory exported.
# If you need it, run the line below and persist it in your bash/zsh profile.
# Unclear whether every Linux install lacks these libs, so it stays commented.
#export LD_LIBRARY_PATH=/opt/Mambaforge/lib:$LD_LIBRARY_PATH


# Detect the operating system
OS=$(uname -s)
# Detect the current login shell name
CURRENT_SHELL=$(basename "$SHELL")

echo "Detected shell: $CURRENT_SHELL"
echo "Detected OS: $OS"

# Run `conda init` once per shell, then re-source the matching profile so the
# conda/mamba commands become available immediately in this session.
case "$CURRENT_SHELL" in
  bash)
    if ! grep -q "conda initialize" "$HOME/.bashrc"; then
      echo "Initializing conda for bash..."
      conda init bash
    fi
    if [[ "$OS" == "Linux" ]]; then
      source "$HOME/.bashrc"
    elif [[ "$OS" == "Darwin" ]]; then
      source "$HOME/.bash_profile"
    else
      echo "Unsupported OS: $OS"
    fi
    ;;
  zsh)
    if ! grep -q "conda initialize" "$HOME/.zshrc"; then
      echo "Initializing conda for zsh..."
      conda init zsh
    fi
    if [[ "$OS" == "Linux" ]]; then
      source "$HOME/.zshrc"
    elif [[ "$OS" == "Darwin" ]]; then
      echo "Ensuring .zshrc is sourced from .zprofile..."
      # macOS login shells read .zprofile; make sure it chains to .zshrc
      if ! grep -q "source ~/.zshrc" "$HOME/.zprofile"; then
        echo "source ~/.zshrc" >> "$HOME/.zprofile"
      fi
      source "$HOME/.zprofile"
    else
      echo "Unsupported OS: $OS"
    fi
    ;;
  fish)
    echo "Initializing mamba for fish..."
    mamba init fish
    ;;
  *)
    echo "Unsupported shell: $CURRENT_SHELL"
    ;;
esac

# Update Mamba and Conda in the base environment
mamba update -n base -c defaults mamba -y
mamba update -n base -c defaults conda -y

安装 ollama git ffmpeg cmake protobuf rust font-noto-sans-sc 等基础工具

# Install the base tooling via Homebrew, then start the ollama service.
brew install ollama git ffmpeg cmake protobuf rust font-noto-sans-sc
brew services start ollama

创建 AI python 隔离环境

# Enter the project directory
mkdir -pv ai-project
cd ai-project

# Stable-Diffusion-web-UI environment
conda create -n stable-diffusion-webui python=3.10.6 -y

# TTS environment
conda create -n tts python=3.10 -y

# SadTalker environment
conda create -n sadtalker python=3.10 -y

# Music environment (never got it working)
# conda create -n ? python=3.10 -y

# whisper environment
conda create -n whisper python=3.10 -y

Ollama 安装本地模型 mistral 生成多段文案,一条一行写入 texts.txt

# Interactive mistral session used to draft the copy saved into texts.txt
ollama run mistral

使用 AUTOMATIC1111 + Mac M系列 GPU 加速

默认执行 webui.sh 会默认下载模型 v1-5-pruned-emaonly.safetensors
Hugging Face 链接: https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors
路径: stable-diffusion-webui/models/Stable-diffusion/v1-5-pruned-emaonly.safetensors
可以手动下载安装 RealisticVisionV6.0 和 AnythingV5 模型
Hugging Face 链接: https://huggingface.co/SG161222/Realistic_Vision_V6.0_B1_noVAE/resolve/main/Realistic_Vision_V6.0_NV_B1.safetensors
Hugging Face 链接: https://huggingface.co/genai-archive/anything-v5/resolve/main/anything-v5.safetensors
路径: stable-diffusion-webui/models/Stable-diffusion/
批量生成脚本(生成头像)

# Clone AUTOMATIC1111's WebUI, (re)start it in the background, and fetch the
# checkpoint files it will serve.
conda activate stable-diffusion-webui
git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui
pushd stable-diffusion-webui 2>/dev/null
python -m pip install requests
chmod -v a+x ./webui.sh
#./webui.sh --listen
# Kill any previously started WebUI instances before relaunching.
pkill -f './webui.sh --listen'
pkill -f 'venv/bin/python -u launch.py --listen'
nohup ./webui.sh --listen > ./webui.log 2>&1 &
# Model downloads; skip if the files were already fetched.
RESOURCE_DIR=models/Stable-diffusion
for URI in "https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors" \
  "https://huggingface.co/SG161222/Realistic_Vision_V6.0_B1_noVAE/resolve/main/Realistic_Vision_V6.0_NV_B1.safetensors" \
  "https://huggingface.co/genai-archive/anything-v5/resolve/main/anything-v5.safetensors"; do
  # Quote the URL so basename never word-splits or globs it.
  TARGET_FILE=$(basename "${URI}")
  curl -L -C - --retry 3 --retry-delay 5 --progress-bar -o "${RESOURCE_DIR}/${TARGET_FILE}" "${URI}"
done
popd

安装配音生成模块(TTS 本地高质量语音)

PyTorch 2.6 的 torch.load(…, weights_only=True) 默认仅支持极少数安全类。一旦模型中反序列化用到了内建类如:
collections.defaultdict
dict
TTS.utils.radam.RAdam
就必须手动通过 add_safe_globals() 加白。

# Install Coqui TTS into its own env and smoke-test a Chinese synthesis.
conda activate tts
python -m pip install TTS

# Temporarily switch IFS to newline so the path containing a space survives.
IFS_BAK=$IFS
IFS=$'\n'
# The default model cache lives in a directory I dislike; symlink it into the
# project directory instead.
rm -frv ${HOME}'/Library/Application Support/tts'
mkdir -pv $(pwd)/TTS
ln -sfv $(pwd)/TTS ${HOME}'/Library/Application Support/tts'
IFS=$IFS_BAK

# Smoke-test: synthesize one sentence to ./TTS/output.wav
TTS --model_name "tts_models/zh-CN/baker/tacotron2-DDC-GST" \
    --vocoder_name "vocoder_models/universal/libri-tts/wavegrad" \
    --text '你好世界!' --out_path ./TTS/output.wav

如果提示以下信息,那需要添加安全反序列化对象

	(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
	(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
	WeightsUnpickler error: Unsupported global: GLOBAL TTS.utils.radam.RAdam was not an allowed global by default. Please use `torch.serialization.add_safe_globals([TTS.utils.radam.RAdam])` or the `torch.serialization.safe_globals([TTS.utils.radam.RAdam])` context manager to allowlist this global if you trust this class/function.
cat << '469138946ba5fa' | tee TTS_test.py
import torch
from torch.serialization import add_safe_globals
import collections

# TTS-specific modules
import TTS.utils.radam
from TTS.utils.synthesizer import Synthesizer
from TTS.utils.manage import ModelManager

# Allowlist every class torch.load must deserialize now that PyTorch 2.6
# defaults to weights_only=True.
add_safe_globals([
    TTS.utils.radam.RAdam,
    collections.defaultdict,
    dict,
])

# Resolve model paths automatically (downloads on first use)
model_manager = ModelManager()
tts_model_name = "tts_models/zh-CN/baker/tacotron2-DDC-GST"
vocoder_model_name = "vocoder_models/universal/libri-tts/wavegrad"

tts_checkpoint, tts_config_path, *_ = model_manager.download_model(tts_model_name)
vocoder_checkpoint, vocoder_config, *_ = model_manager.download_model(vocoder_model_name)

# Initialize the synthesizer (CPU only)
synthesizer = Synthesizer(
    tts_checkpoint=tts_checkpoint,
    tts_config_path=tts_config_path,
    vocoder_checkpoint=vocoder_checkpoint,
    vocoder_config=vocoder_config,
    use_cuda=False,
)

# Synthesize speech and save it next to the script
wav = synthesizer.tts("你好,世界!这是语音合成测试。")
synthesizer.save_wav(wav, "output.wav")
print("✅ 合成完成,已保存为 output.wav")
469138946ba5fa
# 执行测试
python TTS_test.py

安装 嘴型驱动模块(SadTalker)

conda activate sadtalker
git clone https://github.com/OpenTalker/SadTalker.git
pushd SadTalker 2>/dev/null
# install pytorch 2.0
python -m pip install torch torchvision torchaudio
# The project is old and its pins are unsafe; fall back to PEP 517 builds.
python -m pip install -r requirements.txt --use-pep517
python -m pip install dlib # macOS needs to install the original dlib.
# Downgrade torch to the versions SadTalker was written against.
python -m pip install torch==1.12.1 torchvision==0.13.1

# Download the model files (uses the project's bash script)
bash scripts/download_models.sh

# Test run: writes a video into ./results/
python inference.py 
popd

AI 音乐合成(本地搭建未成功)


视频合成 & 添加字幕/BGM(FFmpeg)

  • 添加字幕(使用 Whisper 本地转录)
conda activate whisper
mkdir -pv whisper
python -m pip install git+https://github.com/openai/whisper.git

# Temporarily switch IFS to newline so paths expand safely.
IFS_BAK=$IFS
IFS=$'\n'
# Whisper caches models under ~/.cache/whisper; symlink that into the project.
rm -frv ${HOME}'/.cache/whisper'
mkdir -pv $(pwd)/whisper
ln -sfv $(pwd)/whisper ${HOME}'/.cache/whisper'
IFS=$IFS_BAK

# Transcribe the TTS output into an .srt subtitle file
whisper output.wav --language Chinese --output_format srt
  • 合成视频 + 音频 + 字幕 + 字体
# Mux: lip-sync video + TTS audio (mixed with the video's own track, cut to the
# video's duration) + burned-in subtitles rendered with the Noto Sans SC font.
ffmpeg -y \
       -i SadTalker/results/2025_06_29_12.17.09.mp4 \
       -i output.wav \
       -filter_complex "[0:a][1:a]amix=inputs=2:duration=first" \
       -vf "subtitles=output.srt:force_style='FontName=Noto Sans SC,FontSize=25'" \
       ${HOME}/Downloads/output.mp4

根据流程设想设计自动化主控脚本

ai-project/
├── run_pipeline.sh           # ← 主控脚本
├── modules/
│   ├── generate_audio.py     # TTS 语音合成
│   ├── generate_image.py     # SD 图像生成
│   ├── generate_lipsync.py   # SadTalker 嘴型驱动
│   ├── generate_music.py     # AI 背景音乐(可选)
│   ├── generate_subtitle.py  # Whisper 自动字幕
│   └── merge_video.py        # FFmpeg 合成
├── texts.txt                 # 批量文案,一行一条
└── README.md                 # 项目说明

终极测试,AI制作视频流程,视频最终输出到 output 中

# 初始化 run_pipeline.py
cat << '469138946ba5fa' > run_pipeline.sh
#!/usr/bin/env bash
set -euo pipefail

# === Global configuration ===
INPUT_FILE="texts.txt"
OUTPUT_DIR="output"
rm -fr "$OUTPUT_DIR"
mkdir -p "$OUTPUT_DIR"

# Load conda's shell activation hooks so `conda activate` works in a script
source "$(conda info --base)/etc/profile.d/conda.sh"

# === Start the WebUI (API mode) ===
echo "[💥] 清理旧 WebUI..."
pkill -f 'webui.sh --api' || true
pkill -f 'launch.py --api' || true

echo "[🚀] 启动 stable-diffusion-webui..."
pushd stable-diffusion-webui >/dev/null
nohup ./webui.sh --api > ../webui.log 2>&1 &
popd

# === Restart the ollama service and generate the copy ===
echo "[💥] 停止旧 ollama..."
brew services stop ollama || true
echo "[🚀] 启动 ollama..."
brew services start ollama
sleep 10

echo "[📝] 使用 ollama 生成文本..."
ollama run mistral << '469138946ba5fa2' > "$INPUT_FILE"
Write 5 short Chinese motivational quotes, one per line.
469138946ba5fa2

echo "[ℹ️] 文本内容如下:"
cat "$INPUT_FILE"

# === Main loop: produce one video per non-empty input line ===
INDEX=0
while IFS= read -r LINE; do
  [[ -z "$LINE" ]] && continue
  echo -e "\n🚀 处理第 $INDEX 条文本: $LINE"

  # Strip leading list numbering ("1. ", "2) ", "3、", ...) from model output
  TEXT=$(echo "$LINE" | sed -E 's/^[[:space:]]*[0-9]+[.)、.: \-]*[[:space:]]*//')
  echo "清洗后文本: $TEXT"

  # Per-item file names
  AUDIO_NAME="audio_${INDEX}"
  AUDIO_FILE="${AUDIO_NAME}.wav"
  IMAGE_FILE="image_${INDEX}.png"
  VIDEO_FILE="video_lipsync_${INDEX}.mp4"
  SUBTITLE_FILE="${AUDIO_NAME}.srt"
  FINAL_VIDEO="final_${INDEX}.mp4"

  echo "[🎤] 合成语音..."
  conda activate tts
  python modules/generate_audio.py "$TEXT" "$OUTPUT_DIR/$INDEX" "$AUDIO_FILE"
  conda deactivate

  echo "[🖼️ ] 合成图像..."
  conda activate stable-diffusion-webui
  python modules/generate_image.py "$TEXT" "$OUTPUT_DIR/$INDEX" "$IMAGE_FILE"
  conda deactivate

  echo "[👄] 嘴型合成..."
  conda activate sadtalker
  # SadTalker must run from its own checkout, hence the ../-relative paths
  pushd SadTalker >/dev/null
  python ../modules/generate_lipsync.py \
    "../$OUTPUT_DIR/$INDEX/$AUDIO_FILE" \
    "../$OUTPUT_DIR/$INDEX/$IMAGE_FILE" \
    "../$OUTPUT_DIR/$INDEX" \
    "$VIDEO_FILE"
  popd
  conda deactivate

  echo "[📜] 自动字幕..."
  conda activate whisper
  python modules/generate_subtitle.py \
    "$OUTPUT_DIR/$INDEX/$AUDIO_FILE" \
    "$OUTPUT_DIR/$INDEX"
  conda deactivate

  echo "[🎬] 合成视频..."
  python modules/merge_video.py \
    "$OUTPUT_DIR/$INDEX/$VIDEO_FILE" \
    "$OUTPUT_DIR/$INDEX/$AUDIO_FILE" \
    "$OUTPUT_DIR/$INDEX/$SUBTITLE_FILE" \
    "$OUTPUT_DIR/$INDEX/$FINAL_VIDEO"

  echo "✅ 第 $INDEX 条处理完成"
  INDEX=$((INDEX+1))

done < "$INPUT_FILE"

echo -e "\n🏁 所有流程完成,共处理 $INDEX 条文本。"
469138946ba5fa

mkdir -pv modules output

# Initialize each module as a callable placeholder stub
cat << '469138946ba5fa' > modules/generate_audio.py
# modules/generate_audio.py
# Synthesize Chinese speech for one text line using Coqui TTS.
import sys
import os
import torch
import collections
from torch.serialization import add_safe_globals
from TTS.utils.radam import RAdam
from TTS.utils.synthesizer import Synthesizer
from TTS.utils.manage import ModelManager

# Allow torch to deserialize the extra classes found in TTS checkpoints
# (PyTorch 2.6 defaults torch.load to weights_only=True)
add_safe_globals([RAdam, collections.defaultdict, dict])

# Initialize the Synthesizer once, at import time.
# NOTE(review): this downloads/loads models even on a usage error; kept as-is.
model_manager = ModelManager()
tts_model = "tts_models/zh-CN/baker/tacotron2-DDC-GST"
vocoder_model = "vocoder_models/universal/libri-tts/wavegrad"
tts_ckpt, tts_config, *_ = model_manager.download_model(tts_model)
vocoder_ckpt, vocoder_config, *_ = model_manager.download_model(vocoder_model)

synthesizer = Synthesizer(
    tts_checkpoint=tts_ckpt,
    tts_config_path=tts_config,
    vocoder_checkpoint=vocoder_ckpt,
    vocoder_config=vocoder_config,
    use_cuda=False
)

def main():
    """CLI entry: synthesize <text> into <output_dir>/<output_file> as WAV."""
    if len(sys.argv) != 4:
        print("用法: python generate_audio.py <text> <output_dir> <output_file>")
        sys.exit(1)

    text = sys.argv[1]
    output_dir = sys.argv[2]
    output_file = sys.argv[3]

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, output_file)

    print(f"[🎤] 合成语音中: {text}")
    wav = synthesizer.tts(text)
    synthesizer.save_wav(wav, output_path)
    print(f"[🎤] 语音保存到: {output_path}")

if __name__ == "__main__":
    main()
469138946ba5fa

cat << '469138946ba5fa' > modules/generate_image.py
# modules/generate_image.py
# Ask a locally running stable-diffusion-webui (AUTOMATIC1111) to render a
# portrait for the given text via its txt2img HTTP API, then save the PNG.
import sys
import os
import base64
import requests

def main():
    """CLI entry: generate <output_dir>/<output_file> from <text>.

    Exits 1 on usage error or when the WebUI API call fails.
    """
    if len(sys.argv) != 4:
        print("用法: python generate_image.py <text> <output_dir> <output_file>")
        sys.exit(1)

    text = sys.argv[1]
    output_dir = sys.argv[2]
    output_file = sys.argv[3]
    output_path = os.path.join(output_dir, output_file)

    os.makedirs(output_dir, exist_ok=True)
    print(f"[🖼️ ] 请求 WebUI 生成图像: {text}")

    API_URL = "http://127.0.0.1:7860/sdapi/v1/txt2img"
    payload = {
        "prompt": f"{text}, portrait of a person, front view, realistic face",
        "steps": 20,
        "cfg_scale": 7,
        "width": 512,
        "height": 512,
        "sampler_index": "Euler a",
        "override_settings": {
            "sd_model_checkpoint": "Realistic_Vision_V6.0_NV_B1.safetensors [5d814d2f9c]"
        }
    }

    try:
        # Timeout added: without one a hung WebUI stalls the whole pipeline.
        response = requests.post(API_URL, json=payload, timeout=600)
        response.raise_for_status()
        r = response.json()
        if "images" not in r or not r["images"]:
            raise RuntimeError("API 响应中无图像")
    except Exception as e:
        print(f"[❌] 图像生成失败: {e}")
        sys.exit(1)

    # Images come back base64-encoded; decode the first one to disk.
    with open(output_path, "wb") as f:
        f.write(base64.b64decode(r["images"][0]))

    print(f"[🖼️ ] 图像保存到: {output_path}")

if __name__ == "__main__":
    main()
469138946ba5fa

cat << '469138946ba5fa' > modules/generate_lipsync.py
# modules/generate_lipsync.py
# Drive SadTalker to animate a still portrait with an audio track.
# Must be invoked from inside the SadTalker checkout (needs inference.py and
# its results/ output directory in the current working directory).
import sys
import os
import subprocess

def main():
    """CLI entry: render <output_dir>/<output_file> from <audio> + <image>."""
    if len(sys.argv) != 5:
        print("用法: python generate_lipsync.py <audio_file> <image_file> <output_dir> <output_file>")
        sys.exit(1)

    audio_path = sys.argv[1]
    image_path = sys.argv[2]
    output_dir = sys.argv[3]
    output_file = sys.argv[4]

    os.makedirs(output_dir, exist_ok=True)
    print(f"[👄] 生成嘴型动画: {audio_path} + {image_path}")

    result = subprocess.run([
        "python", "inference.py",
        "--driven_audio", audio_path,
        "--source_image", image_path,
        "--still", "--preprocess", "full", "--enhancer", "gfpgan",
        "--pose_style", "0"
    ])
    # Fail fast if SadTalker itself failed; otherwise a stale file left in
    # results/ could be picked up below and silently shipped.
    if result.returncode != 0:
        print("[❌] SadTalker 执行失败!")
        sys.exit(result.returncode)

    # Move the newest .mp4 from results/ to the target path (newest, not the
    # first listdir hit, so leftovers from earlier runs are never grabbed).
    candidates = [
        os.path.join("results", f)
        for f in os.listdir("results")
        if f.endswith(".mp4")
    ]
    if candidates:
        newest = max(candidates, key=os.path.getmtime)
        dst = os.path.join(output_dir, output_file)
        os.rename(newest, dst)
        print(f"[👄] 嘴型视频重命名为: {dst}")
        return

    print("[❌] 未找到输出视频文件!")
    sys.exit(1)

if __name__ == "__main__":
    main()
469138946ba5fa

cat << '469138946ba5fa' > modules/generate_music.py
# modules/generate_music.py
# Placeholder module: background-music generation is not implemented yet.
import sys
import os

def main():
    """Validate CLI args for the future music generator; currently a no-op."""
    if len(sys.argv) != 3:
        print("用法: python generate_music.py <output_dir> <output_file>")
        sys.exit(1)

    target_dir, target_file = sys.argv[1], sys.argv[2]
    output_path = os.path.join(target_dir, target_file)

    print(f"[🎵] 生成背景音乐(占位,未实现)")
    # Future work: call MusicGen/Riffusion here and write to output_path

if __name__ == "__main__":
    main()
469138946ba5fa

cat << '469138946ba5fa' > modules/generate_subtitle.py
# modules/generate_subtitle.py
# Transcribe an audio file with the `whisper` CLI, writing an .srt named after
# the audio file's basename into <output_dir>.
import sys
import os
import subprocess

def main():
    """CLI entry: generate <output_dir>/<audio_basename>.srt from <audio_file>."""
    # Usage check, consistent with the sibling modules (previously a bare
    # IndexError when arguments were missing).
    if len(sys.argv) != 3:
        print("用法: python generate_subtitle.py <audio_file> <output_dir>")
        sys.exit(1)

    audio_path = sys.argv[1]
    output_dir = sys.argv[2]

    print(f"[📜] 生成字幕 for {audio_path}")
    os.makedirs(output_dir, exist_ok=True)

    subprocess.run([
        "whisper", audio_path,
        "--language", "Chinese",
        "--output_format", "srt",
        "--output_dir", output_dir
    ])

    basename = os.path.splitext(os.path.basename(audio_path))[0]
    print(f"[📜] 字幕保存为 {os.path.join(output_dir, f'{basename}.srt')}")

if __name__ == "__main__":
    main()
469138946ba5fa

cat << '469138946ba5fa' > modules/merge_video.py
# modules/merge_video.py
# Final mux: overlay subtitles on the lip-sync video and mix in the TTS audio.
import sys
import os
import subprocess

def main():
    """CLI entry: write <output_path> from video + audio + subtitles via ffmpeg."""
    if len(sys.argv) != 5:
        print("用法: python merge_video.py <video_path> <audio_path> <subtitle_path> <output_path>")
        sys.exit(1)

    video_path = sys.argv[1]
    audio_path = sys.argv[2]
    subtitle_path = sys.argv[3]
    final_path = sys.argv[4]

    print(f"[🎬] 合成最终视频: 添加音频 + 字幕")

    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-i", audio_path,
        # Mix the video's own audio with the TTS track, cut to video length
        "-filter_complex", "[0:a][1:a]amix=inputs=2:duration=first",
        # Burn in subtitles with the Noto Sans SC font
        "-vf", f"subtitles='{subtitle_path}':force_style='FontName=Noto Sans SC,FontSize=25'",
        final_path
    ]
    result = subprocess.run(cmd)
    # Propagate ffmpeg failures instead of claiming success (was unchecked).
    if result.returncode != 0:
        print("[❌] ffmpeg 合成失败!")
        sys.exit(result.returncode)
    print(f"[🎬] 输出最终视频到: {final_path}")

if __name__ == "__main__":
    main()
469138946ba5fa


# Make the pipeline executable and run the full end-to-end flow.
chmod -v a+x run_pipeline.sh
./run_pipeline.sh

效果,额,不知道为什么,我生成的视频都很色情,唉,看来还得优化一下,要不然根本没法发啊,唉😮‍💨

0
1

Comments