Macmini m4 搭建AI环境实现本地全自动 AI 视频工厂系统架构
#AI,#brew,#FFmpeg,#huggingface.co,#Macos,#Miniforge,#ollama,#python,#SadTalker,#Shell,#stable-diffusion-webui,#TTS
在家里找不到工作的日子不安与痛苦,我想赚钱,就想着能不能批量化构建制造视频流
所以就有了这次实践,希望这个流可以为我赚钱吧。😭
- 参考
- 批量创造思路
- Shell 终端操作安装 brew 环境
- Shell 终端操作安装 Miniforge 环境
- 安装 ollama git ffmpeg cmake protobuf rust font-noto-sans-sc 等基础工具
- 创建 AI python 隔离环境
- 使用 AUTOMATIC1111 + Mac M系列 GPU 加速
- 安装配音生成模块(TTS 本地高质量语音)
- 安装 嘴型驱动模块(SadTalker)
- AI 音乐合成(本地搭建未成功)
- 视频合成 & 添加字幕/BGM(FFmpeg)
- 根据流程设想设计自动化主控脚本
- 终极测试,AI制作视频流程
参考
brew mac工具包
ollama 文本生成
Miniforge python隔离环境
SD 头像
TTS 配音
SadTalker 嘴形
FFmpeg 合成
v1-5-pruned-emaonly.safetensors 模型
Realistic_Vision_V6.0_NV_B1.safetensors 模型
anything-v5.safetensors 模型
批量创造思路
┌────────────┐ ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌──────────────┐ ┌──────────────┐
│ 文本生成 │ → │ 人脸生成 │ → │ 配音生成 │ → │ 嘴型合成 │ → │ 音乐合成(失败) │ → │ 合成输出 │
│ GPT 模型 │ │ SD 头像 │ │ TTS │ │ SadTalker │ │ ? │ │ FFmpeg + 发布 │
└────────────┘ └────────────┘ └────────────┘ └────────────┘ └──────────────┘ └──────────────┘
Shell 终端操作安装 brew 环境
# Install Homebrew via the official bootstrap script.
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
# Put brew on PATH for future zsh sessions; guard so re-running this
# snippet does not append duplicate lines to ~/.zshrc.
if ! grep -q 'brew shellenv' "$HOME/.zshrc" 2>/dev/null; then
  echo 'eval "$(/opt/homebrew/bin/brew shellenv)"' >> "$HOME/.zshrc"
fi
Shell 终端操作安装 Miniforge 环境
# GitHub repo that hosts the Miniforge releases.
URI="conda-forge/miniforge"
# Scrape the releases page for the newest tag.
# NOTE(review): this relies on GitHub's HTML layout; the releases API
# (releases/latest) would be more stable — TODO confirm before hardening.
VERSION=$(curl -sL "https://github.com/$URI/releases" | grep -Eo '/releases/tag/[^"]+' | awk -F'/tag/' '{print $2}' | head -n 1)
echo "Latest version: $VERSION"
# Detect OS and CPU architecture.
OS=$(uname -s)
ARCH=$(uname -m)
# Map uname output to the naming used by the official installer assets.
case "$OS" in
Linux)
PLATFORM="Linux"
if [[ "$ARCH" == "arm64" || "$ARCH" == "aarch64" ]]; then
ARCH="aarch64"
elif [[ "$ARCH" == "x86_64" ]]; then
ARCH="x86_64"
else
echo "Unsupported architecture: $ARCH"
# exit deliberately disabled so an interactive paste does not close the
# shell; past this point ARCH may be wrong for unsupported hardware.
#exit 1
fi
;;
Darwin)
PLATFORM="MacOSX"
if [[ "$ARCH" == "arm64" || "$ARCH" == "aarch64" ]]; then
ARCH="arm64"
elif [[ "$ARCH" == "x86_64" ]]; then
ARCH="x86_64"
else
echo "Unsupported architecture: $ARCH"
#exit 1
fi
;;
*)
echo "Unsupported OS: $OS"
# NOTE(review): falling through leaves PLATFORM unset, which produces a
# malformed download URL below — confirm this is acceptable.
#exit 1
;;
esac
# Show the resolved platform/architecture.
echo "Platform: $PLATFORM"
echo "Architecture: $ARCH"
# Build the installer and checksum download URLs.
TARGET_FILE="Miniforge3-$VERSION-$PLATFORM-$ARCH.sh"
SHA256_FILE="$TARGET_FILE.sha256"
URI_DOWNLOAD="https://github.com/$URI/releases/download/$VERSION/$TARGET_FILE"
URI_SHA256="https://github.com/$URI/releases/download/$VERSION/$SHA256_FILE"
echo "Download URL: $URI_DOWNLOAD"
echo "SHA256 URL: $URI_SHA256"
# If the installer was downloaded previously, re-verify it instead of
# downloading again; a failed checksum deletes it so the next section
# fetches a fresh copy.
if [[ -f "/tmp/$TARGET_FILE" ]]; then
echo "File already exists: /tmp/$TARGET_FILE"
# Drop any stale checksum file first.
if [[ -f "/tmp/$SHA256_FILE" ]]; then
echo "Removing old SHA256 file: /tmp/$SHA256_FILE"
rm -fv "/tmp/$SHA256_FILE"
fi
# Fetch a fresh checksum for the cached installer.
echo "Downloading SHA256 file..."
curl -L -C - --retry 3 --retry-delay 5 --progress-bar -o "/tmp/$SHA256_FILE" "$URI_SHA256"
# Verify integrity. shasum is a Perl tool and may need manual
# installation on some Linux distributions.
echo "Verifying file integrity for /tmp/$TARGET_FILE..."
cd /tmp || exit 1
if ! shasum -a 256 -c "$SHA256_FILE"; then
echo "SHA256 checksum failed. Removing file and retrying..."
rm -fv "/tmp/$TARGET_FILE"
else
echo "File integrity verified successfully."
fi
fi
# Download the installer if it is missing (or was deleted above after a
# failed checksum), then verify it.
if [[ ! -f "/tmp/$TARGET_FILE" ]]; then
echo "Downloading file..."
curl -L -C - --retry 3 --retry-delay 5 --progress-bar -o "/tmp/$TARGET_FILE" "$URI_DOWNLOAD"
# Refresh the checksum file so it matches this download.
if [[ -f "/tmp/$SHA256_FILE" ]]; then
echo "Removing old SHA256 file: /tmp/$SHA256_FILE"
rm -fv "/tmp/$SHA256_FILE"
fi
echo "Downloading SHA256 file..."
curl -L --progress-bar -o "/tmp/$SHA256_FILE" "$URI_SHA256"
# Verify integrity. shasum is a Perl tool and may need manual
# installation on some Linux distributions.
echo "Verifying file integrity for /tmp/$TARGET_FILE..."
cd /tmp || exit 1
if ! shasum -a 256 -c "$SHA256_FILE"; then
echo "Download failed: SHA256 checksum does not match."
# Remove the corrupted installer (mirrors the retry branch above) so a
# later "bash /tmp/$TARGET_FILE" fails loudly instead of executing a
# corrupted script.
rm -fv "/tmp/$TARGET_FILE"
#exit 1
else
echo "File integrity verified successfully."
fi
fi
# Create the install prefix and hand ownership to the current user so
# the installer can then run without sudo.
echo "Installing Miniforge..."
sudo mkdir -pv /opt/Mambaforge
sudo chmod -Rv a+x /opt/Mambaforge
OS=$(uname -s)
# Set ownership of the install directory (quote command substitutions —
# SC2046 — so an unusual user name cannot word-split the argument).
if [[ "$OS" == "Darwin" ]]; then
# macOS administrators belong to the "admin" group.
sudo chown -Rv "$(whoami):admin" /opt/Mambaforge
elif [[ "$OS" == "Linux" ]]; then
# On Linux the primary group usually matches the user name.
sudo chown -Rv "$(whoami):$(whoami)" /opt/Mambaforge
else
echo "Unsupported OS: $OS"
#exit 1
fi
# Run the installer: -b batch mode, -f force overwrite, -p prefix.
bash "/tmp/$TARGET_FILE" -b -f -p /opt/Mambaforge
# Cache/installer cleanup is left disabled: the author observed the
# clean step being destructive on some Linux systems.
echo "Cleaning up..."
#/opt/Mambaforge/bin/conda clean -afy
#rm -fv "/tmp/$TARGET_FILE" "/tmp/$SHA256_FILE"
# Make conda/mamba visible to the rest of this session.
export PATH=/opt/Mambaforge/bin:$PATH
# Some Linux systems may also need Miniforge's lib directory on
# LD_LIBRARY_PATH (and persisted in the shell rc file); left disabled
# because it is not universally required.
#export LD_LIBRARY_PATH=/opt/Mambaforge/lib:$LD_LIBRARY_PATH
# Detect OS and the user's login shell, then make sure conda is
# initialized for that shell and the relevant rc file is (re)loaded.
OS=$(uname -s)
# Current login shell name (e.g. bash/zsh/fish).
CURRENT_SHELL=$(basename "$SHELL")
echo "Detected shell: $CURRENT_SHELL"
echo "Detected OS: $OS"
case "$CURRENT_SHELL" in
bash)
# "conda initialize" is the marker block that `conda init` writes.
if ! grep -q "conda initialize" "$HOME/.bashrc"; then
echo "Initializing conda for bash..."
conda init bash
fi
if [[ "$OS" == "Linux" ]]; then
source "$HOME/.bashrc"
elif [[ "$OS" == "Darwin" ]]; then
source "$HOME/.bash_profile"
else
echo "Unsupported OS: $OS"
fi
;;
zsh)
if ! grep -q "conda initialize" "$HOME/.zshrc"; then
echo "Initializing conda for zsh..."
conda init zsh
fi
if [[ "$OS" == "Linux" ]]; then
source "$HOME/.zshrc"
elif [[ "$OS" == "Darwin" ]]; then
# macOS login shells read .zprofile; chain it to .zshrc so the conda
# hook is picked up in both login and interactive shells.
echo "Ensuring .zshrc is sourced from .zprofile..."
if ! grep -q "source ~/.zshrc" "$HOME/.zprofile"; then
echo "source ~/.zshrc" >> "$HOME/.zprofile"
fi
source "$HOME/.zprofile"
else
echo "Unsupported OS: $OS"
fi
;;
fish)
echo "Initializing mamba for fish..."
mamba init fish
;;
*)
echo "Unsupported shell: $CURRENT_SHELL"
;;
esac
# Update the base tooling.
# NOTE(review): Miniforge ships configured for conda-forge only; pulling
# from "-c defaults" mixes in Anaconda's defaults channel — confirm this
# is intended.
mamba update -n base -c defaults mamba -y
mamba update -n base -c defaults conda -y
安装 ollama git ffmpeg cmake protobuf rust font-noto-sans-sc 等基础工具
# Install base tooling via Homebrew, then run ollama as a background service.
# NOTE(review): font-noto-sans-sc is a cask; `brew install` resolves casks
# automatically on macOS, but verify on a fresh machine.
brew install ollama git ffmpeg cmake protobuf rust font-noto-sans-sc
brew services start ollama
创建 AI python 隔离环境
# Work inside a dedicated project directory.
mkdir -pv ai-project
cd ai-project
# One isolated conda environment per tool, driven by a name=python table
# instead of four copy-pasted commands (same envs, same order).
for ENV_SPEC in \
  "stable-diffusion-webui=3.10.6" \
  "tts=3.10" \
  "sadtalker=3.10" \
  "whisper=3.10"; do
  conda create -n "${ENV_SPEC%%=*}" python="${ENV_SPEC#*=}" -y
done
# Music-generation environment intentionally skipped (local setup failed):
# conda create -n ? python=3.10 -y
Ollama 安装本地模型 mistral 生成多段文案,一条一行写入 texts.txt
ollama run mistral
使用 AUTOMATIC1111 + Mac M系列 GPU 加速
默认执行 webui.sh 会默认下载模型 v1-5-pruned-emaonly.safetensors
Hugging Face 链接: “https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors”
路径: stable-diffusion-webui/models/Stable-diffusion/v1-5-pruned-emaonly.safetensors
可以手动下载安装 realisticVisionV6.1 和 AnythingV5 模型
Hugging Face 链接: “https://huggingface.co/SG161222/Realistic_Vision_V6.0_B1_noVAE/resolve/main/Realistic_Vision_V6.0_NV_B1.safetensors”
Hugging Face 链接: “https://huggingface.co/genai-archive/anything-v5/resolve/main/anything-v5.safetensors”
路径: stable-diffusion-webui/models/Stable-diffusion/
批量生成脚本(生成头像)
# Activate the SD env, fetch AUTOMATIC1111, (re)start the WebUI in the
# background, then pre-download the checkpoints used later.
conda activate stable-diffusion-webui
git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui
pushd stable-diffusion-webui 2>/dev/null
python -m pip install requests
chmod -v a+x ./webui.sh
#./webui.sh --listen
# Kill any previous instance before relaunching.
pkill -f './webui.sh --listen'
pkill -f 'venv/bin/python -u launch.py --listen'
nohup ./webui.sh --listen > ./webui.log 2>&1 &
# Model downloads; skip if already present (`curl -C -` resumes).
RESOURCE_DIR=models/Stable-diffusion
for URI in "https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors" \
"https://huggingface.co/SG161222/Realistic_Vision_V6.0_B1_noVAE/resolve/main/Realistic_Vision_V6.0_NV_B1.safetensors" \
"https://huggingface.co/genai-archive/anything-v5/resolve/main/anything-v5.safetensors";do
# Quote the substitution (SC2086) so URL characters cannot word-split.
TARGET_FILE=$(basename "${URI}")
curl -L -C - --retry 3 --retry-delay 5 --progress-bar -o "${RESOURCE_DIR}/${TARGET_FILE}" "${URI}"
done
popd
安装配音生成模块(TTS 本地高质量语音)
PyTorch 2.6 的 torch.load(…, weights_only=True) 默认仅支持极少数安全类。一旦模型中反序列化用到了内建类如:
collections.defaultdict
dict
TTS.utils.radam.RAdam
就必须手动通过 add_safe_globals() 加白。
# TTS setup inside its own conda env.
conda activate tts
python -m pip install TTS
IFS_BAK=$IFS
IFS=$'\n'
# Relocate the TTS model cache: replace the default directory under
# ~/Library/Application Support with a symlink into the project.
# NOTE(review): rm -frv permanently deletes any previously downloaded
# models in the default location — confirm before running.
rm -frv ${HOME}'/Library/Application Support/tts'
mkdir -pv $(pwd)/TTS
ln -sfv $(pwd)/TTS ${HOME}'/Library/Application Support/tts'
IFS=$IFS_BAK
# Smoke test: synthesize one Chinese sentence with the baker Tacotron2
# model and the universal WaveGrad vocoder.
TTS --model_name "tts_models/zh-CN/baker/tacotron2-DDC-GST" \
--vocoder_name "vocoder_models/universal/libri-tts/wavegrad" \
--text '你好世界!' --out_path ./TTS/output.wav
如果提示以下信息,那需要添加安全反序列化对象
(1) In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
(2) Alternatively, to load with `weights_only=True` please check the recommended steps in the following error message.
WeightsUnpickler error: Unsupported global: GLOBAL TTS.utils.radam.RAdam was not an allowed global by default. Please use `torch.serialization.add_safe_globals([TTS.utils.radam.RAdam])` or the `torch.serialization.safe_globals([TTS.utils.radam.RAdam])` context manager to allowlist this global if you trust this class/function.
# Write TTS_test.py: same synthesis as the CLI call above, but it first
# registers the classes PyTorch 2.6's weights_only loader must trust.
cat << '469138946ba5fa' | tee TTS_test.py
import torch
from torch.serialization import add_safe_globals
import collections
# TTS 特定模块
import TTS.utils.radam
from TTS.utils.synthesizer import Synthesizer
from TTS.utils.manage import ModelManager
# 添加所有必须的安全反序列化对象
add_safe_globals([
TTS.utils.radam.RAdam,
collections.defaultdict,
dict,
])
# 自动获取模型路径
model_manager = ModelManager()
tts_model_name = "tts_models/zh-CN/baker/tacotron2-DDC-GST"
vocoder_model_name = "vocoder_models/universal/libri-tts/wavegrad"
tts_checkpoint, tts_config_path, *_ = model_manager.download_model(tts_model_name)
vocoder_checkpoint, vocoder_config, *_ = model_manager.download_model(vocoder_model_name)
# 初始化合成器
synthesizer = Synthesizer(
tts_checkpoint=tts_checkpoint,
tts_config_path=tts_config_path,
vocoder_checkpoint=vocoder_checkpoint,
vocoder_config=vocoder_config,
use_cuda=False,
)
# 合成语音
wav = synthesizer.tts("你好,世界!这是语音合成测试。")
synthesizer.save_wav(wav, "output.wav")
print("✅ 合成完成,已保存为 output.wav")
469138946ba5fa
# Run the smoke test.
python TTS_test.py
安装 嘴型驱动模块(SadTalker)
# SadTalker setup inside its own conda env.
conda activate sadtalker
git clone https://github.com/OpenTalker/SadTalker.git
pushd SadTalker 2>/dev/null
# install pytorch 2.0
python -m pip install torch torchvision torchaudio
# The project is old; its pins no longer install cleanly without forcing
# the PEP 517 build path.
python -m pip install -r requirements.txt --use-pep517
python -m pip install dlib # macOS needs to install the original dlib.
# NOTE(review): this downgrade overwrites the torch installed above — the
# earlier "install pytorch 2.0" step looks redundant; confirm.
python -m pip install torch==1.12.1 torchvision==0.13.1
# Download the model weights (official helper script).
bash scripts/download_models.sh
# Smoke test: renders a demo video into ./results/.
python inference.py
popd
AI 音乐合成(本地搭建未成功)
视频合成 & 添加字幕/BGM(FFmpeg)
- 添加字幕(使用 Whisper 本地转录)
# Whisper (subtitle transcription) in its own conda env.
conda activate whisper
mkdir -pv whisper
python -m pip install git+https://github.com/openai/whisper.git
IFS_BAK=$IFS
IFS=$'\n'
# Relocate whisper's model cache (~/.cache/whisper) into the project via
# a symlink.
# NOTE(review): rm -frv deletes any previously downloaded models in the
# default cache — confirm before running.
rm -frv ${HOME}'/.cache/whisper'
mkdir -pv $(pwd)/whisper
ln -sfv $(pwd)/whisper ${HOME}'/.cache/whisper'
IFS=$IFS_BAK
# Transcribe the TTS output into an .srt subtitle file.
whisper output.wav --language Chinese --output_format srt
- 合成视频 + 音频 + 字幕 + 字体
# Mux the SadTalker video with the TTS audio and burn in the subtitles.
# NOTE(review): [0:a] assumes the SadTalker MP4 already carries an audio
# track; amix-ing it with output.wav may double the voice — confirm.
ffmpeg -y \
-i SadTalker/results/2025_06_29_12.17.09.mp4 \
-i output.wav \
-filter_complex "[0:a][1:a]amix=inputs=2:duration=first" \
-vf "subtitles=output.srt:force_style='FontName=Noto Sans SC,FontSize=25'" \
"${HOME}/Downloads/output.mp4"
根据流程设想设计自动化主控脚本
ai-project/
├── run_pipeline.sh # ← 主控脚本
├── modules/
│ ├── generate_audio.py # TTS 语音合成
│ ├── generate_image.py # SD 图像生成
│ ├── generate_lipsync.py # SadTalker 嘴型驱动
│ ├── generate_music.py # AI 背景音乐(可选)
│ ├── generate_subtitle.py # Whisper 自动字幕
│ └── merge_video.py # FFmpeg 合成
├── texts.txt # 输入文案,一行一条
└── README.md # 项目说明
终极测试,AI制作视频流程,视频最终输出到 output 中
# Initialize run_pipeline.sh (the master pipeline script)
cat << '469138946ba5fa' > run_pipeline.sh
#!/usr/bin/env bash
set -euo pipefail
# === 全局配置 ===
INPUT_FILE="texts.txt"
OUTPUT_DIR="output"
rm -fr "$OUTPUT_DIR"
mkdir -p "$OUTPUT_DIR"
# 加载 conda 激活脚本
source "$(conda info --base)/etc/profile.d/conda.sh"
# === 启动 WebUI ===
echo "[💥] 清理旧 WebUI..."
pkill -f 'webui.sh --api' || true
pkill -f 'launch.py --api' || true
echo "[🚀] 启动 stable-diffusion-webui..."
pushd stable-diffusion-webui >/dev/null
nohup ./webui.sh --api > ../webui.log 2>&1 &
popd
# 等待 WebUI API 就绪;首次启动需要加载模型,最多等 10 分钟,
# 否则第一次 txt2img 请求会失败并因 set -e 终止整个流水线
echo "[⏳] 等待 WebUI API 就绪..."
for _ in $(seq 1 120); do
  if curl -s -o /dev/null "http://127.0.0.1:7860/sdapi/v1/progress"; then
    break
  fi
  sleep 5
done
# === 重启 ollama 服务并生成文本 ===
echo "[💥] 停止旧 ollama..."
brew services stop ollama || true
echo "[🚀] 启动 ollama..."
brew services start ollama
sleep 10
echo "[📝] 使用 ollama 生成文本..."
ollama run mistral << '469138946ba5fa2' > "$INPUT_FILE"
Write 5 short Chinese motivational quotes, one per line.
469138946ba5fa2
echo "[ℹ️] 文本内容如下:"
cat "$INPUT_FILE"
# === 主循环处理 ===
INDEX=0
while IFS= read -r LINE; do
  [[ -z "$LINE" ]] && continue
  echo -e "\n🚀 处理第 $INDEX 条文本: $LINE"
  TEXT=$(echo "$LINE" | sed -E 's/^[[:space:]]*[0-9]+[.)、.: \-]*[[:space:]]*//')
  echo "清洗后文本: $TEXT"
  # 文件名定义
  AUDIO_NAME="audio_${INDEX}"
  AUDIO_FILE="${AUDIO_NAME}.wav"
  IMAGE_FILE="image_${INDEX}.png"
  VIDEO_FILE="video_lipsync_${INDEX}.mp4"
  SUBTITLE_FILE="${AUDIO_NAME}.srt"
  FINAL_VIDEO="final_${INDEX}.mp4"
  echo "[🎤] 合成语音..."
  conda activate tts
  python modules/generate_audio.py "$TEXT" "$OUTPUT_DIR/$INDEX" "$AUDIO_FILE"
  conda deactivate
  echo "[🖼️ ] 合成图像..."
  conda activate stable-diffusion-webui
  python modules/generate_image.py "$TEXT" "$OUTPUT_DIR/$INDEX" "$IMAGE_FILE"
  conda deactivate
  echo "[👄] 嘴型合成..."
  conda activate sadtalker
  pushd SadTalker >/dev/null
  python ../modules/generate_lipsync.py \
    "../$OUTPUT_DIR/$INDEX/$AUDIO_FILE" \
    "../$OUTPUT_DIR/$INDEX/$IMAGE_FILE" \
    "../$OUTPUT_DIR/$INDEX" \
    "$VIDEO_FILE"
  popd
  conda deactivate
  echo "[📜] 自动字幕..."
  conda activate whisper
  python modules/generate_subtitle.py \
    "$OUTPUT_DIR/$INDEX/$AUDIO_FILE" \
    "$OUTPUT_DIR/$INDEX"
  conda deactivate
  echo "[🎬] 合成视频..."
  python modules/merge_video.py \
    "$OUTPUT_DIR/$INDEX/$VIDEO_FILE" \
    "$OUTPUT_DIR/$INDEX/$AUDIO_FILE" \
    "$OUTPUT_DIR/$INDEX/$SUBTITLE_FILE" \
    "$OUTPUT_DIR/$INDEX/$FINAL_VIDEO"
  echo "✅ 第 $INDEX 条处理完成"
  INDEX=$((INDEX+1))
done < "$INPUT_FILE"
echo -e "\n🏁 所有流程完成,共处理 $INDEX 条文本。"
469138946ba5fa
mkdir -pv modules output
# Write modules/generate_audio.py. Model initialization is lazy so that
# bad CLI usage fails fast without triggering multi-GB model downloads.
cat << '469138946ba5fa' > modules/generate_audio.py
# modules/generate_audio.py
"""Synthesize speech for one line of text using Coqui TTS."""
import sys
import os
import torch
import collections
from torch.serialization import add_safe_globals
from TTS.utils.radam import RAdam
from TTS.utils.synthesizer import Synthesizer
from TTS.utils.manage import ModelManager


def build_synthesizer():
    """Download (if needed) the TTS + vocoder models and return a Synthesizer."""
    # Allow torch's weights_only loader to deserialize these classes.
    add_safe_globals([RAdam, collections.defaultdict, dict])
    model_manager = ModelManager()
    tts_model = "tts_models/zh-CN/baker/tacotron2-DDC-GST"
    vocoder_model = "vocoder_models/universal/libri-tts/wavegrad"
    tts_ckpt, tts_config, *_ = model_manager.download_model(tts_model)
    vocoder_ckpt, vocoder_config, *_ = model_manager.download_model(vocoder_model)
    return Synthesizer(
        tts_checkpoint=tts_ckpt,
        tts_config_path=tts_config,
        vocoder_checkpoint=vocoder_ckpt,
        vocoder_config=vocoder_config,
        use_cuda=False
    )


def main():
    if len(sys.argv) != 4:
        print("用法: python generate_audio.py <text> <output_dir> <output_file>")
        sys.exit(1)
    text = sys.argv[1]
    output_dir = sys.argv[2]
    output_file = sys.argv[3]
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, output_file)
    print(f"[🎤] 合成语音中: {text}")
    # Heavy model setup happens only after arguments are validated.
    synthesizer = build_synthesizer()
    wav = synthesizer.tts(text)
    synthesizer.save_wav(wav, output_path)
    print(f"[🎤] 语音保存到: {output_path}")


if __name__ == "__main__":
    main()
469138946ba5fa
# Ensure the target directory exists so this snippet is standalone.
mkdir -pv modules
cat << '469138946ba5fa' > modules/generate_image.py
# modules/generate_image.py
"""Request a portrait image from the local stable-diffusion-webui API."""
import sys
import os
import base64
import requests


def main():
    if len(sys.argv) != 4:
        print("用法: python generate_image.py <text> <output_dir> <output_file>")
        sys.exit(1)
    text = sys.argv[1]
    output_dir = sys.argv[2]
    output_file = sys.argv[3]
    output_path = os.path.join(output_dir, output_file)
    os.makedirs(output_dir, exist_ok=True)
    print(f"[🖼️ ] 请求 WebUI 生成图像: {text}")
    API_URL = "http://127.0.0.1:7860/sdapi/v1/txt2img"
    payload = {
        "prompt": f"{text}, portrait of a person, front view, realistic face",
        "steps": 20,
        "cfg_scale": 7,
        "width": 512,
        "height": 512,
        "sampler_index": "Euler a",
        "override_settings": {
            "sd_model_checkpoint": "Realistic_Vision_V6.0_NV_B1.safetensors [5d814d2f9c]"
        }
    }
    try:
        # A timeout prevents the pipeline from hanging forever if the
        # WebUI is still booting or has died.
        response = requests.post(API_URL, json=payload, timeout=600)
        response.raise_for_status()
        r = response.json()
        if "images" not in r or not r["images"]:
            raise RuntimeError("API 响应中无图像")
    except Exception as e:
        print(f"[❌] 图像生成失败: {e}")
        sys.exit(1)
    with open(output_path, "wb") as f:
        f.write(base64.b64decode(r["images"][0]))
    print(f"[🖼️ ] 图像保存到: {output_path}")


if __name__ == "__main__":
    main()
469138946ba5fa
# Ensure the target directory exists so this snippet is standalone.
mkdir -pv modules
cat << '469138946ba5fa' > modules/generate_lipsync.py
# modules/generate_lipsync.py
"""Drive a still portrait with audio via SadTalker (run from the SadTalker repo root)."""
import sys
import os
import subprocess


def main():
    if len(sys.argv) != 5:
        print("用法: python generate_lipsync.py <audio_file> <image_file> <output_dir> <output_file>")
        sys.exit(1)
    audio_path = sys.argv[1]
    image_path = sys.argv[2]
    output_dir = sys.argv[3]
    output_file = sys.argv[4]
    os.makedirs(output_dir, exist_ok=True)
    print(f"[👄] 生成嘴型动画: {audio_path} + {image_path}")
    result = subprocess.run([
        "python", "inference.py",
        "--driven_audio", audio_path,
        "--source_image", image_path,
        "--still", "--preprocess", "full", "--enhancer", "gfpgan",
        "--pose_style", "0"
    ])
    # Fail fast instead of silently scanning results/ after a crash.
    if result.returncode != 0:
        print("[❌] SadTalker 推理失败!")
        sys.exit(1)
    # Pick the newest .mp4 in results/ (os.listdir order is arbitrary and
    # older runs may leave stale files behind) and move it into place.
    candidates = [
        os.path.join("results", f)
        for f in os.listdir("results")
        if f.endswith(".mp4")
    ]
    if not candidates:
        print("[❌] 未找到输出视频文件!")
        sys.exit(1)
    src = max(candidates, key=os.path.getmtime)
    dst = os.path.join(output_dir, output_file)
    os.rename(src, dst)
    print(f"[👄] 嘴型视频重命名为: {dst}")


if __name__ == "__main__":
    main()
469138946ba5fa
# Ensure the target directory exists so this snippet is standalone.
mkdir -pv modules
cat << '469138946ba5fa' > modules/generate_music.py
# modules/generate_music.py
"""Placeholder background-music generator (not implemented yet)."""
import sys
import os


def main():
    if len(sys.argv) != 3:
        print("用法: python generate_music.py <output_dir> <output_file>")
        sys.exit(1)
    output_dir = sys.argv[1]
    output_file = sys.argv[2]
    output_path = os.path.join(output_dir, output_file)
    print(f"[🎵] 生成背景音乐(占位,未实现)")
    # 未来可调用 MusicGen/Riffusion 写入 output_path


if __name__ == "__main__":
    main()
469138946ba5fa
# Ensure the target directory exists so this snippet is standalone.
mkdir -pv modules
cat << '469138946ba5fa' > modules/generate_subtitle.py
# modules/generate_subtitle.py
"""Transcribe an audio file to an .srt subtitle via the whisper CLI."""
import sys
import os
import subprocess


def main():
    # Validate arguments like the sibling modules do, instead of crashing
    # with a raw IndexError on bad usage.
    if len(sys.argv) != 3:
        print("用法: python generate_subtitle.py <audio_path> <output_dir>")
        sys.exit(1)
    audio_path = sys.argv[1]
    output_dir = sys.argv[2]
    print(f"[📜] 生成字幕 for {audio_path}")
    os.makedirs(output_dir, exist_ok=True)
    subprocess.run([
        "whisper", audio_path,
        "--language", "Chinese",
        "--output_format", "srt",
        "--output_dir", output_dir
    ])
    # whisper names the .srt after the audio file's basename.
    basename = os.path.splitext(os.path.basename(audio_path))[0]
    print(f"[📜] 字幕保存为 {os.path.join(output_dir, f'{basename}.srt')}")


if __name__ == "__main__":
    main()
469138946ba5fa
# Ensure the target directory exists so this snippet is standalone.
mkdir -pv modules
cat << '469138946ba5fa' > modules/merge_video.py
# modules/merge_video.py
"""Merge lipsync video + audio + burned-in subtitles into the final MP4."""
import sys
import os
import subprocess


def main():
    if len(sys.argv) != 5:
        print("用法: python merge_video.py <video_path> <audio_path> <subtitle_path> <output_path>")
        sys.exit(1)
    video_path = sys.argv[1]
    audio_path = sys.argv[2]
    subtitle_path = sys.argv[3]
    final_path = sys.argv[4]
    print(f"[🎬] 合成最终视频: 添加音频 + 字幕")
    # NOTE(review): [0:a] assumes the lipsync video carries an audio
    # track — confirm SadTalker output always does.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-i", audio_path,
        "-filter_complex", "[0:a][1:a]amix=inputs=2:duration=first",
        "-vf", f"subtitles='{subtitle_path}':force_style='FontName=Noto Sans SC,FontSize=25'",
        final_path
    ]
    result = subprocess.run(cmd)
    # Surface ffmpeg failures instead of reporting success unconditionally.
    if result.returncode != 0:
        print("[❌] ffmpeg 合成失败!")
        sys.exit(1)
    print(f"[🎬] 输出最终视频到: {final_path}")


if __name__ == "__main__":
    main()
469138946ba5fa
# Make the master script executable and kick off the full pipeline.
chmod -v a+x run_pipeline.sh
./run_pipeline.sh
效果,额,不知道为什么,我生成的视频都很色情,唉,看来还得优化一下,要不然根本没法发啊,唉😮💨
Comments
Post a Comment