安装 Termux
如无特别说明,以下操作均在 Termux 中进行。
安装 llama.cpp
$ wget https://github.com/ggml-org/llama.cpp/releases/download/b9222/llama-b9222-bin-android-arm64.tar.gz
$ tar -xvf llama-b9222-bin-android-arm64.tar.gz # 解压后应该得到一个 llama-b9222 目录,个人喜欢将它重命名为 llama
$ mv llama-b9222 llama
$ echo 'PATH=$PATH:'`pwd`'/llama' >> ~/.bashrc # 让 llama 的可执行文件可直接执行
$ echo 'export LD_LIBRARY_PATH="'`pwd`'/llama:$LD_LIBRARY_PATH"' >> ~/.bashrc # 让 llama 可以正常连接动态库
$ . ~/.bashrc
$ llama-server -h # 测试一下
下载大模型
$ mkdir MiniCPM-V-4.6
$ cd MiniCPM-V-4.6
$ wget https://modelscope.cn/models/OpenBMB/MiniCPM-V-4.6-gguf/resolve/master/MiniCPM-V-4_6-Q4_K_M.gguf
$ wget https://modelscope.cn/models/OpenBMB/MiniCPM-V-4.6-gguf/resolve/master/mmproj-model-f16.gguf
安装 ffmpeg
$ pkg upgrade
$ pkg install ffmpeg
下载 python 及前端代码
# app.py
import os
import subprocess
import tempfile
import shutil
import base64
import requests
from pathlib import Path
from flask import Flask, render_template, request, jsonify
# Flask application instance; the route decorators further down register handlers on it.
app = Flask(__name__)
# ================= Configuration =================
# Base URL of the llama-server instance; override with the LLAMA_SERVER_URL env var.
LLAMA_SERVER_URL = os.environ.get("LLAMA_SERVER_URL", "http://localhost:8080")
# Preferred model id, read from the DEFAULT_MODEL env var.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "") # empty means: auto-select the first model
# Default for extract_frames' max_frames: upper bound on frames extracted per video.
MAX_FRAMES = 16
# Default for extract_frames' interval: passed to ffmpeg as fps=1/interval.
FRAME_INTERVAL = 2
# ================= Core functionality =================
def extract_frames(video_path, output_dir, max_frames=MAX_FRAMES, interval=FRAME_INTERVAL):
    """Extract still frames from a video with ffmpeg.

    Frames are written to ``output_dir`` as ``frame_0001.jpg``,
    ``frame_0002.jpg``, ... using the filter ``fps=1/interval``.

    Args:
        video_path: Path of the input video file.
        output_dir: Existing directory that receives the JPEG frames.
        max_frames: Maximum number of frames ffmpeg may write.
        interval: Value used in the ``fps=1/{interval}`` filter expression.

    Returns:
        Sorted list of the extracted frame paths, as strings.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    output_pattern = os.path.join(output_dir, "frame_%04d.jpg")
    cmd = [
        "ffmpeg",
        # Never let ffmpeg read the server process's stdin.
        "-nostdin",
        # Global options must precede the output file; placed after it,
        # ffmpeg reports them as trailing options.
        "-y",
        "-i", video_path,
        "-vf", f"fps=1/{interval}",
        "-frames:v", str(max_frames),
        "-q:v", "2",
        output_pattern,
    ]
    subprocess.run(cmd, capture_output=True, check=True)
    frames = sorted(Path(output_dir).glob("frame_*.jpg"))
    return [str(f) for f in frames]
def image_to_base64(image_path):
    """Read the file at ``image_path`` and return its bytes as Base64 text."""
    raw_bytes = Path(image_path).read_bytes()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def call_llama_api(messages, model_name, max_tokens=512, temperature=0.1):
    """Send a non-streaming chat request to llama-server's OpenAI-compatible API.

    Args:
        messages: Chat messages list placed in the request's ``messages`` field.
        model_name: Value sent as the request's ``model`` field.
        max_tokens: Value sent as ``max_tokens``.
        temperature: Value sent as ``temperature``.

    Returns:
        The ``content`` of the first choice's message in the JSON response.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout
            (120 s) or an HTTP error status.
        KeyError, IndexError: If the response lacks the expected fields.
    """
    endpoint = f"{LLAMA_SERVER_URL}/v1/chat/completions"
    request_body = dict(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        stream=False,
    )
    response = requests.post(endpoint, json=request_body, timeout=120)
    response.raise_for_status()
    first_choice = response.json()["choices"][0]
    return first_choice["message"]["content"]
def analyze_video(video_path, question, model_name):
    """Describe extracted frames one by one, then answer ``question`` from them.

    Frames are extracted into a temporary directory, each frame is sent to the
    model for a description, and the descriptions are combined into one final
    prompt together with the user's question.

    Args:
        video_path: Path of the video file to analyse.
        question: The user's question, used only in the final summary prompt.
        model_name: Model id forwarded to every call_llama_api request.

    Returns:
        ``{"success": True, "frames_analyzed": int, "answer": str}`` on
        success, otherwise ``{"error": str}``. Exceptions are not propagated;
        they are printed and reported through the ``error`` key.
    """
    work_dir = tempfile.mkdtemp()
    try:
        print("正在用 ffmpeg 提取视频帧...")
        frame_paths = extract_frames(video_path, work_dir)
        total = len(frame_paths)
        if total == 0:
            return {"error": "未能提取到视频帧"}
        print(f"提取到 {total} 帧")

        # Pass 1: ask the model to describe every frame individually.
        descriptions = []
        for number, path in enumerate(frame_paths, start=1):
            print(f"正在分析第 {number}/{total} 帧...")
            encoded_frame = image_to_base64(path)
            image_part = {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{encoded_frame}"},
            }
            text_part = {
                "type": "text",
                "text": f"这是视频的第 {number} 帧。请描述画面的关键内容:场景、人物、动作、文字等。",
            }
            frame_messages = [{"role": "user", "content": [image_part, text_part]}]
            desc = call_llama_api(frame_messages, model_name, max_tokens=256)
            descriptions.append(f"第{number}帧: {desc}")

        # Pass 2: combine the per-frame descriptions into one text-only request.
        print("正在汇总分析...")
        combined = "\n\n".join(descriptions)
        summary_prompt = (
            f"用户问题:{question}\n\n以下是视频不同时间点的画面描述,请综合这些信息,用流畅的中文给出完整回答。\n\n{combined}"
        )
        final_answer = call_llama_api(
            [{"role": "user", "content": summary_prompt}],
            model_name,
            max_tokens=1024,
        )
        return {
            "success": True,
            "frames_analyzed": total,
            "answer": final_answer,
        }
    except Exception as exc:
        import traceback
        traceback.print_exc()
        return {"error": f"分析失败: {str(exc)}"}
    finally:
        # Remove the extracted frames whether or not the analysis succeeded.
        shutil.rmtree(work_dir, ignore_errors=True)
# ================= API: list available models =================
@app.route('/api/models')
def get_models():
    """Return the model ids reported by llama-server and the default choice.

    The JSON body has ``models`` (list of ids) and ``default``. The default is
    DEFAULT_MODEL when it is set and present in the list, otherwise the first
    listed model, otherwise an empty string. If the request to llama-server
    fails, the body carries empty values plus an ``error`` message and the
    status is still 200.
    """
    try:
        upstream = requests.get(f"{LLAMA_SERVER_URL}/v1/models", timeout=5)
        upstream.raise_for_status()
        entries = upstream.json().get("data", [])
        models = [entry["id"] for entry in entries]
    except requests.exceptions.RequestException as exc:
        failure = {
            "models": [],
            "default": "",
            "error": f"无法连接 llama-server: {str(exc)}",
        }
        return jsonify(failure), 200  # respond 200 so the front end can degrade gracefully

    # Decide which model the UI should preselect.
    if DEFAULT_MODEL and DEFAULT_MODEL in models:
        default = DEFAULT_MODEL
    elif models:
        default = models[0]
    else:
        default = ""
    return jsonify({
        "models": models,
        "default": default,
    })
@app.route('/')
def index():
    """Serve the front-end page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/analyze', methods=['POST'])
def analyze():
    """Handle a video upload and return the analysis result as JSON.

    Reads the multipart fields ``video`` (file), ``question`` and ``model``.
    Responds 400 with an ``error`` message when the video or model is missing.
    Otherwise the upload is saved to a temporary file, passed to
    analyze_video, and that function's result dict is returned as JSON. The
    temporary file is always deleted afterwards.
    """
    default_question = '请描述这个视频的内容'
    video_file = request.files.get('video')
    question = request.form.get('question', '')
    # The page always submits the field, so a blank textarea arrives as an
    # empty string rather than a missing key; fall back to the default then.
    if not question.strip():
        question = default_question
    model_name = request.form.get('model', '')
    if not video_file:
        return jsonify({"error": "请上传视频文件"}), 400
    if not model_name:
        return jsonify({"error": "请选择模型"}), 400

    # NamedTemporaryFile is used only to reserve a unique path; close our
    # handle immediately so it cannot leak if saving the upload fails.
    temp_video = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
    temp_video.close()
    try:
        video_file.save(temp_video.name)
        result = analyze_video(temp_video.name, question, model_name)
        return jsonify(result)
    finally:
        if os.path.exists(temp_video.name):
            os.unlink(temp_video.name)
if __name__ == '__main__':
    # Print the effective configuration before starting the server.
    banner = [f"llama-server 地址: {LLAMA_SERVER_URL}"]
    if DEFAULT_MODEL:
        banner.append(f"默认模型: {DEFAULT_MODEL}")
    print("\n".join(banner))
    # host 0.0.0.0 binds all network interfaces, not just localhost.
    app.run(host='0.0.0.0', port=7860, debug=False)
templates/index.html。
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>视频分析</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
min-height: 100vh;
padding: 20px;
color: #e0e0e0;
}
.container {
max-width: 850px;
margin: 0 auto;
background: rgba(255,255,255,0.05);
backdrop-filter: blur(10px);
border-radius: 20px;
padding: 35px;
border: 1px solid rgba(255,255,255,0.1);
box-shadow: 0 25px 70px rgba(0,0,0,0.4);
}
h1 {
text-align: center;
color: #fff;
margin-bottom: 6px;
font-size: 30px;
letter-spacing: 1px;
}
.subtitle {
text-align: center;
color: #a0a0b8;
margin-bottom: 30px;
font-size: 14px;
}
.server-status {
display: flex;
align-items: center;
justify-content: center;
gap: 8px;
margin-bottom: 20px;
font-size: 13px;
}
.status-dot {
width: 8px;
height: 8px;
border-radius: 50%;
background: #666;
}
.status-dot.connected { background: #4ade80; }
.status-dot.disconnected { background: #f87171; }
.model-select-area {
margin-bottom: 22px;
}
.model-select-area label {
display: block;
margin-bottom: 8px;
font-weight: 600;
color: #d0d0e0;
font-size: 14px;
}
.model-select-area select {
width: 100%;
padding: 12px;
background: rgba(0,0,0,0.3);
border: 1px solid rgba(255,255,255,0.15);
border-radius: 10px;
color: #e0e0e0;
font-size: 14px;
cursor: pointer;
}
.model-select-area select:focus {
outline: none;
border-color: #7c5cfc;
}
.model-select-area select option {
background: #1a1a2e;
color: #e0e0e0;
}
.upload-area {
border: 2px dashed rgba(255,255,255,0.25);
border-radius: 16px;
padding: 45px 30px;
text-align: center;
cursor: pointer;
transition: all 0.3s;
background: rgba(255,255,255,0.03);
margin-bottom: 22px;
}
.upload-area:hover {
border-color: #7c5cfc;
background: rgba(124,92,252,0.08);
}
.upload-area.dragover {
border-color: #7c5cfc;
background: rgba(124,92,252,0.15);
transform: scale(1.01);
}
.upload-icon { font-size: 50px; margin-bottom: 12px; }
.upload-area p { color: #c0c0d0; }
.upload-area .file-name { color: #7c5cfc; font-weight: 600; margin-top: 8px; }
#videoInput { display: none; }
.question-area { margin-bottom: 22px; }
.question-area label {
display: block;
margin-bottom: 10px;
font-weight: 600;
color: #d0d0e0;
font-size: 15px;
}
.preset-questions {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-bottom: 14px;
}
.preset-btn {
padding: 8px 18px;
background: rgba(124,92,252,0.15);
border: 1px solid rgba(124,92,252,0.4);
border-radius: 20px;
cursor: pointer;
font-size: 13px;
color: #b8b0f0;
transition: all 0.2s;
}
.preset-btn:hover {
background: rgba(124,92,252,0.35);
color: #fff;
border-color: #7c5cfc;
}
.question-area textarea {
width: 100%;
padding: 14px;
background: rgba(0,0,0,0.3);
border: 1px solid rgba(255,255,255,0.15);
border-radius: 10px;
font-size: 14px;
color: #e0e0e0;
resize: vertical;
min-height: 65px;
font-family: inherit;
}
.question-area textarea:focus {
outline: none;
border-color: #7c5cfc;
}
.btn {
width: 100%;
padding: 16px;
background: linear-gradient(135deg, #7c5cfc 0%, #5b3cc4 100%);
color: white;
border: none;
border-radius: 12px;
font-size: 16px;
font-weight: 600;
cursor: pointer;
transition: all 0.3s;
letter-spacing: 1px;
}
.btn:hover:not(:disabled) {
transform: translateY(-2px);
box-shadow: 0 10px 30px rgba(124,92,252,0.4);
}
.btn:disabled { opacity: 0.4; cursor: not-allowed; }
.status {
text-align: center;
margin-top: 18px;
color: #9090a8;
font-size: 14px;
}
.result-area {
margin-top: 30px;
display: none;
}
.result-area.show { display: block; }
.result-area h3 {
color: #d0d0e0;
margin-bottom: 12px;
font-size: 18px;
}
.result-box {
background: rgba(0,0,0,0.35);
border-radius: 12px;
padding: 22px;
white-space: pre-wrap;
word-wrap: break-word;
max-height: 450px;
overflow-y: auto;
border-left: 4px solid #7c5cfc;
line-height: 1.7;
color: #d0d0d0;
font-size: 14px;
}
.result-info {
font-size: 12px;
color: #8080a0;
margin-top: 10px;
}
#videoPreview {
max-width: 100%;
max-height: 300px;
margin: 18px auto;
display: none;
border-radius: 10px;
}
</style>
</head>
<body>
<div class="container">
<h1>🎬 视频分析</h1>
<p class="subtitle">llama.cpp API 模式 · 上传视频即分析</p>
<div class="server-status">
<span class="status-dot" id="statusDot"></span>
<span id="statusText">正在连接 llama-server...</span>
</div>
<div class="model-select-area">
<label>🤖 选择模型</label>
<select id="modelSelect">
<option value="">加载中...</option>
</select>
</div>
<div class="upload-area" id="uploadArea">
<div class="upload-icon">📁</div>
<p>点击或拖拽上传视频文件</p>
<p style="font-size:12px;color:#8080a0;">支持 MP4, AVI, MOV, MKV 等格式</p>
<p class="file-name" id="fileName"></p>
</div>
<input type="file" id="videoInput" accept="video/*">
<video id="videoPreview" controls></video>
<div class="question-area">
<label>📝 你想了解视频的什么信息?</label>
<div class="preset-questions">
<span class="preset-btn" onclick="setQuestion('请详细描述这个视频的内容,包括场景、人物、动作等')">🎯 详细描述</span>
<span class="preset-btn" onclick="setQuestion('这个视频中发生了什么事件?请按时间顺序描述')">⏱️ 发生了什么</span>
<span class="preset-btn" onclick="setQuestion('视频中有哪些物体和人物?他们在做什么?')">🔍 物体和人物</span>
<span class="preset-btn" onclick="setQuestion('用一句话总结这个视频的主要内容')">📝 一句话总结</span>
</div>
<textarea id="question" placeholder="输入你的问题...">请详细描述这个视频的内容,包括场景、人物、动作等</textarea>
</div>
<button class="btn" id="analyzeBtn" disabled>🚀 开始分析</button>
<div class="status" id="status"></div>
<div class="result-area" id="resultArea">
<h3>📊 分析结果</h3>
<div class="result-box" id="resultBox"></div>
<div class="result-info" id="resultInfo"></div>
</div>
</div>
<script>
// Cached references to the page elements used by the handlers in this script.
const uploadArea = document.getElementById('uploadArea');
const videoInput = document.getElementById('videoInput');
const videoPreview = document.getElementById('videoPreview');
const analyzeBtn = document.getElementById('analyzeBtn');
const questionInput = document.getElementById('question');
const statusDiv = document.getElementById('status');
const resultArea = document.getElementById('resultArea');
const resultBox = document.getElementById('resultBox');
const resultInfo = document.getElementById('resultInfo');
const fileNameP = document.getElementById('fileName');
const modelSelect = document.getElementById('modelSelect');
const statusDot = document.getElementById('statusDot');
const statusText = document.getElementById('statusText');
// The File accepted by handleFile; stays null until a video has been chosen.
let selectedFile = null;
// ===== Load the model list =====
/**
 * Fetch /api/models, then update the connection indicator and fill the model
 * <select>. When the backend reports an error, the request fails, or the
 * server lists no models, the select shows a single placeholder option with
 * an empty value.
 */
async function loadModels() {
    // Replace the select's contents with one empty-valued placeholder option.
    const showPlaceholder = (label) => {
        modelSelect.innerHTML = '';
        const placeholder = document.createElement('option');
        placeholder.value = '';
        placeholder.textContent = label;
        modelSelect.appendChild(placeholder);
    };
    const showDisconnected = (statusMessage, optionLabel) => {
        statusDot.className = 'status-dot disconnected';
        statusText.textContent = statusMessage;
        showPlaceholder(optionLabel);
    };

    try {
        const resp = await fetch('/api/models');
        const data = await resp.json();
        if (data.error) {
            // The backend answered but could not reach llama-server.
            showDisconnected('llama-server 未连接', '无可用模型');
            return;
        }
        statusDot.className = 'status-dot connected';
        statusText.textContent = 'llama-server 已连接';

        // Guard against a missing or empty list so the select is never left
        // blank and a malformed payload is not reported as a connection error.
        const models = Array.isArray(data.models) ? data.models : [];
        if (models.length === 0) {
            showPlaceholder('无可用模型');
            return;
        }

        modelSelect.innerHTML = '';
        models.forEach(model => {
            const opt = document.createElement('option');
            opt.value = model;
            opt.textContent = model;
            if (model === data.default) {
                opt.selected = true;
            }
            modelSelect.appendChild(opt);
        });
    } catch (err) {
        showDisconnected('无法连接后端服务', '连接失败');
    }
}
// Fetch the model list as soon as the script runs.
loadModels();

// ===== File upload wiring =====
// Clicking the drop zone opens the hidden file picker.
uploadArea.addEventListener('click', function () {
    videoInput.click();
});

// A file chosen through the picker.
videoInput.addEventListener('change', function (event) {
    const picked = event.target.files;
    if (picked.length > 0) {
        handleFile(picked[0]);
    }
});

// Drag-and-drop: highlight while hovering, accept the first dropped file.
uploadArea.addEventListener('dragover', function (event) {
    event.preventDefault();
    uploadArea.classList.add('dragover');
});

uploadArea.addEventListener('dragleave', function () {
    uploadArea.classList.remove('dragover');
});

uploadArea.addEventListener('drop', function (event) {
    event.preventDefault();
    uploadArea.classList.remove('dragover');
    const dropped = event.dataTransfer.files;
    if (dropped.length > 0) {
        handleFile(dropped[0]);
    }
});
/**
 * Accept a user-chosen file as the video to analyse: remember it, enable the
 * analyze button, show its name and load it into the preview player.
 * Non-video files are rejected with an alert.
 */
function handleFile(file) {
    // File.type is an empty string when the browser cannot determine the
    // type, so fall back to the file extension in that case.
    const looksLikeVideo = file.type
        ? file.type.startsWith('video/')
        : /\.(mp4|m4v|mov|avi|mkv|webm|flv|wmv|3gp|ts)$/i.test(file.name);
    if (!looksLikeVideo) {
        alert('请上传视频文件!');
        return;
    }
    selectedFile = file;
    analyzeBtn.disabled = false;
    fileNameP.textContent = `✅ ${file.name}`;
    // Release the previous preview's object URL before creating a new one.
    if (videoPreview.src.startsWith('blob:')) {
        URL.revokeObjectURL(videoPreview.src);
    }
    videoPreview.src = URL.createObjectURL(file);
    videoPreview.style.display = 'block';
}
/** Replace the question textarea's content with the given preset text. */
function setQuestion(text) {
    questionInput.value = text;
}
// ===== Analyze button =====
// Upload the selected video with the question and model to /analyze, then
// show either the answer or an error message in the result area.
analyzeBtn.addEventListener('click', async () => {
    if (!selectedFile) return;
    const modelName = modelSelect.value;
    if (!modelName) {
        alert('请先选择模型');
        return;
    }

    // Show an error as plain text. The message can contain server or
    // exception text, so it is set via textContent instead of being
    // interpolated into innerHTML.
    const showError = (message) => {
        const span = document.createElement('span');
        span.style.color = '#ff6b6b';
        span.textContent = message;
        resultBox.textContent = '';
        resultBox.appendChild(span);
        // Drop the info line left over from an earlier successful run.
        resultInfo.textContent = '';
        resultArea.classList.add('show');
    };

    const formData = new FormData();
    formData.append('video', selectedFile);
    formData.append('question', questionInput.value);
    formData.append('model', modelName);

    analyzeBtn.disabled = true;
    analyzeBtn.textContent = '⏳ 分析中...';
    statusDiv.textContent = '正在处理,请耐心等待...';
    resultArea.classList.remove('show');

    try {
        const response = await fetch('/analyze', {
            method: 'POST',
            body: formData,
        });
        const data = await response.json();
        if (data.error) {
            showError(`❌ 错误: ${data.error}`);
            statusDiv.textContent = '分析失败';
        } else {
            resultBox.textContent = data.answer;
            resultInfo.textContent = `使用模型: ${modelName} | 共分析了 ${data.frames_analyzed} 帧画面`;
            statusDiv.textContent = '✅ 分析完成!';
            resultArea.classList.add('show');
        }
    } catch (err) {
        showError(`❌ 请求失败: ${err.message}`);
        statusDiv.textContent = '连接错误';
    } finally {
        // Re-enable the button whether the request succeeded or failed.
        analyzeBtn.disabled = false;
        analyzeBtn.textContent = '🚀 开始分析';
    }
});
</script>
</body>
</html>
安装依赖。
$ pip install flask requests
启动服务
$ nohup llama-server -m MiniCPM-V-4_6-Q4_K_M.gguf -mm mmproj-model-f16.gguf &
$ python app.py