Enter keywords to search...

安卓平板部署本地大模型提供视频分析服务

安装 Termux

如无特别说明,以下操作均在 Termux 中进行。

安装 llama.cpp

$ wget https://github.com/ggml-org/llama.cpp/releases/download/b9222/llama-b9222-bin-android-arm64.tar.gz
$ tar -xvf llama-b9222-bin-android-arm64.tar.gz  # 解压后应该得到一个 llama-b9222 目录,个人喜欢将它重命名为 llama
$ mv llama-b9222 llama
$ echo 'PATH=$PATH:'`pwd`'/llama' >> ~/.bashrc  # 让 llama 的可执行文件可直接执行
$ echo 'export LD_LIBRARY_PATH="'`pwd`'/llama:$LD_LIBRARY_PATH"' >> ~/.bashrc  # 让 llama 可以正常连接动态库
$ . ~/.bashrc
$ llama-server -h  # 测试一下

下载大模型

$ mkdir MiniCPM-V-4.6
$ cd MiniCPM-V-4.6
$ wget https://modelscope.cn/models/OpenBMB/MiniCPM-V-4.6-gguf/resolve/master/MiniCPM-V-4_6-Q4_K_M.gguf
$ wget https://modelscope.cn/models/OpenBMB/MiniCPM-V-4.6-gguf/resolve/master/mmproj-model-f16.gguf

安装 ffmpeg

$ pkg upgrade
$ pkg install ffmpeg

下载 python 及前端代码

# app.py
import os
import subprocess
import tempfile
import shutil
import base64
import requests
from pathlib import Path
from flask import Flask, render_template, request, jsonify

# Flask application object; the route decorators in this file register on it.
app = Flask(__name__)

# ================= Configuration =================
# Base URL of the llama-server instance whose HTTP API this app calls.
LLAMA_SERVER_URL = os.getenv("LLAMA_SERVER_URL", "http://localhost:8080")
# Preferred model id; when empty, the first model reported by the server is used.
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "")

# Frame sampling defaults used by extract_frames(): at most MAX_FRAMES frames,
# one frame every FRAME_INTERVAL seconds.
MAX_FRAMES, FRAME_INTERVAL = 16, 2


# ================= 核心功能 =================
def extract_frames(video_path, output_dir, max_frames=MAX_FRAMES, interval=FRAME_INTERVAL):
    """Sample still frames from a video with ffmpeg.

    One JPEG is written every ``interval`` seconds (filter ``fps=1/interval``),
    stopping after ``max_frames`` frames. Files are named ``frame_0001.jpg``,
    ``frame_0002.jpg``, ... inside ``output_dir``.

    Args:
        video_path: Path of the input video file.
        output_dir: Existing directory that receives the JPEG frames.
        max_frames: Maximum number of frames to write; must be positive.
        interval: Seconds between sampled frames; must be positive.

    Returns:
        The extracted frame paths as strings, sorted by file name.

    Raises:
        ValueError: If ``max_frames`` or ``interval`` is not positive.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Fail early with a clear message instead of an opaque ffmpeg error
    # (interval=0 would otherwise produce the filter "fps=1/0").
    if max_frames <= 0 or interval <= 0:
        raise ValueError("max_frames and interval must be positive")

    output_pattern = os.path.join(output_dir, "frame_%04d.jpg")
    cmd = [
        "ffmpeg",
        # Global flags are kept ahead of the input/output arguments rather
        # than trailing after the output path.
        "-nostdin",  # never wait for interactive input
        "-y",        # overwrite existing output files without asking
        "-i", video_path,
        "-vf", f"fps=1/{interval}",
        "-frames:v", str(max_frames),
        "-q:v", "2",
        output_pattern,
    ]
    # stdin is detached so ffmpeg cannot consume the server process's stdin.
    subprocess.run(cmd, stdin=subprocess.DEVNULL, capture_output=True, check=True)
    frames = sorted(Path(output_dir).glob("frame_*.jpg"))
    return [str(f) for f in frames]


def image_to_base64(image_path):
    """Return the raw bytes of the file at ``image_path`` as a base64 string."""
    raw_bytes = Path(image_path).read_bytes()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


def call_llama_api(messages, model_name, max_tokens=512, temperature=0.1):
    """Send one non-streaming chat completion request to llama-server.

    The request is POSTed to ``/v1/chat/completions`` under
    ``LLAMA_SERVER_URL`` with a 120 second timeout.

    Args:
        messages: Chat messages to send, passed through unchanged.
        model_name: Value of the ``model`` field in the request.
        max_tokens: Value of the ``max_tokens`` field in the request.
        temperature: Value of the ``temperature`` field in the request.

    Returns:
        The ``content`` of the message in the first returned choice.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status (raised by ``raise_for_status``).
    """
    endpoint = f"{LLAMA_SERVER_URL}/v1/chat/completions"
    request_body = {
        "model": model_name,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stream": False,
    }

    response = requests.post(endpoint, json=request_body, timeout=120)
    response.raise_for_status()

    first_choice = response.json()["choices"][0]
    return first_choice["message"]["content"]


def analyze_video(video_path, question, model_name):
    """Describe a video frame by frame, then answer ``question`` from the descriptions.

    Frames are extracted into a temporary directory, each frame is described
    by the model in its own request, and a final text-only request combines
    the descriptions with the user's question. The temporary directory is
    removed before returning, whatever the outcome.

    Args:
        video_path: Path of the video file to analyze.
        question: The user's question about the video.
        model_name: Model id forwarded to every llama-server request.

    Returns:
        On success, a dict with ``success``, ``frames_analyzed`` and
        ``answer``. On failure, a dict with a single ``error`` message; any
        exception is printed with its traceback and reported this way rather
        than propagated.
    """
    work_dir = tempfile.mkdtemp()
    try:
        print("正在用 ffmpeg 提取视频帧...")
        frame_paths = extract_frames(video_path, work_dir)
        if not frame_paths:
            return {"error": "未能提取到视频帧"}
        total = len(frame_paths)
        print(f"提取到 {total} 帧")

        # Pass 1: one vision request per frame.
        descriptions = []
        for number, frame_path in enumerate(frame_paths, start=1):
            print(f"正在分析第 {number}/{total} 帧...")

            encoded_frame = image_to_base64(frame_path)
            image_part = {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{encoded_frame}"
                },
            }
            text_part = {
                "type": "text",
                "text": f"这是视频的第 {number} 帧。请描述画面的关键内容:场景、人物、动作、文字等。",
            }
            frame_messages = [{"role": "user", "content": [image_part, text_part]}]

            description = call_llama_api(frame_messages, model_name, max_tokens=256)
            descriptions.append(f"第{number}帧: {description}")

        # Pass 2: text-only request that merges the per-frame descriptions.
        print("正在汇总分析...")
        joined_descriptions = "\n\n".join(descriptions)
        summary_prompt = (
            f"用户问题:{question}\n\n以下是视频不同时间点的画面描述,请综合这些信息,用流畅的中文给出完整回答。\n\n{joined_descriptions}"
        )
        final_answer = call_llama_api(
            [{"role": "user", "content": summary_prompt}],
            model_name,
            max_tokens=1024,
        )

        return {
            "success": True,
            "frames_analyzed": total,
            "answer": final_answer,
        }

    except Exception as exc:
        # Boundary handler: log the full traceback, report a short message.
        import traceback
        traceback.print_exc()
        return {"error": f"分析失败: {str(exc)}"}
    finally:
        shutil.rmtree(work_dir, ignore_errors=True)


# ================= API:获取模型列表 =================
@app.route('/api/models')
def get_models():
    """Return the model ids reported by llama-server plus the default choice.

    The default is ``DEFAULT_MODEL`` when it is set and present in the list;
    otherwise the first listed model, or an empty string when the list is
    empty. If llama-server cannot be reached, the response is still HTTP 200
    with an empty list and an ``error`` message so the front end can degrade
    gracefully.
    """
    try:
        response = requests.get(f"{LLAMA_SERVER_URL}/v1/models", timeout=5)
        response.raise_for_status()
        available = [entry["id"] for entry in response.json().get("data", [])]
    except requests.exceptions.RequestException as exc:
        failure = {
            "models": [],
            "default": "",
            "error": f"无法连接 llama-server: {str(exc)}"
        }
        return jsonify(failure), 200  # 200 on purpose: the page handles the error field

    # Pick the default model.
    if DEFAULT_MODEL and DEFAULT_MODEL in available:
        chosen = DEFAULT_MODEL
    else:
        chosen = available[0] if available else ""

    return jsonify({
        "models": available,
        "default": chosen,
    })


@app.route('/')
def index():
    """Serve the front-end page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page


@app.route('/analyze', methods=['POST'])
def analyze():
    """Handle a video upload and return the analysis result as JSON.

    Form fields:
        video: The uploaded video file (required).
        question: The user's question; a blank or missing value falls back
            to a generic "describe this video" prompt.
        model: Model id to use (required).

    Returns:
        HTTP 400 with an ``error`` message when the video or model is
        missing; otherwise the dict produced by ``analyze_video`` as JSON.
    """
    default_question = '请描述这个视频的内容'

    video_file = request.files.get('video')
    question = request.form.get('question', '')
    model_name = request.form.get('model', '')

    # The front end always sends the field, so a cleared textarea arrives as
    # an empty string rather than as a missing key.
    if not question.strip():
        question = default_question

    if not video_file:
        return jsonify({"error": "请上传视频文件"}), 400
    if not model_name:
        return jsonify({"error": "请选择模型"}), 400

    # Reserve a unique path and close the handle immediately, so no open file
    # object is leaked if saving or analysis raises.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_video:
        temp_path = temp_video.name
    try:
        video_file.save(temp_path)
        result = analyze_video(temp_path, question, model_name)
        return jsonify(result)
    finally:
        if os.path.exists(temp_path):
            os.unlink(temp_path)


if __name__ == '__main__':
    # Echo the effective configuration before the server starts.
    startup_lines = [f"llama-server 地址: {LLAMA_SERVER_URL}"]
    if DEFAULT_MODEL:
        startup_lines.append(f"默认模型: {DEFAULT_MODEL}")
    print("\n".join(startup_lines))

    # Listen on all interfaces on port 7860.
    app.run(host='0.0.0.0', port=7860, debug=False)

templates/index.html

<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>视频分析</title>
    <style>
        * { margin: 0; padding: 0; box-sizing: border-box; }
        body {
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
            background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
            min-height: 100vh;
            padding: 20px;
            color: #e0e0e0;
        }
        .container {
            max-width: 850px;
            margin: 0 auto;
            background: rgba(255,255,255,0.05);
            backdrop-filter: blur(10px);
            border-radius: 20px;
            padding: 35px;
            border: 1px solid rgba(255,255,255,0.1);
            box-shadow: 0 25px 70px rgba(0,0,0,0.4);
        }
        h1 {
            text-align: center;
            color: #fff;
            margin-bottom: 6px;
            font-size: 30px;
            letter-spacing: 1px;
        }
        .subtitle {
            text-align: center;
            color: #a0a0b8;
            margin-bottom: 30px;
            font-size: 14px;
        }
        .server-status {
            display: flex;
            align-items: center;
            justify-content: center;
            gap: 8px;
            margin-bottom: 20px;
            font-size: 13px;
        }
        .status-dot {
            width: 8px;
            height: 8px;
            border-radius: 50%;
            background: #666;
        }
        .status-dot.connected { background: #4ade80; }
        .status-dot.disconnected { background: #f87171; }
        .model-select-area {
            margin-bottom: 22px;
        }
        .model-select-area label {
            display: block;
            margin-bottom: 8px;
            font-weight: 600;
            color: #d0d0e0;
            font-size: 14px;
        }
        .model-select-area select {
            width: 100%;
            padding: 12px;
            background: rgba(0,0,0,0.3);
            border: 1px solid rgba(255,255,255,0.15);
            border-radius: 10px;
            color: #e0e0e0;
            font-size: 14px;
            cursor: pointer;
        }
        .model-select-area select:focus {
            outline: none;
            border-color: #7c5cfc;
        }
        .model-select-area select option {
            background: #1a1a2e;
            color: #e0e0e0;
        }
        .upload-area {
            border: 2px dashed rgba(255,255,255,0.25);
            border-radius: 16px;
            padding: 45px 30px;
            text-align: center;
            cursor: pointer;
            transition: all 0.3s;
            background: rgba(255,255,255,0.03);
            margin-bottom: 22px;
        }
        .upload-area:hover {
            border-color: #7c5cfc;
            background: rgba(124,92,252,0.08);
        }
        .upload-area.dragover {
            border-color: #7c5cfc;
            background: rgba(124,92,252,0.15);
            transform: scale(1.01);
        }
        .upload-icon { font-size: 50px; margin-bottom: 12px; }
        .upload-area p { color: #c0c0d0; }
        .upload-area .file-name { color: #7c5cfc; font-weight: 600; margin-top: 8px; }
        #videoInput { display: none; }
        .question-area { margin-bottom: 22px; }
        .question-area label {
            display: block;
            margin-bottom: 10px;
            font-weight: 600;
            color: #d0d0e0;
            font-size: 15px;
        }
        .preset-questions {
            display: flex;
            flex-wrap: wrap;
            gap: 8px;
            margin-bottom: 14px;
        }
        .preset-btn {
            padding: 8px 18px;
            background: rgba(124,92,252,0.15);
            border: 1px solid rgba(124,92,252,0.4);
            border-radius: 20px;
            cursor: pointer;
            font-size: 13px;
            color: #b8b0f0;
            transition: all 0.2s;
        }
        .preset-btn:hover {
            background: rgba(124,92,252,0.35);
            color: #fff;
            border-color: #7c5cfc;
        }
        .question-area textarea {
            width: 100%;
            padding: 14px;
            background: rgba(0,0,0,0.3);
            border: 1px solid rgba(255,255,255,0.15);
            border-radius: 10px;
            font-size: 14px;
            color: #e0e0e0;
            resize: vertical;
            min-height: 65px;
            font-family: inherit;
        }
        .question-area textarea:focus {
            outline: none;
            border-color: #7c5cfc;
        }
        .btn {
            width: 100%;
            padding: 16px;
            background: linear-gradient(135deg, #7c5cfc 0%, #5b3cc4 100%);
            color: white;
            border: none;
            border-radius: 12px;
            font-size: 16px;
            font-weight: 600;
            cursor: pointer;
            transition: all 0.3s;
            letter-spacing: 1px;
        }
        .btn:hover:not(:disabled) {
            transform: translateY(-2px);
            box-shadow: 0 10px 30px rgba(124,92,252,0.4);
        }
        .btn:disabled { opacity: 0.4; cursor: not-allowed; }
        .status {
            text-align: center;
            margin-top: 18px;
            color: #9090a8;
            font-size: 14px;
        }
        .result-area {
            margin-top: 30px;
            display: none;
        }
        .result-area.show { display: block; }
        .result-area h3 {
            color: #d0d0e0;
            margin-bottom: 12px;
            font-size: 18px;
        }
        .result-box {
            background: rgba(0,0,0,0.35);
            border-radius: 12px;
            padding: 22px;
            white-space: pre-wrap;
            word-wrap: break-word;
            max-height: 450px;
            overflow-y: auto;
            border-left: 4px solid #7c5cfc;
            line-height: 1.7;
            color: #d0d0d0;
            font-size: 14px;
        }
        .result-info {
            font-size: 12px;
            color: #8080a0;
            margin-top: 10px;
        }
        #videoPreview {
            max-width: 100%;
            max-height: 300px;
            margin: 18px auto;
            display: none;
            border-radius: 10px;
        }
    </style>
</head>
<body>
<div class="container">
    <h1>🎬 视频分析</h1>
    <p class="subtitle">llama.cpp API 模式 · 上传视频即分析</p>

    <div class="server-status">
        <span class="status-dot" id="statusDot"></span>
        <span id="statusText">正在连接 llama-server...</span>
    </div>

    <div class="model-select-area">
        <label>🤖 选择模型</label>
        <select id="modelSelect">
            <option value="">加载中...</option>
        </select>
    </div>

    <div class="upload-area" id="uploadArea">
        <div class="upload-icon">📁</div>
        <p>点击或拖拽上传视频文件</p>
        <p style="font-size:12px;color:#8080a0;">支持 MP4, AVI, MOV, MKV 等格式</p>
        <p class="file-name" id="fileName"></p>
    </div>
    <input type="file" id="videoInput" accept="video/*">
    <video id="videoPreview" controls></video>

    <div class="question-area">
        <label>📝 你想了解视频的什么信息?</label>
        <div class="preset-questions">
            <span class="preset-btn" onclick="setQuestion('请详细描述这个视频的内容,包括场景、人物、动作等')">🎯 详细描述</span>
            <span class="preset-btn" onclick="setQuestion('这个视频中发生了什么事件?请按时间顺序描述')">⏱️ 发生了什么</span>
            <span class="preset-btn" onclick="setQuestion('视频中有哪些物体和人物?他们在做什么?')">🔍 物体和人物</span>
            <span class="preset-btn" onclick="setQuestion('用一句话总结这个视频的主要内容')">📝 一句话总结</span>
        </div>
        <textarea id="question" placeholder="输入你的问题...">请详细描述这个视频的内容,包括场景、人物、动作等</textarea>
    </div>

    <button class="btn" id="analyzeBtn" disabled>🚀 开始分析</button>
    <div class="status" id="status"></div>

    <div class="result-area" id="resultArea">
        <h3>📊 分析结果</h3>
        <div class="result-box" id="resultBox"></div>
        <div class="result-info" id="resultInfo"></div>
    </div>
</div>

<script>
    // Shorthand for the element lookups below.
    const byId = (id) => document.getElementById(id);

    // Upload and preview widgets.
    const uploadArea = byId('uploadArea');
    const videoInput = byId('videoInput');
    const videoPreview = byId('videoPreview');
    const fileNameP = byId('fileName');

    // Question input and action button.
    const questionInput = byId('question');
    const analyzeBtn = byId('analyzeBtn');

    // Progress and result display.
    const statusDiv = byId('status');
    const resultArea = byId('resultArea');
    const resultBox = byId('resultBox');
    const resultInfo = byId('resultInfo');

    // Model selection and server connection indicator.
    const modelSelect = byId('modelSelect');
    const statusDot = byId('statusDot');
    const statusText = byId('statusText');

    // The video file currently chosen by the user; null until one is picked.
    let selectedFile = null;

    // ===== Load the model list =====
    /**
     * Fetch /api/models and fill the model dropdown.
     *
     * Updates the connection indicator as well: green when the backend
     * returned a model list, red when it reported an error or the request
     * itself failed. On failure the dropdown shows a single placeholder entry.
     */
    async function loadModels() {
        // Show the red indicator, a status message and one placeholder option.
        const showDisconnected = (message, placeholderHtml) => {
            statusDot.className = 'status-dot disconnected';
            statusText.textContent = message;
            modelSelect.innerHTML = placeholderHtml;
        };

        try {
            const resp = await fetch('/api/models');
            const data = await resp.json();

            if (data.error) {
                // The backend is up but could not reach llama-server.
                showDisconnected('llama-server 未连接', '<option value="">无可用模型</option>');
                return;
            }

            statusDot.className = 'status-dot connected';
            statusText.textContent = 'llama-server 已连接';

            // Rebuild the dropdown, preselecting the backend's default model.
            modelSelect.innerHTML = '';
            for (const model of data.models) {
                const option = document.createElement('option');
                option.value = model;
                option.textContent = model;
                if (model === data.default) {
                    option.selected = true;
                }
                modelSelect.appendChild(option);
            }

        } catch (err) {
            // The request to the backend itself failed or returned bad data.
            showDisconnected('无法连接后端服务', '<option value="">连接失败</option>');
        }
    }

    // Fetch the model list as soon as the script runs.
    loadModels();

    // ===== File upload wiring =====
    // Clicking the drop zone opens the hidden file picker.
    uploadArea.addEventListener('click', function () {
        videoInput.click();
    });

    // A file was chosen through the picker.
    videoInput.addEventListener('change', function (event) {
        const pickedFiles = event.target.files;
        if (pickedFiles.length > 0) {
            handleFile(pickedFiles[0]);
        }
    });

    // Highlight the drop zone while a drag hovers over it; preventDefault
    // marks the zone as a valid drop target.
    uploadArea.addEventListener('dragover', function (event) {
        event.preventDefault();
        uploadArea.classList.add('dragover');
    });

    uploadArea.addEventListener('dragleave', function () {
        uploadArea.classList.remove('dragover');
    });

    // A file was dropped onto the zone: only the first one is used.
    uploadArea.addEventListener('drop', function (event) {
        event.preventDefault();
        uploadArea.classList.remove('dragover');
        const droppedFiles = event.dataTransfer.files;
        if (droppedFiles.length > 0) {
            handleFile(droppedFiles[0]);
        }
    });

    /**
     * Accept a user-chosen file as the video to analyze.
     *
     * Rejects non-video files with an alert. Otherwise remembers the file,
     * enables the analyze button, shows the file name and loads the file
     * into the preview player.
     *
     * @param {File} file - File picked in the dialog or dropped on the page.
     */
    function handleFile(file) {
        // File.type is an empty string when the browser cannot determine the
        // MIME type, which can happen for containers such as .mkv. Fall back
        // to the extension so the formats advertised on the page are accepted.
        const looksLikeVideo = file.type
            ? file.type.startsWith('video/')
            : /\.(mp4|m4v|mov|avi|mkv|webm|flv|wmv|3gp|ts)$/i.test(file.name);
        if (!looksLikeVideo) {
            alert('请上传视频文件!');
            return;
        }
        selectedFile = file;
        analyzeBtn.disabled = false;
        fileNameP.textContent = `✅ ${file.name}`;

        // Release the blob URL of the previous preview, if any, so repeated
        // selections do not keep earlier files alive in memory.
        if (videoPreview.src.startsWith('blob:')) {
            URL.revokeObjectURL(videoPreview.src);
        }
        videoPreview.src = URL.createObjectURL(file);
        videoPreview.style.display = 'block';
    }

    /**
     * Replace the contents of the question textarea with `text`.
     * Called from the inline onclick handlers of the preset-question buttons.
     *
     * @param {string} text - Question text to place in the textarea.
     */
    function setQuestion(text) {
        questionInput.value = text;
    }

    // ===== Analyze button =====
    // Uploads the selected video, question and model to /analyze and shows
    // either the answer or an error message in the result area.
    analyzeBtn.addEventListener('click', async () => {
        if (!selectedFile) return;

        const modelName = modelSelect.value;
        if (!modelName) {
            alert('请先选择模型');
            return;
        }

        // Render an error as plain text. The message can carry server-side
        // exception text, so it must never be parsed as HTML.
        const showError = (message) => {
            const errorSpan = document.createElement('span');
            errorSpan.style.color = '#ff6b6b';
            errorSpan.textContent = message;
            resultBox.textContent = '';
            resultBox.appendChild(errorSpan);
            // Drop the model/frame summary left over from an earlier success.
            resultInfo.textContent = '';
        };

        const formData = new FormData();
        formData.append('video', selectedFile);
        formData.append('question', questionInput.value);
        formData.append('model', modelName);

        // Lock the button and hide the previous result while the request runs.
        analyzeBtn.disabled = true;
        analyzeBtn.textContent = '⏳ 分析中...';
        statusDiv.textContent = '正在处理,请耐心等待...';
        resultArea.classList.remove('show');

        try {
            const response = await fetch('/analyze', {
                method: 'POST',
                body: formData,
            });

            const data = await response.json();

            if (data.error) {
                showError(`❌ 错误: ${data.error}`);
                statusDiv.textContent = '分析失败';
            } else {
                resultBox.textContent = data.answer;
                resultInfo.textContent = `使用模型: ${modelName} | 共分析了 ${data.frames_analyzed} 帧画面`;
                statusDiv.textContent = '✅ 分析完成!';
            }
            resultArea.classList.add('show');
        } catch (err) {
            // Network failure or a response that is not valid JSON.
            showError(`❌ 请求失败: ${err.message}`);
            resultArea.classList.add('show');
            statusDiv.textContent = '连接错误';
        } finally {
            analyzeBtn.disabled = false;
            analyzeBtn.textContent = '🚀 开始分析';
        }
    });
</script>
</body>
</html>

安装依赖。

$ pip install flask requests

启动服务

$ nohup llama-server -m MiniCPM-V-4_6-Q4_K_M.gguf -mm mmproj-model-f16.gguf &
$ python app.py