字节码缓存提升程序性能的原理浅谈

2025/04/29 编译原理

Java这类语言,编译器会把代码预先编译为字节码,JVM直接解释执行这些字节码,程序就可以运行起来了。PHP使用者在性能优化时可能也会遇到OPcache这个名词,所谓OPcache其实就是opcode cache。opcode其实类似于字节码,虽然本质上有点差异,但可以类比为同一种事物。

PHP在开启OPcache扩展后,传统的PHP程序运行速度会提升很多。究其原因,php-fpm的运行模式是每次请求都要重新编译并解释执行代码,而OPcache扩展会将代码编译的中间结果opcode缓存起来,于是每次请求都可以省略前面的编译过程,单位时间内就能节省很多实际运行时间。与之相反,php-cli运行模式下的常驻内存程序,OPcache对其起到的作用不大,因为程序常驻内存,代码只需要编译一次就可以执行多次。

代码

以下是一个用PHP写的小型编译器。

<?php

/**
 * A lexical token: a type tag plus an optional payload.
 *
 * $type holds one of the TokenType constants. $value defaults to null and is
 * only meaningful for tokens that carry data (numbers and identifiers).
 */
class Token
{
    public function __construct(
        public string $type,
        public mixed $value = null,
    ) {
    }
}

/**
 * Names of every token kind the lexer can produce.
 *
 * The values are plain strings so they can be printed directly in parser
 * error messages.
 */
class TokenType
{
    // Keywords.
    public const T_INT = 'INT';
    public const T_PRINT = 'PRINT';
    public const T_IF = 'IF';
    public const T_ELSE = 'ELSE';
    public const T_WHILE = 'WHILE';
    public const T_FUNCTION = 'FUNCTION';
    public const T_RETURN = 'RETURN';

    // Tokens that carry a value.
    public const T_IDENTIFIER = 'IDENTIFIER';
    public const T_NUMBER = 'NUMBER';

    // Arithmetic operators.
    public const T_PLUS = 'PLUS';
    public const T_MINUS = 'MINUS';
    public const T_MULTIPLY = 'MULTIPLY';
    public const T_DIVIDE = 'DIVIDE';

    // Punctuation.
    public const T_LPAREN = 'LPAREN';
    public const T_RPAREN = 'RPAREN';
    public const T_LBRACE = 'LBRACE';
    public const T_RBRACE = 'RBRACE';
    public const T_SEMICOLON = 'SEMICOLON';
    public const T_COMMA = 'COMMA';

    // Assignment, comparison and logical operators.
    public const T_ASSIGN = 'ASSIGN';
    public const T_LT = 'LT';
    public const T_GT = 'GT';
    public const T_EQ = 'EQ';
    public const T_NEQ = 'NEQ';
    public const T_LE = 'LE';
    public const T_GE = 'GE';
    public const T_AND = 'AND';
    public const T_OR = 'OR';
    public const T_NOT = 'NOT';

    // End of input.
    public const T_EOF = 'EOF';
}

/**
 * Converts source text into a stream of Token objects, one per call to
 * getNextToken().
 *
 * The source is scanned byte by byte. Whitespace is skipped; anything that is
 * not a keyword, identifier, integer literal or known operator raises an
 * Exception naming the offending character.
 */
class Lexer
{
    /** Reserved words and the token type each one produces. */
    private const KEYWORDS = [
        'print' => TokenType::T_PRINT,
        'if' => TokenType::T_IF,
        'else' => TokenType::T_ELSE,
        'while' => TokenType::T_WHILE,
        'function' => TokenType::T_FUNCTION,
        'return' => TokenType::T_RETURN,
    ];

    /** Characters that always form a complete token on their own. */
    private const SINGLE_CHAR_TOKENS = [
        '+' => TokenType::T_PLUS,
        '-' => TokenType::T_MINUS,
        '*' => TokenType::T_MULTIPLY,
        '/' => TokenType::T_DIVIDE,
        '(' => TokenType::T_LPAREN,
        ')' => TokenType::T_RPAREN,
        '{' => TokenType::T_LBRACE,
        '}' => TokenType::T_RBRACE,
        ';' => TokenType::T_SEMICOLON,
        ',' => TokenType::T_COMMA,
    ];

    /**
     * Operators that may span two characters.
     *
     * Key: first character. Value: [expected second character, token type for
     * the two-character form, token type for the first character alone]. A
     * null third entry means the first character is not a valid token alone.
     */
    private const COMPOUND_TOKENS = [
        '!' => ['=', TokenType::T_NEQ, TokenType::T_NOT],
        '=' => ['=', TokenType::T_EQ, TokenType::T_ASSIGN],
        '<' => ['=', TokenType::T_LE, TokenType::T_LT],
        '>' => ['=', TokenType::T_GE, TokenType::T_GT],
        '&' => ['&', TokenType::T_AND, null],
        '|' => ['|', TokenType::T_OR, null],
    ];

    private string $source;
    private int $pos;
    // Character at $pos, or null once the end of the source has been reached.
    private ?string $currentChar;

    public function __construct($source)
    {
        $this->source = $source;
        $this->pos = 0;
        $this->currentChar = $source[0] ?? null;
    }

    /** Moves one character forward; sets $currentChar to null at end of input. */
    private function advance(): void
    {
        $this->pos++;
        if ($this->pos < strlen($this->source)) {
            $this->currentChar = $this->source[$this->pos];
        } else {
            $this->currentChar = null;
        }
    }

    // The predicates below test for null first: passing null to the ctype_*
    // functions is deprecated as of PHP 8.1.

    private function atSpace(): bool
    {
        return $this->currentChar !== null && ctype_space($this->currentChar);
    }

    private function atDigit(): bool
    {
        return $this->currentChar !== null && ctype_digit($this->currentChar);
    }

    private function atIdentifierStart(): bool
    {
        return $this->currentChar !== null
            && (ctype_alpha($this->currentChar) || $this->currentChar === '_');
    }

    private function skipWhitespace(): void
    {
        while ($this->atSpace()) {
            $this->advance();
        }
    }

    /** Reads a run of decimal digits and returns it as a NUMBER token. */
    private function number(): Token
    {
        $result = '';
        while ($this->atDigit()) {
            $result .= $this->currentChar;
            $this->advance();
        }
        return new Token(TokenType::T_NUMBER, intval($result));
    }

    /**
     * Reads a run of letters, digits and underscores and returns either the
     * matching keyword token or an IDENTIFIER token carrying the name.
     */
    private function identifier(): Token
    {
        $result = '';
        while ($this->atIdentifierStart() || $this->atDigit()) {
            $result .= $this->currentChar;
            $this->advance();
        }

        if (isset(self::KEYWORDS[$result])) {
            return new Token(self::KEYWORDS[$result]);
        }
        return new Token(TokenType::T_IDENTIFIER, $result);
    }

    /**
     * Returns the next token, or an EOF token once the input is exhausted.
     *
     * @throws Exception when the next character cannot start any token,
     *                   including a lone '&' or '|'
     */
    public function getNextToken(): Token
    {
        $this->skipWhitespace();

        if ($this->currentChar === null) {
            return new Token(TokenType::T_EOF);
        }
        if ($this->atIdentifierStart()) {
            return $this->identifier();
        }
        if ($this->atDigit()) {
            return $this->number();
        }

        // Remember the character so the error below names the right one even
        // after the compound-operator branch has advanced past it.
        $char = $this->currentChar;

        if (isset(self::SINGLE_CHAR_TOKENS[$char])) {
            $this->advance();
            return new Token(self::SINGLE_CHAR_TOKENS[$char]);
        }

        if (isset(self::COMPOUND_TOKENS[$char])) {
            [$second, $pairType, $singleType] = self::COMPOUND_TOKENS[$char];
            $this->advance();
            if ($this->currentChar === $second) {
                $this->advance();
                return new Token($pairType);
            }
            if ($singleType !== null) {
                return new Token($singleType);
            }
            // A lone '&' or '|' is not a token: fall through to the error
            // rather than letting the following characters be misread.
        }

        throw new Exception("无法识别的字符: " . $char);
    }
}

/**
 * Common base class of every node in the abstract syntax tree.
 *
 * It declares no members; it only gives all node classes a shared ancestor.
 */
abstract class ASTNode
{
}

/**
 * Root of the syntax tree: the ordered list of top-level statements.
 */
class Program extends ASTNode
{
    public function __construct(
        public array $statements,
    ) {
    }
}

/**
 * A brace-delimited sequence of statements, itself usable as a statement.
 */
class Block extends ASTNode implements Statement
{
    public function __construct(
        public array $statements,
    ) {
    }
}

/**
 * Marker interface for syntax-tree nodes that are statements.
 *
 * It declares no methods; it exists so properties and return types can
 * require "any statement node".
 */
interface Statement
{
}

/**
 * `print(<expression>);` — holds the expression whose value is printed.
 */
class PrintStatement extends ASTNode implements Statement
{
    public function __construct(
        public Expression $expression,
    ) {
    }
}

/**
 * `if (<condition>) <thenBranch> [else <elseBranch>]`.
 *
 * $elseBranch is null when the statement has no `else` part. The property is
 * therefore nullable; with the previous non-nullable type, constructing an
 * `if` without `else` failed with a TypeError.
 */
class IfStatement extends ASTNode implements Statement
{
    public function __construct(
        public Expression $condition,
        public Statement $thenBranch,
        public ?Statement $elseBranch = null,
    ) {
    }
}

/**
 * `while (<condition>) <body>`.
 */
class WhileStatement extends ASTNode implements Statement
{
    public function __construct(
        public Expression $condition,
        public Statement $body,
    ) {
    }
}

/**
 * `function <name>(<params>) { ... }`.
 *
 * $params is the list of parameter names in declaration order.
 */
class FunctionDeclaration extends ASTNode implements Statement
{
    public function __construct(
        public string $name,
        public array $params,
        public Statement $body,
    ) {
    }
}

/**
 * `return [<expression>];`.
 *
 * $expression is null for a bare `return;`. The property is therefore
 * nullable; with the previous non-nullable type, constructing a bare return
 * failed with a TypeError.
 */
class ReturnStatement extends ASTNode implements Statement
{
    public function __construct(
        public ?Expression $expression,
    ) {
    }
}

/**
 * An expression followed by `;`, used as a statement.
 */
class ExpressionStatement extends ASTNode implements Statement
{
    public function __construct(
        public Expression $expression,
    ) {
    }
}

/**
 * Marker interface for syntax-tree nodes that are expressions.
 *
 * It declares no methods; it exists so properties and return types can
 * require "any expression node".
 */
interface Expression
{
}

/**
 * `<variable> = <expression>` — the target variable and the assigned value.
 */
class Assignment extends ASTNode implements Expression
{
    public function __construct(
        public Variable $variable,
        public Expression $expression,
    ) {
    }
}

/**
 * `<left> <operator> <right>`.
 *
 * $operator is the TokenType constant of the operator token.
 */
class BinaryExpression extends ASTNode implements Expression
{
    public function __construct(
        public Expression $left,
        public string $operator,
        public Expression $right,
    ) {
    }
}

/**
 * `<operator><expression>` for a prefix operator.
 *
 * $operator is the TokenType constant of the operator token.
 */
class UnaryExpression extends ASTNode implements Expression
{
    public function __construct(
        public string $operator,
        public Expression $expression,
    ) {
    }
}

/**
 * A constant value written directly in the source.
 */
class Literal extends ASTNode implements Expression
{
    public function __construct(
        public mixed $value,
    ) {
    }
}

/**
 * A reference to a variable by name.
 */
class Variable extends ASTNode implements Expression
{
    public function __construct(
        public string $name,
    ) {
    }
}

/**
 * `<name>(<arguments>)` — a call with its argument expressions in order.
 */
class FunctionCall extends ASTNode implements Expression
{
    public function __construct(
        public string $name,
        public array $arguments,
    ) {
    }
}

/**
 * Recursive-descent parser that turns the Lexer's token stream into an AST.
 *
 * Expression precedence, lowest to highest: assignment, ||, &&, == and !=,
 * the four relational operators, + and -, * and /, unary ! and -, primary.
 * Binary operators are left-associative; assignment is right-associative.
 */
class Parser
{
    private Lexer $lexer;
    // One-token lookahead: the token currently being examined.
    private Token $currentToken;

    public function __construct($lexer)
    {
        $this->lexer = $lexer;
        $this->currentToken = $lexer->getNextToken();
    }

    /** Whether the lookahead token has the given type. */
    private function at(string $tokenType): bool
    {
        return $this->currentToken->type == $tokenType;
    }

    /**
     * Consumes the lookahead token, which must have the given type.
     *
     * @throws Exception when the lookahead token has a different type
     */
    private function eat(string $tokenType): void
    {
        if (!$this->at($tokenType)) {
            throw new Exception("期望 " . $tokenType . ",但获得 " . $this->currentToken->type);
        }
        $this->currentToken = $this->lexer->getNextToken();
    }

    /** Consumes the lookahead token if it has the given type; reports whether it did. */
    private function accept(string $tokenType): bool
    {
        if (!$this->at($tokenType)) {
            return false;
        }
        $this->eat($tokenType);
        return true;
    }

    /** Parses the whole input: statements up to the EOF token. */
    public function parse(): Program
    {
        $nodes = [];
        while (!$this->at(TokenType::T_EOF)) {
            $nodes[] = $this->statement();
        }
        return new Program($nodes);
    }

    /** Dispatches on the lookahead token; anything unrecognised is an expression statement. */
    private function statement(): Statement
    {
        switch ($this->currentToken->type) {
            case TokenType::T_PRINT:
                return $this->printStatement();
            case TokenType::T_IF:
                return $this->ifStatement();
            case TokenType::T_WHILE:
                return $this->whileStatement();
            case TokenType::T_FUNCTION:
                return $this->functionDeclaration();
            case TokenType::T_RETURN:
                return $this->returnStatement();
            case TokenType::T_LBRACE:
                return $this->block();
            default:
                return $this->expressionStatement();
        }
    }

    /** `{ statement* }` */
    private function block(): Block
    {
        $this->eat(TokenType::T_LBRACE);
        $nodes = [];
        while (!$this->at(TokenType::T_RBRACE)) {
            $nodes[] = $this->statement();
        }
        $this->eat(TokenType::T_RBRACE);
        return new Block($nodes);
    }

    /** `( expression )` as used by print, if and while. */
    private function parenthesized(): Expression
    {
        $this->eat(TokenType::T_LPAREN);
        $inner = $this->expression();
        $this->eat(TokenType::T_RPAREN);
        return $inner;
    }

    /** `print ( expression ) ;` */
    private function printStatement(): PrintStatement
    {
        $this->eat(TokenType::T_PRINT);
        $value = $this->parenthesized();
        $this->eat(TokenType::T_SEMICOLON);
        return new PrintStatement($value);
    }

    /** `if ( expression ) statement [else statement]` */
    private function ifStatement(): IfStatement
    {
        $this->eat(TokenType::T_IF);
        $test = $this->parenthesized();
        $whenTrue = $this->statement();
        $whenFalse = $this->accept(TokenType::T_ELSE) ? $this->statement() : null;
        return new IfStatement($test, $whenTrue, $whenFalse);
    }

    /** `while ( expression ) statement` */
    private function whileStatement(): WhileStatement
    {
        $this->eat(TokenType::T_WHILE);
        $test = $this->parenthesized();
        return new WhileStatement($test, $this->statement());
    }

    /** `function name ( [name {, name}] ) block` */
    private function functionDeclaration(): FunctionDeclaration
    {
        $this->eat(TokenType::T_FUNCTION);
        $functionName = $this->currentToken->value;
        $this->eat(TokenType::T_IDENTIFIER);

        $this->eat(TokenType::T_LPAREN);
        $paramNames = [];
        if (!$this->at(TokenType::T_RPAREN)) {
            do {
                $paramNames[] = $this->currentToken->value;
                $this->eat(TokenType::T_IDENTIFIER);
            } while ($this->accept(TokenType::T_COMMA));
        }
        $this->eat(TokenType::T_RPAREN);

        return new FunctionDeclaration($functionName, $paramNames, $this->block());
    }

    /** `return [expression] ;` — the expression is null for a bare return. */
    private function returnStatement(): ReturnStatement
    {
        $this->eat(TokenType::T_RETURN);
        $value = $this->at(TokenType::T_SEMICOLON) ? null : $this->expression();
        $this->eat(TokenType::T_SEMICOLON);
        return new ReturnStatement($value);
    }

    /** `expression ;` */
    private function expressionStatement(): ExpressionStatement
    {
        $value = $this->expression();
        $this->eat(TokenType::T_SEMICOLON);
        return new ExpressionStatement($value);
    }

    private function expression(): Expression
    {
        return $this->assignment();
    }

    /**
     * `logicalOr [= assignment]`; the left side of `=` must be a plain variable.
     *
     * @throws Exception when the assignment target is not a Variable
     */
    private function assignment(): Expression
    {
        $target = $this->logicalOr();
        if (!$this->at(TokenType::T_ASSIGN)) {
            return $target;
        }
        if (!($target instanceof Variable)) {
            throw new Exception("赋值目标无效");
        }
        $this->eat(TokenType::T_ASSIGN);
        return new Assignment($target, $this->assignment());
    }

    /**
     * Parses `operand { op operand }` for one precedence level, building a
     * left-leaning BinaryExpression chain.
     *
     * @param array    $operators token types accepted as operators at this level
     * @param callable $operand   parser for the next-higher precedence level
     */
    private function leftAssociative(array $operators, callable $operand): Expression
    {
        $left = $operand();
        while (in_array($this->currentToken->type, $operators)) {
            $op = $this->currentToken->type;
            $this->eat($op);
            $left = new BinaryExpression($left, $op, $operand());
        }
        return $left;
    }

    private function logicalOr(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_OR],
            fn () => $this->logicalAnd()
        );
    }

    private function logicalAnd(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_AND],
            fn () => $this->equality()
        );
    }

    private function equality(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_EQ, TokenType::T_NEQ],
            fn () => $this->comparison()
        );
    }

    private function comparison(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_LT, TokenType::T_GT, TokenType::T_LE, TokenType::T_GE],
            fn () => $this->addition()
        );
    }

    private function addition(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_PLUS, TokenType::T_MINUS],
            fn () => $this->multiplication()
        );
    }

    private function multiplication(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_MULTIPLY, TokenType::T_DIVIDE],
            fn () => $this->unary()
        );
    }

    /** Prefix `!` and `-`, which may be stacked (e.g. `!!x`, `--x`). */
    private function unary(): Expression
    {
        if (!$this->at(TokenType::T_NOT) && !$this->at(TokenType::T_MINUS)) {
            return $this->primary();
        }
        $op = $this->currentToken->type;
        $this->eat($op);
        return new UnaryExpression($op, $this->unary());
    }

    /** `( [expression {, expression}] )` following a function name. */
    private function argumentList(): array
    {
        $this->eat(TokenType::T_LPAREN);
        $arguments = [];
        if (!$this->at(TokenType::T_RPAREN)) {
            do {
                $arguments[] = $this->expression();
            } while ($this->accept(TokenType::T_COMMA));
        }
        $this->eat(TokenType::T_RPAREN);
        return $arguments;
    }

    /**
     * Number literal, variable, function call, or parenthesised expression.
     *
     * @throws Exception when the lookahead token cannot start an expression
     */
    private function primary(): Expression
    {
        $first = $this->currentToken;
        switch ($first->type) {
            case TokenType::T_NUMBER:
                $this->eat(TokenType::T_NUMBER);
                return new Literal($first->value);

            case TokenType::T_IDENTIFIER:
                $this->eat(TokenType::T_IDENTIFIER);
                // An identifier directly followed by '(' is a call.
                if ($this->at(TokenType::T_LPAREN)) {
                    return new FunctionCall($first->value, $this->argumentList());
                }
                return new Variable($first->value);

            case TokenType::T_LPAREN:
                return $this->parenthesized();
        }
        throw new Exception("无法识别的 token: " . $first->type);
    }
}

/**
 * Instruction names of the stack-based virtual machine.
 *
 * The values are plain strings, so an instruction list stays readable when it
 * is dumped as JSON.
 */
class OpCode
{
    // Constants, variables and output.
    public const ICONST = 'ICONST';
    public const LOAD = 'LOAD';
    public const STORE = 'STORE';
    public const PRINT = 'PRINT';

    // Arithmetic.
    public const ADD = 'ADD';
    public const SUB = 'SUB';
    public const MUL = 'MUL';
    public const DIV = 'DIV';

    // Comparison.
    public const LT = 'LT';
    public const GT = 'GT';
    public const EQ = 'EQ';
    public const NE = 'NE';
    public const LE = 'LE';
    public const GE = 'GE';

    // Logic.
    public const AND = 'AND';
    public const OR = 'OR';
    public const NOT = 'NOT';

    // Control flow and calls.
    public const JMP = 'JMP';
    public const JMPF = 'JMPF';
    public const CALL = 'CALL';
    public const RET = 'RET';

    // Stack housekeeping.
    public const POP = 'POP';
}

/**
 * Compiles a Program AST into a flat instruction list for the stack VM.
 *
 * Layout of the generated code: all top-level non-function statements first,
 * then a JMP over the function bodies, then each function body in
 * declaration order. Function entry points and parameter names are recorded
 * separately and exposed through getFunctions().
 *
 * Stack discipline: every expression leaves exactly one value on the operand
 * stack, and every statement leaves the stack as it found it.
 */
class CodeGenerator
{
    /** Binary operator token type => opcode implementing it. */
    private const BINARY_OPCODES = [
        TokenType::T_PLUS => OpCode::ADD,
        TokenType::T_MINUS => OpCode::SUB,
        TokenType::T_MULTIPLY => OpCode::MUL,
        TokenType::T_DIVIDE => OpCode::DIV,
        TokenType::T_LT => OpCode::LT,
        TokenType::T_GT => OpCode::GT,
        TokenType::T_EQ => OpCode::EQ,
        TokenType::T_NEQ => OpCode::NE,
        TokenType::T_LE => OpCode::LE,
        TokenType::T_GE => OpCode::GE,
        TokenType::T_AND => OpCode::AND,
        TokenType::T_OR => OpCode::OR,
    ];

    // Each instruction is ['op' => string] plus an optional 'operand' entry.
    private array $instructions = [];
    // Function name => ['params' => list of parameter names, 'entry' => instruction index].
    private array $functions = [];

    /**
     * Generates code for a whole program.
     *
     * @param mixed $node the AST root; must be a Program
     * @return array the instruction list
     * @throws Exception when $node is not a Program, or the tree contains a
     *                   node this generator does not know
     */
    public function generate($node): array
    {
        if (!($node instanceof Program)) {
            throw new Exception("无效的 AST 节点");
        }

        $funcDecls = [];
        $mainStmts = [];
        foreach ($node->statements as $stmt) {
            if ($stmt instanceof FunctionDeclaration) {
                $funcDecls[] = $stmt;
            } else {
                $mainStmts[] = $stmt;
            }
        }

        foreach ($mainStmts as $stmt) {
            $this->genStatement($stmt);
        }

        // The main code must not run into the function bodies that follow;
        // the jump target is patched once their total length is known.
        $jumpPos = $this->emit(OpCode::JMP, 0);

        foreach ($funcDecls as $func) {
            $this->functions[$func->name] = [
                'params' => $func->params,
                'entry' => count($this->instructions),
            ];
            $this->genStatement($func->body);
            // Implicit `return 0` for a body that falls off its end, matching
            // what a bare `return;` produces. Without the pushed value, RET
            // would pop whatever the caller had on the operand stack.
            $this->emit(OpCode::ICONST, 0);
            $this->emit(OpCode::RET);
        }

        $this->patch($jumpPos, count($this->instructions));
        return $this->instructions;
    }

    /**
     * Emits code for one statement.
     *
     * @throws Exception for a statement node type that is not handled here
     */
    private function genStatement($node): void
    {
        if ($node instanceof PrintStatement) {
            $this->genExpression($node->expression);
            $this->emit(OpCode::PRINT);
        } elseif ($node instanceof ExpressionStatement) {
            if ($node->expression instanceof Assignment) {
                // The assigned value is not needed afterwards, so a plain
                // store leaves the stack balanced without a LOAD/POP pair.
                $this->genStore($node->expression);
            } else {
                $this->genExpression($node->expression);
                $this->emit(OpCode::POP);
            }
        } elseif ($node instanceof Assignment) {
            $this->genStore($node);
        } elseif ($node instanceof IfStatement) {
            $this->genIf($node);
        } elseif ($node instanceof WhileStatement) {
            $this->genWhile($node);
        } elseif ($node instanceof Block) {
            foreach ($node->statements as $stmt) {
                $this->genStatement($stmt);
            }
        } elseif ($node instanceof ReturnStatement) {
            if ($node->expression !== null) {
                $this->genExpression($node->expression);
            } else {
                // A bare `return;` returns 0.
                $this->emit(OpCode::ICONST, 0);
            }
            $this->emit(OpCode::RET);
        } else {
            throw new Exception("未知的语句类型");
        }
    }

    /** Emits: value of the right-hand side, then STORE into the target variable. */
    private function genStore(Assignment $node): void
    {
        $this->genExpression($node->expression);
        $this->emit(OpCode::STORE, $node->variable->name);
    }

    /** Emits: condition, JMPF past the then-branch, and a JMP over the else-branch if there is one. */
    private function genIf(IfStatement $node): void
    {
        $this->genExpression($node->condition);
        $jmpFalsePos = $this->emit(OpCode::JMPF, 0);
        $this->genStatement($node->thenBranch);

        if ($node->elseBranch === null) {
            $this->patch($jmpFalsePos, count($this->instructions));
            return;
        }

        $jmpEndPos = $this->emit(OpCode::JMP, 0);
        $this->patch($jmpFalsePos, count($this->instructions));
        $this->genStatement($node->elseBranch);
        $this->patch($jmpEndPos, count($this->instructions));
    }

    /** Emits: condition, JMPF to the loop exit, body, JMP back to the condition. */
    private function genWhile(WhileStatement $node): void
    {
        $startPos = count($this->instructions);
        $this->genExpression($node->condition);
        $jmpFalsePos = $this->emit(OpCode::JMPF, 0);
        $this->genStatement($node->body);
        $this->emit(OpCode::JMP, $startPos);
        $this->patch($jmpFalsePos, count($this->instructions));
    }

    /**
     * Emits code that leaves the value of an expression on top of the stack.
     *
     * @throws Exception for an unknown expression node or operator
     */
    private function genExpression($node): void
    {
        if ($node instanceof Literal) {
            $this->emit(OpCode::ICONST, $node->value);
        } elseif ($node instanceof Variable) {
            $this->emit(OpCode::LOAD, $node->name);
        } elseif ($node instanceof Assignment) {
            $this->genStore($node);
            // STORE consumes the value. An assignment used as an expression
            // (e.g. `a = b = 1`) must still yield it, so read it back.
            $this->emit(OpCode::LOAD, $node->variable->name);
        } elseif ($node instanceof BinaryExpression) {
            $this->genExpression($node->left);
            $this->genExpression($node->right);
            $opcode = self::BINARY_OPCODES[$node->operator] ?? null;
            if ($opcode === null) {
                throw new Exception("未知的二元运算符: " . $node->operator);
            }
            $this->emit($opcode);
        } elseif ($node instanceof UnaryExpression) {
            if ($node->operator == TokenType::T_MINUS) {
                // There is no negate opcode: -x is compiled as 0 - x.
                $this->emit(OpCode::ICONST, 0);
                $this->genExpression($node->expression);
                $this->emit(OpCode::SUB);
            } elseif ($node->operator == TokenType::T_NOT) {
                $this->genExpression($node->expression);
                $this->emit(OpCode::NOT);
            } else {
                throw new Exception("未知的一元运算符: " . $node->operator);
            }
        } elseif ($node instanceof FunctionCall) {
            // Arguments are pushed left to right; CALL carries the count.
            foreach ($node->arguments as $arg) {
                $this->genExpression($arg);
            }
            $this->emit(OpCode::CALL, ['name' => $node->name, 'argc' => count($node->arguments)]);
        } else {
            throw new Exception("未知的表达式类型");
        }
    }

    /**
     * Appends an instruction and returns its index.
     *
     * The 'operand' key is only present when $operand is not null.
     */
    private function emit($opcode, $operand = null): int
    {
        $instr = ['op' => $opcode];
        if ($operand !== null) {
            $instr['operand'] = $operand;
        }
        $this->instructions[] = $instr;
        return count($this->instructions) - 1;
    }

    /** Overwrites the operand of an already emitted instruction (used for jump targets). */
    private function patch($pos, $target): void
    {
        $this->instructions[$pos]['operand'] = $target;
    }

    /**
     * Returns the function table built by generate().
     *
     * @return array function name => ['params' => array, 'entry' => int]
     */
    public function getFunctions(): array
    {
        return $this->functions;
    }
}

/**
 * Stack machine that executes the instruction list produced by CodeGenerator.
 *
 * State: an instruction pointer, an operand stack, the variable table of the
 * scope currently executing, and a call stack of saved
 * [return address, variable table] pairs.
 */
class VirtualMachine
{
    private array $instructions;
    private array $functions;
    private int $ip;
    private array $stack;
    // Variables of the current scope. CALL replaces this table with a fresh
    // one holding only the parameters; RET restores the caller's table.
    private array $globals;
    private array $callStack;

    /**
     * @param array $instructions list of ['op' => string, 'operand' => mixed (optional)]
     * @param array $functions    function name => ['params' => array, 'entry' => int]
     */
    public function __construct($instructions, $functions)
    {
        $this->instructions = $instructions;
        $this->functions = $functions;
        $this->ip = 0;
        $this->stack = [];
        $this->globals = [];
        $this->callStack = [];
    }

    /**
     * Executes instructions until the instruction pointer runs past the end
     * or a RET is executed outside of any function call.
     *
     * @return mixed the value popped by a top-level RET, otherwise null
     * @throws Exception for an unknown opcode, a call to an undefined
     *                   function, or a division by zero
     */
    public function run()
    {
        $total = count($this->instructions);
        while ($this->ip < $total) {
            $instr = $this->instructions[$this->ip];
            $op = $instr['op'];
            switch ($op) {
                case OpCode::ICONST:
                    $this->stack[] = $instr['operand'];
                    $this->ip++;
                    break;
                case OpCode::LOAD:
                    // Reading a variable that was never assigned yields 0.
                    $this->stack[] = $this->globals[$instr['operand']] ?? 0;
                    $this->ip++;
                    break;
                case OpCode::STORE:
                    $this->globals[$instr['operand']] = array_pop($this->stack);
                    $this->ip++;
                    break;
                case OpCode::PRINT:
                    echo array_pop($this->stack) . "\n";
                    $this->ip++;
                    break;
                case OpCode::ADD:
                case OpCode::SUB:
                case OpCode::MUL:
                case OpCode::DIV:
                case OpCode::LT:
                case OpCode::GT:
                case OpCode::EQ:
                case OpCode::NE:
                case OpCode::LE:
                case OpCode::GE:
                case OpCode::AND:
                case OpCode::OR:
                    // The right operand was pushed last, so it is popped first.
                    $b = array_pop($this->stack);
                    $a = array_pop($this->stack);
                    $this->stack[] = $this->binaryOp($op, $a, $b);
                    $this->ip++;
                    break;
                case OpCode::NOT:
                    $a = array_pop($this->stack);
                    $this->stack[] = (!$a) ? 1 : 0;
                    $this->ip++;
                    break;
                case OpCode::JMP:
                    $this->ip = $instr['operand'];
                    break;
                case OpCode::JMPF:
                    $cond = array_pop($this->stack);
                    if (!$cond) {
                        $this->ip = $instr['operand'];
                    } else {
                        $this->ip++;
                    }
                    break;
                case OpCode::CALL:
                    $this->call($instr['operand']);
                    break;
                case OpCode::RET:
                    $retValue = array_pop($this->stack);
                    if (empty($this->callStack)) {
                        // RET outside of any call ends the program.
                        return $retValue;
                    }

                    [$this->ip, $this->globals] = array_pop($this->callStack);
                    $this->stack[] = $retValue;
                    break;
                case OpCode::POP:
                    array_pop($this->stack);
                    $this->ip++;
                    break;
                default:
                    throw new Exception("未知的操作码: " . $op);
            }
        }
    }

    /**
     * Computes the result of a two-operand instruction.
     *
     * Comparison and logic opcodes yield 1 or 0; DIV truncates toward zero.
     *
     * @throws Exception on division by zero or an opcode that is not binary
     */
    private function binaryOp(string $op, mixed $a, mixed $b): mixed
    {
        if ($op === OpCode::DIV) {
            // Raise an Exception instead of letting PHP throw
            // DivisionByZeroError, which is an Error and would not be caught
            // by callers that catch Exception.
            if ($b == 0) {
                throw new Exception("除数不能为零");
            }
            return intval($a / $b);
        }

        return match ($op) {
            OpCode::ADD => $a + $b,
            OpCode::SUB => $a - $b,
            OpCode::MUL => $a * $b,
            OpCode::LT => ($a < $b) ? 1 : 0,
            OpCode::GT => ($a > $b) ? 1 : 0,
            OpCode::EQ => ($a == $b) ? 1 : 0,
            OpCode::NE => ($a != $b) ? 1 : 0,
            OpCode::LE => ($a <= $b) ? 1 : 0,
            OpCode::GE => ($a >= $b) ? 1 : 0,
            OpCode::AND => ($a && $b) ? 1 : 0,
            OpCode::OR => ($a || $b) ? 1 : 0,
            default => throw new Exception("未知的操作码: " . $op),
        };
    }

    /**
     * Enters a function: saves the return address and the caller's variables,
     * binds the popped arguments to the parameter names in a fresh variable
     * table, and jumps to the function's entry point.
     *
     * @param array $funcInfo ['name' => string, 'argc' => int] from the CALL operand
     * @throws Exception when the function is not in the function table
     */
    private function call(array $funcInfo): void
    {
        $funcName = $funcInfo['name'];
        $argc = $funcInfo['argc'];
        if (!isset($this->functions[$funcName])) {
            throw new Exception("未定义函数: " . $funcName);
        }
        $fn = $this->functions[$funcName];

        $this->callStack[] = [$this->ip + 1, $this->globals];

        // Arguments were pushed left to right, so the last one is on top:
        // fill the slots from the highest index down.
        $args = [];
        for ($i = $argc - 1; $i >= 0; $i--) {
            $args[$i] = array_pop($this->stack);
        }

        $this->globals = [];
        foreach ($fn['params'] as $index => $paramName) {
            // A parameter with no matching argument defaults to 0.
            $this->globals[$paramName] = $args[$index] ?? 0;
        }
        $this->ip = $fn['entry'];
    }
}


// 主程序部分

// Program written in the toy language; it is compiled and executed below.
$source = <<<'EOT'
print(1 + 2 * 3);
print(1 + 2 * (3 + 4));

a = 10;

print(2 * (a + 10));

while(a > 0) {
    print(a);
    if (a == 1) {
        a = a - 11; 
    } else {
        a = a - 1;
    }
}
print(a);

function factorial(n) {
    if(n == 0) {
        return 1;
    } else {
        return n * factorial(n - 1);
    }
}

print(factorial(5));

function sum(a, b) {
    print(666666);
    return a + b;
}

b = 10;
print(sum(10, b));
EOT;

try {
    // Front end: feed the source through the lexer and parser to get the AST.
    $ast = (new Parser(new Lexer($source)))->parse();

    // Code generation: produce the instruction list and the function table.
    $codegen = new CodeGenerator();
    $instructions = $codegen->generate($ast);
    $functions = $codegen->getFunctions();

    // Print both compilation products as JSON so they can be saved and reused.
    echo json_encode($instructions) . "\n\n";
    echo json_encode($functions) . "\n\n";

    // Execute the generated instructions.
    $vm = new VirtualMachine($instructions, $functions);
    $vm->run();
} catch (Exception $e) {
    echo "Error: {$e->getMessage()}\n";
}

虽然代码很多,但我们只需要将目光集中到 // 主程序部分 以下的代码,前面这么多代码的目的就是运行$source中的代码。程序主要分成四个部分:Lexer、Parser、CodeGenerator、VirtualMachine。如果以Java为例,Lexer、Parser、CodeGenerator部分就是javac做的,VirtualMachine就是java对应的部分。Java程序可以预先将代码编译成字节码,再单独运行字节码。而PHP在明面上就没分得这么清晰,大部分PHPer可能只知道PHP的热更新很方便,不知道背后的解释器做了多少工作,可能以为解释器只是傻傻的逐行解释执行。其实PHP的编译过程跟Java大差不差,也可以生成与字节码同一层次的opcode。opcode跟字节码当然是有点差异的,但为了易于理解,可以当成是同一类型的东西。既然如此,PHP的opcode当然也可以像字节码一样单独执行,OPcache就因此而生。可惜,由于历史原因,PHP的opcode并没有规范,所以官方实现无法单独生成opcode再经由ZendVM直接执行,内部社区其实也有相关的讨论,如 https://externals.io/message/111965 ,还可以在 https://externals.io 查阅更多相关信息。

经过上面的解释,如果我们要提升性能,可以像Java一样,将Lexer、Parser、CodeGenerator三个部分的生成物——$instructions和$functions保存到文件,运行的时候就可以将这三个部分去除,只保留 $vm = new VirtualMachine($instructions, $functions); $vm->run();

比较

以上面的$source为例,简单测试一下运行时间:

// Benchmark 1: full pipeline — lex, parse, generate instructions, then execute.
//
// Timing uses hrtime(true), a monotonic counter that returns integer
// nanoseconds on 64-bit builds. Casting microtime(true) to string goes through
// the `precision` ini setting (14 significant digits by default), which leaves
// only 4 decimals for a Unix timestamp and caps the resolution at 100 µs.
// hrtime also removes the need for the optional bcmath extension.
try {
    $t1 = hrtime(true);
    $lexer = new Lexer($source);
    $parser = new Parser($lexer);
    $ast = $parser->parse();
    $codegen = new CodeGenerator();
    $instructions = $codegen->generate($ast);
    $functions = $codegen->getFunctions();
    $vm = new VirtualMachine($instructions, $functions);
    $vm->run();
    $t2 = hrtime(true);
    // Nanoseconds -> whole microseconds (truncated), same unit as before.
    echo "time:", (int) (($t2 - $t1) / 1000), "\n";
} catch (Exception $e) {
    echo "Error: " . $e->getMessage() . "\n";
}
// Benchmark 2: execute previously generated instructions directly, skipping
// the lexer, parser and code generator. Decoding the saved JSON happens
// before the timer starts, so only VM construction and execution are measured.
try {
    $instructions = json_decode('[{"op":"ICONST","operand":1},{"op":"ICONST","operand":2},{"op":"ICONST","operand":3},{"op":"MUL"},{"op":"ADD"},{"op":"PRINT"},{"op":"ICONST","operand":1},{"op":"ICONST","operand":2},{"op":"ICONST","operand":3},{"op":"ICONST","operand":4},{"op":"ADD"},{"op":"MUL"},{"op":"ADD"},{"op":"PRINT"},{"op":"ICONST","operand":10},{"op":"STORE","operand":"a"},{"op":"POP"},{"op":"ICONST","operand":2},{"op":"LOAD","operand":"a"},{"op":"ICONST","operand":10},{"op":"ADD"},{"op":"MUL"},{"op":"PRINT"},{"op":"LOAD","operand":"a"},{"op":"ICONST","operand":0},{"op":"GT"},{"op":"JMPF","operand":45},{"op":"LOAD","operand":"a"},{"op":"PRINT"},{"op":"LOAD","operand":"a"},{"op":"ICONST","operand":1},{"op":"EQ"},{"op":"JMPF","operand":39},{"op":"LOAD","operand":"a"},{"op":"ICONST","operand":11},{"op":"SUB"},{"op":"STORE","operand":"a"},{"op":"POP"},{"op":"JMP","operand":44},{"op":"LOAD","operand":"a"},{"op":"ICONST","operand":1},{"op":"SUB"},{"op":"STORE","operand":"a"},{"op":"POP"},{"op":"JMP","operand":23},{"op":"LOAD","operand":"a"},{"op":"PRINT"},{"op":"ICONST","operand":5},{"op":"CALL","operand":{"name":"factorial","argc":1}},{"op":"PRINT"},{"op":"ICONST","operand":10},{"op":"STORE","operand":"b"},{"op":"POP"},{"op":"ICONST","operand":10},{"op":"LOAD","operand":"b"},{"op":"CALL","operand":{"name":"sum","argc":2}},{"op":"PRINT"},{"op":"JMP","operand":80},{"op":"LOAD","operand":"n"},{"op":"ICONST","operand":0},{"op":"EQ"},{"op":"JMPF","operand":65},{"op":"ICONST","operand":1},{"op":"RET"},{"op":"JMP","operand":72},{"op":"LOAD","operand":"n"},{"op":"LOAD","operand":"n"},{"op":"ICONST","operand":1},{"op":"SUB"},{"op":"CALL","operand":{"name":"factorial","argc":1}},{"op":"MUL"},{"op":"RET"},{"op":"RET"},{"op":"ICONST","operand":666666},{"op":"PRINT"},{"op":"LOAD","operand":"a"},{"op":"LOAD","operand":"b"},{"op":"ADD"},{"op":"RET"},{"op":"RET"}]', true);
    $functions = json_decode('{"factorial":{"params":["n"],"entry":58},"sum":{"params":["a","b"],"entry":73}}', true);

    $t1 = hrtime(true);
    $vm = new VirtualMachine($instructions, $functions);
    $vm->run();
    $t2 = hrtime(true);
    // Nanoseconds -> whole microseconds (truncated), same unit as before.
    echo "time:", (int) (($t2 - $t1) / 1000), "\n";
} catch (Exception $e) {
    echo "Error: " . $e->getMessage() . "\n";
}
解释执行(单位:微秒) 直接执行字节码(单位:微秒)
1100 100
1000 200
900 200
600 200
500 200
400 700
600 100
600 200
600 100
1100 100

可以看到,解释执行大体上是要比直接执行字节码慢上很多的,代码量越大,差距越明显。

Search

    Table of Contents