像Java
这类语言,编译器会把代码预先编译为字节码,JVM
直接解释执行这些字节码,程序就可以运行起来了。PHP
使用者在性能优化时可能也会遇到OPcache
这个名词,所谓OPcache
其实就是opcode
的cache
,opcode
其实类似于字节码,虽然本质上有点差异,但可以类比为同一种事物。
PHP
在开启OPcache
扩展后,传统的PHP
程序运行速度会提升很多,究其原因,就是OPcache
扩展会将代码编译中间结果opcode
进行缓存,而php-fpm
的运行模式又是每次请求都会重新解释代码,有了缓存之后,每次请求都可以省略前面的编译过程,所以单位时间内就能节省很多实际运行时间。与之相反,php-cli
运行模式下的程序,OPcache
对其起到的作用不大,因为php-cli
下,程序由于常驻内存而只需要解释一次执行多次。
代码
以下是一个用PHP
写的小型编译器。
<?php
/**
 * One lexical token.
 *
 * `type` is a token-kind string (the rest of the file uses the TokenType::T_*
 * constants); `value` is the optional payload and defaults to null.
 */
class Token
{
    public function __construct(
        public string $type,
        public mixed $value = null,
    ) {
    }
}
/**
 * Names of all token kinds, as plain string constants.
 */
class TokenType
{
    // Keywords
    public const T_INT = 'INT';
    public const T_PRINT = 'PRINT';
    public const T_IF = 'IF';
    public const T_ELSE = 'ELSE';
    public const T_WHILE = 'WHILE';
    public const T_FUNCTION = 'FUNCTION';
    public const T_RETURN = 'RETURN';

    // Names and literals
    public const T_IDENTIFIER = 'IDENTIFIER';
    public const T_NUMBER = 'NUMBER';

    // Arithmetic operators
    public const T_PLUS = 'PLUS';
    public const T_MINUS = 'MINUS';
    public const T_MULTIPLY = 'MULTIPLY';
    public const T_DIVIDE = 'DIVIDE';

    // Punctuation
    public const T_LPAREN = 'LPAREN';
    public const T_RPAREN = 'RPAREN';
    public const T_LBRACE = 'LBRACE';
    public const T_RBRACE = 'RBRACE';
    public const T_SEMICOLON = 'SEMICOLON';
    public const T_COMMA = 'COMMA';

    // Assignment and comparison
    public const T_ASSIGN = 'ASSIGN';
    public const T_LT = 'LT';
    public const T_GT = 'GT';
    public const T_EQ = 'EQ';
    public const T_NEQ = 'NEQ';
    public const T_LE = 'LE';
    public const T_GE = 'GE';

    // Logical operators
    public const T_AND = 'AND';
    public const T_OR = 'OR';
    public const T_NOT = 'NOT';

    // End of input
    public const T_EOF = 'EOF';
}
/**
 * Converts source text into Token objects, one per call to getNextToken().
 *
 * The scanner walks the input one byte at a time: $currentChar is the byte at
 * $pos, or null once the input is exhausted.
 */
class Lexer
{
    /** Reserved words and the token type each one produces. */
    private const KEYWORDS = [
        'print' => TokenType::T_PRINT,
        'if' => TokenType::T_IF,
        'else' => TokenType::T_ELSE,
        'while' => TokenType::T_WHILE,
        'function' => TokenType::T_FUNCTION,
        'return' => TokenType::T_RETURN,
    ];

    /** Characters that are always a complete token on their own. */
    private const SINGLE_CHAR_TOKENS = [
        '+' => TokenType::T_PLUS,
        '-' => TokenType::T_MINUS,
        '*' => TokenType::T_MULTIPLY,
        '/' => TokenType::T_DIVIDE,
        '(' => TokenType::T_LPAREN,
        ')' => TokenType::T_RPAREN,
        '{' => TokenType::T_LBRACE,
        '}' => TokenType::T_RBRACE,
        ';' => TokenType::T_SEMICOLON,
        ',' => TokenType::T_COMMA,
    ];

    /** char => [type when it stands alone, type when followed by '=']. */
    private const EQUALS_SUFFIX_TOKENS = [
        '!' => [TokenType::T_NOT, TokenType::T_NEQ],
        '=' => [TokenType::T_ASSIGN, TokenType::T_EQ],
        '<' => [TokenType::T_LT, TokenType::T_LE],
        '>' => [TokenType::T_GT, TokenType::T_GE],
    ];

    /** Characters that are only valid when doubled ('&&', '||'). */
    private const DOUBLED_TOKENS = [
        '&' => TokenType::T_AND,
        '|' => TokenType::T_OR,
    ];

    private string $source;
    private int $pos;
    private ?string $currentChar;

    /**
     * @param string $source Program text to tokenize; may be empty.
     */
    public function __construct(string $source)
    {
        $this->source = $source;
        $this->pos = 0;
        $this->currentChar = $source[0] ?? null;
    }

    /** Moves to the next byte; $currentChar becomes null past the end. */
    private function advance(): void
    {
        $this->pos++;
        $this->currentChar = $this->source[$this->pos] ?? null;
    }

    /** Skips a run of whitespace. */
    private function skipWhitespace(): void
    {
        // The null guard matters: passing null to ctype_* is deprecated since
        // PHP 8.1, and it happens whenever the input ends in whitespace.
        while ($this->currentChar !== null && ctype_space($this->currentChar)) {
            $this->advance();
        }
    }

    /** Reads a run of decimal digits and returns it as a NUMBER token. */
    private function number(): Token
    {
        $digits = '';
        while ($this->currentChar !== null && ctype_digit($this->currentChar)) {
            $digits .= $this->currentChar;
            $this->advance();
        }

        return new Token(TokenType::T_NUMBER, intval($digits));
    }

    /**
     * Reads a run of letters, digits and underscores. Returns the keyword
     * token if the word is reserved, otherwise an IDENTIFIER token carrying
     * the word as its value.
     */
    private function identifier(): Token
    {
        $word = '';
        while (
            $this->currentChar !== null
            && (ctype_alnum($this->currentChar) || $this->currentChar === '_')
        ) {
            $word .= $this->currentChar;
            $this->advance();
        }

        if (isset(self::KEYWORDS[$word])) {
            return new Token(self::KEYWORDS[$word]);
        }

        return new Token(TokenType::T_IDENTIFIER, $word);
    }

    /**
     * Returns the next token, or an EOF token once the input is exhausted.
     *
     * @throws Exception on a character that cannot start any token, including
     *                   a single '&' or '|' that is not doubled.
     */
    public function getNextToken(): Token
    {
        while ($this->currentChar !== null) {
            $char = $this->currentChar;

            if (ctype_space($char)) {
                $this->skipWhitespace();
                continue;
            }
            if (ctype_alpha($char) || $char === '_') {
                return $this->identifier();
            }
            if (ctype_digit($char)) {
                return $this->number();
            }
            if (isset(self::SINGLE_CHAR_TOKENS[$char])) {
                $this->advance();
                return new Token(self::SINGLE_CHAR_TOKENS[$char]);
            }
            if (isset(self::EQUALS_SUFFIX_TOKENS[$char])) {
                [$alone, $withEquals] = self::EQUALS_SUFFIX_TOKENS[$char];
                $this->advance();
                if ($this->currentChar === '=') {
                    $this->advance();
                    return new Token($withEquals);
                }
                return new Token($alone);
            }
            if (isset(self::DOUBLED_TOKENS[$char])) {
                $this->advance();
                if ($this->currentChar === $char) {
                    $this->advance();
                    return new Token(self::DOUBLED_TOKENS[$char]);
                }
                // A single '&' or '|' is not part of the language. Fail here,
                // naming the offending character, instead of falling through
                // with the character already consumed.
                throw new Exception("无法识别的字符: " . $char);
            }

            throw new Exception("无法识别的字符: " . $char);
        }

        return new Token(TokenType::T_EOF);
    }
}
/**
 * Common base class of every abstract-syntax-tree node in this file.
 * It declares no members; it only gives the node classes a shared ancestor.
 */
abstract class ASTNode
{
}
/**
 * Root of the syntax tree: the ordered list of top-level statements.
 */
class Program extends ASTNode
{
    public function __construct(
        public array $statements,
    ) {
    }
}
/**
 * A brace-delimited sequence of statements, itself usable as a statement.
 */
class Block extends ASTNode implements Statement
{
    public function __construct(
        public array $statements,
    ) {
    }
}
/**
 * Marker interface implemented by the statement node classes.
 * It declares no methods.
 */
interface Statement
{
}
/**
 * `print(<expression>);` — holds the expression whose value is printed.
 */
class PrintStatement extends ASTNode implements Statement
{
    public function __construct(
        public Expression $expression,
    ) {
    }
}
/**
 * `if (<condition>) <then> [else <else>]`.
 *
 * The else branch is optional, so $elseBranch is nullable. Declared as a
 * non-nullable `Statement` it would raise a TypeError for every `if` that has
 * no `else`, because the constructor default (and the parser) pass null.
 */
class IfStatement extends ASTNode implements Statement
{
    public Expression $condition;
    public Statement $thenBranch;
    /** Null when the statement has no `else` part. */
    public ?Statement $elseBranch;

    /**
     * @param Expression     $condition  Tested expression.
     * @param Statement      $thenBranch Statement for the "true" path.
     * @param Statement|null $elseBranch Statement for the "false" path, if any.
     */
    public function __construct(Expression $condition, Statement $thenBranch, ?Statement $elseBranch = null)
    {
        $this->condition = $condition;
        $this->thenBranch = $thenBranch;
        $this->elseBranch = $elseBranch;
    }
}
/**
 * `while (<condition>) <body>`.
 */
class WhileStatement extends ASTNode implements Statement
{
    public function __construct(
        public Expression $condition,
        public Statement $body,
    ) {
    }
}
/**
 * `function <name>(<params>) <body>` — name, parameter list and body.
 */
class FunctionDeclaration extends ASTNode implements Statement
{
    public function __construct(
        public string $name,
        public array $params,
        public Statement $body,
    ) {
    }
}
/**
 * `return [<expression>];`.
 *
 * The returned expression is optional, so $expression is nullable. Declared
 * as a non-nullable `Expression` it would raise a TypeError when a bare
 * `return;` is parsed, because the parser passes null in that case.
 */
class ReturnStatement extends ASTNode implements Statement
{
    /** Null for a bare `return;`. */
    public ?Expression $expression;

    /**
     * @param Expression|null $expression Value to return, or null for none.
     */
    public function __construct(?Expression $expression)
    {
        $this->expression = $expression;
    }
}
/**
 * An expression used as a statement (`<expression>;`).
 */
class ExpressionStatement extends ASTNode implements Statement
{
    public function __construct(
        public Expression $expression,
    ) {
    }
}
/**
 * Marker interface implemented by the expression node classes.
 * It declares no methods.
 */
interface Expression
{
}
/**
 * `<variable> = <expression>` — the target variable and the assigned value.
 */
class Assignment extends ASTNode implements Expression
{
    public function __construct(
        public Variable $variable,
        public Expression $expression,
    ) {
    }
}
/**
 * `<left> <operator> <right>`; the operator is stored as a string.
 */
class BinaryExpression extends ASTNode implements Expression
{
    public function __construct(
        public Expression $left,
        public string $operator,
        public Expression $right,
    ) {
    }
}
/**
 * `<operator> <expression>`; the operator is stored as a string.
 */
class UnaryExpression extends ASTNode implements Expression
{
    public function __construct(
        public string $operator,
        public Expression $expression,
    ) {
    }
}
/**
 * A constant value appearing directly in the source.
 */
class Literal extends ASTNode implements Expression
{
    public function __construct(
        public mixed $value,
    ) {
    }
}
/**
 * A reference to a variable by name.
 */
class Variable extends ASTNode implements Expression
{
    public function __construct(
        public string $name,
    ) {
    }
}
/**
 * `<name>(<arguments>)` — callee name and the argument expressions in order.
 */
class FunctionCall extends ASTNode implements Expression
{
    public function __construct(
        public string $name,
        public array $arguments,
    ) {
    }
}
/**
 * Recursive-descent parser that pulls tokens from a Lexer and builds the AST.
 *
 * Expression precedence, loosest to tightest:
 *   assignment (right-associative), ||, &&, == !=, < > <= >=, + -, * /,
 *   unary ! and -, primary.
 */
class Parser
{
    private Lexer $lexer;
    private Token $currentToken;

    /**
     * Stores the lexer and reads the first token as lookahead.
     */
    public function __construct($lexer)
    {
        $this->lexer = $lexer;
        $this->currentToken = $lexer->getNextToken();
    }

    /** True when the lookahead token has the given type. */
    private function check(string $tokenType): bool
    {
        return $this->currentToken->type === $tokenType;
    }

    /** Consumes the lookahead if it has the given type; reports whether it did. */
    private function accept(string $tokenType): bool
    {
        if (!$this->check($tokenType)) {
            return false;
        }
        $this->eat($tokenType);

        return true;
    }

    /**
     * Consumes the lookahead, which must have the given type.
     *
     * @throws Exception when the lookahead has a different type.
     */
    private function eat(string $tokenType): void
    {
        if (!$this->check($tokenType)) {
            throw new Exception("期望 " . $tokenType . ",但获得 " . $this->currentToken->type);
        }
        $this->currentToken = $this->lexer->getNextToken();
    }

    /**
     * Parses statements until end of input.
     *
     * @throws Exception on any syntax error.
     */
    public function parse(): Program
    {
        $body = [];
        while (!$this->check(TokenType::T_EOF)) {
            $body[] = $this->statement();
        }

        return new Program($body);
    }

    /** Chooses the statement rule from the lookahead token. */
    private function statement(): Statement
    {
        switch ($this->currentToken->type) {
            case TokenType::T_PRINT:
                return $this->printStatement();
            case TokenType::T_IF:
                return $this->ifStatement();
            case TokenType::T_WHILE:
                return $this->whileStatement();
            case TokenType::T_FUNCTION:
                return $this->functionDeclaration();
            case TokenType::T_RETURN:
                return $this->returnStatement();
            case TokenType::T_LBRACE:
                return $this->block();
            default:
                return $this->expressionStatement();
        }
    }

    /** `{ statement* }` */
    private function block(): Block
    {
        $this->eat(TokenType::T_LBRACE);
        $body = [];
        while (!$this->check(TokenType::T_RBRACE)) {
            $body[] = $this->statement();
        }
        $this->eat(TokenType::T_RBRACE);

        return new Block($body);
    }

    /** `print ( expression ) ;` */
    private function printStatement(): PrintStatement
    {
        $this->eat(TokenType::T_PRINT);
        $this->eat(TokenType::T_LPAREN);
        $value = $this->expression();
        $this->eat(TokenType::T_RPAREN);
        $this->eat(TokenType::T_SEMICOLON);

        return new PrintStatement($value);
    }

    /** `if ( expression ) statement [else statement]` */
    private function ifStatement(): IfStatement
    {
        $this->eat(TokenType::T_IF);
        $this->eat(TokenType::T_LPAREN);
        $test = $this->expression();
        $this->eat(TokenType::T_RPAREN);
        $whenTrue = $this->statement();
        $whenFalse = $this->accept(TokenType::T_ELSE) ? $this->statement() : null;

        return new IfStatement($test, $whenTrue, $whenFalse);
    }

    /** `while ( expression ) statement` */
    private function whileStatement(): WhileStatement
    {
        $this->eat(TokenType::T_WHILE);
        $this->eat(TokenType::T_LPAREN);
        $test = $this->expression();
        $this->eat(TokenType::T_RPAREN);

        return new WhileStatement($test, $this->statement());
    }

    /** `function identifier ( [identifier (, identifier)*] ) block` */
    private function functionDeclaration(): FunctionDeclaration
    {
        $this->eat(TokenType::T_FUNCTION);
        $functionName = $this->currentToken->value;
        $this->eat(TokenType::T_IDENTIFIER);
        $this->eat(TokenType::T_LPAREN);

        $parameterNames = [];
        if (!$this->check(TokenType::T_RPAREN)) {
            do {
                // Read the value first, then let eat() verify it really was
                // an identifier.
                $parameterNames[] = $this->currentToken->value;
                $this->eat(TokenType::T_IDENTIFIER);
            } while ($this->accept(TokenType::T_COMMA));
        }
        $this->eat(TokenType::T_RPAREN);

        return new FunctionDeclaration($functionName, $parameterNames, $this->block());
    }

    /** `return [expression] ;` */
    private function returnStatement(): ReturnStatement
    {
        $this->eat(TokenType::T_RETURN);
        $value = $this->check(TokenType::T_SEMICOLON) ? null : $this->expression();
        $this->eat(TokenType::T_SEMICOLON);

        return new ReturnStatement($value);
    }

    /** `expression ;` */
    private function expressionStatement(): ExpressionStatement
    {
        $value = $this->expression();
        $this->eat(TokenType::T_SEMICOLON);

        return new ExpressionStatement($value);
    }

    /** Entry point of the expression grammar. */
    private function expression(): Expression
    {
        return $this->assignment();
    }

    /**
     * `logicalOr [= assignment]` — right-associative; the left side must be a
     * plain variable.
     */
    private function assignment(): Expression
    {
        $target = $this->logicalOr();
        if (!$this->check(TokenType::T_ASSIGN)) {
            return $target;
        }
        if (!($target instanceof Variable)) {
            throw new Exception("赋值目标无效");
        }
        $this->eat(TokenType::T_ASSIGN);

        return new Assignment($target, $this->assignment());
    }

    /**
     * Shared loop for one left-associative binary precedence level.
     *
     * @param array    $operators Token types accepted at this level.
     * @param callable $operand   Parses one operand (the next tighter level).
     */
    private function leftAssociative(array $operators, callable $operand): Expression
    {
        $left = $operand();
        while (in_array($this->currentToken->type, $operators, true)) {
            $operator = $this->currentToken->type;
            $this->eat($operator);
            $right = $operand();
            $left = new BinaryExpression($left, $operator, $right);
        }

        return $left;
    }

    private function logicalOr(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_OR],
            fn (): Expression => $this->logicalAnd(),
        );
    }

    private function logicalAnd(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_AND],
            fn (): Expression => $this->equality(),
        );
    }

    private function equality(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_EQ, TokenType::T_NEQ],
            fn (): Expression => $this->comparison(),
        );
    }

    private function comparison(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_LT, TokenType::T_GT, TokenType::T_LE, TokenType::T_GE],
            fn (): Expression => $this->addition(),
        );
    }

    private function addition(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_PLUS, TokenType::T_MINUS],
            fn (): Expression => $this->multiplication(),
        );
    }

    private function multiplication(): Expression
    {
        return $this->leftAssociative(
            [TokenType::T_MULTIPLY, TokenType::T_DIVIDE],
            fn (): Expression => $this->unary(),
        );
    }

    /** `(! | -) unary | primary` */
    private function unary(): Expression
    {
        if (!$this->check(TokenType::T_NOT) && !$this->check(TokenType::T_MINUS)) {
            return $this->primary();
        }
        $operator = $this->currentToken->type;
        $this->eat($operator);

        return new UnaryExpression($operator, $this->unary());
    }

    /** `( [expression (, expression)*] )` — argument list of a call. */
    private function argumentList(): array
    {
        $this->eat(TokenType::T_LPAREN);
        $arguments = [];
        if (!$this->check(TokenType::T_RPAREN)) {
            do {
                $arguments[] = $this->expression();
            } while ($this->accept(TokenType::T_COMMA));
        }
        $this->eat(TokenType::T_RPAREN);

        return $arguments;
    }

    /**
     * Number literal, variable, function call, or parenthesised expression.
     *
     * @throws Exception when the lookahead cannot start an expression.
     */
    private function primary(): Expression
    {
        $token = $this->currentToken;

        switch ($token->type) {
            case TokenType::T_NUMBER:
                $this->eat(TokenType::T_NUMBER);
                return new Literal($token->value);

            case TokenType::T_IDENTIFIER:
                $this->eat(TokenType::T_IDENTIFIER);
                // An identifier directly followed by '(' is a call.
                if ($this->check(TokenType::T_LPAREN)) {
                    return new FunctionCall($token->value, $this->argumentList());
                }
                return new Variable($token->value);

            case TokenType::T_LPAREN:
                $this->eat(TokenType::T_LPAREN);
                $inner = $this->expression();
                $this->eat(TokenType::T_RPAREN);
                return $inner;
        }

        throw new Exception("无法识别的 token: " . $token->type);
    }
}
/**
 * Instruction names of the bytecode, as plain string constants.
 */
class OpCode
{
    // Constants and variables
    public const ICONST = 'ICONST';
    public const LOAD = 'LOAD';
    public const STORE = 'STORE';

    // Output
    public const PRINT = 'PRINT';

    // Arithmetic
    public const ADD = 'ADD';
    public const SUB = 'SUB';
    public const MUL = 'MUL';
    public const DIV = 'DIV';

    // Comparison
    public const LT = 'LT';
    public const GT = 'GT';
    public const EQ = 'EQ';
    public const NE = 'NE';
    public const LE = 'LE';
    public const GE = 'GE';

    // Logic
    public const AND = 'AND';
    public const OR = 'OR';
    public const NOT = 'NOT';

    // Control flow
    public const JMP = 'JMP';
    public const JMPF = 'JMPF';
    public const CALL = 'CALL';
    public const RET = 'RET';

    // Stack housekeeping
    public const POP = 'POP';
}
/**
 * Walks a Program AST and produces a flat list of instructions.
 *
 * Each instruction is an array with an 'op' key and, when the instruction has
 * one, an 'operand' key. Top-level statements are emitted first, then a JMP
 * that skips over the function bodies, then each function body followed by a
 * RET. Function entry points are collected in a table available through
 * getFunctions().
 */
class CodeGenerator
{
    /** Binary operator token type => opcode. */
    private const BINARY_OPCODES = [
        TokenType::T_PLUS => OpCode::ADD,
        TokenType::T_MINUS => OpCode::SUB,
        TokenType::T_MULTIPLY => OpCode::MUL,
        TokenType::T_DIVIDE => OpCode::DIV,
        TokenType::T_LT => OpCode::LT,
        TokenType::T_GT => OpCode::GT,
        TokenType::T_EQ => OpCode::EQ,
        TokenType::T_NEQ => OpCode::NE,
        TokenType::T_LE => OpCode::LE,
        TokenType::T_GE => OpCode::GE,
        TokenType::T_AND => OpCode::AND,
        TokenType::T_OR => OpCode::OR,
    ];

    private array $instructions = [];
    private array $functions = [];

    /**
     * Generates the instruction list for a whole program.
     *
     * @param mixed $node Must be a Program node.
     * @return array The instruction list.
     * @throws Exception when $node is not a Program, or on an unsupported node.
     */
    public function generate($node): array
    {
        if (!($node instanceof Program)) {
            throw new Exception("无效的 AST 节点");
        }

        // Pass 1: everything that is not a function declaration, in order.
        foreach ($node->statements as $statement) {
            if (!($statement instanceof FunctionDeclaration)) {
                $this->genStatement($statement);
            }
        }

        // Main code must not run into the function bodies that follow it.
        $skipFunctions = $this->emit(OpCode::JMP, 0);

        // Pass 2: function bodies, recording where each one starts.
        foreach ($node->statements as $statement) {
            if ($statement instanceof FunctionDeclaration) {
                $this->functions[$statement->name] = [
                    'params' => $statement->params,
                    'entry' => count($this->instructions),
                ];
                $this->genStatement($statement->body);
                $this->emit(OpCode::RET);
            }
        }

        $this->patch($skipFunctions, count($this->instructions));

        return $this->instructions;
    }

    /**
     * Emits code for one statement node.
     *
     * @throws Exception on a node type that is not handled here.
     */
    private function genStatement($node): void
    {
        if ($node instanceof PrintStatement) {
            $this->genExpression($node->expression);
            $this->emit(OpCode::PRINT);
            return;
        }

        if ($node instanceof ExpressionStatement) {
            $this->genExpression($node->expression);
            $this->emit(OpCode::POP);
            return;
        }

        if ($node instanceof Assignment) {
            $this->genExpression($node->expression);
            $this->emit(OpCode::STORE, $node->variable->name);
            return;
        }

        if ($node instanceof IfStatement) {
            $this->genExpression($node->condition);
            $toElse = $this->emit(OpCode::JMPF, 0);
            $this->genStatement($node->thenBranch);

            if ($node->elseBranch === null) {
                $this->patch($toElse, count($this->instructions));
                return;
            }

            // The then-branch must jump over the else-branch.
            $toEnd = $this->emit(OpCode::JMP, 0);
            $this->patch($toElse, count($this->instructions));
            $this->genStatement($node->elseBranch);
            $this->patch($toEnd, count($this->instructions));
            return;
        }

        if ($node instanceof WhileStatement) {
            $loopStart = count($this->instructions);
            $this->genExpression($node->condition);
            $exitJump = $this->emit(OpCode::JMPF, 0);
            $this->genStatement($node->body);
            $this->emit(OpCode::JMP, $loopStart);
            $this->patch($exitJump, count($this->instructions));
            return;
        }

        if ($node instanceof Block) {
            foreach ($node->statements as $inner) {
                $this->genStatement($inner);
            }
            return;
        }

        if ($node instanceof ReturnStatement) {
            if ($node->expression === null) {
                // A bare `return;` yields 0.
                $this->emit(OpCode::ICONST, 0);
            } else {
                $this->genExpression($node->expression);
            }
            $this->emit(OpCode::RET);
            return;
        }

        throw new Exception("未知的语句类型");
    }

    /**
     * Emits code for one expression node.
     *
     * @throws Exception on an unknown node type or operator.
     */
    private function genExpression($node): void
    {
        if ($node instanceof Literal) {
            $this->emit(OpCode::ICONST, $node->value);
            return;
        }

        if ($node instanceof Variable) {
            $this->emit(OpCode::LOAD, $node->name);
            return;
        }

        if ($node instanceof Assignment) {
            $this->genExpression($node->expression);
            $this->emit(OpCode::STORE, $node->variable->name);
            return;
        }

        if ($node instanceof BinaryExpression) {
            // Operands first, left then right; the operator comes last.
            $this->genExpression($node->left);
            $this->genExpression($node->right);
            if (!isset(self::BINARY_OPCODES[$node->operator])) {
                throw new Exception("未知的二元运算符: " . $node->operator);
            }
            $this->emit(self::BINARY_OPCODES[$node->operator]);
            return;
        }

        if ($node instanceof UnaryExpression) {
            if ($node->operator == TokenType::T_MINUS) {
                // Negation is emitted as 0 - x.
                $this->emit(OpCode::ICONST, 0);
                $this->genExpression($node->expression);
                $this->emit(OpCode::SUB);
                return;
            }
            if ($node->operator == TokenType::T_NOT) {
                $this->genExpression($node->expression);
                $this->emit(OpCode::NOT);
                return;
            }
            throw new Exception("未知的一元运算符: " . $node->operator);
        }

        if ($node instanceof FunctionCall) {
            foreach ($node->arguments as $argument) {
                $this->genExpression($argument);
            }
            $this->emit(OpCode::CALL, [
                'name' => $node->name,
                'argc' => count($node->arguments),
            ]);
            return;
        }

        throw new Exception("未知的表达式类型");
    }

    /**
     * Appends one instruction and returns its index.
     * A null operand means "no operand": the 'operand' key is then omitted.
     */
    private function emit($opcode, $operand = null): int
    {
        $this->instructions[] = $operand === null
            ? ['op' => $opcode]
            : ['op' => $opcode, 'operand' => $operand];

        return array_key_last($this->instructions);
    }

    /** Overwrites the operand of an already emitted instruction. */
    private function patch($pos, $target): void
    {
        $this->instructions[$pos]['operand'] = $target;
    }

    /**
     * @return array Function table: name => ['params' => [...], 'entry' => index].
     */
    public function getFunctions(): array
    {
        return $this->functions;
    }
}
/**
 * Stack machine that executes the instruction list.
 *
 * State: an instruction pointer, an operand stack, the variables of the
 * current scope ($globals) and a call stack. CALL saves the caller's
 * variables and starts the callee with only its parameters bound, so a
 * function cannot see its caller's variables; RET restores them.
 *
 * Behaviour notes:
 *  - STORE leaves the assigned value on the operand stack. The instruction
 *    stream always follows an assignment expression with a consumer (POP for
 *    an expression statement, or another STORE for `a = b = 3`). If STORE
 *    removed the value, that consumer would take a value belonging to
 *    whatever expression is being evaluated further down the stack.
 *  - RET never pops below the operand-stack height recorded at CALL. A
 *    function body that ends without an explicit return value yields 0,
 *    instead of stealing an operand from the caller.
 *  - Division by zero is reported as an Exception, like the VM's other
 *    runtime errors.
 */
class VirtualMachine
{
    private array $instructions;
    private array $functions;
    private int $ip;
    private array $stack;
    private array $globals;
    private array $callStack;

    /**
     * @param array $instructions List of ['op' => ..., 'operand' => ...] arrays.
     * @param array $functions    name => ['params' => [...], 'entry' => index].
     */
    public function __construct($instructions, $functions)
    {
        $this->instructions = $instructions;
        $this->functions = $functions;
        $this->ip = 0;
        $this->stack = [];
        $this->globals = [];
        $this->callStack = [];
    }

    /**
     * Executes instructions until the instruction pointer runs past the end
     * or a RET is executed outside of any call.
     *
     * @return mixed The value popped by a RET outside of any call, or null
     *               when execution simply reaches the end.
     * @throws Exception on an undefined function, unknown opcode or division
     *                   by zero.
     */
    public function run(): mixed
    {
        $end = count($this->instructions);

        while ($this->ip < $end) {
            $instr = $this->instructions[$this->ip];
            $op = $instr['op'];

            switch ($op) {
                case OpCode::ICONST:
                    $this->stack[] = $instr['operand'];
                    $this->ip++;
                    break;

                case OpCode::LOAD:
                    // Unset variables read as 0.
                    $this->stack[] = $this->globals[$instr['operand']] ?? 0;
                    $this->ip++;
                    break;

                case OpCode::STORE:
                    // Peek, do not pop: the value stays as the result of the
                    // assignment expression.
                    $top = array_key_last($this->stack);
                    $this->globals[$instr['operand']] = $top === null ? null : $this->stack[$top];
                    $this->ip++;
                    break;

                case OpCode::PRINT:
                    $value = array_pop($this->stack);
                    echo $value . "\n";
                    $this->ip++;
                    break;

                case OpCode::ADD:
                case OpCode::SUB:
                case OpCode::MUL:
                case OpCode::DIV:
                case OpCode::LT:
                case OpCode::GT:
                case OpCode::EQ:
                case OpCode::NE:
                case OpCode::LE:
                case OpCode::GE:
                case OpCode::AND:
                case OpCode::OR:
                    // The right operand was pushed last.
                    $b = array_pop($this->stack);
                    $a = array_pop($this->stack);
                    $this->stack[] = $this->applyBinary($op, $a, $b);
                    $this->ip++;
                    break;

                case OpCode::NOT:
                    $a = array_pop($this->stack);
                    $this->stack[] = (!$a) ? 1 : 0;
                    $this->ip++;
                    break;

                case OpCode::JMP:
                    $this->ip = $instr['operand'];
                    break;

                case OpCode::JMPF:
                    $cond = array_pop($this->stack);
                    $this->ip = $cond ? $this->ip + 1 : $instr['operand'];
                    break;

                case OpCode::CALL:
                    $funcName = $instr['operand']['name'];
                    $argc = $instr['operand']['argc'];
                    if (!isset($this->functions[$funcName])) {
                        throw new Exception("未定义函数: " . $funcName);
                    }
                    $fn = $this->functions[$funcName];

                    // Arguments come off the stack last-first; restore the
                    // order in which they were pushed.
                    $args = [];
                    for ($i = 0; $i < $argc; $i++) {
                        $args[] = array_pop($this->stack);
                    }
                    $args = array_reverse($args);

                    // Frame: return address, caller variables, and the stack
                    // height that belongs to the caller.
                    $this->callStack[] = [$this->ip + 1, $this->globals, count($this->stack)];

                    $this->globals = [];
                    foreach ($fn['params'] as $i => $param) {
                        // Missing arguments default to 0.
                        $this->globals[$param] = $args[$i] ?? 0;
                    }
                    $this->ip = $fn['entry'];
                    break;

                case OpCode::RET:
                    if (empty($this->callStack)) {
                        return array_pop($this->stack);
                    }
                    [$returnIp, $callerGlobals, $stackBase] = array_pop($this->callStack);
                    // Only take a return value the callee actually pushed.
                    $retValue = count($this->stack) > $stackBase ? array_pop($this->stack) : 0;
                    $this->ip = $returnIp;
                    $this->globals = $callerGlobals;
                    $this->stack[] = $retValue;
                    break;

                case OpCode::POP:
                    array_pop($this->stack);
                    $this->ip++;
                    break;

                default:
                    throw new Exception("未知的操作码: " . $op);
            }
        }

        return null;
    }

    /**
     * Computes the result of one binary instruction. Comparison and logic
     * instructions yield 1 or 0; DIV truncates toward zero.
     *
     * @throws Exception on division by zero or an opcode that is not binary.
     */
    private function applyBinary(string $op, mixed $a, mixed $b): mixed
    {
        return match ($op) {
            OpCode::ADD => $a + $b,
            OpCode::SUB => $a - $b,
            OpCode::MUL => $a * $b,
            OpCode::DIV => $b == 0
                ? throw new Exception("除数不能为零")
                : intval($a / $b),
            OpCode::LT => ($a < $b) ? 1 : 0,
            OpCode::GT => ($a > $b) ? 1 : 0,
            OpCode::EQ => ($a == $b) ? 1 : 0,
            OpCode::NE => ($a != $b) ? 1 : 0,
            OpCode::LE => ($a <= $b) ? 1 : 0,
            OpCode::GE => ($a >= $b) ? 1 : 0,
            OpCode::AND => ($a && $b) ? 1 : 0,
            OpCode::OR => ($a || $b) ? 1 : 0,
            default => throw new Exception("未知的操作码: " . $op),
        };
    }
}
// 主程序部分
// Sample program written in the toy language. The nowdoc ('EOT' in quotes)
// keeps the text literal: nothing inside it is interpolated by PHP.
$source = <<<'EOT'
print(1 + 2 * 3);
print(1 + 2 * (3 + 4));
a = 10;
print(2 * (a + 10));
while(a > 0) {
print(a);
if (a == 1) {
a = a - 11;
} else {
a = a - 1;
}
}
print(a);
function factorial(n) {
if(n == 0) {
return 1;
} else {
return n * factorial(n - 1);
}
}
print(factorial(5));
function sum(a, b) {
print(666666);
return a + b;
}
b = 10;
print(sum(10, b));
EOT;
// Run the whole pipeline on $source and report any Exception as a message.
try {
// Front end: source text -> tokens -> AST.
$lexer = new Lexer($source);
$parser = new Parser($lexer);
$ast = $parser->parse();
// Back end: AST -> instruction list plus function table.
$codegen = new CodeGenerator();
$instructions = $codegen->generate($ast);
$functions = $codegen->getFunctions();
// Dump both as JSON so they can be saved and executed later without
// lexing, parsing or generating again.
echo json_encode($instructions), "\n\n";
echo json_encode($functions), "\n\n";
// Execute the generated instructions.
$vm = new VirtualMachine($instructions, $functions);
$vm->run();
} catch (Exception $e) {
echo "Error: " . $e->getMessage() . "\n";
}
虽然代码很多,但我们只需要将目光集中到// 主程序部分
以下的内容,这么多代码的目的就是运行$source
中的代码。程序主要分成四个部分:Lexer
、Parser
、CodeGenerator
和VirtualMachine
。如果以Java
为例,Lexer
、Parser
、CodeGenerator
部分就是javac
做的,VirtualMachine
就是java
对应的部分。Java
程序可以预先将代码编译成字节码,再单独运行字节码。而PHP
在明面上就没分得这么清晰,大部分PHPer
可能只知道PHP
的热更新很方便,不知道背后的解释器做了多少工作,可能以为解释器只是傻傻的逐行解释执行。其实PHP
的编译过程跟Java
大差不差,也可以生成字节码同一层次的opcode
,opcode
跟字节码当然是有点差异的,但为了易于理解,可以当成是同一类型的东西。既然如此,PHP
的opcode
当然也可以像字节码一样单独执行,OPcache
就因此而生。可惜,由于历史原因,PHP
的opcode
并没有规范,所以官方实现无法单独生成opcode
经由ZendVM
直接执行,内部社区其实也有相关的讨论如 https://externals.io/message/111965,还可以在 https://externals.io 查阅更多相关信息。
经过上面的解释,如果我们要提升性能,可以像Java
一样,将Lexer
、Parser
和CodeGenerator
三个部分的生成物——$instructions
及$functions
保存到文件,运行的时候也可以将这三个部分去除,只保留$vm = new VirtualMachine($instructions, $functions); $vm->run();
。
比较
以上面的$source
为例简单测试一下运行时间,
// Benchmark 1: the full pipeline (lex + parse + generate + execute).
//
// Timing uses hrtime(true), a monotonic nanosecond counter. Casting a
// microtime(true) float to string keeps only 14 significant digits with the
// default `precision` setting, i.e. four decimals of a Unix timestamp, which
// quantises every measurement to 100 microseconds. hrtime also needs no
// bcmath extension.
try {
    $t1 = hrtime(true);
    $lexer = new Lexer($source);
    $parser = new Parser($lexer);
    $ast = $parser->parse();
    $codegen = new CodeGenerator();
    $instructions = $codegen->generate($ast);
    $functions = $codegen->getFunctions();
    $vm = new VirtualMachine($instructions, $functions);
    $vm->run();
    $t2 = hrtime(true);
    // Nanoseconds -> whole microseconds.
    echo "time:", (int) (($t2 - $t1) / 1000), "\n";
} catch (Exception $e) {
    echo "Error: " . $e->getMessage() . "\n";
}
// Benchmark 2: execute previously generated instructions only.
//
// The instruction list and function table are the JSON dumps of an earlier
// run; decoding them happens before the timer starts, so only VM construction
// and execution are measured. Timing uses hrtime(true) (monotonic,
// nanoseconds) because a microtime(true) float cast to string is limited to
// 100-microsecond resolution under the default `precision` setting.
try {
    $instructions = json_decode('[{"op":"ICONST","operand":1},{"op":"ICONST","operand":2},{"op":"ICONST","operand":3},{"op":"MUL"},{"op":"ADD"},{"op":"PRINT"},{"op":"ICONST","operand":1},{"op":"ICONST","operand":2},{"op":"ICONST","operand":3},{"op":"ICONST","operand":4},{"op":"ADD"},{"op":"MUL"},{"op":"ADD"},{"op":"PRINT"},{"op":"ICONST","operand":10},{"op":"STORE","operand":"a"},{"op":"POP"},{"op":"ICONST","operand":2},{"op":"LOAD","operand":"a"},{"op":"ICONST","operand":10},{"op":"ADD"},{"op":"MUL"},{"op":"PRINT"},{"op":"LOAD","operand":"a"},{"op":"ICONST","operand":0},{"op":"GT"},{"op":"JMPF","operand":45},{"op":"LOAD","operand":"a"},{"op":"PRINT"},{"op":"LOAD","operand":"a"},{"op":"ICONST","operand":1},{"op":"EQ"},{"op":"JMPF","operand":39},{"op":"LOAD","operand":"a"},{"op":"ICONST","operand":11},{"op":"SUB"},{"op":"STORE","operand":"a"},{"op":"POP"},{"op":"JMP","operand":44},{"op":"LOAD","operand":"a"},{"op":"ICONST","operand":1},{"op":"SUB"},{"op":"STORE","operand":"a"},{"op":"POP"},{"op":"JMP","operand":23},{"op":"LOAD","operand":"a"},{"op":"PRINT"},{"op":"ICONST","operand":5},{"op":"CALL","operand":{"name":"factorial","argc":1}},{"op":"PRINT"},{"op":"ICONST","operand":10},{"op":"STORE","operand":"b"},{"op":"POP"},{"op":"ICONST","operand":10},{"op":"LOAD","operand":"b"},{"op":"CALL","operand":{"name":"sum","argc":2}},{"op":"PRINT"},{"op":"JMP","operand":80},{"op":"LOAD","operand":"n"},{"op":"ICONST","operand":0},{"op":"EQ"},{"op":"JMPF","operand":65},{"op":"ICONST","operand":1},{"op":"RET"},{"op":"JMP","operand":72},{"op":"LOAD","operand":"n"},{"op":"LOAD","operand":"n"},{"op":"ICONST","operand":1},{"op":"SUB"},{"op":"CALL","operand":{"name":"factorial","argc":1}},{"op":"MUL"},{"op":"RET"},{"op":"RET"},{"op":"ICONST","operand":666666},{"op":"PRINT"},{"op":"LOAD","operand":"a"},{"op":"LOAD","operand":"b"},{"op":"ADD"},{"op":"RET"},{"op":"RET"}]', true);
    $functions = json_decode('{"factorial":{"params":["n"],"entry":58},"sum":{"params":["a","b"],"entry":73}}', true);
    $t1 = hrtime(true);
    $vm = new VirtualMachine($instructions, $functions);
    $vm->run();
    $t2 = hrtime(true);
    // Nanoseconds -> whole microseconds.
    echo "time:", (int) (($t2 - $t1) / 1000), "\n";
} catch (Exception $e) {
    echo "Error: " . $e->getMessage() . "\n";
}
解释执行(单位:微秒) | 直接执行字节码(单位:微秒) |
---|---|
1100 | 100 |
1000 | 200 |
900 | 200 |
600 | 200 |
500 | 200 |
400 | 700 |
600 | 100 |
600 | 200 |
600 | 100 |
1100 | 100 |
可以看到,解释执行大体上是要比直接执行字节码慢上很多的,代码量越大,差距越明显。