Interpret this... (PHPem 2016)

Post on 26-Jan-2017

34 views 1 download

Transcript of Interpret this... (PHPem 2016)

@asgrim

Interpret this… — James Titcumb

PHPem Unconference 2016

Who is this guy? James Titcumb

www.jamestitcumb.com

www.roave.com

www.phphants.co.uk

www.phpsouthcoast.co.uk

@asgrim

@asgrim

How PHP works

PHP code

OpCache

Execute (VM)

Lexer + Parser

Compiler

@asgrim

The PHP Lexer

zend_language_scanner.l

@asgrim

zend_language_scanner.l

<ST_IN_SCRIPTING>"exit" {

RETURN_TOKEN(T_EXIT);

}

<ST_IN_SCRIPTING>"die" {

RETURN_TOKEN(T_EXIT);

}

<ST_IN_SCRIPTING>"function" {

RETURN_TOKEN(T_FUNCTION);

}

@asgrim

zend_language_scanner.l

<ST_IN_SCRIPTING>"exit" {

RETURN_TOKEN(T_EXIT);

}

<ST_IN_SCRIPTING>"die" {

RETURN_TOKEN(T_EXIT);

}

<ST_IN_SCRIPTING>"function" {

RETURN_TOKEN(T_FUNCTION);

}

@asgrim

zend_language_scanner.l

<ST_IN_SCRIPTING>"exit" {

RETURN_TOKEN(T_EXIT);

}

<ST_IN_SCRIPTING>"die" {

RETURN_TOKEN(T_EXIT);

}

<ST_IN_SCRIPTING>"function" {

RETURN_TOKEN(T_FUNCTION);

}

@asgrim

zend_language_scanner.l

<ST_IN_SCRIPTING>"exit" {

RETURN_TOKEN(T_EXIT);

}

<ST_IN_SCRIPTING>"die" {

RETURN_TOKEN(T_EXIT);

}

<ST_IN_SCRIPTING>"function" {

RETURN_TOKEN(T_FUNCTION);

}

@asgrim

zend_language_scanner.l

<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {

yy_push_state(ST_LOOKING_FOR_VARNAME);

RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);

}

<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {

yyless(yyleng - 1);

zend_copy_value(zendlval, yytext, yyleng);

yy_pop_state();

yy_push_state(ST_IN_SCRIPTING);

RETURN_TOKEN(T_STRING_VARNAME);

}

@asgrim

zend_language_scanner.l

<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {

yy_push_state(ST_LOOKING_FOR_VARNAME);

RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);

}

<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {

yyless(yyleng - 1);

zend_copy_value(zendlval, yytext, yyleng);

yy_pop_state();

yy_push_state(ST_IN_SCRIPTING);

RETURN_TOKEN(T_STRING_VARNAME);

}

@asgrim

zend_language_scanner.l

<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {

yy_push_state(ST_LOOKING_FOR_VARNAME);

RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);

}

<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {

yyless(yyleng - 1);

zend_copy_value(zendlval, yytext, yyleng);

yy_pop_state();

yy_push_state(ST_IN_SCRIPTING);

RETURN_TOKEN(T_STRING_VARNAME);

}

@asgrim

zend_language_scanner.l

<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {

yy_push_state(ST_LOOKING_FOR_VARNAME);

RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);

}

<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {

yyless(yyleng - 1);

zend_copy_value(zendlval, yytext, yyleng);

yy_pop_state();

yy_push_state(ST_IN_SCRIPTING);

RETURN_TOKEN(T_STRING_VARNAME);

}

@asgrim

zend_language_scanner.l

<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {

yy_push_state(ST_LOOKING_FOR_VARNAME);

RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);

}

<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {

yyless(yyleng - 1);

zend_copy_value(zendlval, yytext, yyleng);

yy_pop_state();

yy_push_state(ST_IN_SCRIPTING);

RETURN_TOKEN(T_STRING_VARNAME);

}

@asgrim

zend_language_scanner.l

<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {

yy_push_state(ST_LOOKING_FOR_VARNAME);

RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);

}

<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {

yyless(yyleng - 1);

zend_copy_value(zendlval, yytext, yyleng);

yy_pop_state();

yy_push_state(ST_IN_SCRIPTING);

RETURN_TOKEN(T_STRING_VARNAME);

}

@asgrim

zend_language_scanner.l

<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {

yy_push_state(ST_LOOKING_FOR_VARNAME);

RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);

}

<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {

yyless(yyleng - 1);

zend_copy_value(zendlval, yytext, yyleng);

yy_pop_state();

yy_push_state(ST_IN_SCRIPTING);

RETURN_TOKEN(T_STRING_VARNAME);

}

@asgrim

zend_language_scanner.l

<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {

yy_push_state(ST_LOOKING_FOR_VARNAME);

RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);

}

<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {

yyless(yyleng - 1);

zend_copy_value(zendlval, yytext, yyleng);

yy_pop_state();

yy_push_state(ST_IN_SCRIPTING);

RETURN_TOKEN(T_STRING_VARNAME);

}

@asgrim

zend_language_scanner.l

<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {

yy_push_state(ST_LOOKING_FOR_VARNAME);

RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);

}

<ST_LOOKING_FOR_VARNAME>{LABEL}[[}] {

yyless(yyleng - 1);

zend_copy_value(zendlval, yytext, yyleng);

yy_pop_state();

yy_push_state(ST_IN_SCRIPTING);

RETURN_TOKEN(T_STRING_VARNAME);

}

@asgrim

The PHP Lexer

zend_language_scanner.l

@asgrim

The PHP Lexer

zend_language_scanner.l

re2c

@asgrim

The PHP Lexer

zend_language_scanner.l

re2c

zend_language_scanner.c

@asgrim

The PHP Parser

zend_language_parser.y

@asgrim

zend_language_parser.y

if_stmt:

if_stmt_without_else %prec T_NOELSE { $$ = $1; }

| if_stmt_without_else T_ELSE statement

{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }

;

if_stmt_without_else:

T_IF '(' expr ')' statement

{ $$ = zend_ast_create_list(1, ZEND_AST_IF,

zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }

| if_stmt_without_else T_ELSEIF '(' expr ')' statement

{ $$ = zend_ast_list_add($1,

zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }

;

@asgrim

if_stmt:

if_stmt_without_else %prec T_NOELSE { $$ = $1; }

| if_stmt_without_else T_ELSE statement

{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }

;

if_stmt_without_else:

T_IF '(' expr ')' statement

{ $$ = zend_ast_create_list(1, ZEND_AST_IF,

zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }

| if_stmt_without_else T_ELSEIF '(' expr ')' statement

{ $$ = zend_ast_list_add($1,

zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }

;

zend_language_parser.y

@asgrim

if_stmt:

if_stmt_without_else %prec T_NOELSE { $$ = $1; }

| if_stmt_without_else T_ELSE statement

{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }

;

if_stmt_without_else:

T_IF '(' expr ')' statement

{ $$ = zend_ast_create_list(1, ZEND_AST_IF,

zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }

| if_stmt_without_else T_ELSEIF '(' expr ')' statement

{ $$ = zend_ast_list_add($1,

zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }

;

zend_language_parser.y

@asgrim

if_stmt:

if_stmt_without_else %prec T_NOELSE { $$ = $1; }

| if_stmt_without_else T_ELSE statement

{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }

;

if_stmt_without_else:

T_IF '(' expr ')' statement

{ $$ = zend_ast_create_list(1, ZEND_AST_IF,

zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }

| if_stmt_without_else T_ELSEIF '(' expr ')' statement

{ $$ = zend_ast_list_add($1,

zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }

;

zend_language_parser.y

@asgrim

if_stmt:

if_stmt_without_else %prec T_NOELSE { $$ = $1; }

| if_stmt_without_else T_ELSE statement

{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }

;

if_stmt_without_else:

T_IF '(' expr ')' statement

{ $$ = zend_ast_create_list(1, ZEND_AST_IF,

zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }

| if_stmt_without_else T_ELSEIF '(' expr ')' statement

{ $$ = zend_ast_list_add($1,

zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }

;

zend_language_parser.y

@asgrim

if_stmt:

if_stmt_without_else %prec T_NOELSE { $$ = $1; }

| if_stmt_without_else T_ELSE statement

{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }

;

if_stmt_without_else:

T_IF '(' expr ')' statement

{ $$ = zend_ast_create_list(1, ZEND_AST_IF,

zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }

| if_stmt_without_else T_ELSEIF '(' expr ')' statement

{ $$ = zend_ast_list_add($1,

zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }

;

zend_language_parser.y

@asgrim

if_stmt:

if_stmt_without_else %prec T_NOELSE { $$ = $1; }

| if_stmt_without_else T_ELSE statement

{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }

;

if_stmt_without_else:

T_IF '(' expr ')' statement

{ $$ = zend_ast_create_list(1, ZEND_AST_IF,

zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }

| if_stmt_without_else T_ELSEIF '(' expr ')' statement

{ $$ = zend_ast_list_add($1,

zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }

;

zend_language_parser.y

@asgrim

if_stmt:

if_stmt_without_else %prec T_NOELSE { $$ = $1; }

| if_stmt_without_else T_ELSE statement

{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }

;

if_stmt_without_else:

T_IF '(' expr ')' statement

{ $$ = zend_ast_create_list(1, ZEND_AST_IF,

zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }

| if_stmt_without_else T_ELSEIF '(' expr ')' statement

{ $$ = zend_ast_list_add($1,

zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }

;

zend_language_parser.y

@asgrim

if ($a == 1)

{

a();

}

else if ($b == 1)

{

b();

}

else

{

c();

}

Using the rules to parse

@asgrim

if ($a == 1)

{

a();

}

else if ($b == 1)

{

b();

}

else

{

c();

}

Using the rules to parse

if_stmt_without_else (A)

@asgrim

if ($a == 1)

{

a();

}

else if ($b == 1)

{

b();

}

else

{

c();

}

Using the rules to parse

if_stmt_without_else (A)

if_stmt_without_else (B)

@asgrim

if ($a == 1)

{

a();

}

else if ($b == 1)

{

b();

}

else

{

c();

}

Using the rules to parse

if_stmt_without_else (A)

if_stmt_without_else (B)

if_stmt

@asgrim

zend_language_parser.y (PHP 7.0.10)

if_stmt:

if_stmt_without_else %prec T_NOELSE { $$ = $1; }

| if_stmt_without_else T_ELSE statement

{ $$ = zend_ast_list_add($1, zend_ast_create(ZEND_AST_IF_ELEM, NULL, $3)); }

;

if_stmt_without_else:

T_IF '(' expr ')' statement

{ $$ = zend_ast_create_list(1, ZEND_AST_IF,

zend_ast_create(ZEND_AST_IF_ELEM, $3, $5)); }

| if_stmt_without_else T_ELSEIF '(' expr ')' statement

{ $$ = zend_ast_list_add($1,

zend_ast_create(ZEND_AST_IF_ELEM, $4, $6)); }

;

@asgrim

zend_language_parser.y (PHP 5.6.26)

T_IF parenthesis_expr { zend_do_if_cond(&$2, &$1 TSRMLS_CC); }

statement { zend_do_if_after_statement(&$1, 1 TSRMLS_CC); }

void zend_do_if_cond(const znode *cond, znode *closing_bracket_token TSRMLS_DC)

{

int if_cond_op_number = get_next_op_number(CG(active_op_array));

zend_op *opline = get_next_op(CG(active_op_array) TSRMLS_CC);

opline->opcode = ZEND_JMPZ;

SET_NODE(opline->op1, cond);

closing_bracket_token->u.op.opline_num = if_cond_op_number;

SET_UNUSED(opline->op2);

INC_BPC(CG(active_op_array));

}

@asgrim

AST is new in PHP 7+

@asgrim

How PHP works

PHP code

OpCache

Execute (VM)

Lexer + Parser

Compiler

@asgrim

Let’s simplify!

@asgrim

First… WTF is AST?

@asgrim

AST is just a data structure

@asgrim

PHP code

<?php

echo "Hello world";

@asgrim

An AST representation

Echo statement

`-- String, value "Hello world"

@asgrim

PHP code

<?php

echo "Hello " . "world";

@asgrim

An AST representation

Echo statement

`-- Concat

|-- Left

| `-- String, value "Hello "

`-- Right

`-- String, value "world"

@asgrim

PHP code

<?php

$a = 5;

$b = 3;

echo $a + ($b * 2);

@asgrim

An AST representation

Assign statement

|-- Variable $a

`-- Integer, value 5

Assign statement

|-- Variable $b

`-- Integer, value 3

Echo statement

`-- Add operation

|-- Left

| `-- Variable $a

`-- Right

`-- Multiply operation

|-- Left

| `-- Variable $b

`-- Right

`-- Integer, value 2

@asgrim

AST compilation

Statements

|-- Assign

|   |-- Variable, name: $a

|   `-- Scalar, value: (int)5

|-- Assign

|   |-- Variable, name: $b

|   `-- Scalar, value: (int)3

`-- Echo

    `-- Add op

        |-- Left operand

        |   `-- Variable, name: $a

        `-- Right operand

            `-- Multiply op

                |-- Left operand

                |   `-- Variable, name: $b

                `-- Right operand

                    `-- Scalar, value: (int)2

@asgrim

AST compilation: pre-order traversal

Statements

|-- Assign

|   |-- Variable, name: $a

|   `-- Scalar, value: (int)5

|-- Assign

|   |-- Variable, name: $b

|   `-- Scalar, value: (int)3

`-- Echo

    `-- Add op

        |-- Left operand

        |   `-- Variable, name: $a

        `-- Right operand

            `-- Multiply op

                |-- Left operand

                |   `-- Variable, name: $b

                `-- Right operand

                    `-- Scalar, value: (int)2

@asgrim

Pre-order traversal: Polish notation

Assign(Variable $a, Scalar 5)

Assign(Variable $b, Scalar 3)

Echo (

Add(

Variable $a,

Multiply( $b, 2 )

)

)

@asgrim

Order of precedence

1 + 2 * 3

= 1 + (2 * 3) = 7?

= (1 + 2) * 3 = 9?

@asgrim

Order of precedence

1 + 2 * 3

= 1 + (2 * 3) = 7?

= (1 + 2) * 3 = 9?

+ 1 * 2 3

@asgrim

Order of precedence

1 + 2 * 3

= 1 + (2 * 3) = 7?

= (1 + 2) * 3 = 9?

+ 1 * 2 3

Operator Left operand Right operand

@asgrim

Order of precedence

1 + 2 * 3

= 1 + (2 * 3) = 7?

= (1 + 2) * 3 = 9?

+ 1 * 2 3

Operator Left operand Right operand

Operator Left operand Right operand

@asgrim

Reverse Polish Notation

1 2 3 * +

@asgrim

Reverse Polish Notation

1 2 3 * + The stack

@asgrim

Reverse Polish Notation

1 2 3 * + The stack

1

@asgrim

Reverse Polish Notation

1 2 3 * + The stack

1

2

@asgrim

Reverse Polish Notation

1 2 3 * + The stack

1

2

3

@asgrim

Reverse Polish Notation

1 2 3 * + The stack

1

2

3

@asgrim

Reverse Polish Notation

1 2 3 * + The stack

1

2

3

@asgrim

Reverse Polish Notation

1 2 3 * + The stack

1

6

@asgrim

Reverse Polish Notation

1 2 3 * + The stack

1

6

@asgrim

Reverse Polish Notation

1 2 3 * + The stack

7

Any questions?

James Titcumb @asgrim