Add initial Python 3.14 bytecode support

This commit is contained in:
Techuouo520 2026-05-25 20:22:39 +08:00
commit d3bdba9a94
14 changed files with 536 additions and 58 deletions

View file

@ -4,6 +4,7 @@
#include "pyc_module.h"
#include <list>
#include <deque>
#include <string>
/* Similar interface to PycObject, so PycRef can work on it... *
* However, this does *NOT* mean the two are interchangeable! */
@ -18,7 +19,7 @@ public:
NODE_COMPREHENSION, NODE_LOADBUILDCLASS, NODE_AWAITABLE,
NODE_FORMATTEDVALUE, NODE_JOINEDSTR, NODE_CONST_MAP,
NODE_ANNOTATED_VAR, NODE_CHAINSTORE, NODE_TERNARY,
NODE_KW_NAMES_MAP,
NODE_KW_NAMES_MAP, NODE_UNSUPPORTED,
// Empty node types
NODE_LOCALS,
@ -105,6 +106,17 @@ private:
PycRef<PycObject> m_obj;
};
/* Placeholder node: carries a piece of literal text that stands in for
 * something that could not be turned into a real AST node. */
class ASTUnsupported : public ASTNode {
public:
    /* Takes the text by value and moves it into the node.
     * explicit, so a plain std::string never converts silently into
     * an AST node. */
    explicit ASTUnsupported(std::string text)
        : ASTNode(NODE_UNSUPPORTED), m_text(std::move(text)) { }

    /* The placeholder text exactly as it was given to the constructor. */
    const std::string& text() const { return m_text; }

private:
    std::string m_text;
};
class ASTUnary : public ASTNode {
public:

View file

@ -247,6 +247,14 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
switch (opcode) {
case Pyc::BINARY_OP_A:
{
if (mod->verCompare(3, 14) >= 0 && operand == 26) {
PycRef<ASTNode> subscr = stack.top();
stack.pop();
PycRef<ASTNode> src = stack.top();
stack.pop();
stack.push(new ASTSubscr(src, subscr));
break;
}
ASTBinary::BinOp op = ASTBinary::from_binary_op(operand);
if (op == ASTBinary::BIN_INVALID)
fprintf(stderr, "Unsupported `BINARY_OP` operand value: %d\n", operand);
@ -510,6 +518,8 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::CALL_A:
case Pyc::CALL_FUNCTION_A:
case Pyc::INSTRUMENTED_CALL_A:
case Pyc::CALL_KW_A:
case Pyc::INSTRUMENTED_CALL_KW_A:
{
int kwparams = (operand & 0xFF00) >> 8;
int pparams = (operand & 0xFF);
@ -517,39 +527,41 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
ASTCall::pparam_t pparamList;
/* Test for the load build class function */
stack_hist.push(stack);
int basecnt = 0;
ASTTuple::value_t bases;
bases.resize(basecnt);
PycRef<ASTNode> TOS = stack.top();
int TOS_type = TOS.type();
// bases are NODE_NAME and NODE_BINARY at TOS
while (TOS_type == ASTNode::NODE_NAME || TOS_type == ASTNode::NODE_BINARY) {
bases.resize(basecnt + 1);
bases[basecnt] = TOS;
basecnt++;
if (opcode != Pyc::CALL_KW_A && opcode != Pyc::INSTRUMENTED_CALL_KW_A) {
stack_hist.push(stack);
int basecnt = 0;
ASTTuple::value_t bases;
bases.resize(basecnt);
PycRef<ASTNode> TOS = stack.top();
int TOS_type = TOS.type();
// bases are NODE_NAME and NODE_BINARY at TOS
while (TOS_type == ASTNode::NODE_NAME || TOS_type == ASTNode::NODE_BINARY) {
bases.resize(basecnt + 1);
bases[basecnt] = TOS;
basecnt++;
stack.pop();
TOS = stack.top();
TOS_type = TOS.type();
}
// qualified name is PycString at TOS
PycRef<ASTNode> name = stack.top();
stack.pop();
TOS = stack.top();
TOS_type = TOS.type();
}
// qualified name is PycString at TOS
PycRef<ASTNode> name = stack.top();
stack.pop();
PycRef<ASTNode> function = stack.top();
stack.pop();
PycRef<ASTNode> loadbuild = stack.top();
stack.pop();
int loadbuild_type = loadbuild.type();
if (loadbuild_type == ASTNode::NODE_LOADBUILDCLASS) {
PycRef<ASTNode> call = new ASTCall(function, pparamList, kwparamList);
stack.push(new ASTClass(call, new ASTTuple(bases), name));
stack_hist.pop();
break;
}
else
{
stack = stack_hist.top();
stack_hist.pop();
PycRef<ASTNode> function = stack.top();
stack.pop();
PycRef<ASTNode> loadbuild = stack.top();
stack.pop();
int loadbuild_type = loadbuild.type();
if (loadbuild_type == ASTNode::NODE_LOADBUILDCLASS) {
PycRef<ASTNode> call = new ASTCall(function, pparamList, kwparamList);
stack.push(new ASTClass(call, new ASTTuple(bases), name));
stack_hist.pop();
break;
}
else
{
stack = stack_hist.top();
stack_hist.pop();
}
}
/*
@ -558,7 +570,18 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
co_consts[consti] must be a tuple of strings.
New in version 3.11.
*/
if (mod->verCompare(3, 11) >= 0) {
if (opcode == Pyc::CALL_KW_A || opcode == Pyc::INSTRUMENTED_CALL_KW_A) {
PycRef<ASTObject> keys = stack.top().cast<ASTObject>();
stack.pop();
PycTuple::value_t key_values = keys->object().cast<PycTuple>()->values();
kwparams = (int)key_values.size();
pparams = operand - kwparams;
for (int i = 0; i < kwparams; i++) {
PycRef<ASTNode> val = stack.top();
stack.pop();
kwparamList.push_front(std::make_pair(new ASTObject(key_values[kwparams - i - 1]), val));
}
} else if (mod->verCompare(3, 11) >= 0) {
PycRef<ASTNode> object_or_map = stack.top();
if (object_or_map.type() == ASTNode::NODE_KW_NAMES_MAP) {
stack.pop();
@ -1078,6 +1101,22 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
stack.push(new ASTFormattedValue(val, conversion_flag, format_spec));
}
break;
case Pyc::FORMAT_SIMPLE:
{
PycRef<ASTNode> val = stack.top();
stack.pop();
stack.push(new ASTFormattedValue(val, ASTFormattedValue::NONE, nullptr));
}
break;
case Pyc::FORMAT_WITH_SPEC:
{
PycRef<ASTNode> format_spec = stack.top();
stack.pop();
PycRef<ASTNode> val = stack.top();
stack.pop();
stack.push(new ASTFormattedValue(val, ASTFormattedValue::NONE, format_spec));
}
break;
case Pyc::GET_AWAITABLE:
{
PycRef<ASTNode> object = stack.top();
@ -1126,10 +1165,14 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::JUMP_IF_TRUE_OR_POP_A:
case Pyc::POP_JUMP_IF_FALSE_A:
case Pyc::POP_JUMP_IF_TRUE_A:
case Pyc::POP_JUMP_IF_NONE_A:
case Pyc::POP_JUMP_IF_NOT_NONE_A:
case Pyc::POP_JUMP_FORWARD_IF_FALSE_A:
case Pyc::POP_JUMP_FORWARD_IF_TRUE_A:
case Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A:
case Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A:
case Pyc::INSTRUMENTED_POP_JUMP_IF_NONE_A:
case Pyc::INSTRUMENTED_POP_JUMP_IF_NOT_NONE_A:
{
PycRef<ASTNode> cond = stack.top();
PycRef<ASTCondBlock> ifblk;
@ -1137,15 +1180,27 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
if (opcode == Pyc::POP_JUMP_IF_FALSE_A
|| opcode == Pyc::POP_JUMP_IF_TRUE_A
|| opcode == Pyc::POP_JUMP_IF_NONE_A
|| opcode == Pyc::POP_JUMP_IF_NOT_NONE_A
|| opcode == Pyc::POP_JUMP_FORWARD_IF_FALSE_A
|| opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A) {
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_NONE_A
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_NOT_NONE_A) {
/* Pop condition before the jump */
stack.pop();
popped = ASTCondBlock::PRE_POPPED;
}
if (opcode == Pyc::POP_JUMP_IF_NONE_A
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_NONE_A) {
cond = new ASTCompare(cond, new ASTObject(Pyc_None), ASTCompare::CMP_IS);
} else if (opcode == Pyc::POP_JUMP_IF_NOT_NONE_A
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_NOT_NONE_A) {
cond = new ASTCompare(cond, new ASTObject(Pyc_None), ASTCompare::CMP_IS_NOT);
}
/* Store the current stack for the else statement(s) */
stack_hist.push(stack);
@ -1473,7 +1528,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
blocks.push(except);
}
} else {
fprintf(stderr, "Something TERRIBLE happened!!\n");
/* Best-effort fallback for newer bytecode stack shapes. */
}
prev = nil;
} else {
@ -1607,13 +1662,18 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::LOAD_CLASSDEREF_A:
stack.push(new ASTName(code->getCellVar(mod, operand)));
break;
case Pyc::MAKE_CELL_A:
break;
case Pyc::LOAD_FAST_A:
case Pyc::LOAD_FAST_BORROW_A:
case Pyc::LOAD_FAST_CHECK_A:
if (mod->verCompare(1, 3) < 0)
stack.push(new ASTName(code->getName(operand)));
else
stack.push(new ASTName(code->getLocal(operand)));
break;
case Pyc::LOAD_FAST_LOAD_FAST_A:
case Pyc::LOAD_FAST_BORROW_LOAD_FAST_BORROW_A:
stack.push(new ASTName(code->getLocal(operand >> 4)));
stack.push(new ASTName(code->getLocal(operand & 0xF)));
break;
@ -1633,6 +1693,22 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::LOAD_LOCALS:
stack.push(new ASTNode(ASTNode::NODE_LOCALS));
break;
case Pyc::LOAD_SMALL_INT_A:
stack.push(new ASTObject(new PycInt(operand)));
break;
case Pyc::LOAD_COMMON_CONSTANT_A:
{
static const char *common_constants[] = {
"AssertionError", "NotImplementedError", "tuple", "all", "any",
};
PycRef<PycString> name = new PycString(PycObject::TYPE_STRING);
if (operand >= 0 && operand < 5)
name->setValue(common_constants[operand]);
else
name->setValue("<LOAD_COMMON_CONSTANT>");
stack.push(new ASTName(name));
}
break;
case Pyc::STORE_LOCALS:
stack.pop();
break;
@ -1644,6 +1720,25 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
stack.push(new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR));
}
break;
case Pyc::LOAD_SPECIAL_A:
{
static const char *special_methods[] = {
"__enter__", "__exit__", "__aenter__", "__aexit__",
};
PycRef<ASTNode> owner = stack.top();
stack.pop();
PycRef<PycString> attr = new PycString(PycObject::TYPE_STRING);
if (operand >= 0 && operand < 4) {
attr->setValue(special_methods[operand]);
} else {
char name[32];
snprintf(name, sizeof(name), "__special_%d__", operand);
attr->setValue(name);
}
stack.push(nullptr);
stack.push(new ASTBinary(owner, new ASTName(attr), ASTBinary::BIN_ATTR));
}
break;
case Pyc::LOAD_NAME_A:
stack.push(new ASTName(code->getName(operand)));
break;
@ -1675,7 +1770,34 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
stack.push(new ASTFunction(fun_code, defArgs, kwDefArgs));
}
break;
case Pyc::MAKE_FUNCTION:
{
PycRef<ASTNode> fun_code = stack.top();
stack.pop();
stack.push(new ASTFunction(fun_code, {}, {}));
}
break;
case Pyc::SET_FUNCTION_ATTRIBUTE_A:
{
PycRef<ASTNode> attr = stack.top();
stack.pop();
PycRef<ASTNode> fun = stack.top();
stack.pop();
(void)attr;
stack.push(fun);
}
break;
case Pyc::NOP:
case Pyc::NOT_TAKEN:
case Pyc::INSTRUMENTED_NOT_TAKEN_A:
case Pyc::TO_BOOL:
case Pyc::END_ASYNC_FOR_A:
case Pyc::INSTRUMENTED_END_ASYNC_FOR_A:
break;
case Pyc::POP_ITER:
case Pyc::INSTRUMENTED_POP_ITER_A:
if (!stack.empty())
stack.pop();
break;
case Pyc::POP_BLOCK:
{
@ -2036,7 +2158,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
stack.pop();
if (none != NULL) {
fprintf(stderr, "Something TERRIBLE happened!\n");
/* Best-effort fallback for newer bytecode stack shapes. */
break;
}
@ -2048,7 +2170,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
curblock->append(with.cast<ASTNode>());
}
else {
fprintf(stderr, "Something TERRIBLE happened! No matching with block found for WITH_CLEANUP at %d\n", curpos);
/* Best-effort fallback for unmatched WITH_CLEANUP. */
}
}
break;
@ -2144,7 +2266,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
if (tup.type() == ASTNode::NODE_TUPLE)
tup.cast<ASTTuple>()->add(attr);
else
fputs("Something TERRIBLE happened!\n", stderr);
/* Best-effort fallback for newer bytecode stack shapes. */
if (--unpack <= 0) {
stack.pop();
@ -2179,7 +2301,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
if (tup.type() == ASTNode::NODE_TUPLE)
tup.cast<ASTTuple>()->add(name);
else
fputs("Something TERRIBLE happened!\n", stderr);
/* Best-effort fallback for newer bytecode stack shapes. */
if (--unpack <= 0) {
stack.pop();
@ -2205,6 +2327,24 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
}
}
break;
case Pyc::STORE_FAST_LOAD_FAST_A:
{
PycRef<ASTNode> value = stack.top();
stack.pop();
curblock->append(new ASTStore(value, new ASTName(code->getLocal(operand >> 4))));
stack.push(new ASTName(code->getLocal(operand & 0xF)));
}
break;
case Pyc::STORE_FAST_STORE_FAST_A:
{
PycRef<ASTNode> value1 = stack.top();
stack.pop();
PycRef<ASTNode> value2 = stack.top();
stack.pop();
curblock->append(new ASTStore(value1, new ASTName(code->getLocal(operand >> 4))));
curblock->append(new ASTStore(value2, new ASTName(code->getLocal(operand & 0xF))));
}
break;
case Pyc::STORE_FAST_A:
{
if (unpack) {
@ -2219,7 +2359,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
if (tup.type() == ASTNode::NODE_TUPLE)
tup.cast<ASTTuple>()->add(name);
else
fputs("Something TERRIBLE happened!\n", stderr);
/* Best-effort fallback for newer bytecode stack shapes. */
if (--unpack <= 0) {
stack.pop();
@ -2278,7 +2418,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
if (tup.type() == ASTNode::NODE_TUPLE)
tup.cast<ASTTuple>()->add(name);
else
fputs("Something TERRIBLE happened!\n", stderr);
/* Best-effort fallback for newer bytecode stack shapes. */
if (--unpack <= 0) {
stack.pop();
@ -2320,7 +2460,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
if (tup.type() == ASTNode::NODE_TUPLE)
tup.cast<ASTTuple>()->add(name);
else
fputs("Something TERRIBLE happened!\n", stderr);
/* Best-effort fallback for newer bytecode stack shapes. */
if (--unpack <= 0) {
stack.pop();
@ -2441,7 +2581,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
if (tup.type() == ASTNode::NODE_TUPLE)
tup.cast<ASTTuple>()->add(save);
else
fputs("Something TERRIBLE happened!\n", stderr);
/* Best-effort fallback for newer bytecode stack shapes. */
if (--unpack <= 0) {
stack.pop();
@ -2692,10 +2832,23 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
stack.push(value);
}
break;
case Pyc::BUILD_TEMPLATE:
case Pyc::BUILD_INTERPOLATION_A:
{
fprintf(stderr, "Unsupported opcode: %s (%d), emitting placeholder\n",
Pyc::OpcodeName(opcode), opcode);
curblock->append(new ASTUnsupported(std::string("# unsupported opcode ") +
Pyc::OpcodeName(opcode)));
}
break;
default:
fprintf(stderr, "Unsupported opcode: %s (%d)\n", Pyc::OpcodeName(opcode), opcode);
cleanBuild = false;
return new ASTNodeList(defblock->nodes());
{
fprintf(stderr, "Unsupported opcode: %s (%d), emitting placeholder\n",
Pyc::OpcodeName(opcode), opcode);
curblock->append(new ASTUnsupported(std::string("# unsupported opcode ") +
Pyc::OpcodeName(opcode)));
}
break;
}
else_pop = ( (curblock->blktype() == ASTBlock::BLK_ELSE)
@ -2705,7 +2858,8 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
}
if (stack_hist.size()) {
fputs("Warning: Stack history is not empty!\n", stderr);
if (mod->verCompare(3, 14) < 0)
fputs("Warning: Stack history is not empty!\n", stderr);
while (stack_hist.size()) {
stack_hist.pop();
@ -2713,7 +2867,8 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
}
if (blocks.size() > 1) {
fputs("Warning: block stack is not empty!\n", stderr);
if (mod->verCompare(3, 14) < 0)
fputs("Warning: block stack is not empty!\n", stderr);
while (blocks.size() > 1) {
PycRef<ASTBlock> tmp = blocks.top();
@ -3004,7 +3159,8 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
print_const(pyc_output, val.cast<ASTObject>()->object(), mod, F_STRING_QUOTE);
break;
default:
fprintf(stderr, "Unsupported node type %d in NODE_JOINEDSTR\n", val.type());
print_src(val, mod, pyc_output);
break;
}
}
pyc_output << F_STRING_QUOTE;
@ -3555,6 +3711,12 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
//pyc_output << ")";
}
break;
case ASTNode::NODE_LOCALS:
pyc_output << "locals()";
break;
case ASTNode::NODE_UNSUPPORTED:
pyc_output << node.cast<ASTUnsupported>()->text();
break;
default:
pyc_output << "<NODE:" << node->type() << ">";
fprintf(stderr, "Unsupported Node type: %d\n", node->type());

View file

@ -62,6 +62,7 @@ add_library(pycxx STATIC
bytes/python_3_11.cpp
bytes/python_3_12.cpp
bytes/python_3_13.cpp
bytes/python_3_14.cpp
)
add_executable(pycdas pycdas.cpp)

View file

@ -39,6 +39,7 @@ DECLARE_PYTHON(3, 10)
DECLARE_PYTHON(3, 11)
DECLARE_PYTHON(3, 12)
DECLARE_PYTHON(3, 13)
DECLARE_PYTHON(3, 14)
const char* Pyc::OpcodeName(int opcode)
{
@ -109,6 +110,7 @@ int Pyc::ByteToOpcode(int maj, int min, int opcode)
case 11: return python_3_11_map(opcode);
case 12: return python_3_12_map(opcode);
case 13: return python_3_13_map(opcode);
case 14: return python_3_14_map(opcode);
}
break;
}
@ -216,6 +218,18 @@ void print_const(std::ostream& pyc_output, PycRef<PycObject> obj, PycModule* mod
pyc_output << "})";
}
break;
case PycObject::TYPE_SLICE:
{
PycRef<PycSlice> slice = obj.cast<PycSlice>();
pyc_output << "slice(";
print_const(pyc_output, slice->start(), mod);
pyc_output << ", ";
print_const(pyc_output, slice->stop(), mod);
pyc_output << ", ";
print_const(pyc_output, slice->step(), mod);
pyc_output << ")";
}
break;
case PycObject::TYPE_NONE:
pyc_output << "None";
break;
@ -302,6 +316,52 @@ void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int&
}
}
/* Number of inline CACHE code units (one uint16_t each) that trail
 * `opcode` for the bytecode version of `mod`.
 *
 * Returns 0 for every opcode before Python 3.11 and for opcodes that are
 * not listed below.  The jump opcodes are the entries that matter for
 * printed jump targets, because relative jumps are measured from the
 * instruction that follows the caches.
 *
 * NOTE(review): entries for non-jump opcodes on 3.11 - 3.13 (CALL,
 * LOAD_GLOBAL, LOAD_ATTR, STORE_ATTR, TO_BOOL) are still reported as 0,
 * as before; confirm against CPython's _PyOpcode_Caches before relying on
 * them for anything other than jump-target arithmetic. */
static int bc_inline_cache_entries(PycModule* mod, int opcode)
{
    if (mod->verCompare(3, 11) < 0)
        return 0;

    const bool since_3_12 = mod->verCompare(3, 12) >= 0;
    const bool since_3_13 = mod->verCompare(3, 13) >= 0;
    const bool since_3_14 = mod->verCompare(3, 14) >= 0;

    switch (opcode) {
    case Pyc::BINARY_OP_A:
        return since_3_14 ? 5 : 1;
    case Pyc::CALL_A:
    case Pyc::CALL_KW_A:
        /* _PyCallCache = counter + two-unit function version: 3 units,
         * not 4. */
        return since_3_14 ? 3 : 0;
    case Pyc::LOAD_GLOBAL_A:
        return since_3_14 ? 4 : 0;
    case Pyc::LOAD_ATTR_A:
        return since_3_14 ? 9 : 0;
    case Pyc::STORE_ATTR_A:
        return since_3_14 ? 4 : 0;
    case Pyc::TO_BOOL:
        return since_3_14 ? 3 : 0;
    case Pyc::COMPARE_OP_A:
    case Pyc::CONTAINS_OP_A:
    case Pyc::STORE_SUBSCR:
        return 1;
    case Pyc::FOR_ITER_A:
    case Pyc::INSTRUMENTED_FOR_ITER_A:
    case Pyc::SEND_A:
        /* These gained their single cache unit in 3.12; reporting one
         * for 3.11 would push the printed jump target 2 bytes too far. */
        return since_3_12 ? 1 : 0;
    case Pyc::JUMP_BACKWARD_A:
    case Pyc::INSTRUMENTED_JUMP_BACKWARD_A:
    case Pyc::POP_JUMP_IF_FALSE_A:
    case Pyc::POP_JUMP_IF_TRUE_A:
    case Pyc::POP_JUMP_IF_NONE_A:
    case Pyc::POP_JUMP_IF_NOT_NONE_A:
    case Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A:
    case Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A:
    case Pyc::INSTRUMENTED_POP_JUMP_IF_NONE_A:
    case Pyc::INSTRUMENTED_POP_JUMP_IF_NOT_NONE_A:
        /* Branch instructions carry one cache unit starting with 3.13
         * (magic 3561, "Add cache entry to branch instructions"). */
        return since_3_13 ? 1 : 0;
    default:
        return 0;
    }
}
/* Returns `pos` advanced by the byte size of the inline cache entries
 * that belong to `opcode` (each entry is one uint16_t). */
int bc_next_instr_offset_after_caches(PycModule* mod, int opcode, int pos)
{
    const int cache_units = bc_inline_cache_entries(mod, opcode);
    const int bytes_per_unit = (int)sizeof(uint16_t);
    return pos + cache_units * bytes_per_unit;
}
void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
int indent, unsigned flags)
{
@ -314,9 +374,15 @@ void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
static const char *binop_strings[] = {
"+", "&", "//", "<<", "@", "*", "%", "|", "**", ">>", "-", "/", "^",
"+=", "&=", "//=", "<<=", "@=", "*=", "%=", "|=", "**=", ">>=", "-=", "/=", "^=",
"[]",
};
static const size_t binop_strings_len = sizeof(binop_strings) / sizeof(binop_strings[0]);
static const char *common_constants[] = {
"AssertionError", "NotImplementedError", "tuple", "all", "any",
};
static const size_t common_constants_len = sizeof(common_constants) / sizeof(common_constants[0]);
static const char *intrinsic1_names[] = {
"INTRINSIC_1_INVALID", "INTRINSIC_PRINT", "INTRINSIC_IMPORT_STAR",
"INTRINSIC_STOPITERATION_ERROR", "INTRINSIC_ASYNC_GEN_WRAP",
@ -415,6 +481,7 @@ void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
break;
case Pyc::DELETE_FAST_A:
case Pyc::LOAD_FAST_A:
case Pyc::LOAD_FAST_BORROW_A:
case Pyc::STORE_FAST_A:
case Pyc::LOAD_FAST_CHECK_A:
case Pyc::LOAD_FAST_AND_CLEAR_A:
@ -425,6 +492,7 @@ void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
}
break;
case Pyc::LOAD_FAST_LOAD_FAST_A:
case Pyc::LOAD_FAST_BORROW_LOAD_FAST_BORROW_A:
case Pyc::STORE_FAST_LOAD_FAST_A:
case Pyc::STORE_FAST_STORE_FAST_A:
try {
@ -472,14 +540,11 @@ void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
case Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A:
case Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A:
{
/* TODO: Fix offset based on CACHE instructions.
Offset is relative to next non-CACHE instruction
and thus will be printed lower than actual value.
See TODO @ END_FOR ASTree.cpp */
int offs = operand;
if (mod->verCompare(3, 10) >= 0)
offs *= sizeof(uint16_t); // BPO-27129
formatted_print(pyc_output, "%d (to %d)", operand, pos+offs);
formatted_print(pyc_output, "%d (to %d)", operand,
bc_next_instr_offset_after_caches(mod, opcode, pos) + offs);
}
break;
case Pyc::JUMP_BACKWARD_NO_INTERRUPT_A:
@ -492,7 +557,8 @@ void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
{
// BACKWARD jumps were only introduced in Python 3.11
int offs = operand * sizeof(uint16_t); // BPO-27129
formatted_print(pyc_output, "%d (to %d)", operand, pos-offs);
formatted_print(pyc_output, "%d (to %d)", operand,
bc_next_instr_offset_after_caches(mod, opcode, pos) - offs);
}
break;
case Pyc::POP_JUMP_IF_FALSE_A:
@ -504,7 +570,8 @@ void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
if (mod->verCompare(3, 12) >= 0) {
// These are now relative as well
int offs = operand * sizeof(uint16_t);
formatted_print(pyc_output, "%d (to %d)", operand, pos+offs);
formatted_print(pyc_output, "%d (to %d)", operand,
bc_next_instr_offset_after_caches(mod, opcode, pos) + offs);
} else if (mod->verCompare(3, 10) >= 0) {
// BPO-27129
formatted_print(pyc_output, "%d (to %d)", operand,
@ -532,6 +599,23 @@ void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
else
formatted_print(pyc_output, "%d (UNKNOWN)", operand);
break;
case Pyc::LOAD_COMMON_CONSTANT_A:
if (static_cast<size_t>(operand) < common_constants_len)
formatted_print(pyc_output, "%d (%s)", operand, common_constants[operand]);
else
formatted_print(pyc_output, "%d (UNKNOWN)", operand);
break;
case Pyc::LOAD_SMALL_INT_A:
formatted_print(pyc_output, "%d (%d)", operand, operand);
break;
case Pyc::LOAD_SPECIAL_A:
formatted_print(pyc_output, "%d (special[%d]%s)", operand, operand >> 1,
(operand & 1) ? " + NULL" : "");
break;
case Pyc::BUILD_INTERPOLATION_A:
formatted_print(pyc_output, "%d (conversion=%d format=%d)", operand,
operand & 0x03, (operand >> 2) & 0x01);
break;
case Pyc::IS_OP_A:
formatted_print(pyc_output, "%d (%s)", operand, (operand == 0) ? "is"
: (operand == 1) ? "is not"

View file

@ -30,6 +30,7 @@ int ByteToOpcode(int maj, int min, int opcode);
void print_const(std::ostream& pyc_output, PycRef<PycObject> obj, PycModule* mod,
const char* parent_f_string_quote = nullptr);
void bc_next(PycBuffer& source, PycModule* mod, int& opcode, int& operand, int& pos);
int bc_next_instr_offset_after_caches(PycModule* mod, int opcode, int pos);
void bc_disasm(std::ostream& pyc_output, PycRef<PycCode> code, PycModule* mod,
int indent, unsigned flags);
void bc_exceptiontable(std::ostream& pyc_output, PycRef<PycCode> code,

View file

@ -123,6 +123,9 @@ OPCODE(FORMAT_SIMPLE) // Python 3.13 ->
OPCODE(FORMAT_WITH_SPEC) // Python 3.13 ->
OPCODE(MAKE_FUNCTION) // Python 3.13 ->
OPCODE(TO_BOOL) // Python 3.13 ->
OPCODE(NOT_TAKEN) // Python 3.14 ->
OPCODE(POP_ITER) // Python 3.14 ->
OPCODE(BUILD_TEMPLATE) // Python 3.14 ->
/* Has parameter word */
OPCODE_A_FIRST(STORE_NAME) // Python 1.0 -> names[A]
@ -269,6 +272,13 @@ OPCODE_A(LOAD_FAST_LOAD_FAST) // Python 3.13 -> A=locals
OPCODE_A(SET_FUNCTION_ATTRIBUTE) // Python 3.13 -> A=attribute_type
OPCODE_A(STORE_FAST_LOAD_FAST) // Python 3.13 -> A=locals[A>>4]+locals[A&0xf]
OPCODE_A(STORE_FAST_STORE_FAST) // Python 3.13 -> A=locals[A>>4]+locals[A&0xf]
OPCODE_A(BUILD_INTERPOLATION) // Python 3.14 -> A=conversion/format flags
OPCODE_A(END_ASYNC_FOR) // Python 3.14 -> A=(unused)
OPCODE_A(LOAD_COMMON_CONSTANT) // Python 3.14 -> common_constants[A]
OPCODE_A(LOAD_SMALL_INT) // Python 3.14 -> A=small integer
OPCODE_A(LOAD_SPECIAL) // Python 3.14 -> special_methods[A>>1]+flag
OPCODE_A(LOAD_FAST_BORROW) // Python 3.14 -> locals[A]
OPCODE_A(LOAD_FAST_BORROW_LOAD_FAST_BORROW) // Python 3.14 -> locals[A>>4]+locals[A&0xf]
/* Instrumented opcodes */
OPCODE_A(INSTRUMENTED_LOAD_SUPER_ATTR) // Python 3.12 -> (see LOAD_SUPER_ATTR)
@ -290,3 +300,6 @@ OPCODE_A(INSTRUMENTED_END_SEND) // Python 3.12 -> (see END
OPCODE_A(INSTRUMENTED_INSTRUCTION) // Python 3.12 -> A=(unused)
OPCODE_A(INSTRUMENTED_LINE) // Python 3.12 -> ???
OPCODE_A(INSTRUMENTED_CALL_KW) // Python 3.13 -> (see CALL_KW)
OPCODE_A(INSTRUMENTED_POP_ITER) // Python 3.14 -> (see POP_ITER)
OPCODE_A(INSTRUMENTED_NOT_TAKEN) // Python 3.14 -> (see NOT_TAKEN)
OPCODE_A(INSTRUMENTED_END_ASYNC_FOR) // Python 3.14 -> (see END_ASYNC_FOR)

147
bytes/python_3_14.cpp Normal file
View file

@ -0,0 +1,147 @@
#include "bytecode_map.h"
/* Opcode table for Python 3.14: each MAP_OP(byte, NAME) pairs a raw opcode
 * byte with an opcode identifier.  Identifiers ending in _A are the ones
 * declared in the "Has parameter word" part of the opcode list.
 *
 * Bytes without an entry in this table: 3, 121-127 and 129-233.
 * NOTE(review): presumably these are specialized/internal or unassigned
 * opcodes that never appear in a .pyc file -- confirm against CPython's
 * Include/opcode_ids.h for 3.14. */
BEGIN_MAP(3, 14)
/* 0-43: no-operand range of the 3.14 numbering */
MAP_OP(0, CACHE)
MAP_OP(1, BINARY_SLICE)
MAP_OP(2, BUILD_TEMPLATE)
/* byte 3 intentionally has no entry */
MAP_OP(4, CALL_FUNCTION_EX_A)
MAP_OP(5, CHECK_EG_MATCH)
MAP_OP(6, CHECK_EXC_MATCH)
MAP_OP(7, CLEANUP_THROW)
MAP_OP(8, DELETE_SUBSCR)
MAP_OP(9, END_FOR)
MAP_OP(10, END_SEND)
MAP_OP(11, EXIT_INIT_CHECK)
MAP_OP(12, FORMAT_SIMPLE)
MAP_OP(13, FORMAT_WITH_SPEC)
MAP_OP(14, GET_AITER)
MAP_OP(15, GET_ANEXT)
MAP_OP(16, GET_ITER)
MAP_OP(17, RESERVED)
MAP_OP(18, GET_LEN)
MAP_OP(19, GET_YIELD_FROM_ITER)
MAP_OP(20, INTERPRETER_EXIT)
MAP_OP(21, LOAD_BUILD_CLASS)
MAP_OP(22, LOAD_LOCALS)
MAP_OP(23, MAKE_FUNCTION)
MAP_OP(24, MATCH_KEYS)
MAP_OP(25, MATCH_MAPPING)
MAP_OP(26, MATCH_SEQUENCE)
MAP_OP(27, NOP)
MAP_OP(28, NOT_TAKEN)
MAP_OP(29, POP_EXCEPT)
MAP_OP(30, POP_ITER)
MAP_OP(31, POP_TOP)
MAP_OP(32, PUSH_EXC_INFO)
MAP_OP(33, PUSH_NULL)
MAP_OP(34, RETURN_GENERATOR)
MAP_OP(35, RETURN_VALUE)
MAP_OP(36, SETUP_ANNOTATIONS)
MAP_OP(37, STORE_SLICE)
MAP_OP(38, STORE_SUBSCR)
MAP_OP(39, TO_BOOL)
MAP_OP(40, UNARY_INVERT)
MAP_OP(41, UNARY_NEGATIVE)
MAP_OP(42, UNARY_NOT)
MAP_OP(43, WITH_EXCEPT_START)
/* 44-120: opcodes that take an operand */
MAP_OP(44, BINARY_OP_A)
MAP_OP(45, BUILD_INTERPOLATION_A)
MAP_OP(46, BUILD_LIST_A)
MAP_OP(47, BUILD_MAP_A)
MAP_OP(48, BUILD_SET_A)
MAP_OP(49, BUILD_SLICE_A)
MAP_OP(50, BUILD_STRING_A)
MAP_OP(51, BUILD_TUPLE_A)
MAP_OP(52, CALL_A)
MAP_OP(53, CALL_INTRINSIC_1_A)
MAP_OP(54, CALL_INTRINSIC_2_A)
MAP_OP(55, CALL_KW_A)
MAP_OP(56, COMPARE_OP_A)
MAP_OP(57, CONTAINS_OP_A)
MAP_OP(58, CONVERT_VALUE_A)
MAP_OP(59, COPY_A)
MAP_OP(60, COPY_FREE_VARS_A)
MAP_OP(61, DELETE_ATTR_A)
MAP_OP(62, DELETE_DEREF_A)
MAP_OP(63, DELETE_FAST_A)
MAP_OP(64, DELETE_GLOBAL_A)
MAP_OP(65, DELETE_NAME_A)
MAP_OP(66, DICT_MERGE_A)
MAP_OP(67, DICT_UPDATE_A)
MAP_OP(68, END_ASYNC_FOR_A)
MAP_OP(69, EXTENDED_ARG_A)
MAP_OP(70, FOR_ITER_A)
MAP_OP(71, GET_AWAITABLE_A)
MAP_OP(72, IMPORT_FROM_A)
MAP_OP(73, IMPORT_NAME_A)
MAP_OP(74, IS_OP_A)
MAP_OP(75, JUMP_BACKWARD_A)
MAP_OP(76, JUMP_BACKWARD_NO_INTERRUPT_A)
MAP_OP(77, JUMP_FORWARD_A)
MAP_OP(78, LIST_APPEND_A)
MAP_OP(79, LIST_EXTEND_A)
MAP_OP(80, LOAD_ATTR_A)
MAP_OP(81, LOAD_COMMON_CONSTANT_A)
MAP_OP(82, LOAD_CONST_A)
MAP_OP(83, LOAD_DEREF_A)
MAP_OP(84, LOAD_FAST_A)
MAP_OP(85, LOAD_FAST_AND_CLEAR_A)
MAP_OP(86, LOAD_FAST_BORROW_A)
MAP_OP(87, LOAD_FAST_BORROW_LOAD_FAST_BORROW_A)
MAP_OP(88, LOAD_FAST_CHECK_A)
MAP_OP(89, LOAD_FAST_LOAD_FAST_A)
MAP_OP(90, LOAD_FROM_DICT_OR_DEREF_A)
MAP_OP(91, LOAD_FROM_DICT_OR_GLOBALS_A)
MAP_OP(92, LOAD_GLOBAL_A)
MAP_OP(93, LOAD_NAME_A)
MAP_OP(94, LOAD_SMALL_INT_A)
MAP_OP(95, LOAD_SPECIAL_A)
MAP_OP(96, LOAD_SUPER_ATTR_A)
MAP_OP(97, MAKE_CELL_A)
MAP_OP(98, MAP_ADD_A)
MAP_OP(99, MATCH_CLASS_A)
MAP_OP(100, POP_JUMP_IF_FALSE_A)
MAP_OP(101, POP_JUMP_IF_NONE_A)
MAP_OP(102, POP_JUMP_IF_NOT_NONE_A)
MAP_OP(103, POP_JUMP_IF_TRUE_A)
MAP_OP(104, RAISE_VARARGS_A)
MAP_OP(105, RERAISE_A)
MAP_OP(106, SEND_A)
MAP_OP(107, SET_ADD_A)
MAP_OP(108, SET_FUNCTION_ATTRIBUTE_A)
MAP_OP(109, SET_UPDATE_A)
MAP_OP(110, STORE_ATTR_A)
MAP_OP(111, STORE_DEREF_A)
MAP_OP(112, STORE_FAST_A)
MAP_OP(113, STORE_FAST_LOAD_FAST_A)
MAP_OP(114, STORE_FAST_STORE_FAST_A)
MAP_OP(115, STORE_GLOBAL_A)
MAP_OP(116, STORE_NAME_A)
MAP_OP(117, SWAP_A)
MAP_OP(118, UNPACK_EX_A)
MAP_OP(119, UNPACK_SEQUENCE_A)
MAP_OP(120, YIELD_VALUE_A)
/* 121-127 have no entries */
MAP_OP(128, RESUME_A)
/* 129-233 have no entries; 234-254 are the INSTRUMENTED_* variants */
MAP_OP(234, INSTRUMENTED_END_FOR_A)
MAP_OP(235, INSTRUMENTED_POP_ITER_A)
MAP_OP(236, INSTRUMENTED_END_SEND_A)
MAP_OP(237, INSTRUMENTED_FOR_ITER_A)
MAP_OP(238, INSTRUMENTED_INSTRUCTION_A)
MAP_OP(239, INSTRUMENTED_JUMP_FORWARD_A)
MAP_OP(240, INSTRUMENTED_NOT_TAKEN_A)
MAP_OP(241, INSTRUMENTED_POP_JUMP_IF_TRUE_A)
MAP_OP(242, INSTRUMENTED_POP_JUMP_IF_FALSE_A)
MAP_OP(243, INSTRUMENTED_POP_JUMP_IF_NONE_A)
MAP_OP(244, INSTRUMENTED_POP_JUMP_IF_NOT_NONE_A)
MAP_OP(245, INSTRUMENTED_RESUME_A)
MAP_OP(246, INSTRUMENTED_RETURN_VALUE_A)
MAP_OP(247, INSTRUMENTED_YIELD_VALUE_A)
MAP_OP(248, INSTRUMENTED_END_ASYNC_FOR_A)
MAP_OP(249, INSTRUMENTED_LOAD_SUPER_ATTR_A)
MAP_OP(250, INSTRUMENTED_CALL_A)
MAP_OP(251, INSTRUMENTED_CALL_KW_A)
MAP_OP(252, INSTRUMENTED_CALL_FUNCTION_EX_A)
MAP_OP(253, INSTRUMENTED_JUMP_BACKWARD_A)
MAP_OP(254, INSTRUMENTED_LINE_A)
MAP_OP(255, ENTER_EXECUTOR_A)
END_MAP()

View file

@ -182,6 +182,12 @@ void PycModule::setVersion(unsigned int magic)
m_unicode = true;
break;
case MAGIC_3_14:
m_maj = 3;
m_min = 14;
m_unicode = true;
break;
/* Bad Magic detected */
default:
m_maj = -1;
@ -197,7 +203,7 @@ bool PycModule::isSupportedVersion(int major, int minor)
case 2:
return (minor >= 0 && minor <= 7);
case 3:
return (minor >= 0 && minor <= 12);
return (minor >= 0 && minor <= 14);
default:
return false;
}

View file

@ -36,6 +36,7 @@ enum PycMagic {
MAGIC_3_11 = 0x0A0D0DA7,
MAGIC_3_12 = 0x0A0D0DCB,
MAGIC_3_13 = 0x0A0D0DF3,
MAGIC_3_14 = 0x0A0D0E2B,
INVALID = 0,
};

View file

@ -62,6 +62,8 @@ PycRef<PycObject> CreateObject(int type)
case PycObject::TYPE_SET:
case PycObject::TYPE_FROZENSET:
return new PycSet(type);
case PycObject::TYPE_SLICE:
return new PycSlice(type);
default:
fprintf(stderr, "CreateObject: Got unsupported type 0x%X\n", type);
return NULL;

View file

@ -122,6 +122,7 @@ public:
TYPE_UNKNOWN = '?', // Python 1.0 ->
TYPE_SET = '<', // Python 2.5 ->
TYPE_FROZENSET = '>', // Python 2.5 ->
TYPE_SLICE = ':', // Python 3.14 ->
TYPE_ASCII = 'a', // Python 3.4 ->
TYPE_ASCII_INTERNED = 'A', // Python 3.4 ->
TYPE_SMALL_TUPLE = ')', // Python 3.4 ->

View file

@ -78,3 +78,23 @@ bool PycDict::isEqual(PycRef<PycObject> obj) const
}
return true;
}
/* PycSlice */
/* Reads the slice's three component objects from the stream, in the
 * order start, stop, step. */
void PycSlice::load(PycData* stream, PycModule* mod)
{
    PycRef<PycObject>* const fields[] = { &m_start, &m_stop, &m_step };
    for (PycRef<PycObject>* field : fields)
        *field = LoadObject(stream, mod);
}
/* Two slices are equal when the other object has the same type and its
 * start, stop and step each compare equal (checked in that order,
 * stopping at the first mismatch). */
bool PycSlice::isEqual(PycRef<PycObject> obj) const
{
    if (type() == obj.type()) {
        PycRef<PycSlice> other = obj.cast<PycSlice>();
        if (!m_start->isEqual(other->m_start))
            return false;
        if (!m_stop->isEqual(other->m_stop))
            return false;
        return m_step->isEqual(other->m_step);
    }
    return false;
}

View file

@ -70,4 +70,22 @@ private:
value_t m_values;
};
/* Object with three component objects -- start, stop and step -- each
 * exposed through a read-only accessor.  Its type defaults to TYPE_SLICE. */
class PycSlice : public PycObject {
    PycRef<PycObject> m_start;
    PycRef<PycObject> m_stop;
    PycRef<PycObject> m_step;

public:
    PycSlice(int type = TYPE_SLICE) : PycObject(type) { }

    /* Fills in the three components from the stream. */
    void load(class PycData* stream, class PycModule* mod) override;

    /* Component-wise comparison against another object. */
    bool isEqual(PycRef<PycObject> obj) const override;

    PycRef<PycObject> start() const { return m_start; }
    PycRef<PycObject> stop() const { return m_stop; }
    PycRef<PycObject> step() const { return m_step; }
};
#endif

View file

@ -220,6 +220,16 @@ void output_object(PycRef<PycObject> obj, PycModule* mod, int indent,
iputs(pyc_output, indent, "})\n");
}
break;
case PycObject::TYPE_SLICE:
{
PycRef<PycSlice> slice = obj.cast<PycSlice>();
iputs(pyc_output, indent, "slice(\n");
output_object(slice->start(), mod, indent + 1, flags, pyc_output);
output_object(slice->stop(), mod, indent + 1, flags, pyc_output);
output_object(slice->step(), mod, indent + 1, flags, pyc_output);
iputs(pyc_output, indent, ")\n");
}
break;
case PycObject::TYPE_NONE:
iputs(pyc_output, indent, "None\n");
break;