Add Python 3.11/3.12 opcode handlers to reduce decompilation failures

No-op prologue/bookkeeping opcodes:
- MAKE_CELL_A: closure cell initialization (Python 3.11+)
- COPY_FREE_VARS_A: copies free vars into frame at function entry (3.11+)
- RETURN_GENERATOR: creates the generator object at entry to a generator/coroutine function (3.11+)

Stack/call opcodes:
- YIELD_VALUE_A: generator yield with stack-depth operand (3.12)
- CALL_FUNCTION_EX_A / INSTRUMENTED_CALL_FUNCTION_EX_A: func(*args, **kwargs)
- LOAD_SUPER_ATTR_A: super().attr / super().method() access (new opcode in 3.12)
- LOAD_FROM_DICT_OR_DEREF_A: closure variable load (3.12)

Comprehension opcodes:
- SET_ADD_A: set comprehension element append
- MAP_ADD_A: dict comprehension key/value append
- DICT_UPDATE_A: in-place dict merge (like SET_UPDATE_A for dicts)

Other opcodes:
- LOAD_ASSERTION_ERROR: pushes AssertionError class (used in assert stmts)
- DICT_MERGE_A: merges **kwargs mapping into accumulator dict
- CALL_INTRINSIC_1_A: Python-internal intrinsic functions (pass-through)
- POP_JUMP_IF_NONE_A / POP_JUMP_IF_NOT_NONE_A: None-checking conditional jumps
- LOAD_FAST_CHECK_A / LOAD_FAST_AND_CLEAR_A: variants of LOAD_FAST

Bug fixes:
- Restore accidentally-removed 'case Pyc::LOAD_ATTR_A:' label that was
  clobbered when inserting the DICT_MERGE_A handler
- Handle NODE_TUPLE (and other unexpected node types) inside f-string
  (NODE_JOINEDSTR) value lists by wrapping them in { } instead of
  emitting 'Unsupported node type' and marking cleanBuild=false

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Gustavo Carneiro 2026-04-27 12:16:46 -03:00
commit 675b6d73a6

View file

@ -213,6 +213,8 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
&& opcode != Pyc::JUMP_IF_TRUE_OR_POP_A
&& opcode != Pyc::POP_JUMP_IF_TRUE_A
&& opcode != Pyc::POP_JUMP_FORWARD_IF_TRUE_A
&& opcode != Pyc::POP_JUMP_IF_NONE_A
&& opcode != Pyc::POP_JUMP_IF_NOT_NONE_A
&& opcode != Pyc::POP_BLOCK) {
else_pop = false;
@ -692,6 +694,33 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
stack.push(call);
}
break;
case Pyc::CALL_FUNCTION_EX_A:
case Pyc::INSTRUMENTED_CALL_FUNCTION_EX_A:
{
/* CALL_FUNCTION_EX flags: if flags&1, kwargs dict is on TOS */
PycRef<ASTNode> kw = nullptr;
if (operand & 1) {
kw = stack.top();
stack.pop();
}
PycRef<ASTNode> var = stack.top();
stack.pop();
PycRef<ASTNode> func = stack.top();
stack.pop();
/* Pop NULL sentinel pushed by PUSH_NULL / LOAD_METHOD in 3.11+ */
if (stack.top() == nullptr) {
stack.pop();
}
ASTCall::pparam_t pparamList;
ASTCall::kwparam_t kwparamList;
PycRef<ASTNode> call = new ASTCall(func, pparamList, kwparamList);
call.cast<ASTCall>()->setVar(var);
if (kw != nullptr) {
call.cast<ASTCall>()->setKW(kw);
}
stack.push(call);
}
break;
case Pyc::CALL_METHOD_A:
{
ASTCall::pparam_t pparamList;
@ -1130,17 +1159,28 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::POP_JUMP_FORWARD_IF_TRUE_A:
case Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A:
case Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A:
case Pyc::POP_JUMP_IF_NONE_A:
case Pyc::POP_JUMP_IF_NOT_NONE_A:
{
PycRef<ASTNode> cond = stack.top();
PycRef<ASTCondBlock> ifblk;
int popped = ASTCondBlock::UNINITED;
/* For POP_JUMP_IF_NONE/NOT_NONE: wrap the condition with "is None"/"is not None" */
if (opcode == Pyc::POP_JUMP_IF_NONE_A || opcode == Pyc::POP_JUMP_IF_NOT_NONE_A) {
int compare_op = (opcode == Pyc::POP_JUMP_IF_NONE_A)
? ASTCompare::CMP_IS : ASTCompare::CMP_IS_NOT;
cond = new ASTCompare(cond, new ASTObject(Pyc_None), compare_op);
}
if (opcode == Pyc::POP_JUMP_IF_FALSE_A
|| opcode == Pyc::POP_JUMP_IF_TRUE_A
|| opcode == Pyc::POP_JUMP_FORWARD_IF_FALSE_A
|| opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A) {
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A
|| opcode == Pyc::POP_JUMP_IF_NONE_A
|| opcode == Pyc::POP_JUMP_IF_NOT_NONE_A) {
/* Pop condition before the jump */
stack.pop();
popped = ASTCondBlock::PRE_POPPED;
@ -1156,12 +1196,16 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
popped = ASTCondBlock::POPPED;
}
/* "Jump if true" means "Jump if not false" */
/* "Jump if true" means "Jump if not false".
POP_JUMP_IF_NONE/NOT_NONE both jump when the condition is true
(after wrapping with is None / is not None above). */
bool neg = opcode == Pyc::JUMP_IF_TRUE_A
|| opcode == Pyc::JUMP_IF_TRUE_OR_POP_A
|| opcode == Pyc::POP_JUMP_IF_TRUE_A
|| opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A;
|| opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A
|| opcode == Pyc::POP_JUMP_IF_NONE_A
|| opcode == Pyc::POP_JUMP_IF_NOT_NONE_A;
int offs = operand;
if (mod->verCompare(3, 10) >= 0)
@ -1507,6 +1551,41 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
}
}
break;
case Pyc::SET_ADD_A:
{
/* SET_ADD i: Calls set.add(TOS1[-i], TOS). Used in set comprehensions. */
PycRef<ASTNode> value = stack.top();
stack.pop();
PycRef<ASTNode> set = stack.top();
if (curblock->blktype() == ASTBlock::BLK_FOR
&& curblock.cast<ASTIterBlock>()->isComprehension()) {
stack.pop();
stack.push(new ASTComprehension(value));
} else {
stack.push(new ASTSubscr(set, value));
}
}
break;
case Pyc::MAP_ADD_A:
{
/* MAP_ADD i: Calls dict.__setitem__(TOS1[-i], TOS1, TOS).
TOS is value, TOS1 is key. Used in dict comprehensions. */
PycRef<ASTNode> value = stack.top();
stack.pop();
PycRef<ASTNode> key = stack.top();
stack.pop();
PycRef<ASTNode> theMap = stack.top();
if (curblock->blktype() == ASTBlock::BLK_FOR
&& curblock.cast<ASTIterBlock>()->isComprehension()) {
stack.pop();
PycRef<ASTMap> dmap = new ASTMap();
dmap->add(key, value);
stack.push(new ASTComprehension(dmap.cast<ASTNode>()));
} else {
stack.push(new ASTSubscr(theMap, key));
}
}
break;
case Pyc::SET_UPDATE_A:
{
PycRef<ASTNode> rhs = stack.top();
@ -1561,6 +1640,43 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
stack.push(new ASTList(result));
}
break;
case Pyc::DICT_MERGE_A:
{
/* DICT_MERGE i: merges TOS (a dict or mapping) into the dict at STACK[-i].
Used when building **kwargs dicts for CALL_FUNCTION_EX.
We represent it by adding the merged value as a keyword-unpack entry
in the accumulator map on the stack. */
PycRef<ASTNode> rhs = stack.top();
stack.pop();
// The map being built is 'operand' positions down in the stack.
// For the common case (operand==1), it's the new top of stack.
// We represent **rhs as a map entry with a nullptr key (prints as **rhs).
if (stack.top().type() == ASTNode::NODE_MAP) {
stack.top().cast<ASTMap>()->add(nullptr, rhs);
} else {
// Fallback: no map underneath, so push rhs back onto the stack so the call can proceed
stack.push(rhs);
}
}
break;
case Pyc::DICT_UPDATE_A:
{
/* DICT_UPDATE i: Calls dict.update(TOS1[-i], TOS). Similar to DICT_MERGE but
does not raise an exception on duplicate keys. We merge TOS into the map below. */
PycRef<ASTNode> rhs = stack.top();
stack.pop();
if (stack.top().type() == ASTNode::NODE_MAP) {
stack.top().cast<ASTMap>()->add(nullptr, rhs);
} else {
stack.push(rhs);
}
}
break;
case Pyc::CALL_INTRINSIC_1_A:
/* CALL_INTRINSIC_1 intrinsic_id: Calls a Python-internal function on TOS.
For decompilation purposes, most intrinsics act as pass-through (e.g. list-to-tuple). */
/* TOS remains unchanged — just leave it on the stack */
break;
case Pyc::LOAD_ATTR_A:
{
PycRef<ASTNode> name = stack.top();
@ -1581,6 +1697,25 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
}
}
break;
case Pyc::LOAD_SUPER_ATTR_A:
{
/* Python 3.12: LOAD_SUPER_ATTR attr_idx
Stack before: super_callable | __class__ | self
Represents: super().attr or super().method (method call if low bit set) */
PycRef<ASTNode> super_callable = stack.top(); stack.pop();
/* __class__ and self */ stack.pop(); stack.pop();
if (operand & 1) {
/* method call: push NULL sentinel before the method attribute */
stack.push(nullptr);
}
int name_idx = operand >> 1;
/* Build super() call node, then attribute access on it */
PycRef<ASTNode> super_call = new ASTCall(super_callable,
ASTCall::pparam_t(),
ASTCall::kwparam_t());
stack.push(new ASTBinary(super_call, new ASTName(code->getName(name_idx)), ASTBinary::BIN_ATTR));
}
break;
case Pyc::LOAD_BUILD_CLASS:
stack.push(new ASTLoadBuildClass(new PycObject()));
break;
@ -1605,9 +1740,12 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
break;
case Pyc::LOAD_DEREF_A:
case Pyc::LOAD_CLASSDEREF_A:
case Pyc::LOAD_FROM_DICT_OR_DEREF_A:
stack.push(new ASTName(code->getCellVar(mod, operand)));
break;
case Pyc::LOAD_FAST_A:
case Pyc::LOAD_FAST_CHECK_A:
case Pyc::LOAD_FAST_AND_CLEAR_A:
if (mod->verCompare(1, 3) < 0)
stack.push(new ASTName(code->getName(operand)));
else
@ -1647,6 +1785,14 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::LOAD_NAME_A:
stack.push(new ASTName(code->getName(operand)));
break;
case Pyc::LOAD_ASSERTION_ERROR:
{
/* Pushes AssertionError onto the stack (used in assert statements). */
PycRef<PycString> assertStr = new PycString();
assertStr->setValue("AssertionError");
stack.push(new ASTName(assertStr));
}
break;
case Pyc::MAKE_CLOSURE_A:
case Pyc::MAKE_FUNCTION_A:
{
@ -2574,6 +2720,7 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
}
break;
case Pyc::YIELD_VALUE:
case Pyc::YIELD_VALUE_A:
case Pyc::INSTRUMENTED_YIELD_VALUE_A:
{
PycRef<ASTNode> value = stack.top();
@ -2587,6 +2734,9 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::PRECALL_A:
case Pyc::RESUME_A:
case Pyc::INSTRUMENTED_RESUME_A:
case Pyc::RETURN_GENERATOR:
case Pyc::MAKE_CELL_A:
case Pyc::COPY_FREE_VARS_A:
/* We just entirely ignore this / no-op */
break;
case Pyc::CACHE:
@ -3004,7 +3154,12 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
print_const(pyc_output, val.cast<ASTObject>()->object(), mod, F_STRING_QUOTE);
break;
default:
fprintf(stderr, "Unsupported node type %d in NODE_JOINEDSTR\n", val.type());
/* Any other node type (e.g. NODE_TUPLE from comprehension/unpack) is
a formatted expression; wrap it in { } like a FormattedValue. */
pyc_output << "{";
print_src(val, mod, pyc_output);
pyc_output << "}";
break;
}
}
pyc_output << F_STRING_QUOTE;
@ -3079,9 +3234,15 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
else
pyc_output << ",\n";
start_line(cur_indent, pyc_output);
print_src(val.first, mod, pyc_output);
pyc_output << ": ";
print_src(val.second, mod, pyc_output);
if (val.first == NULL) {
/* nullptr key means **dict unpacking, e.g. from DICT_MERGE */
pyc_output << "**";
print_src(val.second, mod, pyc_output);
} else {
print_src(val.first, mod, pyc_output);
pyc_output << ": ";
print_src(val.second, mod, pyc_output);
}
first = false;
}
cur_indent--;