#include #include #include #include #include #include #include #include "ASTree.h" #include "FastStack.h" #include "pyc_numeric.h" #include "bytecode.h" // This must be a triple quote (''' or """), to handle interpolated string literals containing the opposite quote style. // E.g. f'''{"interpolated "123' literal"}''' -> valid. // E.g. f"""{"interpolated "123' literal"}""" -> valid. // E.g. f'{"interpolated "123' literal"}' -> invalid, unescaped quotes in literal. // E.g. f'{"interpolated \"123\' literal"}' -> invalid, f-string expression does not allow backslash. // NOTE: Nested f-strings not supported. #define F_STRING_QUOTE "'''" static void append_to_chain_store(const PycRef& chainStore, PycRef item, FastStack& stack, const PycRef& curblock); /* Use this to determine if an error occurred (and therefore, if we should * avoid cleaning the output tree) */ static bool cleanBuild; /* Use this to prevent printing return keywords and newlines in lambdas. */ static bool inLambda = false; /* Use this to keep track of whether we need to print out any docstring and * the list of global variables that we are using (such as inside a function). */ static bool printDocstringAndGlobals = false; /* Use this to keep track of whether we need to print a class or module docstring */ static bool printClassDocstring = true; // shortcut for all top/pop calls static PycRef StackPopTop(FastStack& stack) { const auto node(stack.top()); stack.pop(); return node; } /* compiler generates very, VERY similar byte code for if/else statement block and if-expression * statement * if a: b = 1 * else: b = 2 * expression: * b = 1 if a else 2 * (see for instance https://stackoverflow.com/a/52202007) * here, try to guess if just finished else statement is part of if-expression (ternary operator) * if it is, remove statements from the block and put a ternary node on top of stack */ static void CheckIfExpr(FastStack& stack, PycRef curblock) { if (stack.empty()) return; if (curblock->nodes().size() < 2) return; auto rit = curblock->nodes().crbegin(); // the last is "else" block, the one before should be "if" (could be "for", ...) if ((*rit)->type() != ASTNode::NODE_BLOCK || (*rit).cast()->blktype() != ASTBlock::BLK_ELSE) return; ++rit; if ((*rit)->type() != ASTNode::NODE_BLOCK || (*rit).cast()->blktype() != ASTBlock::BLK_IF) return; auto else_expr = StackPopTop(stack); curblock->removeLast(); auto if_block = curblock->nodes().back(); auto if_expr = StackPopTop(stack); curblock->removeLast(); stack.push(new ASTTernary(std::move(if_block), std::move(if_expr), std::move(else_expr))); } PycRef BuildFromCode(PycRef code, PycModule* mod); /* Sentinel name representing the live exception pushed by PUSH_EXC_INFO (Python 3.11+). It is bound by `except ... as ` (a STORE) or discarded (a POP_TOP); it must never reach the output. */ static const char* const EXC_SENTINEL = ""; static bool isExcSentinel(const PycRef& node) { return node != NULL && node.type() == ASTNode::NODE_NAME && node.cast()->name()->isEqual(EXC_SENTINEL); } static bool sameName(const PycRef& a, const PycRef& b) { return a != NULL && b != NULL && a.type() == ASTNode::NODE_NAME && b.type() == ASTNode::NODE_NAME && a.cast()->name()->isEqual(b.cast()->name()->value()); } /* Python 3.11 `except ... as ` binding plus its compiler cleanup. Returns true if the store was consumed (and should be suppressed). */ static bool handleExceptBinding(PycRef& curblock, const PycRef& value, const PycRef& name) { if (curblock->blktype() != ASTBlock::BLK_EXCEPT) return false; PycRef exc = curblock.try_cast(); if (exc == NULL) return false; if (isExcSentinel(value)) { /* except as : */ exc->setExceptVar(name); return true; } /* Compiler cleanup at the end of an `except ... as ` handler: ` = None; del `. The store value may arrive either as a NULL placeholder or as an explicit None constant (LOAD_CONST None; STORE). Suppress both when the target is the bound exception var. */ bool valueIsNone = (value == NULL) || (value != NULL && value.type() == ASTNode::NODE_OBJECT && value.cast()->object() == Pyc_None); if (valueIsNone && exc->exceptVar() != NULL && sameName(name, exc->exceptVar())) return true; return false; } /* True if `del ` targets the bound exception variable (cleanup). */ static bool isExceptVarDelete(const PycRef& curblock, const PycRef& name) { if (curblock->blktype() != ASTBlock::BLK_EXCEPT) return false; PycRef exc = curblock.try_cast(); return exc != NULL && exc->exceptVar() != NULL && sameName(name, exc->exceptVar()); } /* Search a decompiled comprehension/generator code body for the ASTComprehension node it produces (Python 3.x compiles comprehensions and generator expressions into separate code objects). */ static PycRef FindComprehension(PycRef node) { if (node == NULL) return NULL; switch (node.type()) { case ASTNode::NODE_COMPREHENSION: return node.cast(); case ASTNode::NODE_RETURN: return FindComprehension(node.cast()->value()); case ASTNode::NODE_STORE: return FindComprehension(node.cast()->src()); case ASTNode::NODE_NODELIST: for (const auto& n : node.cast()->nodes()) { PycRef c = FindComprehension(n); if (c != NULL) return c; } return NULL; default: return NULL; } } /* A generator expression compiles to a code object that yields rather than building a NODE_COMPREHENSION, so reconstruct it from its for-loop: the FOR block becomes the comprehension generator, the yielded value the result, and a wrapping `if` its filter. The caller substitutes the implicit ".0" iterator with the real iterable. */ /* Walk a comprehension for-loop body for the yielded value, descending through (possibly nested) `if` filters and combining their conditions with `and`. Returns the yielded value and, via outCond, the combined filter (or NULL). */ static PycRef findCompYield(const ASTBlock::list_t& nodes, PycRef condSoFar, PycRef& outCond) { for (const auto& n : nodes) { if (n.type() == ASTNode::NODE_RETURN && n.cast()->rettype() == ASTReturn::YIELD) { outCond = condSoFar; return n.cast()->value(); } if (n.type() == ASTNode::NODE_BLOCK && n.cast()->blktype() == ASTBlock::BLK_IF) { PycRef ifblk = n.cast(); PycRef c = ifblk->cond(); if (ifblk->negative()) c = new ASTUnary(c, ASTUnary::UN_NOT); PycRef combined = (condSoFar == NULL) ? c : new ASTBinary(condSoFar, c, ASTBinary::BIN_LOG_AND); PycRef r = findCompYield(ifblk->nodes(), combined, outCond); if (r != NULL) return r; } } return NULL; } static PycRef SynthGenexpr(PycRef node) { if (node == NULL) return NULL; if (node.type() == ASTNode::NODE_NODELIST) { for (const auto& n : node.cast()->nodes()) { PycRef c = SynthGenexpr(n); if (c != NULL) return c; } return NULL; } if (node.type() == ASTNode::NODE_BLOCK && node.cast()->blktype() == ASTBlock::BLK_FOR) { PycRef forblk = node.cast(); PycRef cond; PycRef result = findCompYield(forblk->nodes(), NULL, cond); if (result == NULL) return NULL; if (cond != NULL) forblk->setCondition(cond); PycRef comp = new ASTComprehension(result); comp->addGenerator(forblk); return comp; } return NULL; } /* Python 3.11 with-statement pre-pass. For each BEFORE_WITH whose normal exit has the canonical shape (body -> implicit __exit__ -> JUMP over the cleanup handler -> resume), record the with body end and the resume offset. The region [bodyEnd, resume) (implicit __exit__ call + jump + exception cleanup handler) is then skipped during decompilation. With-statements without this clean shape are left unhandled (no regression). */ static void ScanWithBlocks(PycRef code, PycModule* mod, const std::vector& entries, std::map& bodyEndByBefore, std::map& resumeByBodyEnd) { PycBuffer src(code->code()->value(), code->code()->length()); int opcode, operand, pos = 0; std::vector befores; std::vector> fwdJumps; /* (pos, target) */ std::map opcodeAt; /* pos -> opcode */ while (!src.atEof()) { int p = pos; bc_next(src, mod, opcode, operand, pos); opcodeAt[p] = opcode; if (opcode == Pyc::BEFORE_WITH) befores.push_back(p); else if (opcode == Pyc::JUMP_FORWARD_A) fwdJumps.push_back(std::make_pair(p, pos + operand * 2)); } for (int bp : befores) { int bodyEnd = -1, handler = -1; for (const auto& e : entries) { if (e.start_offset > bp) { bodyEnd = e.end_offset; handler = e.target; break; } } if (bodyEnd < 0 || handler < 0) continue; /* Confirm the handler is a genuine with-cleanup: it must begin with PUSH_EXC_INFO followed by WITH_EXCEPT_START. */ auto h0 = opcodeAt.find(handler); if (h0 == opcodeAt.end() || h0->second != Pyc::PUSH_EXC_INFO) continue; bool hasWithExcept = false; for (const auto& kv : opcodeAt) { if (kv.first > handler && kv.first <= handler + 4) { if (kv.second == Pyc::WITH_EXCEPT_START) { hasWithExcept = true; break; } } } if (!hasWithExcept) continue; int resume = -1; for (const auto& jp : fwdJumps) { /* The normal-exit jump sits between the body end and the handler and jumps over the handler (target at/after it). */ if (jp.first >= bodyEnd && jp.first < handler && jp.second >= handler) { resume = jp.second; break; } } if (resume < 0) continue; /* only the clean jump-over-handler shape */ bodyEndByBefore[bp] = bodyEnd; resumeByBodyEnd[bodyEnd] = resume; } } /* Python 3.11 try/finally pre-pass. A finally compiles to: try body -> finally body (normal copy) -> JUMP over an exception handler that duplicates the finally body and re-raises. Recognize it from the exception table and record, per try-body entry start: the try body end, the finally block end (the JUMP over the duplicate), and the resume offset. The duplicate handler region is skipped during decompilation. Distinguishes finally from except handlers: after PUSH_EXC_INFO a finally has neither a POP_TOP (bare except) nor a CHECK_EXC_MATCH (typed except). */ static void ScanTryFinally(PycRef code, PycModule* mod, const std::vector& entries, std::map& tryEndByStart, std::map& finallyEndByStart, std::map& resumeByFinallyEnd) { PycBuffer src(code->code()->value(), code->code()->length()); int opcode, operand, pos = 0; std::map opcodeAt; std::vector> fwdJumps; while (!src.atEof()) { int p = pos; bc_next(src, mod, opcode, operand, pos); opcodeAt[p] = opcode; if (opcode == Pyc::JUMP_FORWARD_A) fwdJumps.push_back(std::make_pair(p, pos + operand * 2)); } for (const auto& e : entries) { if (e.push_lasti) continue; /* try-body entries are lasti=False */ int T = e.target; auto h0 = opcodeAt.find(T); if (h0 == opcodeAt.end() || h0->second != Pyc::PUSH_EXC_INFO) continue; /* handler region end: the exception entry that starts at the handler */ int handlerEnd = -1; for (const auto& e2 : entries) { if (e2.start_offset == T) { handlerEnd = e2.end_offset; break; } } if (handlerEnd < 0) continue; auto h1 = opcodeAt.find(T + 2); if (h1 != opcodeAt.end() && h1->second == Pyc::POP_TOP) continue; /* bare except, not finally */ bool hasCheck = false; for (const auto& kv : opcodeAt) { if (kv.first >= T && kv.first < handlerEnd && kv.second == Pyc::CHECK_EXC_MATCH) { hasCheck = true; break; } } if (hasCheck) continue; /* typed except, not finally */ /* The normal finally copy ends with a JUMP over the handler. */ int jumpPos = -1, resume = -1; for (const auto& jp : fwdJumps) { if (jp.first >= e.end_offset && jp.first < T && jp.second >= T) { jumpPos = jp.first; resume = jp.second; break; } } if (jumpPos < 0) continue; tryEndByStart[e.start_offset] = e.end_offset; finallyEndByStart[e.start_offset] = jumpPos; resumeByFinallyEnd[jumpPos] = resume; } } /* Python 3.11 while-loop pre-pass. A while loop with the standard bottom-test layout compiles to: ; POP_JUMP_FORWARD_IF_FALSE end (entry guard) loop_start: ; POP_JUMP_BACKWARD_IF_TRUE loop_start (back-edge) end: Without recognition pycdc renders both halves as independent `if` blocks and never forms the loop (the body's trailing back-edge becomes `if not cond: pass`). Detect it from a conditional backward jump whose target is the instruction immediately following a matching forward guard, and whose guard skips to the instruction right after the back-edge. Record the guard position (the BLK_WHILE opens there) and the back-edge position (the loop closes there). */ static void ScanWhileLoops(PycRef code, PycModule* mod, std::map& whileGuardEnd, std::set& whileBackedges) { PycBuffer src(code->code()->value(), code->code()->length()); int opcode, operand, pos = 0; std::vector order; std::map opAt, operAt, nextAt; while (!src.atEof()) { int p = pos; bc_next(src, mod, opcode, operand, pos); order.push_back(p); opAt[p] = opcode; operAt[p] = operand; nextAt[p] = pos; } std::map instrEndingAt; /* nextpos -> pos */ for (int p : order) instrEndingAt[nextAt[p]] = p; auto isCondBack = [](int op){ return op == Pyc::POP_JUMP_BACKWARD_IF_TRUE_A || op == Pyc::POP_JUMP_BACKWARD_IF_FALSE_A || op == Pyc::POP_JUMP_BACKWARD_IF_NONE_A || op == Pyc::POP_JUMP_BACKWARD_IF_NOT_NONE_A; }; auto isFwdGuard = [](int op){ return op == Pyc::POP_JUMP_FORWARD_IF_FALSE_A || op == Pyc::POP_JUMP_FORWARD_IF_TRUE_A || op == Pyc::POP_JUMP_FORWARD_IF_NONE_A || op == Pyc::POP_JUMP_FORWARD_IF_NOT_NONE_A; }; for (int b : order) { if (!isCondBack(opAt[b])) continue; int end = nextAt[b]; int loop_start = end - operAt[b] * 2; /* backward target */ if (loop_start < 0 || loop_start >= b) continue; auto git = instrEndingAt.find(loop_start); if (git == instrEndingAt.end()) continue; int guard = git->second; if (!isFwdGuard(opAt[guard])) continue; int guard_target = nextAt[guard] + operAt[guard] * 2; if (guard_target != end) /* guard must skip to loop end */ continue; whileGuardEnd[guard] = end; whileBackedges.insert(b); } } PycRef BuildFromCode(PycRef code, PycModule* mod) { PycBuffer source(code->code()->value(), code->code()->length()); FastStack stack((mod->majorVer() == 1) ? 20 : code->stackSize()); stackhist_t stack_hist; std::stack > blocks; PycRef defblock = new ASTBlock(ASTBlock::BLK_MAIN); defblock->init(); PycRef curblock = defblock; blocks.push(defblock); int opcode, operand; int curpos = 0; int pos = 0; int unpack = 0; bool else_pop = false; bool need_try = false; bool variable_annotations = false; std::vector exception_entries; size_t next_exception_entry = 0; /* Python 3.11 with-statement reconstruction state. */ std::map withBodyEndByBefore; /* BEFORE_WITH pos -> body end */ std::map withResumeByBodyEnd; /* body end -> resume offset */ int with_skip_until = 0; /* skip cleanup region < this */ /* Python 3.11 try/finally reconstruction state. */ std::map finallyTryEndByStart; /* entry start -> try body end */ std::map finallyEndByStart; /* entry start -> finally end */ std::map finallyResumeByEnd; /* finally end -> resume offset */ /* Python 3.11 while-loop reconstruction state. */ std::map whileGuardEnd; /* guard pos -> loop end */ std::set whileBackedges; /* back-edge positions */ if (mod->verCompare(3, 11) >= 0) { exception_entries = code->exceptionTableEntries(); ScanWithBlocks(code, mod, exception_entries, withBodyEndByBefore, withResumeByBodyEnd); ScanTryFinally(code, mod, exception_entries, finallyTryEndByStart, finallyEndByStart, finallyResumeByEnd); ScanWhileLoops(code, mod, whileGuardEnd, whileBackedges); } while (!source.atEof()) { #if defined(BLOCK_DEBUG) || defined(STACK_DEBUG) fprintf(stderr, "%-7d", pos); #ifdef STACK_DEBUG fprintf(stderr, "%-5d", (unsigned int)stack_hist.size() + 1); #endif #ifdef BLOCK_DEBUG for (unsigned int i = 0; i < blocks.size(); i++) fprintf(stderr, " "); fprintf(stderr, "%s (%d)", curblock->type_str(), curblock->end()); #endif fprintf(stderr, "\n"); #endif /* Python 3.11 with-statement / try-finally: close the body at its end and enter the cleanup skip region BEFORE processing exception-table entries. An enclosing try re-protects the implicit __exit__/finally cleanup region, which would otherwise reopen a spurious try over it. */ if (curblock->blktype() == ASTBlock::BLK_WITH && curblock->end() != 0 && curblock->end() <= pos && blocks.size() > 1) { PycRef with = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(with.cast()); auto rit = withResumeByBodyEnd.find(with->end()); if (rit != withResumeByBodyEnd.end()) with_skip_until = rit->second; } if (curblock->blktype() == ASTBlock::BLK_FINALLY && curblock->end() != 0 && curblock->end() <= pos && blocks.size() > 1) { int finEnd = curblock->end(); PycRef final = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(final.cast()); if (curblock->blktype() == ASTBlock::BLK_CONTAINER && blocks.size() > 1) { PycRef cont = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(cont.cast()); } auto rit = finallyResumeByEnd.find(finEnd); if (rit != finallyResumeByEnd.end()) with_skip_until = rit->second; } if (with_skip_until > 0) { if (pos < with_skip_until) { curpos = pos; bc_next(source, mod, opcode, operand, pos); continue; } with_skip_until = 0; } while (next_exception_entry < exception_entries.size() && exception_entries[next_exception_entry].start_offset < pos) { next_exception_entry++; } if (next_exception_entry < exception_entries.size()) { const auto& entry = exception_entries[next_exception_entry]; if (entry.start_offset == pos && !entry.push_lasti) { auto fit = finallyTryEndByStart.find(entry.start_offset); if (fit != finallyTryEndByStart.end()) { /* Python 3.11 try/finally: container carries the finally end offset; the try body ends before the normal finally copy. */ int finEnd = finallyEndByStart[entry.start_offset]; PycRef cont = new ASTContainerBlock(finEnd, 0); blocks.push(cont.cast()); curblock = blocks.top(); stack_hist.push(stack); PycRef tryblock = new ASTBlock(ASTBlock::BLK_TRY, fit->second, true); blocks.push(tryblock.cast()); curblock = blocks.top(); next_exception_entry++; } else { if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { curblock.cast()->setExcept(entry.target); } else { PycRef next = new ASTContainerBlock(0, entry.target); blocks.push(next.cast()); curblock = blocks.top(); } stack_hist.push(stack); PycRef tryblock = new ASTBlock(ASTBlock::BLK_TRY, entry.target, true); blocks.push(tryblock.cast()); curblock = blocks.top(); next_exception_entry++; } } } if (curblock->blktype() == ASTBlock::BLK_TRY && curblock->end() == pos && blocks.size() > 1) { PycRef prev = curblock; blocks.pop(); curblock = blocks.top(); if (curblock->blktype() == ASTBlock::BLK_CONTAINER && curblock.cast()->hasExcept()) { if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } curblock->append(prev.cast()); stack_hist.push(stack); PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, 0, NULL, false); except->init(); blocks.push(except); curblock = blocks.top(); } else if (curblock->blktype() == ASTBlock::BLK_CONTAINER && curblock.cast()->hasFinally()) { /* Python 3.11 try/finally: the try body is followed by the finally body (normal copy). */ if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } curblock->append(prev.cast()); int finEnd = curblock.cast()->finally(); PycRef final = new ASTBlock(ASTBlock::BLK_FINALLY, finEnd, true); final->init(); blocks.push(final); curblock = blocks.top(); } else { blocks.push(prev); curblock = prev; } } if (curblock->blktype() == ASTBlock::BLK_EXCEPT && curblock->end() == pos && blocks.size() > 1) { PycRef prev = curblock; blocks.pop(); curblock = blocks.top(); if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } if (prev->size() != 0) { curblock->append(prev.cast()); } if (curblock->blktype() == ASTBlock::BLK_CONTAINER && !curblock.cast()->hasFinally()) { PycRef cont = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(cont.cast()); } } curpos = pos; bc_next(source, mod, opcode, operand, pos); if (need_try && opcode != Pyc::SETUP_EXCEPT_A) { need_try = false; /* Store the current stack for the except/finally statement(s) */ stack_hist.push(stack); PycRef tryblock = new ASTBlock(ASTBlock::BLK_TRY, curblock->end(), true); blocks.push(tryblock); curblock = blocks.top(); } else if (else_pop && opcode != Pyc::JUMP_FORWARD_A && opcode != Pyc::JUMP_IF_FALSE_A && opcode != Pyc::JUMP_IF_FALSE_OR_POP_A && opcode != Pyc::POP_JUMP_IF_FALSE_A && opcode != Pyc::POP_JUMP_FORWARD_IF_FALSE_A && opcode != Pyc::JUMP_IF_TRUE_A && opcode != Pyc::JUMP_IF_TRUE_OR_POP_A && opcode != Pyc::POP_JUMP_IF_TRUE_A && opcode != Pyc::POP_JUMP_FORWARD_IF_TRUE_A && opcode != Pyc::POP_BLOCK) { else_pop = false; PycRef prev = curblock; while (prev->end() < pos && prev->blktype() != ASTBlock::BLK_MAIN) { if (prev->blktype() != ASTBlock::BLK_CONTAINER) { if (prev->end() == 0) { break; } /* We want to keep the stack the same, but we need to pop * a level off the history. */ //stack = stack_hist.top(); if (!stack_hist.empty()) stack_hist.pop(); } blocks.pop(); if (blocks.empty()) break; curblock = blocks.top(); curblock->append(prev.cast()); prev = curblock; CheckIfExpr(stack, curblock); } } switch (opcode) { case Pyc::BINARY_OP_A: { ASTBinary::BinOp op = ASTBinary::from_binary_op(operand); if (op == ASTBinary::BIN_INVALID) fprintf(stderr, "Unsupported `BINARY_OP` operand value: %d\n", operand); PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); stack.push(new ASTBinary(left, right, op)); } break; case Pyc::BINARY_ADD: case Pyc::BINARY_AND: case Pyc::BINARY_DIVIDE: case Pyc::BINARY_FLOOR_DIVIDE: case Pyc::BINARY_LSHIFT: case Pyc::BINARY_MODULO: case Pyc::BINARY_MULTIPLY: case Pyc::BINARY_OR: case Pyc::BINARY_POWER: case Pyc::BINARY_RSHIFT: case Pyc::BINARY_SUBTRACT: case Pyc::BINARY_TRUE_DIVIDE: case Pyc::BINARY_XOR: case Pyc::BINARY_MATRIX_MULTIPLY: case Pyc::INPLACE_ADD: case Pyc::INPLACE_AND: case Pyc::INPLACE_DIVIDE: case Pyc::INPLACE_FLOOR_DIVIDE: case Pyc::INPLACE_LSHIFT: case Pyc::INPLACE_MODULO: case Pyc::INPLACE_MULTIPLY: case Pyc::INPLACE_OR: case Pyc::INPLACE_POWER: case Pyc::INPLACE_RSHIFT: case Pyc::INPLACE_SUBTRACT: case Pyc::INPLACE_TRUE_DIVIDE: case Pyc::INPLACE_XOR: case Pyc::INPLACE_MATRIX_MULTIPLY: { ASTBinary::BinOp op = ASTBinary::from_opcode(opcode); if (op == ASTBinary::BIN_INVALID) throw std::runtime_error("Unhandled opcode from ASTBinary::from_opcode"); PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); stack.push(new ASTBinary(left, right, op)); } break; case Pyc::BINARY_SUBSCR: { PycRef subscr = stack.top(); stack.pop(); PycRef src = stack.top(); stack.pop(); stack.push(new ASTSubscr(src, subscr)); } break; case Pyc::BREAK_LOOP: curblock->append(new ASTKeyword(ASTKeyword::KW_BREAK)); break; case Pyc::BUILD_CLASS: { PycRef class_code = stack.top(); stack.pop(); PycRef bases = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); stack.push(new ASTClass(class_code, bases, name)); } break; case Pyc::BUILD_FUNCTION: { PycRef fun_code = stack.top(); stack.pop(); stack.push(new ASTFunction(fun_code, {}, {})); } break; case Pyc::BUILD_LIST_A: { ASTList::value_t values; for (int i=0; iverCompare(3, 5) >= 0) { auto map = new ASTMap; for (int i=0; i value = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); map->add(key, value); } stack.push(map); } else { if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } stack.push(new ASTMap()); } break; case Pyc::BUILD_CONST_KEY_MAP_A: // Top of stack will be a tuple of keys. // Values will start at TOS - 1. { PycRef keys = stack.top(); stack.pop(); ASTConstMap::values_t values; values.reserve(operand); for (int i = 0; i < operand; ++i) { PycRef value = stack.top(); stack.pop(); values.push_back(value); } stack.push(new ASTConstMap(keys, values)); } break; case Pyc::STORE_MAP: { PycRef key = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); PycRef map = stack.top().cast(); map->add(key, value); } break; case Pyc::BUILD_SLICE_A: { if (operand == 2) { PycRef end = stack.top(); stack.pop(); PycRef start = stack.top(); stack.pop(); if (start.type() == ASTNode::NODE_OBJECT && start.cast()->object() == Pyc_None) { start = NULL; } if (end.type() == ASTNode::NODE_OBJECT && end.cast()->object() == Pyc_None) { end = NULL; } if (start == NULL && end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE0)); } else if (start == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE2, start, end)); } else if (end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE1, start, end)); } else { stack.push(new ASTSlice(ASTSlice::SLICE3, start, end)); } } else if (operand == 3) { PycRef step = stack.top(); stack.pop(); PycRef end = stack.top(); stack.pop(); PycRef start = stack.top(); stack.pop(); if (start.type() == ASTNode::NODE_OBJECT && start.cast()->object() == Pyc_None) { start = NULL; } if (end.type() == ASTNode::NODE_OBJECT && end.cast()->object() == Pyc_None) { end = NULL; } if (step.type() == ASTNode::NODE_OBJECT && step.cast()->object() == Pyc_None) { step = NULL; } /* We have to do this as a slice where one side is another slice */ /* [[a:b]:c] */ if (start == NULL && end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE0)); } else if (start == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE2, start, end)); } else if (end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE1, start, end)); } else { stack.push(new ASTSlice(ASTSlice::SLICE3, start, end)); } PycRef lhs = stack.top(); stack.pop(); if (step == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE1, lhs, step)); } else { stack.push(new ASTSlice(ASTSlice::SLICE3, lhs, step)); } } } break; case Pyc::BUILD_STRING_A: { // Nearly identical logic to BUILD_LIST ASTList::value_t values; for (int i = 0; i < operand; i++) { values.push_front(stack.top()); stack.pop(); } stack.push(new ASTJoinedStr(values)); } break; case Pyc::BUILD_TUPLE_A: { // if class is a closure code, ignore this tuple PycRef tos = stack.top(); if (tos && tos->type() == ASTNode::NODE_LOADBUILDCLASS) { break; } ASTTuple::value_t values; values.resize(operand); for (int i=0; igetConst(operand).cast()->size(); ASTKwNamesMap kwparamList; std::vector> keys = code->getConst(operand).cast()->values(); for (int i = 0; i < kwparams; i++) { kwparamList.add(new ASTObject(keys[kwparams - i - 1]), stack.top()); stack.pop(); } stack.push(new ASTKwNamesMap(kwparamList)); } break; case Pyc::CALL_A: case Pyc::CALL_FUNCTION_A: case Pyc::INSTRUMENTED_CALL_A: { int kwparams = (operand & 0xFF00) >> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; /* Test for the load build class function */ stack_hist.push(stack); /* Class keyword arguments (e.g. metaclass=) arrive via a 3.11 KW_NAMES map at TOS; capture them before scanning the bases. */ ASTCall::kwparam_t classKwargs; if (mod->verCompare(3, 11) >= 0 && stack.top() != NULL && stack.top().type() == ASTNode::NODE_KW_NAMES_MAP) { PycRef km = stack.top().cast(); stack.pop(); for (const auto& kv : km->values()) classKwargs.push_back(kv); } int basecnt = 0; ASTTuple::value_t bases; bases.resize(basecnt); PycRef TOS = stack.top(); int TOS_type = TOS.type(); // bases are NODE_NAME and NODE_BINARY at TOS while (TOS_type == ASTNode::NODE_NAME || TOS_type == ASTNode::NODE_BINARY) { bases.resize(basecnt + 1); bases[basecnt] = TOS; basecnt++; stack.pop(); TOS = stack.top(); TOS_type = TOS.type(); } // qualified name is PycString at TOS PycRef name = stack.top(); stack.pop(); PycRef function = stack.top(); stack.pop(); PycRef loadbuild = stack.top(); stack.pop(); int loadbuild_type = loadbuild.type(); if (loadbuild_type == ASTNode::NODE_LOADBUILDCLASS) { /* Python 3.11 pushes a NULL before LOAD_BUILD_CLASS; drop it so a following decorator application sees the real callable beneath the class instead of the NULL. */ if (mod->verCompare(3, 11) >= 0 && !stack.empty() && stack.top() == nullptr) { stack.pop(); } PycRef call = new ASTCall(function, pparamList, classKwargs); stack.push(new ASTClass(call, new ASTTuple(bases), name)); stack_hist.pop(); break; } else { stack = stack_hist.top(); stack_hist.pop(); } /* KW_NAMES(i) Stores a reference to co_consts[consti] into an internal variable for use by CALL. co_consts[consti] must be a tuple of strings. New in version 3.11. */ if (mod->verCompare(3, 11) >= 0) { PycRef object_or_map = stack.top(); if (object_or_map.type() == ASTNode::NODE_KW_NAMES_MAP) { stack.pop(); PycRef kwparams_map = object_or_map.cast(); for (ASTKwNamesMap::map_t::const_iterator it = kwparams_map->values().begin(); it != kwparams_map->values().end(); it++) { kwparamList.push_front(std::make_pair(it->first, it->second)); pparams -= 1; } } } else { for (int i = 0; i < kwparams; i++) { PycRef val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } } for (int i=0; i param = stack.top(); stack.pop(); if (param.type() == ASTNode::NODE_FUNCTION) { PycRef fun_code = param.cast()->code(); PycRef code_src = fun_code.cast()->object().cast(); PycRef function_name = code_src->name(); if (function_name->isEqual("")) { pparamList.push_front(param); } else { // Decorator used PycRef decor_name = new ASTName(function_name); curblock->append(new ASTStore(param, decor_name)); pparamList.push_front(decor_name); } } else { pparamList.push_front(param); } } PycRef func = stack.top(); stack.pop(); bool isDecoratorApply = false; bool compInlined = false; if (opcode == Pyc::CALL_A || opcode == Pyc::INSTRUMENTED_CALL_A) { if (!stack.empty() && stack.top() == nullptr) { /* Normal call: discard the NULL self slot. */ stack.pop(); } else if (!stack.empty() && stack.top() != nullptr) { /* Python 3.11 leaves a non-NULL value in the self slot (no PUSH_NULL) in two cases we reconstruct here: - decorator application: TOS is the decorated function/class, the real callable is below it; - comprehension/genexpr call: the callable below is a comprehension code object and TOS is the iterable. */ PycRef below = stack.top(); bool topIsDecoTarget = (func.type() == ASTNode::NODE_FUNCTION || func.type() == ASTNode::NODE_CLASS || (func.type() == ASTNode::NODE_CALL && func.cast()->isDecorator())); bool belowIsComp = false; if (below != NULL && below.type() == ASTNode::NODE_FUNCTION) { PycRef bcode = below.cast()->code() .cast()->object().cast(); const char* bnm = bcode->name()->value(); belowIsComp = bnm && (!strcmp(bnm, "") || !strcmp(bnm, "") || !strcmp(bnm, "") || !strcmp(bnm, "")); } if (belowIsComp) { /* Inline the comprehension. The callable is `below` and the iterable is `func` (TOS). Decompile the comprehension body and substitute its implicit ".0" iterator with the real iterable. */ PycRef ccode = below.cast()->code() .cast()->object().cast(); bool savedClean = cleanBuild; PycRef compAst = BuildFromCode(ccode, mod); cleanBuild = savedClean; PycRef comp = FindComprehension(compAst); if (comp == NULL) comp = SynthGenexpr(compAst); /* generator expr */ if (comp != NULL && !comp->generators().empty()) { comp->generators().front()->setIter(func); stack.pop(); /* remove the comprehension callable */ stack.push(comp.cast()); compInlined = true; } /* else: leave as-is and fall through to a plain call. */ } else if (topIsDecoTarget) { /* Decorator application: `func` (TOS) is the decorated target, the real callable is below. */ PycRef target = func; func = stack.top(); stack.pop(); if (target.type() == ASTNode::NODE_FUNCTION) { PycRef tcode = target.cast()->code() .cast()->object().cast(); PycRef tname = tcode->name(); if (!tname->isEqual("")) { /* Emit the def, then reference it by name. */ PycRef decor_name = new ASTName(tname); curblock->append(new ASTStore(target, decor_name)); target = decor_name; } } else if (target.type() == ASTNode::NODE_CLASS) { /* Decorated class: emit the class body, then reference it by name as the decorator argument. */ PycRef cname = target.cast()->name(); PycRef decor_name; if (cname != NULL && cname.type() == ASTNode::NODE_NAME) { decor_name = cname; } else if (cname != NULL && cname.type() == ASTNode::NODE_OBJECT) { PycRef s = cname.cast()->object().try_cast(); if (s != NULL) decor_name = new ASTName(s); } if (decor_name != NULL) { curblock->append(new ASTStore(target, decor_name)); target = decor_name; } } pparamList.push_front(target); isDecoratorApply = true; } /* else: a non-null self slot we don't special-case (e.g. method calls collapsed by LOAD_METHOD); behave as before with `func` as the callable. */ } } if (!compInlined) { PycRef callNode = new ASTCall(func, pparamList, kwparamList); if (isDecoratorApply) callNode.cast()->setDecorator(true); stack.push(callNode); } } break; case Pyc::CALL_FUNCTION_VAR_A: { PycRef var = stack.top(); stack.pop(); int kwparams = (operand & 0xFF00) >> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; for (int i=0; i val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } for (int i=0; i func = stack.top(); stack.pop(); PycRef call = new ASTCall(func, pparamList, kwparamList); call.cast()->setVar(var); stack.push(call); } break; case Pyc::CALL_FUNCTION_KW_A: { PycRef kw = stack.top(); stack.pop(); int kwparams = (operand & 0xFF00) >> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; for (int i=0; i val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } for (int i=0; i func = stack.top(); stack.pop(); PycRef call = new ASTCall(func, pparamList, kwparamList); call.cast()->setKW(kw); stack.push(call); } break; case Pyc::CALL_FUNCTION_VAR_KW_A: { PycRef kw = stack.top(); stack.pop(); PycRef var = stack.top(); stack.pop(); int kwparams = (operand & 0xFF00) >> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; for (int i=0; i val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } for (int i=0; i func = stack.top(); stack.pop(); PycRef call = new ASTCall(func, pparamList, kwparamList); call.cast()->setKW(kw); call.cast()->setVar(var); stack.push(call); } break; case Pyc::CALL_FUNCTION_EX_A: case Pyc::INSTRUMENTED_CALL_FUNCTION_EX_A: { /* Call with iterable args and (optionally) a mapping of kwargs. Low bit of the flag operand means a kwargs mapping is present. */ PycRef kw; if (operand & 0x01) { kw = stack.top(); stack.pop(); } PycRef var = stack.top(); stack.pop(); PycRef func = stack.top(); stack.pop(); /* Python 3.11+ pushes a NULL sentinel below the callable. */ if (mod->verCompare(3, 11) >= 0 && !stack.empty() && stack.top() == nullptr) { stack.pop(); } PycRef call = new ASTCall(func, ASTCall::pparam_t(), ASTCall::kwparam_t()); call.cast()->setVar(var); if (kw != NULL) call.cast()->setKW(kw); stack.push(call); } break; case Pyc::CALL_METHOD_A: { ASTCall::pparam_t pparamList; for (int i = 0; i < operand; i++) { PycRef param = stack.top(); stack.pop(); if (param.type() == ASTNode::NODE_FUNCTION) { PycRef fun_code = param.cast()->code(); PycRef code_src = fun_code.cast()->object().cast(); PycRef function_name = code_src->name(); if (function_name->isEqual("")) { pparamList.push_front(param); } else { // Decorator used PycRef decor_name = new ASTName(function_name); curblock->append(new ASTStore(param, decor_name)); pparamList.push_front(decor_name); } } else { pparamList.push_front(param); } } PycRef func = stack.top(); stack.pop(); stack.push(new ASTCall(func, pparamList, ASTCall::kwparam_t())); } break; case Pyc::CONTINUE_LOOP_A: curblock->append(new ASTKeyword(ASTKeyword::KW_CONTINUE)); break; case Pyc::COMPARE_OP_A: { PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); auto arg = operand; if (mod->verCompare(3, 12) == 0) arg >>= 4; // changed under GH-100923 else if (mod->verCompare(3, 13) >= 0) arg >>= 5; stack.push(new ASTCompare(left, right, arg)); } break; case Pyc::CONTAINS_OP_A: { PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); // The operand will be 0 for 'in' and 1 for 'not in'. stack.push(new ASTCompare(left, right, operand ? ASTCompare::CMP_NOT_IN : ASTCompare::CMP_IN)); } break; case Pyc::DELETE_ATTR_A: { PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR))); } break; case Pyc::DELETE_GLOBAL_A: code->markGlobal(code->getName(operand)); /* Fall through */ case Pyc::DELETE_NAME_A: { PycRef varname = code->getName(operand); if (varname->length() >= 2 && varname->value()[0] == '_' && varname->value()[1] == '[') { /* Don't show deletes that are a result of list comps. */ break; } PycRef name = new ASTName(varname); if (isExceptVarDelete(curblock, name)) break; curblock->append(new ASTDelete(name)); } break; case Pyc::DELETE_FAST_A: { PycRef name; if (mod->verCompare(1, 3) < 0) name = new ASTName(code->getName(operand)); else name = new ASTName(code->getLocal(operand)); if (name.cast()->name()->value()[0] == '_' && name.cast()->name()->value()[1] == '[') { /* Don't show deletes that are a result of list comps. */ break; } if (isExceptVarDelete(curblock, name)) break; curblock->append(new ASTDelete(name)); } break; case Pyc::DELETE_SLICE_0: { PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE0)))); } break; case Pyc::DELETE_SLICE_1: { PycRef upper = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE1, upper)))); } break; case Pyc::DELETE_SLICE_2: { PycRef lower = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE2, NULL, lower)))); } break; case Pyc::DELETE_SLICE_3: { PycRef lower = stack.top(); stack.pop(); PycRef upper = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE3, upper, lower)))); } break; case Pyc::DELETE_SUBSCR: { PycRef key = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, key))); } break; case Pyc::DUP_TOP: { if (stack.top().type() == PycObject::TYPE_NULL) { stack.push(stack.top()); } else if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { auto chainstore = stack.top(); stack.pop(); stack.push(stack.top()); stack.push(chainstore); } else { stack.push(stack.top()); ASTNodeList::list_t targets; stack.push(new ASTChainStore(targets, stack.top())); } } break; case Pyc::DUP_TOP_TWO: { PycRef first = stack.top(); stack.pop(); PycRef second = stack.top(); stack.push(first); stack.push(second); stack.push(first); } break; case Pyc::DUP_TOPX_A: { std::stack > first; std::stack > second; for (int i = 0; i < operand; i++) { PycRef node = stack.top(); stack.pop(); first.push(node); second.push(node); } while (first.size()) { stack.push(first.top()); first.pop(); } while (second.size()) { stack.push(second.top()); second.pop(); } } break; case Pyc::END_FINALLY: { bool isFinally = false; if (curblock->blktype() == ASTBlock::BLK_FINALLY) { PycRef final = curblock; blocks.pop(); stack = stack_hist.top(); stack_hist.pop(); curblock = blocks.top(); curblock->append(final.cast()); isFinally = true; } else if (curblock->blktype() == ASTBlock::BLK_EXCEPT) { blocks.pop(); PycRef prev = curblock; bool isUninitAsyncFor = false; if (blocks.top()->blktype() == ASTBlock::BLK_CONTAINER) { auto container = blocks.top(); blocks.pop(); auto asyncForBlock = blocks.top(); isUninitAsyncFor = asyncForBlock->blktype() == ASTBlock::BLK_ASYNCFOR && !asyncForBlock->inited(); if (isUninitAsyncFor) { auto tryBlock = container->nodes().front().cast(); if (!tryBlock->nodes().empty() && tryBlock->blktype() == ASTBlock::BLK_TRY) { auto store = tryBlock->nodes().front().try_cast(); if (store) { asyncForBlock.cast()->setIndex(store->dest()); } } curblock = blocks.top(); stack = stack_hist.top(); stack_hist.pop(); if (!curblock->inited()) fprintf(stderr, "Error when decompiling 'async for'.\n"); } else { blocks.push(container); } } if (!isUninitAsyncFor) { if (curblock->size() != 0) { blocks.top()->append(curblock.cast()); } curblock = blocks.top(); /* Turn it into an else statement. */ if (curblock->end() != pos || curblock.cast()->hasFinally()) { PycRef elseblk = new ASTBlock(ASTBlock::BLK_ELSE, prev->end()); elseblk->init(); blocks.push(elseblk); curblock = blocks.top(); } else { stack = stack_hist.top(); stack_hist.pop(); } } } if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { /* This marks the end of the except block(s). */ PycRef cont = curblock.cast(); if (!cont->hasFinally() || isFinally) { /* If there's no finally block, pop the container. */ blocks.pop(); curblock = blocks.top(); curblock->append(cont.cast()); } } } break; case Pyc::EXEC_STMT: { if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef loc = stack.top(); stack.pop(); PycRef glob = stack.top(); stack.pop(); PycRef stmt = stack.top(); stack.pop(); curblock->append(new ASTExec(stmt, glob, loc)); } break; case Pyc::FOR_ITER_A: case Pyc::INSTRUMENTED_FOR_ITER_A: { PycRef iter = stack.top(); // Iterable if (mod->verCompare(3, 12) < 0) { // Do not pop the iterator for py 3.12+ stack.pop(); } /* Pop it? Don't pop it? */ int end; bool comprehension = false; // before 3.8, there is a SETUP_LOOP instruction with block start and end position, // the operand is usually a jump to a POP_BLOCK instruction // after 3.8, block extent has to be inferred implicitly; the operand is a jump to a position after the for block if (mod->majorVer() == 3 && mod->minorVer() >= 8) { end = operand; if (mod->verCompare(3, 10) >= 0) end *= sizeof(uint16_t); // // BPO-27129 end += pos; { const char* cn = code->name()->value(); comprehension = cn && (!strcmp(cn, "") || !strcmp(cn, "") || !strcmp(cn, "")); } } else { PycRef top = blocks.top(); end = top->end(); // block end position from SETUP_LOOP if (top->blktype() == ASTBlock::BLK_WHILE) { blocks.pop(); } else { comprehension = true; } } PycRef forblk = new ASTIterBlock(ASTBlock::BLK_FOR, curpos, end, iter); forblk->setComprehension(comprehension); blocks.push(forblk.cast()); curblock = blocks.top(); stack.push(NULL); } break; case Pyc::FOR_LOOP_A: { PycRef curidx = stack.top(); // Current index stack.pop(); PycRef iter = stack.top(); // Iterable stack.pop(); bool comprehension = false; PycRef top = blocks.top(); if (top->blktype() == ASTBlock::BLK_WHILE) { blocks.pop(); } else { comprehension = true; } PycRef forblk = new ASTIterBlock(ASTBlock::BLK_FOR, curpos, top->end(), iter); forblk->setComprehension(comprehension); blocks.push(forblk.cast()); curblock = blocks.top(); /* Python Docs say: "push the sequence, the incremented counter, and the current item onto the stack." */ stack.push(iter); stack.push(curidx); stack.push(NULL); // We can totally hack this >_> } break; case Pyc::GET_AITER: { // Logic similar to FOR_ITER_A PycRef iter = stack.top(); // Iterable stack.pop(); PycRef top = blocks.top(); if (top->blktype() == ASTBlock::BLK_WHILE) { blocks.pop(); PycRef forblk = new ASTIterBlock(ASTBlock::BLK_ASYNCFOR, curpos, top->end(), iter); blocks.push(forblk.cast()); curblock = blocks.top(); stack.push(nullptr); } else { fprintf(stderr, "Unsupported use of GET_AITER outside of SETUP_LOOP\n"); } } break; case Pyc::GET_ANEXT: break; case Pyc::FORMAT_VALUE_A: { auto conversion_flag = static_cast(operand); PycRef format_spec = nullptr; if (conversion_flag & ASTFormattedValue::HAVE_FMT_SPEC) { format_spec = stack.top(); stack.pop(); } auto val = stack.top(); stack.pop(); stack.push(new ASTFormattedValue(val, conversion_flag, format_spec)); } break; case Pyc::GET_AWAITABLE: { PycRef object = stack.top(); stack.pop(); stack.push(new ASTAwaitable(object)); } break; case Pyc::GET_ITER: case Pyc::GET_YIELD_FROM_ITER: /* We just entirely ignore this */ break; case Pyc::IMPORT_NAME_A: if (mod->majorVer() == 1) { stack.push(new ASTImport(new ASTName(code->getName(operand)), NULL)); } else { PycRef fromlist = stack.top(); stack.pop(); if (mod->verCompare(2, 5) >= 0) stack.pop(); // Level -- we don't care stack.push(new ASTImport(new ASTName(code->getName(operand)), fromlist)); } break; case Pyc::IMPORT_FROM_A: stack.push(new ASTName(code->getName(operand))); break; case Pyc::IMPORT_STAR: { PycRef import = stack.top(); stack.pop(); curblock->append(new ASTStore(import, NULL)); } break; case Pyc::IS_OP_A: { PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); // The operand will be 0 for 'is' and 1 for 'is not'. stack.push(new ASTCompare(left, right, operand ? ASTCompare::CMP_IS_NOT : ASTCompare::CMP_IS)); } break; case Pyc::JUMP_IF_FALSE_A: case Pyc::JUMP_IF_TRUE_A: case Pyc::JUMP_IF_FALSE_OR_POP_A: case Pyc::JUMP_IF_TRUE_OR_POP_A: case Pyc::POP_JUMP_IF_FALSE_A: case Pyc::POP_JUMP_IF_TRUE_A: case Pyc::POP_JUMP_FORWARD_IF_FALSE_A: case Pyc::POP_JUMP_FORWARD_IF_TRUE_A: case Pyc::POP_JUMP_FORWARD_IF_NONE_A: case Pyc::POP_JUMP_FORWARD_IF_NOT_NONE_A: case Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A: case Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A: { /* Python 3.11 while-loop entry guard: open a BLK_WHILE here instead of an if. The matching back-edge (POP_JUMP_BACKWARD) closes it. */ auto wit = whileGuardEnd.find(curpos); if (wit != whileGuardEnd.end()) { PycRef wcond = stack.top(); stack.pop(); bool wneg = false; if (opcode == Pyc::POP_JUMP_FORWARD_IF_NONE_A) { wcond = new ASTCompare(wcond, new ASTObject(Pyc_None), ASTCompare::CMP_IS_NOT); } else if (opcode == Pyc::POP_JUMP_FORWARD_IF_NOT_NONE_A) { wcond = new ASTCompare(wcond, new ASTObject(Pyc_None), ASTCompare::CMP_IS); } else if (opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A) { wneg = true; } PycRef wh = new ASTCondBlock( ASTBlock::BLK_WHILE, wit->second, wcond, wneg); wh->init(ASTCondBlock::PRE_POPPED); blocks.push(wh.cast()); curblock = blocks.top(); break; } PycRef cond = stack.top(); PycRef ifblk; int popped = ASTCondBlock::UNINITED; if (opcode == Pyc::POP_JUMP_FORWARD_IF_NONE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_NOT_NONE_A) { /* Python 3.11: compare TOS against None, then pop and jump. IF_NONE jumps past the body when TOS is None, so the body runs for "x is not None" (and vice-versa). */ stack.pop(); ASTCompare::CompareOp cmpop = (opcode == Pyc::POP_JUMP_FORWARD_IF_NONE_A) ? ASTCompare::CMP_IS_NOT : ASTCompare::CMP_IS; cond = new ASTCompare(cond, new ASTObject(Pyc_None), cmpop); popped = ASTCondBlock::PRE_POPPED; } if (opcode == Pyc::POP_JUMP_IF_FALSE_A || opcode == Pyc::POP_JUMP_IF_TRUE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_FALSE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A || opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A || opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A) { /* Pop condition before the jump */ stack.pop(); popped = ASTCondBlock::PRE_POPPED; } /* Store the current stack for the else statement(s) */ stack_hist.push(stack); if (opcode == Pyc::JUMP_IF_FALSE_OR_POP_A || opcode == Pyc::JUMP_IF_TRUE_OR_POP_A) { /* Pop condition only if condition is met */ stack.pop(); popped = ASTCondBlock::POPPED; } /* "Jump if true" means "Jump if not false" */ bool neg = opcode == Pyc::JUMP_IF_TRUE_A || opcode == Pyc::JUMP_IF_TRUE_OR_POP_A || opcode == Pyc::POP_JUMP_IF_TRUE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A || opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A; int offs = operand; if (mod->verCompare(3, 10) >= 0) offs *= sizeof(uint16_t); // // BPO-27129 if (mod->verCompare(3, 12) >= 0 || opcode == Pyc::JUMP_IF_FALSE_A || opcode == Pyc::JUMP_IF_TRUE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_FALSE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_NONE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_NOT_NONE_A) { /* Offset is relative in these cases */ offs += pos; } if (cond.type() == ASTNode::NODE_COMPARE && cond.cast()->op() == ASTCompare::CMP_EXCEPTION) { int except_end = offs; if (curblock->blktype() == ASTBlock::BLK_EXCEPT && curblock.cast()->cond() == NULL) { /* Refining the initial (type-less) except handler. The clause ends at the dispatch jump target (the next clause / reraise), not at the whole handler region; using the region end would swallow following clauses when this clause does not fall through (e.g. raises). */ if (offs <= pos) except_end = curblock->end(); blocks.pop(); curblock = blocks.top(); stack_hist.pop(); } ifblk = new ASTCondBlock(ASTBlock::BLK_EXCEPT, except_end, cond.cast()->right(), false); } else if (curblock->blktype() == ASTBlock::BLK_ELSE && curblock->size() == 0) { /* Collapse into elif statement */ blocks.pop(); stack = stack_hist.top(); stack_hist.pop(); ifblk = new ASTCondBlock(ASTBlock::BLK_ELIF, offs, cond, neg); } else if (curblock->size() == 0 && !curblock->inited() && curblock->blktype() == ASTBlock::BLK_WHILE) { /* The condition for a while loop */ PycRef top = blocks.top(); blocks.pop(); ifblk = new ASTCondBlock(top->blktype(), offs, cond, neg); /* We don't store the stack for loops! Pop it! */ stack_hist.pop(); } else if (curblock->size() == 0 && curblock->end() <= offs && (curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELIF || curblock->blktype() == ASTBlock::BLK_WHILE)) { PycRef newcond; PycRef top = curblock.cast(); PycRef cond1 = top->cond(); blocks.pop(); if (curblock->blktype() == ASTBlock::BLK_WHILE) { stack_hist.pop(); } else { FastStack s_top = stack_hist.top(); stack_hist.pop(); stack_hist.pop(); stack_hist.push(s_top); } if (curblock->end() == offs || (curblock->end() == curpos && !top->negative())) { /* if blah and blah */ newcond = new ASTBinary(cond1, cond, ASTBinary::BIN_LOG_AND); } else { /* if blah or blah */ newcond = new ASTBinary(cond1, cond, ASTBinary::BIN_LOG_OR); } ifblk = new ASTCondBlock(top->blktype(), offs, newcond, neg); } else if (curblock->blktype() == ASTBlock::BLK_FOR && curblock.cast()->isComprehension() && mod->verCompare(2, 7) >= 0) { /* Comprehension condition */ curblock.cast()->setCondition(cond); stack_hist.pop(); // TODO: Handle older python versions, where condition // is laid out a little differently. break; } else { /* Plain old if statement */ ifblk = new ASTCondBlock(ASTBlock::BLK_IF, offs, cond, neg); } if (popped) ifblk->init(popped); blocks.push(ifblk.cast()); curblock = blocks.top(); } break; case Pyc::POP_JUMP_BACKWARD_IF_TRUE_A: case Pyc::POP_JUMP_BACKWARD_IF_FALSE_A: case Pyc::POP_JUMP_BACKWARD_IF_NONE_A: case Pyc::POP_JUMP_BACKWARD_IF_NOT_NONE_A: { /* Python 3.11 while-loop back-edge: the duplicate condition has just been evaluated onto the stack; discard it and close the BLK_WHILE opened at the matching entry guard. Only act when a BLK_WHILE is actually open (guard against a misidentified back-edge collapsing the whole block stack). */ if (whileBackedges.count(curpos)) { bool hasWhile = false; { std::stack > tmp = blocks; while (!tmp.empty()) { if (tmp.top()->blktype() == ASTBlock::BLK_WHILE) { hasWhile = true; break; } tmp.pop(); } } if (hasWhile) { if (!stack.empty()) stack.pop(); /* Close any blocks still open inside the loop body, then the BLK_WHILE itself. */ while (blocks.size() > 1 && curblock->blktype() != ASTBlock::BLK_WHILE) { PycRef inner = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(inner.cast()); } if (curblock->blktype() == ASTBlock::BLK_WHILE && blocks.size() > 1) { PycRef wh = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(wh.cast()); } break; } } /* Python 3.11: conditional jump backwards. In compiled loops this implements an in-body guard ("if cond: ") where a failed test jumps back to the loop header to start the next iteration. Model it as an if-block that spans to the end of the enclosing loop body. */ PycRef cond = stack.top(); stack.pop(); bool neg = false; if (opcode == Pyc::POP_JUMP_BACKWARD_IF_NONE_A) { /* jumps back when "x is None" -> body runs for "x is not None" */ cond = new ASTCompare(cond, new ASTObject(Pyc_None), ASTCompare::CMP_IS_NOT); } else if (opcode == Pyc::POP_JUMP_BACKWARD_IF_NOT_NONE_A) { cond = new ASTCompare(cond, new ASTObject(Pyc_None), ASTCompare::CMP_IS); } else if (opcode == Pyc::POP_JUMP_BACKWARD_IF_TRUE_A) { /* jumps back when cond is true -> body runs for "not cond" */ neg = true; } int end = curblock->end(); stack_hist.push(stack); PycRef ifblk = new ASTCondBlock(ASTBlock::BLK_IF, end, cond, neg); ifblk->init(ASTCondBlock::PRE_POPPED); blocks.push(ifblk.cast()); curblock = blocks.top(); } break; case Pyc::JUMP_ABSOLUTE_A: // bpo-47120: Replaced JUMP_ABSOLUTE by the relative jump JUMP_BACKWARD. case Pyc::JUMP_BACKWARD_A: case Pyc::JUMP_BACKWARD_NO_INTERRUPT_A: { int offs = operand; if (mod->verCompare(3, 10) >= 0) offs *= sizeof(uint16_t); // // BPO-27129 if (offs < pos) { if (curblock->blktype() == ASTBlock::BLK_FOR) { /* The jump operand is relative in 3.10+; the real target is (pos - offs) where pos already points past this instruction. Compare that against the loop start. A for loop has TWO kinds of backward jumps to its start: the implicit loop iteration at the very end of the body (pos == block end), and an explicit `continue` somewhere earlier (pos < block end). Only the former closes the loop; the latter emits a continue. */ int target = (mod->verCompare(3, 10) >= 0) ? (pos - offs) : offs; bool is_jump_to_start = target == curblock.cast()->start(); bool at_loop_end = (curblock->end() != 0) && (pos == curblock->end()); bool should_pop_for_block = curblock.cast()->isComprehension(); // in v3.8, SETUP_LOOP is deprecated and for blocks aren't terminated by POP_BLOCK, so we add them here bool should_add_for_block = mod->majorVer() == 3 && mod->minorVer() >= 8 && is_jump_to_start && at_loop_end && !curblock.cast()->isComprehension(); if (!should_pop_for_block && !should_add_for_block && is_jump_to_start && !at_loop_end && !curblock.cast()->isComprehension()) { /* explicit continue directly in the for body */ curblock->append(new ASTKeyword(ASTKeyword::KW_CONTINUE)); break; } if (should_pop_for_block || should_add_for_block) { PycRef top = stack.top(); if (top.type() == ASTNode::NODE_COMPREHENSION) { PycRef comp = top.cast(); comp->addGenerator(curblock.cast()); } PycRef tmp = curblock; blocks.pop(); curblock = blocks.top(); if (should_add_for_block) { curblock->append(tmp.cast()); } } } else if (curblock->blktype() == ASTBlock::BLK_ELSE) { if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } blocks.pop(); blocks.top()->append(curblock.cast()); curblock = blocks.top(); if (curblock->blktype() == ASTBlock::BLK_CONTAINER && !curblock.cast()->hasFinally()) { blocks.pop(); blocks.top()->append(curblock.cast()); curblock = blocks.top(); } } else { curblock->append(new ASTKeyword(ASTKeyword::KW_CONTINUE)); } /* We're in a loop, this jumps back to the start */ /* I think we'll just ignore this case... */ break; // Bad idea? Probably! } if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); if (cont->hasExcept() && pos < cont->except()) { PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, 0, NULL, false); except->init(); blocks.push(except); curblock = blocks.top(); } break; } if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } else { fprintf(stderr, "Warning: Stack history is empty, something wrong might have happened\n"); } PycRef prev = curblock; PycRef nil; bool push = true; do { blocks.pop(); blocks.top()->append(prev.cast()); if (prev->blktype() == ASTBlock::BLK_IF || prev->blktype() == ASTBlock::BLK_ELIF) { if (push) { stack_hist.push(stack); } PycRef next = new ASTBlock(ASTBlock::BLK_ELSE, blocks.top()->end()); if (prev->inited() == ASTCondBlock::PRE_POPPED) { next->init(ASTCondBlock::PRE_POPPED); } blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_EXCEPT) { if (push) { stack_hist.push(stack); } PycRef next = new ASTCondBlock(ASTBlock::BLK_EXCEPT, blocks.top()->end(), NULL, false); next->init(); blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_ELSE) { /* Special case */ prev = blocks.top(); if (!push) { stack = stack_hist.top(); stack_hist.pop(); } push = false; } else { prev = nil; } } while (prev != nil); curblock = blocks.top(); } break; case Pyc::JUMP_FORWARD_A: case Pyc::INSTRUMENTED_JUMP_FORWARD_A: { int offs = operand; if (mod->verCompare(3, 10) >= 0) offs *= sizeof(uint16_t); // // BPO-27129 if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); if (cont->hasExcept()) { stack_hist.push(stack); curblock->setEnd(pos+offs); PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); except->init(); blocks.push(except); curblock = blocks.top(); } break; } if (!stack_hist.empty()) { if (stack.empty()) // if it's part of if-expression, TOS at the moment is the result of "if" part stack = stack_hist.top(); stack_hist.pop(); } PycRef prev = curblock; PycRef nil; bool push = true; do { blocks.pop(); if (!blocks.empty()) blocks.top()->append(prev.cast()); if (prev->blktype() == ASTBlock::BLK_IF || prev->blktype() == ASTBlock::BLK_ELIF) { if (offs == 0) { prev = nil; continue; } if (push) { stack_hist.push(stack); } PycRef next = new ASTBlock(ASTBlock::BLK_ELSE, pos+offs); if (prev->inited() == ASTCondBlock::PRE_POPPED) { next->init(ASTCondBlock::PRE_POPPED); } blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_EXCEPT) { if (offs == 0) { prev = nil; continue; } if (push) { stack_hist.push(stack); } PycRef next = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); next->init(); blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_ELSE) { /* Special case */ prev = blocks.top(); if (!push) { stack = stack_hist.top(); stack_hist.pop(); } push = false; if (prev->blktype() == ASTBlock::BLK_MAIN) { /* Something went out of control! */ prev = nil; } } else if (prev->blktype() == ASTBlock::BLK_TRY && prev->end() < pos+offs) { /* Need to add an except/finally block */ if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } if (blocks.top()->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = blocks.top().cast(); if (cont->hasExcept()) { if (push) { stack_hist.push(stack); } PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); except->init(); blocks.push(except); } } else { fprintf(stderr, "Something TERRIBLE happened!!\n"); } prev = nil; } else { prev = nil; } } while (prev != nil); if (!blocks.empty()) { curblock = blocks.top(); if (curblock->blktype() == ASTBlock::BLK_EXCEPT) curblock->setEnd(pos+offs); } } break; case Pyc::LIST_TO_TUPLE: { /* Python 3.9+: convert the list at TOS into a tuple (used when building *args for calls). */ PycRef list = stack.top(); stack.pop(); if (list.type() == ASTNode::NODE_LIST) { const auto& lv = list.cast()->values(); ASTTuple::value_t tv(lv.begin(), lv.end()); stack.push(new ASTTuple(tv)); } else { stack.push(list); } } break; case Pyc::RETURN_GENERATOR: /* Python 3.11 generator/coroutine prologue; the pushed generator object is immediately discarded by a following POP_TOP. */ stack.push(nullptr); break; case Pyc::MAP_ADD_A: { /* Python 3.8+: value at TOS, key at TOS1 (dict comprehensions). */ PycRef value = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); if (curblock->blktype() == ASTBlock::BLK_FOR && curblock.cast()->isComprehension()) { stack.pop(); auto m = new ASTMap(); m->add(key, value); stack.push(new ASTComprehension(m)); } else { PycRef map = stack.top(); if (map.type() == ASTNode::NODE_MAP) map.cast()->add(key, value); } } break; case Pyc::LIST_APPEND: case Pyc::LIST_APPEND_A: { PycRef value = stack.top(); stack.pop(); PycRef list = stack.top(); if (curblock->blktype() == ASTBlock::BLK_FOR && curblock.cast()->isComprehension()) { stack.pop(); stack.push(new ASTComprehension(value)); } else { const char* cn = code->name() != NULL ? code->name()->value() : NULL; if (cn && !strcmp(cn, "")) { /* Filtered list comprehension: the append is inside the filter block, so record the value as a yield-style marker and let SynthGenexpr rebuild the comprehension (with its filter condition). */ curblock->append(new ASTReturn(value, ASTReturn::YIELD)); } else { stack.push(new ASTSubscr(list, value)); /* Total hack */ } } } break; case Pyc::SET_UPDATE_A: { PycRef rhs = stack.top(); stack.pop(); PycRef lhs = stack.top().cast(); stack.pop(); if (rhs.type() != ASTNode::NODE_OBJECT) { fprintf(stderr, "Unsupported argument found for SET_UPDATE\n"); break; } // I've only ever seen this be a TYPE_FROZENSET, but let's be careful... PycRef obj = rhs.cast()->object(); if (obj->type() != PycObject::TYPE_FROZENSET) { fprintf(stderr, "Unsupported argument type found for SET_UPDATE\n"); break; } ASTSet::value_t result = lhs->values(); for (const auto& it : obj.cast()->values()) { result.push_back(new ASTObject(it)); } stack.push(new ASTSet(result)); } break; case Pyc::DICT_UPDATE_A: case Pyc::DICT_MERGE_A: { /* Python 3.9+: merge mapping at TOS into the dict below it (used for {**a, **b} literals and **kwargs construction). */ PycRef rhs = stack.top(); stack.pop(); PycRef lhsNode = stack.top(); if (lhsNode.type() == ASTNode::NODE_MAP) { PycRef lhs = lhsNode.cast(); if (rhs.type() == ASTNode::NODE_MAP) { for (const auto& it : rhs.cast()->values()) lhs->add(it.first, it.second); } else { /* Spread of a non-literal mapping (**expr); represent with a NULL key so the source writer can emit "**expr". */ lhs->add(nullptr, rhs); } } /* Leave the (updated) dict on the stack. */ } break; case Pyc::LIST_EXTEND_A: { PycRef rhs = stack.top(); stack.pop(); PycRef lhs = stack.top().cast(); stack.pop(); if (rhs.type() != ASTNode::NODE_OBJECT) { fprintf(stderr, "Unsupported argument found for LIST_EXTEND\n"); break; } // I've only ever seen this be a SMALL_TUPLE, but let's be careful... PycRef obj = rhs.cast()->object(); if (obj->type() != PycObject::TYPE_TUPLE && obj->type() != PycObject::TYPE_SMALL_TUPLE) { fprintf(stderr, "Unsupported argument type found for LIST_EXTEND\n"); break; } ASTList::value_t result = lhs->values(); for (const auto& it : obj.cast()->values()) { result.push_back(new ASTObject(it)); } stack.push(new ASTList(result)); } break; case Pyc::LOAD_ATTR_A: { PycRef name = stack.top(); if (name.type() != ASTNode::NODE_IMPORT) { stack.pop(); if (mod->verCompare(3, 12) >= 0) { if (operand & 1) { /* Changed in version 3.12: If the low bit of name is set, then a NULL or self is pushed to the stack before the attribute or unbound method respectively. */ stack.push(nullptr); } operand >>= 1; } stack.push(new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR)); } } break; case Pyc::LOAD_ASSERTION_ERROR: { PycRef assertName = new PycString(); assertName->setValue("AssertionError"); stack.push(new ASTName(assertName)); } break; case Pyc::LOAD_BUILD_CLASS: stack.push(new ASTLoadBuildClass(new PycObject())); break; case Pyc::LOAD_CLOSURE_A: if (mod->verCompare(3, 6) >= 0) { /* In 3.6+ the closure cells are collected into a tuple via BUILD_TUPLE and consumed by MAKE_FUNCTION; push a placeholder so that tuple stays balanced (the value is later discarded). */ stack.push(new ASTName(code->getCellVar(mod, operand))); } /* else: ignored (older bytecode) */ break; case Pyc::MAKE_CELL_A: case Pyc::COPY_FREE_VARS_A: /* Python 3.11 function prologue; no stack effect for decompilation */ break; case Pyc::LOAD_CONST_A: { PycRef t_ob = new ASTObject(code->getConst(operand)); if ((t_ob->object().type() == PycObject::TYPE_TUPLE || t_ob->object().type() == PycObject::TYPE_SMALL_TUPLE) && !t_ob->object().cast()->values().size()) { ASTTuple::value_t values; stack.push(new ASTTuple(values)); } else if (t_ob->object().type() == PycObject::TYPE_NONE) { stack.push(NULL); } else { stack.push(t_ob.cast()); } } break; case Pyc::LOAD_DEREF_A: case Pyc::LOAD_CLASSDEREF_A: stack.push(new ASTName(code->getCellVar(mod, operand))); break; case Pyc::LOAD_FAST_A: if (mod->verCompare(1, 3) < 0) stack.push(new ASTName(code->getName(operand))); else stack.push(new ASTName(code->getLocal(operand))); break; case Pyc::LOAD_FAST_LOAD_FAST_A: stack.push(new ASTName(code->getLocal(operand >> 4))); stack.push(new ASTName(code->getLocal(operand & 0xF))); break; case Pyc::LOAD_GLOBAL_A: if (mod->verCompare(3, 11) >= 0) { // Loads the global named co_names[namei>>1] onto the stack. if (operand & 1) { /* Changed in version 3.11: If the low bit of "NAMEI" (operand) is set, then a NULL is pushed to the stack before the global variable. */ stack.push(nullptr); } operand >>= 1; } stack.push(new ASTName(code->getName(operand))); break; case Pyc::LOAD_LOCALS: stack.push(new ASTNode(ASTNode::NODE_LOCALS)); break; case Pyc::STORE_LOCALS: stack.pop(); break; case Pyc::LOAD_METHOD_A: { // Behave like LOAD_ATTR PycRef name = stack.top(); stack.pop(); stack.push(new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR)); } break; case Pyc::LOAD_NAME_A: stack.push(new ASTName(code->getName(operand))); break; case Pyc::MAKE_CLOSURE_A: case Pyc::MAKE_FUNCTION_A: { PycRef fun_code = stack.top(); stack.pop(); /* Test for the qualified name of the function (at TOS) */ int tos_type = fun_code.cast()->object().type(); if (tos_type != PycObject::TYPE_CODE && tos_type != PycObject::TYPE_CODE2) { fun_code = stack.top(); stack.pop(); } ASTFunction::defarg_t defArgs, kwDefArgs; if (mod->verCompare(3, 6) >= 0) { /* Python 3.6+: the operand is a flag bitmask, and each set flag has a single object pushed on the stack (bottom to top): 0x01 defaults tuple, 0x02 kwonly-defaults dict, 0x04 annotations, 0x08 closure tuple. The code object was already popped above. They must be consumed top-down. */ if (operand & 0x08) { /* closure: tuple of cells - not needed for source */ stack.pop(); } if (operand & 0x04) { /* annotations - not reconstructed here */ stack.pop(); } if (operand & 0x02) { /* keyword-only defaults: a mapping on the stack */ PycRef kwd = stack.top(); stack.pop(); if (kwd != NULL && kwd.type() == ASTNode::NODE_MAP) { for (const auto& it : kwd.cast()->values()) kwDefArgs.push_back(it.second); } else if (kwd != NULL && kwd.type() == ASTNode::NODE_CONST_MAP) { for (const auto& v : kwd.cast()->values()) kwDefArgs.push_back(v); } } if (operand & 0x01) { /* positional defaults: a tuple on the stack */ PycRef defs = stack.top(); stack.pop(); if (defs != NULL && defs.type() == ASTNode::NODE_TUPLE) { for (const auto& v : defs.cast()->values()) defArgs.push_back(v); } else if (defs != NULL && defs.type() == ASTNode::NODE_OBJECT) { PycRef o = defs.cast()->object(); if (o->type() == PycObject::TYPE_TUPLE || o->type() == PycObject::TYPE_SMALL_TUPLE) { for (const auto& it : o.cast()->values()) defArgs.push_back(new ASTObject(it)); } } } } else { const int defCount = operand & 0xFF; const int kwDefCount = (operand >> 8) & 0xFF; for (int i = 0; i < defCount; ++i) { defArgs.push_front(stack.top()); stack.pop(); } for (int i = 0; i < kwDefCount; ++i) { kwDefArgs.push_front(stack.top()); stack.pop(); } } stack.push(new ASTFunction(fun_code, defArgs, kwDefArgs)); } break; case Pyc::NOP: break; case Pyc::POP_BLOCK: { if (curblock->blktype() == ASTBlock::BLK_CONTAINER || curblock->blktype() == ASTBlock::BLK_FINALLY) { /* These should only be popped by an END_FINALLY */ break; } if (curblock->blktype() == ASTBlock::BLK_WITH) { // This should only be popped by a WITH_CLEANUP break; } if (curblock->nodes().size() && curblock->nodes().back().type() == ASTNode::NODE_KEYWORD) { curblock->removeLast(); } if (curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELIF || curblock->blktype() == ASTBlock::BLK_ELSE || curblock->blktype() == ASTBlock::BLK_TRY || curblock->blktype() == ASTBlock::BLK_EXCEPT || curblock->blktype() == ASTBlock::BLK_FINALLY) { if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } else { fprintf(stderr, "Warning: Stack history is empty, something wrong might have happened\n"); } } PycRef tmp = curblock; blocks.pop(); if (!blocks.empty()) curblock = blocks.top(); if (!(tmp->blktype() == ASTBlock::BLK_ELSE && tmp->nodes().size() == 0)) { curblock->append(tmp.cast()); } if (tmp->blktype() == ASTBlock::BLK_FOR && tmp->end() >= pos) { stack_hist.push(stack); PycRef blkelse = new ASTBlock(ASTBlock::BLK_ELSE, tmp->end()); blocks.push(blkelse); curblock = blocks.top(); } if (curblock->blktype() == ASTBlock::BLK_TRY && tmp->blktype() != ASTBlock::BLK_FOR && tmp->blktype() != ASTBlock::BLK_ASYNCFOR && tmp->blktype() != ASTBlock::BLK_WHILE) { stack = stack_hist.top(); stack_hist.pop(); tmp = curblock; blocks.pop(); curblock = blocks.top(); if (!(tmp->blktype() == ASTBlock::BLK_ELSE && tmp->nodes().size() == 0)) { curblock->append(tmp.cast()); } } if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); if (tmp->blktype() == ASTBlock::BLK_ELSE && !cont->hasFinally()) { /* Pop the container */ blocks.pop(); curblock = blocks.top(); curblock->append(cont.cast()); } else if ((tmp->blktype() == ASTBlock::BLK_ELSE && cont->hasFinally()) || (tmp->blktype() == ASTBlock::BLK_TRY && !cont->hasExcept())) { /* Add the finally block */ stack_hist.push(stack); PycRef final = new ASTBlock(ASTBlock::BLK_FINALLY, 0, true); blocks.push(final); curblock = blocks.top(); } } if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR) && curblock->end() == pos) { blocks.pop(); blocks.top()->append(curblock.cast()); curblock = blocks.top(); } } break; case Pyc::POP_EXCEPT: /* Do nothing. */ break; case Pyc::PUSH_EXC_INFO: /* Python 3.11+: pushes the live exception. Use a sentinel so the `as ` binding and CHECK_EXC_MATCH stay balanced; it is consumed by the following STORE (binding) or POP_TOP (discard). */ { PycRef s = new PycString(); s->setValue(EXC_SENTINEL); stack.push(new ASTName(s)); } break; case Pyc::CHECK_EXC_MATCH: { /* Python 3.11+: compare the exception (left, kept on the stack for a possible `as` binding) against the handler type. */ PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.push(new ASTCompare(left, right, ASTCompare::CMP_EXCEPTION)); } break; case Pyc::END_FOR: { stack.pop(); if ((opcode == Pyc::END_FOR) && (mod->majorVer() == 3) && (mod->minorVer() == 12)) { // one additional pop for python 3.12 stack.pop(); } // end for loop here /* TODO : Ensure that FOR loop ends here. Due to CACHE instructions at play, the end indicated in the for loop by pycdas is not correct, it is off by some small amount. */ if (curblock->blktype() == ASTBlock::BLK_FOR) { PycRef prev = blocks.top(); blocks.pop(); curblock = blocks.top(); curblock->append(prev.cast()); } else { fprintf(stderr, "Wrong block type %i for END_FOR\n", curblock->blktype()); } } break; case Pyc::POP_TOP: { PycRef value = stack.top(); stack.pop(); /* The live exception from PUSH_EXC_INFO is discarded here when the handler has no `as` binding; never emit the sentinel. */ if (isExcSentinel(value)) { if (!curblock->inited()) curblock->init(); break; } if (!curblock->inited()) { if (curblock->blktype() == ASTBlock::BLK_WITH) { curblock.cast()->setExpr(value); } else { curblock->init(); } break; } else if (value == nullptr || value->processed()) { break; } curblock->append(value); if (curblock->blktype() == ASTBlock::BLK_FOR && curblock.cast()->isComprehension()) { /* This relies on some really uncertain logic... * If it's a comprehension, the only POP_TOP should be * a call to append the iter to the list. */ if (value.type() == ASTNode::NODE_CALL) { auto& pparams = value.cast()->pparams(); if (!pparams.empty()) { PycRef res = pparams.front(); stack.push(new ASTComprehension(res)); } } } } break; case Pyc::PRINT_ITEM: { PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == nullptr && !printNode->eol()) printNode->add(stack.top()); else curblock->append(new ASTPrint(stack.top())); stack.pop(); } break; case Pyc::PRINT_ITEM_TO: { PycRef stream = stack.top(); stack.pop(); PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == stream && !printNode->eol()) printNode->add(stack.top()); else curblock->append(new ASTPrint(stack.top(), stream)); stack.pop(); if (stream) stream->setProcessed(); } break; case Pyc::PRINT_NEWLINE: { PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == nullptr && !printNode->eol()) printNode->setEol(true); else curblock->append(new ASTPrint(nullptr)); stack.pop(); } break; case Pyc::PRINT_NEWLINE_TO: { PycRef stream = stack.top(); stack.pop(); PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == stream && !printNode->eol()) printNode->setEol(true); else curblock->append(new ASTPrint(nullptr, stream)); stack.pop(); if (stream) stream->setProcessed(); } break; case Pyc::RAISE_VARARGS_A: { ASTRaise::param_t paramList; for (int i = 0; i < operand; i++) { paramList.push_front(stack.top()); stack.pop(); } curblock->append(new ASTRaise(paramList)); if ((curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELSE) && stack_hist.size() && (mod->verCompare(2, 6) >= 0)) { stack = stack_hist.top(); stack_hist.pop(); PycRef prev = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(prev.cast()); } } break; case Pyc::RERAISE: case Pyc::RERAISE_A: /* Python 3.11 cleanup opcode. */ break; case Pyc::RETURN_VALUE: case Pyc::INSTRUMENTED_RETURN_VALUE_A: { PycRef value = stack.top(); stack.pop(); curblock->append(new ASTReturn(value)); if ((curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELSE) && stack_hist.size() && (mod->verCompare(2, 6) >= 0)) { stack = stack_hist.top(); stack_hist.pop(); PycRef prev = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(prev.cast()); /* A return inside an if/else is normally followed by a redundant JUMP (or other filler) that the old code unconditionally consumed to skip the else branch. In 3.11+ the return can instead fall straight into the sibling branch, and if that branch begins by loading a code object (a comprehension or nested function that feeds a following MAKE_FUNCTION), consuming it would drop the operand MAKE_FUNCTION needs and crash. Peek the next instruction: keep it only when it is a LOAD_CONST of a code object (always real code); otherwise preserve the original skip behavior to avoid regressions. */ if (!source.atEof()) { int savedSrc = source.pos(); int savedPos = pos; int pkOp = 0, pkOperand = 0; bc_next(source, mod, pkOp, pkOperand, pos); bool keepNext = false; if (pkOp == Pyc::LOAD_CONST_A) { PycRef k = code->getConst(pkOperand); if (k != NULL && (k->type() == PycObject::TYPE_CODE || k->type() == PycObject::TYPE_CODE2)) keepNext = true; } /* Never consume the start of an exception handler: a return inside an `if` within a `try` is immediately followed by PUSH_EXC_INFO. Skipping it would drop the handler's sentinel and corrupt the `except ... as e` binding. */ if (pkOp == Pyc::PUSH_EXC_INFO) keepNext = true; if (keepNext) { source.setPos(savedSrc); pos = savedPos; } else { opcode = pkOp; operand = pkOperand; } } } } break; case Pyc::RETURN_CONST_A: case Pyc::INSTRUMENTED_RETURN_CONST_A: { PycRef value = new ASTObject(code->getConst(operand)); curblock->append(new ASTReturn(value.cast())); } break; case Pyc::ROT_TWO: { PycRef one = stack.top(); stack.pop(); if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef two = stack.top(); stack.pop(); stack.push(one); stack.push(two); } break; case Pyc::ROT_THREE: { PycRef one = stack.top(); stack.pop(); PycRef two = stack.top(); stack.pop(); if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef three = stack.top(); stack.pop(); stack.push(one); stack.push(three); stack.push(two); } break; case Pyc::ROT_FOUR: { PycRef one = stack.top(); stack.pop(); PycRef two = stack.top(); stack.pop(); PycRef three = stack.top(); stack.pop(); if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef four = stack.top(); stack.pop(); stack.push(one); stack.push(four); stack.push(three); stack.push(two); } break; case Pyc::SET_LINENO_A: // Ignore break; case Pyc::SETUP_WITH_A: { PycRef withblock = new ASTWithBlock(pos+operand); blocks.push(withblock); curblock = blocks.top(); } break; case Pyc::WITH_EXCEPT_START: /* Python 3.11 with-statement exception cleanup: calls __exit__ with the live exception. Discard the exception sentinel that PUSH_EXC_INFO left on the stack and yield a None-like result so the implicit cleanup test does not leak the sentinel. */ if (!stack.empty() && isExcSentinel(stack.top())) stack.pop(); stack.push(nullptr); break; case Pyc::BEFORE_WITH: { /* Python 3.11 with-statement. If the pre-pass recognized this one, open a with-block; the context manager stays on the stack and the following STORE/POP_TOP turns it into the with-expression (and optional `as `). */ auto it = withBodyEndByBefore.find(curpos); if (it != withBodyEndByBefore.end()) { PycRef withblock = new ASTWithBlock(it->second); blocks.push(withblock); curblock = blocks.top(); } } break; case Pyc::WITH_CLEANUP: case Pyc::WITH_CLEANUP_START: { // Stack top should be a None. Ignore it. PycRef none = stack.top(); stack.pop(); if (none != NULL) { fprintf(stderr, "Something TERRIBLE happened!\n"); break; } if (curblock->blktype() == ASTBlock::BLK_WITH && curblock->end() == curpos) { PycRef with = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(with.cast()); } else { fprintf(stderr, "Something TERRIBLE happened! No matching with block found for WITH_CLEANUP at %d\n", curpos); } } break; case Pyc::WITH_CLEANUP_FINISH: /* Ignore this */ break; case Pyc::SETUP_EXCEPT_A: { if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { curblock.cast()->setExcept(pos+operand); } else { PycRef next = new ASTContainerBlock(0, pos+operand); blocks.push(next.cast()); } /* Store the current stack for the except/finally statement(s) */ stack_hist.push(stack); PycRef tryblock = new ASTBlock(ASTBlock::BLK_TRY, pos+operand, true); blocks.push(tryblock.cast()); curblock = blocks.top(); need_try = false; } break; case Pyc::SETUP_FINALLY_A: { PycRef next = new ASTContainerBlock(pos+operand); blocks.push(next.cast()); curblock = blocks.top(); need_try = true; } break; case Pyc::SETUP_LOOP_A: { PycRef next = new ASTCondBlock(ASTBlock::BLK_WHILE, pos+operand, NULL, false); blocks.push(next.cast()); curblock = blocks.top(); } break; case Pyc::SLICE_0: { PycRef name = stack.top(); stack.pop(); PycRef slice = new ASTSlice(ASTSlice::SLICE0); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::SLICE_1: { PycRef lower = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); PycRef slice = new ASTSlice(ASTSlice::SLICE1, lower); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::SLICE_2: { PycRef upper = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); PycRef slice = new ASTSlice(ASTSlice::SLICE2, NULL, upper); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::SLICE_3: { PycRef upper = stack.top(); stack.pop(); PycRef lower = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); PycRef slice = new ASTSlice(ASTSlice::SLICE3, lower, upper); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::STORE_ATTR_A: { if (unpack) { PycRef name = stack.top(); stack.pop(); PycRef attr = new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(attr); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef name = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); PycRef attr = new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR); if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, attr, stack, curblock); } else { curblock->append(new ASTStore(value, attr)); } } } break; case Pyc::STORE_DEREF_A: { if (unpack) { PycRef name = new ASTName(code->getCellVar(mod, operand)); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); PycRef name = new ASTName(code->getCellVar(mod, operand)); if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); } } } break; case Pyc::STORE_FAST_A: { if (unpack) { PycRef name; if (mod->verCompare(1, 3) < 0) name = new ASTName(code->getName(operand)); else name = new ASTName(code->getLocal(operand)); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { PycRef tuple = tup.try_cast(); if (tuple != NULL) tuple->setRequireParens(false); curblock.cast()->setIndex(tup); } else if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); PycRef name; if (mod->verCompare(1, 3) < 0) name = new ASTName(code->getName(operand)); else name = new ASTName(code->getLocal(operand)); if (name.cast()->name()->value()[0] == '_' && name.cast()->name()->value()[1] == '[') { /* Don't show stores of list comp append objects. */ break; } if (handleExceptBinding(curblock, value, name)) break; if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { curblock.cast()->setIndex(name); } else if (curblock->blktype() == ASTBlock::BLK_WITH && !curblock->inited()) { curblock.cast()->setExpr(value); curblock.cast()->setVar(name); } else if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); } } } break; case Pyc::STORE_GLOBAL_A: { PycRef name = new ASTName(code->getName(operand)); if (unpack) { PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { PycRef tuple = tup.try_cast(); if (tuple != NULL) tuple->setRequireParens(false); curblock.cast()->setIndex(tup); } else if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); } } /* Mark the global as used */ code->markGlobal(name.cast()->name()); } break; case Pyc::STORE_NAME_A: { if (unpack) { PycRef name = new ASTName(code->getName(operand)); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { PycRef tuple = tup.try_cast(); if (tuple != NULL) tuple->setRequireParens(false); curblock.cast()->setIndex(tup); } else if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); PycRef varname = code->getName(operand); if (varname->length() >= 2 && varname->value()[0] == '_' && varname->value()[1] == '[') { /* Don't show stores of list comp append objects. */ break; } // Return private names back to their original name const std::string class_prefix = std::string("_") + code->name()->strValue(); if (varname->startsWith(class_prefix + std::string("__"))) varname->setValue(varname->strValue().substr(class_prefix.size())); PycRef name = new ASTName(varname); if (handleExceptBinding(curblock, value, name)) break; if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { curblock.cast()->setIndex(name); } else if (stack.top().type() == ASTNode::NODE_IMPORT) { PycRef import = stack.top().cast(); import->add_store(new ASTStore(value, name)); } else if (curblock->blktype() == ASTBlock::BLK_WITH && !curblock->inited()) { curblock.cast()->setExpr(value); curblock.cast()->setVar(name); } else if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); if (value.type() == ASTNode::NODE_INVALID) break; } } } break; case Pyc::STORE_SLICE_0: { PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE0)))); } break; case Pyc::STORE_SLICE_1: { PycRef upper = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE1, upper)))); } break; case Pyc::STORE_SLICE_2: { PycRef lower = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE2, NULL, lower)))); } break; case Pyc::STORE_SLICE_3: { PycRef lower = stack.top(); stack.pop(); PycRef upper = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE3, upper, lower)))); } break; case Pyc::STORE_SUBSCR: { if (unpack) { PycRef subscr = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef save = new ASTSubscr(dest, subscr); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(save); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef subscr = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef src = stack.top(); stack.pop(); // If variable annotations are enabled, we'll need to check for them here. // Python handles a varaible annotation by setting: // __annotations__['var-name'] = type const bool found_annotated_var = (variable_annotations && dest->type() == ASTNode::Type::NODE_NAME && dest.cast()->name()->isEqual("__annotations__")); if (found_annotated_var) { // Annotations can be done alone or as part of an assignment. // In the case of an assignment, we'll see a NODE_STORE on the stack. if (!curblock->nodes().empty() && curblock->nodes().back()->type() == ASTNode::Type::NODE_STORE) { // Replace the existing NODE_STORE with a new one that includes the annotation. PycRef store = curblock->nodes().back().cast(); curblock->removeLast(); curblock->append(new ASTStore(store->src(), new ASTAnnotatedVar(subscr, src))); } else { curblock->append(new ASTAnnotatedVar(subscr, src)); } } else { if (dest.type() == ASTNode::NODE_MAP) { dest.cast()->add(subscr, src); } else if (src.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(src, new ASTSubscr(dest, subscr), stack, curblock); } else { curblock->append(new ASTStore(src, new ASTSubscr(dest, subscr))); } } } } break; case Pyc::UNARY_CALL: { PycRef func = stack.top(); stack.pop(); stack.push(new ASTCall(func, ASTCall::pparam_t(), ASTCall::kwparam_t())); } break; case Pyc::UNARY_CONVERT: { PycRef name = stack.top(); stack.pop(); stack.push(new ASTConvert(name)); } break; case Pyc::UNARY_INVERT: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_INVERT)); } break; case Pyc::UNARY_NEGATIVE: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_NEGATIVE)); } break; case Pyc::UNARY_NOT: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_NOT)); } break; case Pyc::UNARY_POSITIVE: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_POSITIVE)); } break; case Pyc::UNPACK_LIST_A: case Pyc::UNPACK_TUPLE_A: case Pyc::UNPACK_SEQUENCE_A: { unpack = operand; if (unpack > 0) { ASTTuple::value_t vals; stack.push(new ASTTuple(vals)); } else { // Unpack zero values and assign it to top of stack or for loop variable. // E.g. [] = TOS / for [] in X ASTTuple::value_t vals; auto tup = new ASTTuple(vals); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { tup->setRequireParens(true); curblock.cast()->setIndex(tup); } else if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { auto chainStore = stack.top(); stack.pop(); append_to_chain_store(chainStore, tup, stack, curblock); } else { curblock->append(new ASTStore(stack.top(), tup)); stack.pop(); } } } break; case Pyc::YIELD_FROM: { PycRef dest = stack.top(); stack.pop(); // TODO: Support yielding into a non-null destination PycRef value = stack.top(); if (value) { value->setProcessed(); curblock->append(new ASTReturn(value, ASTReturn::YIELD_FROM)); } } break; case Pyc::YIELD_VALUE: case Pyc::INSTRUMENTED_YIELD_VALUE_A: { PycRef value = stack.top(); stack.pop(); curblock->append(new ASTReturn(value, ASTReturn::YIELD)); } break; case Pyc::SETUP_ANNOTATIONS: variable_annotations = true; break; case Pyc::PRECALL_A: case Pyc::RESUME_A: case Pyc::INSTRUMENTED_RESUME_A: /* We just entirely ignore this / no-op */ break; case Pyc::CACHE: /* These "fake" opcodes are used as placeholders for optimizing certain opcodes in Python 3.11+. Since we have no need for that during disassembly/decompilation, we can just treat these as no-ops. */ break; case Pyc::PUSH_NULL: stack.push(nullptr); break; case Pyc::GEN_START_A: stack.pop(); break; case Pyc::SWAP_A: /* Python 3.11+: swap TOS with the operand-th element from the top. A plain stack operation (used by chained comparisons, starred unpacking, etc.). */ stack.swap(operand); break; case Pyc::BINARY_SLICE: { PycRef end = stack.top(); stack.pop(); PycRef start = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); if (start.type() == ASTNode::NODE_OBJECT && start.cast()->object() == Pyc_None) { start = NULL; } if (end.type() == ASTNode::NODE_OBJECT && end.cast()->object() == Pyc_None) { end = NULL; } PycRef slice; if (start == NULL && end == NULL) { slice = new ASTSlice(ASTSlice::SLICE0); } else if (start == NULL) { slice = new ASTSlice(ASTSlice::SLICE2, start, end); } else if (end == NULL) { slice = new ASTSlice(ASTSlice::SLICE1, start, end); } else { slice = new ASTSlice(ASTSlice::SLICE3, start, end); } stack.push(new ASTSubscr(dest, slice)); } break; case Pyc::STORE_SLICE: { PycRef end = stack.top(); stack.pop(); PycRef start = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef values = stack.top(); stack.pop(); if (start.type() == ASTNode::NODE_OBJECT && start.cast()->object() == Pyc_None) { start = NULL; } if (end.type() == ASTNode::NODE_OBJECT && end.cast()->object() == Pyc_None) { end = NULL; } PycRef slice; if (start == NULL && end == NULL) { slice = new ASTSlice(ASTSlice::SLICE0); } else if (start == NULL) { slice = new ASTSlice(ASTSlice::SLICE2, start, end); } else if (end == NULL) { slice = new ASTSlice(ASTSlice::SLICE1, start, end); } else { slice = new ASTSlice(ASTSlice::SLICE3, start, end); } curblock->append(new ASTStore(values, new ASTSubscr(dest, slice))); } break; case Pyc::COPY_A: { PycRef value = stack.top(operand); stack.push(value); } break; default: fprintf(stderr, "Unsupported opcode: %s (%d)\n", Pyc::OpcodeName(opcode), opcode); cleanBuild = false; return new ASTNodeList(defblock->nodes()); } else_pop = ( (curblock->blktype() == ASTBlock::BLK_ELSE) || (curblock->blktype() == ASTBlock::BLK_IF) || (curblock->blktype() == ASTBlock::BLK_ELIF) ) && (curblock->end() == pos); } if (stack_hist.size()) { fputs("Warning: Stack history is not empty!\n", stderr); while (stack_hist.size()) { stack_hist.pop(); } } if (blocks.size() > 1) { fputs("Warning: block stack is not empty!\n", stderr); while (blocks.size() > 1) { PycRef tmp = blocks.top(); blocks.pop(); blocks.top()->append(tmp.cast()); } } cleanBuild = true; return new ASTNodeList(defblock->nodes()); } static void append_to_chain_store(const PycRef &chainStore, PycRef item, FastStack& stack, const PycRef& curblock) { stack.pop(); // ignore identical source object. chainStore.cast()->append(item); if (stack.top().type() == PycObject::TYPE_NULL) { curblock->append(chainStore); } else { stack.push(chainStore); } } static int cmp_prec(PycRef parent, PycRef child) { /* Determine whether the parent has higher precedence than therefore child, so we don't flood the source code with extraneous parens. Else we'd have expressions like (((a + b) + c) + d) when therefore equivalent, a + b + c + d would suffice. */ if (parent.type() == ASTNode::NODE_UNARY && parent.cast()->op() == ASTUnary::UN_NOT) return 1; // Always parenthesize not(x) if (child.type() == ASTNode::NODE_BINARY) { PycRef binChild = child.cast(); if (parent.type() == ASTNode::NODE_BINARY) { PycRef binParent = parent.cast(); if (binParent->right() == child) { if (binParent->op() == ASTBinary::BIN_SUBTRACT && binChild->op() == ASTBinary::BIN_ADD) return 1; else if (binParent->op() == ASTBinary::BIN_DIVIDE && binChild->op() == ASTBinary::BIN_MULTIPLY) return 1; } return binChild->op() - binParent->op(); } else if (parent.type() == ASTNode::NODE_COMPARE) return (binChild->op() == ASTBinary::BIN_LOG_AND || binChild->op() == ASTBinary::BIN_LOG_OR) ? 1 : -1; else if (parent.type() == ASTNode::NODE_UNARY) return (binChild->op() == ASTBinary::BIN_POWER) ? -1 : 1; } else if (child.type() == ASTNode::NODE_UNARY) { PycRef unChild = child.cast(); if (parent.type() == ASTNode::NODE_BINARY) { PycRef binParent = parent.cast(); if (binParent->op() == ASTBinary::BIN_LOG_AND || binParent->op() == ASTBinary::BIN_LOG_OR) return -1; else if (unChild->op() == ASTUnary::UN_NOT) return 1; else if (binParent->op() == ASTBinary::BIN_POWER) return 1; else return -1; } else if (parent.type() == ASTNode::NODE_COMPARE) { return (unChild->op() == ASTUnary::UN_NOT) ? 1 : -1; } else if (parent.type() == ASTNode::NODE_UNARY) { return unChild->op() - parent.cast()->op(); } } else if (child.type() == ASTNode::NODE_COMPARE) { PycRef cmpChild = child.cast(); if (parent.type() == ASTNode::NODE_BINARY) return (parent.cast()->op() == ASTBinary::BIN_LOG_AND || parent.cast()->op() == ASTBinary::BIN_LOG_OR) ? -1 : 1; else if (parent.type() == ASTNode::NODE_COMPARE) return cmpChild->op() - parent.cast()->op(); else if (parent.type() == ASTNode::NODE_UNARY) return (parent.cast()->op() == ASTUnary::UN_NOT) ? -1 : 1; } /* For normal nodes, don't parenthesize anything */ return -1; } static void print_ordered(PycRef parent, PycRef child, PycModule* mod, std::ostream& pyc_output) { if (child.type() == ASTNode::NODE_BINARY || child.type() == ASTNode::NODE_COMPARE) { if (cmp_prec(parent, child) > 0) { pyc_output << "("; print_src(child, mod, pyc_output); pyc_output << ")"; } else { print_src(child, mod, pyc_output); } } else if (child.type() == ASTNode::NODE_UNARY) { if (cmp_prec(parent, child) > 0) { pyc_output << "("; print_src(child, mod, pyc_output); pyc_output << ")"; } else { print_src(child, mod, pyc_output); } } else { print_src(child, mod, pyc_output); } } static void start_line(int indent, std::ostream& pyc_output) { if (inLambda) return; for (int i=0; i blk, PycModule* mod, std::ostream& pyc_output) { ASTBlock::list_t lines = blk->nodes(); if (lines.size() == 0) { PycRef pass = new ASTKeyword(ASTKeyword::KW_PASS); start_line(cur_indent, pyc_output); print_src(pass, mod, pyc_output); } for (auto ln = lines.cbegin(); ln != lines.cend();) { if ((*ln).cast().type() != ASTNode::NODE_NODELIST) { start_line(cur_indent, pyc_output); } print_src(*ln, mod, pyc_output); if (++ln != lines.end()) { end_line(pyc_output); } } } void print_formatted_value(PycRef formatted_value, PycModule* mod, std::ostream& pyc_output) { pyc_output << "{"; print_src(formatted_value->val(), mod, pyc_output); switch (formatted_value->conversion() & ASTFormattedValue::CONVERSION_MASK) { case ASTFormattedValue::NONE: break; case ASTFormattedValue::STR: pyc_output << "!s"; break; case ASTFormattedValue::REPR: pyc_output << "!r"; break; case ASTFormattedValue::ASCII: pyc_output << "!a"; break; } if (formatted_value->conversion() & ASTFormattedValue::HAVE_FMT_SPEC) { pyc_output << ":" << formatted_value->format_spec().cast()->object().cast()->value(); } pyc_output << "}"; } static std::unordered_set node_seen; void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) { if (node == NULL) { pyc_output << "None"; cleanBuild = true; return; } if (node_seen.find((ASTNode *)node) != node_seen.end()) { fputs("WARNING: Circular reference detected\n", stderr); return; } node_seen.insert((ASTNode *)node); switch (node->type()) { case ASTNode::NODE_BINARY: case ASTNode::NODE_COMPARE: { PycRef bin = node.cast(); print_ordered(node, bin->left(), mod, pyc_output); pyc_output << bin->op_str(); print_ordered(node, bin->right(), mod, pyc_output); } break; case ASTNode::NODE_UNARY: { PycRef un = node.cast(); pyc_output << un->op_str(); print_ordered(node, un->operand(), mod, pyc_output); } break; case ASTNode::NODE_CALL: { PycRef call = node.cast(); print_src(call->func(), mod, pyc_output); pyc_output << "("; bool first = true; for (const auto& param : call->pparams()) { if (!first) pyc_output << ", "; print_src(param, mod, pyc_output); first = false; } for (const auto& param : call->kwparams()) { if (!first) pyc_output << ", "; if (param.first.type() == ASTNode::NODE_NAME) { pyc_output << param.first.cast()->name()->value() << " = "; } else { PycRef str_name = param.first.cast()->object().cast(); pyc_output << str_name->value() << " = "; } print_src(param.second, mod, pyc_output); first = false; } if (call->hasVar()) { if (!first) pyc_output << ", "; pyc_output << "*"; print_src(call->var(), mod, pyc_output); first = false; } if (call->hasKW()) { if (!first) pyc_output << ", "; pyc_output << "**"; print_src(call->kw(), mod, pyc_output); first = false; } pyc_output << ")"; } break; case ASTNode::NODE_DELETE: { pyc_output << "del "; print_src(node.cast()->value(), mod, pyc_output); } break; case ASTNode::NODE_EXEC: { PycRef exec = node.cast(); pyc_output << "exec "; print_src(exec->statement(), mod, pyc_output); if (exec->globals() != NULL) { pyc_output << " in "; print_src(exec->globals(), mod, pyc_output); if (exec->locals() != NULL && exec->globals() != exec->locals()) { pyc_output << ", "; print_src(exec->locals(), mod, pyc_output); } } } break; case ASTNode::NODE_FORMATTEDVALUE: pyc_output << "f" F_STRING_QUOTE; print_formatted_value(node.cast(), mod, pyc_output); pyc_output << F_STRING_QUOTE; break; case ASTNode::NODE_JOINEDSTR: pyc_output << "f" F_STRING_QUOTE; for (const auto& val : node.cast()->values()) { switch (val.type()) { case ASTNode::NODE_FORMATTEDVALUE: print_formatted_value(val.cast(), mod, pyc_output); break; case ASTNode::NODE_OBJECT: // When printing a piece of the f-string, keep the quote style consistent. // This avoids problems when ''' or """ is part of the string. print_const(pyc_output, val.cast()->object(), mod, F_STRING_QUOTE); break; default: fprintf(stderr, "Unsupported node type %d in NODE_JOINEDSTR\n", val.type()); } } pyc_output << F_STRING_QUOTE; break; case ASTNode::NODE_KEYWORD: pyc_output << node.cast()->word_str(); break; case ASTNode::NODE_LIST: { pyc_output << "["; bool first = true; cur_indent++; for (const auto& val : node.cast()->values()) { if (first) pyc_output << "\n"; else pyc_output << ",\n"; start_line(cur_indent, pyc_output); print_src(val, mod, pyc_output); first = false; } cur_indent--; pyc_output << "]"; } break; case ASTNode::NODE_SET: { pyc_output << "{"; bool first = true; cur_indent++; for (const auto& val : node.cast()->values()) { if (first) pyc_output << "\n"; else pyc_output << ",\n"; start_line(cur_indent, pyc_output); print_src(val, mod, pyc_output); first = false; } cur_indent--; pyc_output << "}"; } break; case ASTNode::NODE_COMPREHENSION: { PycRef comp = node.cast(); bool is_dict = comp->result() != NULL && comp->result().type() == ASTNode::NODE_MAP; pyc_output << (is_dict ? "{ " : "[ "); if (is_dict) { const auto& entries = comp->result().cast()->values(); if (!entries.empty()) { print_src(entries.front().first, mod, pyc_output); pyc_output << ": "; print_src(entries.front().second, mod, pyc_output); } } else { print_src(comp->result(), mod, pyc_output); } for (const auto& gen : comp->generators()) { pyc_output << " for "; print_src(gen->index(), mod, pyc_output); pyc_output << " in "; print_src(gen->iter(), mod, pyc_output); if (gen->condition()) { pyc_output << " if "; print_src(gen->condition(), mod, pyc_output); } } pyc_output << (is_dict ? " }" : " ]"); } break; case ASTNode::NODE_MAP: { pyc_output << "{"; bool first = true; cur_indent++; for (const auto& val : node.cast()->values()) { if (first) pyc_output << "\n"; else pyc_output << ",\n"; start_line(cur_indent, pyc_output); if (val.first == NULL) { /* Dictionary unpacking: {**expr} */ pyc_output << "**"; print_src(val.second, mod, pyc_output); } else { print_src(val.first, mod, pyc_output); pyc_output << ": "; print_src(val.second, mod, pyc_output); } first = false; } cur_indent--; pyc_output << " }"; } break; case ASTNode::NODE_CONST_MAP: { PycRef const_map = node.cast(); PycTuple::value_t keys = const_map->keys().cast()->object().cast()->values(); ASTConstMap::values_t values = const_map->values(); auto map = new ASTMap; for (const auto& key : keys) { // Values are pushed onto the stack in reverse order. PycRef value = values.back(); values.pop_back(); map->add(new ASTObject(key), value); } print_src(map, mod, pyc_output); } break; case ASTNode::NODE_NAME: pyc_output << node.cast()->name()->value(); break; case ASTNode::NODE_NODELIST: { cur_indent++; for (const auto& ln : node.cast()->nodes()) { if (ln.cast().type() != ASTNode::NODE_NODELIST) { start_line(cur_indent, pyc_output); } print_src(ln, mod, pyc_output); end_line(pyc_output); } cur_indent--; } break; case ASTNode::NODE_BLOCK: { PycRef blk = node.cast(); if (blk->blktype() == ASTBlock::BLK_ELSE && blk->size() == 0) break; if (blk->blktype() == ASTBlock::BLK_CONTAINER) { end_line(pyc_output); print_block(blk, mod, pyc_output); end_line(pyc_output); break; } pyc_output << blk->type_str(); if (blk->blktype() == ASTBlock::BLK_IF || blk->blktype() == ASTBlock::BLK_ELIF || blk->blktype() == ASTBlock::BLK_WHILE) { if (blk.cast()->negative()) pyc_output << " not "; else pyc_output << " "; print_src(blk.cast()->cond(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_FOR || blk->blktype() == ASTBlock::BLK_ASYNCFOR) { pyc_output << " "; print_src(blk.cast()->index(), mod, pyc_output); pyc_output << " in "; print_src(blk.cast()->iter(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_EXCEPT && blk.cast()->cond() != NULL) { pyc_output << " "; print_src(blk.cast()->cond(), mod, pyc_output); PycRef excVar = blk.cast()->exceptVar(); if (excVar != NULL) { pyc_output << " as "; print_src(excVar, mod, pyc_output); } } else if (blk->blktype() == ASTBlock::BLK_WITH) { pyc_output << " "; print_src(blk.cast()->expr(), mod, pyc_output); PycRef var = blk.try_cast()->var(); if (var != NULL) { pyc_output << " as "; print_src(var, mod, pyc_output); } } pyc_output << ":\n"; cur_indent++; print_block(blk, mod, pyc_output); cur_indent--; } break; case ASTNode::NODE_OBJECT: { PycRef obj = node.cast()->object(); if (obj.type() == PycObject::TYPE_CODE) { PycRef code = obj.cast(); decompyle(code, mod, pyc_output); } else { print_const(pyc_output, obj, mod); } } break; case ASTNode::NODE_PRINT: { pyc_output << "print "; bool first = true; if (node.cast()->stream() != nullptr) { pyc_output << ">>"; print_src(node.cast()->stream(), mod, pyc_output); first = false; } for (const auto& val : node.cast()->values()) { if (!first) pyc_output << ", "; print_src(val, mod, pyc_output); first = false; } if (!node.cast()->eol()) pyc_output << ","; } break; case ASTNode::NODE_RAISE: { PycRef raise = node.cast(); pyc_output << "raise"; if (mod->verCompare(3, 0) >= 0) { /* Python 3: `raise`, `raise X`, or `raise X from Y`. */ int i = 0; for (const auto& param : raise->params()) { pyc_output << (i == 0 ? " " : " from "); print_src(param, mod, pyc_output); ++i; } } else { /* Python 2: `raise [X [, Y [, Z]]]`. */ bool first = true; for (const auto& param : raise->params()) { pyc_output << (first ? " " : ", "); print_src(param, mod, pyc_output); first = false; } } } break; case ASTNode::NODE_RETURN: { PycRef ret = node.cast(); PycRef value = ret->value(); if (!inLambda) { switch (ret->rettype()) { case ASTReturn::RETURN: pyc_output << "return "; break; case ASTReturn::YIELD: pyc_output << "yield "; break; case ASTReturn::YIELD_FROM: if (value.type() == ASTNode::NODE_AWAITABLE) { pyc_output << "await "; value = value.cast()->expression(); } else { pyc_output << "yield from "; } break; } } print_src(value, mod, pyc_output); } break; case ASTNode::NODE_SLICE: { PycRef slice = node.cast(); if (slice->op() & ASTSlice::SLICE1) { print_src(slice->left(), mod, pyc_output); } pyc_output << ":"; if (slice->op() & ASTSlice::SLICE2) { print_src(slice->right(), mod, pyc_output); } } break; case ASTNode::NODE_IMPORT: { PycRef import = node.cast(); if (import->stores().size()) { ASTImport::list_t stores = import->stores(); pyc_output << "from "; if (import->name().type() == ASTNode::NODE_IMPORT) print_src(import->name().cast()->name(), mod, pyc_output); else print_src(import->name(), mod, pyc_output); pyc_output << " import "; if (stores.size() == 1) { auto src = stores.front()->src(); auto dest = stores.front()->dest(); print_src(src, mod, pyc_output); if (src.cast()->name()->value() != dest.cast()->name()->value()) { pyc_output << " as "; print_src(dest, mod, pyc_output); } } else { bool first = true; for (const auto& st : stores) { if (!first) pyc_output << ", "; print_src(st->src(), mod, pyc_output); first = false; if (st->src().cast()->name()->value() != st->dest().cast()->name()->value()) { pyc_output << " as "; print_src(st->dest(), mod, pyc_output); } } } } else { pyc_output << "import "; print_src(import->name(), mod, pyc_output); } } break; case ASTNode::NODE_FUNCTION: { /* Actual named functions are NODE_STORE with a name */ pyc_output << "(lambda "; PycRef code = node.cast()->code(); PycRef code_src = code.cast()->object().cast(); ASTFunction::defarg_t defargs = node.cast()->defargs(); ASTFunction::defarg_t kwdefargs = node.cast()->kwdefargs(); auto da = defargs.cbegin(); int narg = 0; for (int i=0; iargCount(); i++) { if (narg) pyc_output << ", "; pyc_output << code_src->getLocal(narg++)->value(); if ((code_src->argCount() - i) <= (int)defargs.size()) { pyc_output << " = "; print_src(*da++, mod, pyc_output); } } da = kwdefargs.cbegin(); if (code_src->kwOnlyArgCount() != 0) { pyc_output << (narg == 0 ? "*" : ", *"); for (int i = 0; i < code_src->argCount(); i++) { pyc_output << ", "; pyc_output << code_src->getLocal(narg++)->value(); if ((code_src->kwOnlyArgCount() - i) <= (int)kwdefargs.size()) { pyc_output << " = "; print_src(*da++, mod, pyc_output); } } } pyc_output << ": "; inLambda = true; print_src(code, mod, pyc_output); inLambda = false; pyc_output << ")"; } break; case ASTNode::NODE_STORE: { PycRef src = node.cast()->src(); PycRef dest = node.cast()->dest(); if (src.type() == ASTNode::NODE_FUNCTION) { PycRef code = src.cast()->code(); PycRef code_src = code.cast()->object().cast(); bool isLambda = false; if (strcmp(code_src->name()->value(), "") == 0) { pyc_output << "\n"; start_line(cur_indent, pyc_output); print_src(dest, mod, pyc_output); pyc_output << " = lambda "; isLambda = true; } else { pyc_output << "\n"; start_line(cur_indent, pyc_output); if (code_src->flags() & PycCode::CO_COROUTINE) pyc_output << "async "; pyc_output << "def "; print_src(dest, mod, pyc_output); pyc_output << "("; } ASTFunction::defarg_t defargs = src.cast()->defargs(); ASTFunction::defarg_t kwdefargs = src.cast()->kwdefargs(); /* Python signature order is: positional-or-keyword, *args, keyword-only, **kwargs. Local variables, however, are ordered positional, keyword-only, *args, **kwargs, so index them explicitly rather than sequentially. */ const int argC = code_src->argCount(); const int kwC = code_src->kwOnlyArgCount(); const bool hasVarArgs = (code_src->flags() & PycCode::CO_VARARGS) != 0; const bool hasVarKw = (code_src->flags() & PycCode::CO_VARKEYWORDS) != 0; bool firstParam = true; auto da = defargs.cbegin(); for (int i = 0; i < argC; ++i) { if (!firstParam) pyc_output << ", "; firstParam = false; pyc_output << code_src->getLocal(i)->value(); if ((argC - i) <= (int)defargs.size()) { pyc_output << " = "; print_src(*da++, mod, pyc_output); } } if (hasVarArgs) { if (!firstParam) pyc_output << ", "; firstParam = false; pyc_output << "*" << code_src->getLocal(argC + kwC)->value(); } else if (kwC != 0) { if (!firstParam) pyc_output << ", "; firstParam = false; pyc_output << "*"; } auto kda = kwdefargs.cbegin(); for (int i = 0; i < kwC; ++i) { if (!firstParam) pyc_output << ", "; firstParam = false; pyc_output << code_src->getLocal(argC + i)->value(); if ((kwC - i) <= (int)kwdefargs.size()) { pyc_output << " = "; print_src(*kda++, mod, pyc_output); } } if (hasVarKw) { if (!firstParam) pyc_output << ", "; firstParam = false; pyc_output << "**" << code_src->getLocal(argC + kwC + (hasVarArgs ? 1 : 0))->value(); } if (isLambda) { pyc_output << ": "; } else { pyc_output << "):\n"; printDocstringAndGlobals = true; } bool preLambda = inLambda; inLambda |= isLambda; print_src(code, mod, pyc_output); inLambda = preLambda; } else if (src.type() == ASTNode::NODE_CLASS) { pyc_output << "\n"; start_line(cur_indent, pyc_output); pyc_output << "class "; print_src(dest, mod, pyc_output); PycRef bases = src.cast()->bases().cast(); ASTCall::kwparam_t classKw; { PycRef classCall = src.cast()->code(); if (classCall != NULL && classCall.type() == ASTNode::NODE_CALL) classKw = classCall.cast()->kwparams(); } if (bases->values().size() > 0 || !classKw.empty()) { pyc_output << "("; bool first = true; for (const auto& val : bases->values()) { if (!first) pyc_output << ", "; print_src(val, mod, pyc_output); first = false; } for (const auto& kv : classKw) { if (!first) pyc_output << ", "; if (kv.first.type() == ASTNode::NODE_NAME) { pyc_output << kv.first.cast()->name()->value() << " = "; } else { pyc_output << kv.first.cast()->object() .cast()->value() << " = "; } print_src(kv.second, mod, pyc_output); first = false; } pyc_output << "):\n"; } else { // Don't put parens if there are no base classes pyc_output << ":\n"; } printClassDocstring = true; PycRef code = src.cast()->code().cast() ->func().cast()->code(); print_src(code, mod, pyc_output); } else if (src.type() == ASTNode::NODE_IMPORT) { PycRef import = src.cast(); if (import->fromlist() != NULL) { PycRef fromlist = import->fromlist().cast()->object(); if (fromlist != Pyc_None) { pyc_output << "from "; if (import->name().type() == ASTNode::NODE_IMPORT) print_src(import->name().cast()->name(), mod, pyc_output); else print_src(import->name(), mod, pyc_output); pyc_output << " import "; if (fromlist.type() == PycObject::TYPE_TUPLE || fromlist.type() == PycObject::TYPE_SMALL_TUPLE) { bool first = true; for (const auto& val : fromlist.cast()->values()) { if (!first) pyc_output << ", "; pyc_output << val.cast()->value(); first = false; } } else { pyc_output << fromlist.cast()->value(); } } else { pyc_output << "import "; print_src(import->name(), mod, pyc_output); } } else { pyc_output << "import "; PycRef import_name = import->name(); print_src(import_name, mod, pyc_output); if (!dest.cast()->name()->isEqual(import_name.cast()->name().cast())) { pyc_output << " as "; print_src(dest, mod, pyc_output); } } } else if (src.type() == ASTNode::NODE_BINARY && src.cast()->is_inplace()) { print_src(src, mod, pyc_output); } else { print_src(dest, mod, pyc_output); pyc_output << " = "; print_src(src, mod, pyc_output); } } break; case ASTNode::NODE_CHAINSTORE: { for (auto& dest : node.cast()->nodes()) { print_src(dest, mod, pyc_output); pyc_output << " = "; } print_src(node.cast()->src(), mod, pyc_output); } break; case ASTNode::NODE_SUBSCR: { print_src(node.cast()->name(), mod, pyc_output); pyc_output << "["; print_src(node.cast()->key(), mod, pyc_output); pyc_output << "]"; } break; case ASTNode::NODE_CONVERT: { pyc_output << "`"; print_src(node.cast()->name(), mod, pyc_output); pyc_output << "`"; } break; case ASTNode::NODE_TUPLE: { PycRef tuple = node.cast(); ASTTuple::value_t values = tuple->values(); if (tuple->requireParens()) pyc_output << "("; bool first = true; for (const auto& val : values) { if (!first) pyc_output << ", "; print_src(val, mod, pyc_output); first = false; } if (values.size() == 1) pyc_output << ','; if (tuple->requireParens()) pyc_output << ')'; } break; case ASTNode::NODE_ANNOTATED_VAR: { PycRef annotated_var = node.cast(); PycRef name = annotated_var->name().cast(); PycRef annotation = annotated_var->annotation(); pyc_output << name->object().cast()->value(); pyc_output << ": "; print_src(annotation, mod, pyc_output); } break; case ASTNode::NODE_TERNARY: { /* parenthesis might be needed * * when if-expr is part of numerical expression, ternary has the LOWEST precedence * print(a + b if False else c) * output is c, not a+c (a+b is calculated first) * * but, let's not add parenthesis - to keep the source as close to original as possible in most cases */ PycRef ternary = node.cast(); //pyc_output << "("; print_src(ternary->if_expr(), mod, pyc_output); const auto if_block = ternary->if_block().cast(); pyc_output << " if "; if (if_block->negative()) pyc_output << "not "; print_src(if_block->cond(), mod, pyc_output); pyc_output << " else "; print_src(ternary->else_expr(), mod, pyc_output); //pyc_output << ")"; } break; default: pyc_output << "type() << ">"; fprintf(stderr, "Unsupported Node type: %d\n", node->type()); cleanBuild = false; node_seen.erase((ASTNode *)node); return; } cleanBuild = true; node_seen.erase((ASTNode *)node); } bool print_docstring(PycRef obj, int indent, PycModule* mod, std::ostream& pyc_output) { // docstrings are translated from the bytecode __doc__ = 'string' to simply '''string''' auto doc = obj.try_cast(); if (doc != nullptr) { start_line(indent, pyc_output); doc->print(pyc_output, mod, true); pyc_output << "\n"; return true; } return false; } static std::unordered_set code_seen; /* A `return` is invalid at module scope, yet the implicit "return None" (and, with nested `if`s, copies of it) can end up inside module-level blocks. Strip the trailing bare return from this block and, recursively, from every nested block (descending into all children, not just the last). Only ever called on the code object, whose blocks are module-level control flow (nested function/class bodies are separate code objects, so this never touches a real `return`). */ static void StripModuleTrailingReturn(PycRef node) { if (node == NULL) return; const ASTNodeList::list_t* nodes = NULL; if (node.type() == ASTNode::NODE_NODELIST) nodes = &node.cast()->nodes(); else if (node.type() == ASTNode::NODE_BLOCK) nodes = &node.cast()->nodes(); if (nodes == NULL || nodes->empty()) return; /* Recurse into every nested block first. */ for (const auto& child : *nodes) { if (child.type() == ASTNode::NODE_BLOCK) StripModuleTrailingReturn(child); } /* Then strip this block's own trailing return. Any `return` is invalid at module scope (the only ones that appear are reconstruction artifacts), so drop it regardless of the returned value. */ if (!nodes->empty() && nodes->back().type() == ASTNode::NODE_RETURN && nodes->back().cast()->rettype() == ASTReturn::RETURN) { if (node.type() == ASTNode::NODE_NODELIST) node.cast()->removeLast(); else node.cast()->removeLast(); } } void decompyle(PycRef code, PycModule* mod, std::ostream& pyc_output) { if (code_seen.find((PycCode *)code) != code_seen.end()) { fputs("WARNING: Circular reference detected\n", stderr); return; } code_seen.insert((PycCode *)code); PycRef source = BuildFromCode(code, mod); PycRef clean = source.cast(); if (cleanBuild) { // The Python compiler adds some stuff that we don't really care // about, and would add extra code for re-compilation anyway. // We strip these lines out here, and then add a "pass" statement // if the cleaned up code is empty if (clean->nodes().front().type() == ASTNode::NODE_STORE) { PycRef store = clean->nodes().front().cast(); if (store->src().type() == ASTNode::NODE_NAME && store->dest().type() == ASTNode::NODE_NAME) { PycRef src = store->src().cast(); PycRef dest = store->dest().cast(); if (src->name()->isEqual("__name__") && dest->name()->isEqual("__module__")) { // __module__ = __name__ // Automatically added by Python 2.2.1 and later clean->removeFirst(); } } } if (clean->nodes().front().type() == ASTNode::NODE_STORE) { PycRef store = clean->nodes().front().cast(); if (store->src().type() == ASTNode::NODE_OBJECT && store->dest().type() == ASTNode::NODE_NAME) { PycRef src = store->src().cast(); PycRef srcString = src->object().try_cast(); PycRef dest = store->dest().cast(); if (dest->name()->isEqual("__qualname__")) { // __qualname__ = '' // Automatically added by Python 3.3 and later clean->removeFirst(); } } } // Class and module docstrings may only appear at the beginning of their source if (printClassDocstring && clean->nodes().front().type() == ASTNode::NODE_STORE) { PycRef store = clean->nodes().front().cast(); if (store->dest().type() == ASTNode::NODE_NAME && store->dest().cast()->name()->isEqual("__doc__") && store->src().type() == ASTNode::NODE_OBJECT) { if (print_docstring(store->src().cast()->object(), cur_indent + (code->name()->isEqual("") ? 0 : 1), mod, pyc_output)) clean->removeFirst(); } } if (clean->nodes().back().type() == ASTNode::NODE_RETURN) { PycRef ret = clean->nodes().back().cast(); PycRef retObj = ret->value().try_cast(); if (ret->value() == NULL || ret->value().type() == ASTNode::NODE_LOCALS || (retObj && retObj->object().type() == PycObject::TYPE_NONE)) { clean->removeLast(); // Always an extraneous return statement } } // Python 3.11 class bodies end with compiler-generated // "__classcell__ = __class__" and "return __class__"; strip them. if (clean->nodes().size() && clean->nodes().back().type() == ASTNode::NODE_RETURN) { PycRef ret = clean->nodes().back().cast(); if (ret->value() != NULL && ret->value().type() == ASTNode::NODE_NAME && ret->value().cast()->name()->isEqual("__class__")) { clean->removeLast(); } } if (clean->nodes().size() && clean->nodes().back().type() == ASTNode::NODE_STORE) { PycRef st = clean->nodes().back().cast(); if (st->dest().type() == ASTNode::NODE_NAME && st->dest().cast()->name()->isEqual("__classcell__")) { clean->removeLast(); } } /* Strip the module's implicit trailing "return None" (invalid at module scope), even when nested in a trailing block. */ if (code->name()->isEqual("")) StripModuleTrailingReturn(clean.cast()); } if (printClassDocstring) printClassDocstring = false; // This is outside the clean check so a source block will always // be compilable, even if decompylation failed. if (clean->nodes().size() == 0 && !code.isIdent(mod->code())) clean->append(new ASTKeyword(ASTKeyword::KW_PASS)); bool part1clean = cleanBuild; if (printDocstringAndGlobals) { if (code->consts()->size()) print_docstring(code->getConst(0), cur_indent + 1, mod, pyc_output); PycCode::globals_t globs = code->getGlobals(); if (globs.size()) { start_line(cur_indent + 1, pyc_output); pyc_output << "global "; bool first = true; for (const auto& glob : globs) { if (!first) pyc_output << ", "; pyc_output << glob->value(); first = false; } pyc_output << "\n"; } printDocstringAndGlobals = false; } print_src(source, mod, pyc_output); if (!cleanBuild || !part1clean) { start_line(cur_indent, pyc_output); pyc_output << "# WARNING: Decompyle incomplete\n"; } code_seen.erase((PycCode *)code); }