#include #include #include #include #include #include #include "ASTree.h" #include "FastStack.h" #include "pyc_numeric.h" #include "bytecode.h" // This must be a triple quote (''' or """), to handle interpolated string literals containing the opposite quote style. // E.g. f'''{"interpolated "123' literal"}''' -> valid. // E.g. f"""{"interpolated "123' literal"}""" -> valid. // E.g. f'{"interpolated "123' literal"}' -> invalid, unescaped quotes in literal. // E.g. f'{"interpolated \"123\' literal"}' -> invalid, f-string expression does not allow backslash. // NOTE: Nested f-strings not supported. #define F_STRING_QUOTE "'''" static void append_to_chain_store(const PycRef& chainStore, PycRef item, FastStack& stack, const PycRef& curblock); /* Use this to determine if an error occurred (and therefore, if we should * avoid cleaning the output tree) */ static bool cleanBuild; /* Use this to prevent printing return keywords and newlines in lambdas. */ static bool inLambda = false; /* Use this to keep track of whether we need to print out any docstring and * the list of global variables that we are using (such as inside a function). 
*/ static bool printDocstringAndGlobals = false; /* Use this to keep track of whether we need to print a class or module docstring */ static bool printClassDocstring = true; // shortcut for all top/pop calls static PycRef StackPopTop(FastStack& stack) { const auto node(stack.top()); stack.pop(); return node; } /* compiler generates very, VERY similar byte code for if/else statement block and if-expression * statement * if a: b = 1 * else: b = 2 * expression: * b = 1 if a else 2 * (see for instance https://stackoverflow.com/a/52202007) * here, try to guess if just finished else statement is part of if-expression (ternary operator) * if it is, remove statements from the block and put a ternary node on top of stack */ static void CheckIfExpr(FastStack& stack, PycRef curblock) { if (stack.empty()) return; if (curblock->nodes().size() < 2) return; auto rit = curblock->nodes().crbegin(); // the last is "else" block, the one before should be "if" (could be "for", ...) if ((*rit)->type() != ASTNode::NODE_BLOCK || (*rit).cast()->blktype() != ASTBlock::BLK_ELSE) return; ++rit; if ((*rit)->type() != ASTNode::NODE_BLOCK || (*rit).cast()->blktype() != ASTBlock::BLK_IF) return; auto else_expr = StackPopTop(stack); curblock->removeLast(); auto if_block = curblock->nodes().back(); auto if_expr = StackPopTop(stack); curblock->removeLast(); stack.push(new ASTTernary(std::move(if_block), std::move(if_expr), std::move(else_expr))); } PycRef BuildFromCode(PycRef code, PycModule* mod) { PycBuffer source(code->code()->value(), code->code()->length()); FastStack stack((mod->majorVer() == 1) ? 
20 : code->stackSize()); stackhist_t stack_hist; std::stack > blocks; PycRef defblock = new ASTBlock(ASTBlock::BLK_MAIN); defblock->init(); PycRef curblock = defblock; blocks.push(defblock); int opcode, operand; int curpos = 0; int pos = 0; int unpack = 0; bool else_pop = false; bool need_try = false; bool variable_annotations = false; std::vector exception_entries; size_t next_exception_entry = 0; if (mod->verCompare(3, 11) >= 0) { exception_entries = code->exceptionTableEntries(); } /* Pre-scan for 3.11 `match`/`case` (class patterns only — the only kind in the corpus). Each case compiles to [COPY 1] # preserve a subject copy for the next case ; LOAD_CONST ; MATCH_CLASS COPY 1; POP_JUMP_FORWARD_IF_NONE # pattern-fail -> next case UNPACK_SEQUENCE ; STORE... (or POP_TOP when n==0) # captures ; [JUMP_FORWARD ] POP_TOP # discard the None result, then the next case We register only the SIMPLE handleable shape: positional captures only (empty kwnames), no guard (no jump to failTarget inside the body). Anything else is left unregistered -> MATCH_CLASS bails -> honest `.dis`. matchCase[MATCH_CLASS off] = {isFirst, matchEnd, failTarget, bodyStart, caps}; matchCaseEnd = failTarget offsets (close BLK_CASE + skip the machinery POP_TOP); matchBlockEnd = matchEnd offsets (close BLK_MATCH). */ struct MCase { bool isFirst; int matchEnd; int failTarget; int bodyStart; int popExtra; std::vector> caps; }; std::unordered_map matchCase; std::unordered_map matchCaseEnd; // failTarget -> offset after the machinery POP_TOP std::unordered_set matchBlockEnd; /* SHIP-149: VALUE-pattern `match`/`case` (literal patterns — `match x: case 'a': …`). Unlike class patterns (MATCH_CLASS) these compile to a COMPARE_OP chain: per case `[COPY 1] LOAD ; COMPARE_OP ==; POP_JUMP_FORWARD_IF_FALSE ft`, the subject threaded on the stack via the COPY (the LAST case omits the COPY, its COMPARE consuming the subject); the matched path POP_TOPs the leftover subject then runs the body. 
matchValue[COMPARE off] = the case. */ struct VCase { bool isFirst; bool isLast; int matchEnd; int failTarget; int bodyStart; }; std::unordered_map matchValue; /* A wildcard `case _:` carries NO MATCH_CLASS (it always matches; the bytecode just POP_TOPs the leftover subject then runs the body), so the MATCH_CLASS pre-scan can't see it and would end the match at the last TYPED case — leaving the wildcard body to render as spurious post-match code (dropping any fall-through/return after the match: RETDROP, e.g. ripple `factory.__call__`). Map the wildcard body start -> the real match merge so the loop opens a `case _:` block there and the match closes at the true merge instead. */ std::unordered_map matchWildcardOpen; // wildcardBodyStart -> matchEnd(merge) if (mod->verCompare(3, 11) >= 0) { struct Ins { int op; int arg; int off; int next; }; std::vector v; std::unordered_map idxOf; // offset -> index in v { PycBuffer scan(code->code()->value(), code->code()->length()); int so, sa, sp = 0; while (!scan.atEof()) { int io = sp; bc_next(scan, mod, so, sa, sp); if (so == Pyc::CACHE) continue; idxOf[io] = (int)v.size(); v.push_back({ so, sa, io, sp }); } } auto capName = [&](const Ins& s) -> PycRef { if (s.op == Pyc::STORE_FAST_A) return new ASTName(code->getLocal(s.arg)); if (s.op == Pyc::STORE_NAME_A || s.op == Pyc::STORE_GLOBAL_A) return new ASTName(code->getName(s.arg)); if (s.op == Pyc::STORE_DEREF_A) return new ASTName(code->getCellVar(mod, s.arg)); return nullptr; }; /* Parse one case starting at the MATCH_CLASS at v[mi]; fill failTarget, bodyStart, caps. Returns true iff the simple shape holds. 
*/ auto parseCase = [&](size_t mi, int& failTarget, int& bodyStart, int& popExtra, std::vector>& caps) -> bool { int nPos = v[mi].arg; /* require an empty kwnames tuple just before (positional-only) */ if (mi == 0) return false; const Ins& kw = v[mi-1]; if (kw.op != Pyc::LOAD_CONST_A) return false; PycRef kwo = code->getConst(kw.arg); if (kwo == nullptr || (kwo->type() != PycObject::TYPE_TUPLE && kwo->type() != PycObject::TYPE_SMALL_TUPLE)) return false; if (kwo.cast()->values().size() != 0) return false; // kw patterns -> bail /* next: COPY 1 ; POP_JUMP_FORWARD_IF_NONE failTarget */ if (mi+2 >= v.size()) return false; if (!(v[mi+1].op == Pyc::COPY_A && v[mi+1].arg == 1)) return false; if (v[mi+2].op != Pyc::POP_JUMP_FORWARD_IF_NONE_A) return false; failTarget = v[mi+2].next + v[mi+2].arg * (int)sizeof(uint16_t); size_t j = mi+3; caps.clear(); /* matched-path captures: UNPACK_SEQUENCE then nPos STOREs (the class pattern always emits UNPACK_SEQUENCE, even nPos==0). */ if (j >= v.size() || v[j].op != Pyc::UNPACK_SEQUENCE_A || v[j].arg != nPos) return false; j++; for (int k = 0; k < nPos; ++k, ++j) { if (j >= v.size()) return false; /* a `_` wildcard sub-pattern (`case Ok(_):`) discards its captured value with POP_TOP instead of a STORE — render it as `_`. */ if (v[j].op == Pyc::POP_TOP) { PycRef us = new PycString(); us->setValue("_"); caps.push_back(new ASTName(us)); continue; } PycRef nm = capName(v[j]); if (nm == nullptr) return false; // non-simple capture target caps.push_back(nm); } /* optional leftover-subject POP_TOP(s): a COPY preserved the subject for a later case; on THIS (matched) path the copy is discarded here. (A case body never starts with POP_TOP, so this is unambiguous.) 
*/ popExtra = 0; while (j < v.size() && v[j].op == Pyc::POP_TOP) { popExtra++; j++; } if (j >= v.size()) return false; bodyStart = v[j].off; // next REAL instruction (v is CACHE-filtered) /* guard detection: a jump to failTarget inside [bodyStart, failTarget) means a `case P if g:` guard (or other case-internal branch we don't model) -> bail. */ for (size_t k = j; k < v.size() && v[k].off < failTarget; ++k) { int op = v[k].op; bool isJump = op == Pyc::POP_JUMP_FORWARD_IF_TRUE_A || op == Pyc::POP_JUMP_FORWARD_IF_FALSE_A || op == Pyc::POP_JUMP_FORWARD_IF_NONE_A || op == Pyc::POP_JUMP_FORWARD_IF_NOT_NONE_A || op == Pyc::JUMP_FORWARD_A; if (isJump) { int tgt = v[k].next + v[k].arg * (int)sizeof(uint16_t); if (tgt == failTarget) return false; // guard } } return true; }; /* collect MATCH_CLASS anchors, parse each */ struct CaseRec { size_t mi; int failTarget; int bodyStart; int popExtra; std::vector> caps; }; std::vector recs; for (size_t i = 0; i < v.size(); ++i) { if (v[i].op != Pyc::MATCH_CLASS_A) continue; int ft, bs, pe; std::vector> caps; if (parseCase(i, ft, bs, pe, caps)) recs.push_back({ i, ft, bs, pe, caps }); } /* chain cases into matches: case i's successor is the case whose MATCH_CLASS region begins at the POP_TOP at failTarget (the failTarget op is a POP_TOP; the next case follows it). */ std::unordered_map caseByPopTop; // failTarget(=POP_TOP off) -> rec idx of the case there /* a rec's case begins with an optional COPY then the class load; the POP_TOP that precedes it is at failTarget of the previous case. The POP_TOP sits at some offset; the NEXT case's MATCH_CLASS is the first MATCH_CLASS anchor at a higher offset. Map each rec's "entry POP_TOP". */ /* Determine, for each rec, whether some OTHER rec's failTarget POP_TOP is immediately followed (skipping POP_TOP/COPY/loads) by this rec's MATCH_CLASS. 
*/ auto firstMatchClassAtOrAfter = [&](int off) -> int { for (size_t k = 0; k < v.size(); ++k) if (v[k].off >= off && v[k].op == Pyc::MATCH_CLASS_A) return (int)k; return -1; }; std::unordered_set isSuccessor; std::unordered_map succOf; // rec idx -> rec idx std::unordered_map recByMi; for (size_t r = 0; r < recs.size(); ++r) recByMi[recs[r].mi] = r; for (size_t r = 0; r < recs.size(); ++r) { int ft = recs[r].failTarget; if (idxOf.count(ft) && v[idxOf[ft]].op == Pyc::POP_TOP) { int nm = firstMatchClassAtOrAfter(v[idxOf[ft]].next); if (nm >= 0 && recByMi.count((size_t)nm)) { /* ensure nothing but case-setup (COPY/loads) lies between */ size_t s = recByMi[(size_t)nm]; succOf[r] = s; isSuccessor.insert(s); } } } for (size_t r = 0; r < recs.size(); ++r) { if (isSuccessor.count(r)) continue; // not a first case /* walk the chain to find the last case + matchEnd */ size_t last = r; while (succOf.count(last)) last = succOf[last]; int lastFt = recs[last].failTarget; // POP_TOP of the final no-match int matchEnd = (idxOf.count(lastFt) && v[idxOf[lastFt]].op == Pyc::POP_TOP) ? v[idxOf[lastFt]].next : lastFt; /* Wildcard `case _:` detection. The TYPED cases' matched bodies all JUMP_FORWARD to the post-match merge M. If M lies PAST `matchEnd` (the last typed case's no-match POP_TOP+1), the region [matchEnd, M) is the wildcard case body (the final no-match falls through to it). Require a single consistent merge target and NO MATCH_CLASS in the gap (an unparsed typed case would also have a MATCH_CLASS there -> leave the match honest rather than mis-label it a wildcard). 
*/ { int firstOff = v[recs[r].mi].off; int merge = -1; bool consistent = true; for (size_t k = 0; k < v.size(); ++k) { if (v[k].off < firstOff || v[k].off > lastFt) continue; if (v[k].op != Pyc::JUMP_FORWARD_A) continue; int t = v[k].next + v[k].arg * (int)sizeof(uint16_t); if (t > matchEnd) { if (merge < 0) merge = t; else if (merge != t) { consistent = false; break; } } } /* The gap must be a TRUE wildcard 'case _:' (subject already discarded by the final no-match POP_TOP), not a capture pattern ('case other:' -> STORE_x binds the subject) nor any further pattern test (MATCH_x or POP_JUMP_FORWARD_IF_NONE). Either would make the gap mis-render as 'case _:' with a wrong/garbage binding, and the STORE divergence is CF-gate-blind. Bail to honest .dis on anything but a plain unconditional body. */ bool gapUnsafe = false; if (consistent && merge > matchEnd) { for (size_t k = 0; k < v.size(); ++k) { if (v[k].off < matchEnd || v[k].off >= merge) continue; int op = v[k].op; if (op == Pyc::MATCH_CLASS_A || op == Pyc::MATCH_SEQUENCE || op == Pyc::MATCH_MAPPING || op == Pyc::MATCH_KEYS || op == Pyc::POP_JUMP_FORWARD_IF_NONE_A) { gapUnsafe = true; break; } } /* first real op of the gap (skip NOP): a leading STORE_* binds the subject => capture pattern, not `_`. */ for (size_t k = 0; k < v.size(); ++k) { if (v[k].off < matchEnd) continue; if (v[k].op == Pyc::NOP) continue; if (v[k].op == Pyc::STORE_FAST_A || v[k].op == Pyc::STORE_NAME_A || v[k].op == Pyc::STORE_GLOBAL_A || v[k].op == Pyc::STORE_DEREF_A) gapUnsafe = true; break; } /* The wildcard body must be TERMINAL (ends in raise/return). A FALL-THROUGH wildcard reaches `merge` by falling off its end, so `merge` is shared with the typed cases AND any enclosing if/else whose branches converge there — extending the match to it then mis-places the post-merge code (e.g. a method-level `return` rendered inside the `else`: node_connect_protocol `get_connect_stages`, a gate-blind mis-render). 
A terminal wildcard exits before `merge`, so the merge is unambiguously the match's own. */ int lastOp = -1; for (size_t k = 0; k < v.size(); ++k) if (v[k].off >= matchEnd && v[k].off < merge) lastOp = v[k].op; if (!(lastOp == Pyc::RAISE_VARARGS_A || lastOp == Pyc::RETURN_VALUE || lastOp == Pyc::RETURN_CONST_A || lastOp == Pyc::RERAISE || lastOp == Pyc::RERAISE_A)) gapUnsafe = true; } if (consistent && merge > matchEnd && !gapUnsafe) { matchWildcardOpen[matchEnd] = merge; // open `case _:` at the gap start matchCaseEnd[merge] = merge; // close the wildcard case at the merge matchEnd = merge; // the match itself ends at the merge } } /* register every case in the chain */ for (size_t c = r; ; c = succOf[c]) { MCase mc; mc.isFirst = (c == r); mc.matchEnd = matchEnd; mc.failTarget = recs[c].failTarget; mc.bodyStart = recs[c].bodyStart; mc.popExtra = recs[c].popExtra; mc.caps = recs[c].caps; matchCase[v[recs[c].mi].off] = mc; int ft = recs[c].failTarget; matchCaseEnd[ft] = (idxOf.count(ft) && v[idxOf[ft]].op == Pyc::POP_TOP) ? v[idxOf[ft]].next : ft; if (!succOf.count(c)) break; } matchBlockEnd.insert(matchEnd); } } /* SHIP-149 value-pattern match pre-scan (see VCase above). */ if (mod->verCompare(3, 11) >= 0 && mod->verCompare(3, 12) < 0) { struct Ins { int op; int arg; int off; int next; }; std::vector v; std::unordered_map idxOf; { PycBuffer scan(code->code()->value(), code->code()->length()); int so, sa, sp = 0; while (!scan.atEof()) { int io = sp; bc_next(scan, mod, so, sa, sp); if (so == Pyc::CACHE) continue; idxOf[io] = (int)v.size(); v.push_back({ so, sa, io, sp }); } } /* a case TEST: `[COPY 1] ; COMPARE_OP ==(arg 2); PJF ft`, where the pattern (the COMPARE's right operand) is EITHER a single LOAD_CONST (literal `case 'a':`) OR a dotted name — LOAD_{GLOBAL,NAME,DEREF,FAST} base + >=1 LOAD_ATTR (Enum/attribute `case State.A:`). 
The COPY-1-before / POP_TOP-after subject threading is identical for both, so anchor on the COMPARE==/PJF and walk backward for the pattern. The pattern NODE is already built on the stack by the load ops, so the runtime COMPARE handler is unchanged. */ struct Test { int startOff; bool hasCopy; int compareOff; int ft; int matchedNext; }; std::vector tests; std::unordered_map testByStart; for (size_t c = 1; c + 1 < v.size(); ++c) { if (!(v[c].op == Pyc::COMPARE_OP_A && v[c].arg == 2 && v[c+1].op == Pyc::POP_JUMP_FORWARD_IF_FALSE_A)) continue; int j = (int)c - 1, attrs = 0; while (j >= 0 && v[j].op == Pyc::LOAD_ATTR_A) { attrs++; --j; } int patStart = -1; if (attrs == 0 && j >= 0 && v[j].op == Pyc::LOAD_CONST_A) patStart = j; /* literal pattern */ else if (attrs >= 1 && j >= 0 && (v[j].op == Pyc::LOAD_GLOBAL_A || v[j].op == Pyc::LOAD_NAME_A || v[j].op == Pyc::LOAD_DEREF_A || v[j].op == Pyc::LOAD_FAST_A)) patStart = j; /* dotted Enum/attribute pattern */ if (patStart < 0) continue; bool hasCopy = (patStart >= 1 && v[patStart-1].op == Pyc::COPY_A && v[patStart-1].arg == 1); int startOff = hasCopy ? v[patStart-1].off : v[patStart].off; int ft = v[c+1].next + v[c+1].arg * (int)sizeof(uint16_t); testByStart[startOff] = (int)tests.size(); tests.push_back({ startOff, hasCopy, v[c].off, ft, v[c+1].next }); } std::unordered_set targeted; for (const auto& t : tests) if (testByStart.count(t.ft)) targeted.insert(t.ft); for (size_t t = 0; t < tests.size(); ++t) { if (!tests[t].hasCopy || targeted.count(tests[t].startOff)) continue; /* not a chain start */ std::vector chain; std::unordered_set seen; int cur = (int)t; bool ok = true; while (true) { if (seen.count(cur)) { ok = false; break; } seen.insert(cur); chain.push_back(cur); int ft = tests[cur].ft; if (!testByStart.count(ft)) break; /* ft = matchEnd, cur was last? 
*/ int nx = testByStart[ft]; if (tests[nx].hasCopy) { cur = nx; continue; } chain.push_back(nx); break; /* no-copy = LAST case */ } if (!ok || chain.size() < 2) continue; int lastT = chain.back(); if (tests[lastT].hasCopy) continue; /* must end on a no-copy last case */ int lastFt = tests[lastT].ft; auto caseBodyStart = [&](const Test& tc) -> int { if (tc.hasCopy) { int mn = tc.matchedNext; /* POP_TOP discarding subject */ return (idxOf.count(mn) && v[idxOf[mn]].op == Pyc::POP_TOP) ? v[idxOf[mn]].next : mn; } return tc.matchedNext; }; /* A non-returning case body JUMP_FORWARDs to the TRUE match end, which lies PAST the last typed case's ft when a wildcard `case _:` body sits between (emote_svc: `case 'destroy':` ft -> the `case _:` body, the 'move' case jumps to the post-wildcard merge). Find a single consistent forward-jump target > lastFt among the case bodies => the wildcard span [lastFt, conv) + matchEnd=conv. */ int conv = -1; bool convOk = true; for (size_t k = 0; k < chain.size(); ++k) { const Test& tc = tests[chain[k]]; int bs = caseBodyStart(tc); if (!idxOf.count(bs)) continue; for (int ii = idxOf[bs]; ii < (int)v.size() && v[ii].off < tc.ft; ++ii) { if (v[ii].op == Pyc::JUMP_FORWARD_A) { int t = v[ii].next + v[ii].arg * (int)sizeof(uint16_t); if (t > lastFt) { if (conv < 0) conv = t; else if (conv != t) convOk = false; } } } } int matchEnd = lastFt; int wildcardStart = -1; if (convOk && conv > lastFt) { bool clean = true; /* [lastFt, conv) must be a plain wildcard body */ for (const auto& tt : tests) if (tt.startOff >= lastFt && tt.startOff < conv) { clean = false; break; } /* The convergence must NOT cross an enclosing exception handler: when the match sits inside a `try/except`, a non-returning case jumps to the enclosing try's exit PAST the except handler, so `conv` overruns the real match end and a wildcard span [lastFt, conv) would absorb the enclosing `except` (http_deprecated). 
Bail the whole value-match there — it renders faithfully as if/elif instead. */ bool crossesExc = false; for (int ii = 0; ii < (int)v.size(); ++ii) if (v[ii].off >= lastFt && v[ii].off < conv && v[ii].op == Pyc::PUSH_EXC_INFO) { crossesExc = true; break; } if (crossesExc) continue; /* skip registering this chain */ if (clean) { matchEnd = conv; wildcardStart = lastFt; } } /* Bail when the match is the LAST statement of a loop with PER-CASE back-edges: each case body's fall-through `continue`s directly to the loop top (a JUMP_BACKWARD inside the match span). CPython collapsed the match-merge into the loop-continue → one back-edge PER CASE; a structured `match` recompiles with a shared merge (one back-edge + per-case JUMP_FORWARDs) → loop-opcode count diverges. The faithful render is gate-dirty by construction, so leave it to the if/elif fallback (which recompiles to the per-case back-edge layout). JUMP_BACKWARD_NO_INTERRUPT (await SEND loops) is NOT a loop continue. */ { int spanStart = tests[chain[0]].startOff; bool loopBack = false; for (const auto& iv : v) if (iv.off >= spanStart && iv.off < matchEnd && iv.op == Pyc::JUMP_BACKWARD_A) { loopBack = true; break; } if (loopBack) continue; } for (size_t k = 0; k < chain.size(); ++k) { const Test& tc = tests[chain[k]]; VCase vc; vc.isFirst = (k == 0); vc.isLast = (k + 1 == chain.size()); vc.matchEnd = matchEnd; vc.failTarget = tc.ft; vc.bodyStart = caseBodyStart(tc); matchValue[tc.compareOff] = vc; matchCaseEnd[tc.ft] = tc.ft; /* close case at ft; no machinery to skip */ } if (wildcardStart >= 0) { matchWildcardOpen[wildcardStart] = matchEnd; /* open `case _:` here */ matchCaseEnd[matchEnd] = matchEnd; /* close the wildcard at the merge */ } matchBlockEnd.insert(matchEnd); } } while (!source.atEof()) { #if defined(BLOCK_DEBUG) || defined(STACK_DEBUG) fprintf(stderr, "%-7d", pos); #ifdef STACK_DEBUG fprintf(stderr, "%-5d", (unsigned int)stack_hist.size() + 1); #endif #ifdef BLOCK_DEBUG for (unsigned int i = 0; i < 
blocks.size(); i++) fprintf(stderr, " "); fprintf(stderr, "%s (%d)", curblock->type_str(), curblock->end()); #endif fprintf(stderr, "\n"); #endif while (next_exception_entry < exception_entries.size() && exception_entries[next_exception_entry].start_offset < pos) { next_exception_entry++; } if (next_exception_entry < exception_entries.size()) { const auto& entry = exception_entries[next_exception_entry]; if (entry.start_offset == pos && entry.stack_depth == 0 && !entry.push_lasti) { if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { curblock.cast()->setExcept(entry.target); } else { PycRef next = new ASTContainerBlock(0, entry.target); blocks.push(next.cast()); curblock = blocks.top(); } stack_hist.push(stack); PycRef tryblock = new ASTBlock(ASTBlock::BLK_TRY, entry.target, true); blocks.push(tryblock.cast()); curblock = blocks.top(); next_exception_entry++; } } if (curblock->blktype() == ASTBlock::BLK_TRY && curblock->end() == pos && blocks.size() > 1) { PycRef prev = curblock; blocks.pop(); curblock = blocks.top(); if (curblock->blktype() == ASTBlock::BLK_CONTAINER && curblock.cast()->hasExcept()) { if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } curblock->append(prev.cast()); stack_hist.push(stack); PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, 0, NULL, false); except->init(); blocks.push(except); curblock = blocks.top(); } else { blocks.push(prev); curblock = prev; } } if (curblock->blktype() == ASTBlock::BLK_EXCEPT && curblock->end() == pos && blocks.size() > 1) { PycRef prev = curblock; blocks.pop(); curblock = blocks.top(); if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } if (prev->size() != 0) { curblock->append(prev.cast()); } if (curblock->blktype() == ASTBlock::BLK_CONTAINER && !curblock.cast()->hasFinally()) { PycRef cont = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(cont.cast()); } } /* Close the whole `match` at its end (post-match code starts here). 
*/ if (curblock->blktype() == ASTBlock::BLK_MATCH && matchBlockEnd.count(pos)) { PycRef ms = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(ms.cast()); } /* A `match` case body whose LAST statement is an else-less `if X: Y` exits to the match merge via a JUMP_FORWARD that pycdc misreads as an `else:` (a BLK_ELSE spanning to matchEnd, PAST the case's fail-target). At the case's fail-target (a matchCaseEnd point) curblock is that spurious BLK_ELSE/BLK_IF, not the BLK_CASE, so the close below never fires and the NEXT case opens nested inside the else (events_excavating2: `case RIGHT: if…: … else: case UP:…` — invalid). Drain the run of inner BLK_IF/BLK_ELSE/BLK_ELIF sitting on the BLK_CASE into it here so the case can close as a sibling. Non-mutating peek confirms the shape first. */ if (matchCaseEnd.count(pos) && blocks.size() > 1 && (curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELSE || curblock->blktype() == ASTBlock::BLK_ELIF)) { std::stack > peek = blocks; int nif = 0; bool shape = false; while (peek.size() > 1 && (peek.top()->blktype() == ASTBlock::BLK_IF || peek.top()->blktype() == ASTBlock::BLK_ELSE || peek.top()->blktype() == ASTBlock::BLK_ELIF)) { peek.pop(); nif++; if (peek.top()->blktype() == ASTBlock::BLK_CASE) { shape = true; break; } } if (shape) { for (int k = 0; k < nif; ++k) { PycRef inner = curblock; blocks.pop(); if (!stack_hist.empty()) stack_hist.pop(); curblock = blocks.top(); curblock->append(inner.cast()); } } } /* Close a `match` case at its pattern-fail target (the next-case POP_TOP). The body just ran; close the BLK_CASE into the BLK_MATCH and skip the machinery POP_TOP that discards the None match-result. 
*/ if (curblock->blktype() == ASTBlock::BLK_CASE && matchCaseEnd.count(pos)) { PycRef cs = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(cs.cast()); int after = matchCaseEnd[pos]; source.setPos(after); pos = after; while (next_exception_entry < exception_entries.size() && exception_entries[next_exception_entry].start_offset < pos) next_exception_entry++; continue; } /* Open a wildcard `case _:` block (the last typed case just closed and set pos to the wildcard body start). The body reconstructs normally; the BLK_CASE closes at the merge (matchCaseEnd[merge]) and the BLK_MATCH right after it. */ if (curblock->blktype() == ASTBlock::BLK_MATCH && matchWildcardOpen.count(pos)) { PycRef us = new PycString(); us->setValue("_"); blocks.push(new ASTCaseBlock(matchWildcardOpen[pos], new ASTName(us))); curblock = blocks.top(); curblock->init(); } curpos = pos; bc_next(source, mod, opcode, operand, pos); if (need_try && opcode != Pyc::SETUP_EXCEPT_A) { need_try = false; /* Store the current stack for the except/finally statement(s) */ stack_hist.push(stack); PycRef tryblock = new ASTBlock(ASTBlock::BLK_TRY, curblock->end(), true); blocks.push(tryblock); curblock = blocks.top(); } else if (else_pop && opcode != Pyc::JUMP_FORWARD_A && opcode != Pyc::JUMP_IF_FALSE_A && opcode != Pyc::JUMP_IF_FALSE_OR_POP_A && opcode != Pyc::POP_JUMP_IF_FALSE_A && opcode != Pyc::POP_JUMP_FORWARD_IF_FALSE_A && opcode != Pyc::JUMP_IF_TRUE_A && opcode != Pyc::JUMP_IF_TRUE_OR_POP_A && opcode != Pyc::POP_JUMP_IF_TRUE_A && opcode != Pyc::POP_JUMP_FORWARD_IF_TRUE_A && opcode != Pyc::POP_BLOCK) { else_pop = false; PycRef prev = curblock; while (prev->end() < pos && prev->blktype() != ASTBlock::BLK_MAIN) { if (prev->blktype() != ASTBlock::BLK_CONTAINER) { if (prev->end() == 0) { break; } /* We want to keep the stack the same, but we need to pop * a level off the history. 
*/ //stack = stack_hist.top(); if (!stack_hist.empty()) stack_hist.pop(); } blocks.pop(); if (blocks.empty()) break; curblock = blocks.top(); curblock->append(prev.cast()); prev = curblock; CheckIfExpr(stack, curblock); } } switch (opcode) { case Pyc::BINARY_OP_A: { ASTBinary::BinOp op = ASTBinary::from_binary_op(operand); if (op == ASTBinary::BIN_INVALID) fprintf(stderr, "Unsupported `BINARY_OP` operand value: %d\n", operand); PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); stack.push(new ASTBinary(left, right, op)); } break; case Pyc::BINARY_ADD: case Pyc::BINARY_AND: case Pyc::BINARY_DIVIDE: case Pyc::BINARY_FLOOR_DIVIDE: case Pyc::BINARY_LSHIFT: case Pyc::BINARY_MODULO: case Pyc::BINARY_MULTIPLY: case Pyc::BINARY_OR: case Pyc::BINARY_POWER: case Pyc::BINARY_RSHIFT: case Pyc::BINARY_SUBTRACT: case Pyc::BINARY_TRUE_DIVIDE: case Pyc::BINARY_XOR: case Pyc::BINARY_MATRIX_MULTIPLY: case Pyc::INPLACE_ADD: case Pyc::INPLACE_AND: case Pyc::INPLACE_DIVIDE: case Pyc::INPLACE_FLOOR_DIVIDE: case Pyc::INPLACE_LSHIFT: case Pyc::INPLACE_MODULO: case Pyc::INPLACE_MULTIPLY: case Pyc::INPLACE_OR: case Pyc::INPLACE_POWER: case Pyc::INPLACE_RSHIFT: case Pyc::INPLACE_SUBTRACT: case Pyc::INPLACE_TRUE_DIVIDE: case Pyc::INPLACE_XOR: case Pyc::INPLACE_MATRIX_MULTIPLY: { ASTBinary::BinOp op = ASTBinary::from_opcode(opcode); if (op == ASTBinary::BIN_INVALID) throw std::runtime_error("Unhandled opcode from ASTBinary::from_opcode"); PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); stack.push(new ASTBinary(left, right, op)); } break; case Pyc::BINARY_SUBSCR: { PycRef subscr = stack.top(); stack.pop(); PycRef src = stack.top(); stack.pop(); stack.push(new ASTSubscr(src, subscr)); } break; case Pyc::BREAK_LOOP: curblock->append(new ASTKeyword(ASTKeyword::KW_BREAK)); break; case Pyc::BUILD_CLASS: { PycRef class_code = stack.top(); stack.pop(); PycRef bases = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); 
stack.push(new ASTClass(class_code, bases, name)); } break; case Pyc::BUILD_FUNCTION: { PycRef fun_code = stack.top(); stack.pop(); stack.push(new ASTFunction(fun_code, {}, {})); } break; case Pyc::BUILD_LIST_A: { ASTList::value_t values; for (int i=0; iverCompare(3, 5) >= 0) { auto map = new ASTMap; for (int i=0; i value = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); map->add(key, value); } stack.push(map); } else { if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } stack.push(new ASTMap()); } break; case Pyc::BUILD_CONST_KEY_MAP_A: // Top of stack will be a tuple of keys. // Values will start at TOS - 1. { PycRef keys = stack.top(); stack.pop(); ASTConstMap::values_t values; values.reserve(operand); for (int i = 0; i < operand; ++i) { PycRef value = stack.top(); stack.pop(); values.push_back(value); } stack.push(new ASTConstMap(keys, values)); } break; case Pyc::STORE_MAP: { PycRef key = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); PycRef map = stack.top().cast(); map->add(key, value); } break; case Pyc::BUILD_SLICE_A: { if (operand == 2) { PycRef end = stack.top(); stack.pop(); PycRef start = stack.top(); stack.pop(); if (start.type() == ASTNode::NODE_OBJECT && start.cast()->object() == Pyc_None) { start = NULL; } if (end.type() == ASTNode::NODE_OBJECT && end.cast()->object() == Pyc_None) { end = NULL; } if (start == NULL && end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE0)); } else if (start == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE2, start, end)); } else if (end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE1, start, end)); } else { stack.push(new ASTSlice(ASTSlice::SLICE3, start, end)); } } else if (operand == 3) { PycRef step = stack.top(); stack.pop(); PycRef end = stack.top(); stack.pop(); PycRef start = stack.top(); stack.pop(); if (start.type() == ASTNode::NODE_OBJECT && start.cast()->object() == Pyc_None) { start = NULL; } if (end.type() == ASTNode::NODE_OBJECT && 
end.cast()->object() == Pyc_None) { end = NULL; } if (step.type() == ASTNode::NODE_OBJECT && step.cast()->object() == Pyc_None) { step = NULL; } /* We have to do this as a slice where one side is another slice */ /* [[a:b]:c] */ if (start == NULL && end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE0)); } else if (start == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE2, start, end)); } else if (end == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE1, start, end)); } else { stack.push(new ASTSlice(ASTSlice::SLICE3, start, end)); } PycRef lhs = stack.top(); stack.pop(); if (step == NULL) { stack.push(new ASTSlice(ASTSlice::SLICE1, lhs, step)); } else { stack.push(new ASTSlice(ASTSlice::SLICE3, lhs, step)); } } } break; case Pyc::BUILD_STRING_A: { // Nearly identical logic to BUILD_LIST ASTList::value_t values; for (int i = 0; i < operand; i++) { values.push_front(stack.top()); stack.pop(); } stack.push(new ASTJoinedStr(values)); } break; case Pyc::BUILD_TUPLE_A: { // if class is a closure code, ignore this tuple PycRef tos = stack.top(); if (tos && tos->type() == ASTNode::NODE_LOADBUILDCLASS) { break; } ASTTuple::value_t values; values.resize(operand); for (int i=0; igetConst(operand).cast()->size(); ASTKwNamesMap kwparamList; std::vector> keys = code->getConst(operand).cast()->values(); for (int i = 0; i < kwparams; i++) { kwparamList.add(new ASTObject(keys[kwparams - i - 1]), stack.top()); stack.pop(); } stack.push(new ASTKwNamesMap(kwparamList)); } break; case Pyc::CALL_A: case Pyc::CALL_FUNCTION_A: case Pyc::INSTRUMENTED_CALL_A: { int kwparams = (operand & 0xFF00) >> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; /* Test for the load build class function */ stack_hist.push(stack); int basecnt = 0; ASTTuple::value_t bases; bases.resize(basecnt); PycRef TOS = stack.top(); int TOS_type = TOS.type(); // bases are NODE_NAME and NODE_BINARY at TOS while (TOS_type == ASTNode::NODE_NAME || TOS_type == 
ASTNode::NODE_BINARY) { bases.resize(basecnt + 1); bases[basecnt] = TOS; basecnt++; stack.pop(); TOS = stack.top(); TOS_type = TOS.type(); } // qualified name is PycString at TOS PycRef name = stack.top(); stack.pop(); PycRef function = stack.top(); stack.pop(); PycRef loadbuild = stack.top(); stack.pop(); int loadbuild_type = loadbuild.type(); if (loadbuild_type == ASTNode::NODE_LOADBUILDCLASS) { PycRef call = new ASTCall(function, pparamList, kwparamList); stack.push(new ASTClass(call, new ASTTuple(bases), name)); stack_hist.pop(); break; } else { stack = stack_hist.top(); stack_hist.pop(); } /* KW_NAMES(i) Stores a reference to co_consts[consti] into an internal variable for use by CALL. co_consts[consti] must be a tuple of strings. New in version 3.11. */ if (mod->verCompare(3, 11) >= 0) { PycRef object_or_map = stack.top(); if (object_or_map.type() == ASTNode::NODE_KW_NAMES_MAP) { stack.pop(); PycRef kwparams_map = object_or_map.cast(); for (ASTKwNamesMap::map_t::const_iterator it = kwparams_map->values().begin(); it != kwparams_map->values().end(); it++) { kwparamList.push_front(std::make_pair(it->first, it->second)); pparams -= 1; } } } else { for (int i = 0; i < kwparams; i++) { PycRef val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } } for (int i=0; i param = stack.top(); stack.pop(); if (param.type() == ASTNode::NODE_FUNCTION) { PycRef fun_code = param.cast()->code(); PycRef code_src = fun_code.cast()->object().cast(); PycRef function_name = code_src->name(); if (function_name->isEqual("")) { pparamList.push_front(param); } else { // Decorator used PycRef decor_name = new ASTName(function_name); curblock->append(new ASTStore(param, decor_name)); pparamList.push_front(decor_name); } } else { pparamList.push_front(param); } } PycRef func = stack.top(); stack.pop(); if ((opcode == Pyc::CALL_A || opcode == Pyc::INSTRUMENTED_CALL_A) && stack.top() == nullptr) { stack.pop(); } 
stack.push(new ASTCall(func, pparamList, kwparamList)); } break; case Pyc::CALL_FUNCTION_VAR_A: { PycRef var = stack.top(); stack.pop(); int kwparams = (operand & 0xFF00) >> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; for (int i=0; i val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } for (int i=0; i func = stack.top(); stack.pop(); PycRef call = new ASTCall(func, pparamList, kwparamList); call.cast()->setVar(var); stack.push(call); } break; case Pyc::CALL_FUNCTION_KW_A: { PycRef kw = stack.top(); stack.pop(); int kwparams = (operand & 0xFF00) >> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; for (int i=0; i val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } for (int i=0; i func = stack.top(); stack.pop(); PycRef call = new ASTCall(func, pparamList, kwparamList); call.cast()->setKW(kw); stack.push(call); } break; case Pyc::CALL_FUNCTION_VAR_KW_A: { PycRef kw = stack.top(); stack.pop(); PycRef var = stack.top(); stack.pop(); int kwparams = (operand & 0xFF00) >> 8; int pparams = (operand & 0xFF); ASTCall::kwparam_t kwparamList; ASTCall::pparam_t pparamList; for (int i=0; i val = stack.top(); stack.pop(); PycRef key = stack.top(); stack.pop(); kwparamList.push_front(std::make_pair(key, val)); } for (int i=0; i func = stack.top(); stack.pop(); PycRef call = new ASTCall(func, pparamList, kwparamList); call.cast()->setKW(kw); call.cast()->setVar(var); stack.push(call); } break; case Pyc::CALL_METHOD_A: { ASTCall::pparam_t pparamList; for (int i = 0; i < operand; i++) { PycRef param = stack.top(); stack.pop(); if (param.type() == ASTNode::NODE_FUNCTION) { PycRef fun_code = param.cast()->code(); PycRef code_src = fun_code.cast()->object().cast(); PycRef function_name = code_src->name(); if (function_name->isEqual("")) { 
pparamList.push_front(param); } else { // Decorator used PycRef decor_name = new ASTName(function_name); curblock->append(new ASTStore(param, decor_name)); pparamList.push_front(decor_name); } } else { pparamList.push_front(param); } } PycRef func = stack.top(); stack.pop(); stack.push(new ASTCall(func, pparamList, ASTCall::kwparam_t())); } break; case Pyc::CONTINUE_LOOP_A: curblock->append(new ASTKeyword(ASTKeyword::KW_CONTINUE)); break; case Pyc::COMPARE_OP_A: { /* SHIP-149: a registered value-pattern match case test. Open the `match`/`case` instead of an `if x == pat`. The subject threads on the stack via the per-case COPY; pop the pattern + the subject COPY (non-last) or the subject itself (last case), open BLK_MATCH (first) + BLK_CASE(pattern), and jump to the body — reuses matchCaseEnd / matchBlockEnd to close. */ auto vmi = matchValue.find(curpos); if (vmi != matchValue.end()) { const VCase& vc = vmi->second; PycRef pattern = stack.top(); stack.pop(); if (vc.isLast) { stack.pop(); /* last case consumes the subject */ } else { PycRef copy = stack.top(); stack.pop(); /* drop the COPY */ if (vc.isFirst) { PycRef subject = stack.top(); /* threads; don't pop */ blocks.push(new ASTMatchBlock(vc.matchEnd, subject)); curblock = blocks.top(); } } blocks.push(new ASTCaseBlock(vc.failTarget, pattern)); curblock = blocks.top(); curblock->init(); source.setPos(vc.bodyStart); pos = vc.bodyStart; while (next_exception_entry < exception_entries.size() && exception_entries[next_exception_entry].start_offset < pos) next_exception_entry++; break; } PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); auto arg = operand; if (mod->verCompare(3, 12) == 0) arg >>= 4; // changed under GH-100923 else if (mod->verCompare(3, 13) >= 0) arg >>= 5; stack.push(new ASTCompare(left, right, arg)); } break; case Pyc::CONTAINS_OP_A: { PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); // The operand will be 0 for 'in' and 1 for 'not 
in'. stack.push(new ASTCompare(left, right, operand ? ASTCompare::CMP_NOT_IN : ASTCompare::CMP_IN)); } break; case Pyc::DELETE_ATTR_A: { PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR))); } break; case Pyc::DELETE_GLOBAL_A: code->markGlobal(code->getName(operand)); /* Fall through */ case Pyc::DELETE_NAME_A: { PycRef varname = code->getName(operand); if (varname->length() >= 2 && varname->value()[0] == '_' && varname->value()[1] == '[') { /* Don't show deletes that are a result of list comps. */ break; } PycRef name = new ASTName(varname); curblock->append(new ASTDelete(name)); } break; case Pyc::DELETE_FAST_A: { PycRef name; if (mod->verCompare(1, 3) < 0) name = new ASTName(code->getName(operand)); else name = new ASTName(code->getLocal(operand)); if (name.cast()->name()->value()[0] == '_' && name.cast()->name()->value()[1] == '[') { /* Don't show deletes that are a result of list comps. 
*/ break; } curblock->append(new ASTDelete(name)); } break; case Pyc::DELETE_SLICE_0: { PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE0)))); } break; case Pyc::DELETE_SLICE_1: { PycRef upper = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE1, upper)))); } break; case Pyc::DELETE_SLICE_2: { PycRef lower = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE2, NULL, lower)))); } break; case Pyc::DELETE_SLICE_3: { PycRef lower = stack.top(); stack.pop(); PycRef upper = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, new ASTSlice(ASTSlice::SLICE3, upper, lower)))); } break; case Pyc::DELETE_SUBSCR: { PycRef key = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); curblock->append(new ASTDelete(new ASTSubscr(name, key))); } break; case Pyc::DUP_TOP: { if (stack.top().type() == PycObject::TYPE_NULL) { stack.push(stack.top()); } else if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { auto chainstore = stack.top(); stack.pop(); stack.push(stack.top()); stack.push(chainstore); } else { stack.push(stack.top()); ASTNodeList::list_t targets; stack.push(new ASTChainStore(targets, stack.top())); } } break; case Pyc::DUP_TOP_TWO: { PycRef first = stack.top(); stack.pop(); PycRef second = stack.top(); stack.push(first); stack.push(second); stack.push(first); } break; case Pyc::DUP_TOPX_A: { std::stack > first; std::stack > second; for (int i = 0; i < operand; i++) { PycRef node = stack.top(); stack.pop(); first.push(node); second.push(node); } while (first.size()) { stack.push(first.top()); first.pop(); } while (second.size()) { stack.push(second.top()); second.pop(); } } break; case Pyc::END_FINALLY: { bool isFinally = false; if 
(curblock->blktype() == ASTBlock::BLK_FINALLY) { PycRef final = curblock; blocks.pop(); stack = stack_hist.top(); stack_hist.pop(); curblock = blocks.top(); curblock->append(final.cast()); isFinally = true; } else if (curblock->blktype() == ASTBlock::BLK_EXCEPT) { blocks.pop(); PycRef prev = curblock; bool isUninitAsyncFor = false; if (blocks.top()->blktype() == ASTBlock::BLK_CONTAINER) { auto container = blocks.top(); blocks.pop(); auto asyncForBlock = blocks.top(); isUninitAsyncFor = asyncForBlock->blktype() == ASTBlock::BLK_ASYNCFOR && !asyncForBlock->inited(); if (isUninitAsyncFor) { auto tryBlock = container->nodes().front().cast(); if (!tryBlock->nodes().empty() && tryBlock->blktype() == ASTBlock::BLK_TRY) { auto store = tryBlock->nodes().front().try_cast(); if (store) { asyncForBlock.cast()->setIndex(store->dest()); } } curblock = blocks.top(); stack = stack_hist.top(); stack_hist.pop(); if (!curblock->inited()) fprintf(stderr, "Error when decompiling 'async for'.\n"); } else { blocks.push(container); } } if (!isUninitAsyncFor) { if (curblock->size() != 0) { blocks.top()->append(curblock.cast()); } curblock = blocks.top(); /* Turn it into an else statement. */ if (curblock->end() != pos || curblock.cast()->hasFinally()) { PycRef elseblk = new ASTBlock(ASTBlock::BLK_ELSE, prev->end()); elseblk->init(); blocks.push(elseblk); curblock = blocks.top(); } else { stack = stack_hist.top(); stack_hist.pop(); } } } if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { /* This marks the end of the except block(s). */ PycRef cont = curblock.cast(); if (!cont->hasFinally() || isFinally) { /* If there's no finally block, pop the container. 
*/ blocks.pop(); curblock = blocks.top(); curblock->append(cont.cast()); } } } break; case Pyc::EXEC_STMT: { if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef loc = stack.top(); stack.pop(); PycRef glob = stack.top(); stack.pop(); PycRef stmt = stack.top(); stack.pop(); curblock->append(new ASTExec(stmt, glob, loc)); } break; case Pyc::FOR_ITER_A: case Pyc::INSTRUMENTED_FOR_ITER_A: { PycRef iter = stack.top(); // Iterable if (mod->verCompare(3, 12) < 0) { // Do not pop the iterator for py 3.12+ stack.pop(); } /* Pop it? Don't pop it? */ int end; bool comprehension = false; // before 3.8, there is a SETUP_LOOP instruction with block start and end position, // the operand is usually a jump to a POP_BLOCK instruction // after 3.8, block extent has to be inferred implicitly; the operand is a jump to a position after the for block if (mod->majorVer() == 3 && mod->minorVer() >= 8) { end = operand; if (mod->verCompare(3, 10) >= 0) end *= sizeof(uint16_t); // // BPO-27129 end += pos; comprehension = strcmp(code->name()->value(), "") == 0; } else { PycRef top = blocks.top(); end = top->end(); // block end position from SETUP_LOOP if (top->blktype() == ASTBlock::BLK_WHILE) { blocks.pop(); } else { comprehension = true; } } PycRef forblk = new ASTIterBlock(ASTBlock::BLK_FOR, curpos, end, iter); forblk->setComprehension(comprehension); blocks.push(forblk.cast()); curblock = blocks.top(); stack.push(NULL); } break; case Pyc::FOR_LOOP_A: { PycRef curidx = stack.top(); // Current index stack.pop(); PycRef iter = stack.top(); // Iterable stack.pop(); bool comprehension = false; PycRef top = blocks.top(); if (top->blktype() == ASTBlock::BLK_WHILE) { blocks.pop(); } else { comprehension = true; } PycRef forblk = new ASTIterBlock(ASTBlock::BLK_FOR, curpos, top->end(), iter); forblk->setComprehension(comprehension); blocks.push(forblk.cast()); curblock = blocks.top(); /* Python Docs say: "push the sequence, the incremented counter, and the current item onto 
the stack." */ stack.push(iter); stack.push(curidx); stack.push(NULL); // We can totally hack this >_> } break; case Pyc::GET_AITER: { // Logic similar to FOR_ITER_A PycRef iter = stack.top(); // Iterable stack.pop(); PycRef top = blocks.top(); if (top->blktype() == ASTBlock::BLK_WHILE) { blocks.pop(); PycRef forblk = new ASTIterBlock(ASTBlock::BLK_ASYNCFOR, curpos, top->end(), iter); blocks.push(forblk.cast()); curblock = blocks.top(); stack.push(nullptr); } else { fprintf(stderr, "Unsupported use of GET_AITER outside of SETUP_LOOP\n"); } } break; case Pyc::GET_ANEXT: break; case Pyc::FORMAT_VALUE_A: { auto conversion_flag = static_cast(operand); PycRef format_spec = nullptr; if (conversion_flag & ASTFormattedValue::HAVE_FMT_SPEC) { format_spec = stack.top(); stack.pop(); } auto val = stack.top(); stack.pop(); stack.push(new ASTFormattedValue(val, conversion_flag, format_spec)); } break; case Pyc::GET_AWAITABLE: { PycRef object = stack.top(); stack.pop(); stack.push(new ASTAwaitable(object)); } break; case Pyc::GET_ITER: case Pyc::GET_YIELD_FROM_ITER: /* We just entirely ignore this */ break; case Pyc::IMPORT_NAME_A: if (mod->majorVer() == 1) { stack.push(new ASTImport(new ASTName(code->getName(operand)), NULL)); } else { PycRef fromlist = stack.top(); stack.pop(); if (mod->verCompare(2, 5) >= 0) stack.pop(); // Level -- we don't care stack.push(new ASTImport(new ASTName(code->getName(operand)), fromlist)); } break; case Pyc::IMPORT_FROM_A: stack.push(new ASTName(code->getName(operand))); break; case Pyc::IMPORT_STAR: { PycRef import = stack.top(); stack.pop(); curblock->append(new ASTStore(import, NULL)); } break; case Pyc::IS_OP_A: { PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); // The operand will be 0 for 'is' and 1 for 'is not'. stack.push(new ASTCompare(left, right, operand ? 
ASTCompare::CMP_IS_NOT : ASTCompare::CMP_IS)); } break; case Pyc::JUMP_IF_FALSE_A: case Pyc::JUMP_IF_TRUE_A: case Pyc::JUMP_IF_FALSE_OR_POP_A: case Pyc::JUMP_IF_TRUE_OR_POP_A: case Pyc::POP_JUMP_IF_FALSE_A: case Pyc::POP_JUMP_IF_TRUE_A: case Pyc::POP_JUMP_FORWARD_IF_FALSE_A: case Pyc::POP_JUMP_FORWARD_IF_TRUE_A: case Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A: case Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A: { PycRef cond = stack.top(); PycRef ifblk; int popped = ASTCondBlock::UNINITED; if (opcode == Pyc::POP_JUMP_IF_FALSE_A || opcode == Pyc::POP_JUMP_IF_TRUE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_FALSE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A || opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A || opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A) { /* Pop condition before the jump */ stack.pop(); popped = ASTCondBlock::PRE_POPPED; } /* Store the current stack for the else statement(s) */ stack_hist.push(stack); if (opcode == Pyc::JUMP_IF_FALSE_OR_POP_A || opcode == Pyc::JUMP_IF_TRUE_OR_POP_A) { /* Pop condition only if condition is met */ stack.pop(); popped = ASTCondBlock::POPPED; } /* "Jump if true" means "Jump if not false" */ bool neg = opcode == Pyc::JUMP_IF_TRUE_A || opcode == Pyc::JUMP_IF_TRUE_OR_POP_A || opcode == Pyc::POP_JUMP_IF_TRUE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A || opcode == Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A; int offs = operand; if (mod->verCompare(3, 10) >= 0) offs *= sizeof(uint16_t); // // BPO-27129 if (mod->verCompare(3, 12) >= 0 || opcode == Pyc::JUMP_IF_FALSE_A || opcode == Pyc::JUMP_IF_TRUE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A || opcode == Pyc::POP_JUMP_FORWARD_IF_FALSE_A) { /* Offset is relative in these cases */ offs += pos; } if (cond.type() == ASTNode::NODE_COMPARE && cond.cast()->op() == ASTCompare::CMP_EXCEPTION) { int except_end = offs; if (curblock->blktype() == ASTBlock::BLK_EXCEPT && curblock.cast()->cond() == NULL) { except_end = curblock->end(); blocks.pop(); curblock = blocks.top(); stack_hist.pop(); } ifblk = 
new ASTCondBlock(ASTBlock::BLK_EXCEPT, except_end, cond.cast()->right(), false); } else if (curblock->blktype() == ASTBlock::BLK_ELSE && curblock->size() == 0) { /* Collapse into elif statement */ blocks.pop(); stack = stack_hist.top(); stack_hist.pop(); ifblk = new ASTCondBlock(ASTBlock::BLK_ELIF, offs, cond, neg); } else if (curblock->size() == 0 && !curblock->inited() && curblock->blktype() == ASTBlock::BLK_WHILE) { /* The condition for a while loop */ PycRef top = blocks.top(); blocks.pop(); ifblk = new ASTCondBlock(top->blktype(), offs, cond, neg); /* We don't store the stack for loops! Pop it! */ stack_hist.pop(); } else if (curblock->size() == 0 && curblock->end() <= offs && (curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELIF || curblock->blktype() == ASTBlock::BLK_WHILE)) { PycRef newcond; PycRef top = curblock.cast(); PycRef cond1 = top->cond(); blocks.pop(); if (curblock->blktype() == ASTBlock::BLK_WHILE) { stack_hist.pop(); } else { FastStack s_top = stack_hist.top(); stack_hist.pop(); stack_hist.pop(); stack_hist.push(s_top); } if (curblock->end() == offs || (curblock->end() == curpos && !top->negative())) { /* if blah and blah */ newcond = new ASTBinary(cond1, cond, ASTBinary::BIN_LOG_AND); } else { /* if blah or blah */ newcond = new ASTBinary(cond1, cond, ASTBinary::BIN_LOG_OR); } ifblk = new ASTCondBlock(top->blktype(), offs, newcond, neg); } else if (curblock->blktype() == ASTBlock::BLK_FOR && curblock.cast()->isComprehension() && mod->verCompare(2, 7) >= 0) { /* Comprehension condition */ curblock.cast()->setCondition(cond); stack_hist.pop(); // TODO: Handle older python versions, where condition // is laid out a little differently. 
break; } else { /* Plain old if statement */ ifblk = new ASTCondBlock(ASTBlock::BLK_IF, offs, cond, neg); } if (popped) ifblk->init(popped); blocks.push(ifblk.cast()); curblock = blocks.top(); } break; case Pyc::JUMP_ABSOLUTE_A: // bpo-47120: Replaced JUMP_ABSOLUTE by the relative jump JUMP_BACKWARD. case Pyc::JUMP_BACKWARD_A: case Pyc::JUMP_BACKWARD_NO_INTERRUPT_A: { int offs = operand; if (mod->verCompare(3, 10) >= 0) offs *= sizeof(uint16_t); // // BPO-27129 if (offs < pos) { if (curblock->blktype() == ASTBlock::BLK_FOR) { bool is_jump_to_start = offs == curblock.cast()->start(); bool should_pop_for_block = curblock.cast()->isComprehension(); // in v3.8, SETUP_LOOP is deprecated and for blocks aren't terminated by POP_BLOCK, so we add them here bool should_add_for_block = mod->majorVer() == 3 && mod->minorVer() >= 8 && is_jump_to_start && !curblock.cast()->isComprehension(); if (should_pop_for_block || should_add_for_block) { PycRef top = stack.top(); if (top.type() == ASTNode::NODE_COMPREHENSION) { PycRef comp = top.cast(); comp->addGenerator(curblock.cast()); } PycRef tmp = curblock; blocks.pop(); curblock = blocks.top(); if (should_add_for_block) { curblock->append(tmp.cast()); } } } else if (curblock->blktype() == ASTBlock::BLK_ELSE) { stack = stack_hist.top(); stack_hist.pop(); blocks.pop(); blocks.top()->append(curblock.cast()); curblock = blocks.top(); if (curblock->blktype() == ASTBlock::BLK_CONTAINER && !curblock.cast()->hasFinally()) { blocks.pop(); blocks.top()->append(curblock.cast()); curblock = blocks.top(); } } else { curblock->append(new ASTKeyword(ASTKeyword::KW_CONTINUE)); } /* We're in a loop, this jumps back to the start */ /* I think we'll just ignore this case... */ break; // Bad idea? Probably! 
} if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); if (cont->hasExcept() && pos < cont->except()) { PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, 0, NULL, false); except->init(); blocks.push(except); curblock = blocks.top(); } break; } if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } else { fprintf(stderr, "Warning: Stack history is empty, something wrong might have happened\n"); } PycRef prev = curblock; PycRef nil; bool push = true; do { blocks.pop(); blocks.top()->append(prev.cast()); if (prev->blktype() == ASTBlock::BLK_IF || prev->blktype() == ASTBlock::BLK_ELIF) { if (push) { stack_hist.push(stack); } PycRef next = new ASTBlock(ASTBlock::BLK_ELSE, blocks.top()->end()); if (prev->inited() == ASTCondBlock::PRE_POPPED) { next->init(ASTCondBlock::PRE_POPPED); } blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_EXCEPT) { if (push) { stack_hist.push(stack); } PycRef next = new ASTCondBlock(ASTBlock::BLK_EXCEPT, blocks.top()->end(), NULL, false); next->init(); blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_ELSE) { /* Special case */ prev = blocks.top(); if (!push) { stack = stack_hist.top(); stack_hist.pop(); } push = false; } else { prev = nil; } } while (prev != nil); curblock = blocks.top(); } break; case Pyc::JUMP_FORWARD_A: case Pyc::INSTRUMENTED_JUMP_FORWARD_A: { int offs = operand; if (mod->verCompare(3, 10) >= 0) offs *= sizeof(uint16_t); // // BPO-27129 if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); if (cont->hasExcept()) { stack_hist.push(stack); curblock->setEnd(pos+offs); PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); except->init(); blocks.push(except); curblock = blocks.top(); } break; } if (!stack_hist.empty()) { if (stack.empty()) // if it's part of if-expression, TOS at the moment is the result of "if" part stack = stack_hist.top(); 
stack_hist.pop(); } PycRef prev = curblock; PycRef nil; bool push = true; do { blocks.pop(); if (!blocks.empty()) blocks.top()->append(prev.cast()); if (prev->blktype() == ASTBlock::BLK_IF || prev->blktype() == ASTBlock::BLK_ELIF) { if (offs == 0) { prev = nil; continue; } if (push) { stack_hist.push(stack); } PycRef next = new ASTBlock(ASTBlock::BLK_ELSE, pos+offs); if (prev->inited() == ASTCondBlock::PRE_POPPED) { next->init(ASTCondBlock::PRE_POPPED); } blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_EXCEPT) { if (offs == 0) { prev = nil; continue; } if (push) { stack_hist.push(stack); } PycRef next = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); next->init(); blocks.push(next.cast()); prev = nil; } else if (prev->blktype() == ASTBlock::BLK_ELSE) { /* Special case */ prev = blocks.top(); if (!push) { stack = stack_hist.top(); stack_hist.pop(); } push = false; if (prev->blktype() == ASTBlock::BLK_MAIN) { /* Something went out of control! 
*/ prev = nil; } } else if (prev->blktype() == ASTBlock::BLK_TRY && prev->end() < pos+offs) { /* Need to add an except/finally block */ if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } if (blocks.top()->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = blocks.top().cast(); if (cont->hasExcept()) { if (push) { stack_hist.push(stack); } PycRef except = new ASTCondBlock(ASTBlock::BLK_EXCEPT, pos+offs, NULL, false); except->init(); blocks.push(except); } } else { fprintf(stderr, "Something TERRIBLE happened!!\n"); } prev = nil; } else { prev = nil; } } while (prev != nil); if (!blocks.empty()) { curblock = blocks.top(); if (curblock->blktype() == ASTBlock::BLK_EXCEPT) curblock->setEnd(pos+offs); } } break; case Pyc::LIST_APPEND: case Pyc::LIST_APPEND_A: { PycRef value = stack.top(); stack.pop(); PycRef list = stack.top(); if (curblock->blktype() == ASTBlock::BLK_FOR && curblock.cast()->isComprehension()) { stack.pop(); stack.push(new ASTComprehension(value)); } else { stack.push(new ASTSubscr(list, value)); /* Total hack */ } } break; case Pyc::SET_UPDATE_A: { PycRef rhs = stack.top(); stack.pop(); PycRef lhs = stack.top().cast(); stack.pop(); if (rhs.type() != ASTNode::NODE_OBJECT) { fprintf(stderr, "Unsupported argument found for SET_UPDATE\n"); break; } // I've only ever seen this be a TYPE_FROZENSET, but let's be careful... PycRef obj = rhs.cast()->object(); if (obj->type() != PycObject::TYPE_FROZENSET) { fprintf(stderr, "Unsupported argument type found for SET_UPDATE\n"); break; } ASTSet::value_t result = lhs->values(); for (const auto& it : obj.cast()->values()) { result.push_back(new ASTObject(it)); } stack.push(new ASTSet(result)); } break; case Pyc::LIST_EXTEND_A: { PycRef rhs = stack.top(); stack.pop(); PycRef lhs = stack.top().cast(); stack.pop(); if (rhs.type() != ASTNode::NODE_OBJECT) { fprintf(stderr, "Unsupported argument found for LIST_EXTEND\n"); break; } // I've only ever seen this be a SMALL_TUPLE, but let's be careful... 
PycRef obj = rhs.cast()->object(); if (obj->type() != PycObject::TYPE_TUPLE && obj->type() != PycObject::TYPE_SMALL_TUPLE) { fprintf(stderr, "Unsupported argument type found for LIST_EXTEND\n"); break; } ASTList::value_t result = lhs->values(); for (const auto& it : obj.cast()->values()) { result.push_back(new ASTObject(it)); } stack.push(new ASTList(result)); } break; case Pyc::LOAD_ATTR_A: { PycRef name = stack.top(); if (name.type() != ASTNode::NODE_IMPORT) { stack.pop(); if (mod->verCompare(3, 12) >= 0) { if (operand & 1) { /* Changed in version 3.12: If the low bit of name is set, then a NULL or self is pushed to the stack before the attribute or unbound method respectively. */ stack.push(nullptr); } operand >>= 1; } stack.push(new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR)); } } break; case Pyc::LOAD_BUILD_CLASS: stack.push(new ASTLoadBuildClass(new PycObject())); break; case Pyc::LOAD_CLOSURE_A: /* Ignore this */ break; case Pyc::LOAD_CONST_A: { PycRef t_ob = new ASTObject(code->getConst(operand)); if ((t_ob->object().type() == PycObject::TYPE_TUPLE || t_ob->object().type() == PycObject::TYPE_SMALL_TUPLE) && !t_ob->object().cast()->values().size()) { ASTTuple::value_t values; stack.push(new ASTTuple(values)); } else if (t_ob->object().type() == PycObject::TYPE_NONE) { stack.push(NULL); } else { stack.push(t_ob.cast()); } } break; case Pyc::LOAD_DEREF_A: case Pyc::LOAD_CLASSDEREF_A: stack.push(new ASTName(code->getCellVar(mod, operand))); break; case Pyc::LOAD_FAST_A: if (mod->verCompare(1, 3) < 0) stack.push(new ASTName(code->getName(operand))); else stack.push(new ASTName(code->getLocal(operand))); break; case Pyc::LOAD_FAST_LOAD_FAST_A: stack.push(new ASTName(code->getLocal(operand >> 4))); stack.push(new ASTName(code->getLocal(operand & 0xF))); break; case Pyc::LOAD_GLOBAL_A: if (mod->verCompare(3, 11) >= 0) { // Loads the global named co_names[namei>>1] onto the stack. 
if (operand & 1) { /* Changed in version 3.11: If the low bit of "NAMEI" (operand) is set, then a NULL is pushed to the stack before the global variable. */ stack.push(nullptr); } operand >>= 1; } stack.push(new ASTName(code->getName(operand))); break; case Pyc::LOAD_LOCALS: stack.push(new ASTNode(ASTNode::NODE_LOCALS)); break; case Pyc::STORE_LOCALS: stack.pop(); break; case Pyc::LOAD_METHOD_A: { // Behave like LOAD_ATTR PycRef name = stack.top(); stack.pop(); stack.push(new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR)); } break; case Pyc::LOAD_NAME_A: stack.push(new ASTName(code->getName(operand))); break; case Pyc::MAKE_CLOSURE_A: case Pyc::MAKE_FUNCTION_A: { PycRef fun_code = stack.top(); stack.pop(); /* Test for the qualified name of the function (at TOS) */ int tos_type = fun_code.cast()->object().type(); if (tos_type != PycObject::TYPE_CODE && tos_type != PycObject::TYPE_CODE2) { fun_code = stack.top(); stack.pop(); } ASTFunction::defarg_t defArgs, kwDefArgs; const int defCount = operand & 0xFF; const int kwDefCount = (operand >> 8) & 0xFF; for (int i = 0; i < defCount; ++i) { defArgs.push_front(stack.top()); stack.pop(); } for (int i = 0; i < kwDefCount; ++i) { kwDefArgs.push_front(stack.top()); stack.pop(); } stack.push(new ASTFunction(fun_code, defArgs, kwDefArgs)); } break; case Pyc::NOP: break; case Pyc::POP_BLOCK: { if (curblock->blktype() == ASTBlock::BLK_CONTAINER || curblock->blktype() == ASTBlock::BLK_FINALLY) { /* These should only be popped by an END_FINALLY */ break; } if (curblock->blktype() == ASTBlock::BLK_WITH) { // This should only be popped by a WITH_CLEANUP break; } if (curblock->nodes().size() && curblock->nodes().back().type() == ASTNode::NODE_KEYWORD) { curblock->removeLast(); } if (curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELIF || curblock->blktype() == ASTBlock::BLK_ELSE || curblock->blktype() == ASTBlock::BLK_TRY || curblock->blktype() == ASTBlock::BLK_EXCEPT || 
curblock->blktype() == ASTBlock::BLK_FINALLY) { if (!stack_hist.empty()) { stack = stack_hist.top(); stack_hist.pop(); } else { fprintf(stderr, "Warning: Stack history is empty, something wrong might have happened\n"); } } PycRef tmp = curblock; blocks.pop(); if (!blocks.empty()) curblock = blocks.top(); if (!(tmp->blktype() == ASTBlock::BLK_ELSE && tmp->nodes().size() == 0)) { curblock->append(tmp.cast()); } if (tmp->blktype() == ASTBlock::BLK_FOR && tmp->end() >= pos) { stack_hist.push(stack); PycRef blkelse = new ASTBlock(ASTBlock::BLK_ELSE, tmp->end()); blocks.push(blkelse); curblock = blocks.top(); } if (curblock->blktype() == ASTBlock::BLK_TRY && tmp->blktype() != ASTBlock::BLK_FOR && tmp->blktype() != ASTBlock::BLK_ASYNCFOR && tmp->blktype() != ASTBlock::BLK_WHILE) { stack = stack_hist.top(); stack_hist.pop(); tmp = curblock; blocks.pop(); curblock = blocks.top(); if (!(tmp->blktype() == ASTBlock::BLK_ELSE && tmp->nodes().size() == 0)) { curblock->append(tmp.cast()); } } if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { PycRef cont = curblock.cast(); if (tmp->blktype() == ASTBlock::BLK_ELSE && !cont->hasFinally()) { /* Pop the container */ blocks.pop(); curblock = blocks.top(); curblock->append(cont.cast()); } else if ((tmp->blktype() == ASTBlock::BLK_ELSE && cont->hasFinally()) || (tmp->blktype() == ASTBlock::BLK_TRY && !cont->hasExcept())) { /* Add the finally block */ stack_hist.push(stack); PycRef final = new ASTBlock(ASTBlock::BLK_FINALLY, 0, true); blocks.push(final); curblock = blocks.top(); } } if ((curblock->blktype() == ASTBlock::BLK_FOR || curblock->blktype() == ASTBlock::BLK_ASYNCFOR) && curblock->end() == pos) { blocks.pop(); blocks.top()->append(curblock.cast()); curblock = blocks.top(); } } break; case Pyc::POP_EXCEPT: /* Do nothing. */ break; case Pyc::PUSH_EXC_INFO: /* Python 3.11+: pushes exception info tuple. We ignore here to keep decompilation going. 
*/ break; case Pyc::CHECK_EXC_MATCH: { /* Python 3.11+: compares exception against handler type. */ PycRef right = stack.top(); stack.pop(); PycRef left = stack.top(); stack.pop(); stack.push(new ASTCompare(left, right, ASTCompare::CMP_EXCEPTION)); } break; case Pyc::END_FOR: { stack.pop(); if ((opcode == Pyc::END_FOR) && (mod->majorVer() == 3) && (mod->minorVer() == 12)) { // one additional pop for python 3.12 stack.pop(); } // end for loop here /* TODO : Ensure that FOR loop ends here. Due to CACHE instructions at play, the end indicated in the for loop by pycdas is not correct, it is off by some small amount. */ if (curblock->blktype() == ASTBlock::BLK_FOR) { PycRef prev = blocks.top(); blocks.pop(); curblock = blocks.top(); curblock->append(prev.cast()); } else { fprintf(stderr, "Wrong block type %i for END_FOR\n", curblock->blktype()); } } break; case Pyc::POP_TOP: { PycRef value = stack.top(); stack.pop(); /* Value-pattern match subject cleanup: the subject is computed once and THREADS on the stack (COPY'd per case for the COMPARE). When a case body exits via a loop continue / a return inside a loop, the compiler emits a POP_TOP to discard the still-live subject before the back-edge — pycdc would render it as a stray expression statement (`args['k']`). Inside a match, if the popped value IS the enclosing BLK_MATCH's subject node (same PycRef threaded through), drop it. 
*/ if (value != nullptr) { std::stack > ms = blocks; while (!ms.empty()) { if (ms.top()->blktype() == ASTBlock::BLK_MATCH) { if (ms.top().cast()->subject() == value) value = nullptr; // match subject cleanup -> drop break; } ms.pop(); } if (value == nullptr) break; } if (!curblock->inited()) { if (curblock->blktype() == ASTBlock::BLK_WITH) { curblock.cast()->setExpr(value); } else { curblock->init(); } break; } else if (value == nullptr || value->processed()) { break; } curblock->append(value); if (curblock->blktype() == ASTBlock::BLK_FOR && curblock.cast()->isComprehension()) { /* This relies on some really uncertain logic... * If it's a comprehension, the only POP_TOP should be * a call to append the iter to the list. */ if (value.type() == ASTNode::NODE_CALL) { auto& pparams = value.cast()->pparams(); if (!pparams.empty()) { PycRef res = pparams.front(); stack.push(new ASTComprehension(res)); } } } } break; case Pyc::PRINT_ITEM: { PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == nullptr && !printNode->eol()) printNode->add(stack.top()); else curblock->append(new ASTPrint(stack.top())); stack.pop(); } break; case Pyc::PRINT_ITEM_TO: { PycRef stream = stack.top(); stack.pop(); PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == stream && !printNode->eol()) printNode->add(stack.top()); else curblock->append(new ASTPrint(stack.top(), stream)); stack.pop(); if (stream) stream->setProcessed(); } break; case Pyc::PRINT_NEWLINE: { PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == nullptr && !printNode->eol()) printNode->setEol(true); else 
curblock->append(new ASTPrint(nullptr)); stack.pop(); } break; case Pyc::PRINT_NEWLINE_TO: { PycRef stream = stack.top(); stack.pop(); PycRef printNode; if (curblock->size() > 0 && curblock->nodes().back().type() == ASTNode::NODE_PRINT) printNode = curblock->nodes().back().try_cast(); if (printNode && printNode->stream() == stream && !printNode->eol()) printNode->setEol(true); else curblock->append(new ASTPrint(nullptr, stream)); stack.pop(); if (stream) stream->setProcessed(); } break; case Pyc::RAISE_VARARGS_A: { ASTRaise::param_t paramList; for (int i = 0; i < operand; i++) { paramList.push_front(stack.top()); stack.pop(); } curblock->append(new ASTRaise(paramList)); if ((curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELSE) && stack_hist.size() && (mod->verCompare(2, 6) >= 0)) { stack = stack_hist.top(); stack_hist.pop(); PycRef prev = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(prev.cast()); } } break; case Pyc::RERAISE: case Pyc::RERAISE_A: /* Python 3.11 cleanup opcode. 
*/ break; case Pyc::RETURN_VALUE: case Pyc::INSTRUMENTED_RETURN_VALUE_A: { PycRef value = stack.top(); stack.pop(); curblock->append(new ASTReturn(value)); if ((curblock->blktype() == ASTBlock::BLK_IF || curblock->blktype() == ASTBlock::BLK_ELSE) && stack_hist.size() && (mod->verCompare(2, 6) >= 0)) { stack = stack_hist.top(); stack_hist.pop(); PycRef prev = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(prev.cast()); bc_next(source, mod, opcode, operand, pos); } } break; case Pyc::RETURN_CONST_A: case Pyc::INSTRUMENTED_RETURN_CONST_A: { PycRef value = new ASTObject(code->getConst(operand)); curblock->append(new ASTReturn(value.cast())); } break; case Pyc::ROT_TWO: { PycRef one = stack.top(); stack.pop(); if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef two = stack.top(); stack.pop(); stack.push(one); stack.push(two); } break; case Pyc::ROT_THREE: { PycRef one = stack.top(); stack.pop(); PycRef two = stack.top(); stack.pop(); if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef three = stack.top(); stack.pop(); stack.push(one); stack.push(three); stack.push(two); } break; case Pyc::ROT_FOUR: { PycRef one = stack.top(); stack.pop(); PycRef two = stack.top(); stack.pop(); PycRef three = stack.top(); stack.pop(); if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { stack.pop(); } PycRef four = stack.top(); stack.pop(); stack.push(one); stack.push(four); stack.push(three); stack.push(two); } break; case Pyc::SET_LINENO_A: // Ignore break; case Pyc::SETUP_WITH_A: case Pyc::WITH_EXCEPT_START: { PycRef withblock = new ASTWithBlock(pos+operand); blocks.push(withblock); curblock = blocks.top(); } break; case Pyc::BEFORE_WITH: /* Python 3.11: setup for with block; ignore. */ break; case Pyc::WITH_CLEANUP: case Pyc::WITH_CLEANUP_START: { // Stack top should be a None. Ignore it. 
PycRef none = stack.top(); stack.pop(); if (none != NULL) { fprintf(stderr, "Something TERRIBLE happened!\n"); break; } if (curblock->blktype() == ASTBlock::BLK_WITH && curblock->end() == curpos) { PycRef with = curblock; blocks.pop(); curblock = blocks.top(); curblock->append(with.cast()); } else { fprintf(stderr, "Something TERRIBLE happened! No matching with block found for WITH_CLEANUP at %d\n", curpos); } } break; case Pyc::WITH_CLEANUP_FINISH: /* Ignore this */ break; case Pyc::SETUP_EXCEPT_A: { if (curblock->blktype() == ASTBlock::BLK_CONTAINER) { curblock.cast()->setExcept(pos+operand); } else { PycRef next = new ASTContainerBlock(0, pos+operand); blocks.push(next.cast()); } /* Store the current stack for the except/finally statement(s) */ stack_hist.push(stack); PycRef tryblock = new ASTBlock(ASTBlock::BLK_TRY, pos+operand, true); blocks.push(tryblock.cast()); curblock = blocks.top(); need_try = false; } break; case Pyc::SETUP_FINALLY_A: { PycRef next = new ASTContainerBlock(pos+operand); blocks.push(next.cast()); curblock = blocks.top(); need_try = true; } break; case Pyc::SETUP_LOOP_A: { PycRef next = new ASTCondBlock(ASTBlock::BLK_WHILE, pos+operand, NULL, false); blocks.push(next.cast()); curblock = blocks.top(); } break; case Pyc::SLICE_0: { PycRef name = stack.top(); stack.pop(); PycRef slice = new ASTSlice(ASTSlice::SLICE0); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::SLICE_1: { PycRef lower = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); PycRef slice = new ASTSlice(ASTSlice::SLICE1, lower); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::SLICE_2: { PycRef upper = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); PycRef slice = new ASTSlice(ASTSlice::SLICE2, NULL, upper); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::SLICE_3: { PycRef upper = stack.top(); stack.pop(); PycRef lower = stack.top(); stack.pop(); PycRef name = stack.top(); stack.pop(); PycRef slice = new 
ASTSlice(ASTSlice::SLICE3, lower, upper); stack.push(new ASTSubscr(name, slice)); } break; case Pyc::STORE_ATTR_A: { if (unpack) { PycRef name = stack.top(); stack.pop(); PycRef attr = new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(attr); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef name = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); PycRef attr = new ASTBinary(name, new ASTName(code->getName(operand)), ASTBinary::BIN_ATTR); if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, attr, stack, curblock); } else { curblock->append(new ASTStore(value, attr)); } } } break; case Pyc::STORE_DEREF_A: { if (unpack) { PycRef name = new ASTName(code->getCellVar(mod, operand)); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); PycRef name = new ASTName(code->getCellVar(mod, operand)); if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); } } } break; case Pyc::STORE_FAST_A: { if (unpack) { PycRef name; if (mod->verCompare(1, 3) < 0) name = new ASTName(code->getName(operand)); else name = new ASTName(code->getLocal(operand)); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else 
fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { PycRef tuple = tup.try_cast(); if (tuple != NULL) tuple->setRequireParens(false); curblock.cast()->setIndex(tup); } else if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); PycRef name; if (mod->verCompare(1, 3) < 0) name = new ASTName(code->getName(operand)); else name = new ASTName(code->getLocal(operand)); if (name.cast()->name()->value()[0] == '_' && name.cast()->name()->value()[1] == '[') { /* Don't show stores of list comp append objects. */ break; } if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { curblock.cast()->setIndex(name); } else if (curblock->blktype() == ASTBlock::BLK_WITH && !curblock->inited()) { curblock.cast()->setExpr(value); curblock.cast()->setVar(name); } else if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); } } } break; case Pyc::STORE_GLOBAL_A: { PycRef name = new ASTName(code->getName(operand)); if (unpack) { PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { PycRef tuple = tup.try_cast(); if (tuple != NULL) tuple->setRequireParens(false); curblock.cast()->setIndex(tup); } else if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); if (value.type() == ASTNode::NODE_CHAINSTORE) { 
append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); } } /* Mark the global as used */ code->markGlobal(name.cast()->name()); } break; case Pyc::STORE_NAME_A: { if (unpack) { PycRef name = new ASTName(code->getName(operand)); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(name); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { PycRef tuple = tup.try_cast(); if (tuple != NULL) tuple->setRequireParens(false); curblock.cast()->setIndex(tup); } else if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef value = stack.top(); stack.pop(); PycRef varname = code->getName(operand); if (varname->length() >= 2 && varname->value()[0] == '_' && varname->value()[1] == '[') { /* Don't show stores of list comp append objects. 
*/ break; } // Return private names back to their original name const std::string class_prefix = std::string("_") + code->name()->strValue(); if (varname->startsWith(class_prefix + std::string("__"))) varname->setValue(varname->strValue().substr(class_prefix.size())); PycRef name = new ASTName(varname); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { curblock.cast()->setIndex(name); } else if (stack.top().type() == ASTNode::NODE_IMPORT) { PycRef import = stack.top().cast(); import->add_store(new ASTStore(value, name)); } else if (curblock->blktype() == ASTBlock::BLK_WITH && !curblock->inited()) { curblock.cast()->setExpr(value); curblock.cast()->setVar(name); } else if (value.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(value, name, stack, curblock); } else { curblock->append(new ASTStore(value, name)); if (value.type() == ASTNode::NODE_INVALID) break; } } } break; case Pyc::STORE_SLICE_0: { PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE0)))); } break; case Pyc::STORE_SLICE_1: { PycRef upper = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE1, upper)))); } break; case Pyc::STORE_SLICE_2: { PycRef lower = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE2, NULL, lower)))); } break; case Pyc::STORE_SLICE_3: { PycRef lower = stack.top(); stack.pop(); PycRef upper = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef value = stack.top(); stack.pop(); curblock->append(new ASTStore(value, new ASTSubscr(dest, new ASTSlice(ASTSlice::SLICE3, upper, lower)))); } break; case Pyc::STORE_SUBSCR: { if (unpack) { PycRef 
subscr = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef save = new ASTSubscr(dest, subscr); PycRef tup = stack.top(); if (tup.type() == ASTNode::NODE_TUPLE) tup.cast()->add(save); else fputs("Something TERRIBLE happened!\n", stderr); if (--unpack <= 0) { stack.pop(); PycRef seq = stack.top(); stack.pop(); if (seq.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(seq, tup, stack, curblock); } else { curblock->append(new ASTStore(seq, tup)); } } } else { PycRef subscr = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef src = stack.top(); stack.pop(); // If variable annotations are enabled, we'll need to check for them here. // Python handles a varaible annotation by setting: // __annotations__['var-name'] = type const bool found_annotated_var = (variable_annotations && dest->type() == ASTNode::Type::NODE_NAME && dest.cast()->name()->isEqual("__annotations__")); if (found_annotated_var) { // Annotations can be done alone or as part of an assignment. // In the case of an assignment, we'll see a NODE_STORE on the stack. if (!curblock->nodes().empty() && curblock->nodes().back()->type() == ASTNode::Type::NODE_STORE) { // Replace the existing NODE_STORE with a new one that includes the annotation. 
PycRef store = curblock->nodes().back().cast(); curblock->removeLast(); curblock->append(new ASTStore(store->src(), new ASTAnnotatedVar(subscr, src))); } else { curblock->append(new ASTAnnotatedVar(subscr, src)); } } else { if (dest.type() == ASTNode::NODE_MAP) { dest.cast()->add(subscr, src); } else if (src.type() == ASTNode::NODE_CHAINSTORE) { append_to_chain_store(src, new ASTSubscr(dest, subscr), stack, curblock); } else { curblock->append(new ASTStore(src, new ASTSubscr(dest, subscr))); } } } } break; case Pyc::UNARY_CALL: { PycRef func = stack.top(); stack.pop(); stack.push(new ASTCall(func, ASTCall::pparam_t(), ASTCall::kwparam_t())); } break; case Pyc::UNARY_CONVERT: { PycRef name = stack.top(); stack.pop(); stack.push(new ASTConvert(name)); } break; case Pyc::UNARY_INVERT: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_INVERT)); } break; case Pyc::UNARY_NEGATIVE: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_NEGATIVE)); } break; case Pyc::UNARY_NOT: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_NOT)); } break; case Pyc::UNARY_POSITIVE: { PycRef arg = stack.top(); stack.pop(); stack.push(new ASTUnary(arg, ASTUnary::UN_POSITIVE)); } break; case Pyc::UNPACK_LIST_A: case Pyc::UNPACK_TUPLE_A: case Pyc::UNPACK_SEQUENCE_A: { unpack = operand; if (unpack > 0) { ASTTuple::value_t vals; stack.push(new ASTTuple(vals)); } else { // Unpack zero values and assign it to top of stack or for loop variable. // E.g. 
[] = TOS / for [] in X ASTTuple::value_t vals; auto tup = new ASTTuple(vals); if (curblock->blktype() == ASTBlock::BLK_FOR && !curblock->inited()) { tup->setRequireParens(true); curblock.cast()->setIndex(tup); } else if (stack.top().type() == ASTNode::NODE_CHAINSTORE) { auto chainStore = stack.top(); stack.pop(); append_to_chain_store(chainStore, tup, stack, curblock); } else { curblock->append(new ASTStore(stack.top(), tup)); stack.pop(); } } } break; case Pyc::YIELD_FROM: { PycRef dest = stack.top(); stack.pop(); // TODO: Support yielding into a non-null destination PycRef value = stack.top(); if (value) { value->setProcessed(); curblock->append(new ASTReturn(value, ASTReturn::YIELD_FROM)); } } break; case Pyc::YIELD_VALUE: case Pyc::INSTRUMENTED_YIELD_VALUE_A: { PycRef value = stack.top(); stack.pop(); curblock->append(new ASTReturn(value, ASTReturn::YIELD)); } break; case Pyc::SETUP_ANNOTATIONS: variable_annotations = true; break; case Pyc::PRECALL_A: case Pyc::RESUME_A: case Pyc::INSTRUMENTED_RESUME_A: /* We just entirely ignore this / no-op */ break; case Pyc::CACHE: /* These "fake" opcodes are used as placeholders for optimizing certain opcodes in Python 3.11+. Since we have no need for that during disassembly/decompilation, we can just treat these as no-ops. 
*/ break; case Pyc::PUSH_NULL: stack.push(nullptr); break; case Pyc::GEN_START_A: stack.pop(); break; case Pyc::SWAP_A: { unpack = operand; ASTTuple::value_t values; ASTTuple::value_t next_tuple; values.resize(operand); for (int i = 0; i < operand; i++) { values[operand - i - 1] = stack.top(); stack.pop(); } auto tup = new ASTTuple(values); tup->setRequireParens(false); auto next_tup = new ASTTuple(next_tuple); next_tup->setRequireParens(false); stack.push(tup); stack.push(next_tup); } break; case Pyc::BINARY_SLICE: { PycRef end = stack.top(); stack.pop(); PycRef start = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); if (start.type() == ASTNode::NODE_OBJECT && start.cast()->object() == Pyc_None) { start = NULL; } if (end.type() == ASTNode::NODE_OBJECT && end.cast()->object() == Pyc_None) { end = NULL; } PycRef slice; if (start == NULL && end == NULL) { slice = new ASTSlice(ASTSlice::SLICE0); } else if (start == NULL) { slice = new ASTSlice(ASTSlice::SLICE2, start, end); } else if (end == NULL) { slice = new ASTSlice(ASTSlice::SLICE1, start, end); } else { slice = new ASTSlice(ASTSlice::SLICE3, start, end); } stack.push(new ASTSubscr(dest, slice)); } break; case Pyc::STORE_SLICE: { PycRef end = stack.top(); stack.pop(); PycRef start = stack.top(); stack.pop(); PycRef dest = stack.top(); stack.pop(); PycRef values = stack.top(); stack.pop(); if (start.type() == ASTNode::NODE_OBJECT && start.cast()->object() == Pyc_None) { start = NULL; } if (end.type() == ASTNode::NODE_OBJECT && end.cast()->object() == Pyc_None) { end = NULL; } PycRef slice; if (start == NULL && end == NULL) { slice = new ASTSlice(ASTSlice::SLICE0); } else if (start == NULL) { slice = new ASTSlice(ASTSlice::SLICE2, start, end); } else if (end == NULL) { slice = new ASTSlice(ASTSlice::SLICE1, start, end); } else { slice = new ASTSlice(ASTSlice::SLICE3, start, end); } curblock->append(new ASTStore(values, new ASTSubscr(dest, slice))); } break; case Pyc::COPY_A: { PycRef value = 
stack.top(operand); stack.push(value); } break; case Pyc::MATCH_CLASS_A: { /* 3.11 `match`/`case` class pattern. The match pre-scan registered this as a SIMPLE handleable case (positional captures only, no guard, no kw patterns); anything else is unregistered -> bail. */ auto mci = matchCase.find(curpos); if (mci == matchCase.end()) { fprintf(stderr, "Unsupported opcode: %s (%d)\n", Pyc::OpcodeName(opcode), opcode); cleanBuild = false; return new ASTNodeList(defblock->nodes()); } const MCase& mc = mci->second; /* stack: [..., (leftover subject copies), subject, class, kwnames] */ stack.pop(); // kwnames (empty tuple) PycRef classnode = stack.top(); stack.pop(); PycRef subject = stack.top(); stack.pop(); for (int k = 0; k < mc.popExtra; ++k) // matched-path leftover-subject pops if (!stack.empty()) stack.pop(); /* pattern renders like a call: ClassName(cap0, cap1, …) */ ASTCall::pparam_t pparams; for (const auto& c : mc.caps) pparams.push_back(c); PycRef pattern = new ASTCall(classnode, pparams, ASTCall::kwparam_t()); if (mc.isFirst) { blocks.push(new ASTMatchBlock(mc.matchEnd, subject)); curblock = blocks.top(); } blocks.push(new ASTCaseBlock(mc.failTarget, pattern)); curblock = blocks.top(); curblock->init(); /* skip the whole case-test machinery; the body reconstructs normally and the BLK_CASE/BLK_MATCH close at their ends. 
*/ source.setPos(mc.bodyStart); pos = mc.bodyStart; } break; default: fprintf(stderr, "Unsupported opcode: %s (%d)\n", Pyc::OpcodeName(opcode), opcode); cleanBuild = false; return new ASTNodeList(defblock->nodes()); } else_pop = ( (curblock->blktype() == ASTBlock::BLK_ELSE) || (curblock->blktype() == ASTBlock::BLK_IF) || (curblock->blktype() == ASTBlock::BLK_ELIF) ) && (curblock->end() == pos); } if (stack_hist.size()) { fputs("Warning: Stack history is not empty!\n", stderr); while (stack_hist.size()) { stack_hist.pop(); } } if (blocks.size() > 1) { fputs("Warning: block stack is not empty!\n", stderr); while (blocks.size() > 1) { PycRef tmp = blocks.top(); blocks.pop(); blocks.top()->append(tmp.cast()); } } cleanBuild = true; return new ASTNodeList(defblock->nodes()); } /* Append `item` to the chain-store node `chainStore`. The current stack top is discarded first (see the note in the body); afterwards `chainStore` is appended to `curblock` when the new stack top has type PycObject::TYPE_NULL, and is pushed back onto `stack` otherwise. */ static void append_to_chain_store(const PycRef &chainStore, PycRef item, FastStack& stack, const PycRef& curblock) { stack.pop(); // ignore identical source object. chainStore.cast()->append(item); if (stack.top().type() == PycObject::TYPE_NULL) { curblock->append(chainStore); } else { stack.push(chainStore); } } static int cmp_prec(PycRef parent, PycRef child) { /* Determine whether the parent has higher precedence than the child, so we don't flood the source code with extraneous parens. Else we'd have expressions like (((a + b) + c) + d) when the equivalent, a + b + c + d, would suffice. A result greater than zero means the child should be wrapped in parentheses (print_ordered tests cmp_prec(parent, child) > 0); zero or a negative result means it is printed bare. 
*/ if (parent.type() == ASTNode::NODE_UNARY && parent.cast()->op() == ASTUnary::UN_NOT) return 1; // Always parenthesize not(x) if (child.type() == ASTNode::NODE_BINARY) { PycRef binChild = child.cast(); if (parent.type() == ASTNode::NODE_BINARY) { PycRef binParent = parent.cast(); if (binParent->right() == child) { if (binParent->op() == ASTBinary::BIN_SUBTRACT && binChild->op() == ASTBinary::BIN_ADD) return 1; else if (binParent->op() == ASTBinary::BIN_DIVIDE && binChild->op() == ASTBinary::BIN_MULTIPLY) return 1; } return binChild->op() - binParent->op(); } else if (parent.type() == ASTNode::NODE_COMPARE) return (binChild->op() == ASTBinary::BIN_LOG_AND || binChild->op() == ASTBinary::BIN_LOG_OR) ? 1 : -1; else if (parent.type() == ASTNode::NODE_UNARY) return (binChild->op() == ASTBinary::BIN_POWER) ? -1 : 1; } else if (child.type() == ASTNode::NODE_UNARY) { PycRef unChild = child.cast(); if (parent.type() == ASTNode::NODE_BINARY) { PycRef binParent = parent.cast(); if (binParent->op() == ASTBinary::BIN_LOG_AND || binParent->op() == ASTBinary::BIN_LOG_OR) return -1; else if (unChild->op() == ASTUnary::UN_NOT) return 1; else if (binParent->op() == ASTBinary::BIN_POWER) return 1; else return -1; } else if (parent.type() == ASTNode::NODE_COMPARE) { return (unChild->op() == ASTUnary::UN_NOT) ? 1 : -1; } else if (parent.type() == ASTNode::NODE_UNARY) { return unChild->op() - parent.cast()->op(); } } else if (child.type() == ASTNode::NODE_COMPARE) { PycRef cmpChild = child.cast(); if (parent.type() == ASTNode::NODE_BINARY) return (parent.cast()->op() == ASTBinary::BIN_LOG_AND || parent.cast()->op() == ASTBinary::BIN_LOG_OR) ? -1 : 1; else if (parent.type() == ASTNode::NODE_COMPARE) return cmpChild->op() - parent.cast()->op(); else if (parent.type() == ASTNode::NODE_UNARY) return (parent.cast()->op() == ASTUnary::UN_NOT) ? 
-1 : 1; } /* For normal nodes, don't parenthesize anything */ return -1; } /* Print `child` as an operand of `parent`. Binary, compare and unary children are wrapped in parentheses when cmp_prec(parent, child) > 0; any other node type goes straight to print_src. */ static void print_ordered(PycRef parent, PycRef child, PycModule* mod, std::ostream& pyc_output) { if (child.type() == ASTNode::NODE_BINARY || child.type() == ASTNode::NODE_COMPARE) { if (cmp_prec(parent, child) > 0) { pyc_output << "("; print_src(child, mod, pyc_output); pyc_output << ")"; } else { print_src(child, mod, pyc_output); } } else if (child.type() == ASTNode::NODE_UNARY) { if (cmp_prec(parent, child) > 0) { pyc_output << "("; print_src(child, mod, pyc_output); pyc_output << ")"; } else { print_src(child, mod, pyc_output); } } else { print_src(child, mod, pyc_output); } } /* start_line returns without writing anything while inLambda is set. NOTE(review): the text after `for (int i=0; i` appears truncated -- the rest of that loop and the header of the function that takes `blk` are not present here; restore them from a pristine copy before relying on this region. The surviving `blk` body prints a `pass` keyword for an empty block and otherwise prints each node, calling end_line between nodes but not after the last one. */ static void start_line(int indent, std::ostream& pyc_output) { if (inLambda) return; for (int i=0; i blk, PycModule* mod, std::ostream& pyc_output) { ASTBlock::list_t lines = blk->nodes(); if (lines.size() == 0) { PycRef pass = new ASTKeyword(ASTKeyword::KW_PASS); start_line(cur_indent, pyc_output); print_src(pass, mod, pyc_output); } for (auto ln = lines.cbegin(); ln != lines.cend();) { if ((*ln).cast().type() != ASTNode::NODE_NODELIST) { start_line(cur_indent, pyc_output); } print_src(*ln, mod, pyc_output); if (++ln != lines.end()) { end_line(pyc_output); } } } /* Write one f-string replacement field for `formatted_value`: an opening brace, the value expression, an optional !s / !r / !a conversion chosen by conversion() & CONVERSION_MASK, a colon plus the format spec when HAVE_FMT_SPEC is set, and a closing brace. The enclosing f-string quotes are not written here. */ void print_formatted_value(PycRef formatted_value, PycModule* mod, std::ostream& pyc_output) { pyc_output << "{"; print_src(formatted_value->val(), mod, pyc_output); switch (formatted_value->conversion() & ASTFormattedValue::CONVERSION_MASK) { case ASTFormattedValue::NONE: break; case ASTFormattedValue::STR: pyc_output << "!s"; break; case ASTFormattedValue::REPR: pyc_output << "!r"; break; case ASTFormattedValue::ASCII: pyc_output << "!a"; break; } if (formatted_value->conversion() & ASTFormattedValue::HAVE_FMT_SPEC) { pyc_output << ":" << formatted_value->format_spec().cast()->object().cast()->value(); } pyc_output << "}"; } /* Nodes that print_src has already entered; finding a node here again triggers its circular-reference warning. */ static std::unordered_set node_seen; void print_src(PycRef node, PycModule* mod, std::ostream& pyc_output) { if (node == NULL) { 
pyc_output << "None"; cleanBuild = true; return; } if (node_seen.find((ASTNode *)node) != node_seen.end()) { fputs("WARNING: Circular reference detected\n", stderr); return; } node_seen.insert((ASTNode *)node); switch (node->type()) { case ASTNode::NODE_BINARY: case ASTNode::NODE_COMPARE: { PycRef bin = node.cast(); print_ordered(node, bin->left(), mod, pyc_output); pyc_output << bin->op_str(); print_ordered(node, bin->right(), mod, pyc_output); } break; case ASTNode::NODE_UNARY: { PycRef un = node.cast(); pyc_output << un->op_str(); print_ordered(node, un->operand(), mod, pyc_output); } break; case ASTNode::NODE_CALL: { PycRef call = node.cast(); print_src(call->func(), mod, pyc_output); pyc_output << "("; bool first = true; for (const auto& param : call->pparams()) { if (!first) pyc_output << ", "; print_src(param, mod, pyc_output); first = false; } for (const auto& param : call->kwparams()) { if (!first) pyc_output << ", "; if (param.first.type() == ASTNode::NODE_NAME) { pyc_output << param.first.cast()->name()->value() << " = "; } else { PycRef str_name = param.first.cast()->object().cast(); pyc_output << str_name->value() << " = "; } print_src(param.second, mod, pyc_output); first = false; } if (call->hasVar()) { if (!first) pyc_output << ", "; pyc_output << "*"; print_src(call->var(), mod, pyc_output); first = false; } if (call->hasKW()) { if (!first) pyc_output << ", "; pyc_output << "**"; print_src(call->kw(), mod, pyc_output); first = false; } pyc_output << ")"; } break; case ASTNode::NODE_DELETE: { pyc_output << "del "; print_src(node.cast()->value(), mod, pyc_output); } break; case ASTNode::NODE_EXEC: { PycRef exec = node.cast(); pyc_output << "exec "; print_src(exec->statement(), mod, pyc_output); if (exec->globals() != NULL) { pyc_output << " in "; print_src(exec->globals(), mod, pyc_output); if (exec->locals() != NULL && exec->globals() != exec->locals()) { pyc_output << ", "; print_src(exec->locals(), mod, pyc_output); } } } break; case 
ASTNode::NODE_FORMATTEDVALUE: pyc_output << "f" F_STRING_QUOTE; print_formatted_value(node.cast(), mod, pyc_output); pyc_output << F_STRING_QUOTE; break; case ASTNode::NODE_JOINEDSTR: pyc_output << "f" F_STRING_QUOTE; for (const auto& val : node.cast()->values()) { switch (val.type()) { case ASTNode::NODE_FORMATTEDVALUE: print_formatted_value(val.cast(), mod, pyc_output); break; case ASTNode::NODE_OBJECT: // When printing a piece of the f-string, keep the quote style consistent. // This avoids problems when ''' or """ is part of the string. print_const(pyc_output, val.cast()->object(), mod, F_STRING_QUOTE); break; default: fprintf(stderr, "Unsupported node type %d in NODE_JOINEDSTR\n", val.type()); } } pyc_output << F_STRING_QUOTE; break; case ASTNode::NODE_KEYWORD: pyc_output << node.cast()->word_str(); break; case ASTNode::NODE_LIST: { pyc_output << "["; bool first = true; cur_indent++; for (const auto& val : node.cast()->values()) { if (first) pyc_output << "\n"; else pyc_output << ",\n"; start_line(cur_indent, pyc_output); print_src(val, mod, pyc_output); first = false; } cur_indent--; pyc_output << "]"; } break; case ASTNode::NODE_SET: { pyc_output << "{"; bool first = true; cur_indent++; for (const auto& val : node.cast()->values()) { if (first) pyc_output << "\n"; else pyc_output << ",\n"; start_line(cur_indent, pyc_output); print_src(val, mod, pyc_output); first = false; } cur_indent--; pyc_output << "}"; } break; case ASTNode::NODE_COMPREHENSION: { PycRef comp = node.cast(); pyc_output << "[ "; print_src(comp->result(), mod, pyc_output); for (const auto& gen : comp->generators()) { pyc_output << " for "; print_src(gen->index(), mod, pyc_output); pyc_output << " in "; print_src(gen->iter(), mod, pyc_output); if (gen->condition()) { pyc_output << " if "; print_src(gen->condition(), mod, pyc_output); } } pyc_output << " ]"; } break; case ASTNode::NODE_MAP: { pyc_output << "{"; bool first = true; cur_indent++; for (const auto& val : node.cast()->values()) { if 
(first) pyc_output << "\n"; else pyc_output << ",\n"; start_line(cur_indent, pyc_output); print_src(val.first, mod, pyc_output); pyc_output << ": "; print_src(val.second, mod, pyc_output); first = false; } cur_indent--; pyc_output << " }"; } break; case ASTNode::NODE_CONST_MAP: { PycRef const_map = node.cast(); PycTuple::value_t keys = const_map->keys().cast()->object().cast()->values(); ASTConstMap::values_t values = const_map->values(); auto map = new ASTMap; for (const auto& key : keys) { // Values are pushed onto the stack in reverse order. PycRef value = values.back(); values.pop_back(); map->add(new ASTObject(key), value); } print_src(map, mod, pyc_output); } break; case ASTNode::NODE_NAME: pyc_output << node.cast()->name()->value(); break; case ASTNode::NODE_NODELIST: { cur_indent++; for (const auto& ln : node.cast()->nodes()) { if (ln.cast().type() != ASTNode::NODE_NODELIST) { start_line(cur_indent, pyc_output); } print_src(ln, mod, pyc_output); end_line(pyc_output); } cur_indent--; } break; case ASTNode::NODE_BLOCK: { PycRef blk = node.cast(); if (blk->blktype() == ASTBlock::BLK_ELSE && blk->size() == 0) break; if (blk->blktype() == ASTBlock::BLK_CONTAINER) { end_line(pyc_output); print_block(blk, mod, pyc_output); end_line(pyc_output); break; } pyc_output << blk->type_str(); if (blk->blktype() == ASTBlock::BLK_IF || blk->blktype() == ASTBlock::BLK_ELIF || blk->blktype() == ASTBlock::BLK_WHILE) { if (blk.cast()->negative()) pyc_output << " not "; else pyc_output << " "; print_src(blk.cast()->cond(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_FOR || blk->blktype() == ASTBlock::BLK_ASYNCFOR) { pyc_output << " "; print_src(blk.cast()->index(), mod, pyc_output); pyc_output << " in "; print_src(blk.cast()->iter(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_MATCH) { pyc_output << " "; print_src(blk.cast()->subject(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_CASE) { pyc_output << " "; 
print_src(blk.cast()->pattern(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_EXCEPT && blk.cast()->cond() != NULL) { pyc_output << " "; print_src(blk.cast()->cond(), mod, pyc_output); } else if (blk->blktype() == ASTBlock::BLK_WITH) { pyc_output << " "; print_src(blk.cast()->expr(), mod, pyc_output); PycRef var = blk.try_cast()->var(); if (var != NULL) { pyc_output << " as "; print_src(var, mod, pyc_output); } } pyc_output << ":\n"; cur_indent++; print_block(blk, mod, pyc_output); cur_indent--; } break; case ASTNode::NODE_OBJECT: { PycRef obj = node.cast()->object(); if (obj.type() == PycObject::TYPE_CODE) { PycRef code = obj.cast(); decompyle(code, mod, pyc_output); } else { print_const(pyc_output, obj, mod); } } break; case ASTNode::NODE_PRINT: { pyc_output << "print "; bool first = true; if (node.cast()->stream() != nullptr) { pyc_output << ">>"; print_src(node.cast()->stream(), mod, pyc_output); first = false; } for (const auto& val : node.cast()->values()) { if (!first) pyc_output << ", "; print_src(val, mod, pyc_output); first = false; } if (!node.cast()->eol()) pyc_output << ","; } break; case ASTNode::NODE_RAISE: { PycRef raise = node.cast(); pyc_output << "raise "; bool first = true; for (const auto& param : raise->params()) { if (!first) pyc_output << ", "; print_src(param, mod, pyc_output); first = false; } } break; case ASTNode::NODE_RETURN: { PycRef ret = node.cast(); PycRef value = ret->value(); if (!inLambda) { switch (ret->rettype()) { case ASTReturn::RETURN: pyc_output << "return "; break; case ASTReturn::YIELD: pyc_output << "yield "; break; case ASTReturn::YIELD_FROM: if (value.type() == ASTNode::NODE_AWAITABLE) { pyc_output << "await "; value = value.cast()->expression(); } else { pyc_output << "yield from "; } break; } } print_src(value, mod, pyc_output); } break; case ASTNode::NODE_SLICE: { PycRef slice = node.cast(); if (slice->op() & ASTSlice::SLICE1) { print_src(slice->left(), mod, pyc_output); } pyc_output << ":"; if 
(slice->op() & ASTSlice::SLICE2) { print_src(slice->right(), mod, pyc_output); } } break; case ASTNode::NODE_IMPORT: { PycRef import = node.cast(); if (import->stores().size()) { ASTImport::list_t stores = import->stores(); pyc_output << "from "; if (import->name().type() == ASTNode::NODE_IMPORT) print_src(import->name().cast()->name(), mod, pyc_output); else print_src(import->name(), mod, pyc_output); pyc_output << " import "; if (stores.size() == 1) { auto src = stores.front()->src(); auto dest = stores.front()->dest(); print_src(src, mod, pyc_output); if (src.cast()->name()->value() != dest.cast()->name()->value()) { pyc_output << " as "; print_src(dest, mod, pyc_output); } } else { bool first = true; for (const auto& st : stores) { if (!first) pyc_output << ", "; print_src(st->src(), mod, pyc_output); first = false; if (st->src().cast()->name()->value() != st->dest().cast()->name()->value()) { pyc_output << " as "; print_src(st->dest(), mod, pyc_output); } } } } else { pyc_output << "import "; print_src(import->name(), mod, pyc_output); } } break; case ASTNode::NODE_FUNCTION: { /* Actual named functions are NODE_STORE with a name */ pyc_output << "(lambda "; PycRef code = node.cast()->code(); PycRef code_src = code.cast()->object().cast(); ASTFunction::defarg_t defargs = node.cast()->defargs(); ASTFunction::defarg_t kwdefargs = node.cast()->kwdefargs(); auto da = defargs.cbegin(); int narg = 0; for (int i=0; iargCount(); i++) { if (narg) pyc_output << ", "; pyc_output << code_src->getLocal(narg++)->value(); if ((code_src->argCount() - i) <= (int)defargs.size()) { pyc_output << " = "; print_src(*da++, mod, pyc_output); } } da = kwdefargs.cbegin(); if (code_src->kwOnlyArgCount() != 0) { pyc_output << (narg == 0 ? 
"*" : ", *"); for (int i = 0; i < code_src->argCount(); i++) { pyc_output << ", "; pyc_output << code_src->getLocal(narg++)->value(); if ((code_src->kwOnlyArgCount() - i) <= (int)kwdefargs.size()) { pyc_output << " = "; print_src(*da++, mod, pyc_output); } } } pyc_output << ": "; inLambda = true; print_src(code, mod, pyc_output); inLambda = false; pyc_output << ")"; } break; case ASTNode::NODE_STORE: { PycRef src = node.cast()->src(); PycRef dest = node.cast()->dest(); if (src.type() == ASTNode::NODE_FUNCTION) { PycRef code = src.cast()->code(); PycRef code_src = code.cast()->object().cast(); bool isLambda = false; if (strcmp(code_src->name()->value(), "") == 0) { pyc_output << "\n"; start_line(cur_indent, pyc_output); print_src(dest, mod, pyc_output); pyc_output << " = lambda "; isLambda = true; } else { pyc_output << "\n"; start_line(cur_indent, pyc_output); if (code_src->flags() & PycCode::CO_COROUTINE) pyc_output << "async "; pyc_output << "def "; print_src(dest, mod, pyc_output); pyc_output << "("; } ASTFunction::defarg_t defargs = src.cast()->defargs(); ASTFunction::defarg_t kwdefargs = src.cast()->kwdefargs(); auto da = defargs.cbegin(); int narg = 0; for (int i = 0; i < code_src->argCount(); ++i) { if (narg) pyc_output << ", "; pyc_output << code_src->getLocal(narg++)->value(); if ((code_src->argCount() - i) <= (int)defargs.size()) { pyc_output << " = "; print_src(*da++, mod, pyc_output); } } da = kwdefargs.cbegin(); if (code_src->kwOnlyArgCount() != 0) { pyc_output << (narg == 0 ? 
"*" : ", *"); for (int i = 0; i < code_src->kwOnlyArgCount(); ++i) { pyc_output << ", "; pyc_output << code_src->getLocal(narg++)->value(); if ((code_src->kwOnlyArgCount() - i) <= (int)kwdefargs.size()) { pyc_output << " = "; print_src(*da++, mod, pyc_output); } } } if (code_src->flags() & PycCode::CO_VARARGS) { if (narg) pyc_output << ", "; pyc_output << "*" << code_src->getLocal(narg++)->value(); } if (code_src->flags() & PycCode::CO_VARKEYWORDS) { if (narg) pyc_output << ", "; pyc_output << "**" << code_src->getLocal(narg++)->value(); } if (isLambda) { pyc_output << ": "; } else { pyc_output << "):\n"; printDocstringAndGlobals = true; } bool preLambda = inLambda; inLambda |= isLambda; print_src(code, mod, pyc_output); inLambda = preLambda; } else if (src.type() == ASTNode::NODE_CLASS) { pyc_output << "\n"; start_line(cur_indent, pyc_output); pyc_output << "class "; print_src(dest, mod, pyc_output); PycRef bases = src.cast()->bases().cast(); if (bases->values().size() > 0) { pyc_output << "("; bool first = true; for (const auto& val : bases->values()) { if (!first) pyc_output << ", "; print_src(val, mod, pyc_output); first = false; } pyc_output << "):\n"; } else { // Don't put parens if there are no base classes pyc_output << ":\n"; } printClassDocstring = true; PycRef code = src.cast()->code().cast() ->func().cast()->code(); print_src(code, mod, pyc_output); } else if (src.type() == ASTNode::NODE_IMPORT) { PycRef import = src.cast(); if (import->fromlist() != NULL) { PycRef fromlist = import->fromlist().cast()->object(); if (fromlist != Pyc_None) { pyc_output << "from "; if (import->name().type() == ASTNode::NODE_IMPORT) print_src(import->name().cast()->name(), mod, pyc_output); else print_src(import->name(), mod, pyc_output); pyc_output << " import "; if (fromlist.type() == PycObject::TYPE_TUPLE || fromlist.type() == PycObject::TYPE_SMALL_TUPLE) { bool first = true; for (const auto& val : fromlist.cast()->values()) { if (!first) pyc_output << ", "; pyc_output 
<< val.cast()->value(); first = false; } } else { pyc_output << fromlist.cast()->value(); } } else { pyc_output << "import "; print_src(import->name(), mod, pyc_output); } } else { pyc_output << "import "; PycRef import_name = import->name(); print_src(import_name, mod, pyc_output); if (!dest.cast()->name()->isEqual(import_name.cast()->name().cast())) { pyc_output << " as "; print_src(dest, mod, pyc_output); } } } else if (src.type() == ASTNode::NODE_BINARY && src.cast()->is_inplace()) { print_src(src, mod, pyc_output); } else { print_src(dest, mod, pyc_output); pyc_output << " = "; print_src(src, mod, pyc_output); } } break; case ASTNode::NODE_CHAINSTORE: { for (auto& dest : node.cast()->nodes()) { print_src(dest, mod, pyc_output); pyc_output << " = "; } print_src(node.cast()->src(), mod, pyc_output); } break; case ASTNode::NODE_SUBSCR: { print_src(node.cast()->name(), mod, pyc_output); pyc_output << "["; print_src(node.cast()->key(), mod, pyc_output); pyc_output << "]"; } break; case ASTNode::NODE_CONVERT: { pyc_output << "`"; print_src(node.cast()->name(), mod, pyc_output); pyc_output << "`"; } break; case ASTNode::NODE_TUPLE: { PycRef tuple = node.cast(); ASTTuple::value_t values = tuple->values(); if (tuple->requireParens()) pyc_output << "("; bool first = true; for (const auto& val : values) { if (!first) pyc_output << ", "; print_src(val, mod, pyc_output); first = false; } if (values.size() == 1) pyc_output << ','; if (tuple->requireParens()) pyc_output << ')'; } break; case ASTNode::NODE_ANNOTATED_VAR: { PycRef annotated_var = node.cast(); PycRef name = annotated_var->name().cast(); PycRef annotation = annotated_var->annotation(); pyc_output << name->object().cast()->value(); pyc_output << ": "; print_src(annotation, mod, pyc_output); } break; case ASTNode::NODE_TERNARY: { /* parenthesis might be needed * * when if-expr is part of numerical expression, ternary has the LOWEST precedence * print(a + b if False else c) * output is c, not a+c (a+b is calculated 
first) * * but, let's not add parenthesis - to keep the source as close to original as possible in most cases */ PycRef ternary = node.cast(); //pyc_output << "("; print_src(ternary->if_expr(), mod, pyc_output); const auto if_block = ternary->if_block().cast(); pyc_output << " if "; if (if_block->negative()) pyc_output << "not "; print_src(if_block->cond(), mod, pyc_output); pyc_output << " else "; print_src(ternary->else_expr(), mod, pyc_output); //pyc_output << ")"; } break; default: pyc_output << "type() << ">"; fprintf(stderr, "Unsupported Node type: %d\n", node->type()); cleanBuild = false; node_seen.erase((ASTNode *)node); return; } cleanBuild = true; node_seen.erase((ASTNode *)node); } bool print_docstring(PycRef obj, int indent, PycModule* mod, std::ostream& pyc_output) { // docstrings are translated from the bytecode __doc__ = 'string' to simply '''string''' auto doc = obj.try_cast(); if (doc != nullptr) { start_line(indent, pyc_output); doc->print(pyc_output, mod, true); pyc_output << "\n"; return true; } return false; } static std::unordered_set code_seen; void decompyle(PycRef code, PycModule* mod, std::ostream& pyc_output) { if (code_seen.find((PycCode *)code) != code_seen.end()) { fputs("WARNING: Circular reference detected\n", stderr); return; } code_seen.insert((PycCode *)code); PycRef source = BuildFromCode(code, mod); PycRef clean = source.cast(); if (cleanBuild) { // The Python compiler adds some stuff that we don't really care // about, and would add extra code for re-compilation anyway. 
// We strip these lines out here, and then add a "pass" statement // if the cleaned up code is empty if (clean->nodes().front().type() == ASTNode::NODE_STORE) { PycRef store = clean->nodes().front().cast(); if (store->src().type() == ASTNode::NODE_NAME && store->dest().type() == ASTNode::NODE_NAME) { PycRef src = store->src().cast(); PycRef dest = store->dest().cast(); if (src->name()->isEqual("__name__") && dest->name()->isEqual("__module__")) { // __module__ = __name__ // Automatically added by Python 2.2.1 and later clean->removeFirst(); } } } if (clean->nodes().front().type() == ASTNode::NODE_STORE) { PycRef store = clean->nodes().front().cast(); if (store->src().type() == ASTNode::NODE_OBJECT && store->dest().type() == ASTNode::NODE_NAME) { PycRef src = store->src().cast(); PycRef srcString = src->object().try_cast(); PycRef dest = store->dest().cast(); if (dest->name()->isEqual("__qualname__")) { // __qualname__ = '' // Automatically added by Python 3.3 and later clean->removeFirst(); } } } // Class and module docstrings may only appear at the beginning of their source if (printClassDocstring && clean->nodes().front().type() == ASTNode::NODE_STORE) { PycRef store = clean->nodes().front().cast(); if (store->dest().type() == ASTNode::NODE_NAME && store->dest().cast()->name()->isEqual("__doc__") && store->src().type() == ASTNode::NODE_OBJECT) { if (print_docstring(store->src().cast()->object(), cur_indent + (code->name()->isEqual("") ? 
0 : 1), mod, pyc_output)) clean->removeFirst(); } } if (clean->nodes().back().type() == ASTNode::NODE_RETURN) { PycRef ret = clean->nodes().back().cast(); PycRef retObj = ret->value().try_cast(); if (ret->value() == NULL || ret->value().type() == ASTNode::NODE_LOCALS || (retObj && retObj->object().type() == PycObject::TYPE_NONE)) { clean->removeLast(); // Always an extraneous return statement } } } if (printClassDocstring) printClassDocstring = false; // This is outside the clean check so a source block will always // be compilable, even if decompylation failed. if (clean->nodes().size() == 0 && !code.isIdent(mod->code())) clean->append(new ASTKeyword(ASTKeyword::KW_PASS)); bool part1clean = cleanBuild; if (printDocstringAndGlobals) { if (code->consts()->size()) print_docstring(code->getConst(0), cur_indent + 1, mod, pyc_output); PycCode::globals_t globs = code->getGlobals(); if (globs.size()) { start_line(cur_indent + 1, pyc_output); pyc_output << "global "; bool first = true; for (const auto& glob : globs) { if (!first) pyc_output << ", "; pyc_output << glob->value(); first = false; } pyc_output << "\n"; } printDocstringAndGlobals = false; } print_src(source, mod, pyc_output); if (!cleanBuild || !part1clean) { start_line(cur_indent, pyc_output); pyc_output << "# WARNING: Decompyle incomplete\n"; } code_seen.erase((PycCode *)code); }