Reconstruct 3.11 while-loops (bottom-test optimization)

Python 3.11 compiles 'while cond:' with a bottom test: an entry guard
(eval cond; POP_JUMP_FORWARD_IF_FALSE end) before the body, and a
back-edge (eval cond; POP_JUMP_BACKWARD_IF_TRUE loop_start) at the end.
pycdc rendered both halves as separate 'if' blocks, so EVERY while loop
came out as 'if cond: ... if not cond: pass' and never looped.

- New ScanWhileLoops pre-pass pairs each conditional backward jump with
  the forward guard immediately preceding its target (guard skipping to
  the instruction after the back-edge) to identify genuine while loops.
- At the guard, open a BLK_WHILE with the condition instead of an if.
- At the back-edge, discard the duplicated condition and close the loop,
  but only when a BLK_WHILE is actually open (guard against a
  misidentified back-edge collapsing the block stack and crashing).

Fixes while loops across all files (including nested loops and
continue). decompilation target: 237/239 files, corpus 41/95 (+1: signature; 0 regressions).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
polfg 2026-06-08 12:27:24 +02:00
commit 9c5ea2f2bc
6 changed files with 145 additions and 4 deletions

View file

@ -4,6 +4,7 @@
#include <unordered_set>
#include <map>
#include <vector>
#include <set>
#include "ASTree.h"
#include "FastStack.h"
#include "pyc_numeric.h"
@ -355,6 +356,71 @@ static void ScanTryFinally(PycRef<PycCode> code, PycModule* mod,
}
}
/* Python 3.11 while-loop pre-pass. A while loop with the standard bottom-test
layout compiles to:
<eval cond>; POP_JUMP_FORWARD_IF_FALSE end (entry guard)
loop_start:
<body>
<eval cond>; POP_JUMP_BACKWARD_IF_TRUE loop_start (back-edge)
end:
Without recognition pycdc renders both halves as independent `if` blocks and
never forms the loop (the body's trailing back-edge becomes `if not cond:
pass`). Detect it from a conditional backward jump whose target is the
instruction immediately following a matching forward guard, and whose guard
skips to the instruction right after the back-edge. Record the guard position
(the BLK_WHILE opens there) and the back-edge position (the loop closes
there). */
static void ScanWhileLoops(PycRef<PycCode> code, PycModule* mod,
std::map<int,int>& whileGuardEnd,
std::set<int>& whileBackedges)
{
PycBuffer src(code->code()->value(), code->code()->length());
int opcode, operand, pos = 0;
std::vector<int> order;
std::map<int,int> opAt, operAt, nextAt;
while (!src.atEof()) {
int p = pos;
bc_next(src, mod, opcode, operand, pos);
order.push_back(p);
opAt[p] = opcode; operAt[p] = operand; nextAt[p] = pos;
}
std::map<int,int> instrEndingAt; /* nextpos -> pos */
for (int p : order)
instrEndingAt[nextAt[p]] = p;
auto isCondBack = [](int op){
return op == Pyc::POP_JUMP_BACKWARD_IF_TRUE_A
|| op == Pyc::POP_JUMP_BACKWARD_IF_FALSE_A
|| op == Pyc::POP_JUMP_BACKWARD_IF_NONE_A
|| op == Pyc::POP_JUMP_BACKWARD_IF_NOT_NONE_A;
};
auto isFwdGuard = [](int op){
return op == Pyc::POP_JUMP_FORWARD_IF_FALSE_A
|| op == Pyc::POP_JUMP_FORWARD_IF_TRUE_A
|| op == Pyc::POP_JUMP_FORWARD_IF_NONE_A
|| op == Pyc::POP_JUMP_FORWARD_IF_NOT_NONE_A;
};
for (int b : order) {
if (!isCondBack(opAt[b]))
continue;
int end = nextAt[b];
int loop_start = end - operAt[b] * 2; /* backward target */
if (loop_start < 0 || loop_start >= b)
continue;
auto git = instrEndingAt.find(loop_start);
if (git == instrEndingAt.end())
continue;
int guard = git->second;
if (!isFwdGuard(opAt[guard]))
continue;
int guard_target = nextAt[guard] + operAt[guard] * 2;
if (guard_target != end) /* guard must skip to loop end */
continue;
whileGuardEnd[guard] = end;
whileBackedges.insert(b);
}
}
PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
{
PycBuffer source(code->code()->value(), code->code()->length());
@ -388,12 +454,17 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
std::map<int, int> finallyEndByStart; /* entry start -> finally end */
std::map<int, int> finallyResumeByEnd; /* finally end -> resume offset */
/* Python 3.11 while-loop reconstruction state. */
std::map<int, int> whileGuardEnd; /* guard pos -> loop end */
std::set<int> whileBackedges; /* back-edge positions */
if (mod->verCompare(3, 11) >= 0) {
exception_entries = code->exceptionTableEntries();
ScanWithBlocks(code, mod, exception_entries,
withBodyEndByBefore, withResumeByBodyEnd);
ScanTryFinally(code, mod, exception_entries,
finallyTryEndByStart, finallyEndByStart, finallyResumeByEnd);
ScanWhileLoops(code, mod, whileGuardEnd, whileBackedges);
}
while (!source.atEof()) {
@ -1655,6 +1726,31 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::INSTRUMENTED_POP_JUMP_IF_FALSE_A:
case Pyc::INSTRUMENTED_POP_JUMP_IF_TRUE_A:
{
/* Python 3.11 while-loop entry guard: open a BLK_WHILE here
instead of an if. The matching back-edge (POP_JUMP_BACKWARD)
closes it. */
auto wit = whileGuardEnd.find(curpos);
if (wit != whileGuardEnd.end()) {
PycRef<ASTNode> wcond = stack.top();
stack.pop();
bool wneg = false;
if (opcode == Pyc::POP_JUMP_FORWARD_IF_NONE_A) {
wcond = new ASTCompare(wcond, new ASTObject(Pyc_None),
ASTCompare::CMP_IS_NOT);
} else if (opcode == Pyc::POP_JUMP_FORWARD_IF_NOT_NONE_A) {
wcond = new ASTCompare(wcond, new ASTObject(Pyc_None),
ASTCompare::CMP_IS);
} else if (opcode == Pyc::POP_JUMP_FORWARD_IF_TRUE_A) {
wneg = true;
}
PycRef<ASTCondBlock> wh = new ASTCondBlock(
ASTBlock::BLK_WHILE, wit->second, wcond, wneg);
wh->init(ASTCondBlock::PRE_POPPED);
blocks.push(wh.cast<ASTBlock>());
curblock = blocks.top();
break;
}
PycRef<ASTNode> cond = stack.top();
PycRef<ASTCondBlock> ifblk;
int popped = ASTCondBlock::UNINITED;
@ -1803,6 +1899,46 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
case Pyc::POP_JUMP_BACKWARD_IF_NONE_A:
case Pyc::POP_JUMP_BACKWARD_IF_NOT_NONE_A:
{
/* Python 3.11 while-loop back-edge: the duplicate condition has
just been evaluated onto the stack; discard it and close the
BLK_WHILE opened at the matching entry guard. Only act when a
BLK_WHILE is actually open (guard against a misidentified
back-edge collapsing the whole block stack). */
if (whileBackedges.count(curpos)) {
bool hasWhile = false;
{
std::stack<PycRef<ASTBlock> > tmp = blocks;
while (!tmp.empty()) {
if (tmp.top()->blktype() == ASTBlock::BLK_WHILE) {
hasWhile = true;
break;
}
tmp.pop();
}
}
if (hasWhile) {
if (!stack.empty())
stack.pop();
/* Close any blocks still open inside the loop body, then
the BLK_WHILE itself. */
while (blocks.size() > 1
&& curblock->blktype() != ASTBlock::BLK_WHILE) {
PycRef<ASTBlock> inner = curblock;
blocks.pop();
curblock = blocks.top();
curblock->append(inner.cast<ASTNode>());
}
if (curblock->blktype() == ASTBlock::BLK_WHILE
&& blocks.size() > 1) {
PycRef<ASTBlock> wh = curblock;
blocks.pop();
curblock = blocks.top();
curblock->append(wh.cast<ASTNode>());
}
break;
}
}
/* Python 3.11: conditional jump backwards. In compiled loops
this implements an in-body guard ("if cond: <rest of body>")
where a failed test jumps back to the loop header to start the

Binary file not shown.

View file

@ -764,6 +764,7 @@ CMakeFiles/pycdc.dir/ASTree.cpp.o: /tmp/pycdc/ASTree.cpp \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/cstdarg \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/map \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__tree \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/set \
/tmp/pycdc/ASTree.h /tmp/pycdc/ASTNode.h /tmp/pycdc/pyc_module.h \
/tmp/pycdc/pyc_code.h /tmp/pycdc/pyc_sequence.h \
/tmp/pycdc/pyc_object.h /tmp/pycdc/pyc_string.h /tmp/pycdc/data.h \

View file

@ -1400,6 +1400,7 @@ CMakeFiles/pycdc.dir/ASTree.cpp.o
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/print
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/queue
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/ratio
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/set
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/stack
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/stddef.h
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/stdexcept

View file

@ -1398,6 +1398,7 @@ CMakeFiles/pycdc.dir/ASTree.cpp.o: ASTree.cpp \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/print \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/queue \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/ratio \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/set \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/stack \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/stddef.h \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/stdexcept \
@ -2379,8 +2380,14 @@ CMakeFiles/pycdc.dir/pycdc.cpp.o: pycdc.cpp \
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/iostream:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/filesystem:
pycdc.cpp:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/math.h:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/set:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__tree:
ASTree.cpp:
@ -2557,8 +2564,6 @@ pyc_sequence.h:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/pthread/qos.h:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/math.h:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/malloc/_ptrcheck.h:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/sys/_types/_suseconds_t.h:
@ -3921,8 +3926,6 @@ ASTNode.cpp:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/_xlocale.h:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/filesystem:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/malloc/_malloc_type.h:
/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/c++/v1/__utility/exchange.h:

BIN
pycdc

Binary file not shown.