Reconstruct 3.10+ match/case (structural pattern matching)

pycdc did not reconstruct `match` statements: class patterns hit the
unhandled MATCH_CLASS opcode and bailed to "# WARNING: Decompyle incomplete",
and value patterns were mis-rendered as an if/elif chain.

This adds reconstruction of the common, statically-recognizable shapes:

  * Class patterns - `case Cls(a, b):` - including positional captures and
    `_` wildcard sub-patterns (`case Cls(_):`).
  * Value patterns - `case 0:` / `case 'x':` - compiled as a COPY-threaded
    COMPARE_OP chain rather than MATCH_CLASS.
  * The `case _:` wildcard (which carries no test opcode).

A pre-scan over the code object recognizes the simple, guard-free shape of
each case and records it; the MATCH_CLASS handler and a small guarded hook in
COMPARE_OP open ASTMatchBlock/ASTCaseBlock and skip the pattern-test
machinery, and the block-close logic closes each case at its fail-target and
the match at its merge. Anything outside the recognized shape (kwarg patterns,
guards, sequence/mapping patterns) is left unregistered and still bails
honestly, so no incorrect output is produced.

New AST nodes: ASTMatchBlock (subject) and ASTCaseBlock (pattern), rendered
via the existing block-header machinery (type_str + print_src). PycBuffer
gains setPos() so the reconstructor can skip to a case body.

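All hooks are guarded by per-offset match tables that are empty for code
without a match statement, so non-match input is unaffected. The tables are
only populated for 3.11+ bytecode (value patterns: 3.11 only), so a `match`
compiled by 3.10 is not reconstructed by this change.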

Test: tests/input/match_statement.py exercises class patterns, a wildcard
sub-pattern, value patterns and `case _`. Full existing suite still passes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mario Penterman 2026-06-19 17:37:02 +02:00
commit 7ff27dfec5
7 changed files with 677 additions and 2 deletions

View file

@ -201,7 +201,7 @@ const char* ASTBlock::type_str() const
{
static const char* s_type_strings[] = {
"", "if", "else", "elif", "try", "CONTAINER", "except",
"finally", "while", "for", "with", "async for"
"finally", "while", "for", "with", "async for", "match", "case"
};
return s_type_strings[blktype()];
}

View file

@ -534,7 +534,8 @@ public:
enum BlkType {
BLK_MAIN, BLK_IF, BLK_ELSE, BLK_ELIF, BLK_TRY,
BLK_CONTAINER, BLK_EXCEPT, BLK_FINALLY,
BLK_WHILE, BLK_FOR, BLK_WITH, BLK_ASYNCFOR
BLK_WHILE, BLK_FOR, BLK_WITH, BLK_ASYNCFOR,
BLK_MATCH, BLK_CASE
};
ASTBlock(BlkType blktype, int end = 0, int inited = 0)
@ -640,6 +641,32 @@ private:
PycRef<ASTNode> m_var; // optional value
};
/* AST block for a 3.10+ `match <subject>:` statement (block type BLK_MATCH).
   Its children are the BLK_CASE blocks appended to it. */
class ASTMatchBlock : public ASTBlock {
public:
    ASTMatchBlock(int end, PycRef<ASTNode> subject)
        : ASTBlock(ASTBlock::BLK_MATCH, end),
          m_subject(std::move(subject))
    {
        /* init() is invoked at construction, so no later init() call is
           needed for a match block. */
        init();
    }

    /* The expression being matched on. */
    PycRef<ASTNode> subject() const { return m_subject; }

private:
    PycRef<ASTNode> m_subject;
};
/* One `case <pattern>:` arm inside a match (block type BLK_CASE). The pattern
   node is printed verbatim: an expression for a class/value pattern, or the
   bare name `_` for the wildcard. */
class ASTCaseBlock : public ASTBlock {
public:
    ASTCaseBlock(int end, PycRef<ASTNode> pattern)
        : ASTBlock(ASTBlock::BLK_CASE, end),
          m_pattern(std::move(pattern))
    {
        /* Unlike setPattern(), construction does not call init(). */
    }

    /* The pattern rendered after the `case` keyword. */
    PycRef<ASTNode> pattern() const { return m_pattern; }

    /* Replace the pattern, then call init(). */
    void setPattern(PycRef<ASTNode> p)
    {
        m_pattern = std::move(p);
        init();
    }

private:
    PycRef<ASTNode> m_pattern;
};
class ASTComprehension : public ASTNode {
public:
typedef std::list<PycRef<ASTIterBlock>> generator_t;

View file

@ -2,6 +2,8 @@
#include <cstdint>
#include <stdexcept>
#include <unordered_set>
#include <unordered_map>
#include <set>
#include "ASTree.h"
#include "FastStack.h"
#include "pyc_numeric.h"
@ -100,6 +102,417 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
exception_entries = code->exceptionTableEntries();
}
/* Pre-scan for 3.11+ `match`/`case` CLASS patterns (value patterns have their
own pre-scan further below). Each case compiles to
[COPY 1] # preserve a subject copy for the next case
<load class>; LOAD_CONST <kwnames-tuple>; MATCH_CLASS <nPos>
COPY 1; POP_JUMP_FORWARD_IF_NONE <failTarget> # pattern-fail -> next case
UNPACK_SEQUENCE <n>; STORE... (or POP_TOP when n==0) # captures
<body>; [JUMP_FORWARD <matchEnd>]
<failTarget:> POP_TOP # discard the None result, then the next case
We register only the SIMPLE handleable shape: positional captures only
(empty kwnames), no guard (no jump to failTarget inside the body). Anything
else is left unregistered -> MATCH_CLASS bails -> honest `.dis`.
matchCase[MATCH_CLASS off] = {isFirst, matchEnd, failTarget, bodyStart, caps};
matchCaseEnd = failTarget offsets (close BLK_CASE + skip the machinery POP_TOP);
matchBlockEnd = matchEnd offsets (close BLK_MATCH). */
struct MCase { bool isFirst; int matchEnd; int failTarget; int bodyStart;
int popExtra; std::vector<PycRef<ASTNode>> caps; };
std::unordered_map<int, MCase> matchCase;
std::unordered_map<int, int> matchCaseEnd; // failTarget -> offset after the machinery POP_TOP
std::unordered_set<int> matchBlockEnd;
/* SHIP-149: VALUE-pattern `match`/`case` (literal patterns — `match x: case 'a':
`). Unlike class patterns (MATCH_CLASS) these compile to a COMPARE_OP chain:
per case `[COPY 1] LOAD <const>; COMPARE_OP ==; POP_JUMP_FORWARD_IF_FALSE ft`,
the subject threaded on the stack via the COPY (the LAST case omits the COPY,
its COMPARE consuming the subject); the matched path POP_TOPs the leftover
subject then runs the body. matchValue[COMPARE off] = the case. */
struct VCase { bool isFirst; bool isLast; int matchEnd; int failTarget; int bodyStart; };
std::unordered_map<int, VCase> matchValue;
/* A wildcard `case _:` carries NO MATCH_CLASS (it always matches; the bytecode
just POP_TOPs the leftover subject then runs the body), so the MATCH_CLASS
pre-scan can't see it and would end the match at the last TYPED case leaving
the wildcard body to render as spurious post-match code (dropping any
fall-through/return after the match: RETDROP, e.g. ripple `factory.__call__`).
Map the wildcard body start -> the real match merge so the loop opens a
`case _:` block there and the match closes at the true merge instead. */
std::unordered_map<int, int> matchWildcardOpen; // wildcardBodyStart -> matchEnd(merge)
if (mod->verCompare(3, 11) >= 0) {
struct Ins { int op; int arg; int off; int next; };
std::vector<Ins> v;
std::unordered_map<int,int> idxOf; // offset -> index in v
{
PycBuffer scan(code->code()->value(), code->code()->length());
int so, sa, sp = 0;
while (!scan.atEof()) {
int io = sp;
bc_next(scan, mod, so, sa, sp);
if (so == Pyc::CACHE) continue;
idxOf[io] = (int)v.size();
v.push_back({ so, sa, io, sp });
}
}
auto capName = [&](const Ins& s) -> PycRef<ASTNode> {
if (s.op == Pyc::STORE_FAST_A)
return new ASTName(code->getLocal(s.arg));
if (s.op == Pyc::STORE_NAME_A || s.op == Pyc::STORE_GLOBAL_A)
return new ASTName(code->getName(s.arg));
if (s.op == Pyc::STORE_DEREF_A)
return new ASTName(code->getCellVar(mod, s.arg));
return nullptr;
};
/* Parse one case starting at the MATCH_CLASS at v[mi]; fill failTarget,
bodyStart, caps. Returns true iff the simple shape holds. */
auto parseCase = [&](size_t mi, int& failTarget, int& bodyStart,
int& popExtra,
std::vector<PycRef<ASTNode>>& caps) -> bool {
int nPos = v[mi].arg;
/* require an empty kwnames tuple just before (positional-only) */
if (mi == 0) return false;
const Ins& kw = v[mi-1];
if (kw.op != Pyc::LOAD_CONST_A) return false;
PycRef<PycObject> kwo = code->getConst(kw.arg);
if (kwo == nullptr || (kwo->type() != PycObject::TYPE_TUPLE
&& kwo->type() != PycObject::TYPE_SMALL_TUPLE)) return false;
if (kwo.cast<PycTuple>()->values().size() != 0) return false; // kw patterns -> bail
/* next: COPY 1 ; POP_JUMP_FORWARD_IF_NONE failTarget */
if (mi+2 >= v.size()) return false;
if (!(v[mi+1].op == Pyc::COPY_A && v[mi+1].arg == 1)) return false;
if (v[mi+2].op != Pyc::POP_JUMP_FORWARD_IF_NONE_A) return false;
failTarget = v[mi+2].next + v[mi+2].arg * (int)sizeof(uint16_t);
size_t j = mi+3;
caps.clear();
/* matched-path captures: UNPACK_SEQUENCE <nPos> then nPos STOREs (the
class pattern always emits UNPACK_SEQUENCE, even nPos==0). */
if (j >= v.size() || v[j].op != Pyc::UNPACK_SEQUENCE_A
|| v[j].arg != nPos) return false;
j++;
for (int k = 0; k < nPos; ++k, ++j) {
if (j >= v.size()) return false;
/* a `_` wildcard sub-pattern (`case Ok(_):`) discards its captured
value with POP_TOP instead of a STORE; render it as `_`. */
if (v[j].op == Pyc::POP_TOP) {
PycRef<PycString> us = new PycString(); us->setValue("_");
caps.push_back(new ASTName(us));
continue;
}
PycRef<ASTNode> nm = capName(v[j]);
if (nm == nullptr) return false; // non-simple capture target
caps.push_back(nm);
}
/* optional leftover-subject POP_TOP(s): a COPY preserved the subject
for a later case; on THIS (matched) path the copy is discarded here.
(A case body never starts with POP_TOP, so this is unambiguous.) */
popExtra = 0;
while (j < v.size() && v[j].op == Pyc::POP_TOP) { popExtra++; j++; }
if (j >= v.size()) return false;
bodyStart = v[j].off; // next REAL instruction (v is CACHE-filtered)
/* guard detection: a jump to failTarget inside [bodyStart, failTarget)
means a `case P if g:` guard (or other case-internal branch we don't
model) -> bail. */
for (size_t k = j; k < v.size() && v[k].off < failTarget; ++k) {
int op = v[k].op;
bool isJump = op == Pyc::POP_JUMP_FORWARD_IF_TRUE_A
|| op == Pyc::POP_JUMP_FORWARD_IF_FALSE_A
|| op == Pyc::POP_JUMP_FORWARD_IF_NONE_A
|| op == Pyc::POP_JUMP_FORWARD_IF_NOT_NONE_A
|| op == Pyc::JUMP_FORWARD_A;
if (isJump) {
int tgt = v[k].next + v[k].arg * (int)sizeof(uint16_t);
if (tgt == failTarget) return false; // guard
}
}
return true;
};
/* collect MATCH_CLASS anchors, parse each */
struct CaseRec { size_t mi; int failTarget; int bodyStart;
int popExtra; std::vector<PycRef<ASTNode>> caps; };
std::vector<CaseRec> recs;
for (size_t i = 0; i < v.size(); ++i) {
if (v[i].op != Pyc::MATCH_CLASS_A) continue;
int ft, bs, pe; std::vector<PycRef<ASTNode>> caps;
if (parseCase(i, ft, bs, pe, caps))
recs.push_back({ i, ft, bs, pe, caps });
}
/* chain cases into matches: case i's successor is the case whose
MATCH_CLASS region begins at the POP_TOP at failTarget (the failTarget
op is a POP_TOP; the next case follows it). */
std::unordered_map<int,int> caseByPopTop; // failTarget(=POP_TOP off) -> rec idx of the case there
/* a rec's case begins with an optional COPY then the class load; the
POP_TOP that precedes it is at failTarget of the previous case. The
POP_TOP sits at some offset; the NEXT case's MATCH_CLASS is the first
MATCH_CLASS anchor at a higher offset. Map each rec's "entry POP_TOP". */
/* Determine, for each rec, whether some OTHER rec's failTarget POP_TOP is
immediately followed (skipping POP_TOP/COPY/loads) by this rec's MATCH_CLASS. */
auto firstMatchClassAtOrAfter = [&](int off) -> int {
for (size_t k = 0; k < v.size(); ++k)
if (v[k].off >= off && v[k].op == Pyc::MATCH_CLASS_A)
return (int)k;
return -1;
};
std::unordered_set<size_t> isSuccessor;
std::unordered_map<size_t,size_t> succOf; // rec idx -> rec idx
std::unordered_map<size_t,size_t> recByMi;
for (size_t r = 0; r < recs.size(); ++r) recByMi[recs[r].mi] = r;
/* Link each parsed case to its successor: the case whose MATCH_CLASS follows
   the POP_TOP at this case's failTarget. The link is only made when nothing
   but case-setup lies between that POP_TOP and the MATCH_CLASS, i.e. exactly
       [COPY 1]  <LOAD_{GLOBAL,NAME,FAST,DEREF}>  LOAD_ATTR*  LOAD_CONST <kwnames>
   Without this check the first MATCH_CLASS found anywhere later in the code
   object (for example the first case of a second, unrelated `match`, which
   additionally loads its own subject) would be chained into this match. */
for (size_t r = 0; r < recs.size(); ++r) {
    const int ft = recs[r].failTarget;
    auto ftIt = idxOf.find(ft);
    if (ftIt == idxOf.end() || v[ftIt->second].op != Pyc::POP_TOP)
        continue;
    const int nm = firstMatchClassAtOrAfter(v[ftIt->second].next);
    if (nm < 0)
        continue;
    auto recIt = recByMi.find((size_t)nm);
    if (recIt == recByMi.end())
        continue;

    /* v is in offset order, so the setup region is (POP_TOP index, nm). */
    size_t k = (size_t)ftIt->second + 1;
    const size_t stop = (size_t)nm;
    /* optional subject copy preserved for a later case */
    if (k < stop && v[k].op == Pyc::COPY_A && v[k].arg == 1)
        ++k;
    /* class load: one base load followed by any number of attribute loads */
    bool setupOnly = k < stop
            && (v[k].op == Pyc::LOAD_GLOBAL_A || v[k].op == Pyc::LOAD_NAME_A
                || v[k].op == Pyc::LOAD_FAST_A || v[k].op == Pyc::LOAD_DEREF_A);
    if (setupOnly) {
        ++k;
        while (k < stop && v[k].op == Pyc::LOAD_ATTR_A)
            ++k;
        /* the kwnames LOAD_CONST must be the only instruction left */
        setupOnly = (k + 1 == stop) && v[k].op == Pyc::LOAD_CONST_A;
    }
    if (!setupOnly)
        continue;   // something else intervenes -> not the next case of this match

    succOf[r] = recIt->second;
    isSuccessor.insert(recIt->second);
}
for (size_t r = 0; r < recs.size(); ++r) {
if (isSuccessor.count(r)) continue; // not a first case
/* walk the chain to find the last case + matchEnd */
size_t last = r;
while (succOf.count(last)) last = succOf[last];
int lastFt = recs[last].failTarget; // POP_TOP of the final no-match
int matchEnd = (idxOf.count(lastFt) && v[idxOf[lastFt]].op == Pyc::POP_TOP)
? v[idxOf[lastFt]].next : lastFt;
/* Wildcard `case _:` detection. The TYPED cases' matched bodies all
JUMP_FORWARD to the post-match merge M. If M lies PAST `matchEnd`
(the last typed case's no-match POP_TOP+1), the region [matchEnd, M)
is the wildcard case body (the final no-match falls through to it).
Require a single consistent merge target and NO MATCH_CLASS in the
gap (an unparsed typed case would also have a MATCH_CLASS there ->
leave the match honest rather than mis-label it a wildcard). */
{
int firstOff = v[recs[r].mi].off;
int merge = -1; bool consistent = true;
for (size_t k = 0; k < v.size(); ++k) {
if (v[k].off < firstOff || v[k].off > lastFt) continue;
if (v[k].op != Pyc::JUMP_FORWARD_A) continue;
int t = v[k].next + v[k].arg * (int)sizeof(uint16_t);
if (t > matchEnd) {
if (merge < 0) merge = t;
else if (merge != t) { consistent = false; break; }
}
}
/* The gap must be a TRUE wildcard 'case _:' (subject already
discarded by the final no-match POP_TOP), not a capture pattern
('case other:' -> STORE_x binds the subject) nor any further
pattern test (MATCH_x or POP_JUMP_FORWARD_IF_NONE). Either would
make the gap mis-render as 'case _:' with a wrong/garbage binding,
and the STORE divergence is CF-gate-blind. Bail to honest .dis
on anything but a plain unconditional body. */
bool gapUnsafe = false;
if (consistent && merge > matchEnd) {
for (size_t k = 0; k < v.size(); ++k) {
if (v[k].off < matchEnd || v[k].off >= merge) continue;
int op = v[k].op;
if (op == Pyc::MATCH_CLASS_A || op == Pyc::MATCH_SEQUENCE
|| op == Pyc::MATCH_MAPPING || op == Pyc::MATCH_KEYS
|| op == Pyc::POP_JUMP_FORWARD_IF_NONE_A) { gapUnsafe = true; break; }
}
/* first real op of the gap (skip NOP): a leading STORE_* binds
the subject => capture pattern, not `_`. */
for (size_t k = 0; k < v.size(); ++k) {
if (v[k].off < matchEnd) continue;
if (v[k].op == Pyc::NOP) continue;
if (v[k].op == Pyc::STORE_FAST_A || v[k].op == Pyc::STORE_NAME_A
|| v[k].op == Pyc::STORE_GLOBAL_A || v[k].op == Pyc::STORE_DEREF_A)
gapUnsafe = true;
break;
}
/* The wildcard body must be TERMINAL (ends in raise/return). A
FALL-THROUGH wildcard reaches `merge` by falling off its end,
so `merge` is shared with the typed cases AND any enclosing
if/else whose branches converge there; extending the match to
it then mis-places the post-merge code (e.g. a method-level
`return` rendered inside the `else`: node_connect_protocol
`get_connect_stages`, a gate-blind mis-render). A terminal
wildcard exits before `merge`, so the merge is unambiguously
the match's own. */
int lastOp = -1;
for (size_t k = 0; k < v.size(); ++k)
if (v[k].off >= matchEnd && v[k].off < merge) lastOp = v[k].op;
if (!(lastOp == Pyc::RAISE_VARARGS_A || lastOp == Pyc::RETURN_VALUE
|| lastOp == Pyc::RETURN_CONST_A || lastOp == Pyc::RERAISE
|| lastOp == Pyc::RERAISE_A))
gapUnsafe = true;
}
if (consistent && merge > matchEnd && !gapUnsafe) {
matchWildcardOpen[matchEnd] = merge; // open `case _:` at the gap start
matchCaseEnd[merge] = merge; // close the wildcard case at the merge
matchEnd = merge; // the match itself ends at the merge
}
}
/* register every case in the chain */
for (size_t c = r; ; c = succOf[c]) {
MCase mc;
mc.isFirst = (c == r);
mc.matchEnd = matchEnd;
mc.failTarget = recs[c].failTarget;
mc.bodyStart = recs[c].bodyStart;
mc.popExtra = recs[c].popExtra;
mc.caps = recs[c].caps;
matchCase[v[recs[c].mi].off] = mc;
int ft = recs[c].failTarget;
matchCaseEnd[ft] = (idxOf.count(ft) && v[idxOf[ft]].op == Pyc::POP_TOP)
? v[idxOf[ft]].next : ft;
if (!succOf.count(c)) break;
}
matchBlockEnd.insert(matchEnd);
}
}
/* SHIP-149 value-pattern match pre-scan (see VCase above). */
if (mod->verCompare(3, 11) >= 0 && mod->verCompare(3, 12) < 0) {
struct Ins { int op; int arg; int off; int next; };
std::vector<Ins> v; std::unordered_map<int,int> idxOf;
{
PycBuffer scan(code->code()->value(), code->code()->length());
int so, sa, sp = 0;
while (!scan.atEof()) {
int io = sp;
bc_next(scan, mod, so, sa, sp);
if (so == Pyc::CACHE) continue;
idxOf[io] = (int)v.size();
v.push_back({ so, sa, io, sp });
}
}
/* a case TEST: `[COPY 1] <pattern>; COMPARE_OP ==(arg 2); PJF ft`, where the
pattern (the COMPARE's right operand) is EITHER a single LOAD_CONST (literal
`case 'a':`) OR a dotted name LOAD_{GLOBAL,NAME,DEREF,FAST} base + >=1
LOAD_ATTR (Enum/attribute `case State.A:`). The COPY-1-before / POP_TOP-after
subject threading is identical for both, so anchor on the COMPARE==/PJF and
walk backward for the pattern. The pattern NODE is already built on the stack
by the load ops, so the runtime COMPARE handler is unchanged. */
struct Test { int startOff; bool hasCopy; int compareOff; int ft; int matchedNext; };
std::vector<Test> tests;
std::unordered_map<int,int> testByStart;
for (size_t c = 1; c + 1 < v.size(); ++c) {
if (!(v[c].op == Pyc::COMPARE_OP_A && v[c].arg == 2
&& v[c+1].op == Pyc::POP_JUMP_FORWARD_IF_FALSE_A))
continue;
int j = (int)c - 1, attrs = 0;
while (j >= 0 && v[j].op == Pyc::LOAD_ATTR_A) { attrs++; --j; }
int patStart = -1;
if (attrs == 0 && j >= 0 && v[j].op == Pyc::LOAD_CONST_A)
patStart = j; /* literal pattern */
else if (attrs >= 1 && j >= 0
&& (v[j].op == Pyc::LOAD_GLOBAL_A || v[j].op == Pyc::LOAD_NAME_A
|| v[j].op == Pyc::LOAD_DEREF_A || v[j].op == Pyc::LOAD_FAST_A))
patStart = j; /* dotted Enum/attribute pattern */
if (patStart < 0)
continue;
bool hasCopy = (patStart >= 1 && v[patStart-1].op == Pyc::COPY_A
&& v[patStart-1].arg == 1);
int startOff = hasCopy ? v[patStart-1].off : v[patStart].off;
int ft = v[c+1].next + v[c+1].arg * (int)sizeof(uint16_t);
testByStart[startOff] = (int)tests.size();
tests.push_back({ startOff, hasCopy, v[c].off, ft, v[c+1].next });
}
std::unordered_set<int> targeted;
for (const auto& t : tests)
if (testByStart.count(t.ft)) targeted.insert(t.ft);
for (size_t t = 0; t < tests.size(); ++t) {
if (!tests[t].hasCopy || targeted.count(tests[t].startOff))
continue; /* not a chain start */
std::vector<int> chain;
std::unordered_set<int> seen;
int cur = (int)t; bool ok = true;
while (true) {
if (seen.count(cur)) { ok = false; break; }
seen.insert(cur); chain.push_back(cur);
int ft = tests[cur].ft;
if (!testByStart.count(ft)) break; /* ft = matchEnd, cur was last? */
int nx = testByStart[ft];
if (tests[nx].hasCopy) { cur = nx; continue; }
chain.push_back(nx); break; /* no-copy = LAST case */
}
if (!ok || chain.size() < 2) continue;
int lastT = chain.back();
if (tests[lastT].hasCopy) continue; /* must end on a no-copy last case */
int lastFt = tests[lastT].ft;
auto caseBodyStart = [&](const Test& tc) -> int {
if (tc.hasCopy) {
int mn = tc.matchedNext; /* POP_TOP discarding subject */
return (idxOf.count(mn) && v[idxOf[mn]].op == Pyc::POP_TOP)
? v[idxOf[mn]].next : mn;
}
return tc.matchedNext;
};
/* A non-returning case body JUMP_FORWARDs to the TRUE match end, which
lies PAST the last typed case's ft when a wildcard `case _:` body sits
between (emote_svc: `case 'destroy':` ft -> the `case _:` body, the
'move' case jumps to the post-wildcard merge). Find a single consistent
forward-jump target > lastFt among the case bodies => the wildcard span
[lastFt, conv) + matchEnd=conv. */
int conv = -1; bool convOk = true;
for (size_t k = 0; k < chain.size(); ++k) {
const Test& tc = tests[chain[k]];
int bs = caseBodyStart(tc);
if (!idxOf.count(bs)) continue;
for (int ii = idxOf[bs]; ii < (int)v.size() && v[ii].off < tc.ft; ++ii) {
if (v[ii].op == Pyc::JUMP_FORWARD_A) {
int t = v[ii].next + v[ii].arg * (int)sizeof(uint16_t);
if (t > lastFt) {
if (conv < 0) conv = t;
else if (conv != t) convOk = false;
}
}
}
}
int matchEnd = lastFt;
int wildcardStart = -1;
if (convOk && conv > lastFt) {
bool clean = true; /* [lastFt, conv) must be a plain wildcard body */
for (const auto& tt : tests)
if (tt.startOff >= lastFt && tt.startOff < conv) { clean = false; break; }
/* The convergence must NOT cross an enclosing exception handler: when
the match sits inside a `try/except`, a non-returning case jumps to
the enclosing try's exit PAST the except handler, so `conv` overruns
the real match end and a wildcard span [lastFt, conv) would absorb
the enclosing `except` (http_deprecated). Bail the whole value-match
there; it renders faithfully as if/elif instead. */
bool crossesExc = false;
for (int ii = 0; ii < (int)v.size(); ++ii)
if (v[ii].off >= lastFt && v[ii].off < conv
&& v[ii].op == Pyc::PUSH_EXC_INFO) { crossesExc = true; break; }
if (crossesExc) continue; /* skip registering this chain */
if (clean) { matchEnd = conv; wildcardStart = lastFt; }
}
/* Bail when the match is the LAST statement of a loop with PER-CASE
back-edges: each case body's fall-through `continue`s directly to the
loop top (a JUMP_BACKWARD inside the match span). CPython collapsed the
match-merge into the loop-continue (one back-edge PER CASE); a structured
`match` recompiles with a shared merge (one back-edge + per-case
JUMP_FORWARDs), so the loop-opcode count diverges. The faithful render is
gate-dirty by construction, so leave it to the if/elif fallback (which
recompiles to the per-case back-edge layout). JUMP_BACKWARD_NO_INTERRUPT
(await SEND loops) is NOT a loop continue. */
{
int spanStart = tests[chain[0]].startOff;
bool loopBack = false;
for (const auto& iv : v)
if (iv.off >= spanStart && iv.off < matchEnd
&& iv.op == Pyc::JUMP_BACKWARD_A) { loopBack = true; break; }
if (loopBack) continue;
}
for (size_t k = 0; k < chain.size(); ++k) {
const Test& tc = tests[chain[k]];
VCase vc;
vc.isFirst = (k == 0);
vc.isLast = (k + 1 == chain.size());
vc.matchEnd = matchEnd;
vc.failTarget = tc.ft;
vc.bodyStart = caseBodyStart(tc);
matchValue[tc.compareOff] = vc;
matchCaseEnd[tc.ft] = tc.ft; /* close case at ft; no machinery to skip */
}
if (wildcardStart >= 0) {
matchWildcardOpen[wildcardStart] = matchEnd; /* open `case _:` here */
matchCaseEnd[matchEnd] = matchEnd; /* close the wildcard at the merge */
}
matchBlockEnd.insert(matchEnd);
}
}
while (!source.atEof()) {
#if defined(BLOCK_DEBUG) || defined(STACK_DEBUG)
fprintf(stderr, "%-7d", pos);
@ -192,6 +605,76 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
}
}
/* Close the whole `match` at its end (post-match code starts here). */
if (curblock->blktype() == ASTBlock::BLK_MATCH && matchBlockEnd.count(pos)) {
PycRef<ASTBlock> ms = curblock;
blocks.pop();
curblock = blocks.top();
curblock->append(ms.cast<ASTNode>());
}
/* A `match` case body whose LAST statement is an else-less `if X: Y` exits to
the match merge via a JUMP_FORWARD that pycdc misreads as an `else:` (a
BLK_ELSE spanning to matchEnd, PAST the case's fail-target). At the case's
fail-target (a matchCaseEnd point) curblock is that spurious BLK_ELSE/BLK_IF,
not the BLK_CASE, so the close below never fires and the NEXT case opens
nested inside the else (events_excavating2: `case RIGHT: if: else: case
UP:`, which is invalid). Drain the run of inner BLK_IF/BLK_ELSE/BLK_ELIF
sitting on the BLK_CASE into it here so the case can close as a sibling. A
non-mutating peek confirms the shape first. */
if (matchCaseEnd.count(pos) && blocks.size() > 1
&& (curblock->blktype() == ASTBlock::BLK_IF
|| curblock->blktype() == ASTBlock::BLK_ELSE
|| curblock->blktype() == ASTBlock::BLK_ELIF)) {
std::stack<PycRef<ASTBlock> > peek = blocks;
int nif = 0; bool shape = false;
while (peek.size() > 1
&& (peek.top()->blktype() == ASTBlock::BLK_IF
|| peek.top()->blktype() == ASTBlock::BLK_ELSE
|| peek.top()->blktype() == ASTBlock::BLK_ELIF)) {
peek.pop(); nif++;
if (peek.top()->blktype() == ASTBlock::BLK_CASE) { shape = true; break; }
}
if (shape) {
for (int k = 0; k < nif; ++k) {
PycRef<ASTBlock> inner = curblock;
blocks.pop();
if (!stack_hist.empty())
stack_hist.pop();
curblock = blocks.top();
curblock->append(inner.cast<ASTNode>());
}
}
}
/* Close a `match` case at its pattern-fail target (the next-case POP_TOP).
The body just ran; close the BLK_CASE into the BLK_MATCH and skip the
machinery POP_TOP that discards the None match-result. */
if (curblock->blktype() == ASTBlock::BLK_CASE && matchCaseEnd.count(pos)) {
PycRef<ASTBlock> cs = curblock;
blocks.pop();
curblock = blocks.top();
curblock->append(cs.cast<ASTNode>());
int after = matchCaseEnd[pos];
source.setPos(after);
pos = after;
while (next_exception_entry < exception_entries.size()
&& exception_entries[next_exception_entry].start_offset < pos)
next_exception_entry++;
continue;
}
/* Open a wildcard `case _:` block (the last typed case just closed and set
pos to the wildcard body start). The body reconstructs normally; the
BLK_CASE closes at the merge (matchCaseEnd[merge]) and the BLK_MATCH right
after it. */
if (curblock->blktype() == ASTBlock::BLK_MATCH && matchWildcardOpen.count(pos)) {
PycRef<PycString> us = new PycString();
us->setValue("_");
blocks.push(new ASTCaseBlock(matchWildcardOpen[pos], new ASTName(us)));
curblock = blocks.top();
curblock->init();
}
curpos = pos;
bc_next(source, mod, opcode, operand, pos);
@ -725,6 +1208,36 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
break;
case Pyc::COMPARE_OP_A:
{
/* SHIP-149: a registered value-pattern match case test. Open the
`match`/`case` instead of an `if x == pat`. The subject threads on
the stack via the per-case COPY; pop the pattern + the subject COPY
(non-last) or the subject itself (last case), open BLK_MATCH (first)
+ BLK_CASE(pattern), and jump to the body. Closing reuses matchCaseEnd /
matchBlockEnd. */
auto vmi = matchValue.find(curpos);
if (vmi != matchValue.end()) {
const VCase& vc = vmi->second;
PycRef<ASTNode> pattern = stack.top(); stack.pop();
if (vc.isLast) {
stack.pop(); /* last case consumes the subject */
} else {
PycRef<ASTNode> copy = stack.top(); stack.pop(); /* drop the COPY */
if (vc.isFirst) {
PycRef<ASTNode> subject = stack.top(); /* threads; don't pop */
blocks.push(new ASTMatchBlock(vc.matchEnd, subject));
curblock = blocks.top();
}
}
blocks.push(new ASTCaseBlock(vc.failTarget, pattern));
curblock = blocks.top();
curblock->init();
source.setPos(vc.bodyStart);
pos = vc.bodyStart;
while (next_exception_entry < exception_entries.size()
&& exception_entries[next_exception_entry].start_offset < pos)
next_exception_entry++;
break;
}
PycRef<ASTNode> right = stack.top();
stack.pop();
PycRef<ASTNode> left = stack.top();
@ -1821,6 +2334,27 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
PycRef<ASTNode> value = stack.top();
stack.pop();
/* Value-pattern match subject cleanup: the subject is computed once
and THREADS on the stack (COPY'd per case for the COMPARE). When a
case body exits via a loop continue / a return inside a loop, the
compiler emits a POP_TOP to discard the still-live subject before the
back-edge; pycdc would otherwise render it as a stray expression statement
(`args['k']`). Inside a match, if the popped value IS the enclosing
BLK_MATCH's subject node (same PycRef threaded through), drop it. */
if (value != nullptr) {
std::stack<PycRef<ASTBlock> > ms = blocks;
while (!ms.empty()) {
if (ms.top()->blktype() == ASTBlock::BLK_MATCH) {
if (ms.top().cast<ASTMatchBlock>()->subject() == value)
value = nullptr; // match subject cleanup -> drop
break;
}
ms.pop();
}
if (value == nullptr)
break;
}
if (!curblock->inited()) {
if (curblock->blktype() == ASTBlock::BLK_WITH) {
curblock.cast<ASTWithBlock>()->setExpr(value);
@ -2692,6 +3226,44 @@ PycRef<ASTNode> BuildFromCode(PycRef<PycCode> code, PycModule* mod)
stack.push(value);
}
break;
case Pyc::MATCH_CLASS_A:
    {
        /* 3.11 `match`/`case` class pattern. The match pre-scan registers
           only the SIMPLE handleable shape (positional captures only, no
           guard, no kw patterns) in matchCase, keyed by the MATCH_CLASS
           offset; anything not registered bails out as an unsupported
           opcode. */
        auto mci = matchCase.find(curpos);
        if (mci == matchCase.end()) {
            fprintf(stderr, "Unsupported opcode: %s (%d)\n",
                    Pyc::OpcodeName(opcode), opcode);
            cleanBuild = false;
            return new ASTNodeList(defblock->nodes());
        }
        const MCase& mc = mci->second;

        /* stack: [..., (leftover subject copies), subject, class, kwnames] */
        stack.pop();                                    // kwnames (empty tuple)
        PycRef<ASTNode> classnode = stack.top(); stack.pop();
        PycRef<ASTNode> subject = stack.top(); stack.pop();
        /* matched-path leftover-subject pops counted by the pre-scan */
        for (int k = 0; k < mc.popExtra; ++k)
            if (!stack.empty()) stack.pop();

        /* the pattern renders like a call: ClassName(cap0, cap1, ...) */
        ASTCall::pparam_t pparams;
        for (const auto& c : mc.caps)
            pparams.push_back(c);
        PycRef<ASTNode> pattern = new ASTCall(classnode, pparams,
                                              ASTCall::kwparam_t());

        /* only the first case of a match opens the enclosing BLK_MATCH */
        if (mc.isFirst) {
            blocks.push(new ASTMatchBlock(mc.matchEnd, subject));
            curblock = blocks.top();
        }
        blocks.push(new ASTCaseBlock(mc.failTarget, pattern));
        curblock = blocks.top();
        curblock->init();

        /* skip the whole case-test machinery; the body reconstructs
           normally and the BLK_CASE/BLK_MATCH close at their ends. */
        source.setPos(mc.bodyStart);
        pos = mc.bodyStart;
        /* Keep the exception-table cursor in step with the jump, exactly as
           the other two setPos() sites (case close, value-pattern open) do:
           entries that start inside the skipped span must not stall it. */
        while (next_exception_entry < exception_entries.size()
                && exception_entries[next_exception_entry].start_offset < pos)
            next_exception_entry++;
    }
    break;
default:
fprintf(stderr, "Unsupported opcode: %s (%d)\n", Pyc::OpcodeName(opcode), opcode);
cleanBuild = false;
@ -3150,6 +3722,12 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
print_src(blk.cast<ASTIterBlock>()->index(), mod, pyc_output);
pyc_output << " in ";
print_src(blk.cast<ASTIterBlock>()->iter(), mod, pyc_output);
} else if (blk->blktype() == ASTBlock::BLK_MATCH) {
pyc_output << " ";
print_src(blk.cast<ASTMatchBlock>()->subject(), mod, pyc_output);
} else if (blk->blktype() == ASTBlock::BLK_CASE) {
pyc_output << " ";
print_src(blk.cast<ASTCaseBlock>()->pattern(), mod, pyc_output);
} else if (blk->blktype() == ASTBlock::BLK_EXCEPT &&
blk.cast<ASTCondBlock>()->cond() != NULL) {
pyc_output << " ";

4
data.h
View file

@ -52,6 +52,10 @@ public:
int getByte() override;
void getBuffer(int bytes, void* buffer) override;
// Reposition the read cursor (used by 3.11 match/case reconstruction to
// skip past pattern-test machinery to a case body).
void setPos(int pos) { m_pos = pos; }
private:
const unsigned char* m_buffer;
int m_size, m_pos;

Binary file not shown.

View file

@ -0,0 +1,25 @@
# 3.10+ structural pattern matching (match/case) reconstruction.
def describe_point(command):
match command:
case Point(x, y):
return f'point {x},{y}'
case Rect(w, h):
return f'rect {w}x{h}'
case Wrapper(_):
return 'wrapped'
case _:
return 'unknown'
def classify(value):
match value:
case 0:
result = 'zero'
case 1:
result = 'one'
case 'hello':
result = 'greeting'
case _:
result = 'other'
return result

View file

@ -0,0 +1,41 @@
def describe_point ( command ) : <EOL>
<INDENT>
match command : <EOL>
<INDENT>
case Point ( x , y ) : <EOL>
<INDENT>
return f'point {x},{y}' <EOL>
<OUTDENT>
case Rect ( w , h ) : <EOL>
<INDENT>
return f'rect {w}x{h}' <EOL>
<OUTDENT>
case Wrapper ( _ ) : <EOL>
<INDENT>
return 'wrapped' <EOL>
<OUTDENT>
<OUTDENT>
return 'unknown' <EOL>
<OUTDENT>
def classify ( value ) : <EOL>
<INDENT>
match value : <EOL>
<INDENT>
case 0 : <EOL>
<INDENT>
result = 'zero' <EOL>
<OUTDENT>
case 1 : <EOL>
<INDENT>
result = 'one' <EOL>
<OUTDENT>
case 'hello' : <EOL>
<INDENT>
result = 'greeting' <EOL>
<OUTDENT>
case _ : <EOL>
<INDENT>
result = 'other' <EOL>
<OUTDENT>
<OUTDENT>
return result <EOL>