unaligned load

This commit is contained in:
George Hotz 2025-03-24 14:54:11 +08:00
commit b1f8018bf4

View file

@ -134,6 +134,10 @@ dsp_pm_late = PatternMatcher([
# prefetch L1
(UPat(Ops.LOAD, dtype=(dtypes.uchar.vec(4), dtypes.uchar.vec(8)), name="ld"), prefetch_l1),
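# unaligned load: a uchar.vec(128) LOAD through a CAST of an INDEX with constant addend c becomes CUSTOM vmemu when c%128 != 0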
((UPat(Ops.LOAD, src=(UPat(Ops.CAST, src=(UPat(Ops.INDEX, src=(UPat(), UPat()+UPat.cvar("c"))),), name="ptr"),), dtype=dtypes.uchar.vec(128))),
lambda c,ptr: UOp(Ops.CUSTOM, dtype=dtypes.uchar.vec(128), src=(ptr.src[0],), arg="vmemu({0})") if c.arg%128 != 0 else None),
# __builtin_HEXAGON_V6_vrmpybus_acc_128B
(UPat(Ops.CUSTOMI, dtype=dtypes.int.vec(32), name="c")+UPat.var("x"), add_to_mul),
@ -163,7 +167,9 @@ class DSPRenderer(ClangRenderer):
device = "DSP"
supports_float4 = True
buffer_suffix = " restrict __attribute__((align_value(128)))"
kernel_prefix = "__attribute__((noinline)) "
kernel_prefix = "typedef long HVX_Vect_UN __attribute__((__vector_size__(128)))__attribute__((aligned(4)));\n"+\
"#define vmemu(A) *((HVX_Vect_UN*)(A))\n"+\
"__attribute__((noinline)) "
pre_matcher = dsp_pm
extra_matcher = dsp_pm_late+ClangRenderer.extra_matcher+pretty_render
type_map = { **ClangRenderer.type_map, dtypes.uint64: "unsigned long long", dtypes.int64: "long long" }