在 2022/3/7 下午8:21, ~eopxd 写道:
+    uint32_t vta = vext_vta(desc);                        \
     uint32_t i;                                           \
     TD s1 =  *((TD *)vs1 + HD(0));                        \
                                                           \
@@ -4562,6 +4565,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
     }                                                     \
     *((TD *)vd + HD(0)) = s1;                             \
     env->vstart = 0;                                      \
+    /* set tail elements to 1s */                         \
+    vext_set_elems_1s_fns[ctzl(esz)](vd, vta, 1, esz,     \
+                                     vlenb);              \
 }
 
 /* vd[0] = sum(vs1[0], vs2[*]) */
@@ -4667,6 +4673,9 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
 {
     uint32_t vm = vext_vm(desc);
     uint32_t vl = env->vl;
+    uint32_t esz = sizeof(uint32_t);
+    uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;

vlenb can also be obtained from the maxsz field of desc, that is:

uint32_t vlenb = simd_maxsz(desc);

Regards,

Weiwei Li