# Base 2D sweep on m10n20k30: four data-type configurations, ab and ba tags
# for source and weights (destination fixed to ab), every 0/1 combination of
# the runtime_m/n/k flags, and bias data type undef or f32 with masks 1, 2, 3.
--reset

--cfg=f32,f16,bf16bf16f32,u8s8s8
--stag=ab,ba
--wtag=ab,ba
--dtag=ab
--runtime_m=0,1
--runtime_n=0,1
--runtime_k=0,1
--bia_dt=undef,f32 --bia_mask=1,2,3
m10n20k30

# Same m10n20k30 problem with a common output scale of 2 and a post-op chain
# that starts with sum and mixes add/mul entries (per_dim_01, per_dim_0,
# per_oc, per_tensor policies) with a linear entry.
# No --reset precedes this group, so the options set earlier in the file
# presumably remain in effect.
--attr-oscale=common:2 --attr-post-ops=sum+add:f32+add:u8:per_dim_01+linear:0.5:1.5:2.0+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_tensor
m10n20k30

# Output scale switched to the per_oc policy; the chain now opens with relu
# instead of sum and swaps the per_tensor / per_dim_01 policies on the u8 and
# trailing f32 add entries.
--attr-oscale=per_oc:2
--attr-post-ops=relu+add:f32+add:u8:per_tensor+linear:0.5:1.5:2.0+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_dim_01   m10n20k30

# Output scale is given an empty value here (presumably this restores the
# default, i.e. no scale -- confirm against the benchdnn attribute docs).
# The post-op chain starts with sum:2 and ends with a two-argument linear;
# the problem uses n=1 instead of n=20.
--attr-oscale=
--attr-post-ops=sum:2+add:f32+add:u8:per_tensor+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_dim_01+linear:3:-1
m10n1k30

# Same n=1 problem with a chain that has no sum entry and ends with a
# three-argument linear. Only the post-ops are changed; every other option is
# whatever the preceding lines left set.
--attr-post-ops=add:f32+add:u8:per_dim_01+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_tensor+linear:3:-1:2
m10n1k30

# Test the `any` tag: source and weights take ab, ba or any, destination takes
# ab or any. All runtime_* flags are 0 and the bias data type is undef.
--reset
--cfg=f32,f16,f16f16s8,f16f16u8,bf16bf16bf16,s8s8f32
--runtime_m=0 --runtime_n=0 --runtime_k=0 --bia_dt=undef

--stag=ab,ba,any
--wtag=ab,ba,any
--dtag=ab,any
m1n20k30

# Test int8 configurations (x8x8x8), group 1: u8/s8 source configurations on
# m100n10k10 with n always flagged runtime and m optionally so.
# The trailing `*` on the scale value marks it as a runtime value (see the
# runtime oscale test in this file); the `*` on the zero-point values
# presumably means the same.
--reset

--cfg=u8s8s32,u8s8f32,s8s8s8
--runtime_m=0,1
--runtime_n=1
--runtime_k=0
--bia_dt=undef,f32,u8
--stag=ab,ba
--wtag=ab
--dtag=ab
--attr-oscale=common:2* --attr-zero-points=src:common:1*+wei:common:-1*+dst:common:2*
--attr-post-ops=sum+relu+add:f32+add:u8:per_tensor+linear:0.5:1.5:2.0+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_dim_01
m100n10k10

# Int8 configurations, group 2: s8 source configurations on m10n10k100 with k
# always flagged runtime and n optionally so. The scale and the src/wei
# zero points carry no `*` suffix here; only the dst zero point does.
--cfg=s8s8s8,s8s8s32,s8s8u8
--runtime_m=0
--runtime_n=0,1
--runtime_k=1
--bia_dt=undef,u8
--stag=ba
--wtag=ab,ba
--dtag=ab
--attr-oscale=common:2 --attr-zero-points=src:common:1+wei:common:-2+dst:common:3*
--attr-post-ops=sum+relu+add:f32+add:u8:per_dim_01+linear:0.5:1.5:2.0+mul:f32:per_dim_0+add:s8:per_oc+add:f32:per_tensor
m10n10k100

# 3D (batched) problems: mb3m30n20k1 with abc/acb tags for source and weights,
# every 0/1 combination of the runtime_mb/m/n/k flags, bias data type undef or
# f32 with masks 4 and 6, a common output scale of 2 and a post-op chain.
--reset

--cfg=f32,f16,f16f16s8,f16f16u8,bf16bf16bf16,bf16bf16f32
--stag=abc,acb
--wtag=abc,acb
--dtag=abc
--runtime_mb=0,1
--runtime_m=0,1
--runtime_n=0,1
--runtime_k=0,1
--bia_dt=undef,f32 --bia_mask=4,6
--attr-oscale=common:2
--attr-post-ops=sum+add:f32+add:u8:per_dim_01+mul:f32:per_dim_0+add:s8:per_tensor+add:f32:per_dim_01+linear:2:-1
mb3m30n20k1

# Regression tests.
# Case 1: f32 with ab tags everywhere on m96n512k8.
--reset --cfg=f32
--stag=ab
--wtag=ab
--dtag=ab
m96n512k8

# Regression case: bf16bf16bf16 configuration with a bf16 bias and ab tags on
# a small m2n3k4 problem.
--reset --cfg=bf16bf16bf16
--stag=ab
--wtag=ab
--dtag=ab
--bia_dt=bf16
m2n3k4

# Runtime output-scale test: a common scale of 2 marked runtime with `*`,
# with every other option left at whatever --reset restores.
--reset
--attr-oscale=common:2*
m512n16k16

# Test all features at once: 6D tensors with two-way broadcast (source has
# size 1 in dim 0, weights have size 1 in dim 1, destination is 6x5 there),
# plus bias, post-ops and a common output scale.
--reset
--cfg=bf16bf16bf16
--stag=abx
--wtag=abx
--dtag=abx
--bia_dt=f32
--bia_mask=4
--attr-oscale=common:2 --attr-post-ops=sum+add:f32+add:u8:per_dim_01+mul:f32:per_dim_0+add:s8:per_tensor+add:f32:per_dim_01+linear:2:-1
1x5x4x3x30x2:6x1x4x3x2x20:6x5x4x3x30x20

# Layers taken from some key GPU deep-learning frameworks; the cases
# themselves live in the separate batch file named below.
--reset --batch=option_set_fwks_key_gpu
