diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index f6936b98bf3e4..1d7c41452f7d5 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1369,8 +1369,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - // TODO: Handle other cost kinds. - if (CostKind != TTI::TCK_RecipThroughput) + if (getTLI()->getValueType(DL, ValTy, true) == MVT::Other) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, Op1Info, Op2Info, I); diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll index 8b52b123d6d25..3fbd12ac8813c 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll @@ -27,20 +27,20 @@ define i32 @sadd(i32 %arg) { ; CHECK-LABEL: 'sadd' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; CHECK-NEXT: Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) @@ -90,20 +90,20 @@ define i32 @uadd(i32 %arg) { ; CHECK-LABEL: 'uadd' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; CHECK-NEXT: Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) @@ -153,20 +153,20 @@ define i32 @ssub(i32 %arg) { ; CHECK-LABEL: 'ssub' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; CHECK-NEXT: Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) @@ -216,20 +216,20 @@ define i32 @usub(i32 %arg) { ; CHECK-LABEL: 'usub' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; CHECK-NEXT: Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef) @@ -279,20 +279,20 @@ define i32 @smul(i32 %arg) { ; CHECK-LABEL: 'smul' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:34 CodeSize:8 Lat:8 SizeLat:8 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:68 CodeSize:8 Lat:8 SizeLat:8 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:136 CodeSize:8 Lat:8 SizeLat:8 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:68 CodeSize:9 Lat:9 SizeLat:9 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:136 CodeSize:11 Lat:11 SizeLat:11 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:8 Lat:8 SizeLat:8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:76 CodeSize:8 Lat:8 SizeLat:8 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:152 CodeSize:8 Lat:8 SizeLat:8 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:76 CodeSize:9 Lat:9 SizeLat:9 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:152 CodeSize:11 Lat:11 SizeLat:11 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 5 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:8 Lat:8 SizeLat:8 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:9 Lat:9 SizeLat:9 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:11 Lat:11 SizeLat:11 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 5 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:8 Lat:8 SizeLat:8 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:9 Lat:9 SizeLat:9 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:48 CodeSize:11 Lat:11 SizeLat:11 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef) @@ -342,20 +342,20 @@ define i32 @umul(i32 %arg) { ; CHECK-LABEL: 'umul' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:33 CodeSize:7 Lat:7 SizeLat:7 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:7 Lat:7 SizeLat:7 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:132 CodeSize:7 Lat:7 SizeLat:7 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:66 CodeSize:8 Lat:8 SizeLat:8 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:132 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; CHECK-NEXT: Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:37 CodeSize:7 Lat:7 SizeLat:7 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:7 Lat:7 SizeLat:7 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:148 CodeSize:7 Lat:7 SizeLat:7 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:74 CodeSize:8 Lat:8 SizeLat:8 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:148 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:7 Lat:7 SizeLat:7 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:7 Lat:7 SizeLat:7 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:7 Lat:7 SizeLat:7 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:8 Lat:8 SizeLat:8 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:7 Lat:7 SizeLat:7 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:7 Lat:7 SizeLat:7 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:7 Lat:7 SizeLat:7 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:8 Lat:8 SizeLat:8 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef ; %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/AArch64/cmp.ll b/llvm/test/Analysis/CostModel/AArch64/cmp.ll index b55a759a78cc3..10c995786d57a 100644 --- a/llvm/test/Analysis/CostModel/AArch64/cmp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cmp.ll @@ -9,7 +9,7 @@ define void @cmps() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %c16 = icmp ult i16 undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %c32 = icmp sge i32 undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %c64 = icmp ne i64 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c128 = icmp ult i128 undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %c128 = icmp ult i128 undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %cv16i8 = icmp slt <16 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %cv8i16 = icmp ult <8 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %cv4i32 = icmp sge <4 x i32> undef, undef @@ -17,10 +17,10 @@ define void @cmps() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %cf32 = fcmp ogt float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %cf64 = fcmp ogt double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %cbf64 = fcmp ogt bfloat undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cfv816 = fcmp olt <8 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cfv816 = fcmp olt <8 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %cfv432 = fcmp oge <4 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %cfv264 = fcmp oge <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %c8 = icmp slt i8 undef, undef @@ -62,7 +62,7 @@ define void @andcmp() { ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64leneg = icmp sle i64 %a64, -1 ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64gtneg = icmp sgt i64 %a64, -1 ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a128 = and i128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c128 = icmp eq i128 %a128, 0 +; CHECK-NEXT: Cost Model: Found costs of 2 for: %c128 = icmp eq i128 %a128, 0 ; CHECK-NEXT: Cost Model: Found costs of 1 for: %av16i8 = and <16 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %cv16i8 = icmp ne <16 x i8> %av16i8, zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %av8i16 = and <8 x i16> undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/fcmp.ll b/llvm/test/Analysis/CostModel/AArch64/fcmp.ll index fb9888d98af6b..d2a1340f60460 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fcmp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fcmp.ll @@ -7,12 +7,12 @@ define void @fcmp_oeq(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp oeq float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oeq <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oeq <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp oeq <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oeq <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp oeq double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oeq <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp oeq <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oeq <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp oeq fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp oeq <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp oeq <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp oeq float undef, undef @@ -32,8 +32,8 @@ define void @fcmp_oeq_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp oeq half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp oeq <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp oeq <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp oeq <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp oeq <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp oeq <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp oeq <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_oeq_half' @@ -41,7 +41,7 @@ define void @fcmp_oeq_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oeq <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oeq <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oeq <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp oeq <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp oeq <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp oeq half undef, undef @@ -57,8 +57,8 @@ define void @fcmp_oeq_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp oeq bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp oeq <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp oeq <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp oeq <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp oeq <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp oeq bfloat undef, undef @@ -74,12 +74,12 @@ define void @fcmp_ogt(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp ogt float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ogt <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ogt <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ogt <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ogt <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp ogt double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ogt <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ogt <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ogt <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp ogt fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ogt <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp ogt <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp ogt float undef, undef @@ -99,8 +99,8 @@ define void @fcmp_ogt_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp ogt half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp ogt <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp ogt <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ogt <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ogt <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ogt <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ogt <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ogt_half' @@ -108,7 +108,7 @@ define void @fcmp_ogt_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ogt <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ogt <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ogt <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ogt <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ogt <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp ogt half undef, undef @@ -124,8 +124,8 @@ define void @fcmp_ogt_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp ogt bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ogt <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ogt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ogt <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ogt <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ogt bfloat undef, undef @@ -141,12 +141,12 @@ define void @fcmp_oge(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp oge float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oge <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oge <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp oge <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oge <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp oge double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oge <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp oge <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oge <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp oge fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp oge <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp oge <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp oge float undef, undef @@ -166,8 +166,8 @@ define void @fcmp_oge_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp oge half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp oge <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp oge <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp oge <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp oge <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp oge <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp oge <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_oge_half' @@ -175,7 +175,7 @@ define void @fcmp_oge_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oge <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oge <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oge <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp oge <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp oge <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp oge half undef, undef @@ -191,8 +191,8 @@ define void @fcmp_oge_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp oge bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp oge <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp oge <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp oge <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp oge <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp oge bfloat undef, undef @@ -208,12 +208,12 @@ define void @fcmp_olt(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp olt float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp olt <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp olt <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp olt <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp olt <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp olt double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp olt <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp olt <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp olt <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp olt fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp olt <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp olt <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp olt float undef, undef @@ -233,8 +233,8 @@ define void @fcmp_olt_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp olt half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp olt <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp olt <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp olt <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp olt <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp olt <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp olt <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_olt_half' @@ -242,7 +242,7 @@ define void @fcmp_olt_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp olt <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp olt <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp olt <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp olt <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp olt <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp olt half undef, undef @@ -258,8 +258,8 @@ define void @fcmp_olt_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp olt bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp olt <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp olt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp olt <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp olt <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp olt bfloat undef, undef @@ -275,12 +275,12 @@ define void @fcmp_ole(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp ole float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ole <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ole <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ole <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ole <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp ole double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ole <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ole <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ole <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp ole fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ole <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp ole <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp ole float undef, undef @@ -300,8 +300,8 @@ define void @fcmp_ole_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp ole half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp ole <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp ole <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ole <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ole <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ole <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ole <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ole_half' @@ -309,7 +309,7 @@ define void @fcmp_ole_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ole <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ole <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ole <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ole <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ole <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp ole half undef, undef @@ -325,8 +325,8 @@ define void @fcmp_ole_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp ole bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ole <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ole <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ole <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ole <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ole bfloat undef, undef @@ -342,12 +342,12 @@ define void @fcmp_one(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp one float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp one <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp one <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp one <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp one <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp one double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp one <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp one <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp one <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp one fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp one <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp one <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp one float undef, undef @@ -367,8 +367,8 @@ define void @fcmp_one_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp one half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp one <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp one <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp one <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp one <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp one <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp one <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_one_half' @@ -376,7 +376,7 @@ define void @fcmp_one_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp one <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp one <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp one <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp one <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp one <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp one half undef, undef @@ -392,8 +392,8 @@ define void @fcmp_one_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp one bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp one <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp one <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp one <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp one <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp one bfloat undef, undef @@ -409,12 +409,12 @@ define void @fcmp_ord(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp ord float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ord <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ord <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ord <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ord <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp ord double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ord <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ord <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ord <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp ord fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ord <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp ord <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp ord float undef, undef @@ -434,8 +434,8 @@ define void @fcmp_ord_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp ord half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp ord <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp ord <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ord <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ord <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ord <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ord <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ord_half' @@ -443,7 +443,7 @@ define void @fcmp_ord_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ord <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ord <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ord <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ord <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ord <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp ord half undef, undef @@ -459,8 +459,8 @@ define void @fcmp_ord_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp ord bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ord <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ord <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ord <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ord <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ord bfloat undef, undef @@ -476,12 +476,12 @@ define void @fcmp_ueq(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp ueq float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ueq <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ueq <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ueq <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ueq <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp ueq double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ueq <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ueq <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ueq <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp ueq fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ueq <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp ueq <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp ueq float undef, undef @@ -501,8 +501,8 @@ define void @fcmp_ueq_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp ueq half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp ueq <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp ueq <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ueq <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ueq <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ueq <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ueq <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ueq_half' @@ -510,7 +510,7 @@ define void @fcmp_ueq_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ueq <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ueq <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ueq <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ueq <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ueq <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp ueq half undef, undef @@ -526,8 +526,8 @@ define void @fcmp_ueq_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp ueq bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ueq <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ueq <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ueq <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ueq <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ueq bfloat undef, undef @@ -543,12 +543,12 @@ define void @fcmp_ugt(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp ugt float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ugt <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ugt <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ugt <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ugt <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp ugt double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ugt <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ugt <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ugt <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp ugt fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ugt <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp ugt <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp ugt float undef, undef @@ -568,8 +568,8 @@ define void @fcmp_ugt_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp ugt half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp ugt <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp ugt <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ugt <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ugt <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ugt <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ugt <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ugt_half' @@ -577,7 +577,7 @@ define void @fcmp_ugt_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ugt <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ugt <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ugt <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ugt <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ugt <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp ugt half undef, undef @@ -593,8 +593,8 @@ define void @fcmp_ugt_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp ugt bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ugt <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ugt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ugt <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ugt <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ugt bfloat undef, undef @@ -610,12 +610,12 @@ define void @fcmp_uge(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp uge float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uge <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uge <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp uge <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uge <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp uge double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uge <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp uge <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uge <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp uge fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp uge <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp uge <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp uge float undef, undef @@ -635,8 +635,8 @@ define void @fcmp_uge_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp uge half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp uge <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp uge <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp uge <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp uge <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp uge <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp uge <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_uge_half' @@ -644,7 +644,7 @@ define void @fcmp_uge_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp uge <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uge <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uge <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp uge <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uge <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp uge half undef, undef @@ -660,8 +660,8 @@ define void @fcmp_uge_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp uge bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp uge <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp uge <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp uge <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp uge <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp uge bfloat undef, undef @@ -677,12 +677,12 @@ define void @fcmp_ult(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp ult float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ult <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ult <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ult <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ult <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp ult double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ult <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ult <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ult <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp ult fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ult <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp ult <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp ult float undef, undef @@ -702,8 +702,8 @@ define void @fcmp_ult_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp ult half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp ult <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp ult <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ult <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ult <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ult <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ult <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ult_half' @@ -711,7 +711,7 @@ define void @fcmp_ult_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ult <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ult <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ult <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ult <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ult <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp ult half undef, undef @@ -727,8 +727,8 @@ define void @fcmp_ult_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp ult bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ult <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ult <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ult <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ult <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ult bfloat undef, undef @@ -744,12 +744,12 @@ define void @fcmp_ule(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp ule float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ule <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ule <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ule <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ule <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp ule double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ule <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ule <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ule <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp ule fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ule <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp ule <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp ule float undef, undef @@ -769,8 +769,8 @@ define void @fcmp_ule_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp ule half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp ule <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp ule <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ule <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ule <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ule <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ule <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_ule_half' @@ -778,7 +778,7 @@ define void @fcmp_ule_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ule <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ule <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ule <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ule <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ule <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp ule half undef, undef @@ -794,8 +794,8 @@ define void @fcmp_ule_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp ule bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ule <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ule <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp ule bfloat undef, undef @@ -811,12 +811,12 @@ define void @fcmp_une(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp une float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp une <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp une <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp une <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp une <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp une double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp une <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp une <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp une <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp une fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp une <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp une <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp une float undef, undef @@ -836,8 +836,8 @@ define void @fcmp_une_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp une half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp une <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp une <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp une <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp une <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp une <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp une <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_une_half' @@ -845,7 +845,7 @@ define void @fcmp_une_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp une <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp une <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp une <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp une <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp une <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp une half undef, undef @@ -861,8 +861,8 @@ define void @fcmp_une_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp une bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp une <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp une <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp une bfloat undef, undef @@ -878,12 +878,12 @@ define void @fcmp_uno(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp uno float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uno <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uno <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp uno <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uno <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp uno double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uno <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp uno <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uno <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp uno fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp uno <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp uno <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp uno float undef, undef @@ -903,8 +903,8 @@ define void @fcmp_uno_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp uno half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp uno <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp uno <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp uno <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp uno <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp uno <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp uno <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_uno_half' @@ -912,7 +912,7 @@ define void @fcmp_uno_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp uno <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uno <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uno <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp uno <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uno <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp uno half undef, undef @@ -928,8 +928,8 @@ define void @fcmp_uno_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp uno bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp uno <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp uno <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp uno bfloat undef, undef @@ -945,12 +945,12 @@ define void @fcmp_true(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp true float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp true <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp true <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp true <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp true <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp true double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp true <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp true <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp true <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp true fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp true <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp true <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp true float undef, undef @@ -970,8 +970,8 @@ define void @fcmp_true_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp true half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp true <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp true <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp true <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp true <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp true <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp true <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_true_half' @@ -979,7 +979,7 @@ define void @fcmp_true_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp true <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp true <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp true <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp true <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp true <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp true half undef, undef @@ -995,8 +995,8 @@ define void @fcmp_true_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp true bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp true <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp true <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp true bfloat undef, undef @@ -1012,12 +1012,12 @@ define void @fcmp_false(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f32 = fcmp false float undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp false <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp false <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp false <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp false <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f64 = fcmp false double undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp false <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp false <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp false <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %f128 = fcmp false fp128 undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp false <2 x fp128> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v2f128 = fcmp false <2 x fp128> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f32 = fcmp false float undef, undef @@ -1037,8 +1037,8 @@ define void @fcmp_false_half(i32 %arg) { ; CHECK-BASE-NEXT: Cost Model: Found costs of 1 for: %f16 = fcmp false half undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 3 for: %v2f16 = fcmp false <2 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of 4 for: %v4f16 = fcmp false <4 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp false <8 x half> undef, undef -; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp false <16 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp false <8 x half> undef, undef +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp false <16 x half> undef, undef ; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-FP16-LABEL: 'fcmp_false_half' @@ -1046,7 +1046,7 @@ define void @fcmp_false_half(i32 %arg) { ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp false <2 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp false <4 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp false <8 x half> undef, undef -; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp false <16 x half> undef, undef +; CHECK-FP16-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp false <16 x half> undef, undef ; CHECK-FP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %f16 = fcmp false half undef, undef @@ -1062,8 +1062,8 @@ define void @fcmp_false_bfloat(i32 %arg) { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %bf16 = fcmp false bfloat undef, undef ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp false <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp false <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %bf16 = fcmp false bfloat undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll index 59e86aefeeb6c..9d06b4bdec9b4 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll @@ -236,7 +236,7 @@ declare <4 x i30> @llvm.fshl.v4i30(<4 x i30>, <4 x i30>, <4 x i30>) define <2 x i66> @fshl_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) { ; CHECK-LABEL: 'fshl_v2i66_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:6 Lat:6 SizeLat:6 for: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshl ; entry: @@ -259,7 +259,7 @@ declare i66 @llvm.fshl.i66(i66, i66, i66) define <2 x i128> @fshl_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) { ; CHECK-LABEL: 'fshl_v2i128_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:6 Lat:6 SizeLat:6 for: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshl ; entry: @@ -270,7 +270,7 @@ declare <2 x i128> @llvm.fshl.v4i128(<2 x i128>, <2 x i128>, <2 x i128>) define i128 @fshl_i128(i128 %a, i128 %b) { ; CHECK-LABEL: 'fshl_i128' -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:6 SizeLat:6 for: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshl ; entry: diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll index 53cede900c398..b31806b647868 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll @@ -236,7 +236,7 @@ declare <4 x i30> @llvm.fshr.v4i30(<4 x i30>, <4 x i30>, <4 x i30>) define <2 x i66> @fshr_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) { ; CHECK-LABEL: 'fshr_v2i66_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:6 Lat:6 SizeLat:6 for: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> ) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshr ; entry: @@ -259,7 +259,7 @@ declare i66 @llvm.fshr.i66(i66, i66, i66) define <2 x i128> @fshr_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) { ; CHECK-LABEL: 'fshr_v2i128_3rd_arg_vec_const_lanes_different' -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:6 Lat:6 SizeLat:6 for: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) +; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> ) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshr ; entry: @@ -270,7 +270,7 @@ declare <2 x i128> @llvm.fshr.v4i128(<2 x i128>, <2 x i128>, <2 x i128>) define i128 @fshr_i128(i128 %a, i128 %b) { ; CHECK-LABEL: 'fshr_i128' -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:6 SizeLat:6 for: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshr ; entry: diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll b/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll index 9a0e5c3eb7964..83f62c8b44303 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll @@ -22,7 +22,7 @@ define void @cmp_legal_int() { ; Check icmp for an illegal integer vector. define @cmp_nxv4i64() { ; CHECK-LABEL: 'cmp_nxv4i64' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = icmp ne undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %res = icmp ne undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret %res ; %res = icmp ne undef, undef @@ -48,7 +48,7 @@ define void @cmp_legal_pred() { ; Check icmp for an illegal predicate vector. define @cmp_nxv32i1() { ; CHECK-LABEL: 'cmp_nxv32i1' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = icmp ne undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %res = icmp ne undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret %res ; %res = icmp ne undef, undef @@ -74,7 +74,7 @@ define void @cmp_legal_fp() #0 { ; Check fcmp for an illegal FP vector define @cmp_nxv16f16() { ; CHECK-LABEL: 'cmp_nxv16f16' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = fcmp oge undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %res = fcmp oge undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret %res ; %res = fcmp oge undef, undef @@ -100,7 +100,7 @@ define void @sel_legal_int() { ; Check select for an illegal integer vector define @sel_nxv16i16() { ; CHECK-LABEL: 'sel_nxv16i16' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %res = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret %res ; %res = select undef, undef, undef @@ -126,7 +126,7 @@ define void @sel_legal_fp() #0 { ; Check select for an illegal FP vector define @sel_nxv8f32() { ; CHECK-LABEL: 'sel_nxv8f32' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %res = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret %res ; %res = select undef, undef, undef @@ -152,7 +152,7 @@ define void @sel_legal_pred() { ; Check select for an illegal predicate vector define @sel_nxv32i1() { ; CHECK-LABEL: 'sel_nxv32i1' -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %res = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret %res ; %res = select undef, undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll index 7fdd6b4971658..9801d14bcc6cd 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll @@ -5,13 +5,13 @@ define void @fcmp_oeq(i32 %arg) { ; CHECK-LABEL: 'fcmp_oeq' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oeq <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oeq <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp oeq <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oeq <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oeq <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp oeq <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oeq <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oeq <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oeq <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oeq <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp oeq <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp oeq <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp oeq <2 x float> undef, undef @@ -30,7 +30,7 @@ define void @fcmp_oeq_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_oeq_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp oeq <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp oeq <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp oeq <2 x bfloat> undef, undef @@ -43,13 +43,13 @@ define void @fcmp_ogt(i32 %arg) { ; CHECK-LABEL: 'fcmp_ogt' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ogt <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ogt <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ogt <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ogt <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ogt <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ogt <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ogt <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ogt <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ogt <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ogt <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ogt <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ogt <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp ogt <2 x float> undef, undef @@ -68,7 +68,7 @@ define void @fcmp_ogt_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ogt_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ogt <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ogt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ogt <2 x bfloat> undef, undef @@ -81,13 +81,13 @@ define void @fcmp_oge(i32 %arg) { ; CHECK-LABEL: 'fcmp_oge' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp oge <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp oge <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp oge <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp oge <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp oge <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp oge <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp oge <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp oge <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp oge <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp oge <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp oge <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp oge <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp oge <2 x float> undef, undef @@ -106,7 +106,7 @@ define void @fcmp_oge_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_oge_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp oge <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp oge <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp oge <2 x bfloat> undef, undef @@ -119,13 +119,13 @@ define void @fcmp_olt(i32 %arg) { ; CHECK-LABEL: 'fcmp_olt' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp olt <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp olt <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp olt <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp olt <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp olt <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp olt <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp olt <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp olt <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp olt <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp olt <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp olt <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp olt <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp olt <2 x float> undef, undef @@ -144,7 +144,7 @@ define void @fcmp_olt_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_olt_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp olt <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp olt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp olt <2 x bfloat> undef, undef @@ -157,13 +157,13 @@ define void @fcmp_ole(i32 %arg) { ; CHECK-LABEL: 'fcmp_ole' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ole <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ole <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ole <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ole <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ole <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ole <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ole <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ole <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ole <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ole <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ole <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ole <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp ole <2 x float> undef, undef @@ -182,7 +182,7 @@ define void @fcmp_ole_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ole_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ole <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ole <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ole <2 x bfloat> undef, undef @@ -195,13 +195,13 @@ define void @fcmp_one(i32 %arg) { ; CHECK-LABEL: 'fcmp_one' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp one <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp one <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp one <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp one <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp one <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp one <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp one <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp one <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp one <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp one <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp one <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp one <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp one <2 x float> undef, undef @@ -220,7 +220,7 @@ define void @fcmp_one_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_one_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp one <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp one <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp one <2 x bfloat> undef, undef @@ -233,13 +233,13 @@ define void @fcmp_ord(i32 %arg) { ; CHECK-LABEL: 'fcmp_ord' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ord <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ord <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ord <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ord <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ord <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ord <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ord <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ord <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ord <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ord <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ord <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ord <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp ord <2 x float> undef, undef @@ -258,7 +258,7 @@ define void @fcmp_ord_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ord_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ord <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ord <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ord <2 x bfloat> undef, undef @@ -271,13 +271,13 @@ define void @fcmp_ueq(i32 %arg) { ; CHECK-LABEL: 'fcmp_ueq' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ueq <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ueq <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ueq <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ueq <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ueq <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ueq <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ueq <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ueq <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ueq <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ueq <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ueq <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ueq <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp ueq <2 x float> undef, undef @@ -296,7 +296,7 @@ define void @fcmp_ueq_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ueq_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ueq <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ueq <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ueq <2 x bfloat> undef, undef @@ -309,13 +309,13 @@ define void @fcmp_ugt(i32 %arg) { ; CHECK-LABEL: 'fcmp_ugt' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ugt <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ugt <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ugt <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ugt <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ugt <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ugt <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ugt <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ugt <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ugt <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ugt <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ugt <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ugt <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp ugt <2 x float> undef, undef @@ -334,7 +334,7 @@ define void @fcmp_ugt_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ugt_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ugt <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ugt <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ugt <2 x bfloat> undef, undef @@ -347,13 +347,13 @@ define void @fcmp_uge(i32 %arg) { ; CHECK-LABEL: 'fcmp_uge' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uge <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uge <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp uge <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uge <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uge <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp uge <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uge <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp uge <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uge <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uge <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp uge <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uge <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp uge <2 x float> undef, undef @@ -372,7 +372,7 @@ define void @fcmp_uge_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_uge_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp uge <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp uge <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp uge <2 x bfloat> undef, undef @@ -385,13 +385,13 @@ define void @fcmp_ult(i32 %arg) { ; CHECK-LABEL: 'fcmp_ult' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ult <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ult <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ult <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ult <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ult <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ult <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ult <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ult <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ult <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ult <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ult <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ult <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp ult <2 x float> undef, undef @@ -410,7 +410,7 @@ define void @fcmp_ult_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ult_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ult <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ult <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ult <2 x bfloat> undef, undef @@ -423,13 +423,13 @@ define void @fcmp_ule(i32 %arg) { ; CHECK-LABEL: 'fcmp_ule' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp ule <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp ule <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ule <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp ule <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp ule <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ule <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp ule <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp ule <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp ule <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp ule <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ule <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp ule <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp ule <2 x float> undef, undef @@ -448,8 +448,8 @@ define void @fcmp_ule_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_ule_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp ule <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp ule <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp ule <2 x bfloat> undef, undef @@ -463,13 +463,13 @@ define void @fcmp_une(i32 %arg) { ; CHECK-LABEL: 'fcmp_une' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp une <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp une <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp une <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp une <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp une <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp une <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp une <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp une <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp une <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp une <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp une <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp une <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp une <2 x float> undef, undef @@ -488,8 +488,8 @@ define void @fcmp_une_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_une_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp une <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp une <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp une <2 x bfloat> undef, undef @@ -503,13 +503,13 @@ define void @fcmp_uno(i32 %arg) { ; CHECK-LABEL: 'fcmp_uno' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp uno <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp uno <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp uno <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp uno <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp uno <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp uno <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp uno <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp uno <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp uno <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp uno <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp uno <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp uno <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp uno <2 x float> undef, undef @@ -528,8 +528,8 @@ define void @fcmp_uno_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_uno_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp uno <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp uno <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp uno <2 x bfloat> undef, undef @@ -543,13 +543,13 @@ define void @fcmp_true(i32 %arg) { ; CHECK-LABEL: 'fcmp_true' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp true <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp true <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp true <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp true <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp true <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp true <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp true <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp true <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp true <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp true <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp true <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp true <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp true <2 x float> undef, undef @@ -568,8 +568,8 @@ define void @fcmp_true_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_true_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp true <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp true <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp true <2 x bfloat> undef, undef @@ -583,13 +583,13 @@ define void @fcmp_false(i32 %arg) { ; CHECK-LABEL: 'fcmp_false' ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f32 = fcmp false <2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f32 = fcmp false <4 x float> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp false <8 x float> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v8f32 = fcmp false <8 x float> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f64 = fcmp false <2 x double> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp false <4 x double> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4f64 = fcmp false <4 x double> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2f16 = fcmp false <2 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v4f16 = fcmp false <4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %v8f16 = fcmp false <8 x half> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp false <16 x half> undef, undef +; CHECK-NEXT: Cost Model: Found costs of 2 for: %v16f16 = fcmp false <16 x half> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2f32 = fcmp false <2 x float> undef, undef @@ -608,8 +608,8 @@ define void @fcmp_false_bfloat(i32 %arg) { ; CHECK-LABEL: 'fcmp_false_bfloat' ; CHECK-NEXT: Cost Model: Found costs of 3 for: %v2bf16 = fcmp false <2 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 4 for: %v4bf16 = fcmp false <4 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef -; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef +; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %v2bf16 = fcmp false <2 x bfloat> undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index ec84c58bf9681..16e10587dbb6d 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -930,8 +930,8 @@ define void @get_lane_mask() #0 { ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:33 Lat:33 SizeLat:33 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'get_lane_mask' @@ -953,8 +953,8 @@ define void @get_lane_mask() #0 { ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:33 Lat:33 SizeLat:33 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'get_lane_mask' @@ -976,8 +976,8 @@ define void @get_lane_mask() #0 { ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:48 CodeSize:33 Lat:33 SizeLat:33 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %mask_nxv16i1_i64 = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll index a317d6da4028d..29f7fc938e62e 100644 --- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll @@ -83,7 +83,7 @@ define <2 x i64> @v2i64_select_sle(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { define <3 x i64> @v3i64_select_sle(<3 x i64> %a, <3 x i64> %b, <3 x i64> %c) { ; COST-LABEL: 'v3i64_select_sle' -; COST-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = icmp sle <3 x i64> %a, %b +; COST-NEXT: Cost Model: Found costs of 2 for: %cmp.1 = icmp sle <3 x i64> %a, %b ; COST-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <3 x i64> %s.1 ; @@ -94,7 +94,7 @@ define <3 x i64> @v3i64_select_sle(<3 x i64> %a, <3 x i64> %b, <3 x i64> %c) { define <2 x i64> @v2i64_select_no_cmp(<2 x i64> %a, <2 x i64> %b, <2 x i1> %cond) { ; COST-LABEL: 'v2i64_select_no_cmp' -; COST-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b +; COST-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:6 SizeLat:6 for: %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %s.1 ; %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b @@ -119,7 +119,7 @@ define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_ogt' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ogt <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ogt <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -169,7 +169,7 @@ define <2 x double> @v2f64_select_ogt(<2 x double> %a, <2 x double> %b, <2 x dou define <4 x bfloat> @v4bf16_select_ogt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) { ; COST-LABEL: 'v4bf16_select_ogt' ; COST-NEXT: Cost Model: Found costs of 4 for: %cmp.1 = fcmp ogt <4 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1 ; %cmp.1 = fcmp ogt <4 x bfloat> %a, %b @@ -179,8 +179,8 @@ define <4 x bfloat> @v4bf16_select_ogt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_ogt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_ogt' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; %cmp.1 = fcmp ogt <8 x bfloat> %a, %b @@ -206,7 +206,7 @@ define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_oge' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp oge <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oge <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -256,7 +256,7 @@ define <2 x double> @v2f64_select_oge(<2 x double> %a, <2 x double> %b, <2 x dou define <4 x bfloat> @v4bf16_select_oge(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) { ; COST-LABEL: 'v4bf16_select_oge' ; COST-NEXT: Cost Model: Found costs of 4 for: %cmp.1 = fcmp oge <4 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1 ; %cmp.1 = fcmp oge <4 x bfloat> %a, %b @@ -266,8 +266,8 @@ define <4 x bfloat> @v4bf16_select_oge(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_oge(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_oge' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; %cmp.1 = fcmp oge <8 x bfloat> %a, %b @@ -293,7 +293,7 @@ define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_olt' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp olt <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp olt <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -343,7 +343,7 @@ define <2 x double> @v2f64_select_olt(<2 x double> %a, <2 x double> %b, <2 x dou define <4 x bfloat> @v4bf16_select_olt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) { ; COST-LABEL: 'v4bf16_select_olt' ; COST-NEXT: Cost Model: Found costs of 4 for: %cmp.1 = fcmp olt <4 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1 ; %cmp.1 = fcmp olt <4 x bfloat> %a, %b @@ -353,8 +353,8 @@ define <4 x bfloat> @v4bf16_select_olt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_olt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_olt' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; %cmp.1 = fcmp olt <8 x bfloat> %a, %b @@ -380,7 +380,7 @@ define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_ole' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ole <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ole <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -430,7 +430,7 @@ define <2 x double> @v2f64_select_ole(<2 x double> %a, <2 x double> %b, <2 x dou define <4 x bfloat> @v4bf16_select_ole(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) { ; COST-LABEL: 'v4bf16_select_ole' ; COST-NEXT: Cost Model: Found costs of 4 for: %cmp.1 = fcmp ole <4 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1 ; %cmp.1 = fcmp ole <4 x bfloat> %a, %b @@ -440,8 +440,8 @@ define <4 x bfloat> @v4bf16_select_ole(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_ole(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_ole' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; %cmp.1 = fcmp ole <8 x bfloat> %a, %b @@ -467,7 +467,7 @@ define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_oeq' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp oeq <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oeq <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -517,7 +517,7 @@ define <2 x double> @v2f64_select_oeq(<2 x double> %a, <2 x double> %b, <2 x dou define <4 x bfloat> @v4bf16_select_oeq(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) { ; COST-LABEL: 'v4bf16_select_oeq' ; COST-NEXT: Cost Model: Found costs of 4 for: %cmp.1 = fcmp oeq <4 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1 ; %cmp.1 = fcmp oeq <4 x bfloat> %a, %b @@ -527,8 +527,8 @@ define <4 x bfloat> @v4bf16_select_oeq(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_oeq(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_oeq' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; %cmp.1 = fcmp oeq <8 x bfloat> %a, %b @@ -554,7 +554,7 @@ define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_one' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp one <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp one <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -604,7 +604,7 @@ define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x dou define <4 x bfloat> @v4bf16_select_one(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) { ; COST-LABEL: 'v4bf16_select_one' ; COST-NEXT: Cost Model: Found costs of 4 for: %cmp.1 = fcmp one <4 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1 ; %cmp.1 = fcmp one <4 x bfloat> %a, %b @@ -614,8 +614,8 @@ define <4 x bfloat> @v4bf16_select_one(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_one(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_one' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; %cmp.1 = fcmp one <8 x bfloat> %a, %b @@ -641,7 +641,7 @@ define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_une' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp une <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp une <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -691,7 +691,7 @@ define <2 x double> @v2f64_select_une(<2 x double> %a, <2 x double> %b, <2 x dou define <4 x bfloat> @v4bf16_select_une(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) { ; COST-LABEL: 'v4bf16_select_une' ; COST-NEXT: Cost Model: Found costs of 4 for: %cmp.1 = fcmp une <4 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1 ; %cmp.1 = fcmp une <4 x bfloat> %a, %b @@ -701,8 +701,8 @@ define <4 x bfloat> @v4bf16_select_une(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_une(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_une' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; %cmp.1 = fcmp une <8 x bfloat> %a, %b @@ -728,7 +728,7 @@ define <4 x half> @v4f16_select_ord(<4 x half> %a, <4 x half> %b, <4 x half> %c) define <8 x half> @v8f16_select_ord(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; COST-NOFP16-LABEL: 'v8f16_select_ord' -; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ord <8 x half> %a, %b +; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ord <8 x half> %a, %b ; COST-NOFP16-NEXT: Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c ; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1 ; @@ -778,7 +778,7 @@ define <2 x double> @v2f64_select_ord(<2 x double> %a, <2 x double> %b, <2 x dou define <4 x bfloat> @v4bf16_select_ord(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) { ; COST-LABEL: 'v4bf16_select_ord' ; COST-NEXT: Cost Model: Found costs of 4 for: %cmp.1 = fcmp ord <4 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1 ; %cmp.1 = fcmp ord <4 x bfloat> %a, %b @@ -788,8 +788,8 @@ define <4 x bfloat> @v4bf16_select_ord(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf define <8 x bfloat> @v8bf16_select_ord(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) { ; COST-LABEL: 'v8bf16_select_ord' -; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b -; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c +; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b +; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c ; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1 ; %cmp.1 = fcmp ord <8 x bfloat> %a, %b diff --git a/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll b/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll index 781eba144205d..00ce9680cab32 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll @@ -24,8 +24,8 @@ define i32 @reduce_i1(i32 %arg) { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll b/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll index 9e03620e2aa6f..eccaffc9d2a40 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll @@ -24,8 +24,8 @@ define i32 @reduce_i1(i32 %arg) { ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef) -; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 257 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef) ; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll index 484cd201d14a0..9c2d6bc20d302 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll @@ -108,27 +108,27 @@ define i32 @sadd(i32 %arg) { ; NEON-SIZE-LABEL: 'sadd' ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'sadd' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 289 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -266,27 +266,27 @@ define i32 @uadd(i32 %arg) { ; NEON-SIZE-LABEL: 'uadd' ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'uadd' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -424,27 +424,27 @@ define i32 @ssub(i32 %arg) { ; NEON-SIZE-LABEL: 'ssub' ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'ssub' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 289 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -582,27 +582,27 @@ define i32 @usub(i32 %arg) { ; NEON-SIZE-LABEL: 'usub' ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'usub' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -720,47 +720,47 @@ define i32 @smul(i32 %arg) { ; ; V8M-SIZE-LABEL: 'smul' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 322 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'smul' ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'smul' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 213 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) @@ -878,47 +878,47 @@ define i32 @umul(i32 %arg) { ; ; V8M-SIZE-LABEL: 'umul' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 258 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'umul' ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; MVE-SIZE-LABEL: 'umul' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 149 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll index da7096af4d80c..34397bd8bfb4b 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll @@ -112,31 +112,31 @@ define i32 @add(i32 %arg) { ; ; V8M-SIZE-LABEL: 'add' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'add' -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) @@ -162,22 +162,22 @@ define i32 @add(i32 %arg) { ; ; MVE-SIZE-LABEL: 'add' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 185 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 369 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -318,31 +318,31 @@ define i32 @sub(i32 %arg) { ; ; V8M-SIZE-LABEL: 'sub' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'sub' -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) @@ -368,22 +368,22 @@ define i32 @sub(i32 %arg) { ; ; MVE-SIZE-LABEL: 'sub' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 185 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 369 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll index 8a12ca2df234e..28e4860c8810c 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll @@ -112,31 +112,31 @@ define i32 @add(i32 %arg) { ; ; V8M-SIZE-LABEL: 'add' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'add' -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) @@ -162,22 +162,22 @@ define i32 @add(i32 %arg) { ; ; MVE-SIZE-LABEL: 'add' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -318,31 +318,31 @@ define i32 @sub(i32 %arg) { ; ; V8M-SIZE-LABEL: 'sub' ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) -; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 129 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef) +; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef) ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; NEON-SIZE-LABEL: 'sub' -; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) +; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) @@ -368,22 +368,22 @@ define i32 @sub(i32 %arg) { ; ; MVE-SIZE-LABEL: 'sub' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/cmps.ll b/llvm/test/Analysis/CostModel/ARM/cmps.ll index e86a1b95a7b36..090fccee81bbe 100644 --- a/llvm/test/Analysis/CostModel/ARM/cmps.ll +++ b/llvm/test/Analysis/CostModel/ARM/cmps.ll @@ -96,7 +96,7 @@ define i32 @cmps() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef @@ -106,17 +106,17 @@ define i32 @cmps() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 x i8> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x i32> undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x ptr> undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'cmps' @@ -124,17 +124,17 @@ define i32 @cmps() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 x i8> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x i32> undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x ptr> undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'cmps' @@ -148,9 +148,9 @@ define i32 @cmps() { ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %a10 = fcmp olt <8 x half> undef, undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef @@ -257,11 +257,11 @@ define void @minmax() { ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef -; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8M-BASE-SIZE-LABEL: 'minmax' @@ -272,11 +272,11 @@ define void @minmax() { ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef -; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-V8R-SIZE-LABEL: 'minmax' diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll index 66240c8255ad7..7a40252dfa619 100644 --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -236,17 +236,17 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x ; ; LATE-LABEL: 'fshl' ; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fshl' ; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; SIZE-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'fshl' ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) diff --git a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll index 12a1ae7a43610..cc8f2da57f072 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll @@ -58,22 +58,22 @@ define i32 @abs(i32 %arg) { ; ; MVE-SIZE-LABEL: 'abs' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) diff --git a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll index 7f1005285747c..01341e4dcb648 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll @@ -60,22 +60,22 @@ define i32 @smin(i32 %arg) { ; ; MVE-SIZE-LABEL: 'smin' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -167,22 +167,22 @@ define i32 @smax(i32 %arg) { ; ; MVE-SIZE-LABEL: 'smax' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -275,22 +275,22 @@ define i32 @umin(i32 %arg) { ; ; MVE-SIZE-LABEL: 'umin' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -382,22 +382,22 @@ define i32 @sub(i32 %arg) { ; ; MVE-SIZE-LABEL: 'sub' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/select.ll b/llvm/test/Analysis/CostModel/ARM/select.ll index 4e2ccffa7cc5b..f626901990f01 100644 --- a/llvm/test/Analysis/CostModel/ARM/select.ll +++ b/llvm/test/Analysis/CostModel/ARM/select.ll @@ -155,21 +155,21 @@ define void @selects() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-NEON-SIZE-LABEL: 'selects' @@ -212,28 +212,28 @@ define void @selects() { ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef ; CHECK-THUMB1-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; CHECK-THUMB2-SIZE-LABEL: 'selects' @@ -244,28 +244,28 @@ define void @selects() { ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef +; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef ; CHECK-THUMB2-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v0 = select i1 undef, i1 undef, i1 undef diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll index a990cee1f5fb3..c9a51f0507844 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll @@ -377,34 +377,30 @@ entry: define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1_i1(<2 x i64> %a, <2 x i64> %b, i64 %c) { ; CHECK-LABEL: cmpeqz_v2i1_i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov r2, r3, d2 -; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: vmov r3, r4, d3 -; CHECK-NEXT: csetm r12, eq -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: orrs r3, r4 -; CHECK-NEXT: vmov r4, r3, d0 -; CHECK-NEXT: csetm r5, eq -; CHECK-NEXT: orrs r3, r4 -; CHECK-NEXT: vmov r3, r4, d1 -; CHECK-NEXT: csetm lr, eq -; CHECK-NEXT: orrs r3, r4 -; CHECK-NEXT: csetm r4, eq ; CHECK-NEXT: orrs r0, r1 ; CHECK-NEXT: beq .LBB15_2 -; CHECK-NEXT: @ %bb.1: @ %select.false -; CHECK-NEXT: bfi r2, r12, #0, #8 -; CHECK-NEXT: bfi r2, r5, #8, #8 +; CHECK-NEXT: @ %bb.1: @ %select.false.sink +; CHECK-NEXT: vmov r0, r1, d2 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: csetm r0, eq +; CHECK-NEXT: bfi r1, r0, #0, #8 +; CHECK-NEXT: vmov r0, r2, d3 ; CHECK-NEXT: b .LBB15_3 -; CHECK-NEXT: .LBB15_2: -; CHECK-NEXT: bfi r2, lr, #0, #8 -; CHECK-NEXT: bfi r2, r4, #8, #8 +; CHECK-NEXT: .LBB15_2: @ %select.true.sink +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: csetm r0, eq +; CHECK-NEXT: bfi r1, r0, #0, #8 +; CHECK-NEXT: vmov r0, r2, d1 ; CHECK-NEXT: .LBB15_3: @ %select.end -; CHECK-NEXT: vmsr p0, r2 +; CHECK-NEXT: orrs r0, r2 +; CHECK-NEXT: csetm r0, eq +; CHECK-NEXT: bfi r1, r0, #8, #8 +; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 -; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: bx lr entry: %c1 = icmp eq <2 x i64> %a, zeroinitializer %c2 = icmp eq <2 x i64> %b, zeroinitializer diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll index 7bbddefd82721..db6136c4a2b28 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll @@ -205,292 +205,277 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind { ; X86-NEXT: testl %edi, %edi ; X86-NEXT: cmovnel %esi, %edx ; X86-NEXT: addl $64, %edx -; X86-NEXT: movl 20(%ebp), %edi -; X86-NEXT: movl %edi, %esi +; X86-NEXT: movl 20(%ebp), %esi ; X86-NEXT: orl %ebx, %esi ; X86-NEXT: cmovnel %ecx, %edx +; X86-NEXT: xorl %edi, %edi ; X86-NEXT: subl %edx, %eax -; X86-NEXT: movl $0, %edx -; X86-NEXT: sbbl %edx, %edx ; X86-NEXT: movl $0, %ebx ; X86-NEXT: sbbl %ebx, %ebx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %ecx, %ecx ; X86-NEXT: movl $0, %esi ; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: movl $127, %ecx -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %edx, %ecx -; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %ebx, %ecx -; X86-NEXT: movl $0, %ecx -; X86-NEXT: sbbl %esi, %ecx -; X86-NEXT: setb %cl -; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload +; X86-NEXT: movl $127, %edx ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: xorl $127, %eax +; X86-NEXT: cmpl %eax, %edx +; X86-NEXT: movl $0, %edx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ebx, %eax -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %esi, %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: sete %al -; X86-NEXT: testb %cl, %cl -; X86-NEXT: movb %cl, %ah -; X86-NEXT: movl 24(%ebp), %ebx -; X86-NEXT: movl $0, %esi -; X86-NEXT: cmovnel %esi, %ebx -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: cmovnel %esi, %ecx +; X86-NEXT: sbbl %ebx, %edx ; X86-NEXT: movl $0, %edx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 16(%ebp), %esi -; X86-NEXT: cmovnel %edx, %esi -; X86-NEXT: movl 12(%ebp), %edi -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: orb %ah, %al -; X86-NEXT: movl 44(%ebp), %eax -; X86-NEXT: jne .LBB4_7 -; X86-NEXT: # %bb.1: # %udiv-bb1 -; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: sbbl %ecx, %edx +; X86-NEXT: movl $0, %edx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %esi, %edx +; X86-NEXT: setb %dl +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload +; X86-NEXT: movl 24(%ebp), %eax +; X86-NEXT: cmovnel %edi, %eax +; X86-NEXT: movl 20(%ebp), %esi +; X86-NEXT: cmovnel %edi, %esi +; X86-NEXT: movl 16(%ebp), %edx +; X86-NEXT: cmovnel %edi, %edx +; X86-NEXT: movl 12(%ebp), %ebx +; X86-NEXT: cmovnel %edi, %ebx +; X86-NEXT: movl 44(%ebp), %edi +; X86-NEXT: jne .LBB4_8 +; X86-NEXT: # %bb.1: # %_udiv-special-cases +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: xorl $127, %eax +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl 44(%ebp), %edi +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: je .LBB4_8 +; X86-NEXT: # %bb.2: # %udiv-bb1 +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NEXT: xorps %xmm0, %xmm0 ; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) ; X86-NEXT: movl 16(%ebp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl 20(%ebp), %edx -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NEXT: movl 24(%ebp), %eax ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: xorb $127, %cl ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $12, %al ; X86-NEXT: negb %al ; X86-NEXT: movsbl %al, %eax -; X86-NEXT: movl 136(%esp,%eax), %edi -; X86-NEXT: movl 140(%esp,%eax), %esi -; X86-NEXT: shldl %cl, %edi, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 136(%esp,%eax), %esi +; X86-NEXT: movl 140(%esp,%eax), %edx +; X86-NEXT: shldl %cl, %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl 128(%esp,%eax), %ebx -; X86-NEXT: movl 132(%esp,%eax), %eax -; X86-NEXT: shldl %cl, %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, %edi -; X86-NEXT: shldl %cl, %ebx, %edi +; X86-NEXT: movl 132(%esp,%eax), %edx +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: shldl %cl, %ebx, %edx ; X86-NEXT: shll %cl, %ebx -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: addl $1, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: addl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $0, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: adcl $0, %ecx ; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl 20(%ebp), %ebx -; X86-NEXT: jae .LBB4_2 -; X86-NEXT: # %bb.5: -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: movl %edi, %esi -; X86-NEXT: jmp .LBB4_6 -; X86-NEXT: .LBB4_2: # %udiv-preheader +; X86-NEXT: jae .LBB4_3 +; X86-NEXT: # %bb.6: +; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: jmp .LBB4_7 +; X86-NEXT: .LBB4_3: # %udiv-preheader ; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 12(%ebp), %edx -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl 16(%ebp), %edx -; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl 12(%ebp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl 24(%ebp), %edi +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 24(%ebp), %eax -; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: shrb $3, %al ; X86-NEXT: andb $12, %al ; X86-NEXT: movzbl %al, %eax -; X86-NEXT: movl 92(%esp,%eax), %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 88(%esp,%eax), %edx -; X86-NEXT: movl %edx, %ebx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrdl %cl, %esi, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 92(%esp,%eax), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 88(%esp,%eax), %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 80(%esp,%eax), %edi -; X86-NEXT: movl 84(%esp,%eax), %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shrdl %cl, %edx, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shrl %cl, %edx -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrdl %cl, %eax, %edi +; X86-NEXT: shrdl %cl, %edx, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 80(%esp,%eax), %edx +; X86-NEXT: movl 84(%esp,%eax), %eax +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shrdl %cl, %edi, %ebx +; X86-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shrdl %cl, %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl 28(%ebp), %eax ; X86-NEXT: addl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl 32(%ebp), %eax ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 36(%ebp), %esi -; X86-NEXT: adcl $-1, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: adcl $-1, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl 40(%ebp), %eax ; X86-NEXT: adcl $-1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: xorl %eax, %eax ; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: .p2align 4 -; X86-NEXT: .LBB4_3: # %udiv-do-while +; X86-NEXT: .LBB4_4: # %udiv-do-while ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: shldl $1, %esi, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: shldl $1, %edi, %esi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: shldl $1, %ebx, %edi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shldl $1, %edx, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shldl $1, %ecx, %edx -; X86-NEXT: orl %eax, %edx +; X86-NEXT: shldl $1, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: shldl $1, %ebx, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: shldl $1, %edx, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: shldl $1, %edi, %ebx +; X86-NEXT: shldl $1, %ecx, %edi +; X86-NEXT: shldl $1, %esi, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: shldl $1, %ecx, %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: shldl $1, %ecx, %esi +; X86-NEXT: orl %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: shldl $1, %esi, %ecx +; X86-NEXT: orl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: addl %esi, %esi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmpl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %edi, %ecx +; X86-NEXT: sbbl %ebx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: sbbl %edx, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: sarl $31, %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl $1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andl 40(%ebp), %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: andl 40(%ebp), %esi ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl 36(%ebp), %eax ; X86-NEXT: movl %ecx, %edx ; X86-NEXT: andl 32(%ebp), %edx ; X86-NEXT: andl 28(%ebp), %ecx -; X86-NEXT: subl %ecx, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: sbbl %edx, %edi +; X86-NEXT: subl %ecx, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl %eax, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %edx, %ebx +; X86-NEXT: sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: sbbl %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: addl $-1, %ecx ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: adcl $-1, %eax -; X86-NEXT: adcl $-1, %ebx -; X86-NEXT: adcl $-1, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: adcl $-1, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: adcl $-1, %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %esi, %eax +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edi, %eax ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: orl %ebx, %ecx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: orl %edx, %ecx ; X86-NEXT: orl %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: jne .LBB4_3 -; X86-NEXT: # %bb.4: -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: jne .LBB4_4 +; X86-NEXT: # %bb.5: ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: .LBB4_6: # %udiv-loop-exit ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: shldl $1, %edi, %ebx -; X86-NEXT: orl %eax, %ebx -; X86-NEXT: shldl $1, %esi, %edi -; X86-NEXT: orl %eax, %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shldl $1, %ecx, %esi -; X86-NEXT: orl %eax, %esi -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: movl 44(%ebp), %eax -; X86-NEXT: .LBB4_7: # %udiv-end -; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: movl %esi, 4(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: movl %edx, 8(%eax) -; X86-NEXT: movl %ebx, 12(%eax) -; X86-NEXT: movl %esi, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl 44(%ebp), %edi +; X86-NEXT: .LBB4_7: # %udiv-loop-exit +; X86-NEXT: shldl $1, %esi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: shldl $1, %edx, %esi +; X86-NEXT: orl %ecx, %esi +; X86-NEXT: shldl $1, %ebx, %edx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: addl %ebx, %ebx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: .LBB4_8: # %udiv-end ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ebx, (%edi) +; X86-NEXT: movl %edx, 4(%edi) +; X86-NEXT: movl %esi, 8(%edi) +; X86-NEXT: movl %eax, 12(%edi) +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl 36(%ebp), %eax ; X86-NEXT: movl %eax, %esi ; X86-NEXT: imull %edx, %esi -; X86-NEXT: mull %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: mull %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: addl %esi, %edx ; X86-NEXT: movl 40(%ebp), %edi -; X86-NEXT: imull %ecx, %edi +; X86-NEXT: imull %ebx, %edi ; X86-NEXT: addl %edx, %edi ; X86-NEXT: movl 28(%ebp), %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: mull %esi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: imull 28(%ebp), %ebx -; X86-NEXT: addl %edx, %ebx -; X86-NEXT: movl 32(%ebp), %edx -; X86-NEXT: imull %edx, %esi -; X86-NEXT: addl %ebx, %esi -; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl %edi, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: mull %ebx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: imull 28(%ebp), %ecx +; X86-NEXT: addl %edx, %ecx +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: imull %eax, %ebx +; X86-NEXT: addl %ecx, %ebx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: movl %edi, %eax +; X86-NEXT: adcl %edi, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, %eax ; X86-NEXT: movl 28(%ebp), %ecx ; X86-NEXT: mull %ecx -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %edx, %edi ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: movl %eax, %ecx -; X86-NEXT: addl %esi, %ecx -; X86-NEXT: adcl $0, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edi, %eax +; X86-NEXT: addl %edi, %ecx +; X86-NEXT: adcl $0, %ebx +; X86-NEXT: movl %esi, %eax ; X86-NEXT: mull 32(%ebp) ; X86-NEXT: movl 16(%ebp), %esi ; X86-NEXT: movl %edx, %edi ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: adcl %ebx, %edi ; X86-NEXT: setb %cl -; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: mull 32(%ebp) ; X86-NEXT: addl %edi, %eax ; X86-NEXT: movzbl %cl, %ecx diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll index 953a0d65c5386..ab28a3b4a2b63 100644 --- a/llvm/test/CodeGen/X86/test-shrink-bug.ll +++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll @@ -48,18 +48,17 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) { ; CHECK-X86: ## %bb.0: ; CHECK-X86-NEXT: subl $12, %esp ; CHECK-X86-NEXT: .cfi_def_cfa_offset 16 -; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; CHECK-X86-NEXT: cmpb $123, {{[0-9]+}}(%esp) -; CHECK-X86-NEXT: setne %cl -; CHECK-X86-NEXT: testl $263, %eax ## imm = 0x107 -; CHECK-X86-NEXT: setne %al -; CHECK-X86-NEXT: testb %cl, %al -; CHECK-X86-NEXT: jne LBB1_2 -; CHECK-X86-NEXT: ## %bb.1: ## %yes -; CHECK-X86-NEXT: addl $12, %esp -; CHECK-X86-NEXT: retl -; CHECK-X86-NEXT: LBB1_2: ## %no +; CHECK-X86-NEXT: sete %al +; CHECK-X86-NEXT: testl $263, %ecx ## imm = 0x107 +; CHECK-X86-NEXT: je LBB1_3 +; CHECK-X86-NEXT: ## %bb.1: +; CHECK-X86-NEXT: testb %al, %al +; CHECK-X86-NEXT: jne LBB1_3 +; CHECK-X86-NEXT: ## %bb.2: ## %no ; CHECK-X86-NEXT: calll _bar +; CHECK-X86-NEXT: LBB1_3: ## %yes ; CHECK-X86-NEXT: addl $12, %esp ; CHECK-X86-NEXT: retl ; diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll b/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll index 8a36226851592..f23e27f6d6686 100644 --- a/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll +++ b/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll @@ -241,13 +241,17 @@ define i64 @test_i64(i1 %a, i1 %b, i64 %i, i64 %j, i64 %k) { ; ; CHECK-V8A-TWO-FOLD-6-LABEL: @test_i64( ; CHECK-V8A-TWO-FOLD-6-NEXT: entry: +; CHECK-V8A-TWO-FOLD-6-NEXT: br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]] +; CHECK-V8A-TWO-FOLD-6: O: ; CHECK-V8A-TWO-FOLD-6-NEXT: [[IAJ:%.*]] = add i64 [[I:%.*]], [[J:%.*]] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[IAJAK:%.*]] = add i64 [[IAJ]], [[K:%.*]] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[IXJ:%.*]] = xor i64 [[I]], [[J]] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[IXJXK:%.*]] = xor i64 [[IXJ]], [[K]] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[WP:%.*]] = select i1 [[B:%.*]], i64 [[IAJAK]], i64 [[IXJXK]] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[WP2:%.*]] = add i64 [[WP]], [[WP]] -; CHECK-V8A-TWO-FOLD-6-NEXT: [[W:%.*]] = select i1 [[A:%.*]], i64 2, i64 [[WP2]] +; CHECK-V8A-TWO-FOLD-6-NEXT: br label [[M]] +; CHECK-V8A-TWO-FOLD-6: M: +; CHECK-V8A-TWO-FOLD-6-NEXT: [[W:%.*]] = phi i64 [ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[R:%.*]] = add i64 [[W]], 1 ; CHECK-V8A-TWO-FOLD-6-NEXT: ret i64 [[R]] ; @@ -374,13 +378,17 @@ define i64 @test_i64_minsize(i1 %a, i1 %b, i64 %i, i64 %j, i64 %k) #0 { ; ; CHECK-V8A-TWO-FOLD-6-LABEL: @test_i64_minsize( ; CHECK-V8A-TWO-FOLD-6-NEXT: entry: +; CHECK-V8A-TWO-FOLD-6-NEXT: br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]] +; CHECK-V8A-TWO-FOLD-6: O: ; CHECK-V8A-TWO-FOLD-6-NEXT: [[IAJ:%.*]] = add i64 [[I:%.*]], [[J:%.*]] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[IAJAK:%.*]] = add i64 [[IAJ]], [[K:%.*]] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[IXJ:%.*]] = xor i64 [[I]], [[J]] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[IXJXK:%.*]] = xor i64 [[IXJ]], [[K]] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[WP:%.*]] = select i1 [[B:%.*]], i64 [[IAJAK]], i64 [[IXJXK]] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[WP2:%.*]] = add i64 [[WP]], [[WP]] -; CHECK-V8A-TWO-FOLD-6-NEXT: [[W:%.*]] = select i1 [[A:%.*]], i64 2, i64 [[WP2]] +; CHECK-V8A-TWO-FOLD-6-NEXT: br label [[M]] +; CHECK-V8A-TWO-FOLD-6: M: +; CHECK-V8A-TWO-FOLD-6-NEXT: [[W:%.*]] = phi i64 [ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ] ; CHECK-V8A-TWO-FOLD-6-NEXT: [[R:%.*]] = add i64 [[W]], 1 ; CHECK-V8A-TWO-FOLD-6-NEXT: ret i64 [[R]] ;