diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index f6936b98bf3e4..1d7c41452f7d5 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1369,8 +1369,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     int ISD = TLI->InstructionOpcodeToISD(Opcode);
     assert(ISD && "Invalid opcode");
 
-    // TODO: Handle other cost kinds.
-    if (CostKind != TTI::TCK_RecipThroughput)
+    if (getTLI()->getValueType(DL, ValTy, true) == MVT::Other)
       return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                        Op1Info, Op2Info, I);
 
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
index 8b52b123d6d25..3fbd12ac8813c 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
@@ -27,20 +27,20 @@ define i32 @sadd(i32 %arg) {
 ; CHECK-LABEL: 'sadd'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
@@ -90,20 +90,20 @@ define i32 @uadd(i32 %arg) {
 ; CHECK-LABEL: 'uadd'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
@@ -153,20 +153,20 @@ define i32 @ssub(i32 %arg) {
 ; CHECK-LABEL: 'ssub'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:4 Lat:4 SizeLat:4 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:6 Lat:6 SizeLat:6 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:6 Lat:6 SizeLat:6 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
@@ -216,20 +216,20 @@ define i32 @usub(i32 %arg) {
 ; CHECK-LABEL: 'usub'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:2 Lat:2 SizeLat:2 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:3 Lat:3 SizeLat:3 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:5 Lat:5 SizeLat:5 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
@@ -279,20 +279,20 @@ define i32 @smul(i32 %arg) {
 ; CHECK-LABEL: 'smul'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:34 CodeSize:8 Lat:8 SizeLat:8 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:68 CodeSize:8 Lat:8 SizeLat:8 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:136 CodeSize:8 Lat:8 SizeLat:8 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:68 CodeSize:9 Lat:9 SizeLat:9 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:136 CodeSize:11 Lat:11 SizeLat:11 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:38 CodeSize:8 Lat:8 SizeLat:8 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:8 Lat:8 SizeLat:8 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:8 Lat:8 SizeLat:8 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:76 CodeSize:9 Lat:9 SizeLat:9 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:152 CodeSize:11 Lat:11 SizeLat:11 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:8 Lat:8 SizeLat:8 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:9 Lat:9 SizeLat:9 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:11 Lat:11 SizeLat:11 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 5 for: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:8 Lat:8 SizeLat:8 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:8 Lat:8 SizeLat:8 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:9 Lat:9 SizeLat:9 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:11 Lat:11 SizeLat:11 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
@@ -342,20 +342,20 @@ define i32 @umul(i32 %arg) {
 ; CHECK-LABEL: 'umul'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:33 CodeSize:7 Lat:7 SizeLat:7 for: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:7 Lat:7 SizeLat:7 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:132 CodeSize:7 Lat:7 SizeLat:7 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:66 CodeSize:8 Lat:8 SizeLat:8 for: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:132 CodeSize:10 Lat:10 SizeLat:10 for: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 2 for: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:37 CodeSize:7 Lat:7 SizeLat:7 for: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:7 Lat:7 SizeLat:7 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:148 CodeSize:7 Lat:7 SizeLat:7 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:74 CodeSize:8 Lat:8 SizeLat:8 for: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:148 CodeSize:10 Lat:10 SizeLat:10 for: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:7 Lat:7 SizeLat:7 for: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:7 Lat:7 SizeLat:7 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:7 Lat:7 SizeLat:7 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:8 Lat:8 SizeLat:8 for: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:10 Lat:10 SizeLat:10 for: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:7 Lat:7 SizeLat:7 for: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:7 Lat:7 SizeLat:7 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; CHECK-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:7 Lat:7 SizeLat:7 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:8 Lat:8 SizeLat:8 for: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:44 CodeSize:10 Lat:10 SizeLat:10 for: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
 ;
   %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/cmp.ll b/llvm/test/Analysis/CostModel/AArch64/cmp.ll
index b55a759a78cc3..10c995786d57a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/cmp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cmp.ll
@@ -9,7 +9,7 @@ define void @cmps() {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %c16 = icmp ult i16 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %c32 = icmp sge i32 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %c64 = icmp ne i64 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c128 = icmp ult i128 undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %c128 = icmp ult i128 undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cv16i8 = icmp slt <16 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cv8i16 = icmp ult <8 x i16> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cv4i32 = icmp sge <4 x i32> undef, undef
@@ -17,10 +17,10 @@ define void @cmps() {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cf32 = fcmp ogt float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cf64 = fcmp ogt double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cbf64 = fcmp ogt bfloat undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cfv816 = fcmp olt <8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cfv816 = fcmp olt <8 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cfv432 = fcmp oge <4 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cfv264 = fcmp oge <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %c8 = icmp slt i8 undef, undef
@@ -62,7 +62,7 @@ define void @andcmp() {
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64leneg = icmp sle i64 %a64, -1
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64gtneg = icmp sgt i64 %a64, -1
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a128 = and i128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c128 = icmp eq i128 %a128, 0
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %c128 = icmp eq i128 %a128, 0
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %av16i8 = and <16 x i8> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %cv16i8 = icmp ne <16 x i8> %av16i8, zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %av8i16 = and <8 x i16> undef, undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/fcmp.ll b/llvm/test/Analysis/CostModel/AArch64/fcmp.ll
index fb9888d98af6b..d2a1340f60460 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fcmp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fcmp.ll
@@ -7,12 +7,12 @@ define void @fcmp_oeq(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp oeq float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp oeq <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp oeq <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp oeq <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp oeq <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp oeq double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp oeq <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp oeq <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp oeq <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp oeq fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp oeq <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp oeq <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp oeq float undef, undef
@@ -32,8 +32,8 @@ define void @fcmp_oeq_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp oeq half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp oeq <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp oeq <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp oeq <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp oeq <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp oeq <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp oeq <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_oeq_half'
@@ -41,7 +41,7 @@ define void @fcmp_oeq_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp oeq <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp oeq <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp oeq <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp oeq <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp oeq <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp oeq half undef, undef
@@ -57,8 +57,8 @@ define void @fcmp_oeq_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp oeq bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp oeq <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp oeq <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp oeq <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp oeq <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp oeq bfloat undef, undef
@@ -74,12 +74,12 @@ define void @fcmp_ogt(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp ogt float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ogt <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ogt <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ogt <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ogt <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp ogt double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ogt <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ogt <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ogt <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp ogt fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ogt <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp ogt <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp ogt float undef, undef
@@ -99,8 +99,8 @@ define void @fcmp_ogt_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp ogt half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp ogt <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp ogt <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ogt <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ogt <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ogt <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ogt <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_ogt_half'
@@ -108,7 +108,7 @@ define void @fcmp_ogt_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ogt <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ogt <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ogt <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ogt <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ogt <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp ogt half undef, undef
@@ -124,8 +124,8 @@ define void @fcmp_ogt_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp ogt bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ogt <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ogt <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ogt <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ogt <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp ogt bfloat undef, undef
@@ -141,12 +141,12 @@ define void @fcmp_oge(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp oge float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp oge <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp oge <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp oge <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp oge <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp oge double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp oge <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp oge <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp oge <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp oge fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp oge <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp oge <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp oge float undef, undef
@@ -166,8 +166,8 @@ define void @fcmp_oge_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp oge half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp oge <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp oge <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp oge <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp oge <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp oge <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp oge <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_oge_half'
@@ -175,7 +175,7 @@ define void @fcmp_oge_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp oge <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp oge <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp oge <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp oge <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp oge <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp oge half undef, undef
@@ -191,8 +191,8 @@ define void @fcmp_oge_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp oge bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp oge <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp oge <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp oge <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp oge <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp oge bfloat undef, undef
@@ -208,12 +208,12 @@ define void @fcmp_olt(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp olt float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp olt <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp olt <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp olt <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp olt <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp olt double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp olt <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp olt <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp olt <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp olt fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp olt <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp olt <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp olt float undef, undef
@@ -233,8 +233,8 @@ define void @fcmp_olt_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp olt half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp olt <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp olt <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp olt <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp olt <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp olt <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp olt <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_olt_half'
@@ -242,7 +242,7 @@ define void @fcmp_olt_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp olt <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp olt <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp olt <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp olt <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp olt <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp olt half undef, undef
@@ -258,8 +258,8 @@ define void @fcmp_olt_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp olt bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp olt <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp olt <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp olt <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp olt <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp olt bfloat undef, undef
@@ -275,12 +275,12 @@ define void @fcmp_ole(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp ole float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ole <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ole <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ole <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ole <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp ole double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ole <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ole <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ole <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp ole fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ole <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp ole <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp ole float undef, undef
@@ -300,8 +300,8 @@ define void @fcmp_ole_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp ole half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp ole <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp ole <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ole <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ole <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ole <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ole <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_ole_half'
@@ -309,7 +309,7 @@ define void @fcmp_ole_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ole <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ole <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ole <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ole <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ole <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp ole half undef, undef
@@ -325,8 +325,8 @@ define void @fcmp_ole_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp ole bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ole <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ole <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ole <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ole <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp ole bfloat undef, undef
@@ -342,12 +342,12 @@ define void @fcmp_one(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp one float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp one <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp one <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp one <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp one <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp one double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp one <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp one <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp one <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp one fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp one <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp one <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp one float undef, undef
@@ -367,8 +367,8 @@ define void @fcmp_one_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp one half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp one <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp one <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp one <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp one <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp one <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp one <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_one_half'
@@ -376,7 +376,7 @@ define void @fcmp_one_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp one <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp one <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp one <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp one <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp one <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp one half undef, undef
@@ -392,8 +392,8 @@ define void @fcmp_one_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp one bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp one <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp one <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp one <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp one <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp one bfloat undef, undef
@@ -409,12 +409,12 @@ define void @fcmp_ord(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp ord float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ord <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ord <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ord <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ord <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp ord double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ord <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ord <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ord <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp ord fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ord <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp ord <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp ord float undef, undef
@@ -434,8 +434,8 @@ define void @fcmp_ord_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp ord half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp ord <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp ord <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ord <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ord <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ord <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ord <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_ord_half'
@@ -443,7 +443,7 @@ define void @fcmp_ord_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ord <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ord <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ord <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ord <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ord <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp ord half undef, undef
@@ -459,8 +459,8 @@ define void @fcmp_ord_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp ord bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ord <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ord <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ord <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ord <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp ord bfloat undef, undef
@@ -476,12 +476,12 @@ define void @fcmp_ueq(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp ueq float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ueq <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ueq <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ueq <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ueq <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp ueq double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ueq <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ueq <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ueq <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp ueq fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ueq <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp ueq <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp ueq float undef, undef
@@ -501,8 +501,8 @@ define void @fcmp_ueq_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp ueq half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp ueq <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp ueq <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ueq <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ueq <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ueq <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ueq <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_ueq_half'
@@ -510,7 +510,7 @@ define void @fcmp_ueq_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ueq <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ueq <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ueq <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ueq <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ueq <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp ueq half undef, undef
@@ -526,8 +526,8 @@ define void @fcmp_ueq_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp ueq bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ueq <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ueq <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ueq <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ueq <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp ueq bfloat undef, undef
@@ -543,12 +543,12 @@ define void @fcmp_ugt(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp ugt float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ugt <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ugt <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ugt <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ugt <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp ugt double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ugt <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ugt <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ugt <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp ugt fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ugt <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp ugt <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp ugt float undef, undef
@@ -568,8 +568,8 @@ define void @fcmp_ugt_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp ugt half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp ugt <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp ugt <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ugt <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ugt <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ugt <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ugt <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_ugt_half'
@@ -577,7 +577,7 @@ define void @fcmp_ugt_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ugt <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ugt <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ugt <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ugt <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ugt <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp ugt half undef, undef
@@ -593,8 +593,8 @@ define void @fcmp_ugt_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp ugt bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ugt <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ugt <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ugt <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ugt <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp ugt bfloat undef, undef
@@ -610,12 +610,12 @@ define void @fcmp_uge(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp uge float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp uge <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp uge <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp uge <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp uge <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp uge double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp uge <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp uge <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp uge <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp uge fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp uge <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp uge <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp uge float undef, undef
@@ -635,8 +635,8 @@ define void @fcmp_uge_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp uge half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp uge <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp uge <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp uge <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp uge <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp uge <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp uge <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_uge_half'
@@ -644,7 +644,7 @@ define void @fcmp_uge_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp uge <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp uge <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp uge <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp uge <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp uge <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp uge half undef, undef
@@ -660,8 +660,8 @@ define void @fcmp_uge_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp uge bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp uge <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp uge <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp uge <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp uge <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp uge bfloat undef, undef
@@ -677,12 +677,12 @@ define void @fcmp_ult(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp ult float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ult <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ult <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ult <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ult <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp ult double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ult <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ult <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ult <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp ult fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ult <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp ult <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp ult float undef, undef
@@ -702,8 +702,8 @@ define void @fcmp_ult_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp ult half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp ult <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp ult <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ult <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ult <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ult <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ult <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_ult_half'
@@ -711,7 +711,7 @@ define void @fcmp_ult_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ult <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ult <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ult <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ult <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ult <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp ult half undef, undef
@@ -727,8 +727,8 @@ define void @fcmp_ult_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp ult bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ult <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ult <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ult <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ult <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp ult bfloat undef, undef
@@ -744,12 +744,12 @@ define void @fcmp_ule(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp ule float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ule <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ule <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ule <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ule <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp ule double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ule <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ule <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ule <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp ule fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp ule <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp ule <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp ule float undef, undef
@@ -769,8 +769,8 @@ define void @fcmp_ule_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp ule half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp ule <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp ule <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp ule <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp ule <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp ule <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp ule <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_ule_half'
@@ -778,7 +778,7 @@ define void @fcmp_ule_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ule <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ule <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ule <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ule <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ule <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp ule half undef, undef
@@ -794,8 +794,8 @@ define void @fcmp_ule_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp ule bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ule <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ule <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp ule bfloat undef, undef
@@ -811,12 +811,12 @@ define void @fcmp_une(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp une float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp une <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp une <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp une <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp une <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp une double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp une <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp une <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp une <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp une fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp une <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp une <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp une float undef, undef
@@ -836,8 +836,8 @@ define void @fcmp_une_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp une half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp une <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp une <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp une <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp une <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp une <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp une <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_une_half'
@@ -845,7 +845,7 @@ define void @fcmp_une_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp une <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp une <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp une <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp une <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp une <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp une half undef, undef
@@ -861,8 +861,8 @@ define void @fcmp_une_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp une bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp une <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp une <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp une bfloat undef, undef
@@ -878,12 +878,12 @@ define void @fcmp_uno(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp uno float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp uno <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp uno <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp uno <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp uno <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp uno double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp uno <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp uno <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp uno <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp uno fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp uno <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp uno <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp uno float undef, undef
@@ -903,8 +903,8 @@ define void @fcmp_uno_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp uno half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp uno <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp uno <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp uno <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp uno <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp uno <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp uno <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_uno_half'
@@ -912,7 +912,7 @@ define void @fcmp_uno_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp uno <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp uno <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp uno <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp uno <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp uno <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp uno half undef, undef
@@ -928,8 +928,8 @@ define void @fcmp_uno_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp uno bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp uno <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp uno <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp uno bfloat undef, undef
@@ -945,12 +945,12 @@ define void @fcmp_true(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp true float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp true <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp true <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp true <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp true <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp true double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp true <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp true <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp true <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp true fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp true <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp true <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp true float undef, undef
@@ -970,8 +970,8 @@ define void @fcmp_true_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp true half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp true <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp true <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp true <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp true <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp true <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp true <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_true_half'
@@ -979,7 +979,7 @@ define void @fcmp_true_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp true <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp true <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp true <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp true <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp true <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp true half undef, undef
@@ -995,8 +995,8 @@ define void @fcmp_true_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp true bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp true <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp true <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp true bfloat undef, undef
@@ -1012,12 +1012,12 @@ define void @fcmp_false(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f32 = fcmp false float undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp false <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp false <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp false <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp false <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f64 = fcmp false double undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp false <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp false <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp false <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %f128 = fcmp false fp128 undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v2f128 = fcmp false <2 x fp128> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v2f128 = fcmp false <2 x fp128> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f32 = fcmp false float undef, undef
@@ -1037,8 +1037,8 @@ define void @fcmp_false_half(i32 %arg) {
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 1 for: %f16 = fcmp false half undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 3 for: %v2f16 = fcmp false <2 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of 4 for: %v4f16 = fcmp false <4 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8f16 = fcmp false <8 x half> undef, undef
-; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16f16 = fcmp false <16 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8f16 = fcmp false <8 x half> undef, undef
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16f16 = fcmp false <16 x half> undef, undef
 ; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-FP16-LABEL: 'fcmp_false_half'
@@ -1046,7 +1046,7 @@ define void @fcmp_false_half(i32 %arg) {
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp false <2 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp false <4 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp false <8 x half> undef, undef
-; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp false <16 x half> undef, undef
+; CHECK-FP16-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp false <16 x half> undef, undef
 ; CHECK-FP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %f16 = fcmp false half undef, undef
@@ -1062,8 +1062,8 @@ define void @fcmp_false_bfloat(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %bf16 = fcmp false bfloat undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp false <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp false <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %bf16 = fcmp false bfloat undef, undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
index 59e86aefeeb6c..9d06b4bdec9b4 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@@ -236,7 +236,7 @@ declare <4 x i30> @llvm.fshl.v4i30(<4 x i30>, <4 x i30>, <4 x i30>)
 
 define <2 x i66> @fshl_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) {
 ; CHECK-LABEL: 'fshl_v2i66_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:6 Lat:6 SizeLat:6 for: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i66> @llvm.fshl.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshl
 ;
 entry:
@@ -259,7 +259,7 @@ declare i66 @llvm.fshl.i66(i66, i66, i66)
 
 define <2 x i128> @fshl_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) {
 ; CHECK-LABEL: 'fshl_v2i128_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:6 Lat:6 SizeLat:6 for: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshl = tail call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshl
 ;
 entry:
@@ -270,7 +270,7 @@ declare <2 x i128> @llvm.fshl.v4i128(<2 x i128>, <2 x i128>, <2 x i128>)
 
 define i128 @fshl_i128(i128 %a, i128 %b) {
 ; CHECK-LABEL: 'fshl_i128'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:6 SizeLat:6 for: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshl = tail call i128 @llvm.fshl.i128(i128 %a, i128 %b, i128 9)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshl
 ;
 entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
index 53cede900c398..b31806b647868 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
@@ -236,7 +236,7 @@ declare <4 x i30> @llvm.fshr.v4i30(<4 x i30>, <4 x i30>, <4 x i30>)
 
 define <2 x i66> @fshr_v2i66_3rd_arg_vec_const_lanes_different(<2 x i66> %a, <2 x i66> %b) {
 ; CHECK-LABEL: 'fshr_v2i66_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:6 Lat:6 SizeLat:6 for: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i66> @llvm.fshr.v2i66(<2 x i66> %a, <2 x i66> %b, <2 x i66> <i66 1, i66 2>)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i66> %fshr
 ;
 entry:
@@ -259,7 +259,7 @@ declare i66 @llvm.fshr.i66(i66, i66, i66)
 
 define <2 x i128> @fshr_v2i128_3rd_arg_vec_const_lanes_different(<2 x i128> %a, <2 x i128> %b) {
 ; CHECK-LABEL: 'fshr_v2i128_3rd_arg_vec_const_lanes_different'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:6 Lat:6 SizeLat:6 for: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:20 SizeLat:20 for: %fshr = tail call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %a, <2 x i128> %b, <2 x i128> <i128 1, i128 2>)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i128> %fshr
 ;
 entry:
@@ -270,7 +270,7 @@ declare <2 x i128> @llvm.fshr.v4i128(<2 x i128>, <2 x i128>, <2 x i128>)
 
 define i128 @fshr_i128(i128 %a, i128 %b) {
 ; CHECK-LABEL: 'fshr_i128'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:6 SizeLat:6 for: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)
+; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:8 Lat:8 SizeLat:8 for: %fshr = tail call i128 @llvm.fshr.i128(i128 %a, i128 %b, i128 9)
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i128 %fshr
 ;
 entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll b/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll
index 9a0e5c3eb7964..83f62c8b44303 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-cmpsel.ll
@@ -22,7 +22,7 @@ define void @cmp_legal_int() {
 ; Check icmp for an illegal integer vector.
 define <vscale x 4 x i1> @cmp_nxv4i64() {
 ; CHECK-LABEL: 'cmp_nxv4i64'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = icmp ne <vscale x 4 x i64> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %res = icmp ne <vscale x 4 x i64> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <vscale x 4 x i1> %res
 ;
   %res = icmp ne <vscale x 4 x i64> undef, undef
@@ -48,7 +48,7 @@ define void @cmp_legal_pred() {
 ; Check icmp for an illegal predicate vector.
 define <vscale x 32 x i1> @cmp_nxv32i1() {
 ; CHECK-LABEL: 'cmp_nxv32i1'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = icmp ne <vscale x 32 x i1> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %res = icmp ne <vscale x 32 x i1> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <vscale x 32 x i1> %res
 ;
   %res = icmp ne <vscale x 32 x i1> undef, undef
@@ -74,7 +74,7 @@ define void @cmp_legal_fp() #0 {
 ; Check fcmp for an illegal FP vector
 define <vscale x 16 x i1> @cmp_nxv16f16() {
 ; CHECK-LABEL: 'cmp_nxv16f16'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = fcmp oge <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %res = fcmp oge <vscale x 16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <vscale x 16 x i1> %res
 ;
   %res = fcmp oge <vscale x 16 x half> undef, undef
@@ -100,7 +100,7 @@ define void @sel_legal_int() {
 ; Check select for an illegal integer vector
 define <vscale x 16 x i16> @sel_nxv16i16() {
 ; CHECK-LABEL: 'sel_nxv16i16'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = select <vscale x 16 x i1> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %res = select <vscale x 16 x i1> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <vscale x 16 x i16> %res
 ;
   %res = select <vscale x 16 x i1> undef, <vscale x 16 x i16> undef, <vscale x 16 x i16> undef
@@ -126,7 +126,7 @@ define void @sel_legal_fp() #0 {
 ; Check select for an illegal FP vector
 define <vscale x 8 x float> @sel_nxv8f32() {
 ; CHECK-LABEL: 'sel_nxv8f32'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = select <vscale x 8 x i1> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %res = select <vscale x 8 x i1> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <vscale x 8 x float> %res
 ;
   %res = select <vscale x 8 x i1> undef, <vscale x 8 x float> undef, <vscale x 8 x float> undef
@@ -152,7 +152,7 @@ define void @sel_legal_pred() {
 ; Check select for an illegal predicate vector
 define <vscale x 32 x i1> @sel_nxv32i1() {
 ; CHECK-LABEL: 'sel_nxv32i1'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %res = select <vscale x 32 x i1> undef, <vscale x 32 x i1> undef, <vscale x 32 x i1> undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %res = select <vscale x 32 x i1> undef, <vscale x 32 x i1> undef, <vscale x 32 x i1> undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <vscale x 32 x i1> %res
 ;
   %res = select <vscale x 32 x i1> undef, <vscale x 32 x i1> undef, <vscale x 32 x i1> undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll
index 7fdd6b4971658..9801d14bcc6cd 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fcmp.ll
@@ -5,13 +5,13 @@ define void @fcmp_oeq(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_oeq'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp oeq <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp oeq <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp oeq <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp oeq <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp oeq <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp oeq <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp oeq <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp oeq <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp oeq <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp oeq <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp oeq <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp oeq <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp oeq <2 x float> undef, undef
@@ -30,7 +30,7 @@ define void @fcmp_oeq_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_oeq_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp oeq <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp oeq <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oeq <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp oeq <2 x bfloat> undef, undef
@@ -43,13 +43,13 @@ define void @fcmp_ogt(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ogt'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ogt <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ogt <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ogt <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ogt <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ogt <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ogt <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ogt <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ogt <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ogt <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ogt <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ogt <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ogt <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp ogt <2 x float> undef, undef
@@ -68,7 +68,7 @@ define void @fcmp_ogt_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ogt_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ogt <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ogt <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ogt <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp ogt <2 x bfloat> undef, undef
@@ -81,13 +81,13 @@ define void @fcmp_oge(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_oge'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp oge <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp oge <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp oge <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp oge <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp oge <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp oge <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp oge <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp oge <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp oge <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp oge <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp oge <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp oge <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp oge <2 x float> undef, undef
@@ -106,7 +106,7 @@ define void @fcmp_oge_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_oge_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp oge <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp oge <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp oge <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp oge <2 x bfloat> undef, undef
@@ -119,13 +119,13 @@ define void @fcmp_olt(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_olt'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp olt <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp olt <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp olt <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp olt <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp olt <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp olt <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp olt <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp olt <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp olt <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp olt <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp olt <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp olt <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp olt <2 x float> undef, undef
@@ -144,7 +144,7 @@ define void @fcmp_olt_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_olt_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp olt <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp olt <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp olt <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp olt <2 x bfloat> undef, undef
@@ -157,13 +157,13 @@ define void @fcmp_ole(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ole'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ole <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ole <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ole <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ole <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ole <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ole <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ole <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ole <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ole <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ole <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ole <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ole <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp ole <2 x float> undef, undef
@@ -182,7 +182,7 @@ define void @fcmp_ole_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ole_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ole <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ole <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ole <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp ole <2 x bfloat> undef, undef
@@ -195,13 +195,13 @@ define void @fcmp_one(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_one'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp one <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp one <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp one <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp one <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp one <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp one <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp one <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp one <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp one <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp one <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp one <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp one <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp one <2 x float> undef, undef
@@ -220,7 +220,7 @@ define void @fcmp_one_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_one_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp one <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp one <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp one <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp one <2 x bfloat> undef, undef
@@ -233,13 +233,13 @@ define void @fcmp_ord(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ord'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ord <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ord <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ord <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ord <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ord <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ord <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ord <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ord <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ord <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ord <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ord <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ord <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp ord <2 x float> undef, undef
@@ -258,7 +258,7 @@ define void @fcmp_ord_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ord_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ord <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ord <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ord <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp ord <2 x bfloat> undef, undef
@@ -271,13 +271,13 @@ define void @fcmp_ueq(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ueq'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ueq <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ueq <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ueq <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ueq <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ueq <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ueq <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ueq <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ueq <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ueq <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ueq <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ueq <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ueq <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp ueq <2 x float> undef, undef
@@ -296,7 +296,7 @@ define void @fcmp_ueq_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ueq_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ueq <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ueq <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ueq <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp ueq <2 x bfloat> undef, undef
@@ -309,13 +309,13 @@ define void @fcmp_ugt(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ugt'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ugt <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ugt <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ugt <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ugt <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ugt <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ugt <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ugt <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ugt <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ugt <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ugt <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ugt <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ugt <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp ugt <2 x float> undef, undef
@@ -334,7 +334,7 @@ define void @fcmp_ugt_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ugt_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ugt <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ugt <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ugt <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp ugt <2 x bfloat> undef, undef
@@ -347,13 +347,13 @@ define void @fcmp_uge(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_uge'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp uge <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp uge <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp uge <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp uge <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp uge <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp uge <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp uge <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp uge <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp uge <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp uge <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp uge <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp uge <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp uge <2 x float> undef, undef
@@ -372,7 +372,7 @@ define void @fcmp_uge_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_uge_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp uge <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp uge <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uge <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp uge <2 x bfloat> undef, undef
@@ -385,13 +385,13 @@ define void @fcmp_ult(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ult'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ult <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ult <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ult <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ult <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ult <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ult <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ult <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ult <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ult <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ult <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ult <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ult <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp ult <2 x float> undef, undef
@@ -410,7 +410,7 @@ define void @fcmp_ult_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ult_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ult <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ult <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ult <8 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp ult <2 x bfloat> undef, undef
@@ -423,13 +423,13 @@ define void @fcmp_ule(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ule'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp ule <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp ule <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp ule <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp ule <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp ule <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp ule <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp ule <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp ule <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp ule <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp ule <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp ule <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp ule <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp ule <2 x float> undef, undef
@@ -448,8 +448,8 @@ define void @fcmp_ule_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_ule_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp ule <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp ule <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp ule <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp ule <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp ule <2 x bfloat> undef, undef
@@ -463,13 +463,13 @@ define void @fcmp_une(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_une'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp une <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp une <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp une <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp une <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp une <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp une <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp une <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp une <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp une <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp une <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp une <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp une <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp une <2 x float> undef, undef
@@ -488,8 +488,8 @@ define void @fcmp_une_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_une_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp une <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp une <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp une <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp une <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp une <2 x bfloat> undef, undef
@@ -503,13 +503,13 @@ define void @fcmp_uno(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_uno'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp uno <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp uno <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp uno <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp uno <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp uno <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp uno <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp uno <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp uno <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp uno <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp uno <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp uno <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp uno <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp uno <2 x float> undef, undef
@@ -528,8 +528,8 @@ define void @fcmp_uno_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_uno_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp uno <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp uno <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp uno <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp uno <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp uno <2 x bfloat> undef, undef
@@ -543,13 +543,13 @@ define void @fcmp_true(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_true'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp true <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp true <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp true <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp true <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp true <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp true <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp true <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp true <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp true <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp true <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp true <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp true <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp true <2 x float> undef, undef
@@ -568,8 +568,8 @@ define void @fcmp_true_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_true_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp true <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp true <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp true <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp true <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp true <2 x bfloat> undef, undef
@@ -583,13 +583,13 @@ define void @fcmp_false(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_false'
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f32 = fcmp false <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f32 = fcmp false <4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v8f32 = fcmp false <8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v8f32 = fcmp false <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f64 = fcmp false <2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v4f64 = fcmp false <4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v4f64 = fcmp false <4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v2f16 = fcmp false <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v4f16 = fcmp false <4 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 1 for: %v8f16 = fcmp false <8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %v16f16 = fcmp false <16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of 2 for: %v16f16 = fcmp false <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2f32 = fcmp false <2 x float> undef, undef
@@ -608,8 +608,8 @@ define void @fcmp_false_bfloat(i32 %arg) {
 ; CHECK-LABEL: 'fcmp_false_bfloat'
 ; CHECK-NEXT:  Cost Model: Found costs of 3 for: %v2bf16 = fcmp false <2 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of 4 for: %v4bf16 = fcmp false <4 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef
-; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:4 Lat:4 SizeLat:4 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %v8bf16 = fcmp false <8 x bfloat> undef, undef
+; CHECK-NEXT:  Cost Model: Found costs of RThru:14 CodeSize:7 Lat:7 SizeLat:7 for: %v16bf16 = fcmp false <16 x bfloat> undef, undef
 ; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %v2bf16 = fcmp false <2 x bfloat> undef, undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index ec84c58bf9681..16e10587dbb6d 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -930,8 +930,8 @@ define void @get_lane_mask() #0 {
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:33 Lat:33 SizeLat:33 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'get_lane_mask'
@@ -953,8 +953,8 @@ define void @get_lane_mask() #0 {
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:33 Lat:33 SizeLat:33 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'get_lane_mask'
@@ -976,8 +976,8 @@ define void @get_lane_mask() #0 {
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:33 Lat:33 SizeLat:33 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:5 Lat:5 SizeLat:5 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
index a317d6da4028d..29f7fc938e62e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -83,7 +83,7 @@ define <2 x i64> @v2i64_select_sle(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 
 define <3 x i64> @v3i64_select_sle(<3 x i64> %a, <3 x i64> %b, <3 x i64> %c) {
 ; COST-LABEL: 'v3i64_select_sle'
-; COST-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = icmp sle <3 x i64> %a, %b
+; COST-NEXT:  Cost Model: Found costs of 2 for: %cmp.1 = icmp sle <3 x i64> %a, %b
 ; COST-NEXT:  Cost Model: Found costs of 2 for: %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <3 x i64> %s.1
 ;
@@ -94,7 +94,7 @@ define <3 x i64> @v3i64_select_sle(<3 x i64> %a, <3 x i64> %b, <3 x i64> %c) {
 
 define <2 x i64> @v2i64_select_no_cmp(<2 x i64> %a, <2 x i64> %b, <2 x i1> %cond) {
 ; COST-LABEL: 'v2i64_select_no_cmp'
-; COST-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
+; COST-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:4 Lat:6 SizeLat:6 for: %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %s.1
 ;
   %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
@@ -119,7 +119,7 @@ define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_ogt'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -169,7 +169,7 @@ define <2 x double> @v2f64_select_ogt(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x bfloat> @v4bf16_select_ogt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_ogt'
 ; COST-NEXT:  Cost Model: Found costs of 4 for: %cmp.1 = fcmp ogt <4 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
   %cmp.1 = fcmp ogt <4 x bfloat> %a, %b
@@ -179,8 +179,8 @@ define <4 x bfloat> @v4bf16_select_ogt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_ogt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_ogt'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
   %cmp.1 = fcmp ogt <8 x bfloat> %a, %b
@@ -206,7 +206,7 @@ define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_oge'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp oge <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oge <8 x half> %a, %b
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -256,7 +256,7 @@ define <2 x double> @v2f64_select_oge(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x bfloat> @v4bf16_select_oge(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_oge'
 ; COST-NEXT:  Cost Model: Found costs of 4 for: %cmp.1 = fcmp oge <4 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
   %cmp.1 = fcmp oge <4 x bfloat> %a, %b
@@ -266,8 +266,8 @@ define <4 x bfloat> @v4bf16_select_oge(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_oge(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_oge'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
   %cmp.1 = fcmp oge <8 x bfloat> %a, %b
@@ -293,7 +293,7 @@ define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_olt'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp olt <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp olt <8 x half> %a, %b
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -343,7 +343,7 @@ define <2 x double> @v2f64_select_olt(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x bfloat> @v4bf16_select_olt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_olt'
 ; COST-NEXT:  Cost Model: Found costs of 4 for: %cmp.1 = fcmp olt <4 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
   %cmp.1 = fcmp olt <4 x bfloat> %a, %b
@@ -353,8 +353,8 @@ define <4 x bfloat> @v4bf16_select_olt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_olt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_olt'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
   %cmp.1 = fcmp olt <8 x bfloat> %a, %b
@@ -380,7 +380,7 @@ define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_ole'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ole <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ole <8 x half> %a, %b
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -430,7 +430,7 @@ define <2 x double> @v2f64_select_ole(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x bfloat> @v4bf16_select_ole(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_ole'
 ; COST-NEXT:  Cost Model: Found costs of 4 for: %cmp.1 = fcmp ole <4 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
   %cmp.1 = fcmp ole <4 x bfloat> %a, %b
@@ -440,8 +440,8 @@ define <4 x bfloat> @v4bf16_select_ole(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_ole(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_ole'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
   %cmp.1 = fcmp ole <8 x bfloat> %a, %b
@@ -467,7 +467,7 @@ define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_oeq'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -517,7 +517,7 @@ define <2 x double> @v2f64_select_oeq(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x bfloat> @v4bf16_select_oeq(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_oeq'
 ; COST-NEXT:  Cost Model: Found costs of 4 for: %cmp.1 = fcmp oeq <4 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
   %cmp.1 = fcmp oeq <4 x bfloat> %a, %b
@@ -527,8 +527,8 @@ define <4 x bfloat> @v4bf16_select_oeq(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_oeq(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_oeq'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
   %cmp.1 = fcmp oeq <8 x bfloat> %a, %b
@@ -554,7 +554,7 @@ define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_one'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp one <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp one <8 x half> %a, %b
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -604,7 +604,7 @@ define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x bfloat> @v4bf16_select_one(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_one'
 ; COST-NEXT:  Cost Model: Found costs of 4 for: %cmp.1 = fcmp one <4 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
   %cmp.1 = fcmp one <4 x bfloat> %a, %b
@@ -614,8 +614,8 @@ define <4 x bfloat> @v4bf16_select_one(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_one(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_one'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
   %cmp.1 = fcmp one <8 x bfloat> %a, %b
@@ -641,7 +641,7 @@ define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_une'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp une <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp une <8 x half> %a, %b
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -691,7 +691,7 @@ define <2 x double> @v2f64_select_une(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x bfloat> @v4bf16_select_une(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_une'
 ; COST-NEXT:  Cost Model: Found costs of 4 for: %cmp.1 = fcmp une <4 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
   %cmp.1 = fcmp une <4 x bfloat> %a, %b
@@ -701,8 +701,8 @@ define <4 x bfloat> @v4bf16_select_une(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_une(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_une'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
   %cmp.1 = fcmp une <8 x bfloat> %a, %b
@@ -728,7 +728,7 @@ define <4 x half> @v4f16_select_ord(<4 x half> %a, <4 x half> %b, <4 x half> %c)
 
 define <8 x half> @v8f16_select_ord(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; COST-NOFP16-LABEL: 'v8f16_select_ord'
-; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ord <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ord <8 x half> %a, %b
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of 2 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
 ; COST-NOFP16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
 ;
@@ -778,7 +778,7 @@ define <2 x double> @v2f64_select_ord(<2 x double> %a, <2 x double> %b, <2 x dou
 define <4 x bfloat> @v4bf16_select_ord(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
 ; COST-LABEL: 'v4bf16_select_ord'
 ; COST-NEXT:  Cost Model: Found costs of 4 for: %cmp.1 = fcmp ord <4 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
 ;
   %cmp.1 = fcmp ord <4 x bfloat> %a, %b
@@ -788,8 +788,8 @@ define <4 x bfloat> @v4bf16_select_ord(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
 
 define <8 x bfloat> @v8bf16_select_ord(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
 ; COST-LABEL: 'v8bf16_select_ord'
-; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:4 Lat:4 SizeLat:4 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b
-; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
+; COST-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:5 Lat:5 SizeLat:5 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b
+; COST-NEXT:  Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
 ; COST-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
 ;
   %cmp.1 = fcmp ord <8 x bfloat> %a, %b
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll b/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll
index 781eba144205d..00ce9680cab32 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/reduce-and.ll
@@ -24,8 +24,8 @@ define i32 @reduce_i1(i32 %arg) {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> undef)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> undef)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> undef)
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 257 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 260 for instruction: %V256 = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> undef)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef)
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll b/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll
index 9e03620e2aa6f..eccaffc9d2a40 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/reduce-or.ll
@@ -24,8 +24,8 @@ define i32 @reduce_i1(i32 %arg) {
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16 = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> undef)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32 = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> undef)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> undef)
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 257 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call i1 @llvm.vector.reduce.or.v128i1(<128 x i1> undef)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 260 for instruction: %V256 = call i1 @llvm.vector.reduce.or.v256i1(<256 x i1> undef)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %V1   = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll
index 484cd201d14a0..9c2d6bc20d302 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll
@@ -108,27 +108,27 @@ define i32 @sadd(i32 %arg) {
 ; NEON-SIZE-LABEL: 'sadd'
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; MVE-SIZE-LABEL: 'sadd'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 147 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 145 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 289 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -266,27 +266,27 @@ define i32 @uadd(i32 %arg) {
 ; NEON-SIZE-LABEL: 'uadd'
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; MVE-SIZE-LABEL: 'uadd'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -424,27 +424,27 @@ define i32 @ssub(i32 %arg) {
 ; NEON-SIZE-LABEL: 'ssub'
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; MVE-SIZE-LABEL: 'ssub'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 147 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 145 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 289 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 71 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -582,27 +582,27 @@ define i32 @usub(i32 %arg) {
 ; NEON-SIZE-LABEL: 'usub'
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; MVE-SIZE-LABEL: 'usub'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 216 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -720,47 +720,47 @@ define i32 @smul(i32 %arg) {
 ;
 ; V8M-SIZE-LABEL: 'smul'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 85 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 83 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 162 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 322 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-SIZE-LABEL: 'smul'
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; MVE-SIZE-LABEL: 'smul'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 57 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 109 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 213 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 284 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 328 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
@@ -878,47 +878,47 @@ define i32 @umul(i32 %arg) {
 ;
 ; V8M-SIZE-LABEL: 'umul'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 258 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-SIZE-LABEL: 'umul'
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; MVE-SIZE-LABEL: 'umul'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 69 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 149 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 326 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
index da7096af4d80c..34397bd8bfb4b 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll
@@ -112,31 +112,31 @@ define i32 @add(i32 %arg) {
 ;
 ; V8M-SIZE-LABEL: 'add'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V32I8 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V64I8 = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-SIZE-LABEL: 'add'
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
@@ -162,22 +162,22 @@ define i32 @add(i32 %arg) {
 ;
 ; MVE-SIZE-LABEL: 'add'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 185 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 369 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
@@ -318,31 +318,31 @@ define i32 @sub(i32 %arg) {
 ;
 ; V8M-SIZE-LABEL: 'sub'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V32I8 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V64I8 = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-SIZE-LABEL: 'sub'
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
@@ -368,22 +368,22 @@ define i32 @sub(i32 %arg) {
 ;
 ; MVE-SIZE-LABEL: 'sub'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 185 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 369 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll
index 8a12ca2df234e..28e4860c8810c 100644
--- a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll
+++ b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll
@@ -112,31 +112,31 @@ define i32 @add(i32 %arg) {
 ;
 ; V8M-SIZE-LABEL: 'add'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-SIZE-LABEL: 'add'
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
@@ -162,22 +162,22 @@ define i32 @add(i32 %arg) {
 ;
 ; MVE-SIZE-LABEL: 'add'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 146 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 220 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
@@ -318,31 +318,31 @@ define i32 @sub(i32 %arg) {
 ;
 ; V8M-SIZE-LABEL: 'sub'
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
-; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 129 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> undef, <32 x i8> undef)
+; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> undef, <64 x i8> undef)
 ; V8M-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; NEON-SIZE-LABEL: 'sub'
-; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
+; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
@@ -368,22 +368,22 @@ define i32 @sub(i32 %arg) {
 ;
 ; MVE-SIZE-LABEL: 'sub'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 146 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 220 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/cmps.ll b/llvm/test/Analysis/CostModel/ARM/cmps.ll
index e86a1b95a7b36..090fccee81bbe 100644
--- a/llvm/test/Analysis/CostModel/ARM/cmps.ll
+++ b/llvm/test/Analysis/CostModel/ARM/cmps.ll
@@ -96,7 +96,7 @@ define i32 @cmps() {
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
@@ -106,17 +106,17 @@ define i32 @cmps() {
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 x i8> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x i32> undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x ptr> undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-BASE-SIZE-LABEL: 'cmps'
@@ -124,17 +124,17 @@ define i32 @cmps() {
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %b = icmp ult i16 undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = icmp sge i32 undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = icmp ne i64 undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = icmp slt <16 x i8> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = icmp ult <8 x i16> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = icmp sge <4 x i32> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %e = icmp slt <16 x i8> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %f = icmp ult <8 x i16> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %g = icmp sge <4 x i32> undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %a10 = fcmp olt <8 x half> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %a11 = fcmp oge <4 x float> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %a12 = fcmp oge <2 x double> undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %q = icmp eq <4 x ptr> undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; CHECK-V8R-SIZE-LABEL: 'cmps'
@@ -148,9 +148,9 @@ define i32 @cmps() {
 ; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a7 = fcmp oge half undef, undef
 ; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a8 = fcmp ogt float undef, undef
 ; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a10 = fcmp olt <8 x half> undef, undef
+; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %a10 = fcmp olt <8 x half> undef, undef
 ; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a11 = fcmp oge <4 x float> undef, undef
-; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %a12 = fcmp oge <2 x double> undef, undef
+; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %a12 = fcmp oge <2 x double> undef, undef
 ; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq ptr undef, undef
 ; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %q = icmp eq <4 x ptr> undef, undef
 ; CHECK-V8R-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
@@ -257,11 +257,11 @@ define void @minmax() {
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
+; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
-; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
+; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
 ; CHECK-V8M-MAIN-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-SIZE-LABEL: 'minmax'
@@ -272,11 +272,11 @@ define void @minmax() {
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c3 = icmp slt i32 undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s3 = select i1 %c3, i32 undef, i32 undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c4 = icmp slt <4 x i32> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
+; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt ptr undef, undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, ptr undef, ptr undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
-; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
+; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %c6 = icmp slt <4 x ptr> undef, undef
+; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %s6 = select <4 x i1> %c6, <4 x ptr> undef, <4 x ptr> undef
 ; CHECK-V8M-BASE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-SIZE-LABEL: 'minmax'
diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
index 66240c8255ad7..7a40252dfa619 100644
--- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
@@ -236,17 +236,17 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x
 ;
 ; LATE-LABEL: 'fshl'
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; LATE-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'fshl'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 89 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'fshl'
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
index 12a1ae7a43610..cc8f2da57f072 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll
@@ -58,22 +58,22 @@ define i32 @abs(i32 %arg) {
 ;
 ; MVE-SIZE-LABEL: 'abs'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 146 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 220 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll
index 7f1005285747c..01341e4dcb648 100644
--- a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll
+++ b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll
@@ -60,22 +60,22 @@ define i32 @smin(i32 %arg) {
 ;
 ; MVE-SIZE-LABEL: 'smin'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef)
@@ -167,22 +167,22 @@ define i32 @smax(i32 %arg) {
 ;
 ; MVE-SIZE-LABEL: 'smax'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef)
@@ -275,22 +275,22 @@ define i32 @umin(i32 %arg) {
 ;
 ; MVE-SIZE-LABEL: 'umin'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef)
@@ -382,22 +382,22 @@ define i32 @sub(i32 %arg) {
 ;
 ; MVE-SIZE-LABEL: 'sub'
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef)
-; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
+; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef)
 ; MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/select.ll b/llvm/test/Analysis/CostModel/ARM/select.ll
index 4e2ccffa7cc5b..f626901990f01 100644
--- a/llvm/test/Analysis/CostModel/ARM/select.ll
+++ b/llvm/test/Analysis/CostModel/ARM/select.ll
@@ -155,21 +155,21 @@ define void @selects() {
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-NEON-SIZE-LABEL: 'selects'
@@ -212,28 +212,28 @@ define void @selects() {
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-THUMB2-SIZE-LABEL: 'selects'
@@ -244,28 +244,28 @@ define void @selects() {
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v4 = select i1 undef, i64 undef, i64 undef
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v5 = select i1 undef, float undef, float undef
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v6 = select i1 undef, double undef, double undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %v0 = select i1 undef, i1 undef, i1 undef
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
index a990cee1f5fb3..c9a51f0507844 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll
@@ -377,34 +377,30 @@ entry:
 define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1_i1(<2 x i64> %a, <2 x i64> %b, i64 %c) {
 ; CHECK-LABEL: cmpeqz_v2i1_i1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    vmov r2, r3, d2
-; CHECK-NEXT:    orrs r2, r3
-; CHECK-NEXT:    vmov r3, r4, d3
-; CHECK-NEXT:    csetm r12, eq
-; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    orrs r3, r4
-; CHECK-NEXT:    vmov r4, r3, d0
-; CHECK-NEXT:    csetm r5, eq
-; CHECK-NEXT:    orrs r3, r4
-; CHECK-NEXT:    vmov r3, r4, d1
-; CHECK-NEXT:    csetm lr, eq
-; CHECK-NEXT:    orrs r3, r4
-; CHECK-NEXT:    csetm r4, eq
 ; CHECK-NEXT:    orrs r0, r1
 ; CHECK-NEXT:    beq .LBB15_2
-; CHECK-NEXT:  @ %bb.1: @ %select.false
-; CHECK-NEXT:    bfi r2, r12, #0, #8
-; CHECK-NEXT:    bfi r2, r5, #8, #8
+; CHECK-NEXT:  @ %bb.1: @ %select.false.sink
+; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    mov.w r1, #0
+; CHECK-NEXT:    csetm r0, eq
+; CHECK-NEXT:    bfi r1, r0, #0, #8
+; CHECK-NEXT:    vmov r0, r2, d3
 ; CHECK-NEXT:    b .LBB15_3
-; CHECK-NEXT:  .LBB15_2:
-; CHECK-NEXT:    bfi r2, lr, #0, #8
-; CHECK-NEXT:    bfi r2, r4, #8, #8
+; CHECK-NEXT:  .LBB15_2: @ %select.true.sink
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    mov.w r1, #0
+; CHECK-NEXT:    csetm r0, eq
+; CHECK-NEXT:    bfi r1, r0, #0, #8
+; CHECK-NEXT:    vmov r0, r2, d1
 ; CHECK-NEXT:  .LBB15_3: @ %select.end
-; CHECK-NEXT:    vmsr p0, r2
+; CHECK-NEXT:    orrs r0, r2
+; CHECK-NEXT:    csetm r0, eq
+; CHECK-NEXT:    bfi r1, r0, #8, #8
+; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q0, q0, q1
-; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <2 x i64> %a, zeroinitializer
   %c2 = icmp eq <2 x i64> %b, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 7bbddefd82721..db6136c4a2b28 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -205,292 +205,277 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
 ; X86-NEXT:    testl %edi, %edi
 ; X86-NEXT:    cmovnel %esi, %edx
 ; X86-NEXT:    addl $64, %edx
-; X86-NEXT:    movl 20(%ebp), %edi
-; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    movl 20(%ebp), %esi
 ; X86-NEXT:    orl %ebx, %esi
 ; X86-NEXT:    cmovnel %ecx, %edx
+; X86-NEXT:    xorl %edi, %edi
 ; X86-NEXT:    subl %edx, %eax
-; X86-NEXT:    movl $0, %edx
-; X86-NEXT:    sbbl %edx, %edx
 ; X86-NEXT:    movl $0, %ebx
 ; X86-NEXT:    sbbl %ebx, %ebx
+; X86-NEXT:    movl $0, %ecx
+; X86-NEXT:    sbbl %ecx, %ecx
 ; X86-NEXT:    movl $0, %esi
 ; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    movl $127, %ecx
-; X86-NEXT:    cmpl %eax, %ecx
-; X86-NEXT:    movl $0, %ecx
-; X86-NEXT:    sbbl %edx, %ecx
-; X86-NEXT:    movl $0, %ecx
-; X86-NEXT:    sbbl %ebx, %ecx
-; X86-NEXT:    movl $0, %ecx
-; X86-NEXT:    sbbl %esi, %ecx
-; X86-NEXT:    setb %cl
-; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT:    movl $127, %edx
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    xorl $127, %eax
+; X86-NEXT:    cmpl %eax, %edx
+; X86-NEXT:    movl $0, %edx
 ; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    orl %ebx, %eax
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    orl %esi, %edx
-; X86-NEXT:    orl %eax, %edx
-; X86-NEXT:    sete %al
-; X86-NEXT:    testb %cl, %cl
-; X86-NEXT:    movb %cl, %ah
-; X86-NEXT:    movl 24(%ebp), %ebx
-; X86-NEXT:    movl $0, %esi
-; X86-NEXT:    cmovnel %esi, %ebx
-; X86-NEXT:    movl %edi, %ecx
-; X86-NEXT:    cmovnel %esi, %ecx
+; X86-NEXT:    sbbl %ebx, %edx
 ; X86-NEXT:    movl $0, %edx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl 16(%ebp), %esi
-; X86-NEXT:    cmovnel %edx, %esi
-; X86-NEXT:    movl 12(%ebp), %edi
-; X86-NEXT:    movl %edi, %ecx
-; X86-NEXT:    cmovnel %edx, %ecx
-; X86-NEXT:    orb %ah, %al
-; X86-NEXT:    movl 44(%ebp), %eax
-; X86-NEXT:    jne .LBB4_7
-; X86-NEXT:  # %bb.1: # %udiv-bb1
-; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    setb %dl
+; X86-NEXT:    orb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
+; X86-NEXT:    movl 24(%ebp), %eax
+; X86-NEXT:    cmovnel %edi, %eax
+; X86-NEXT:    movl 20(%ebp), %esi
+; X86-NEXT:    cmovnel %edi, %esi
+; X86-NEXT:    movl 16(%ebp), %edx
+; X86-NEXT:    cmovnel %edi, %edx
+; X86-NEXT:    movl 12(%ebp), %ebx
+; X86-NEXT:    cmovnel %edi, %ebx
+; X86-NEXT:    movl 44(%ebp), %edi
+; X86-NEXT:    jne .LBB4_8
+; X86-NEXT:  # %bb.1: # %_udiv-special-cases
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    xorl $127, %eax
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl 44(%ebp), %edi
+; X86-NEXT:    movl 12(%ebp), %ecx
+; X86-NEXT:    je .LBB4_8
+; X86-NEXT:  # %bb.2: # %udiv-bb1
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
 ; X86-NEXT:    xorps %xmm0, %xmm0
 ; X86-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 16(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 20(%ebp), %edx
-; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 20(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 24(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    xorb $127, %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    shrb $3, %al
 ; X86-NEXT:    andb $12, %al
 ; X86-NEXT:    negb %al
 ; X86-NEXT:    movsbl %al, %eax
-; X86-NEXT:    movl 136(%esp,%eax), %edi
-; X86-NEXT:    movl 140(%esp,%eax), %esi
-; X86-NEXT:    shldl %cl, %edi, %esi
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 136(%esp,%eax), %esi
+; X86-NEXT:    movl 140(%esp,%eax), %edx
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl 128(%esp,%eax), %ebx
-; X86-NEXT:    movl 132(%esp,%eax), %eax
-; X86-NEXT:    shldl %cl, %eax, %edi
-; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %eax, %edi
-; X86-NEXT:    shldl %cl, %ebx, %edi
+; X86-NEXT:    movl 132(%esp,%eax), %edx
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    shldl %cl, %ebx, %edx
 ; X86-NEXT:    shll %cl, %ebx
-; X86-NEXT:    movl %ebx, %ecx
-; X86-NEXT:    addl $1, %edx
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    addl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    adcl $0, %eax
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl $0, %ecx
 ; X86-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT:    movl 20(%ebp), %ebx
-; X86-NEXT:    jae .LBB4_2
-; X86-NEXT:  # %bb.5:
-; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    movl %edi, %esi
-; X86-NEXT:    jmp .LBB4_6
-; X86-NEXT:  .LBB4_2: # %udiv-preheader
+; X86-NEXT:    jae .LBB4_3
+; X86-NEXT:  # %bb.6:
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    jmp .LBB4_7
+; X86-NEXT:  .LBB4_3: # %udiv-preheader
 ; X86-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl 12(%ebp), %edx
-; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 16(%ebp), %edx
-; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %edi
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 16(%ebp), %edi
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 20(%ebp), %edi
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 24(%ebp), %edi
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl 24(%ebp), %eax
-; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    shrb $3, %al
 ; X86-NEXT:    andb $12, %al
 ; X86-NEXT:    movzbl %al, %eax
-; X86-NEXT:    movl 92(%esp,%eax), %esi
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl 88(%esp,%eax), %edx
-; X86-NEXT:    movl %edx, %ebx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NEXT:    shrdl %cl, %esi, %ebx
-; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 92(%esp,%eax), %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 88(%esp,%eax), %edi
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl 80(%esp,%eax), %edi
-; X86-NEXT:    movl 84(%esp,%eax), %eax
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    shrdl %cl, %edx, %esi
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:    shrl %cl, %edx
-; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NEXT:    shrdl %cl, %eax, %edi
+; X86-NEXT:    shrdl %cl, %edx, %edi
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 80(%esp,%eax), %edx
+; X86-NEXT:    movl 84(%esp,%eax), %eax
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shrdl %cl, %edi, %ebx
+; X86-NEXT:    shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    shrdl %cl, %eax, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl 28(%ebp), %eax
 ; X86-NEXT:    addl $-1, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl 32(%ebp), %eax
 ; X86-NEXT:    adcl $-1, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl 36(%ebp), %esi
-; X86-NEXT:    adcl $-1, %esi
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 36(%ebp), %eax
+; X86-NEXT:    adcl $-1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl 40(%ebp), %eax
 ; X86-NEXT:    adcl $-1, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT:    .p2align 4
-; X86-NEXT:  .LBB4_3: # %udiv-do-while
+; X86-NEXT:  .LBB4_4: # %udiv-do-while
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT:    shldl $1, %esi, %edx
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT:    shldl $1, %edi, %esi
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT:    shldl $1, %ebx, %edi
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:    shldl $1, %edx, %ebx
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    shldl $1, %ecx, %edx
-; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    shldl $1, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    shldl $1, %ebx, %edx
 ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:    shldl $1, %edx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %ebx
+; X86-NEXT:    shldl $1, %ecx, %edi
+; X86-NEXT:    shldl $1, %esi, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    shldl $1, %ecx, %edx
-; X86-NEXT:    orl %eax, %edx
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    addl %ecx, %ecx
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    shldl $1, %ecx, %esi
+; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    shldl $1, %esi, %ecx
+; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    addl %esi, %esi
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    cmpl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    sbbl %edi, %ecx
+; X86-NEXT:    sbbl %ebx, %ecx
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    sbbl %esi, %ecx
+; X86-NEXT:    sbbl %edx, %ecx
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; X86-NEXT:    sarl $31, %ecx
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    andl $1, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    andl 40(%ebp), %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    andl 40(%ebp), %esi
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    andl 36(%ebp), %eax
 ; X86-NEXT:    movl %ecx, %edx
 ; X86-NEXT:    andl 32(%ebp), %edx
 ; X86-NEXT:    andl 28(%ebp), %ecx
-; X86-NEXT:    subl %ecx, %ebx
-; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT:    sbbl %edx, %edi
+; X86-NEXT:    subl %ecx, %edi
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    sbbl %eax, %esi
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %edx, %ebx
+; X86-NEXT:    sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:    sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT:    addl $-1, %ecx
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    adcl $-1, %eax
-; X86-NEXT:    adcl $-1, %ebx
-; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    adcl $-1, %edi
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    orl %esi, %eax
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edi, %eax
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    orl %edx, %ecx
 ; X86-NEXT:    orl %eax, %ecx
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT:    jne .LBB4_3
-; X86-NEXT:  # %bb.4:
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    jne .LBB4_4
+; X86-NEXT:  # %bb.5:
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:  .LBB4_6: # %udiv-loop-exit
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT:    shldl $1, %edi, %ebx
-; X86-NEXT:    orl %eax, %ebx
-; X86-NEXT:    shldl $1, %esi, %edi
-; X86-NEXT:    orl %eax, %edi
-; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    shldl $1, %ecx, %esi
-; X86-NEXT:    orl %eax, %esi
-; X86-NEXT:    addl %ecx, %ecx
-; X86-NEXT:    orl %edx, %ecx
-; X86-NEXT:    movl 44(%ebp), %eax
-; X86-NEXT:  .LBB4_7: # %udiv-end
-; X86-NEXT:    movl %ecx, (%eax)
-; X86-NEXT:    movl %esi, 4(%eax)
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:    movl %edx, 8(%eax)
-; X86-NEXT:    movl %ebx, 12(%eax)
-; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    movl 44(%ebp), %edi
+; X86-NEXT:  .LBB4_7: # %udiv-loop-exit
+; X86-NEXT:    shldl $1, %esi, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    shldl $1, %edx, %esi
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    shldl $1, %ebx, %edx
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    addl %ebx, %ebx
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT:  .LBB4_8: # %udiv-end
 ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, (%edi)
+; X86-NEXT:    movl %edx, 4(%edi)
+; X86-NEXT:    movl %esi, 8(%edi)
+; X86-NEXT:    movl %eax, 12(%edi)
+; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    movl 36(%ebp), %eax
 ; X86-NEXT:    movl %eax, %esi
 ; X86-NEXT:    imull %edx, %esi
-; X86-NEXT:    mull %ecx
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    addl %esi, %edx
 ; X86-NEXT:    movl 40(%ebp), %edi
-; X86-NEXT:    imull %ecx, %edi
+; X86-NEXT:    imull %ebx, %edi
 ; X86-NEXT:    addl %edx, %edi
 ; X86-NEXT:    movl 28(%ebp), %eax
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT:    mull %esi
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    imull 28(%ebp), %ebx
-; X86-NEXT:    addl %edx, %ebx
-; X86-NEXT:    movl 32(%ebp), %edx
-; X86-NEXT:    imull %edx, %esi
-; X86-NEXT:    addl %ebx, %esi
-; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    adcl %edi, %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    imull 28(%ebp), %ecx
+; X86-NEXT:    addl %edx, %ecx
+; X86-NEXT:    movl 32(%ebp), %eax
+; X86-NEXT:    imull %eax, %ebx
+; X86-NEXT:    addl %ecx, %ebx
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
 ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    adcl %edi, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    movl %esi, %eax
 ; X86-NEXT:    movl 28(%ebp), %ecx
 ; X86-NEXT:    mull %ecx
-; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %edx, %edi
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ebx
 ; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    addl %esi, %ecx
-; X86-NEXT:    adcl $0, %edx
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    addl %edi, %ecx
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl %esi, %eax
 ; X86-NEXT:    mull 32(%ebp)
 ; X86-NEXT:    movl 16(%ebp), %esi
 ; X86-NEXT:    movl %edx, %edi
 ; X86-NEXT:    addl %ecx, %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl %ebx, %edi
 ; X86-NEXT:    setb %cl
-; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    mull 32(%ebp)
 ; X86-NEXT:    addl %edi, %eax
 ; X86-NEXT:    movzbl %cl, %ecx
diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll
index 953a0d65c5386..ab28a3b4a2b63 100644
--- a/llvm/test/CodeGen/X86/test-shrink-bug.ll
+++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll
@@ -48,18 +48,17 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) {
 ; CHECK-X86:       ## %bb.0:
 ; CHECK-X86-NEXT:    subl $12, %esp
 ; CHECK-X86-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; CHECK-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
 ; CHECK-X86-NEXT:    cmpb $123, {{[0-9]+}}(%esp)
-; CHECK-X86-NEXT:    setne %cl
-; CHECK-X86-NEXT:    testl $263, %eax ## imm = 0x107
-; CHECK-X86-NEXT:    setne %al
-; CHECK-X86-NEXT:    testb %cl, %al
-; CHECK-X86-NEXT:    jne LBB1_2
-; CHECK-X86-NEXT:  ## %bb.1: ## %yes
-; CHECK-X86-NEXT:    addl $12, %esp
-; CHECK-X86-NEXT:    retl
-; CHECK-X86-NEXT:  LBB1_2: ## %no
+; CHECK-X86-NEXT:    sete %al
+; CHECK-X86-NEXT:    testl $263, %ecx ## imm = 0x107
+; CHECK-X86-NEXT:    je LBB1_3
+; CHECK-X86-NEXT:  ## %bb.1:
+; CHECK-X86-NEXT:    testb %al, %al
+; CHECK-X86-NEXT:    jne LBB1_3
+; CHECK-X86-NEXT:  ## %bb.2: ## %no
 ; CHECK-X86-NEXT:    calll _bar
+; CHECK-X86-NEXT:  LBB1_3: ## %yes
 ; CHECK-X86-NEXT:    addl $12, %esp
 ; CHECK-X86-NEXT:    retl
 ;
diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll b/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll
index 8a36226851592..f23e27f6d6686 100644
--- a/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ARM/phi-eliminate.ll
@@ -241,13 +241,17 @@ define i64 @test_i64(i1 %a, i1 %b, i64 %i, i64 %j, i64 %k) {
 ;
 ; CHECK-V8A-TWO-FOLD-6-LABEL: @test_i64(
 ; CHECK-V8A-TWO-FOLD-6-NEXT:  entry:
+; CHECK-V8A-TWO-FOLD-6-NEXT:    br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]]
+; CHECK-V8A-TWO-FOLD-6:       O:
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[IAJ:%.*]] = add i64 [[I:%.*]], [[J:%.*]]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[IAJAK:%.*]] = add i64 [[IAJ]], [[K:%.*]]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[IXJ:%.*]] = xor i64 [[I]], [[J]]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[IXJXK:%.*]] = xor i64 [[IXJ]], [[K]]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[WP:%.*]] = select i1 [[B:%.*]], i64 [[IAJAK]], i64 [[IXJXK]]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[WP2:%.*]] = add i64 [[WP]], [[WP]]
-; CHECK-V8A-TWO-FOLD-6-NEXT:    [[W:%.*]] = select i1 [[A:%.*]], i64 2, i64 [[WP2]]
+; CHECK-V8A-TWO-FOLD-6-NEXT:    br label [[M]]
+; CHECK-V8A-TWO-FOLD-6:       M:
+; CHECK-V8A-TWO-FOLD-6-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[R:%.*]] = add i64 [[W]], 1
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    ret i64 [[R]]
 ;
@@ -374,13 +378,17 @@ define i64 @test_i64_minsize(i1 %a, i1 %b, i64 %i, i64 %j, i64 %k) #0 {
 ;
 ; CHECK-V8A-TWO-FOLD-6-LABEL: @test_i64_minsize(
 ; CHECK-V8A-TWO-FOLD-6-NEXT:  entry:
+; CHECK-V8A-TWO-FOLD-6-NEXT:    br i1 [[A:%.*]], label [[M:%.*]], label [[O:%.*]]
+; CHECK-V8A-TWO-FOLD-6:       O:
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[IAJ:%.*]] = add i64 [[I:%.*]], [[J:%.*]]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[IAJAK:%.*]] = add i64 [[IAJ]], [[K:%.*]]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[IXJ:%.*]] = xor i64 [[I]], [[J]]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[IXJXK:%.*]] = xor i64 [[IXJ]], [[K]]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[WP:%.*]] = select i1 [[B:%.*]], i64 [[IAJAK]], i64 [[IXJXK]]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[WP2:%.*]] = add i64 [[WP]], [[WP]]
-; CHECK-V8A-TWO-FOLD-6-NEXT:    [[W:%.*]] = select i1 [[A:%.*]], i64 2, i64 [[WP2]]
+; CHECK-V8A-TWO-FOLD-6-NEXT:    br label [[M]]
+; CHECK-V8A-TWO-FOLD-6:       M:
+; CHECK-V8A-TWO-FOLD-6-NEXT:    [[W:%.*]] = phi i64 [ [[WP2]], [[O]] ], [ 2, [[ENTRY:%.*]] ]
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    [[R:%.*]] = add i64 [[W]], 1
 ; CHECK-V8A-TWO-FOLD-6-NEXT:    ret i64 [[R]]
 ;