From 13b88a4fff0e72e84381b200eea2c10c36d81ad8 Mon Sep 17 00:00:00 2001 From: Stephen Canon Date: Sat, 31 May 2025 21:10:20 -0400 Subject: [PATCH 1/3] Add doc comments for some concrete SIMD overloads. Also removed concrete SIMDMask(lowHalf:highHalf:) init, as there is no corresponding generic operation; it was added in error. --- .../SIMDFloatConcreteOperations.swift.gyb | 19 ++++++++ .../SIMDIntegerConcreteOperations.swift.gyb | 19 ++++++++ .../core/SIMDMaskConcreteOperations.swift.gyb | 13 ----- stdlib/public/core/SIMDVector.swift | 10 +++- stdlib/public/core/SIMDVectorTypes.swift.gyb | 11 ++++- test/stdlib/SIMDMaskInitializers.swift.gyb | 48 ------------------- 6 files changed, 57 insertions(+), 63 deletions(-) diff --git a/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb b/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb index 71d2e91ba08ec..be97883a5a673 100644 --- a/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb +++ b/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb @@ -34,6 +34,15 @@ extension SIMD${n} where Scalar == ${Scalar} { _storage = ${Scalar}.SIMD${storageN}Storage(_builtin) } + /// A vector with the specified scalar in all lanes. + /// + /// Equivalent to: + /// ``` + /// var result = SIMD${n}<${Scalar}>() + /// for i in result.indices { + /// result[i] = scalar + /// } + /// ``` @_alwaysEmitIntoClient @_transparent public init(repeating scalar: ${Scalar}) { let asVector = Builtin.insertelement_${Builtin}_FPIEEE${bits}_Int32( @@ -52,6 +61,16 @@ extension SIMD${n} where Scalar == ${Scalar} { } % if n >= 4: + /// A vector formed by concatenating lowHalf and highHalf. + /// + /// Equivalent to: + /// ``` + /// var result = SIMD${n}<${Scalar}>() + /// for i in 0..<${n//2} { + /// result[i] = lowHalf[i] + /// result[${n//2}+i] = highHalf[i] + /// } + /// ``` @_alwaysEmitIntoClient @_transparent public init( lowHalf: SIMD${n//2}<${Scalar}>, diff --git a/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb b/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb index 1c8ac2c162283..963dfa7510371 100644 --- a/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb +++ b/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb @@ -31,6 +31,15 @@ extension SIMD${n} where Scalar == ${Scalar} { _storage = ${Scalar}.SIMD${storageN}Storage(_builtin) } + /// A vector with the specified scalar in all lanes. + /// + /// Equivalent to: + /// ``` + /// var result = SIMD${n}<${Scalar}>() + /// for i in result.indices { + /// result[i] = scalar + /// } + /// ``` @_alwaysEmitIntoClient @_transparent public init(repeating scalar: ${Scalar}) { let asVector = Builtin.insertelement_${Builtin}_Int${int.bits}_Int32( @@ -49,6 +58,16 @@ extension SIMD${n} where Scalar == ${Scalar} { } % if n >= 4: + /// A vector formed by concatenating lowHalf and highHalf. + /// + /// Equivalent to: + /// ``` + /// var result = SIMD${n}<${Scalar}>() + /// for i in 0..<${n//2} { + /// result[i] = lowHalf[i] + /// result[${n//2}+i] = highHalf[i] + /// } + /// ``` @_alwaysEmitIntoClient @_transparent public init( lowHalf: SIMD${n//2}<${Scalar}>, diff --git a/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb b/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb index aca6ef52d24bd..58ad6e2f2aa75 100644 --- a/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb +++ b/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb @@ -36,19 +36,6 @@ extension SIMDMask where Storage == ${Vector} { _storage = ${Vector}(repeating: scalar ? 
-1 : 0)
   }
 
-% if n >= 4:
-  @_alwaysEmitIntoClient @_transparent
-  public init(
-    lowHalf: SIMDMask<SIMD${n//2}<Int${bits}>>,
-    highHalf: SIMDMask<SIMD${n//2}<Int${bits}>>
-  ) {
-    _storage = ${Vector}(
-      lowHalf: lowHalf._storage,
-      highHalf: highHalf._storage
-    )
-  }
-
-% end
   @_alwaysEmitIntoClient
   internal static var allTrue: Self {
     let zero = ${Vector}()
diff --git a/stdlib/public/core/SIMDVector.swift b/stdlib/public/core/SIMDVector.swift
index 77ad4a8c8b5c3..ee83084878b2b 100644
--- a/stdlib/public/core/SIMDVector.swift
+++ b/stdlib/public/core/SIMDVector.swift
@@ -111,7 +111,15 @@ extension SIMD {
     return 0 ..< scalarCount
   }
 
-  /// A vector with the specified value in all lanes.
+  /// A vector with the specified scalar in all lanes.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = Self()
+  /// for i in result.indices {
+  ///   result[i] = scalar
+  /// }
+  /// ```
   @_transparent
   public init(repeating value: Scalar) {
     self.init()
diff --git a/stdlib/public/core/SIMDVectorTypes.swift.gyb b/stdlib/public/core/SIMDVectorTypes.swift.gyb
index 671519e2fd549..24741b8668ef1 100644
--- a/stdlib/public/core/SIMDVectorTypes.swift.gyb
+++ b/stdlib/public/core/SIMDVectorTypes.swift.gyb
@@ -88,7 +88,16 @@ public struct SIMD${n}<Scalar>: SIMD where Scalar: SIMDScalar {
 % end
 % end
 % if n >= 4:
-  /// Creates a new vector from two half-length vectors.
+  /// A vector formed by concatenating lowHalf and highHalf.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMD${n}()
+  /// for i in 0..<${n//2} {
+  ///   result[i] = lowHalf[i]
+  ///   result[${n//2}+i] = highHalf[i]
+  /// }
+  /// ```
   @_transparent
   public init(lowHalf: SIMD${n//2}<Scalar>, highHalf: SIMD${n//2}<Scalar>) {
     self.init()
diff --git a/test/stdlib/SIMDMaskInitializers.swift.gyb b/test/stdlib/SIMDMaskInitializers.swift.gyb
index b972196382748..226713918c818 100644
--- a/test/stdlib/SIMDMaskInitializers.swift.gyb
+++ b/test/stdlib/SIMDMaskInitializers.swift.gyb
@@ -34,51 +34,3 @@ func repeating${n}_mask${bits}(_ scalar: Bool) -> SIMDMask<SIMD${n}<Int${bits}>>
 % end
 % end
 %end
-
-func concat8x8(_ a: SIMDMask<SIMD8<Int8>>, _ b: SIMDMask<SIMD8<Int8>>) -> SIMDMask<SIMD16<Int8>> {
-  SIMDMask(lowHalf: a, highHalf: b)
-}
-// CHECK: s20SIMDMaskInitializers9concat8x8ys0A0Vys6SIMD16Vys4Int8VGGADys5SIMD8VyAHGG_ANtF:
-// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
-// CHECKO-arm64-NEXT: ret
-// CHECKO-x86_64: punpcklqdq
-
-func concat16x8(_ a: SIMDMask<SIMD16<Int8>>, _ b: SIMDMask<SIMD16<Int8>>) -> SIMDMask<SIMD32<Int8>> {
-  SIMDMask(lowHalf: a, highHalf: b)
-}
-// CHECK: s20SIMDMaskInitializers10concat16x8ys0A0Vys6SIMD32Vys4Int8VGGADys6SIMD16VyAHGG_ANtF:
-// CHECKO-arm64-NEXT: ret
-
-func concat4x16(_ a: SIMDMask<SIMD4<Int16>>, _ b: SIMDMask<SIMD4<Int16>>) -> SIMDMask<SIMD8<Int16>> {
-  SIMDMask(lowHalf: a, highHalf: b)
-}
-// CHECK: s20SIMDMaskInitializers10concat4x16ys0A0Vys5SIMD8Vys5Int16VGGADys5SIMD4VyAHGG_ANtF:
-// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
-// CHECKO-arm64-NEXT: ret
-// CHECKO-x86_64: punpcklqdq
-
-func concat8x16(_ a: SIMDMask<SIMD8<Int16>>, _ b: SIMDMask<SIMD8<Int16>>) -> SIMDMask<SIMD16<Int16>> {
-  SIMDMask(lowHalf: a, highHalf: b)
-}
-// CHECK: s20SIMDMaskInitializers10concat8x16ys0A0Vys6SIMD16Vys5Int16VGGADys5SIMD8VyAHGG_ANtF:
-// CHECKO-arm64-NEXT: ret
-
-func concat2x32(_ a: SIMDMask<SIMD2<Int32>>, _ b: SIMDMask<SIMD2<Int32>>) -> SIMDMask<SIMD4<Int32>> {
-  SIMDMask(lowHalf: a, highHalf: b)
-}
-// CHECK: s20SIMDMaskInitializers10concat2x32ys0A0Vys5SIMD4Vys5Int32VGGADys5SIMD2VyAHGG_ANtF:
-// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
-// CHECKO-arm64-NEXT: ret
-// CHECKO-x86_64: punpcklqdq
-
-func concat4x32(_ a: SIMDMask<SIMD4<Int32>>, _ b: SIMDMask<SIMD4<Int32>>) -> SIMDMask<SIMD8<Int32>> {
-  SIMDMask(lowHalf: a, highHalf: b)
-}
-// CHECK: s20SIMDMaskInitializers10concat4x32ys0A0Vys5SIMD8Vys5Int32VGGADys5SIMD4VyAHGG_ANtF:
-// CHECKO-arm64-NEXT: ret
-
-func concat2x64(_ a: SIMDMask<SIMD2<Int64>>, _ b: SIMDMask<SIMD2<Int64>>) -> SIMDMask<SIMD4<Int64>> {
-  SIMDMask(lowHalf: a, highHalf: b)
-}
-// CHECK: s20SIMDMaskInitializers10concat2x64ys0A0Vys5SIMD4Vys5Int64VGGADys5SIMD2VyAHGG_ANtF:
-// CHECKO-arm64-NEXT: ret

From 60adcff1d8970db398e32fdba2205dda7e818de5 Mon Sep 17 00:00:00 2001
From: Stephen Canon
Date: Sun, 1 Jun 2025 10:44:43 -0400
Subject: [PATCH 2/3] Mark concrete SIMD float comparisons transparent

There's no reason for these to ever be calls, so they should be
transparent instead of just aEIC. Also adds concrete versions of
comparisons with scalars, and filecheck tests to make sure these
generate 1-2 instruction sequences in release on arm64 (x86_64 is a
little trickier to test due to frame pointers, but if we get the right
codegen on arm64, in practice we do well on x86_64 for these too).

Also makes filecheck patterns for repeating initializers a bit more
robust.
---
 .../SIMDFloatConcreteOperations.swift.gyb     | 100 ++++++-----
 test/stdlib/SIMDFloatComparisons.swift.gyb    | 161 ++++++++++++++++++
 test/stdlib/SIMDFloatInitializers.swift.gyb   |   4 +-
 test/stdlib/SIMDMaskInitializers.swift.gyb    |   2 +-
 test/stdlib/SIMDSignedInitializers.swift.gyb  |   2 +-
 .../stdlib/SIMDUnsignedInitializers.swift.gyb |   2 +-
 6 files changed, 225 insertions(+), 46 deletions(-)
 create mode 100644 test/stdlib/SIMDFloatComparisons.swift.gyb

diff --git a/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb b/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
index be97883a5a673..699c819e12210 100644
--- a/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
+++ b/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
@@ -80,53 +80,71 @@ extension SIMD${n} where Scalar == ${Scalar} {
   }
 % end
 
-  /// A vector mask with the result of a pointwise equality comparison.
-  @_alwaysEmitIntoClient
-  public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask(${MaskExt}(
-      Builtin.fcmp_oeq_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-
-  /// A vector mask with the result of a pointwise inequality comparison.
-  @_alwaysEmitIntoClient
-  public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask(${MaskExt}(
-      Builtin.fcmp_une_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-
-  /// A vector mask with the result of a pointwise less-than comparison.
-  @_alwaysEmitIntoClient
-  public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask(${MaskExt}(
-      Builtin.fcmp_olt_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-
-  /// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
-  @_alwaysEmitIntoClient
-  public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+%{
+compares = [
+  ("==", "oeq", "equal to"),
+  ("!=", "une", "not equal to"),
+  ("<", "olt", "less than"),
+  ("<=", "ole", "less than or equal to"),
+  (">=", "oge", "greater than or equal to"),
+  (">", "ogt", "greater than")
+]
+}%
+% for (op, bi, description) in compares:
+  /// Pointwise compare ${description}.
+  ///
+  /// Each lane of the result is true if that lane of a is ${description} the
+  /// corresponding lane of b, and false otherwise.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask()
+  /// for i in 0..<${n} {
+  ///   result[i] = (a[i] ${op} b[i])
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient @_transparent
+  public static func .${op}(a: Self, b: Self) -> SIMDMask<MaskStorage> {
     SIMDMask(${MaskExt}(
-      Builtin.fcmp_ole_${Builtin}(a._storage._value, b._storage._value)
+      Builtin.fcmp_${bi}_${Builtin}(a._storage._value, b._storage._value)
     ))
   }
 
-  /// A vector mask with the result of a pointwise greater-than comparison.
-  @_alwaysEmitIntoClient
-  public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask(${MaskExt}(
-      Builtin.fcmp_ogt_${Builtin}(a._storage._value, b._storage._value)
-    ))
+  /// Pointwise compare ${description}.
+  ///
+  /// Each lane of the result is true if that lane of a is ${description} b,
+  /// and false otherwise.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask()
+  /// for i in 0..<${n} {
+  ///   result[i] = (a[i] ${op} b)
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient @_transparent
+  public static func .${op}(a: Self, b: Scalar) -> SIMDMask<MaskStorage> {
+    a .${op} Self(repeating: b)
   }
-
-  /// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
-  @_alwaysEmitIntoClient
-  public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask(${MaskExt}(
-      Builtin.fcmp_oge_${Builtin}(a._storage._value, b._storage._value)
-    ))
+
+  /// Pointwise compare ${description}.
+  ///
+  /// Each lane of the result is true if a is ${description} the corresponding
+  /// lane of b, and false otherwise.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask()
+  /// for i in 0..<${n} {
+  ///   result[i] = (a ${op} b[i])
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient @_transparent
+  public static func .${op}(a: Scalar, b: Self) -> SIMDMask<MaskStorage> {
+    Self(repeating: a) .${op} b
   }
+
+% end
 }
 % if bits == 16:
 #endif
diff --git a/test/stdlib/SIMDFloatComparisons.swift.gyb b/test/stdlib/SIMDFloatComparisons.swift.gyb
new file mode 100644
index 0000000000000..eafbbb0240ec2
--- /dev/null
+++ b/test/stdlib/SIMDFloatComparisons.swift.gyb
@@ -0,0 +1,161 @@
+//===--- SIMDFloatComparisons.swift.gyb -----------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+// RUN: %empty-directory(%t)
+// RUN: %gyb %s -o %t/SIMDFloatComparisons.swift
+// RUN: %target-swift-frontend -primary-file %t/SIMDFloatComparisons.swift -S | %FileCheck %t/SIMDFloatComparisons.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
+// RUN: %target-swift-frontend -primary-file %t/SIMDFloatComparisons.swift -S -O | %FileCheck %t/SIMDFloatComparisons.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
+
+import Swift
+
+%for bits in [16,32,64]:
+% scalar = {16:'Float16',32:'Float',64:'Double'}[bits]
+% for totalBits in [64,128]:
+% n = totalBits // bits
+% if n != 1:
+% neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
+% if bits == 16:
+#if arch(arm64)
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+% end
+func compare_eq${n}x${bits}(
+  _ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
+) -> SIMDMask<SIMD${n}<Int${bits}>> {
+  a .== b
+}
+% if bits == 16:
+#endif
+// CHECK-arm64: compare_eq${n}x${bits}{{[[:alnum:]_]+}}:
+% else:
+// CHECK: compare_eq${n}x${bits}{{[[:alnum:]_]+}}:
+% end
+// CHECKO-arm64-NEXT: fcmeq.${neonSuffix} v0, v0, v1
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: fcmeq.${neonSuffix}
+// CHECKOnone-arm64: ret
+// CHECK-x86_64: cmpeqp${'s' if bits == 32 else 'd'}
+// CHECK-x86_64: ret
+
+% if bits == 16:
+#if arch(arm64)
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+% end
+func compare_ne${n}x${bits}(
+  _ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
+) -> SIMDMask<SIMD${n}<Int${bits}>> {
+  a .!= b
+}
+% if bits == 16:
+#endif
+// CHECK-arm64: compare_ne${n}x${bits}{{[[:alnum:]_]+}}:
+% else:
+// CHECK: compare_ne${n}x${bits}{{[[:alnum:]_]+}}:
+% end
+// CHECKO-arm64-NEXT: fcmeq.${neonSuffix} [[TMP:v[0-9]+]], v0, v1
+// CHECKO-arm64-NEXT: mvn.${totalBits//8}b v0, [[TMP]]
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: fcmeq.${neonSuffix}
+// CHECKOnone-arm64: mvn.${totalBits//8}b
+// CHECKOnone-arm64: ret
+// CHECK-x86_64: cmpnep${'s' if bits == 32 else 'd'}
+// CHECK-x86_64: ret
+
+% if bits == 16:
+#if arch(arm64)
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+% end
+func compare_lt${n}x${bits}(
+  _ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
+) -> SIMDMask<SIMD${n}<Int${bits}>> {
+  a .< b
+}
+% if bits == 16:
+#endif
+// CHECK-arm64: compare_lt${n}x${bits}{{[[:alnum:]_]+}}:
+% else:
+// CHECK: compare_lt${n}x${bits}{{[[:alnum:]_]+}}:
+% end
+// CHECKO-arm64-NEXT: fcmgt.${neonSuffix} v0, v1, v0
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: fcmgt.${neonSuffix}
+// CHECKOnone-arm64: ret
+// CHECK-x86_64: cmpltp${'s' if bits == 32 else 'd'}
+// CHECK-x86_64: ret
+
+% if bits == 16:
+#if arch(arm64)
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+% end
+func compare_le${n}x${bits}(
+  _ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
+) -> SIMDMask<SIMD${n}<Int${bits}>> {
+  a .<= b
+}
+% if bits == 16:
+#endif
+// CHECK-arm64: compare_le${n}x${bits}{{[[:alnum:]_]+}}:
+% else:
+// CHECK: compare_le${n}x${bits}{{[[:alnum:]_]+}}:
+% end
+// CHECKO-arm64-NEXT: fcmge.${neonSuffix} v0, v1, v0
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: fcmge.${neonSuffix}
+// CHECKOnone-arm64: ret
+// CHECK-x86_64: cmplep${'s' if bits == 32 else 'd'}
+// CHECK-x86_64: ret
+
+% if bits == 16:
+#if arch(arm64)
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+% end
+func compare_ge${n}x${bits}(
+  _ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
+) -> SIMDMask<SIMD${n}<Int${bits}>> {
+  a .>= b
+}
+% if bits == 16:
+#endif
+// CHECK-arm64: compare_ge${n}x${bits}{{[[:alnum:]_]+}}:
+% else:
+// CHECK: compare_ge${n}x${bits}{{[[:alnum:]_]+}}:
+% end
+// CHECKO-arm64-NEXT: fcmge.${neonSuffix} v0, v0, v1
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: fcmge.${neonSuffix}
+// CHECKOnone-arm64: ret
+// CHECK-x86_64: cmplep${'s' if bits == 32 else 'd'}
+// CHECK-x86_64: ret
+
+% if bits == 16:
+#if arch(arm64)
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+% end
+func compare_gt${n}x${bits}(
+  _ a: SIMD${n}<${scalar}>, _ b: SIMD${n}<${scalar}>
+) -> SIMDMask<SIMD${n}<Int${bits}>> {
+  a .> b
+}
+% if bits == 16:
+#endif
+// CHECK-arm64: compare_gt${n}x${bits}{{[[:alnum:]_]+}}:
+% else:
+// CHECK: compare_gt${n}x${bits}{{[[:alnum:]_]+}}:
+% end
+// CHECKO-arm64-NEXT: fcmgt.${neonSuffix} v0, v0, v1
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: fcmgt.${neonSuffix}
+// CHECKOnone-arm64: ret
+// CHECK-x86_64: cmpltp${'s' if bits == 32 else 'd'}
+// CHECK-x86_64: ret
+
+% end
+% end
+%end
diff --git a/test/stdlib/SIMDFloatInitializers.swift.gyb b/test/stdlib/SIMDFloatInitializers.swift.gyb
index c535d57bf1de8..1323113326429 100644
--- a/test/stdlib/SIMDFloatInitializers.swift.gyb
+++ b/test/stdlib/SIMDFloatInitializers.swift.gyb
@@ -31,9 +31,9 @@ func repeating${n}x${bits}(_ scalar: ${scalar}) -> SIMD${n}<${scalar}> {
 }
 % if bits == 16:
 #endif
-// CHECK-arm64: repeating${n}x${bits}{{[[:alnum:]]+}}:
+// CHECK-arm64: repeating${n}x${bits}{{[[:alnum:]_]+}}:
 % else:
-// CHECK: repeating${n}x${bits}{{[[:alnum:]]+}}:
+// CHECK: repeating${n}x${bits}{{[[:alnum:]_]+}}:
 % end
 // CHECKO-arm64-NEXT: dup.${neonSuffix} v0, v0[0]
 // CHECKO-arm64-NEXT: ret
diff --git a/test/stdlib/SIMDMaskInitializers.swift.gyb b/test/stdlib/SIMDMaskInitializers.swift.gyb
index 226713918c818..ff1c89100ff60 100644
--- a/test/stdlib/SIMDMaskInitializers.swift.gyb
+++ b/test/stdlib/SIMDMaskInitializers.swift.gyb
@@ -24,7 +24,7 @@ import Swift
 func repeating${n}_mask${bits}(_ scalar: Bool) -> SIMDMask<SIMD${n}<Int${bits}>> {
   SIMDMask(repeating: scalar)
 }
-// CHECK: repeating${n}_mask${bits}{{[[:alnum:]]+}}:
+// CHECK: repeating${n}_mask${bits}{{[[:alnum:]_]+}}:
 // CHECKO-arm64-NEXT: sbfx [[REG:[wx][0-9]]], {{[wx]}}0, #0, #1
 // CHECKO-arm64-NEXT: dup.${neonSuffix} v0, [[REG]]
 // CHECKO-arm64-NEXT: ret
diff --git a/test/stdlib/SIMDSignedInitializers.swift.gyb b/test/stdlib/SIMDSignedInitializers.swift.gyb
index d6b16ca6a5c13..15cc5bfb0bae7 100644
--- a/test/stdlib/SIMDSignedInitializers.swift.gyb
+++ b/test/stdlib/SIMDSignedInitializers.swift.gyb
@@ -25,7 +25,7 @@ import Swift
 func repeating${n}_int${bits}(_ scalar: Int${bits}) -> SIMD${n}<Int${bits}> {
   SIMD${n}(repeating: scalar)
 }
-// CHECK: repeating${n}_int${bits}{{[[:alnum:]]+}}:
+// CHECK: repeating${n}_int${bits}{{[[:alnum:]_]+}}:
 // CHECKO-arm64-NEXT: dup.${neonSuffix} v0, {{[wx]}}0
 // CHECKO-arm64-NEXT: ret
 // CHECKOnone-arm64: dup.${neonSuffix}
diff --git a/test/stdlib/SIMDUnsignedInitializers.swift.gyb b/test/stdlib/SIMDUnsignedInitializers.swift.gyb
index 191b75d7d29fb..eb0166958dc60 100644
--- a/test/stdlib/SIMDUnsignedInitializers.swift.gyb
+++ b/test/stdlib/SIMDUnsignedInitializers.swift.gyb
@@ -25,7 +25,7 @@ import Swift
 func repeating${n}_uint${bits}(_ scalar: UInt${bits}) -> SIMD${n}<UInt${bits}> {
   SIMD${n}(repeating: scalar)
 }
-// CHECK: repeating${n}_uint${bits}{{[[:alnum:]]+}}:
+// CHECK: repeating${n}_uint${bits}{{[[:alnum:]_]+}}: // CHECKO-arm64-NEXT: dup.${neonSuffix} v0, {{[wx]}}0 // CHECKO-arm64-NEXT: ret // CHECKOnone-arm64: dup.${neonSuffix} From 355692bf18db709c71cc951e90611c8127fac967 Mon Sep 17 00:00:00 2001 From: Stephen Canon Date: Sun, 1 Jun 2025 15:41:19 -0400 Subject: [PATCH 3/3] Fixup x86_64 tests for float16. --- test/stdlib/SIMDFloatComparisons.swift.gyb | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/stdlib/SIMDFloatComparisons.swift.gyb b/test/stdlib/SIMDFloatComparisons.swift.gyb index eafbbb0240ec2..9c1718206c3db 100644 --- a/test/stdlib/SIMDFloatComparisons.swift.gyb +++ b/test/stdlib/SIMDFloatComparisons.swift.gyb @@ -36,13 +36,13 @@ func compare_eq${n}x${bits}( // CHECK-arm64: compare_eq${n}x${bits}{{[[:alnum:]_]+}}: % else: // CHECK: compare_eq${n}x${bits}{{[[:alnum:]_]+}}: +// CHECK-x86_64: cmpeqp${'s' if bits == 32 else 'd'} +// CHECK-x86_64: ret % end // CHECKO-arm64-NEXT: fcmeq.${neonSuffix} v0, v0, v1 // CHECKO-arm64-NEXT: ret // CHECKOnone-arm64: fcmeq.${neonSuffix} // CHECKOnone-arm64: ret -// CHECK-x86_64: cmpeqp${'s' if bits == 32 else 'd'} -// CHECK-x86_64: ret % if bits == 16: #if arch(arm64) @@ -58,6 +58,8 @@ func compare_ne${n}x${bits}( // CHECK-arm64: compare_ne${n}x${bits}{{[[:alnum:]_]+}}: % else: // CHECK: compare_ne${n}x${bits}{{[[:alnum:]_]+}}: +// CHECK-x86_64: cmpneqp${'s' if bits == 32 else 'd'} +// CHECK-x86_64: ret % end // CHECKO-arm64-NEXT: fcmeq.${neonSuffix} [[TMP:v[0-9]+]], v0, v1 // CHECKO-arm64-NEXT: mvn.${totalBits//8}b v0, [[TMP]] @@ -65,8 +67,6 @@ func compare_ne${n}x${bits}( // CHECKOnone-arm64: fcmeq.${neonSuffix} // CHECKOnone-arm64: mvn.${totalBits//8}b // CHECKOnone-arm64: ret -// CHECK-x86_64: cmpnep${'s' if bits == 32 else 'd'} -// CHECK-x86_64: ret % if bits == 16: #if arch(arm64) @@ -82,13 +82,13 @@ func compare_lt${n}x${bits}( // CHECK-arm64: compare_lt${n}x${bits}{{[[:alnum:]_]+}}: % else: // CHECK: compare_lt${n}x${bits}{{[[:alnum:]_]+}}: +// CHECK-x86_64: cmpltp${'s' if bits == 32 else 'd'} +// CHECK-x86_64: ret % end // CHECKO-arm64-NEXT: fcmgt.${neonSuffix} v0, v1, v0 // CHECKO-arm64-NEXT: ret // CHECKOnone-arm64: fcmgt.${neonSuffix} // CHECKOnone-arm64: ret -// CHECK-x86_64: cmpltp${'s' if bits == 32 else 'd'} -// CHECK-x86_64: ret % if bits == 16: #if arch(arm64) @@ -104,13 +104,13 @@ func compare_le${n}x${bits}( // CHECK-arm64: compare_le${n}x${bits}{{[[:alnum:]_]+}}: % else: // CHECK: compare_le${n}x${bits}{{[[:alnum:]_]+}}: +// CHECK-x86_64: cmplep${'s' if bits == 32 else 'd'} +// CHECK-x86_64: ret % end // CHECKO-arm64-NEXT: fcmge.${neonSuffix} v0, v1, v0 // CHECKO-arm64-NEXT: ret // CHECKOnone-arm64: fcmge.${neonSuffix} // CHECKOnone-arm64: ret -// CHECK-x86_64: cmplep${'s' if bits == 32 else 'd'} -// CHECK-x86_64: ret % if bits == 16: #if arch(arm64) @@ -126,13 +126,13 @@ func compare_ge${n}x${bits}( // CHECK-arm64: compare_ge${n}x${bits}{{[[:alnum:]_]+}}: % else: // CHECK: compare_ge${n}x${bits}{{[[:alnum:]_]+}}: +// CHECK-x86_64: cmplep${'s' if bits == 32 else 'd'} +// CHECK-x86_64: ret % end // CHECKO-arm64-NEXT: fcmge.${neonSuffix} v0, v0, v1 // CHECKO-arm64-NEXT: ret // CHECKOnone-arm64: fcmge.${neonSuffix} // CHECKOnone-arm64: ret -// CHECK-x86_64: cmplep${'s' if bits == 32 else 'd'} -// CHECK-x86_64: ret % if bits == 16: #if arch(arm64) @@ -148,13 +148,13 @@ func compare_gt${n}x${bits}( // CHECK-arm64: compare_gt${n}x${bits}{{[[:alnum:]_]+}}: % else: // CHECK: compare_gt${n}x${bits}{{[[:alnum:]_]+}}: +// CHECK-x86_64: 
cmpltp${'s' if bits == 32 else 'd'} +// CHECK-x86_64: ret % end // CHECKO-arm64-NEXT: fcmgt.${neonSuffix} v0, v0, v1 // CHECKO-arm64-NEXT: ret // CHECKOnone-arm64: fcmgt.${neonSuffix} // CHECKOnone-arm64: ret -// CHECK-x86_64: cmpltp${'s' if bits == 32 else 'd'} -// CHECK-x86_64: ret % end % end