-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[bazel] Update after db7888ca9aef6c203b363bbb395549b4e6cfa9d4 (#146732) #147726
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-llvm-ir Author: None (DeanSturtevant1) Changes: Patch is 142.46 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/147726.diff — 44 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index fb358297a5eed..a5ee8013adff6 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -429,6 +429,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8, "V16fV2iV4iV16fiIiI
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_bf8, "fiIi", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8, "fiIi", "nc", "fp8-conversion-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8_e5m3, "fiIi", "nc", "fp8e5m3-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_bf8, "V2fiIb", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_fp8, "V2fiIb", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f32, "iffiIb", "nc", "fp8-conversion-insts")
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index dc7a83002b7f1..77d2414230cf2 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -108,7 +108,7 @@
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32"
+// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32"
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 4d4afedae3658..421099d3876e3 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -139,3 +139,41 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
{
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
}
+
+// CHECK-LABEL: @test_cvt_f32_fp8_e5m3(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP0]], i32 0)
+// CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP1]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP2]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP3]], i32 1)
+// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[TMP4]] to i32
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV1]], ptr addrspace(1) [[TMP5]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP6]], i32 2)
+// CHECK-NEXT: [[CONV2:%.*]] = fptosi float [[TMP7]] to i32
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV2]], ptr addrspace(1) [[TMP8]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP9]], i32 3)
+// CHECK-NEXT: [[CONV3:%.*]] = fptosi float [[TMP10]] to i32
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV3]], ptr addrspace(1) [[TMP11]], align 4
+// CHECK-NEXT: ret void
+//
+void test_cvt_f32_fp8_e5m3(global int* out, int a)
+{
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 0);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 1);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 2);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 3);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
index 3ba0d50e79031..7494c4f984353 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
@@ -27,3 +27,8 @@ void test_amdgcn_tensor_load_store(v4i sg0, v8i sg1, v4i sg2, v4i sg3, int cpol)
__builtin_amdgcn_tensor_store_from_lds(sg0, sg1, sg2, sg3, cpol); // expected-error {{'__builtin_amdgcn_tensor_store_from_lds' must be a constant integer}}
__builtin_amdgcn_tensor_store_from_lds_d2(sg0, sg1, cpol); // expected-error {{'__builtin_amdgcn_tensor_store_from_lds_d2' must be a constant integer}}
}
+
+void test_cvt_f32_fp8_e5m3(global int* out, int a)
+{
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, a); // expected-error {{'__builtin_amdgcn_cvt_f32_fp8_e5m3' must be a constant integer}}
+}
diff --git a/lldb/source/Host/windows/MainLoopWindows.cpp b/lldb/source/Host/windows/MainLoopWindows.cpp
index b3322e8b3ae59..a1de895c0ba98 100644
--- a/lldb/source/Host/windows/MainLoopWindows.cpp
+++ b/lldb/source/Host/windows/MainLoopWindows.cpp
@@ -12,16 +12,16 @@
#include "lldb/Host/windows/windows.h"
#include "lldb/Utility/Status.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/WindowsError.h"
#include <algorithm>
#include <cassert>
-#include <cerrno>
-#include <csignal>
#include <ctime>
#include <io.h>
+#include <synchapi.h>
#include <thread>
#include <vector>
+#include <winbase.h>
+#include <winerror.h>
#include <winsock2.h>
using namespace lldb;
@@ -42,11 +42,12 @@ namespace {
class PipeEvent : public MainLoopWindows::IOEvent {
public:
explicit PipeEvent(HANDLE handle)
- : IOEvent(CreateEventW(NULL, /*bManualReset=*/FALSE,
+ : IOEvent(CreateEventW(NULL, /*bManualReset=*/TRUE,
/*bInitialState=*/FALSE, NULL)),
- m_handle(handle), m_ready(CreateEventW(NULL, /*bManualReset=*/FALSE,
+ m_handle(handle), m_ready(CreateEventW(NULL, /*bManualReset=*/TRUE,
/*bInitialState=*/FALSE, NULL)) {
assert(m_event && m_ready);
+ m_monitor_thread = std::thread(&PipeEvent::Monitor, this);
}
~PipeEvent() override {
@@ -65,15 +66,27 @@ class PipeEvent : public MainLoopWindows::IOEvent {
}
void WillPoll() override {
- if (!m_monitor_thread.joinable())
- m_monitor_thread = std::thread(&PipeEvent::Monitor, this);
+ if (WaitForSingleObject(m_event, /*dwMilliseconds=*/0) != WAIT_TIMEOUT) {
+ // The thread has already signalled that the data is available. No need
+ // for further polling until we consume that event.
+ return;
+ }
+ if (WaitForSingleObject(m_ready, /*dwMilliseconds=*/0) != WAIT_TIMEOUT) {
+ // The thread is already waiting for data to become available.
+ return;
+ }
+ // Start waiting.
+ SetEvent(m_ready);
}
- void Disarm() override { SetEvent(m_ready); }
+ void Disarm() override { ResetEvent(m_event); }
/// Monitors the handle performing a zero byte read to determine when data is
/// available.
void Monitor() {
+ // Wait until the MainLoop tells us to start.
+ WaitForSingleObject(m_ready, INFINITE);
+
do {
char buf[1];
DWORD bytes_read = 0;
@@ -110,7 +123,11 @@ class PipeEvent : public MainLoopWindows::IOEvent {
continue;
}
+ // Notify that data is available on the pipe. It's important to set this
+ // before clearing m_ready to avoid a race with WillPoll.
SetEvent(m_event);
+ // Stop polling until we're told to resume.
+ ResetEvent(m_ready);
// Wait until the current read is consumed before doing the next read.
WaitForSingleObject(m_ready, INFINITE);
diff --git a/lldb/unittests/Host/MainLoopTest.cpp b/lldb/unittests/Host/MainLoopTest.cpp
index 502028ae1a343..30585d12fe81d 100644
--- a/lldb/unittests/Host/MainLoopTest.cpp
+++ b/lldb/unittests/Host/MainLoopTest.cpp
@@ -10,6 +10,7 @@
#include "TestingSupport/SubsystemRAII.h"
#include "lldb/Host/ConnectionFileDescriptor.h"
#include "lldb/Host/FileSystem.h"
+#include "lldb/Host/MainLoopBase.h"
#include "lldb/Host/PseudoTerminal.h"
#include "lldb/Host/common/TCPSocket.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
@@ -64,7 +65,7 @@ class MainLoopTest : public testing::Test {
};
} // namespace
-TEST_F(MainLoopTest, ReadObject) {
+TEST_F(MainLoopTest, ReadSocketObject) {
char X = 'X';
size_t len = sizeof(X);
ASSERT_TRUE(socketpair[0]->Write(&X, len).Success());
@@ -101,6 +102,144 @@ TEST_F(MainLoopTest, ReadPipeObject) {
ASSERT_EQ(1u, callback_count);
}
+TEST_F(MainLoopTest, MultipleReadsPipeObject) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ std::future<void> async_writer = std::async(std::launch::async, [&] {
+ for (int i = 0; i < 5; ++i) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(500));
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ }
+ });
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &loop) {
+ callback_count++;
+ if (callback_count == 5)
+ loop.RequestTermination();
+
+ // Read some data to ensure the handle is not in a readable state.
+ char buf[1024] = {0};
+ size_t len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 1);
+ EXPECT_EQ(buf[0], 'X');
+ },
+ error);
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(5u, callback_count);
+ async_writer.wait();
+}
+
+TEST_F(MainLoopTest, PipeDelayBetweenRegisterAndRun) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &loop) {
+ callback_count++;
+
+ // Read some data to ensure the handle is not in a readable state.
+ char buf[1024] = {0};
+ size_t len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 2);
+ EXPECT_EQ(buf[0], 'X');
+ EXPECT_EQ(buf[1], 'X');
+ },
+ error);
+ auto cb = [&](MainLoopBase &) {
+ callback_count++;
+ char X = 'X';
+ size_t len = sizeof(X);
+ // Write twice and ensure we coalesce into a single read.
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ };
+ // Add a write that triggers a read event.
+ loop.AddCallback(cb, std::chrono::milliseconds(500));
+ loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); },
+ std::chrono::milliseconds(1000));
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+
+ // Write between RegisterReadObject / Run should NOT invoke the callback.
+ cb(loop);
+ ASSERT_EQ(1u, callback_count);
+
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(4u, callback_count);
+}
+
+TEST_F(MainLoopTest, NoSelfTriggersDuringPipeHandler) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &lop) {
+ callback_count++;
+
+ char X = 'Y';
+ size_t len = sizeof(X);
+ // writes / reads during the handler callback should NOT trigger itself.
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+
+ char buf[1024] = {0};
+ len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 2);
+ EXPECT_EQ(buf[0], 'X');
+ EXPECT_EQ(buf[1], 'Y');
+
+ if (callback_count == 2)
+ loop.RequestTermination();
+ },
+ error);
+ // Add a write that triggers a read event.
+ loop.AddPendingCallback([&](MainLoopBase &) {
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ });
+ loop.AddCallback(
+ [&](MainLoopBase &) {
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ },
+ std::chrono::milliseconds(500));
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(2u, callback_count);
+}
+
TEST_F(MainLoopTest, NoSpuriousPipeReads) {
Pipe pipe;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index c43870392361d..98b793aace7a3 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1702,12 +1702,13 @@ class TargetTransformInfo {
/// unordered-atomic memory intrinsic.
LLVM_ABI unsigned getAtomicMemIntrinsicMaxElementSize() const;
- /// \returns A value which is the result of the given memory intrinsic. New
- /// instructions may be created to extract the result from the given intrinsic
- /// memory operation. Returns nullptr if the target cannot create a result
- /// from the given intrinsic.
- LLVM_ABI Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
- Type *ExpectedType) const;
+ /// \returns A value which is the result of the given memory intrinsic. If \p
+ /// CanCreate is true, new instructions may be created to extract the result
+ /// from the given intrinsic memory operation. Returns nullptr if the target
+ /// cannot create a result from the given intrinsic.
+ LLVM_ABI Value *
+ getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
+ bool CanCreate = true) const;
/// \returns The type to use in a loop expansion of a memcpy call.
LLVM_ABI Type *getMemcpyLoopLoweringType(
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 12f87226c5f57..ddc8a5eaffa94 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -983,8 +983,9 @@ class TargetTransformInfoImplBase {
return 0;
}
- virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
- Type *ExpectedType) const {
+ virtual Value *
+ getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
+ bool CanCreate = true) const {
return nullptr;
}
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 7c5cdbbeb0ca8..2967532226197 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -655,6 +655,21 @@ struct MaxMin_match {
template <typename MatchContext>
bool match(const MatchContext &Ctx, SDValue N) {
+ auto MatchMinMax = [&](SDValue L, SDValue R, SDValue TrueValue,
+ SDValue FalseValue, ISD::CondCode CC) {
+ if ((TrueValue != L || FalseValue != R) &&
+ (TrueValue != R || FalseValue != L))
+ return false;
+
+ ISD::CondCode Cond =
+ TrueValue == L ? CC : getSetCCInverse(CC, L.getValueType());
+ if (!Pred_t::match(Cond))
+ return false;
+
+ return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
+ (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+ };
+
if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT)) ||
sd_context_match(N, Ctx, m_Opc(ISD::VSELECT))) {
EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
@@ -670,23 +685,22 @@ struct MaxMin_match {
SDValue R = Cond->getOperand(EO_SETCC.FirstIndex + 1);
auto *CondNode =
cast<CondCodeSDNode>(Cond->getOperand(EO_SETCC.FirstIndex + 2));
-
- if ((TrueValue != L || FalseValue != R) &&
- (TrueValue != R || FalseValue != L)) {
- return false;
- }
-
- ISD::CondCode Cond =
- TrueValue == L ? CondNode->get()
- : getSetCCInverse(CondNode->get(), L.getValueType());
- if (!Pred_t::match(Cond)) {
- return false;
- }
- return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
- (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+ return MatchMinMax(L, R, TrueValue, FalseValue, CondNode->get());
}
}
+ if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT_CC))) {
+ EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
+ assert(EO_SELECT.Size == 5);
+ SDValue L = N->getOperand(EO_SELECT.FirstIndex);
+ SDValue R = N->getOperand(EO_SELECT.FirstIndex + 1);
+ SDValue TrueValue = N->getOperand(EO_SELECT.FirstIndex + 2);
+ SDValue FalseValue = N->getOperand(EO_SELECT.FirstIndex + 3);
+ auto *CondNode =
+ cast<CondCodeSDNode>(N->getOperand(EO_SELECT.FirstIndex + 4));
+ return MatchMinMax(L, R, TrueValue, FalseValue, CondNode->get());
+ }
+
return false;
}
};
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f7a9b65854696..16885f331e9dd 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: None (DeanSturtevant1) Changes: Patch is 142.46 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/147726.diff — 44 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index fb358297a5eed..a5ee8013adff6 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -429,6 +429,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8, "V16fV2iV4iV16fiIiI
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_bf8, "fiIi", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8, "fiIi", "nc", "fp8-conversion-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8_e5m3, "fiIi", "nc", "fp8e5m3-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_bf8, "V2fiIb", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_fp8, "V2fiIb", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f32, "iffiIb", "nc", "fp8-conversion-insts")
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index dc7a83002b7f1..77d2414230cf2 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -108,7 +108,7 @@
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32"
+// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32"
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 4d4afedae3658..421099d3876e3 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -139,3 +139,41 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
{
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
}
+
+// CHECK-LABEL: @test_cvt_f32_fp8_e5m3(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP0]], i32 0)
+// CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP1]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP2]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP3]], i32 1)
+// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[TMP4]] to i32
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV1]], ptr addrspace(1) [[TMP5]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP6]], i32 2)
+// CHECK-NEXT: [[CONV2:%.*]] = fptosi float [[TMP7]] to i32
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV2]], ptr addrspace(1) [[TMP8]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP9]], i32 3)
+// CHECK-NEXT: [[CONV3:%.*]] = fptosi float [[TMP10]] to i32
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV3]], ptr addrspace(1) [[TMP11]], align 4
+// CHECK-NEXT: ret void
+//
+void test_cvt_f32_fp8_e5m3(global int* out, int a)
+{
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 0);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 1);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 2);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 3);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
index 3ba0d50e79031..7494c4f984353 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
@@ -27,3 +27,8 @@ void test_amdgcn_tensor_load_store(v4i sg0, v8i sg1, v4i sg2, v4i sg3, int cpol)
__builtin_amdgcn_tensor_store_from_lds(sg0, sg1, sg2, sg3, cpol); // expected-error {{'__builtin_amdgcn_tensor_store_from_lds' must be a constant integer}}
__builtin_amdgcn_tensor_store_from_lds_d2(sg0, sg1, cpol); // expected-error {{'__builtin_amdgcn_tensor_store_from_lds_d2' must be a constant integer}}
}
+
+void test_cvt_f32_fp8_e5m3(global int* out, int a)
+{
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, a); // expected-error {{'__builtin_amdgcn_cvt_f32_fp8_e5m3' must be a constant integer}}
+}
diff --git a/lldb/source/Host/windows/MainLoopWindows.cpp b/lldb/source/Host/windows/MainLoopWindows.cpp
index b3322e8b3ae59..a1de895c0ba98 100644
--- a/lldb/source/Host/windows/MainLoopWindows.cpp
+++ b/lldb/source/Host/windows/MainLoopWindows.cpp
@@ -12,16 +12,16 @@
#include "lldb/Host/windows/windows.h"
#include "lldb/Utility/Status.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/WindowsError.h"
#include <algorithm>
#include <cassert>
-#include <cerrno>
-#include <csignal>
#include <ctime>
#include <io.h>
+#include <synchapi.h>
#include <thread>
#include <vector>
+#include <winbase.h>
+#include <winerror.h>
#include <winsock2.h>
using namespace lldb;
@@ -42,11 +42,12 @@ namespace {
class PipeEvent : public MainLoopWindows::IOEvent {
public:
explicit PipeEvent(HANDLE handle)
- : IOEvent(CreateEventW(NULL, /*bManualReset=*/FALSE,
+ : IOEvent(CreateEventW(NULL, /*bManualReset=*/TRUE,
/*bInitialState=*/FALSE, NULL)),
- m_handle(handle), m_ready(CreateEventW(NULL, /*bManualReset=*/FALSE,
+ m_handle(handle), m_ready(CreateEventW(NULL, /*bManualReset=*/TRUE,
/*bInitialState=*/FALSE, NULL)) {
assert(m_event && m_ready);
+ m_monitor_thread = std::thread(&PipeEvent::Monitor, this);
}
~PipeEvent() override {
@@ -65,15 +66,27 @@ class PipeEvent : public MainLoopWindows::IOEvent {
}
void WillPoll() override {
- if (!m_monitor_thread.joinable())
- m_monitor_thread = std::thread(&PipeEvent::Monitor, this);
+ if (WaitForSingleObject(m_event, /*dwMilliseconds=*/0) != WAIT_TIMEOUT) {
+ // The thread has already signalled that the data is available. No need
+ // for further polling until we consume that event.
+ return;
+ }
+ if (WaitForSingleObject(m_ready, /*dwMilliseconds=*/0) != WAIT_TIMEOUT) {
+ // The thread is already waiting for data to become available.
+ return;
+ }
+ // Start waiting.
+ SetEvent(m_ready);
}
- void Disarm() override { SetEvent(m_ready); }
+ void Disarm() override { ResetEvent(m_event); }
/// Monitors the handle performing a zero byte read to determine when data is
/// available.
void Monitor() {
+ // Wait until the MainLoop tells us to start.
+ WaitForSingleObject(m_ready, INFINITE);
+
do {
char buf[1];
DWORD bytes_read = 0;
@@ -110,7 +123,11 @@ class PipeEvent : public MainLoopWindows::IOEvent {
continue;
}
+ // Notify that data is available on the pipe. It's important to set this
+ // before clearing m_ready to avoid a race with WillPoll.
SetEvent(m_event);
+ // Stop polling until we're told to resume.
+ ResetEvent(m_ready);
// Wait until the current read is consumed before doing the next read.
WaitForSingleObject(m_ready, INFINITE);
diff --git a/lldb/unittests/Host/MainLoopTest.cpp b/lldb/unittests/Host/MainLoopTest.cpp
index 502028ae1a343..30585d12fe81d 100644
--- a/lldb/unittests/Host/MainLoopTest.cpp
+++ b/lldb/unittests/Host/MainLoopTest.cpp
@@ -10,6 +10,7 @@
#include "TestingSupport/SubsystemRAII.h"
#include "lldb/Host/ConnectionFileDescriptor.h"
#include "lldb/Host/FileSystem.h"
+#include "lldb/Host/MainLoopBase.h"
#include "lldb/Host/PseudoTerminal.h"
#include "lldb/Host/common/TCPSocket.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
@@ -64,7 +65,7 @@ class MainLoopTest : public testing::Test {
};
} // namespace
-TEST_F(MainLoopTest, ReadObject) {
+TEST_F(MainLoopTest, ReadSocketObject) {
char X = 'X';
size_t len = sizeof(X);
ASSERT_TRUE(socketpair[0]->Write(&X, len).Success());
@@ -101,6 +102,144 @@ TEST_F(MainLoopTest, ReadPipeObject) {
ASSERT_EQ(1u, callback_count);
}
+TEST_F(MainLoopTest, MultipleReadsPipeObject) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ std::future<void> async_writer = std::async(std::launch::async, [&] {
+ for (int i = 0; i < 5; ++i) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(500));
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ }
+ });
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &loop) {
+ callback_count++;
+ if (callback_count == 5)
+ loop.RequestTermination();
+
+ // Read some data to ensure the handle is not in a readable state.
+ char buf[1024] = {0};
+ size_t len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 1);
+ EXPECT_EQ(buf[0], 'X');
+ },
+ error);
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(5u, callback_count);
+ async_writer.wait();
+}
+
+TEST_F(MainLoopTest, PipeDelayBetweenRegisterAndRun) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &loop) {
+ callback_count++;
+
+ // Read some data to ensure the handle is not in a readable state.
+ char buf[1024] = {0};
+ size_t len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 2);
+ EXPECT_EQ(buf[0], 'X');
+ EXPECT_EQ(buf[1], 'X');
+ },
+ error);
+ auto cb = [&](MainLoopBase &) {
+ callback_count++;
+ char X = 'X';
+ size_t len = sizeof(X);
+ // Write twice and ensure we coalesce into a single read.
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ };
+ // Add a write that triggers a read events.
+ loop.AddCallback(cb, std::chrono::milliseconds(500));
+ loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); },
+ std::chrono::milliseconds(1000));
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+
+ // Write between RegisterReadObject / Run should NOT invoke the callback.
+ cb(loop);
+ ASSERT_EQ(1u, callback_count);
+
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(4u, callback_count);
+}
+
+TEST_F(MainLoopTest, NoSelfTriggersDuringPipeHandler) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &lop) {
+ callback_count++;
+
+ char X = 'Y';
+ size_t len = sizeof(X);
+ // writes / reads during the handler callback should NOT trigger itself.
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+
+ char buf[1024] = {0};
+ len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 2);
+ EXPECT_EQ(buf[0], 'X');
+ EXPECT_EQ(buf[1], 'Y');
+
+ if (callback_count == 2)
+ loop.RequestTermination();
+ },
+ error);
+ // Add a write that triggers a read event.
+ loop.AddPendingCallback([&](MainLoopBase &) {
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ });
+ loop.AddCallback(
+ [&](MainLoopBase &) {
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ },
+ std::chrono::milliseconds(500));
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(2u, callback_count);
+}
+
TEST_F(MainLoopTest, NoSpuriousPipeReads) {
Pipe pipe;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index c43870392361d..98b793aace7a3 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1702,12 +1702,13 @@ class TargetTransformInfo {
/// unordered-atomic memory intrinsic.
LLVM_ABI unsigned getAtomicMemIntrinsicMaxElementSize() const;
- /// \returns A value which is the result of the given memory intrinsic. New
- /// instructions may be created to extract the result from the given intrinsic
- /// memory operation. Returns nullptr if the target cannot create a result
- /// from the given intrinsic.
- LLVM_ABI Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
- Type *ExpectedType) const;
+ /// \returns A value which is the result of the given memory intrinsic. If \p
+ /// CanCreate is true, new instructions may be created to extract the result
+ /// from the given intrinsic memory operation. Returns nullptr if the target
+ /// cannot create a result from the given intrinsic.
+ LLVM_ABI Value *
+ getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
+ bool CanCreate = true) const;
/// \returns The type to use in a loop expansion of a memcpy call.
LLVM_ABI Type *getMemcpyLoopLoweringType(
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 12f87226c5f57..ddc8a5eaffa94 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -983,8 +983,9 @@ class TargetTransformInfoImplBase {
return 0;
}
- virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
- Type *ExpectedType) const {
+ virtual Value *
+ getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
+ bool CanCreate = true) const {
return nullptr;
}
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 7c5cdbbeb0ca8..2967532226197 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -655,6 +655,21 @@ struct MaxMin_match {
template <typename MatchContext>
bool match(const MatchContext &Ctx, SDValue N) {
+ auto MatchMinMax = [&](SDValue L, SDValue R, SDValue TrueValue,
+ SDValue FalseValue, ISD::CondCode CC) {
+ if ((TrueValue != L || FalseValue != R) &&
+ (TrueValue != R || FalseValue != L))
+ return false;
+
+ ISD::CondCode Cond =
+ TrueValue == L ? CC : getSetCCInverse(CC, L.getValueType());
+ if (!Pred_t::match(Cond))
+ return false;
+
+ return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
+ (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+ };
+
if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT)) ||
sd_context_match(N, Ctx, m_Opc(ISD::VSELECT))) {
EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
@@ -670,23 +685,22 @@ struct MaxMin_match {
SDValue R = Cond->getOperand(EO_SETCC.FirstIndex + 1);
auto *CondNode =
cast<CondCodeSDNode>(Cond->getOperand(EO_SETCC.FirstIndex + 2));
-
- if ((TrueValue != L || FalseValue != R) &&
- (TrueValue != R || FalseValue != L)) {
- return false;
- }
-
- ISD::CondCode Cond =
- TrueValue == L ? CondNode->get()
- : getSetCCInverse(CondNode->get(), L.getValueType());
- if (!Pred_t::match(Cond)) {
- return false;
- }
- return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
- (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+ return MatchMinMax(L, R, TrueValue, FalseValue, CondNode->get());
}
}
+ if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT_CC))) {
+ EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
+ assert(EO_SELECT.Size == 5);
+ SDValue L = N->getOperand(EO_SELECT.FirstIndex);
+ SDValue R = N->getOperand(EO_SELECT.FirstIndex + 1);
+ SDValue TrueValue = N->getOperand(EO_SELECT.FirstIndex + 2);
+ SDValue FalseValue = N->getOperand(EO_SELECT.FirstIndex + 3);
+ auto *CondNode =
+ cast<CondCodeSDNode>(N->getOperand(EO_SELECT.FirstIndex + 4));
+ return MatchMinMax(L, R, TrueValue, FalseValue, CondNode->get());
+ }
+
return false;
}
};
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f7a9b65854696..16885f331e9dd 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@...
[truncated]
|
@llvm/pr-subscribers-lldb
Author: None (DeanSturtevant1)
Changes
Patch is 142.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147726.diff
44 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index fb358297a5eed..a5ee8013adff6 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -429,6 +429,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8, "V16fV2iV4iV16fiIiI
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_bf8, "fiIi", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8, "fiIi", "nc", "fp8-conversion-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8_e5m3, "fiIi", "nc", "fp8e5m3-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_bf8, "V2fiIb", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_fp8, "V2fiIb", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f32, "iffiIb", "nc", "fp8-conversion-insts")
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index dc7a83002b7f1..77d2414230cf2 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -108,7 +108,7 @@
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32"
+// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32"
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 4d4afedae3658..421099d3876e3 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -139,3 +139,41 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
{
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
}
+
+// CHECK-LABEL: @test_cvt_f32_fp8_e5m3(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP0]], i32 0)
+// CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP1]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP2]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP3]], i32 1)
+// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[TMP4]] to i32
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV1]], ptr addrspace(1) [[TMP5]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP6]], i32 2)
+// CHECK-NEXT: [[CONV2:%.*]] = fptosi float [[TMP7]] to i32
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV2]], ptr addrspace(1) [[TMP8]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP9]], i32 3)
+// CHECK-NEXT: [[CONV3:%.*]] = fptosi float [[TMP10]] to i32
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV3]], ptr addrspace(1) [[TMP11]], align 4
+// CHECK-NEXT: ret void
+//
+void test_cvt_f32_fp8_e5m3(global int* out, int a)
+{
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 0);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 1);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 2);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 3);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
index 3ba0d50e79031..7494c4f984353 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
@@ -27,3 +27,8 @@ void test_amdgcn_tensor_load_store(v4i sg0, v8i sg1, v4i sg2, v4i sg3, int cpol)
__builtin_amdgcn_tensor_store_from_lds(sg0, sg1, sg2, sg3, cpol); // expected-error {{'__builtin_amdgcn_tensor_store_from_lds' must be a constant integer}}
__builtin_amdgcn_tensor_store_from_lds_d2(sg0, sg1, cpol); // expected-error {{'__builtin_amdgcn_tensor_store_from_lds_d2' must be a constant integer}}
}
+
+void test_cvt_f32_fp8_e5m3(global int* out, int a)
+{
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, a); // expected-error {{'__builtin_amdgcn_cvt_f32_fp8_e5m3' must be a constant integer}}
+}
diff --git a/lldb/source/Host/windows/MainLoopWindows.cpp b/lldb/source/Host/windows/MainLoopWindows.cpp
index b3322e8b3ae59..a1de895c0ba98 100644
--- a/lldb/source/Host/windows/MainLoopWindows.cpp
+++ b/lldb/source/Host/windows/MainLoopWindows.cpp
@@ -12,16 +12,16 @@
#include "lldb/Host/windows/windows.h"
#include "lldb/Utility/Status.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/WindowsError.h"
#include <algorithm>
#include <cassert>
-#include <cerrno>
-#include <csignal>
#include <ctime>
#include <io.h>
+#include <synchapi.h>
#include <thread>
#include <vector>
+#include <winbase.h>
+#include <winerror.h>
#include <winsock2.h>
using namespace lldb;
@@ -42,11 +42,12 @@ namespace {
class PipeEvent : public MainLoopWindows::IOEvent {
public:
explicit PipeEvent(HANDLE handle)
- : IOEvent(CreateEventW(NULL, /*bManualReset=*/FALSE,
+ : IOEvent(CreateEventW(NULL, /*bManualReset=*/TRUE,
/*bInitialState=*/FALSE, NULL)),
- m_handle(handle), m_ready(CreateEventW(NULL, /*bManualReset=*/FALSE,
+ m_handle(handle), m_ready(CreateEventW(NULL, /*bManualReset=*/TRUE,
/*bInitialState=*/FALSE, NULL)) {
assert(m_event && m_ready);
+ m_monitor_thread = std::thread(&PipeEvent::Monitor, this);
}
~PipeEvent() override {
@@ -65,15 +66,27 @@ class PipeEvent : public MainLoopWindows::IOEvent {
}
void WillPoll() override {
- if (!m_monitor_thread.joinable())
- m_monitor_thread = std::thread(&PipeEvent::Monitor, this);
+ if (WaitForSingleObject(m_event, /*dwMilliseconds=*/0) != WAIT_TIMEOUT) {
+ // The thread has already signalled that the data is available. No need
+ // for further polling until we consume that event.
+ return;
+ }
+ if (WaitForSingleObject(m_ready, /*dwMilliseconds=*/0) != WAIT_TIMEOUT) {
+ // The thread is already waiting for data to become available.
+ return;
+ }
+ // Start waiting.
+ SetEvent(m_ready);
}
- void Disarm() override { SetEvent(m_ready); }
+ void Disarm() override { ResetEvent(m_event); }
/// Monitors the handle performing a zero byte read to determine when data is
/// avaiable.
void Monitor() {
+ // Wait until the MainLoop tells us to start.
+ WaitForSingleObject(m_ready, INFINITE);
+
do {
char buf[1];
DWORD bytes_read = 0;
@@ -110,7 +123,11 @@ class PipeEvent : public MainLoopWindows::IOEvent {
continue;
}
+ // Notify that data is available on the pipe. It's important to set this
+ // before clearing m_ready to avoid a race with WillPoll.
SetEvent(m_event);
+ // Stop polling until we're told to resume.
+ ResetEvent(m_ready);
// Wait until the current read is consumed before doing the next read.
WaitForSingleObject(m_ready, INFINITE);
diff --git a/lldb/unittests/Host/MainLoopTest.cpp b/lldb/unittests/Host/MainLoopTest.cpp
index 502028ae1a343..30585d12fe81d 100644
--- a/lldb/unittests/Host/MainLoopTest.cpp
+++ b/lldb/unittests/Host/MainLoopTest.cpp
@@ -10,6 +10,7 @@
#include "TestingSupport/SubsystemRAII.h"
#include "lldb/Host/ConnectionFileDescriptor.h"
#include "lldb/Host/FileSystem.h"
+#include "lldb/Host/MainLoopBase.h"
#include "lldb/Host/PseudoTerminal.h"
#include "lldb/Host/common/TCPSocket.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
@@ -64,7 +65,7 @@ class MainLoopTest : public testing::Test {
};
} // namespace
-TEST_F(MainLoopTest, ReadObject) {
+TEST_F(MainLoopTest, ReadSocketObject) {
char X = 'X';
size_t len = sizeof(X);
ASSERT_TRUE(socketpair[0]->Write(&X, len).Success());
@@ -101,6 +102,144 @@ TEST_F(MainLoopTest, ReadPipeObject) {
ASSERT_EQ(1u, callback_count);
}
+TEST_F(MainLoopTest, MultipleReadsPipeObject) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ std::future<void> async_writer = std::async(std::launch::async, [&] {
+ for (int i = 0; i < 5; ++i) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(500));
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ }
+ });
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &loop) {
+ callback_count++;
+ if (callback_count == 5)
+ loop.RequestTermination();
+
+ // Read some data to ensure the handle is not in a readable state.
+ char buf[1024] = {0};
+ size_t len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 1);
+ EXPECT_EQ(buf[0], 'X');
+ },
+ error);
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(5u, callback_count);
+ async_writer.wait();
+}
+
+TEST_F(MainLoopTest, PipeDelayBetweenRegisterAndRun) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &loop) {
+ callback_count++;
+
+ // Read some data to ensure the handle is not in a readable state.
+ char buf[1024] = {0};
+ size_t len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 2);
+ EXPECT_EQ(buf[0], 'X');
+ EXPECT_EQ(buf[1], 'X');
+ },
+ error);
+ auto cb = [&](MainLoopBase &) {
+ callback_count++;
+ char X = 'X';
+ size_t len = sizeof(X);
+ // Write twice and ensure we coalesce into a single read.
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ };
+ // Add a write that triggers a read events.
+ loop.AddCallback(cb, std::chrono::milliseconds(500));
+ loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); },
+ std::chrono::milliseconds(1000));
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+
+ // Write between RegisterReadObject / Run should NOT invoke the callback.
+ cb(loop);
+ ASSERT_EQ(1u, callback_count);
+
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(4u, callback_count);
+}
+
+TEST_F(MainLoopTest, NoSelfTriggersDuringPipeHandler) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &lop) {
+ callback_count++;
+
+ char X = 'Y';
+ size_t len = sizeof(X);
+ // writes / reads during the handler callback should NOT trigger itself.
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+
+ char buf[1024] = {0};
+ len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 2);
+ EXPECT_EQ(buf[0], 'X');
+ EXPECT_EQ(buf[1], 'Y');
+
+ if (callback_count == 2)
+ loop.RequestTermination();
+ },
+ error);
+ // Add a write that triggers a read event.
+ loop.AddPendingCallback([&](MainLoopBase &) {
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ });
+ loop.AddCallback(
+ [&](MainLoopBase &) {
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ },
+ std::chrono::milliseconds(500));
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(2u, callback_count);
+}
+
TEST_F(MainLoopTest, NoSpuriousPipeReads) {
Pipe pipe;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index c43870392361d..98b793aace7a3 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1702,12 +1702,13 @@ class TargetTransformInfo {
/// unordered-atomic memory intrinsic.
LLVM_ABI unsigned getAtomicMemIntrinsicMaxElementSize() const;
- /// \returns A value which is the result of the given memory intrinsic. New
- /// instructions may be created to extract the result from the given intrinsic
- /// memory operation. Returns nullptr if the target cannot create a result
- /// from the given intrinsic.
- LLVM_ABI Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
- Type *ExpectedType) const;
+ /// \returns A value which is the result of the given memory intrinsic. If \p
+ /// CanCreate is true, new instructions may be created to extract the result
+ /// from the given intrinsic memory operation. Returns nullptr if the target
+ /// cannot create a result from the given intrinsic.
+ LLVM_ABI Value *
+ getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
+ bool CanCreate = true) const;
/// \returns The type to use in a loop expansion of a memcpy call.
LLVM_ABI Type *getMemcpyLoopLoweringType(
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 12f87226c5f57..ddc8a5eaffa94 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -983,8 +983,9 @@ class TargetTransformInfoImplBase {
return 0;
}
- virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
- Type *ExpectedType) const {
+ virtual Value *
+ getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
+ bool CanCreate = true) const {
return nullptr;
}
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 7c5cdbbeb0ca8..2967532226197 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -655,6 +655,21 @@ struct MaxMin_match {
template <typename MatchContext>
bool match(const MatchContext &Ctx, SDValue N) {
+ auto MatchMinMax = [&](SDValue L, SDValue R, SDValue TrueValue,
+ SDValue FalseValue, ISD::CondCode CC) {
+ if ((TrueValue != L || FalseValue != R) &&
+ (TrueValue != R || FalseValue != L))
+ return false;
+
+ ISD::CondCode Cond =
+ TrueValue == L ? CC : getSetCCInverse(CC, L.getValueType());
+ if (!Pred_t::match(Cond))
+ return false;
+
+ return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
+ (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+ };
+
if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT)) ||
sd_context_match(N, Ctx, m_Opc(ISD::VSELECT))) {
EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
@@ -670,23 +685,22 @@ struct MaxMin_match {
SDValue R = Cond->getOperand(EO_SETCC.FirstIndex + 1);
auto *CondNode =
cast<CondCodeSDNode>(Cond->getOperand(EO_SETCC.FirstIndex + 2));
-
- if ((TrueValue != L || FalseValue != R) &&
- (TrueValue != R || FalseValue != L)) {
- return false;
- }
-
- ISD::CondCode Cond =
- TrueValue == L ? CondNode->get()
- : getSetCCInverse(CondNode->get(), L.getValueType());
- if (!Pred_t::match(Cond)) {
- return false;
- }
- return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
- (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+ return MatchMinMax(L, R, TrueValue, FalseValue, CondNode->get());
}
}
+ if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT_CC))) {
+ EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
+ assert(EO_SELECT.Size == 5);
+ SDValue L = N->getOperand(EO_SELECT.FirstIndex);
+ SDValue R = N->getOperand(EO_SELECT.FirstIndex + 1);
+ SDValue TrueValue = N->getOperand(EO_SELECT.FirstIndex + 2);
+ SDValue FalseValue = N->getOperand(EO_SELECT.FirstIndex + 3);
+ auto *CondNode =
+ cast<CondCodeSDNode>(N->getOperand(EO_SELECT.FirstIndex + 4));
+ return MatchMinMax(L, R, TrueValue, FalseValue, CondNode->get());
+ }
+
return false;
}
};
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f7a9b65854696..16885f331e9dd 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@...
[truncated]
|
@llvm/pr-subscribers-backend-aarch64
Author: None (DeanSturtevant1)
Changes
Patch is 142.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147726.diff
44 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index fb358297a5eed..a5ee8013adff6 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -429,6 +429,7 @@ TARGET_BUILTIN(__builtin_amdgcn_smfmac_f32_32x32x32_fp8_fp8, "V16fV2iV4iV16fiIiI
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_bf8, "fiIi", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8, "fiIi", "nc", "fp8-conversion-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_f32_fp8_e5m3, "fiIi", "nc", "fp8e5m3-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_bf8, "V2fiIb", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f32_fp8, "V2fiIb", "nc", "fp8-conversion-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f32, "iffiIb", "nc", "fp8-conversion-insts")
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index dc7a83002b7f1..77d2414230cf2 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -108,7 +108,7 @@
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32"
+// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+transpose-load-f4f6-insts,+wavefrontsize32"
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 4d4afedae3658..421099d3876e3 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -139,3 +139,41 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
{
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
}
+
+// CHECK-LABEL: @test_cvt_f32_fp8_e5m3(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP0]], i32 0)
+// CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP1]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[TMP2]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP3]], i32 1)
+// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[TMP4]] to i32
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV1]], ptr addrspace(1) [[TMP5]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP6]], i32 2)
+// CHECK-NEXT: [[CONV2:%.*]] = fptosi float [[TMP7]] to i32
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV2]], ptr addrspace(1) [[TMP8]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.cvt.f32.fp8.e5m3(i32 [[TMP9]], i32 3)
+// CHECK-NEXT: [[CONV3:%.*]] = fptosi float [[TMP10]] to i32
+// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[CONV3]], ptr addrspace(1) [[TMP11]], align 4
+// CHECK-NEXT: ret void
+//
+void test_cvt_f32_fp8_e5m3(global int* out, int a)
+{
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 0);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 1);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 2);
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, 3);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
index 3ba0d50e79031..7494c4f984353 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
@@ -27,3 +27,8 @@ void test_amdgcn_tensor_load_store(v4i sg0, v8i sg1, v4i sg2, v4i sg3, int cpol)
__builtin_amdgcn_tensor_store_from_lds(sg0, sg1, sg2, sg3, cpol); // expected-error {{'__builtin_amdgcn_tensor_store_from_lds' must be a constant integer}}
__builtin_amdgcn_tensor_store_from_lds_d2(sg0, sg1, cpol); // expected-error {{'__builtin_amdgcn_tensor_store_from_lds_d2' must be a constant integer}}
}
+
+void test_cvt_f32_fp8_e5m3(global int* out, int a)
+{
+ *out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, a); // expected-error {{'__builtin_amdgcn_cvt_f32_fp8_e5m3' must be a constant integer}}
+}
diff --git a/lldb/source/Host/windows/MainLoopWindows.cpp b/lldb/source/Host/windows/MainLoopWindows.cpp
index b3322e8b3ae59..a1de895c0ba98 100644
--- a/lldb/source/Host/windows/MainLoopWindows.cpp
+++ b/lldb/source/Host/windows/MainLoopWindows.cpp
@@ -12,16 +12,16 @@
#include "lldb/Host/windows/windows.h"
#include "lldb/Utility/Status.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/WindowsError.h"
#include <algorithm>
#include <cassert>
-#include <cerrno>
-#include <csignal>
#include <ctime>
#include <io.h>
+#include <synchapi.h>
#include <thread>
#include <vector>
+#include <winbase.h>
+#include <winerror.h>
#include <winsock2.h>
using namespace lldb;
@@ -42,11 +42,12 @@ namespace {
class PipeEvent : public MainLoopWindows::IOEvent {
public:
explicit PipeEvent(HANDLE handle)
- : IOEvent(CreateEventW(NULL, /*bManualReset=*/FALSE,
+ : IOEvent(CreateEventW(NULL, /*bManualReset=*/TRUE,
/*bInitialState=*/FALSE, NULL)),
- m_handle(handle), m_ready(CreateEventW(NULL, /*bManualReset=*/FALSE,
+ m_handle(handle), m_ready(CreateEventW(NULL, /*bManualReset=*/TRUE,
/*bInitialState=*/FALSE, NULL)) {
assert(m_event && m_ready);
+ m_monitor_thread = std::thread(&PipeEvent::Monitor, this);
}
~PipeEvent() override {
@@ -65,15 +66,27 @@ class PipeEvent : public MainLoopWindows::IOEvent {
}
void WillPoll() override {
- if (!m_monitor_thread.joinable())
- m_monitor_thread = std::thread(&PipeEvent::Monitor, this);
+ if (WaitForSingleObject(m_event, /*dwMilliseconds=*/0) != WAIT_TIMEOUT) {
+ // The thread has already signalled that the data is available. No need
+ // for further polling until we consume that event.
+ return;
+ }
+ if (WaitForSingleObject(m_ready, /*dwMilliseconds=*/0) != WAIT_TIMEOUT) {
+ // The thread is already waiting for data to become available.
+ return;
+ }
+ // Start waiting.
+ SetEvent(m_ready);
}
- void Disarm() override { SetEvent(m_ready); }
+ void Disarm() override { ResetEvent(m_event); }
/// Monitors the handle performing a zero byte read to determine when data is
/// available.
void Monitor() {
+ // Wait until the MainLoop tells us to start.
+ WaitForSingleObject(m_ready, INFINITE);
+
do {
char buf[1];
DWORD bytes_read = 0;
@@ -110,7 +123,11 @@ class PipeEvent : public MainLoopWindows::IOEvent {
continue;
}
+ // Notify that data is available on the pipe. It's important to set this
+ // before clearing m_ready to avoid a race with WillPoll.
SetEvent(m_event);
+ // Stop polling until we're told to resume.
+ ResetEvent(m_ready);
// Wait until the current read is consumed before doing the next read.
WaitForSingleObject(m_ready, INFINITE);
diff --git a/lldb/unittests/Host/MainLoopTest.cpp b/lldb/unittests/Host/MainLoopTest.cpp
index 502028ae1a343..30585d12fe81d 100644
--- a/lldb/unittests/Host/MainLoopTest.cpp
+++ b/lldb/unittests/Host/MainLoopTest.cpp
@@ -10,6 +10,7 @@
#include "TestingSupport/SubsystemRAII.h"
#include "lldb/Host/ConnectionFileDescriptor.h"
#include "lldb/Host/FileSystem.h"
+#include "lldb/Host/MainLoopBase.h"
#include "lldb/Host/PseudoTerminal.h"
#include "lldb/Host/common/TCPSocket.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
@@ -64,7 +65,7 @@ class MainLoopTest : public testing::Test {
};
} // namespace
-TEST_F(MainLoopTest, ReadObject) {
+TEST_F(MainLoopTest, ReadSocketObject) {
char X = 'X';
size_t len = sizeof(X);
ASSERT_TRUE(socketpair[0]->Write(&X, len).Success());
@@ -101,6 +102,144 @@ TEST_F(MainLoopTest, ReadPipeObject) {
ASSERT_EQ(1u, callback_count);
}
+TEST_F(MainLoopTest, MultipleReadsPipeObject) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ std::future<void> async_writer = std::async(std::launch::async, [&] {
+ for (int i = 0; i < 5; ++i) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(500));
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ }
+ });
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &loop) {
+ callback_count++;
+ if (callback_count == 5)
+ loop.RequestTermination();
+
+ // Read some data to ensure the handle is not in a readable state.
+ char buf[1024] = {0};
+ size_t len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 1);
+ EXPECT_EQ(buf[0], 'X');
+ },
+ error);
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(5u, callback_count);
+ async_writer.wait();
+}
+
+TEST_F(MainLoopTest, PipeDelayBetweenRegisterAndRun) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &loop) {
+ callback_count++;
+
+ // Read some data to ensure the handle is not in a readable state.
+ char buf[1024] = {0};
+ size_t len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 2);
+ EXPECT_EQ(buf[0], 'X');
+ EXPECT_EQ(buf[1], 'X');
+ },
+ error);
+ auto cb = [&](MainLoopBase &) {
+ callback_count++;
+ char X = 'X';
+ size_t len = sizeof(X);
+ // Write twice and ensure we coalesce into a single read.
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ };
+ // Add a write that triggers a read event.
+ loop.AddCallback(cb, std::chrono::milliseconds(500));
+ loop.AddCallback([](MainLoopBase &loop) { loop.RequestTermination(); },
+ std::chrono::milliseconds(1000));
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+
+ // Write between RegisterReadObject / Run should NOT invoke the callback.
+ cb(loop);
+ ASSERT_EQ(1u, callback_count);
+
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(4u, callback_count);
+}
+
+TEST_F(MainLoopTest, NoSelfTriggersDuringPipeHandler) {
+ Pipe pipe;
+
+ ASSERT_TRUE(pipe.CreateNew().Success());
+
+ MainLoop loop;
+
+ Status error;
+ lldb::FileSP file = std::make_shared<NativeFile>(
+ pipe.GetReadFileDescriptor(), File::eOpenOptionReadOnly, false);
+ auto handle = loop.RegisterReadObject(
+ file,
+ [&](MainLoopBase &lop) {
+ callback_count++;
+
+ char X = 'Y';
+ size_t len = sizeof(X);
+ // Writes / reads during the handler callback should NOT re-trigger the handler.
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+
+ char buf[1024] = {0};
+ len = sizeof(buf);
+ ASSERT_THAT_ERROR(file->Read(buf, len).ToError(), llvm::Succeeded());
+ EXPECT_EQ(len, 2);
+ EXPECT_EQ(buf[0], 'X');
+ EXPECT_EQ(buf[1], 'Y');
+
+ if (callback_count == 2)
+ loop.RequestTermination();
+ },
+ error);
+ // Add a write that triggers a read event.
+ loop.AddPendingCallback([&](MainLoopBase &) {
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ });
+ loop.AddCallback(
+ [&](MainLoopBase &) {
+ char X = 'X';
+ size_t len = sizeof(X);
+ ASSERT_THAT_EXPECTED(pipe.Write(&X, len), llvm::HasValue(1));
+ },
+ std::chrono::milliseconds(500));
+ ASSERT_TRUE(error.Success());
+ ASSERT_TRUE(handle);
+ ASSERT_TRUE(loop.Run().Success());
+ ASSERT_EQ(2u, callback_count);
+}
+
TEST_F(MainLoopTest, NoSpuriousPipeReads) {
Pipe pipe;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index c43870392361d..98b793aace7a3 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1702,12 +1702,13 @@ class TargetTransformInfo {
/// unordered-atomic memory intrinsic.
LLVM_ABI unsigned getAtomicMemIntrinsicMaxElementSize() const;
- /// \returns A value which is the result of the given memory intrinsic. New
- /// instructions may be created to extract the result from the given intrinsic
- /// memory operation. Returns nullptr if the target cannot create a result
- /// from the given intrinsic.
- LLVM_ABI Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
- Type *ExpectedType) const;
+ /// \returns A value which is the result of the given memory intrinsic. If \p
+ /// CanCreate is true, new instructions may be created to extract the result
+ /// from the given intrinsic memory operation. Returns nullptr if the target
+ /// cannot create a result from the given intrinsic.
+ LLVM_ABI Value *
+ getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
+ bool CanCreate = true) const;
/// \returns The type to use in a loop expansion of a memcpy call.
LLVM_ABI Type *getMemcpyLoopLoweringType(
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 12f87226c5f57..ddc8a5eaffa94 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -983,8 +983,9 @@ class TargetTransformInfoImplBase {
return 0;
}
- virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
- Type *ExpectedType) const {
+ virtual Value *
+ getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
+ bool CanCreate = true) const {
return nullptr;
}
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 7c5cdbbeb0ca8..2967532226197 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -655,6 +655,21 @@ struct MaxMin_match {
template <typename MatchContext>
bool match(const MatchContext &Ctx, SDValue N) {
+ auto MatchMinMax = [&](SDValue L, SDValue R, SDValue TrueValue,
+ SDValue FalseValue, ISD::CondCode CC) {
+ if ((TrueValue != L || FalseValue != R) &&
+ (TrueValue != R || FalseValue != L))
+ return false;
+
+ ISD::CondCode Cond =
+ TrueValue == L ? CC : getSetCCInverse(CC, L.getValueType());
+ if (!Pred_t::match(Cond))
+ return false;
+
+ return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
+ (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+ };
+
if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT)) ||
sd_context_match(N, Ctx, m_Opc(ISD::VSELECT))) {
EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
@@ -670,23 +685,22 @@ struct MaxMin_match {
SDValue R = Cond->getOperand(EO_SETCC.FirstIndex + 1);
auto *CondNode =
cast<CondCodeSDNode>(Cond->getOperand(EO_SETCC.FirstIndex + 2));
-
- if ((TrueValue != L || FalseValue != R) &&
- (TrueValue != R || FalseValue != L)) {
- return false;
- }
-
- ISD::CondCode Cond =
- TrueValue == L ? CondNode->get()
- : getSetCCInverse(CondNode->get(), L.getValueType());
- if (!Pred_t::match(Cond)) {
- return false;
- }
- return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
- (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+ return MatchMinMax(L, R, TrueValue, FalseValue, CondNode->get());
}
}
+ if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT_CC))) {
+ EffectiveOperands<ExcludeChain> EO_SELECT(N, Ctx);
+ assert(EO_SELECT.Size == 5);
+ SDValue L = N->getOperand(EO_SELECT.FirstIndex);
+ SDValue R = N->getOperand(EO_SELECT.FirstIndex + 1);
+ SDValue TrueValue = N->getOperand(EO_SELECT.FirstIndex + 2);
+ SDValue FalseValue = N->getOperand(EO_SELECT.FirstIndex + 3);
+ auto *CondNode =
+ cast<CondCodeSDNode>(N->getOperand(EO_SELECT.FirstIndex + 4));
+ return MatchMinMax(L, R, TrueValue, FalseValue, CondNode->get());
+ }
+
return false;
}
};
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f7a9b65854696..16885f331e9dd 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@...
[truncated]
|
1e0b0c9
to
a26d79d
Compare
No description provided.