llvm · grypp · Aug 7, 2025 · Aug 8, 2025 · Aug 8, 2025 · Aug 8, 2025
@@ -2995,30 +2995,47 @@ def NVVM_WgmmaMmaAsyncOp : NVVM_Op<"wgmma.mma_async",
 // NVVM Griddepcontrol Ops
 //===----------------------------------------------------------------------===//
 
-def NVVM_GriddepcontrolWaitOp : NVVM_IntrOp<"griddepcontrol.wait", [], 0> {
-  let assemblyFormat = "attr-dict";
+def GridDepActionWait : I32EnumAttrCase<"wait", 0>;
+def GridDepActionLaunchDependent : I32EnumAttrCase<"launch_dependents", 1>;
 
+def GridDepActionKind : I32EnumAttr<"GridDepKind", "Action kind for grid dependency control",
+  [GridDepActionWait, GridDepActionLaunchDependent]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::NVVM";
+}
+
+def GridDepActionAttr : EnumAttr<NVVM_Dialect, GridDepActionKind, "grid_dep_action">;
+
+def NVVM_GriddepcontrolOp : NVVM_Op<"griddepcontrol", []> {
   let description = [{
-    Causes the executing thread to wait until all prerequisite grids in flight 
+    If the $kind attribute is set to `wait`, it causes the 
+    executing thread to wait until all prerequisite grids in flight 
     have completed and all the memory operations from the prerequisite grids 
     are performed and made visible to the current grid.
 
+    When the $kind is launch_dependents, it signals that specific dependents 
+    the runtime system designated to react to this instruction can be scheduled 
+    as soon as all other CTAs in the grid issue the same instruction or have 
+    completed.
 
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol)
   }];
-}
 
-def NVVM_GriddepcontrolLaunchDependentsOp
-    : NVVM_IntrOp<"griddepcontrol.launch.dependents", [], 0> {
-  let assemblyFormat = "attr-dict";
-
-  let description = [{
-    Signals that specific dependents the runtime system designated to react to 
-    this instruction can be scheduled as soon as all other CTAs in the grid 
-    issue the same instruction or have completed.
+  let arguments = (ins GridDepActionAttr:$kind);
 
+  let assemblyFormat = "$kind attr-dict";
 
-    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol)
+  string llvmBuilder = [{
+    llvm::Intrinsic::ID id;
+      switch ($kind) {
+        case NVVM::GridDepActionKind::wait:
+          id = llvm::Intrinsic::nvvm_griddepcontrol_wait;
+          break;
+        case NVVM::GridDepActionKind::launch_dependents:
+          id = llvm::Intrinsic::nvvm_griddepcontrol_launch_dependents;
+          break;
+      }
+      createIntrinsicCall(builder, id);
   }];
 }
 

diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -535,15 +535,15 @@ func.func @wgmma_wait_group_sync_aligned() {
 }
 
 func.func @griddepcontrol_wait() {
-  // CHECK: nvvm.griddepcontrol.wait
-  nvvm.griddepcontrol.wait
+  // CHECK: nvvm.griddepcontrol wait
+  nvvm.griddepcontrol wait
   return
 }
 
 func.func @griddepcontrol_launch_dependents()
 {
-  // CHECK: nvvm.griddepcontrol.launch.dependents
-  nvvm.griddepcontrol.launch.dependents
+  // CHECK: nvvm.griddepcontrol launch_dependents
+  nvvm.griddepcontrol launch_dependents
   return
 }
 

diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -766,15 +766,15 @@ llvm.func @nvvm_wgmma_wait_group_aligned() {
 // CHECK-LABEL: @nvvm_griddepcontrol_wait
 llvm.func @nvvm_griddepcontrol_wait() {
   // CHECK: call void @llvm.nvvm.griddepcontrol.wait()
-  nvvm.griddepcontrol.wait
+  nvvm.griddepcontrol wait
   llvm.return
 }
 
 // -----
 // CHECK-LABEL: @nvvm_griddepcontrol_launch_dependents
 llvm.func @nvvm_griddepcontrol_launch_dependents() {
   // CHECK: call void @llvm.nvvm.griddepcontrol.launch.dependents()
-  nvvm.griddepcontrol.launch.dependents
+  nvvm.griddepcontrol launch_dependents
   llvm.return
 }
 
@@ -918,3 +918,14 @@ llvm.func @nvvm_dot_accumulate_2way(%a: vector<2xi16>, %b: vector<4xi8>, %c: i32
   %7 = nvvm.dot.accumulate.2way %a <signed>, %b <signed>, %c {b_hi = true}: vector<2xi16>, vector<4xi8>
   llvm.return
 }
+
+// -----
+
+// CHECK-LABEL: @nvvm_pmevent
+llvm.func @nvvm_pmevent() {
+  // CHECK: call void @llvm.nvvm.pm.event.mask(i16 15000)
+  nvvm.pmevent mask = 15000
+  // CHECK: call void @llvm.nvvm.pm.event.mask(i16 4)
+  nvvm.pmevent mask = 4
+  llvm.return
+}