We've 2 ops:

grypp · grypp · commit 5e465ee79684 · 2025-08-07T14:59:54.000Z
1. nvvm.griddepcontrol.wait
1. nvvm.griddepcontrol.launch_dependents

They are related to FDL and same concept. This PR unifies both ops into a single one.
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -2995,30 +2995,47 @@ def NVVM_WgmmaMmaAsyncOp : NVVM_Op<"wgmma.mma_async",
 // NVVM Griddepcontrol Ops
 //===----------------------------------------------------------------------===//
 
-def NVVM_GriddepcontrolWaitOp : NVVM_IntrOp<"griddepcontrol.wait", [], 0> {
-  let assemblyFormat = "attr-dict";
+def FDLWait : I32EnumAttrCase<"wait", 0>;
+def FDLLaunchDependent : I32EnumAttrCase<"launch_dependents", 1>;
 
+def FDLKind : I32EnumAttr<"FDLKind", "Fast dependenct launch kind",
+  [FDLWait, FDLLaunchDependent]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::NVVM";
+}
+
+def FDLKindAttr : EnumAttr<NVVM_Dialect, FDLKind, "fdl_kind">;
+
+def NVVM_GriddepcontrolOp : NVVM_Op<"griddepcontrol", []> {
   let description = [{
-    Causes the executing thread to wait until all prerequisite grids in flight 
+    If the $kind attribute is set to `wait`, the instruction, it causes the 
+    executing thread to wait until all prerequisite grids in flight 
     have completed and all the memory operations from the prerequisite grids 
     are performed and made visible to the current grid.
 
+    When the $kind is launch_dependents, it signals that specific dependents 
+    the runtime system designated to react to this instruction can be scheduled 
+    as soon as all other CTAs in the grid issue the same instruction or have 
+    completed.
 
     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol)
   }];
-}
 
-def NVVM_GriddepcontrolLaunchDependentsOp
-    : NVVM_IntrOp<"griddepcontrol.launch.dependents", [], 0> {
-  let assemblyFormat = "attr-dict";
-
-  let description = [{
-    Signals that specific dependents the runtime system designated to react to 
-    this instruction can be scheduled as soon as all other CTAs in the grid 
-    issue the same instruction or have completed.
+  let arguments = (ins FDLKindAttr:$kind);
 
+  let assemblyFormat = "$kind attr-dict";
 
-    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-griddepcontrol)
+   string llvmBuilder = [{
+    llvm::Intrinsic::ID id;
+      switch ($kind) {
+        case NVVM::FDLKind::wait:
+          id = llvm::Intrinsic::nvvm_griddepcontrol_wait;
+          break;
+        case NVVM::FDLKind::launch_dependents:
+          id = llvm::Intrinsic::nvvm_griddepcontrol_launch_dependents;
+          break;
+      }
+      createIntrinsicCall(builder, id);
   }];
 }
 
diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir
@@ -535,15 +535,15 @@ func.func @wgmma_wait_group_sync_aligned() {
 }
 
 func.func @griddepcontrol_wait() {
-  // CHECK: nvvm.griddepcontrol.wait
-  nvvm.griddepcontrol.wait
+  // CHECK: nvvm.griddepcontrol wait
+  nvvm.griddepcontrol wait
   return
 }
 
 func.func @griddepcontrol_launch_dependents()
 {
-  // CHECK: nvvm.griddepcontrol.launch.dependents
-  nvvm.griddepcontrol.launch.dependents
+  // CHECK: nvvm.griddepcontrol launch_dependents
+  nvvm.griddepcontrol launch_dependents
   return
 }
 
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -766,15 +766,15 @@ llvm.func @nvvm_wgmma_wait_group_aligned() {
 // CHECK-LABEL: @nvvm_griddepcontrol_wait
 llvm.func @nvvm_griddepcontrol_wait() {
   // CHECK: call void @llvm.nvvm.griddepcontrol.wait()
-  nvvm.griddepcontrol.wait
+  nvvm.griddepcontrol wait
   llvm.return
 }
 
 // -----
 // CHECK-LABEL: @nvvm_griddepcontrol_launch_dependents
 llvm.func @nvvm_griddepcontrol_launch_dependents() {
   // CHECK: call void @llvm.nvvm.griddepcontrol.launch.dependents()
-  nvvm.griddepcontrol.launch.dependents
+  nvvm.griddepcontrol launch_dependents
   llvm.return
 }
 
@@ -918,3 +918,14 @@ llvm.func @nvvm_dot_accumulate_2way(%a: vector<2xi16>, %b: vector<4xi8>, %c: i32
   %7 = nvvm.dot.accumulate.2way %a <signed>, %b <signed>, %c {b_hi = true}: vector<2xi16>, vector<4xi8>
   llvm.return
 }
+
+// -----
+
+// CHECK-LABEL: @nvvm_pmevent
+llvm.func @nvvm_pmevent() {
+  // CHECK: call void @llvm.nvvm.pm.event.mask(i16 15000)
+  nvvm.pmevent mask = 15000
+  // CHECK: call void @llvm.nvvm.pm.event.mask(i16 4)
+  nvvm.pmevent mask = 4
+  llvm.return
+}

Original file line number	Diff line number	Diff line change
`@@ -535,15 +535,15 @@ func.func @wgmma_wait_group_sync_aligned() {`
`535`	`535`	`}`
`536`	`536`
`537`	`537`	`func.func @griddepcontrol_wait() {`
`538`		`- // CHECK: nvvm.griddepcontrol.wait`
`539`		`- nvvm.griddepcontrol.wait`
	`538`	`+ // CHECK: nvvm.griddepcontrol wait`
	`539`	`+ nvvm.griddepcontrol wait`
`540`	`540`	`return`
`541`	`541`	`}`
`542`	`542`
`543`	`543`	`func.func @griddepcontrol_launch_dependents()`
`544`	`544`	`{`
`545`		`- // CHECK: nvvm.griddepcontrol.launch.dependents`
`546`		`- nvvm.griddepcontrol.launch.dependents`
	`545`	`+ // CHECK: nvvm.griddepcontrol launch_dependents`
	`546`	`+ nvvm.griddepcontrol launch_dependents`
`547`	`547`	`return`
`548`	`548`	`}`
`549`	`549`