diff --git a/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp b/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp
index f5a42c572ff96..0adfb51a228bb 100644
--- a/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp
+++ b/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp
@@ -48,8 +48,8 @@ Operation *complex::ComplexDialect::materializeConstant(OpBuilder &builder,
                                                         Type type,
                                                         Location loc) {
   if (complex::ConstantOp::isBuildableWith(value, type)) {
-    return builder.create<complex::ConstantOp>(loc, type,
-                                               llvm::cast<ArrayAttr>(value));
+    return complex::ConstantOp::create(builder, loc, type,
+                                       llvm::cast<ArrayAttr>(value));
   }
   return arith::ConstantOp::materialize(builder, value, type, loc);
 }
diff --git a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
index 0c11c76cf1f71..4a5c2a99c92aa 100644
--- a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
+++ b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp
@@ -312,8 +312,9 @@ struct SimplifyCondBranchIdenticalSuccessors
       if (std::get<0>(it) == std::get<1>(it))
         mergedOperands.push_back(std::get<0>(it));
       else
-        mergedOperands.push_back(rewriter.create<arith::SelectOp>(
-            condbr.getLoc(), condition, std::get<0>(it), std::get<1>(it)));
+        mergedOperands.push_back(
+            arith::SelectOp::create(rewriter, condbr.getLoc(), condition,
+                                    std::get<0>(it), std::get<1>(it)));
     }

     rewriter.replaceOpWithNewOp<BranchOp>(condbr, trueDest, mergedOperands);
@@ -412,8 +413,8 @@ struct CondBranchTruthPropagation : public OpRewritePattern<CondBranchOp> {
           replaced = true;

           if (!constantTrue)
-            constantTrue = rewriter.create<arith::ConstantOp>(
-                condbr.getLoc(), ty, rewriter.getBoolAttr(true));
+            constantTrue = arith::ConstantOp::create(
+                rewriter, condbr.getLoc(), ty, rewriter.getBoolAttr(true));

           rewriter.modifyOpInPlace(use.getOwner(),
                                    [&] { use.set(constantTrue); });
@@ -427,8 +428,8 @@ struct CondBranchTruthPropagation : public OpRewritePattern<CondBranchOp> {
           replaced = true;

           if (!constantFalse)
-            constantFalse = rewriter.create<arith::ConstantOp>(
-                condbr.getLoc(), ty, rewriter.getBoolAttr(false));
+            constantFalse = arith::ConstantOp::create(
+                rewriter, condbr.getLoc(), ty, rewriter.getBoolAttr(false));

           rewriter.modifyOpInPlace(use.getOwner(),
                                    [&] { use.set(constantFalse); });
diff --git a/mlir/lib/Dialect/ControlFlow/Transforms/BufferDeallocationOpInterfaceImpl.cpp b/mlir/lib/Dialect/ControlFlow/Transforms/BufferDeallocationOpInterfaceImpl.cpp
index a077f56f4f472..80dc0c597562d 100644
--- a/mlir/lib/Dialect/ControlFlow/Transforms/BufferDeallocationOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/ControlFlow/Transforms/BufferDeallocationOpInterfaceImpl.cpp
@@ -87,8 +87,8 @@ struct CondBranchOpInterface
         destOperands.getAsOperandRange(), toRetain);
     SmallVector<Value> adaptedConditions(
         llvm::map_range(conditions, conditionModifier));
-    auto deallocOp = builder.create<bufferization::DeallocOp>(
-        condBr.getLoc(), memrefs, adaptedConditions, toRetain);
+    auto deallocOp = bufferization::DeallocOp::create(
+        builder, condBr.getLoc(), memrefs, adaptedConditions, toRetain);
     state.resetOwnerships(deallocOp.getRetained(), condBr->getBlock());
     for (auto [retained, ownership] : llvm::zip(
              deallocOp.getRetained(), deallocOp.getUpdatedConditions())) {
@@ -115,18 +115,19 @@ struct CondBranchOpInterface
     DeallocOp thenTakenDeallocOp = insertDeallocForBranch(
         condBr.getTrueDest(), condBr.getTrueDestOperandsMutable(),
         [&](Value cond) {
-          return builder.create<arith::AndIOp>(condBr.getLoc(), cond,
-                                               condBr.getCondition());
+          return arith::AndIOp::create(builder, condBr.getLoc(), cond,
+                                       condBr.getCondition());
         },
         thenMapping);
     DeallocOp elseTakenDeallocOp = insertDeallocForBranch(
         condBr.getFalseDest(),
        condBr.getFalseDestOperandsMutable(),
         [&](Value cond) {
-          Value trueVal = builder.create<arith::ConstantOp>(
-              condBr.getLoc(), builder.getBoolAttr(true));
-          Value negation = builder.create<arith::XOrIOp>(
-              condBr.getLoc(), trueVal, condBr.getCondition());
-          return builder.create<arith::AndIOp>(condBr.getLoc(), cond, negation);
+          Value trueVal = arith::ConstantOp::create(builder, condBr.getLoc(),
+                                                    builder.getBoolAttr(true));
+          Value negation = arith::XOrIOp::create(
+              builder, condBr.getLoc(), trueVal, condBr.getCondition());
+          return arith::AndIOp::create(builder, condBr.getLoc(), cond,
+                                       negation);
         },
         elseMapping);
@@ -143,9 +144,9 @@ struct CondBranchOpInterface

     for (Value retained : commonValues) {
       state.resetOwnerships(retained, condBr->getBlock());
-      Value combinedOwnership = builder.create<arith::SelectOp>(
-          condBr.getLoc(), condBr.getCondition(), thenMapping[retained],
-          elseMapping[retained]);
+      Value combinedOwnership = arith::SelectOp::create(
+          builder, condBr.getLoc(), condBr.getCondition(),
+          thenMapping[retained], elseMapping[retained]);
       state.updateOwnership(retained, combinedOwnership, condBr->getBlock());
     }
diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
index fccbca6ed05dd..568da8905cbc8 100644
--- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
+++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
@@ -47,13 +47,13 @@ void EmitCDialect::initialize() {
 Operation *EmitCDialect::materializeConstant(OpBuilder &builder,
                                              Attribute value, Type type,
                                              Location loc) {
-  return builder.create<emitc::ConstantOp>(loc, type, value);
+  return emitc::ConstantOp::create(builder, loc, type, value);
 }

 /// Default callback for builders of ops carrying a region. Inserts a yield
 /// without arguments.
 void mlir::emitc::buildTerminatedBody(OpBuilder &builder, Location loc) {
-  builder.create<emitc::YieldOp>(loc);
+  emitc::YieldOp::create(builder, loc);
 }

 bool mlir::emitc::isSupportedEmitCType(Type type) {
diff --git a/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp b/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp
index 12218f5072982..d5fe3b4ae1e7f 100644
--- a/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/EmitC/Transforms/Transforms.cpp
@@ -24,7 +24,7 @@ ExpressionOp createExpression(Operation *op, OpBuilder &builder) {
   Location loc = op->getLoc();

   builder.setInsertionPointAfter(op);
-  auto expressionOp = builder.create<emitc::ExpressionOp>(loc, resultType);
+  auto expressionOp = emitc::ExpressionOp::create(builder, loc, resultType);

   // Replace all op's uses with the new expression's result.
   result.replaceAllUsesWith(expressionOp.getResult());
@@ -33,7 +33,7 @@ ExpressionOp createExpression(Operation *op, OpBuilder &builder) {
   Region &region = expressionOp.getRegion();
   Block &block = region.emplaceBlock();
   builder.setInsertionPointToEnd(&block);
-  auto yieldOp = builder.create<emitc::YieldOp>(loc, result);
+  auto yieldOp = emitc::YieldOp::create(builder, loc, result);

   // Move op into the new expression.
   op->moveBefore(yieldOp);
diff --git a/mlir/lib/Dialect/EmitC/Transforms/TypeConversions.cpp b/mlir/lib/Dialect/EmitC/Transforms/TypeConversions.cpp
index 72c8fd0f32485..ab7be8d6cedd9 100644
--- a/mlir/lib/Dialect/EmitC/Transforms/TypeConversions.cpp
+++ b/mlir/lib/Dialect/EmitC/Transforms/TypeConversions.cpp
@@ -21,7 +21,7 @@ Value materializeAsUnrealizedCast(OpBuilder &builder, Type resultType,
   if (inputs.size() != 1)
     return Value();

-  return builder.create<UnrealizedConversionCastOp>(loc, resultType, inputs)
+  return UnrealizedConversionCastOp::create(builder, loc, resultType, inputs)
       .getResult(0);
 }
diff --git a/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp b/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp
index 17d436f6df028..612e8099eaf35 100644
--- a/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp
+++ b/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp
@@ -50,7 +50,7 @@ class WrapFuncInClass : public OpRewritePattern<emitc::FuncOp> {
                                 PatternRewriter &rewriter) const override {
     auto className = funcOp.getSymNameAttr().str() + "Class";
-    ClassOp newClassOp = rewriter.create<ClassOp>(funcOp.getLoc(), className);
+    ClassOp newClassOp = ClassOp::create(rewriter, funcOp.getLoc(), className);

     SmallVector<std::pair<StringAttr, TypeAttr>> fields;
     rewriter.createBlock(&newClassOp.getBody());
@@ -67,15 +67,15 @@ class WrapFuncInClass : public OpRewritePattern<emitc::FuncOp> {
       TypeAttr typeAttr = TypeAttr::get(val.getType());
       fields.push_back({fieldName, typeAttr});

-      rewriter.create<emitc::FieldOp>(funcOp.getLoc(), fieldName, typeAttr,
-                                      argAttr);
+      emitc::FieldOp::create(rewriter, funcOp.getLoc(), fieldName, typeAttr,
+                             argAttr);
     }

     rewriter.setInsertionPointToEnd(&newClassOp.getBody().front());
     FunctionType funcType = funcOp.getFunctionType();
     Location loc = funcOp.getLoc();
     FuncOp newFuncOp =
-        rewriter.create<emitc::FuncOp>(loc, ("execute"), funcType);
+        emitc::FuncOp::create(rewriter, loc, ("execute"), funcType);

     rewriter.createBlock(&newFuncOp.getBody());
     newFuncOp.getBody().takeBody(funcOp.getBody());
@@ -85,7 +85,7 @@ class WrapFuncInClass : public OpRewritePattern<emitc::FuncOp> {
     newArguments.reserve(fields.size());
     for (auto &[fieldName, attr] : fields) {
       GetFieldOp arg =
-          rewriter.create<emitc::GetFieldOp>(loc, attr.getValue(), fieldName);
+          emitc::GetFieldOp::create(rewriter, loc, attr.getValue(), fieldName);
       newArguments.push_back(arg);
     }
diff --git a/mlir/lib/Dialect/Func/Extensions/InlinerExtension.cpp b/mlir/lib/Dialect/Func/Extensions/InlinerExtension.cpp
index 3328d58551bff..c39e77d823b78 100644
--- a/mlir/lib/Dialect/Func/Extensions/InlinerExtension.cpp
+++ b/mlir/lib/Dialect/Func/Extensions/InlinerExtension.cpp
@@ -61,7 +61,8 @@ struct FuncInlinerInterface : public DialectInlinerInterface {
     // Replace the return with a branch to the dest.
     OpBuilder builder(op);
-    builder.create<cf::BranchOp>(op->getLoc(), newDest, returnOp.getOperands());
+    cf::BranchOp::create(builder, op->getLoc(), newDest,
+                         returnOp.getOperands());
     op->erase();
   }
diff --git a/mlir/lib/Dialect/Func/IR/FuncOps.cpp b/mlir/lib/Dialect/Func/IR/FuncOps.cpp
index d8309d81f4a3f..3c09a2124bd77 100644
--- a/mlir/lib/Dialect/Func/IR/FuncOps.cpp
+++ b/mlir/lib/Dialect/Func/IR/FuncOps.cpp
@@ -50,8 +50,8 @@ void FuncDialect::initialize() {
 Operation *FuncDialect::materializeConstant(OpBuilder &builder, Attribute value,
                                             Type type, Location loc) {
   if (ConstantOp::isBuildableWith(value, type))
-    return builder.create<ConstantOp>(loc, type,
-                                      llvm::cast<FlatSymbolRefAttr>(value));
+    return ConstantOp::create(builder, loc, type,
+                              llvm::cast<FlatSymbolRefAttr>(value));
   return nullptr;
 }
diff --git a/mlir/lib/Dialect/Func/TransformOps/FuncTransformOps.cpp b/mlir/lib/Dialect/Func/TransformOps/FuncTransformOps.cpp
index 11fc696a258c0..935d3e5ac331b 100644
--- a/mlir/lib/Dialect/Func/TransformOps/FuncTransformOps.cpp
+++ b/mlir/lib/Dialect/Func/TransformOps/FuncTransformOps.cpp
@@ -170,8 +170,8 @@ transform::CastAndCallOp::apply(transform::TransformRewriter &rewriter,
     }
   }

-  auto callOp = rewriter.create<func::CallOp>(insertionPoint->getLoc(),
-                                              targetFunction, inputs);
+  auto callOp = func::CallOp::create(rewriter, insertionPoint->getLoc(),
+                                     targetFunction, inputs);

   // Cast the call results back to the expected types. If any conversions fail
   // this is a definite failure as the call has been constructed at this point.
diff --git a/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp b/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp
index a3638c8766a5c..b6c8cdf2f495a 100644
--- a/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp
+++ b/mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp
@@ -46,9 +46,9 @@ struct CallOpSignatureConversion : public OpConversionPattern<CallOp> {
     // Substitute with the new result types from the corresponding FuncType
     // conversion.
-    auto newCallOp = rewriter.create<CallOp>(
-        callOp.getLoc(), callOp.getCallee(), convertedResults,
-        flattenValues(adaptor.getOperands()));
+    auto newCallOp =
+        CallOp::create(rewriter, callOp.getLoc(), callOp.getCallee(),
+                       convertedResults, flattenValues(adaptor.getOperands()));
     SmallVector<Value> replacements;
     size_t offset = 0;
     for (int i = 0, e = callOp->getNumResults(); i < e; ++i) {
diff --git a/mlir/lib/Dialect/Func/Utils/Utils.cpp b/mlir/lib/Dialect/Func/Utils/Utils.cpp
index 0e9662689ef78..f781ed2d591b4 100644
--- a/mlir/lib/Dialect/Func/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Func/Utils/Utils.cpp
@@ -44,8 +44,8 @@ func::replaceFuncWithNewOrder(RewriterBase &rewriter, func::FuncOp funcOp,
   for (unsigned int idx : newResultsOrder)
     newOutputTypes.push_back(origOutputTypes[idx]);
   rewriter.setInsertionPoint(funcOp);
-  auto newFuncOp = rewriter.create<func::FuncOp>(
-      funcOp.getLoc(), funcOp.getName(),
+  auto newFuncOp = func::FuncOp::create(
+      rewriter, funcOp.getLoc(), funcOp.getName(),
       rewriter.getFunctionType(newInputTypes, newOutputTypes));

   Region &newRegion = newFuncOp.getBody();
@@ -80,7 +80,7 @@ func::replaceFuncWithNewOrder(RewriterBase &rewriter, func::FuncOp funcOp,
     newReturnValues.push_back(returnOp.getOperand(idx));
   rewriter.setInsertionPoint(returnOp);
   auto newReturnOp =
-      rewriter.create<func::ReturnOp>(newFuncOp.getLoc(), newReturnValues);
+      func::ReturnOp::create(rewriter, newFuncOp.getLoc(), newReturnValues);
   newReturnOp->setDiscardableAttrs(returnOp->getDiscardableAttrDictionary());
   rewriter.eraseOp(returnOp);

@@ -109,8 +109,9 @@ func::replaceCallOpWithNewOrder(RewriterBase &rewriter, func::CallOp callOp,
   // Replace the kernel call operation with a new one that has the
   // reordered arguments.
   rewriter.setInsertionPoint(callOp);
-  auto newCallOp = rewriter.create<func::CallOp>(
-      callOp.getLoc(), callOp.getCallee(), newResultTypes, newArgsOrderValues);
+  auto newCallOp =
+      func::CallOp::create(rewriter, callOp.getLoc(), callOp.getCallee(),
+                           newResultTypes, newArgsOrderValues);
   newCallOp.setNoInlineAttr(callOp.getNoInlineAttr());
   for (auto &&[newIndex, origIndex] : llvm::enumerate(newResultsOrder))
     rewriter.replaceAllUsesWith(callOp.getResult(origIndex),
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 30b5ac9809139..d186a480c0ce5 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -136,12 +136,13 @@ int64_t GPUMappingMaskAttr::getMaxNumPhysicalIds() const { return 64; }
 Value GPUMappingMaskAttr::createLogicalLinearMappingId(
     OpBuilder &b, Value physicalLinearMappingId) const {
   Location loc = physicalLinearMappingId.getLoc();
-  Value mask = b.create<arith::ConstantOp>(loc, b.getI64IntegerAttr(getMask()));
-  Value one = b.create<arith::ConstantOp>(loc, b.getI64IntegerAttr(1));
-  Value filter = b.create<arith::ShLIOp>(loc, one, physicalLinearMappingId);
-  filter = b.create<arith::SubIOp>(loc, filter, one);
-  Value filteredId = b.create<arith::AndIOp>(loc, mask, filter);
-  return b.create<math::CtPopOp>(loc, filteredId);
+  Value mask =
+      arith::ConstantOp::create(b, loc, b.getI64IntegerAttr(getMask()));
+  Value one = arith::ConstantOp::create(b, loc, b.getI64IntegerAttr(1));
+  Value filter = arith::ShLIOp::create(b, loc, one, physicalLinearMappingId);
+  filter = arith::SubIOp::create(b, loc, filter, one);
+  Value filteredId = arith::AndIOp::create(b, loc, mask, filter);
+  return math::CtPopOp::create(b, loc, filteredId);
 }

 ///          8       4       0
@@ -157,12 +158,14 @@ Value GPUMappingMaskAttr::createLogicalLinearMappingId(
 Value GPUMappingMaskAttr::createIsActiveIdPredicate(
     OpBuilder &b, Value physicalLinearMappingId) const {
   Location loc =
      physicalLinearMappingId.getLoc();
-  Value mask = b.create<arith::ConstantOp>(loc, b.getI64IntegerAttr(getMask()));
-  Value one = b.create<arith::ConstantOp>(loc, b.getI64IntegerAttr(1));
-  Value filter = b.create<arith::ShLIOp>(loc, one, physicalLinearMappingId);
-  Value filtered = b.create<arith::AndIOp>(loc, mask, filter);
-  Value zero = b.create<arith::ConstantOp>(loc, b.getI64IntegerAttr(0));
-  return b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ne, filtered, zero);
+  Value mask =
+      arith::ConstantOp::create(b, loc, b.getI64IntegerAttr(getMask()));
+  Value one = arith::ConstantOp::create(b, loc, b.getI64IntegerAttr(1));
+  Value filter = arith::ShLIOp::create(b, loc, one, physicalLinearMappingId);
+  Value filtered = arith::AndIOp::create(b, loc, mask, filter);
+  Value zero = arith::ConstantOp::create(b, loc, b.getI64IntegerAttr(0));
+  return arith::CmpIOp::create(b, loc, arith::CmpIPredicate::ne, filtered,
+                               zero);
 }

 int64_t GPUMemorySpaceMappingAttr::getMappingId() const {
@@ -1137,7 +1140,7 @@ struct FoldLaunchArguments : public OpRewritePattern<LaunchOp> {
         OpBuilder::InsertionGuard guard(rewriter);
         rewriter.setInsertionPointToStart(&op.getBody().front());
         zero =
-            rewriter.create<arith::ConstantIndexOp>(op.getLoc(), /*value=*/0);
+            arith::ConstantIndexOp::create(rewriter, op.getLoc(), /*value=*/0);
       }
       rewriter.replaceAllUsesWith(id, zero);
       simplified = true;
@@ -1381,10 +1384,10 @@ static void printLaunchFuncOperands(OpAsmPrinter &printer, Operation *,
 void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value value,
                       int32_t offset, int32_t width, ShuffleMode mode) {
   build(builder, result, value,
-        builder.create<arith::ConstantOp>(result.location,
-                                          builder.getI32IntegerAttr(offset)),
-        builder.create<arith::ConstantOp>(result.location,
-                                          builder.getI32IntegerAttr(width)),
+        arith::ConstantOp::create(builder, result.location,
+                                  builder.getI32IntegerAttr(offset)),
+        arith::ConstantOp::create(builder, result.location,
+                                  builder.getI32IntegerAttr(width)),
         mode);
 }

@@ -1395,10 +1398,10 @@ void ShuffleOp::build(OpBuilder &builder, OperationState &result, Value value,
 void RotateOp::build(OpBuilder &builder, OperationState &result, Value value,
                      int32_t offset, int32_t width) {
   build(builder, result, value,
-        builder.create<arith::ConstantOp>(result.location,
-                                          builder.getI32IntegerAttr(offset)),
-        builder.create<arith::ConstantOp>(result.location,
-                                          builder.getI32IntegerAttr(width)));
+        arith::ConstantOp::create(builder, result.location,
+                                  builder.getI32IntegerAttr(offset)),
+        arith::ConstantOp::create(builder, result.location,
+                                  builder.getI32IntegerAttr(width)));
 }

 LogicalResult RotateOp::verify() {
diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index c9e91535df946..1d8279c3199ea 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -560,8 +560,8 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl(
   Value predicate;
   if (originalBasisWasProvided) {
     for (Value tmpPredicate : builderResult.predicateOps) {
-      predicate = predicate ? rewriter.create<arith::AndIOp>(loc, predicate,
-                                                             tmpPredicate)
+      predicate = predicate ? arith::AndIOp::create(rewriter, loc, predicate,
+                                                    tmpPredicate)
                             : tmpPredicate;
     }
   }
@@ -573,8 +573,8 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl(
   Block::iterator insertionPoint;
   if (predicate) {
     // Step 6.a. If predicated, move at the beginning.
-    auto ifOp = rewriter.create<scf::IfOp>(loc, predicate,
-                                           /*withElseRegion=*/false);
+    auto ifOp = scf::IfOp::create(rewriter, loc, predicate,
+                                  /*withElseRegion=*/false);
     targetBlock = ifOp.thenBlock();
     insertionPoint = ifOp.thenBlock()->begin();
   } else {
@@ -632,7 +632,7 @@ DiagnosedSilenceableFailure mlir::transform::gpu::mapForallToBlocksImpl(
     // the insertion point.
     OpBuilder::InsertionGuard guard(rewriter);
     rewriter.setInsertionPointToStart(parentBlock);
-    zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
   }

   ForallRewriteResult rewriteResult;
@@ -884,7 +884,7 @@ DiagnosedSilenceableFailure mlir::transform::gpu::mapOneForallToThreadsImpl(
     return diag;
   // Add a syncthreads if needed. TODO: warpsync
   if (syncAfterDistribute)
-    rewriter.create<BarrierOp>(loc);
+    BarrierOp::create(rewriter, loc);

   return DiagnosedSilenceableFailure::success();
 }
@@ -901,7 +901,7 @@ DiagnosedSilenceableFailure mlir::transform::gpu::mapNestedForallToThreadsImpl(
   // Create an early zero index value for replacements.
   Location loc = target->getLoc();
-  Value zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+  Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
   DiagnosedSilenceableFailure diag = DiagnosedSilenceableFailure::success();
   WalkResult walkResult = target->walk([&](scf::ForallOp forallOp) {
     diag = mlir::transform::gpu::mapOneForallToThreadsImpl(
diff --git a/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp b/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
index f6bdbe384c08f..518a42299484f 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/Utils.cpp
@@ -76,9 +76,10 @@ buildPredicates(RewriterBase &rewriter, Location loc, ArrayRef<Value> activeIds,
     }
     if (activeMappingSize == availableMappingSize)
       continue;
-    Value idx = rewriter.create<arith::ConstantIndexOp>(loc, activeMappingSize);
-    Value pred = rewriter.create<arith::CmpIOp>(loc, arith::CmpIPredicate::ult,
-                                                activeId, idx);
+    Value idx =
+        arith::ConstantIndexOp::create(rewriter, loc, activeMappingSize);
+    Value pred = arith::CmpIOp::create(rewriter, loc, arith::CmpIPredicate::ult,
+                                       activeId, idx);
     predicateOps.push_back(pred);
   }
   return predicateOps;
@@ -98,11 +99,11 @@ static Value buildLinearId(RewriterBase &rewriter, Location loc,
   bindDims(rewriter.getContext(), tx, ty, tz);
   bindSymbols(rewriter.getContext(), bdx, bdy);
   SmallVector<OpFoldResult> vals{
-      rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::x)
+      ThreadOrBlockIdOp::create(rewriter, loc, indexType, Dimension::x)
           .getResult(),
-      rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::y)
+      ThreadOrBlockIdOp::create(rewriter, loc, indexType, Dimension::y)
           .getResult(),
-      rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::z)
+      ThreadOrBlockIdOp::create(rewriter, loc, indexType, Dimension::z)
          .getResult(),
      originalBasisOfr[0], originalBasisOfr[1]};
   OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
@@ -151,12 +152,12 @@ commonLinearIdBuilderFn(int64_t multiplicity = 1,
     if (mask) {
       scaledLinearId =
           getValueOrCreateConstantIndexOp(rewriter, loc, scaledLinearIdOfr);
-      scaledLinearIdI64 = rewriter.create<arith::IndexCastUIOp>(
-          loc, rewriter.getI64Type(), scaledLinearId);
+      scaledLinearIdI64 = arith::IndexCastUIOp::create(
+          rewriter, loc, rewriter.getI64Type(), scaledLinearId);
       Value logicalLinearIdI64 =
           mask.createLogicalLinearMappingId(rewriter, scaledLinearIdI64);
-      scaledLinearId = rewriter.create<arith::IndexCastUIOp>(
-          loc, rewriter.getIndexType(), logicalLinearIdI64);
+      scaledLinearId = arith::IndexCastUIOp::create(
+          rewriter, loc, rewriter.getIndexType(), logicalLinearIdI64);
       LDBG("------adjusting linearId with mask: " << scaledLinearId);
     }
@@ -209,9 +210,9 @@ static GpuIdBuilderFnType common3DIdBuilderFn(int64_t multiplicity = 1) {
         ArrayRef<int64_t> originalBasis) {
     IndexType indexType = rewriter.getIndexType();
     SmallVector<Value> ids{
-        rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::x),
-        rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::y),
-        rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::z)};
+        ThreadOrBlockIdOp::create(rewriter, loc, indexType, Dimension::x),
+        ThreadOrBlockIdOp::create(rewriter, loc, indexType, Dimension::y),
+        ThreadOrBlockIdOp::create(rewriter, loc, indexType, Dimension::z)};
     // In the 3-D mapping case, scale the first dimension by the multiplicity.
     SmallVector<Value> scaledIds = ids;
     AffineExpr d0 = getAffineDimExpr(0, rewriter.getContext());
@@ -411,7 +412,7 @@ DiagnosedSilenceableFailure createGpuLaunch(
     return diag;

   auto createConst = [&](int dim) {
-    return rewriter.create<arith::ConstantIndexOp>(loc, dim);
+    return arith::ConstantIndexOp::create(rewriter, loc, dim);
   };
   OpBuilder::InsertionGuard guard(rewriter);
   Value one = createConst(1);
@@ -421,10 +422,10 @@ DiagnosedSilenceableFailure createGpuLaunch(
   Value blkSizeX = blockDimX.has_value() ? createConst(blockDimX.value()) : one;
   Value blkSizeY = blockDimY.has_value() ? createConst(blockDimY.value()) : one;
   Value blkSizeZ = blockDimZ.has_value() ? createConst(blockDimZ.value()) : one;
-  launchOp = rewriter.create<LaunchOp>(loc, gridSizeX, gridSizeY, gridSizeZ,
-                                       blkSizeX, blkSizeY, blkSizeZ);
+  launchOp = LaunchOp::create(rewriter, loc, gridSizeX, gridSizeY, gridSizeZ,
+                              blkSizeX, blkSizeY, blkSizeZ);
   rewriter.setInsertionPointToEnd(&launchOp.getBody().front());
-  rewriter.create<TerminatorOp>(loc);
+  TerminatorOp::create(rewriter, loc);
   return DiagnosedSilenceableFailure::success();
 }

@@ -445,8 +446,8 @@ DiagnosedSilenceableFailure alterGpuLaunch(
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPointAfterValue(currentBlockdim.x);
   auto createConstValue = [&](int dim) {
-    return rewriter.create<arith::ConstantIndexOp>(currentBlockdim.x.getLoc(),
-                                                   dim);
+    return arith::ConstantIndexOp::create(rewriter, currentBlockdim.x.getLoc(),
+                                          dim);
   };

   if (gridDimX.has_value())
diff --git a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
index 98dc8ad3aa416..8c449144af3a9 100644
--- a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
@@ -145,7 +145,7 @@ struct GpuAllReduceRewriter {
   // Shortcut to create an op from rewriter using loc as the first argument.
   template <typename T, typename... Args>
   T create(Args... args) {
-    return rewriter.create<T>(loc, std::forward<Args>(args)...);
+    return T::create(rewriter, loc, std::forward<Args>(args)...);
   }

   // Creates dimension op of type T, with the result casted to int32.
diff --git a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
index c39ba4a41898d..cd138401e3177 100644
--- a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
@@ -129,7 +129,7 @@ struct GpuAsyncRegionPass::ThreadTokenCallback {
   }

   Value createWaitOp(Location loc, Type resultType, ValueRange operands) {
-    return builder.create<gpu::WaitOp>(loc, resultType, operands)
+    return gpu::WaitOp::create(builder, loc, resultType, operands)
         .getAsyncToken();
   }

@@ -165,8 +165,9 @@ async::ExecuteOp addExecuteResults(async::ExecuteOp executeOp,
   // Clone executeOp with the extra results.
   OpBuilder builder(executeOp);
-  auto newOp = builder.create<async::ExecuteOp>(
-      executeOp.getLoc(), TypeRange{resultTypes}.drop_front() /*drop token*/,
+  auto newOp = async::ExecuteOp::create(
+      builder, executeOp.getLoc(),
+      TypeRange{resultTypes}.drop_front() /*drop token*/,
       executeOp.getDependencies(), executeOp.getBodyOperands());
   IRMapping mapper;
   newOp.getRegion().getBlocks().clear();
@@ -247,7 +248,7 @@ struct GpuAsyncRegionPass::DeferWaitCallback {
           builder.setInsertionPointAfter(op);
           for (auto asyncToken : asyncTokens)
             tokens.push_back(
-                builder.create<async::AwaitOp>(loc, asyncToken).getResult());
+                async::AwaitOp::create(builder, loc, asyncToken).getResult());
           // Set `it` after the inserted async.await ops.
           it = builder.getInsertionPoint();
         })
@@ -279,7 +280,7 @@ struct GpuAsyncRegionPass::DeferWaitCallback {

     // Otherwise, insert a gpu.wait before 'it'.
     builder.setInsertionPoint(it->getBlock(), it);
-    auto waitOp = builder.create<gpu::WaitOp>(loc, Type{}, tokens);
+    auto waitOp = gpu::WaitOp::create(builder, loc, Type{}, tokens);

     // If the new waitOp is at the end of an async.execute region, add it to the
     // worklist. 'operator()(executeOp)' would do the same, but this is faster.
diff --git a/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp b/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp
index 65b9407a7efba..7b30906abc2fd 100644
--- a/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp
@@ -62,7 +62,7 @@ getFlatOffsetAndStrides(OpBuilder &rewriter, Location loc, Value source,
     OpBuilder::InsertionGuard g(rewriter);
     setInsertionPointToStart(rewriter, source);
     newExtractStridedMetadata =
-        rewriter.create<memref::ExtractStridedMetadataOp>(loc, source);
+        memref::ExtractStridedMetadataOp::create(rewriter, loc, source);
   }

   auto &&[sourceStrides, sourceOffset] = sourceType.getStridesAndOffset();
@@ -108,9 +108,9 @@ static Value getFlatMemref(OpBuilder &rewriter, Location loc, Value source,
   auto &&[base, offset, ignore] =
       getFlatOffsetAndStrides(rewriter, loc, source, offsetsTemp);
   MemRefType retType = inferCastResultType(base, offset);
-  return rewriter.create<memref::ReinterpretCastOp>(loc, retType, base, offset,
-                                                    ArrayRef<OpFoldResult>(),
-                                                    ArrayRef<OpFoldResult>());
+  return memref::ReinterpretCastOp::create(rewriter, loc, retType, base, offset,
+                                           ArrayRef<OpFoldResult>(),
+                                           ArrayRef<OpFoldResult>());
 }

 static bool needFlatten(Value val) {
diff --git a/mlir/lib/Dialect/GPU/Transforms/GlobalIdRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/GlobalIdRewriter.cpp
index 153ceb23a6ecd..6519b65cec465 100644
--- a/mlir/lib/Dialect/GPU/Transforms/GlobalIdRewriter.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/GlobalIdRewriter.cpp
@@ -26,11 +26,11 @@ struct GpuGlobalIdRewriter : public OpRewritePattern<gpu::GlobalIdOp> {
                                 PatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     auto dim = op.getDimension();
-    auto blockId = rewriter.create<gpu::BlockIdOp>(loc, dim);
-    auto blockDim = rewriter.create<gpu::BlockDimOp>(loc, dim);
+    auto blockId = gpu::BlockIdOp::create(rewriter, loc, dim);
+    auto blockDim = gpu::BlockDimOp::create(rewriter, loc, dim);
     // Compute blockId.x * blockDim.x
-    auto tmp = rewriter.create<index::MulOp>(op.getLoc(), blockId, blockDim);
-    auto threadId = rewriter.create<gpu::ThreadIdOp>(loc, dim);
+    auto tmp = index::MulOp::create(rewriter, op.getLoc(), blockId, blockDim);
+    auto threadId = gpu::ThreadIdOp::create(rewriter, loc, dim);
     // Compute threadId.x + blockId.x * blockDim.x
     rewriter.replaceOpWithNewOp<index::AddOp>(op, threadId, tmp);
     return success();
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 34ea9fcab4188..99f5c5b0cf139 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -40,7 +40,7 @@ template <typename OpTy>
 static void createForAllDimensions(OpBuilder &builder, Location loc,
                                    SmallVectorImpl<Value> &values) {
   for (auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z})
-    values.push_back(builder.create<OpTy>(loc, builder.getIndexType(), dim));
+    values.push_back(OpTy::create(builder, loc, builder.getIndexType(), dim));
 }

 /// Adds operations generating block/thread ids and grid/block dimensions at the
@@ -195,8 +195,8 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
   }
   FunctionType type =
       FunctionType::get(launchOp.getContext(), kernelOperandTypes, {});
-  auto outlinedFunc = builder.create<gpu::GPUFuncOp>(
-      loc, kernelFnName, type,
+  auto outlinedFunc = gpu::GPUFuncOp::create(
+      builder, loc, kernelFnName, type,
       TypeRange(ValueRange(launchOp.getWorkgroupAttributions())),
       TypeRange(ValueRange(launchOp.getPrivateAttributions())));
   outlinedFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
@@ -247,7 +247,7 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
     if (!terminator)
       continue;
     OpBuilder replacer(terminator);
-    replacer.create<gpu::ReturnOp>(terminator->getLoc());
+    gpu::ReturnOp::create(replacer, terminator->getLoc());
     terminator->erase();
   }

@@ -287,9 +287,9 @@ static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
   Value asyncToken = launchOp.getAsyncToken();
   std::optional<gpu::KernelDim3> clusterSize =
       launchOp.getClusterSizeOperandValues();
-  auto launchFunc = builder.create<gpu::LaunchFuncOp>(
-      launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
-      launchOp.getBlockSizeOperandValues(),
+  auto launchFunc = gpu::LaunchFuncOp::create(
+      builder, launchOp.getLoc(), kernelFunc,
+      launchOp.getGridSizeOperandValues(), launchOp.getBlockSizeOperandValues(),
       launchOp.getDynamicSharedMemorySize(), operands,
       asyncToken ? asyncToken.getType() : nullptr,
       launchOp.getAsyncDependencies(), clusterSize);
@@ -415,8 +415,8 @@ class GpuKernelOutliningPass
       // Check if the module already exists in the symbol table
       if (!kernelModule) {
         // If not found, create a new GPU module
-        kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
-                                                        kernelModuleName);
+        kernelModule = gpu::GPUModuleOp::create(builder, kernelFunc.getLoc(),
+                                                kernelModuleName);
       }

       // If a valid data layout spec was provided, attach it to the kernel module.
diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
index 14c44f27a6249..0d70fa2162bb2 100644
--- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
@@ -34,8 +34,8 @@ static void insertCopyLoops(ImplicitLocOpBuilder &b, Value from, Value to) {
   auto rank = memRefType.getRank();

   SmallVector<Value> lbs, ubs, steps;
-  Value zero = b.create<arith::ConstantIndexOp>(0);
-  Value one = b.create<arith::ConstantIndexOp>(1);
+  Value zero = arith::ConstantIndexOp::create(b, 0);
+  Value one = arith::ConstantIndexOp::create(b, 1);

   // Make sure we have enough loops to use all thread dimensions, these trivial
   // loops should be outermost and therefore inserted first.
@@ -59,8 +59,8 @@ static void insertCopyLoops(ImplicitLocOpBuilder &b, Value from, Value to) {
   auto indexType = b.getIndexType();
   SmallVector<Value> threadIds, blockDims;
   for (auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z}) {
-    threadIds.push_back(b.create<gpu::ThreadIdOp>(indexType, dim));
-    blockDims.push_back(b.create<gpu::BlockDimOp>(indexType, dim));
+    threadIds.push_back(gpu::ThreadIdOp::create(b, indexType, dim));
+    blockDims.push_back(gpu::BlockDimOp::create(b, indexType, dim));
   }

   // Produce the loop nest with copies.
@@ -70,8 +70,8 @@ static void insertCopyLoops(ImplicitLocOpBuilder &b, Value from, Value to) {
       [&](OpBuilder &b, Location loc, ValueRange loopIvs) {
         ivs.assign(loopIvs.begin(), loopIvs.end());
         auto activeIvs = llvm::ArrayRef(ivs).take_back(rank);
-        Value loaded = b.create<memref::LoadOp>(loc, from, activeIvs);
-        b.create<memref::StoreOp>(loc, loaded, to, activeIvs);
+        Value loaded = memref::LoadOp::create(b, loc, from, activeIvs);
+        memref::StoreOp::create(b, loc, loaded, to, activeIvs);
       });

   // Map the innermost loops to threads in reverse order.
@@ -131,10 +131,10 @@ static void insertCopies(Region &region, Location loc, Value from, Value to) {
   auto b = ImplicitLocOpBuilder::atBlockBegin(loc, &region.front());
   insertCopyLoops(b, from, to);
-  b.create<gpu::BarrierOp>();
+  gpu::BarrierOp::create(b);

   b.setInsertionPoint(&region.front().back());
-  b.create<gpu::BarrierOp>();
+  gpu::BarrierOp::create(b);
   insertCopyLoops(b, to, from);
 }
diff --git a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
index 9a69e6dde4274..3c447337d821f 100644
--- a/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ModuleToBinary.cpp
@@ -108,8 +108,8 @@ LogicalResult moduleSerializer(GPUModuleOp op,
       !handler && moduleHandler)
     handler = moduleHandler;
   builder.setInsertionPointAfter(op);
-  builder.create<gpu::BinaryOp>(op.getLoc(), op.getName(), handler,
-                                builder.getArrayAttr(objects));
+  gpu::BinaryOp::create(builder, op.getLoc(), op.getName(), handler,
+                        builder.getArrayAttr(objects));
   op->erase();
   return success();
 }
diff --git a/mlir/lib/Dialect/GPU/Transforms/PromoteShuffleToAMDGPU.cpp b/mlir/lib/Dialect/GPU/Transforms/PromoteShuffleToAMDGPU.cpp
index 171e64346f155..18c69f5f30e5d 100644
--- a/mlir/lib/Dialect/GPU/Transforms/PromoteShuffleToAMDGPU.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/PromoteShuffleToAMDGPU.cpp
@@ -48,10 +48,10 @@ struct PromoteShuffleToSwizzlePattern
                                          "offset must be in the range [0, 31]");

     Location loc = op.getLoc();
-    Value res = rewriter.create<amdgpu::SwizzleBitModeOp>(
-        loc, op.getResult(0).getType(), op.getValue(), /*andMask=*/31,
+    Value res = amdgpu::SwizzleBitModeOp::create(
+        rewriter, loc, op.getResult(0).getType(), op.getValue(), /*andMask=*/31,
         /*orMask=*/0, /*xorMask=*/offsetValue);
-    Value valid = rewriter.create<arith::ConstantIntOp>(loc, 1, /*width*/ 1);
+    Value valid = arith::ConstantIntOp::create(rewriter, loc, 1, /*width*/ 1);
     rewriter.replaceOp(op, {res, valid});
     return success();
   }
diff --git a/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp
index 2d6df0ff6d02d..d88f4d56d9009 100644
--- a/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/ShuffleRewriter.cpp
@@ -47,16 +47,16 @@ struct GpuShuffleRewriter : public OpRewritePattern<gpu::ShuffleOp> {

     // Float types must be converted to i64 to extract the bits.
     if (isa<FloatType>(valueType))
-      value = rewriter.create<arith::BitcastOp>(valueLoc, i64, value);
+      value = arith::BitcastOp::create(rewriter, valueLoc, i64, value);

     // Get the low bits by trunc(value).
-    lo = rewriter.create<arith::TruncIOp>(valueLoc, i32, value);
+    lo = arith::TruncIOp::create(rewriter, valueLoc, i32, value);

     // Get the high bits by trunc(value >> 32).
-    auto c32 = rewriter.create<arith::ConstantOp>(
-        valueLoc, rewriter.getIntegerAttr(i64, 32));
-    hi = rewriter.create<arith::ShRUIOp>(valueLoc, value, c32);
-    hi = rewriter.create<arith::TruncIOp>(valueLoc, i32, hi);
+    auto c32 = arith::ConstantOp::create(rewriter, valueLoc,
+                                         rewriter.getIntegerAttr(i64, 32));
+    hi = arith::ShRUIOp::create(rewriter, valueLoc, value, c32);
+    hi = arith::TruncIOp::create(rewriter, valueLoc, i32, hi);

     // Shuffle the values.
     ValueRange loRes =
@@ -71,21 +71,21 @@ struct GpuShuffleRewriter : public OpRewritePattern<gpu::ShuffleOp> {
             .getResults();

     // Convert lo back to i64.
-    lo = rewriter.create<arith::ExtUIOp>(valueLoc, i64, loRes[0]);
+    lo = arith::ExtUIOp::create(rewriter, valueLoc, i64, loRes[0]);

     // Convert hi back to i64.
-    hi = rewriter.create<arith::ExtUIOp>(valueLoc, i64, hiRes[0]);
-    hi = rewriter.create<arith::ShLIOp>(valueLoc, hi, c32);
+    hi = arith::ExtUIOp::create(rewriter, valueLoc, i64, hiRes[0]);
+    hi = arith::ShLIOp::create(rewriter, valueLoc, hi, c32);

     // Obtain the shuffled bits hi | lo.
-    value = rewriter.create<arith::OrIOp>(loc, hi, lo);
+    value = arith::OrIOp::create(rewriter, loc, hi, lo);

     // Convert the value back to float.
     if (isa<FloatType>(valueType))
-      value = rewriter.create<arith::BitcastOp>(valueLoc, valueType, value);
+      value = arith::BitcastOp::create(rewriter, valueLoc, valueType, value);

     // Obtain the shuffle validity by combining both validities.
-    auto validity = rewriter.create<arith::AndIOp>(loc, loRes[1], hiRes[1]);
+    auto validity = arith::AndIOp::create(rewriter, loc, loRes[1], hiRes[1]);

     // Replace the op.
     rewriter.replaceOp(op, {value, validity});
diff --git a/mlir/lib/Dialect/GPU/Transforms/SubgroupIdRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/SubgroupIdRewriter.cpp
index 05631ad87dd71..79be247c2a6b5 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SubgroupIdRewriter.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SubgroupIdRewriter.cpp
@@ -54,23 +54,25 @@ struct GpuSubgroupIdRewriter final : OpRewritePattern<gpu::SubgroupIdOp> {
     Location loc = op->getLoc();
     Type indexType = rewriter.getIndexType();

-    Value dimX = rewriter.create<gpu::BlockDimOp>(loc, gpu::Dimension::x);
-    Value dimY = rewriter.create<gpu::BlockDimOp>(loc, gpu::Dimension::y);
-    Value tidX = rewriter.create<gpu::ThreadIdOp>(loc, gpu::Dimension::x);
-    Value tidY = rewriter.create<gpu::ThreadIdOp>(loc, gpu::Dimension::y);
-    Value tidZ = rewriter.create<gpu::ThreadIdOp>(loc, gpu::Dimension::z);
+    Value dimX = gpu::BlockDimOp::create(rewriter, loc, gpu::Dimension::x);
+    Value dimY = gpu::BlockDimOp::create(rewriter, loc, gpu::Dimension::y);
+    Value tidX = gpu::ThreadIdOp::create(rewriter, loc, gpu::Dimension::x);
+    Value tidY = gpu::ThreadIdOp::create(rewriter, loc, gpu::Dimension::y);
+    Value tidZ = gpu::ThreadIdOp::create(rewriter, loc, gpu::Dimension::z);

-    Value dimYxIdZ = rewriter.create<arith::MulIOp>(loc, indexType, dimY, tidZ);
+    Value dimYxIdZ =
+        arith::MulIOp::create(rewriter, loc, indexType, dimY, tidZ);
     Value dimYxIdZPlusIdY =
-        rewriter.create<arith::AddIOp>(loc, indexType, dimYxIdZ, tidY);
+        arith::AddIOp::create(rewriter, loc, indexType, dimYxIdZ, tidY);
     Value dimYxIdZPlusIdYTimesDimX =
-        rewriter.create<arith::MulIOp>(loc, indexType, dimX, dimYxIdZPlusIdY);
-    Value IdXPlusDimYxIdZPlusIdYTimesDimX = rewriter.create<arith::AddIOp>(
-        loc, indexType, tidX, dimYxIdZPlusIdYTimesDimX);
-    Value subgroupSize = rewriter.create<gpu::SubgroupSizeOp>(
-        loc, rewriter.getIndexType(), /*upper_bound = */ nullptr);
-    Value subgroupIdOp = rewriter.create<arith::DivUIOp>(
-        loc, indexType, IdXPlusDimYxIdZPlusIdYTimesDimX, subgroupSize);
+        arith::MulIOp::create(rewriter, loc, indexType, dimX, dimYxIdZPlusIdY);
+    Value IdXPlusDimYxIdZPlusIdYTimesDimX = arith::AddIOp::create(
+        rewriter, loc, indexType, tidX, dimYxIdZPlusIdYTimesDimX);
+    Value subgroupSize = gpu::SubgroupSizeOp::create(
+        rewriter, loc, rewriter.getIndexType(), /*upper_bound = */ nullptr);
+    Value subgroupIdOp =
+        arith::DivUIOp::create(rewriter, loc, indexType,
+                               IdXPlusDimYxIdZPlusIdYTimesDimX, subgroupSize);
     rewriter.replaceOp(op, {subgroupIdOp});
     return success();
   }
diff --git a/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp b/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp
index 1b3d13623c548..b9e2dd5b19a6f 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SubgroupReduceLowering.cpp
@@ -79,7 +79,7 @@ struct BreakDownSubgroupReduce final : OpRewritePattern<gpu::SubgroupReduceOp> {

     Location loc = op.getLoc();
     Value res =
-        rewriter.create<arith::ConstantOp>(loc, rewriter.getZeroAttr(vecTy));
+        arith::ConstantOp::create(rewriter, loc, rewriter.getZeroAttr(vecTy));

     for (unsigned i = 0; i != numNewReductions; ++i) {
       int64_t startIdx = i * elementsPerShuffle;
@@ -90,23 +90,24 @@ struct BreakDownSubgroupReduce final : OpRewritePattern<gpu::SubgroupReduceOp> {
       Value extracted;
       if (numElems == 1) {
         extracted =
-            rewriter.create<vector::ExtractOp>(loc, op.getValue(), startIdx);
+            vector::ExtractOp::create(rewriter, loc, op.getValue(), startIdx);
       } else {
-        extracted = rewriter.create<vector::ExtractStridedSliceOp>(
-            loc, op.getValue(), /*offsets=*/startIdx, /*sizes=*/numElems,
+        extracted = vector::ExtractStridedSliceOp::create(
+            rewriter, loc, op.getValue(), /*offsets=*/startIdx,
+            /*sizes=*/numElems,
             /*strides=*/1);
       }

-      Value reduce = rewriter.create<gpu::SubgroupReduceOp>(
-          loc, extracted, op.getOp(), op.getUniform(), op.getClusterSize(),
-          op.getClusterStride());
+      Value reduce = gpu::SubgroupReduceOp::create(
+          rewriter, loc, extracted, op.getOp(), op.getUniform(),
+          op.getClusterSize(), op.getClusterStride());

       if (numElems == 1) {
-        res = rewriter.create<vector::InsertOp>(loc, reduce, res, startIdx);
+        res = vector::InsertOp::create(rewriter, loc, reduce, res, startIdx);
         continue;
       }

-      res = rewriter.create<vector::InsertStridedSliceOp>(
-          loc, reduce, res, /*offsets=*/startIdx, /*strides=*/1);
+      res = vector::InsertStridedSliceOp::create(
+          rewriter, loc, reduce, res, /*offsets=*/startIdx, /*strides=*/1);
     }

     rewriter.replaceOp(op, res);
@@ -138,10 +139,11 @@ struct ScalarizeSingleElementReduce final
     assert(vecTy.getRank() == 1 && "Unexpected vector type");
     assert(!vecTy.isScalable() && "Unexpected vector type");
     Location loc = op.getLoc();
-    Value extracted = rewriter.create<vector::ExtractOp>(loc, op.getValue(), 0);
-    Value reduce = rewriter.create<gpu::SubgroupReduceOp>(
-        loc, extracted, op.getOp(), op.getUniform(), op.getClusterSize(),
-        op.getClusterStride());
+    Value extracted =
+        vector::ExtractOp::create(rewriter, loc, op.getValue(), 0);
+    Value reduce = gpu::SubgroupReduceOp::create(
+        rewriter, loc, extracted, op.getOp(), op.getUniform(),
+        op.getClusterSize(), op.getClusterStride());
     rewriter.replaceOpWithNewOp<vector::BroadcastOp>(op, vecTy, reduce);
     return success();
   }
@@ -254,14 +256,14 @@ struct ScalarSubgroupReduceToShuffles final
     auto packFn = [loc, &rewriter, equivIntType,
                    shuffleIntType](Value unpackedVal) -> Value {
       auto asInt =
-          rewriter.create<arith::BitcastOp>(loc, equivIntType, unpackedVal);
-      return rewriter.create<arith::ExtUIOp>(loc, shuffleIntType, asInt);
+          arith::BitcastOp::create(rewriter, loc, equivIntType, unpackedVal);
+      return arith::ExtUIOp::create(rewriter, loc, shuffleIntType, asInt);
     };
     auto unpackFn = [loc, &rewriter, equivIntType,
                      valueTy](Value packedVal) -> Value {
       auto asInt =
-          rewriter.create<arith::TruncIOp>(loc, equivIntType, packedVal);
-      return rewriter.create<arith::BitcastOp>(loc, valueTy, asInt);
+          arith::TruncIOp::create(rewriter, loc, equivIntType, packedVal);
+      return arith::BitcastOp::create(rewriter, loc, valueTy, asInt);
     };

     rewriter.replaceOp(
@@ -326,10 +328,10 @@ struct VectorSubgroupReduceToShuffles final
         static_cast<int64_t>(elementsPerShuffle), vecTy.getElementType());
     Value extendedInput = op.getValue();
     if (vecBitwidth < shuffleBitwidth) {
-      auto zero = rewriter.create<arith::ConstantOp>(
-          loc, rewriter.getZeroAttr(extendedVecTy));
-      extendedInput = rewriter.create<vector::InsertStridedSliceOp>(
-          loc, extendedInput, zero, /*offsets=*/0, /*strides=*/1);
+      auto zero = arith::ConstantOp::create(
+          rewriter, loc, rewriter.getZeroAttr(extendedVecTy));
+      extendedInput = vector::InsertStridedSliceOp::create(
+          rewriter, loc, extendedInput, zero, /*offsets=*/0, /*strides=*/1);
     }

     auto shuffleIntType = rewriter.getIntegerType(shuffleBitwidth);
@@ -337,22 +339,22 @@ struct VectorSubgroupReduceToShuffles final
     auto packFn = [loc, &rewriter,
                    shuffleVecType](Value unpackedVal) -> Value {
       auto asIntVec =
-          rewriter.create<vector::BitCastOp>(loc, shuffleVecType, unpackedVal);
-      return rewriter.create<vector::ExtractOp>(loc, asIntVec, 0);
+          vector::BitCastOp::create(rewriter, loc, shuffleVecType, unpackedVal);
+      return vector::ExtractOp::create(rewriter, loc, asIntVec, 0);
     };
     auto unpackFn = [loc, &rewriter, shuffleVecType,
                      extendedVecTy](Value packedVal) -> Value {
       auto asIntVec =
-          rewriter.create<vector::BroadcastOp>(loc, shuffleVecType, packedVal);
-      return rewriter.create<vector::BitCastOp>(loc, extendedVecTy, asIntVec);
+          vector::BroadcastOp::create(rewriter, loc, shuffleVecType, packedVal);
+      return vector::BitCastOp::create(rewriter, loc, extendedVecTy, asIntVec);
     };

     Value res = createSubgroupShuffleReduction(
         rewriter, loc, extendedInput, op.getOp(), *ci, packFn, unpackFn);

     if (vecBitwidth < shuffleBitwidth) {
-      res = rewriter.create<vector::ExtractStridedSliceOp>(
-          loc, res, /*offsets=*/0, /*sizes=*/vecTy.getNumElements(),
+      res = vector::ExtractStridedSliceOp::create(
+          rewriter, loc, res, /*offsets=*/0, /*sizes=*/vecTy.getNumElements(),
           /*strides=*/1);
     }

@@ -378,8 +380,8 @@ createSubgroupDPPReduction(PatternRewriter &rewriter, gpu::SubgroupReduceOp op,
   const bool boundCtrl = true;
   if (ci.clusterSize >= 2) {
     // Perform reduction between all lanes N <-> N+1.
-    dpp = rewriter.create<amdgpu::DPPOp>(
-        loc, res.getType(), res, res, amdgpu::DPPPerm::quad_perm,
+    dpp = amdgpu::DPPOp::create(
+        rewriter, loc, res.getType(), res, res, amdgpu::DPPPerm::quad_perm,
         rewriter.getI32ArrayAttr({1, 0, 3, 2}), allRows, allBanks, boundCtrl);
     res = vector::makeArithReduction(rewriter, loc,
                                      gpu::convertReductionKind(mode), res, dpp);
@@ -387,8 +389,8 @@ createSubgroupDPPReduction(PatternRewriter &rewriter, gpu::SubgroupReduceOp op,

   if (ci.clusterSize >= 4) {
     // Perform reduction between all lanes N <-> N+2.
-    dpp = rewriter.create<amdgpu::DPPOp>(
-        loc, res.getType(), res, res, amdgpu::DPPPerm::quad_perm,
+    dpp = amdgpu::DPPOp::create(
+        rewriter, loc, res.getType(), res, res, amdgpu::DPPPerm::quad_perm,
         rewriter.getI32ArrayAttr({2, 3, 0, 1}), allRows, allBanks, boundCtrl);
     res = vector::makeArithReduction(rewriter, loc,
                                      gpu::convertReductionKind(mode), res, dpp);
@@ -396,17 +398,18 @@ createSubgroupDPPReduction(PatternRewriter &rewriter, gpu::SubgroupReduceOp op,
   if (ci.clusterSize >= 8) {
     // Perform reduction between all lanes N <-> 7-N,
     // e.g lane[0] <-> lane[7], lane[1] <-> lane[6]..., lane[3] <-> lane[4].
-    dpp = rewriter.create<amdgpu::DPPOp>(
-        loc, res.getType(), res, res, amdgpu::DPPPerm::row_half_mirror,
-        rewriter.getUnitAttr(), allRows, allBanks, boundCtrl);
+    dpp = amdgpu::DPPOp::create(rewriter, loc, res.getType(), res, res,
+                                amdgpu::DPPPerm::row_half_mirror,
+                                rewriter.getUnitAttr(), allRows, allBanks,
+                                boundCtrl);
     res = vector::makeArithReduction(rewriter, loc,
                                      gpu::convertReductionKind(mode), res, dpp);
   }

   if (ci.clusterSize >= 16) {
     // Perform reduction between all lanes N <-> 15-N,
     // e.g lane[0] <-> lane[15], lane[1] <-> lane[14]..., lane[7] <-> lane[8].
-    dpp = rewriter.create<amdgpu::DPPOp>(
-        loc, res.getType(), res, res, amdgpu::DPPPerm::row_mirror,
+    dpp = amdgpu::DPPOp::create(
+        rewriter, loc, res.getType(), res, res, amdgpu::DPPPerm::row_mirror,
         rewriter.getUnitAttr(), allRows, allBanks, boundCtrl);
     res = vector::makeArithReduction(rewriter, loc,
                                      gpu::convertReductionKind(mode), res, dpp);
@@ -415,20 +418,20 @@ createSubgroupDPPReduction(PatternRewriter &rewriter, gpu::SubgroupReduceOp op,
     if (chipset.majorVersion <= 9) {
       // Broadcast last value from each row to next row.
       // Use row mask to avoid polluting rows 1 and 3.
-      dpp = rewriter.create<amdgpu::DPPOp>(
-          loc, res.getType(), res, res, amdgpu::DPPPerm::row_bcast_15,
-          rewriter.getUnitAttr(), 0xa, allBanks,
-          /*bound_ctrl*/ false);
+      dpp = amdgpu::DPPOp::create(rewriter, loc, res.getType(), res, res,
+                                  amdgpu::DPPPerm::row_bcast_15,
+                                  rewriter.getUnitAttr(), 0xa, allBanks,
+                                  /*bound_ctrl*/ false);
       res = vector::makeArithReduction(
           rewriter, loc, gpu::convertReductionKind(mode), res, dpp);
     } else if (chipset.majorVersion <= 12) {
       // Use a permute lane to cross rows (row 1 <-> row 0, row 3 <-> row 2).
-      Value uint32Max = rewriter.create<arith::ConstantOp>(
-          loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(-1));
-      dpp = rewriter.create<ROCDL::PermlaneX16Op>(loc, res.getType(), res, res,
-                                                  uint32Max, uint32Max,
-                                                  /*fi=*/true,
-                                                  /*bound_ctrl=*/false);
+      Value uint32Max = arith::ConstantOp::create(
+          rewriter, loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(-1));
+      dpp = ROCDL::PermlaneX16Op::create(rewriter, loc, res.getType(), res, res,
+                                         uint32Max, uint32Max,
+                                         /*fi=*/true,
+                                         /*bound_ctrl=*/false);
       res = vector::makeArithReduction(
           rewriter, loc, gpu::convertReductionKind(mode), res, dpp);
     } else {
@@ -437,37 +440,39 @@ createSubgroupDPPReduction(PatternRewriter &rewriter, gpu::SubgroupReduceOp op,
                                          "this device.");
     }
     if (ci.subgroupSize == 32) {
-      Value lane31 = rewriter.create<arith::ConstantOp>(
-          loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(31));
-      res = rewriter.create<ROCDL::ReadlaneOp>(loc, res.getType(), res, lane31);
+      Value lane31 = arith::ConstantOp::create(
+          rewriter, loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(31));
+      res =
+          ROCDL::ReadlaneOp::create(rewriter, loc, res.getType(), res, lane31);
     }
   }

   if (ci.clusterSize >= 64) {
     if (chipset.majorVersion <= 9) {
       // Broadcast 31st lane value to rows 2 and 3.
-      dpp = rewriter.create<amdgpu::DPPOp>(
-          loc, res.getType(), res, res, amdgpu::DPPPerm::row_bcast_31,
-          rewriter.getUnitAttr(), 0xf, allBanks,
-          /*bound_ctrl*/ true);
+      dpp = amdgpu::DPPOp::create(rewriter, loc, res.getType(), res, res,
+                                  amdgpu::DPPPerm::row_bcast_31,
+                                  rewriter.getUnitAttr(), 0xf, allBanks,
+                                  /*bound_ctrl*/ true);
       res = vector::makeArithReduction(
           rewriter, loc, gpu::convertReductionKind(mode), dpp, res);

       // Obtain reduction from last rows, the previous rows are polluted.
-      Value lane63 = rewriter.create<arith::ConstantOp>(
-          loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(63));
-      res = rewriter.create<ROCDL::ReadlaneOp>(loc, res.getType(), res, lane63);
+      Value lane63 = arith::ConstantOp::create(
+          rewriter, loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(63));
+      res =
+          ROCDL::ReadlaneOp::create(rewriter, loc, res.getType(), res, lane63);
     } else if (chipset.majorVersion <= 12) {
       // Assume reduction across 32 lanes has been done.
       // Perform final reduction manually by summing values in lane 0 and
       // lane 32.
-      Value lane31 = rewriter.create<arith::ConstantOp>(
-          loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(31));
-      Value lane63 = rewriter.create<arith::ConstantOp>(
-          loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(63));
+      Value lane31 = arith::ConstantOp::create(
+          rewriter, loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(31));
+      Value lane63 = arith::ConstantOp::create(
+          rewriter, loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(63));
       lane31 =
-          rewriter.create<ROCDL::ReadlaneOp>(loc, res.getType(), res, lane31);
+          ROCDL::ReadlaneOp::create(rewriter, loc, res.getType(), res, lane31);
       lane63 =
-          rewriter.create<ROCDL::ReadlaneOp>(loc, res.getType(), res, lane63);
+          ROCDL::ReadlaneOp::create(rewriter, loc, res.getType(), res, lane63);
       res = vector::makeArithReduction(
           rewriter, loc, gpu::convertReductionKind(mode), lane31, lane63);
     } else {
diff --git a/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp b/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp
index 29f6f32892f72..384d1a0ddccd2 100644
--- a/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp
+++ b/mlir/lib/Dialect/GPU/Utils/DistributionUtils.cpp
@@ -27,9 +27,10 @@ WarpDistributionPattern::moveRegionToNewWarpOpAndReplaceReturns(
   // Create a new op before the existing one, with the extra operands.
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(warpOp);
-  auto newWarpOp = rewriter.create<WarpExecuteOnLane0Op>(
-      warpOp.getLoc(), newReturnTypes, warpOp.getLaneid(), warpOp.getWarpSize(),
-      warpOp.getArgs(), warpOp.getBody()->getArgumentTypes());
+  auto newWarpOp = WarpExecuteOnLane0Op::create(
+      rewriter, warpOp.getLoc(), newReturnTypes, warpOp.getLaneid(),
+      warpOp.getWarpSize(), warpOp.getArgs(),
+      warpOp.getBody()->getArgumentTypes());

   Region &opBody = warpOp.getBodyRegion();
   Region &newOpBody = newWarpOp.getBodyRegion();
@@ -124,7 +125,7 @@ bool WarpDistributionPattern::delinearizeLaneId(
   int64_t usedThreads = 1;

-  Value zero = builder.create<arith::ConstantIndexOp>(loc, 0);
+  Value zero = arith::ConstantIndexOp::create(builder, loc, 0);
   delinearizedIds.assign(sizes.size(), zero);

   for (int i = sizes.size() - 1; i >= 0; --i) {
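
Background for readers unfamiliar with the convention this patch applies: MLIR
is migrating op construction from the member template `OpBuilder::create<OpTy>`
to a static `create` method hosted on each op class. The sketch below is
illustrative only and not part of the patch; it assumes an `OpBuilder b` and a
`Location loc` in scope, with `arith::ConstantIndexOp` standing in for any op
class:

  // Old spelling: the op class is a template argument of OpBuilder::create.
  Value before = b.create<arith::ConstantIndexOp>(loc, /*value=*/0);

  // New spelling, used throughout this patch: a static create on the op class
  // takes the builder as its first argument; the remaining arguments match the
  // old call exactly.
  Value after = arith::ConstantIndexOp::create(b, loc, /*value=*/0);

The rewrite is mechanical: `builder.create<OpTy>(args...)` becomes
`OpTy::create(builder, args...)`. With an ImplicitLocOpBuilder the location
stays implicit, so `b.create<OpTy>(args...)` becomes `OpTy::create(b, args...)`
with no `loc` argument, as in the MemoryPromotion.cpp hunks above.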