[GPGPU] Use separate basic block for GPU initialization code

This increases the readability of the IR and also clarifies that the GPU inititialization is executed _after_ the scalar initialization which needs to before the code of the transformed scop is executed. Besides increased readability, the IR should not change. Specifically, I do not expect any changes in program semantics due to this patch. llvm-svn: 278125
2025-01-13 19:32:41 +00:00 · 2016-08-09 15:35:03 +00:00 · 2016-08-09 15:35:03 +00:00 · 750160e260
commit 750160e260
parent 776700d0b7
2 changed files with 9 additions and 0 deletions
--- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp
@ -35,6 +35,7 @@
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"

 #include "isl/union_map.h"

@ -436,6 +437,11 @@ private:
 };

 void GPUNodeBuilder::initializeAfterRTH() {
+  BasicBlock *NewBB = SplitBlock(Builder.GetInsertBlock(),
+                                 &*Builder.GetInsertPoint(), &DT, &LI);
+  NewBB->setName("polly.acc.initialize");
+  Builder.SetInsertPoint(&NewBB->front());
+
  GPUContext = createCallInitContext();
  allocateDeviceArrays();
 }
--- a/polly/test/GPGPU/double-parallel-loop.ll
+++ b/polly/test/GPGPU/double-parallel-loop.ll
@ -92,6 +92,9 @@
 ; IR-NEXT:    br i1 true, label %polly.start, label %bb2

 ; IR: polly.start:
+; IR-NEXT: br label %polly.acc.initialize
+
+; IR: polly.acc.initialize:
 ; IR-NEXT:    [[GPUContext:%.*]] = call i8* @polly_initContext()
 ; IR-NEXT:    %p_dev_array_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304)
 ; IR-NEXT:    [[HostPtr:%.*]] = bitcast [1024 x float]* %A to i8*