-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix injection of GPU buffers that do not go by a Func name (i.e. alloc groups).
- Loading branch information
1 parent
9b703f3
commit 8095af6
Showing
3 changed files
with
63 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#include "Halide.h" | ||
|
||
using namespace Halide; | ||
|
||
int main(int argc, char *argv[]) { | ||
|
||
Target t = get_jit_target_from_environment(); | ||
if (!t.has_gpu_feature()) { | ||
printf("[SKIP] GPU not enabled\n"); | ||
return 0; | ||
} | ||
|
||
// There was a bug that causes the inject profiling logic to try to | ||
// lookup a Func from the environment, by the buffer name of an allocation group. | ||
// Of course there is no Func for that name. | ||
// This happens when the buffer originally was intended for GPUShared, but got somehow | ||
// lifted to Heap (which I ran into before, without doing it explicitly like this below). | ||
// --mcourteaux | ||
|
||
Var x{"x"}, y{"y"}; | ||
|
||
Func f1{"f1"}, f2{"f2"}; | ||
f1(x, y) = cast<float>(x + y); | ||
f2(x, y) = f1(x, y) * 2; | ||
|
||
Func result{"result"}; | ||
result(x, y) = f2(x, y); | ||
|
||
Var xo{"xo"}, yo{"yo"}, xi{"xi"}, yi{"yi"}; | ||
result | ||
.compute_root() | ||
.gpu_tile(x, y, xo, yo, xi, yi, 16, 16) | ||
.reorder(xi, yi, xo, yo); | ||
|
||
f2.compute_at(result, xo) | ||
.gpu_threads(x, y) | ||
.store_in(MemoryType::Heap); | ||
|
||
f1.compute_at(result, xo) | ||
.gpu_threads(x, y) | ||
.store_in(MemoryType::Heap); | ||
|
||
result.print_loop_nest(); | ||
|
||
t.set_feature(Target::Profile); // Make sure profiling is enabled! | ||
result.compile_jit(t); | ||
result.realize({64, 64}, t); | ||
// result.compile_to_conceptual_stmt("gpu_alloc_group_profiling.stmt.html", {}, Halide::HTML, t); | ||
|
||
printf("Success!\n"); | ||
return 0; | ||
} |