Skip to content

Commit

Permalink
Fix injection of GPU buffers that do not go by a Func name (i.e. allo…
Browse files Browse the repository at this point in the history
…c groups).
  • Loading branch information
mcourteaux committed Jun 28, 2024
1 parent 9b703f3 commit 8095af6
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 2 deletions.
12 changes: 10 additions & 2 deletions src/Profiling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,17 @@ class InjectProfiling : public IRMutator {
auto [new_extents, changed] = mutate_with_changes(op->extents);
Expr condition = mutate(op->condition);

bool on_stack;
Expr size = compute_allocation_size(new_extents, condition, op->type, op->name, on_stack);
bool can_fit_on_stack;
Expr size = compute_allocation_size(new_extents, condition, op->type, op->name, can_fit_on_stack);
internal_assert(size.type() == UInt(64));

bool on_stack = can_fit_on_stack;
if (can_fit_on_stack) {
if (op->new_expr.defined()) {
on_stack = false;
}
}

func_alloc_sizes.push(op->name, {on_stack, size});

// compute_allocation_size() might return a zero size, if the allocation is
Expand Down
1 change: 1 addition & 0 deletions test/correctness/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ tests(GROUPS correctness
gameoflife.cpp
gather.cpp
gpu_allocation_cache.cpp
gpu_alloc_group_profiling.cpp
gpu_arg_types.cpp
gpu_assertion_in_kernel.cpp
gpu_bounds_inference_failure.cpp
Expand Down
52 changes: 52 additions & 0 deletions test/correctness/gpu_alloc_group_profiling.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#include "Halide.h"

using namespace Halide;

int main(int argc, char *argv[]) {

Target t = get_jit_target_from_environment();
if (!t.has_gpu_feature()) {
printf("[SKIP] GPU not enabled\n");
return 0;
}

// There was a bug that causes the inject profiling logic to try to
// lookup a Func from the environment, by the buffer name of an allocation group.
// Of course there is no Func for that name.
// This happens when the buffer originally was intended for GPUShared, but got somehow
// lifted to Heap (which I ran into before, without doing it explicitly like this below).
// --mcourteaux

Var x{"x"}, y{"y"};

Func f1{"f1"}, f2{"f2"};
f1(x, y) = cast<float>(x + y);
f2(x, y) = f1(x, y) * 2;

Func result{"result"};
result(x, y) = f2(x, y);

Var xo{"xo"}, yo{"yo"}, xi{"xi"}, yi{"yi"};
result
.compute_root()
.gpu_tile(x, y, xo, yo, xi, yi, 16, 16)
.reorder(xi, yi, xo, yo);

f2.compute_at(result, xo)
.gpu_threads(x, y)
.store_in(MemoryType::Heap);

f1.compute_at(result, xo)
.gpu_threads(x, y)
.store_in(MemoryType::Heap);

result.print_loop_nest();

t.set_feature(Target::Profile); // Make sure profiling is enabled!
result.compile_jit(t);
result.realize({64, 64}, t);
// result.compile_to_conceptual_stmt("gpu_alloc_group_profiling.stmt.html", {}, Halide::HTML, t);

printf("Success!\n");
return 0;
}

0 comments on commit 8095af6

Please sign in to comment.