-
Notifications
You must be signed in to change notification settings - Fork 1
/
reveal.h
44 lines (37 loc) · 1.33 KB
/
reveal.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#pragma once
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <memory>
#include <string>
#include <type_traits>
#include "cuda_runtime_api.h"
#include "md_view.h"
// Copies the elements referenced by an md_view from device memory into a
// temporary host buffer and writes their raw bytes to a binary file.
//
// Template parameters:
//   T - element type of the view. cv-qualifiers are stripped for the host
//       staging buffer so that a view over const elements can be dumped too.
//   D - second template argument of md_view (presumably the rank of the
//       view - confirm against md_view.h). It is not used in the body.
//
// Parameters:
//   t    - view to dump; t.size() elements starting at t.data are copied with
//          cudaMemcpyDeviceToHost.
//   name - path of the output file, opened in binary mode.
//
// Nothing is returned. If the copy, the open or the write fails, a message is
// printed to stderr; after a failed copy no file is created, so a dump never
// silently contains the zero-initialised staging buffer.
//
// NOTE(review): constructing std::ofstream from a std::wstring is not one of
// the standard constructor overloads as far as I can tell (it is accepted by
// MSVC) - confirm the intended toolchain before porting.
template<class T, std::size_t D>
void dump_value(md_view<T, D> t, std::wstring name) {
    // A buffer of const elements could not be used as a cudaMemcpy destination.
    using value_type = typename std::remove_cv<T>::type;

    const auto size = t.size();
    const auto bytes = size * sizeof(value_type);
    auto host_pointer = std::make_unique<value_type[]>(size);

    const auto status = cudaMemcpy(host_pointer.get(), t.data, bytes, cudaMemcpyDeviceToHost);
    if (status != cudaSuccess) {
        std::fprintf(stderr, "dump_value: cudaMemcpy failed: %s\n", cudaGetErrorString(status));
        return;
    }

    auto h = host_pointer.get();
    // Now feel free to examine host_pointer (or through h)
    std::ofstream p(name, std::ios::binary);
    if (!p) {
        std::fprintf(stderr, "dump_value: could not open the output file\n");
        return;
    }
    p.write(reinterpret_cast<const char*>(h), bytes);
    if (!p) {
        std::fprintf(stderr, "dump_value: writing the output file failed\n");
    }
    // The stream destructor flushes and closes the file.
}
template<class T>
void dump_value(const T* t, size_t size, std::wstring name) {
auto host_pointer = std::make_unique<T[]>(size);
cudaMemcpy(host_pointer.get(), t, size * sizeof(T), cudaMemcpyDeviceToHost);
auto h = host_pointer.get();
// Now feel free to examine host_pointer (or through h)
std::ofstream p(name, std::ios::binary);
p.write((const char*)(h), size * sizeof(T));
p.close();
}
// Debugging aid: copies the elements referenced by an md_view from device
// memory into a temporary host buffer so they can be inspected in a debugger.
// The buffer is released when the function returns; nothing is returned or
// written anywhere else.
//
// Template parameters:
//   T - element type of the view; std::decay<T> is used for the host copy.
//   D - second template argument of md_view (presumably the rank of the
//       view - confirm against md_view.h). It is not used in the body.
//
// Parameters:
//   t - view to inspect; t.size() elements starting at t.data are copied with
//       cudaMemcpyDeviceToHost.
//
// If the copy fails, the CUDA error string is printed to stderr so the
// zero-initialised buffer is not mistaken for device contents.
template<class T, std::size_t D>
void debug_me_show_memory(md_view<T, D> t) {
    // Named value_type rather than _T: identifiers that start with an
    // underscore followed by an uppercase letter are reserved.
    using value_type = typename std::decay<T>::type;

    const auto size = t.size();
    const auto bytes = size * sizeof(value_type);
    auto host_pointer = std::make_unique<value_type[]>(size);

    const auto status = cudaMemcpy(host_pointer.get(), t.data, bytes, cudaMemcpyDeviceToHost);
    if (status != cudaSuccess) {
        std::fprintf(stderr, "debug_me_show_memory: cudaMemcpy failed: %s\n", cudaGetErrorString(status));
        return;
    }

    auto h = host_pointer.get();
    (void)h;  // only here to be looked at in a debugger; silences unused-variable warnings
    // Now feel free to examine host_pointer (or through h)
}