add support for PyTorch 1.11 and CUDA 11.3 #1032

Merged · 1 commit, merged on Jun 19, 2022
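This PR migrates the repository's hand-written CUDA extensions (DCN deformable convolution/pooling and RoI Align) to the APIs required for building against PyTorch 1.11 with CUDA 11.3. The edits are mechanical, four substitutions applied throughout: the removed `AT_CHECK` macro becomes `TORCH_CHECK`, the deprecated `Tensor::data<T>()` accessor becomes `Tensor::data_ptr<T>()`, the deprecated `x.type().is_cuda()` query becomes `x.device().is_cuda()`, and the legacy THC helper `THCudaCheck` becomes `C10_CUDA_CHECK` (with `<ATen/cuda/CUDAContext.h>` added for it).

A minimal sketch of the post-migration idioms; the function and tensor below are hypothetical illustrations, not part of this PR:

```cpp
#include <torch/extension.h>

// Hypothetical helper showing the current extension API: TORCH_CHECK for
// argument validation, data_ptr<T>() for typed raw-pointer access.
at::Tensor scale_inplace(at::Tensor input, double factor) {
  TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
  TORCH_CHECK(input.dim() == 2, "2D tensor expected, but got: ", input.dim());

  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      input.scalar_type(), "scale_inplace", ([&] {
        scalar_t *data = input.data_ptr<scalar_t>();  // was: input.data<scalar_t>()
        for (int64_t i = 0; i < input.numel(); ++i) {
          data[i] = static_cast<scalar_t>(data[i] * factor);
        }
      }));
  return input;
}
```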
44 changes: 22 additions & 22 deletions alphapose/models/layers/dcn/src/deform_conv_cuda.cpp
@@ -63,26 +63,26 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
at::Tensor weight, int kH, int kW, int dH, int dW, int padH,
int padW, int dilationH, int dilationW, int group,
int deformable_group) {
-AT_CHECK(weight.ndimension() == 4,
+TORCH_CHECK(weight.ndimension() == 4,
"4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
"but got: %s",
weight.ndimension());

-AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");

-AT_CHECK(kW > 0 && kH > 0,
+TORCH_CHECK(kW > 0 && kH > 0,
"kernel size should be greater than zero, but got kH: %d kW: %d", kH,
kW);

-AT_CHECK((weight.size(2) == kH && weight.size(3) == kW),
+TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW),
"kernel size should be consistent with weight, ",
"but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
kW, weight.size(2), weight.size(3));

-AT_CHECK(dW > 0 && dH > 0,
+TORCH_CHECK(dW > 0 && dH > 0,
"stride should be greater than zero, but got dH: %d dW: %d", dH, dW);

-AT_CHECK(
+TORCH_CHECK(
dilationW > 0 && dilationH > 0,
"dilation should be greater than 0, but got dilationH: %d dilationW: %d",
dilationH, dilationW);
@@ -98,7 +98,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
dimw++;
}

-AT_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s",
+TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s",
ndim);

long nInputPlane = weight.size(1) * group;
@@ -110,7 +110,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
long outputWidth =
(inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;

-AT_CHECK(nInputPlane % deformable_group == 0,
+TORCH_CHECK(nInputPlane % deformable_group == 0,
"input channels must divide deformable group size");

if (outputWidth < 1 || outputHeight < 1)
@@ -120,27 +120,27 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight,
outputWidth);

-AT_CHECK(input.size(1) == nInputPlane,
+TORCH_CHECK(input.size(1) == nInputPlane,
"invalid number of input planes, expected: %d, but got: %d",
nInputPlane, input.size(1));

-AT_CHECK((inputHeight >= kH && inputWidth >= kW),
+TORCH_CHECK((inputHeight >= kH && inputWidth >= kW),
"input image is smaller than kernel");

-AT_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),
+TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),
"invalid spatial size of offset, expected height: %d width: %d, but "
"got height: %d width: %d",
outputHeight, outputWidth, offset.size(2), offset.size(3));

-AT_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),
+TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),
"invalid number of channels of offset");

if (gradOutput != NULL) {
-AT_CHECK(gradOutput->size(dimf) == nOutputPlane,
+TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane,
"invalid number of gradOutput planes, expected: %d, but got: %d",
nOutputPlane, gradOutput->size(dimf));

-AT_CHECK((gradOutput->size(dimh) == outputHeight &&
+TORCH_CHECK((gradOutput->size(dimh) == outputHeight &&
gradOutput->size(dimw) == outputWidth),
"invalid size of gradOutput, expected height: %d width: %d , but "
"got height: %d width: %d",
@@ -191,7 +191,7 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

-AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
+TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");

output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane,
outputHeight, outputWidth});
@@ -298,7 +298,7 @@ int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

-AT_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset");
+TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset");
gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth});
columns = at::zeros(
{nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
@@ -414,7 +414,7 @@ int deform_conv_backward_parameters_cuda(
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

-AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
+TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");

columns = at::zeros(
{nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
@@ -494,8 +494,8 @@ void modulated_deform_conv_cuda_forward(
const int pad_h, const int pad_w, const int dilation_h,
const int dilation_w, const int group, const int deformable_group,
const bool with_bias) {
-AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
-AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
at::DeviceGuard guard(input.device());

const int batch = input.size(0);
@@ -576,8 +576,8 @@ void modulated_deform_conv_cuda_backward(
int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
const bool with_bias) {
-AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
-AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
+TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
at::DeviceGuard guard(input.device());

const int batch = input.size(0);
@@ -698,4 +698,4 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("modulated_deform_conv_cuda_backward",
&modulated_deform_conv_cuda_backward,
"modulated deform conv backward (CUDA)");
-}
\ No newline at end of file
+}
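One nuance worth noting: `TORCH_CHECK` assembles its failure message by concatenating the trailing arguments stream-style rather than performing printf substitution, so the `%d`/`%s` placeholders retained from the old checks are printed literally with the argument values appended after them. The checks still compile and fire correctly; only the message formatting is quirky. A cleaner form of one check above would look like this (illustrative, not part of this PR):

```cpp
// Stream-style message arguments; TORCH_CHECK concatenates them into the
// error text when the condition fails.
TORCH_CHECK(kW > 0 && kH > 0,
            "kernel size should be greater than zero, but got kH: ", kH,
            " kW: ", kW);
```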
48 changes: 24 additions & 24 deletions alphapose/models/layers/dcn/src/deform_conv_cuda_kernel.cu
@@ -257,9 +257,9 @@ void deformable_im2col(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_im.scalar_type(), "deformable_im2col_gpu", ([&] {
-const scalar_t *data_im_ = data_im.data<scalar_t>();
-const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-scalar_t *data_col_ = data_col.data<scalar_t>();
+const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+scalar_t *data_col_ = data_col.data_ptr<scalar_t>();

deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w,
@@ -351,9 +351,9 @@ void deformable_col2im(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "deformable_col2im_gpu", ([&] {
-const scalar_t *data_col_ = data_col.data<scalar_t>();
-const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-scalar_t *grad_im_ = grad_im.data<scalar_t>();
+const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();

deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_offset_, channels, height, width, ksize_h,
@@ -449,10 +449,10 @@ void deformable_col2im_coord(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] {
-const scalar_t *data_col_ = data_col.data<scalar_t>();
-const scalar_t *data_im_ = data_im.data<scalar_t>();
-const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
+const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();

deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_im_, data_offset_, channels, height, width,
@@ -779,10 +779,10 @@ void modulated_deformable_im2col_cuda(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] {
-const scalar_t *data_im_ = data_im.data<scalar_t>();
-const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-const scalar_t *data_mask_ = data_mask.data<scalar_t>();
-scalar_t *data_col_ = data_col.data<scalar_t>();
+const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+scalar_t *data_col_ = data_col.data_ptr<scalar_t>();

modulated_deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w,
@@ -811,10 +811,10 @@ void modulated_deformable_col2im_cuda(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] {
-const scalar_t *data_col_ = data_col.data<scalar_t>();
-const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-const scalar_t *data_mask_ = data_mask.data<scalar_t>();
-scalar_t *grad_im_ = grad_im.data<scalar_t>();
+const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();

modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
@@ -844,12 +844,12 @@ void modulated_deformable_col2im_coord_cuda(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] {
-const scalar_t *data_col_ = data_col.data<scalar_t>();
-const scalar_t *data_im_ = data_im.data<scalar_t>();
-const scalar_t *data_offset_ = data_offset.data<scalar_t>();
-const scalar_t *data_mask_ = data_mask.data<scalar_t>();
-scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
-scalar_t *grad_mask_ = grad_mask.data<scalar_t>();
+const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
+const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
+const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
+const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
+scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
+scalar_t *grad_mask_ = grad_mask.data_ptr<scalar_t>();

modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
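`Tensor::data<T>()` has been deprecated in favor of `Tensor::data_ptr<T>()` for several releases; both return the same typed raw pointer, but the old spelling emits deprecation warnings that can break `-Werror` builds on newer toolchains. A minimal sketch of the substitution (hypothetical tensor, illustrative only):

```cpp
// Same raw pointer, current vs. deprecated spelling.
at::Tensor t = at::zeros({4}, at::kFloat);
float *p = t.data_ptr<float>();  // current accessor
// float *q = t.data<float>();   // deprecated; warns (or errors) on newer PyTorch
```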
6 changes: 3 additions & 3 deletions alphapose/models/layers/dcn/src/deform_pool_cuda.cpp
@@ -33,7 +33,7 @@ void deform_psroi_pooling_cuda_forward(
at::Tensor top_count, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std) {
-AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
at::DeviceGuard guard(input.device());

const int batch = input.size(0);
@@ -59,8 +59,8 @@ void deform_psroi_pooling_cuda_backward(
const int no_trans, const float spatial_scale, const int output_dim,
const int group_size, const int pooled_size, const int part_size,
const int sample_per_part, const float trans_std) {
-AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
-AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
+TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
+TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
at::DeviceGuard guard(input.device());

const int batch = input.size(0);
26 changes: 13 additions & 13 deletions alphapose/models/layers/dcn/src/deform_pool_cuda_kernel.cu
@@ -290,11 +290,11 @@ void DeformablePSROIPoolForward(const at::Tensor data,

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data.scalar_type(), "deformable_psroi_pool_forward", ([&] {
-const scalar_t *bottom_data = data.data<scalar_t>();
-const scalar_t *bottom_rois = bbox.data<scalar_t>();
-const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
-scalar_t *top_data = out.data<scalar_t>();
-scalar_t *top_count_data = top_count.data<scalar_t>();
+const scalar_t *bottom_data = data.data_ptr<scalar_t>();
+const scalar_t *bottom_rois = bbox.data_ptr<scalar_t>();
+const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr<scalar_t>();
+scalar_t *top_data = out.data_ptr<scalar_t>();
+scalar_t *top_count_data = top_count.data_ptr<scalar_t>();

DeformablePSROIPoolForwardKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
count, bottom_data, (scalar_t)spatial_scale, channels, height, width, pooled_height, pooled_width,
@@ -341,13 +341,13 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
out_grad.scalar_type(), "deformable_psroi_pool_backward_acc", ([&] {
-const scalar_t *top_diff = out_grad.data<scalar_t>();
-const scalar_t *bottom_data = data.data<scalar_t>();
-const scalar_t *bottom_rois = bbox.data<scalar_t>();
-const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
-scalar_t *bottom_data_diff = in_grad.data<scalar_t>();
-scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data<scalar_t>();
-const scalar_t *top_count_data = top_count.data<scalar_t>();
+const scalar_t *top_diff = out_grad.data_ptr<scalar_t>();
+const scalar_t *bottom_data = data.data_ptr<scalar_t>();
+const scalar_t *bottom_rois = bbox.data_ptr<scalar_t>();
+const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr<scalar_t>();
+scalar_t *bottom_data_diff = in_grad.data_ptr<scalar_t>();
+scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data_ptr<scalar_t>();
+const scalar_t *top_count_data = top_count.data_ptr<scalar_t>();

DeformablePSROIPoolBackwardAccKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
count, top_diff, top_count_data, num_rois, (scalar_t)spatial_scale, channels, height, width,
@@ -361,4 +361,4 @@
{
printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err));
}
-}
\ No newline at end of file
+}
6 changes: 3 additions & 3 deletions alphapose/utils/roi_align/src/roi_align_cuda.cpp
@@ -17,9 +17,9 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
const int pooled_height, const int pooled_width,
at::Tensor bottom_grad);

-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
-AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
@@ -82,4 +82,4 @@ int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)");
m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)");
-}
\ No newline at end of file
+}
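The `CHECK_CUDA` change reflects the deprecation of `Tensor::type()` (which returned the legacy `DeprecatedTypeProperties`): device placement is now queried through `x.device().is_cuda()`, for which `Tensor::is_cuda()` is an equivalent shorthand. Illustrative only, not part of this PR:

```cpp
// Equivalent modern device checks on a hypothetical tensor.
at::Tensor rois = at::zeros({8, 5}, at::kFloat).cuda();
TORCH_CHECK(rois.device().is_cuda(), "rois must be a CUDA tensor");
TORCH_CHECK(rois.is_cuda(), "rois must be a CUDA tensor");  // shorthand
```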
20 changes: 11 additions & 9 deletions alphapose/utils/roi_align/src/roi_align_kernel.cu
@@ -1,5 +1,7 @@
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
+#include <ATen/cuda/CUDAContext.h>
+

#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
@@ -132,17 +134,17 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
const int output_size = num_rois * pooled_height * pooled_width * channels;
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
features.scalar_type(), "ROIAlignLaucherForward", ([&] {
-const scalar_t *bottom_data = features.data<scalar_t>();
-const scalar_t *rois_data = rois.data<scalar_t>();
-scalar_t *top_data = output.data<scalar_t>();
+const scalar_t *bottom_data = features.data_ptr<scalar_t>();
+const scalar_t *rois_data = rois.data_ptr<scalar_t>();
+scalar_t *top_data = output.data_ptr<scalar_t>();

ROIAlignForward<scalar_t>
<<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, bottom_data, rois_data, scalar_t(spatial_scale),
sample_num, channels, height, width, pooled_height,
pooled_width, top_data);
}));
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
return 1;
}

@@ -275,9 +277,9 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
top_grad.scalar_type(), "ROIAlignLaucherBackward", ([&] {
-const scalar_t *top_diff = top_grad.data<scalar_t>();
-const scalar_t *rois_data = rois.data<scalar_t>();
-scalar_t *bottom_diff = bottom_grad.data<scalar_t>();
+const scalar_t *top_diff = top_grad.data_ptr<scalar_t>();
+const scalar_t *rois_data = rois.data_ptr<scalar_t>();
+scalar_t *bottom_diff = bottom_grad.data_ptr<scalar_t>();
if (sizeof(scalar_t) == sizeof(double)) {
fprintf(stderr, "double is not supported\n");
exit(-1);
Expand All @@ -289,6 +291,6 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
channels, height, width, pooled_height, pooled_width,
bottom_diff);
}));
-THCudaCheck(cudaGetLastError());
+C10_CUDA_CHECK(cudaGetLastError());
return 1;
-}
\ No newline at end of file
+}
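`THCudaCheck` belonged to the legacy THC library, which was dismantled over roughly the 1.10 to 1.13 releases; `C10_CUDA_CHECK`, reachable through the newly added `<ATen/cuda/CUDAContext.h>` include, plays the same role and raises a C++ error if the wrapped call does not return `cudaSuccess`. A hypothetical launch-and-check sketch (the kernel and pointers are placeholders, not from this PR):

```cpp
// Surface an asynchronous launch failure right after the kernel call,
// using the c10 macro instead of the removed THCudaCheck.
my_kernel<<<GET_BLOCKS(n), THREADS_PER_BLOCK>>>(n, in_ptr, out_ptr);
C10_CUDA_CHECK(cudaGetLastError());
```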