Skip to content

Commit

Permalink
Merge pull request #1032 from peteruhrig/master
Browse files Browse the repository at this point in the history
add support for PyTorch 1.11 and CUDA 11.3
  • Loading branch information
HaoyiZhu authored Jun 19, 2022
2 parents d97acd0 + 2330a3d commit 48567fe
Show file tree
Hide file tree
Showing 10 changed files with 101 additions and 101 deletions.
44 changes: 22 additions & 22 deletions alphapose/models/layers/dcn/src/deform_conv_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,26 +63,26 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
at::Tensor weight, int kH, int kW, int dH, int dW, int padH,
int padW, int dilationH, int dilationW, int group,
int deformable_group) {
AT_CHECK(weight.ndimension() == 4,
TORCH_CHECK(weight.ndimension() == 4,
"4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
"but got: %s",
weight.ndimension());

AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");

AT_CHECK(kW > 0 && kH > 0,
TORCH_CHECK(kW > 0 && kH > 0,
"kernel size should be greater than zero, but got kH: %d kW: %d", kH,
kW);

AT_CHECK((weight.size(2) == kH && weight.size(3) == kW),
TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW),
"kernel size should be consistent with weight, ",
"but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
kW, weight.size(2), weight.size(3));

AT_CHECK(dW > 0 && dH > 0,
TORCH_CHECK(dW > 0 && dH > 0,
"stride should be greater than zero, but got dH: %d dW: %d", dH, dW);

AT_CHECK(
TORCH_CHECK(
dilationW > 0 && dilationH > 0,
"dilation should be greater than 0, but got dilationH: %d dilationW: %d",
dilationH, dilationW);
Expand All @@ -98,7 +98,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
dimw++;
}

AT_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s",
TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s",
ndim);

long nInputPlane = weight.size(1) * group;
Expand All @@ -110,7 +110,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
long outputWidth =
(inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;

AT_CHECK(nInputPlane % deformable_group == 0,
TORCH_CHECK(nInputPlane % deformable_group == 0,
"input channels must divide deformable group size");

if (outputWidth < 1 || outputHeight < 1)
Expand All @@ -120,27 +120,27 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput,
nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight,
outputWidth);

AT_CHECK(input.size(1) == nInputPlane,
TORCH_CHECK(input.size(1) == nInputPlane,
"invalid number of input planes, expected: %d, but got: %d",
nInputPlane, input.size(1));

AT_CHECK((inputHeight >= kH && inputWidth >= kW),
TORCH_CHECK((inputHeight >= kH && inputWidth >= kW),
"input image is smaller than kernel");

AT_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),
TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth),
"invalid spatial size of offset, expected height: %d width: %d, but "
"got height: %d width: %d",
outputHeight, outputWidth, offset.size(2), offset.size(3));

AT_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),
TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW),
"invalid number of channels of offset");

if (gradOutput != NULL) {
AT_CHECK(gradOutput->size(dimf) == nOutputPlane,
TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane,
"invalid number of gradOutput planes, expected: %d, but got: %d",
nOutputPlane, gradOutput->size(dimf));

AT_CHECK((gradOutput->size(dimh) == outputHeight &&
TORCH_CHECK((gradOutput->size(dimh) == outputHeight &&
gradOutput->size(dimw) == outputWidth),
"invalid size of gradOutput, expected height: %d width: %d , but "
"got height: %d width: %d",
Expand Down Expand Up @@ -191,7 +191,7 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");

output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane,
outputHeight, outputWidth});
Expand Down Expand Up @@ -298,7 +298,7 @@ int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset,
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

AT_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset");
TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset");
gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth});
columns = at::zeros(
{nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
Expand Down Expand Up @@ -414,7 +414,7 @@ int deform_conv_backward_parameters_cuda(
long outputHeight =
(inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");
TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset");

columns = at::zeros(
{nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth},
Expand Down Expand Up @@ -494,8 +494,8 @@ void modulated_deform_conv_cuda_forward(
const int pad_h, const int pad_w, const int dilation_h,
const int dilation_w, const int group, const int deformable_group,
const bool with_bias) {
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
at::DeviceGuard guard(input.device());

const int batch = input.size(0);
Expand Down Expand Up @@ -576,8 +576,8 @@ void modulated_deform_conv_cuda_backward(
int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h,
int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
const bool with_bias) {
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous");
at::DeviceGuard guard(input.device());

const int batch = input.size(0);
Expand Down Expand Up @@ -698,4 +698,4 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("modulated_deform_conv_cuda_backward",
&modulated_deform_conv_cuda_backward,
"modulated deform conv backward (CUDA)");
}
}
48 changes: 24 additions & 24 deletions alphapose/models/layers/dcn/src/deform_conv_cuda_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,9 @@ void deformable_im2col(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_im.scalar_type(), "deformable_im2col_gpu", ([&] {
const scalar_t *data_im_ = data_im.data<scalar_t>();
const scalar_t *data_offset_ = data_offset.data<scalar_t>();
scalar_t *data_col_ = data_col.data<scalar_t>();
const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
scalar_t *data_col_ = data_col.data_ptr<scalar_t>();

deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w,
Expand Down Expand Up @@ -351,9 +351,9 @@ void deformable_col2im(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "deformable_col2im_gpu", ([&] {
const scalar_t *data_col_ = data_col.data<scalar_t>();
const scalar_t *data_offset_ = data_offset.data<scalar_t>();
scalar_t *grad_im_ = grad_im.data<scalar_t>();
const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();

deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_offset_, channels, height, width, ksize_h,
Expand Down Expand Up @@ -449,10 +449,10 @@ void deformable_col2im_coord(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] {
const scalar_t *data_col_ = data_col.data<scalar_t>();
const scalar_t *data_im_ = data_im.data<scalar_t>();
const scalar_t *data_offset_ = data_offset.data<scalar_t>();
scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();

deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_im_, data_offset_, channels, height, width,
Expand Down Expand Up @@ -779,10 +779,10 @@ void modulated_deformable_im2col_cuda(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] {
const scalar_t *data_im_ = data_im.data<scalar_t>();
const scalar_t *data_offset_ = data_offset.data<scalar_t>();
const scalar_t *data_mask_ = data_mask.data<scalar_t>();
scalar_t *data_col_ = data_col.data<scalar_t>();
const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
scalar_t *data_col_ = data_col.data_ptr<scalar_t>();

modulated_deformable_im2col_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w,
Expand Down Expand Up @@ -811,10 +811,10 @@ void modulated_deformable_col2im_cuda(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] {
const scalar_t *data_col_ = data_col.data<scalar_t>();
const scalar_t *data_offset_ = data_offset.data<scalar_t>();
const scalar_t *data_mask_ = data_mask.data<scalar_t>();
scalar_t *grad_im_ = grad_im.data<scalar_t>();
const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
scalar_t *grad_im_ = grad_im.data_ptr<scalar_t>();

modulated_deformable_col2im_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im,
Expand Down Expand Up @@ -844,12 +844,12 @@ void modulated_deformable_col2im_coord_cuda(

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] {
const scalar_t *data_col_ = data_col.data<scalar_t>();
const scalar_t *data_im_ = data_im.data<scalar_t>();
const scalar_t *data_offset_ = data_offset.data<scalar_t>();
const scalar_t *data_mask_ = data_mask.data<scalar_t>();
scalar_t *grad_offset_ = grad_offset.data<scalar_t>();
scalar_t *grad_mask_ = grad_mask.data<scalar_t>();
const scalar_t *data_col_ = data_col.data_ptr<scalar_t>();
const scalar_t *data_im_ = data_im.data_ptr<scalar_t>();
const scalar_t *data_offset_ = data_offset.data_ptr<scalar_t>();
const scalar_t *data_mask_ = data_mask.data_ptr<scalar_t>();
scalar_t *grad_offset_ = grad_offset.data_ptr<scalar_t>();
scalar_t *grad_mask_ = grad_mask.data_ptr<scalar_t>();

modulated_deformable_col2im_coord_gpu_kernel<<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS>>>(
num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im,
Expand Down
6 changes: 3 additions & 3 deletions alphapose/models/layers/dcn/src/deform_pool_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void deform_psroi_pooling_cuda_forward(
at::Tensor top_count, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std) {
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
at::DeviceGuard guard(input.device());

const int batch = input.size(0);
Expand All @@ -59,8 +59,8 @@ void deform_psroi_pooling_cuda_backward(
const int no_trans, const float spatial_scale, const int output_dim,
const int group_size, const int pooled_size, const int part_size,
const int sample_per_part, const float trans_std) {
AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
at::DeviceGuard guard(input.device());

const int batch = input.size(0);
Expand Down
26 changes: 13 additions & 13 deletions alphapose/models/layers/dcn/src/deform_pool_cuda_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -290,11 +290,11 @@ void DeformablePSROIPoolForward(const at::Tensor data,

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
data.scalar_type(), "deformable_psroi_pool_forward", ([&] {
const scalar_t *bottom_data = data.data<scalar_t>();
const scalar_t *bottom_rois = bbox.data<scalar_t>();
const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
scalar_t *top_data = out.data<scalar_t>();
scalar_t *top_count_data = top_count.data<scalar_t>();
const scalar_t *bottom_data = data.data_ptr<scalar_t>();
const scalar_t *bottom_rois = bbox.data_ptr<scalar_t>();
const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr<scalar_t>();
scalar_t *top_data = out.data_ptr<scalar_t>();
scalar_t *top_count_data = top_count.data_ptr<scalar_t>();

DeformablePSROIPoolForwardKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
count, bottom_data, (scalar_t)spatial_scale, channels, height, width, pooled_height, pooled_width,
Expand Down Expand Up @@ -341,13 +341,13 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
out_grad.scalar_type(), "deformable_psroi_pool_backward_acc", ([&] {
const scalar_t *top_diff = out_grad.data<scalar_t>();
const scalar_t *bottom_data = data.data<scalar_t>();
const scalar_t *bottom_rois = bbox.data<scalar_t>();
const scalar_t *bottom_trans = no_trans ? NULL : trans.data<scalar_t>();
scalar_t *bottom_data_diff = in_grad.data<scalar_t>();
scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data<scalar_t>();
const scalar_t *top_count_data = top_count.data<scalar_t>();
const scalar_t *top_diff = out_grad.data_ptr<scalar_t>();
const scalar_t *bottom_data = data.data_ptr<scalar_t>();
const scalar_t *bottom_rois = bbox.data_ptr<scalar_t>();
const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr<scalar_t>();
scalar_t *bottom_data_diff = in_grad.data_ptr<scalar_t>();
scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data_ptr<scalar_t>();
const scalar_t *top_count_data = top_count.data_ptr<scalar_t>();

DeformablePSROIPoolBackwardAccKernel<<<GET_BLOCKS(count), CUDA_NUM_THREADS>>>(
count, top_diff, top_count_data, num_rois, (scalar_t)spatial_scale, channels, height, width,
Expand All @@ -361,4 +361,4 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
{
printf("error in DeformablePSROIPoolForward: %s\n", cudaGetErrorString(err));
}
}
}
6 changes: 3 additions & 3 deletions alphapose/utils/roi_align/src/roi_align_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
const int pooled_height, const int pooled_width,
at::Tensor bottom_grad);

#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
Expand Down Expand Up @@ -82,4 +82,4 @@ int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)");
m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)");
}
}
20 changes: 11 additions & 9 deletions alphapose/utils/roi_align/src/roi_align_kernel.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <ATen/cuda/CUDAContext.h>


#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
Expand Down Expand Up @@ -132,17 +134,17 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
const int output_size = num_rois * pooled_height * pooled_width * channels;
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
features.scalar_type(), "ROIAlignLaucherForward", ([&] {
const scalar_t *bottom_data = features.data<scalar_t>();
const scalar_t *rois_data = rois.data<scalar_t>();
scalar_t *top_data = output.data<scalar_t>();
const scalar_t *bottom_data = features.data_ptr<scalar_t>();
const scalar_t *rois_data = rois.data_ptr<scalar_t>();
scalar_t *top_data = output.data_ptr<scalar_t>();

ROIAlignForward<scalar_t>
<<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, bottom_data, rois_data, scalar_t(spatial_scale),
sample_num, channels, height, width, pooled_height,
pooled_width, top_data);
}));
THCudaCheck(cudaGetLastError());
C10_CUDA_CHECK(cudaGetLastError());
return 1;
}

Expand Down Expand Up @@ -275,9 +277,9 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,

AT_DISPATCH_FLOATING_TYPES_AND_HALF(
top_grad.scalar_type(), "ROIAlignLaucherBackward", ([&] {
const scalar_t *top_diff = top_grad.data<scalar_t>();
const scalar_t *rois_data = rois.data<scalar_t>();
scalar_t *bottom_diff = bottom_grad.data<scalar_t>();
const scalar_t *top_diff = top_grad.data_ptr<scalar_t>();
const scalar_t *rois_data = rois.data_ptr<scalar_t>();
scalar_t *bottom_diff = bottom_grad.data_ptr<scalar_t>();
if (sizeof(scalar_t) == sizeof(double)) {
fprintf(stderr, "double is not supported\n");
exit(-1);
Expand All @@ -289,6 +291,6 @@ int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
channels, height, width, pooled_height, pooled_width,
bottom_diff);
}));
THCudaCheck(cudaGetLastError());
C10_CUDA_CHECK(cudaGetLastError());
return 1;
}
}
Loading

0 comments on commit 48567fe

Please sign in to comment.