Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[gTest] Add more test coverage for Batchnorm Backward #3445

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from

Conversation

xinlipn
Copy link
Contributor

@xinlipn xinlipn commented Dec 19, 2024

More test cases added for better coverage. Here's the summary:

  1. N C H W: 128 256 14 14
    Covers:
    backward_spatial_single.cpp: variant == 3 and variant == 1 (2nd)
    128 256 14 14.txt

  2. N C H W: 128 256 16 16
    Covers:
    backward_spatial_single.cpp: variant == 3
    128 256 16 16.txt

  3. N C H W: 768 1 14 14
    Covers:
    backward_spatial_multiple.cpp: variant == 2
    670 1 224 224.txt
    Three tests failed

Notes:
For the same reason as in PR #3444:

  1. backward_spatial_single.cpp will never reach this snippet; therefore, variant will never be 2:

    else
    {
    variant = 2;
    ylocalsize = 1024;
    auto segment = int(std::ceil(double(in_cstride) / double(ylocalsize)));
    xgridsize = c;
    ygridsize = segment * ylocalsize;
    ldsgcn = ylocalsize / wavesize;
    ldsnogcn = ylocalsize;
    }

  2. backward_spatial_multiple.cpp will never reach the following two snippets:

    if((in_nhw < (32 * 1024 * 1024) && in_cstride > 1024))
    {
    variant = 1;
    xlocalsize = 1024;
    xgridsize = c * xlocalsize;
    ldsgcn = xlocalsize / 64;
    ldsnogcn = xlocalsize;
    }
    //*************************************************************************************************
    // N*H*W < 32M and H*W > 512 use batchnorm variant#1 or variant#3 implementation which
    // parallelize
    // work groups over channels and loop through N.
    //*************************************************************************************************
    else if(in_nhw < (32 * 1024 * 1024) && in_cstride > 512)
    {
    variant = (n >= 32) ? 1 : 3;
    xlocalsize = std::min(64 * ((in_cstride + 63) / 64), static_cast<unsigned int>(1024));
    xgridsize = c * xlocalsize;
    ldsgcn = xlocalsize / 64;
    ldsnogcn = xlocalsize;
    }
    //*************************************************************************************************
    // H*W < 512 use batchnorm variant#0 or variant#3 implementation based on batch size and
    // H*W
    //*************************************************************************************************
    else if(in_cstride <= 512)
    {
    if((n > 64) && (in_cstride > 160))
    {
    variant = 3;
    xlocalsize =
    std::min(64 * ((in_cstride + 63) / 64), static_cast<unsigned int>(1024));
    xgridsize = c * xlocalsize;
    ldsgcn = xlocalsize / 64;
    ldsnogcn = xlocalsize;
    }
    else
    {
    variant = 0;
    if(bfp32parm)
    {
    xlocalsize = 1024;
    xgridsize = static_cast<size_t>(1024) * c;
    }
    else
    {
    xlocalsize = 256;
    xgridsize = static_cast<size_t>(256) * c;
    }
    ldsgcn = xlocalsize / 64;
    ldsnogcn = xlocalsize;
    }
    }

    if((in_cstride < 200) && (in_cstride > 60) && bfpmixparm)
    {
    variant = 1;
    xlocalsize = 1024;
    xgridsize = c * xlocalsize;
    ldsgcn = xlocalsize / 64;
    ldsnogcn = xlocalsize;
    }

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants