diff --git a/README.md b/README.md index 5ac8b483bdf..915cc834b88 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ use CMake's `BUILD_opencv_*` options. Like in this example: $ cmake -DOPENCV_EXTRA_MODULES_PATH=/modules -DBUILD_opencv_legacy=OFF ``` +If you also want to build the samples from the "samples" folder of each module, also include the "-DBUILD_EXAMPLES=ON" option. + If you prefer using the gui version of cmake (cmake-gui), then, you can add `opencv_contrib` modules within `opencv` core by doing the following: 1. start cmake-gui diff --git a/modules/README.md b/modules/README.md index 6d51a4db49d..26387d2e34c 100644 --- a/modules/README.md +++ b/modules/README.md @@ -48,13 +48,13 @@ $ cmake -D OPENCV_EXTRA_MODULES_PATH=/modules -D BUILD_opencv_ &board, - InputArray cameraMatrix, InputArray distCoeffs, OutputArray rvec, - OutputArray tvec, bool useExtrinsicGuess = false); + InputArray cameraMatrix, InputArray distCoeffs, InputOutputArray rvec, + InputOutputArray tvec, bool useExtrinsicGuess = false); diff --git a/modules/aruco/include/opencv2/aruco/charuco.hpp b/modules/aruco/include/opencv2/aruco/charuco.hpp index be535c7606b..22e0d052f2f 100644 --- a/modules/aruco/include/opencv2/aruco/charuco.hpp +++ b/modules/aruco/include/opencv2/aruco/charuco.hpp @@ -184,8 +184,8 @@ CV_EXPORTS_W int interpolateCornersCharuco(InputArrayOfArrays markerCorners, Inp */ CV_EXPORTS_W bool estimatePoseCharucoBoard(InputArray charucoCorners, InputArray charucoIds, const Ptr &board, InputArray cameraMatrix, - InputArray distCoeffs, OutputArray rvec, OutputArray tvec, - bool useExtrinsicGuess = false); + InputArray distCoeffs, InputOutputArray rvec, + InputOutputArray tvec, bool useExtrinsicGuess = false); diff --git a/modules/aruco/src/aruco.cpp b/modules/aruco/src/aruco.cpp index a275ee7ccfd..dee2669ebc1 100644 --- a/modules/aruco/src/aruco.cpp +++ b/modules/aruco/src/aruco.cpp @@ -210,19 +210,38 @@ static void _reorderCandidatesCorners(vector< vector< Point2f > > &candidates) { } } +/** + * @brief to make sure that the corner's order of both candidates (default/white) is the same + */ +static vector< Point2f > alignContourOrder( Point2f corner, vector< Point2f > candidate){ + uint8_t r=0; + double min = cv::norm( Vec2f( corner - candidate[0] ), NORM_L2SQR); + for(uint8_t pos=1; pos < 4; pos++) { + double nDiff = cv::norm( Vec2f( corner - candidate[pos] ), NORM_L2SQR); + if(nDiff < min){ + r = pos; + min =nDiff; + } + } + std::rotate(candidate.begin(), candidate.begin() + r, candidate.end()); + return candidate; +} /** - * @brief Check candidates that are too close to each other and remove the smaller one + * @brief Check candidates that are too close to each other, save the potential candidates + * (i.e. biggest/smallest contour) and remove the rest */ static void _filterTooCloseCandidates(const vector< vector< Point2f > > &candidatesIn, - vector< vector< Point2f > > &candidatesOut, + vector< vector< vector< Point2f > > > &candidatesSetOut, const vector< vector< Point > > &contoursIn, - vector< vector< Point > > &contoursOut, - double minMarkerDistanceRate) { + vector< vector< vector< Point > > > &contoursSetOut, + double minMarkerDistanceRate, bool detectInvertedMarker) { CV_Assert(minMarkerDistanceRate >= 0); - vector< pair< int, int > > nearCandidates; + vector candGroup; + candGroup.resize(candidatesIn.size(), -1); + vector< vector > groupedCandidates; for(unsigned int i = 0; i < candidatesIn.size(); i++) { for(unsigned int j = i + 1; j < candidatesIn.size(); j++) { @@ -244,83 +263,87 @@ static void _filterTooCloseCandidates(const vector< vector< Point2f > > &candida // if mean square distance is too low, remove the smaller one of the two markers double minMarkerDistancePixels = double(minimumPerimeter) * minMarkerDistanceRate; if(distSq < minMarkerDistancePixels * minMarkerDistancePixels) { - nearCandidates.push_back(pair< int, int >(i, j)); - break; - } - } - } - } - // mark smaller one in pairs to remove - vector< bool > toRemove(candidatesIn.size(), false); - for(unsigned int i = 0; i < nearCandidates.size(); i++) { - // if one of the marker has been already markerd to removed, dont need to do anything - if(toRemove[nearCandidates[i].first] || toRemove[nearCandidates[i].second]) continue; - size_t perimeter1 = contoursIn[nearCandidates[i].first].size(); - size_t perimeter2 = contoursIn[nearCandidates[i].second].size(); - if(perimeter1 > perimeter2) - toRemove[nearCandidates[i].second] = true; - else - toRemove[nearCandidates[i].first] = true; - } + // i and j are not related to a group + if(candGroup[i]<0 && candGroup[j]<0){ + // mark candidates with their corresponding group number + candGroup[i] = candGroup[j] = (int)groupedCandidates.size(); - // remove extra candidates - candidatesOut.clear(); - unsigned long totalRemaining = 0; - for(unsigned int i = 0; i < toRemove.size(); i++) - if(!toRemove[i]) totalRemaining++; - candidatesOut.resize(totalRemaining); - contoursOut.resize(totalRemaining); - for(unsigned int i = 0, currIdx = 0; i < candidatesIn.size(); i++) { - if(toRemove[i]) continue; - candidatesOut[currIdx] = candidatesIn[i]; - contoursOut[currIdx] = contoursIn[i]; - currIdx++; - } -} - - -/** - * ParallelLoopBody class for the parallelization of the basic candidate detections using - * different threhold window sizes. Called from function _detectInitialCandidates() - */ -class DetectInitialCandidatesParallel : public ParallelLoopBody { - public: - DetectInitialCandidatesParallel(const Mat *_grey, - vector< vector< vector< Point2f > > > *_candidatesArrays, - vector< vector< vector< Point > > > *_contoursArrays, - const Ptr &_params) - : grey(_grey), candidatesArrays(_candidatesArrays), contoursArrays(_contoursArrays), - params(_params) {} - - void operator()(const Range &range) const CV_OVERRIDE { - const int begin = range.start; - const int end = range.end; + // create group + vector grouped; + grouped.push_back(i); + grouped.push_back(j); + groupedCandidates.push_back( grouped ); + } + // i is related to a group + else if(candGroup[i] > -1 && candGroup[j] == -1){ + int group = candGroup[i]; + candGroup[j] = group; - for(int i = begin; i < end; i++) { - int currScale = - params->adaptiveThreshWinSizeMin + i * params->adaptiveThreshWinSizeStep; - // threshold - Mat thresh; - _threshold(*grey, thresh, currScale, params->adaptiveThreshConstant); + // add to group + groupedCandidates[group].push_back( j ); + } + // j is related to a group + else if(candGroup[j] > -1 && candGroup[i] == -1){ + int group = candGroup[j]; + candGroup[i] = group; - // detect rectangles - _findMarkerContours(thresh, (*candidatesArrays)[i], (*contoursArrays)[i], - params->minMarkerPerimeterRate, params->maxMarkerPerimeterRate, - params->polygonalApproxAccuracyRate, params->minCornerDistanceRate, - params->minDistanceToBorder); + // add to group + groupedCandidates[group].push_back( i ); + } + } + } } } - private: - DetectInitialCandidatesParallel &operator=(const DetectInitialCandidatesParallel &); + // save possible candidates + candidatesSetOut.clear(); + contoursSetOut.clear(); + + vector< vector< Point2f > > biggerCandidates; + vector< vector< Point > > biggerContours; + vector< vector< Point2f > > smallerCandidates; + vector< vector< Point > > smallerContours; + + // save possible candidates + for( unsigned int i = 0; i < groupedCandidates.size(); i++ ) { + int smallerIdx = groupedCandidates[i][0]; + int biggerIdx = -1; + + // evaluate group elements + for( unsigned int j = 1; j < groupedCandidates[i].size(); j++ ) { + size_t currPerim = contoursIn[ groupedCandidates[i][j] ].size(); + + // check if current contour is bigger + if ( biggerIdx < 0 ) + biggerIdx = groupedCandidates[i][j]; + else if(currPerim >= contoursIn[ biggerIdx ].size()) + biggerIdx = groupedCandidates[i][j]; + + // check if current contour is smaller + if(currPerim < contoursIn[ smallerIdx ].size() && detectInvertedMarker) + smallerIdx = groupedCandidates[i][j]; + } + // add contours und candidates + if(biggerIdx > -1){ - const Mat *grey; - vector< vector< vector< Point2f > > > *candidatesArrays; - vector< vector< vector< Point > > > *contoursArrays; - const Ptr ¶ms; -}; + biggerCandidates.push_back(candidatesIn[biggerIdx]); + biggerContours.push_back(contoursIn[biggerIdx]); + if( detectInvertedMarker ){ + smallerCandidates.push_back(alignContourOrder(candidatesIn[biggerIdx][0], candidatesIn[smallerIdx])); + smallerContours.push_back(contoursIn[smallerIdx]); + } + } + } + // to preserve the structure :: candidateSet< defaultCandidates, whiteCandidates > + // default candidates + candidatesSetOut.push_back(biggerCandidates); + contoursSetOut.push_back(biggerContours); + // white candidates + candidatesSetOut.push_back(smallerCandidates); + contoursSetOut.push_back(smallerContours); +} /** * @brief Initial steps on finding square candidates @@ -341,21 +364,23 @@ static void _detectInitialCandidates(const Mat &grey, vector< vector< Point2f > vector< vector< vector< Point > > > contoursArrays((size_t) nScales); ////for each value in the interval of thresholding window sizes - // for(int i = 0; i < nScales; i++) { - // int currScale = params.adaptiveThreshWinSizeMin + i*params.adaptiveThreshWinSizeStep; - // // treshold - // Mat thresh; - // _threshold(grey, thresh, currScale, params.adaptiveThreshConstant); - // // detect rectangles - // _findMarkerContours(thresh, candidatesArrays[i], contoursArrays[i], - // params.minMarkerPerimeterRate, - // params.maxMarkerPerimeterRate, params.polygonalApproxAccuracyRate, - // params.minCornerDistance, params.minDistanceToBorder); - //} - - // this is the parallel call for the previous commented loop (result is equivalent) - parallel_for_(Range(0, nScales), DetectInitialCandidatesParallel(&grey, &candidatesArrays, - &contoursArrays, params)); + parallel_for_(Range(0, nScales), [&](const Range& range) { + const int begin = range.start; + const int end = range.end; + + for (int i = begin; i < end; i++) { + int currScale = params->adaptiveThreshWinSizeMin + i * params->adaptiveThreshWinSizeStep; + // threshold + Mat thresh; + _threshold(grey, thresh, currScale, params->adaptiveThreshConstant); + + // detect rectangles + _findMarkerContours(thresh, candidatesArrays[i], contoursArrays[i], + params->minMarkerPerimeterRate, params->maxMarkerPerimeterRate, + params->polygonalApproxAccuracyRate, params->minCornerDistanceRate, + params->minDistanceToBorder); + } + }); // join candidates for(int i = 0; i < nScales; i++) { @@ -370,8 +395,8 @@ static void _detectInitialCandidates(const Mat &grey, vector< vector< Point2f > /** * @brief Detect square candidates in the input image */ -static void _detectCandidates(InputArray _image, vector< vector< Point2f > >& candidatesOut, - vector< vector< Point > >& contoursOut, const Ptr &_params) { +static void _detectCandidates(InputArray _image, vector< vector< vector< Point2f > > >& candidatesSetOut, + vector< vector< vector< Point > > >& contoursSetOut, const Ptr &_params) { Mat image = _image.getMat(); CV_Assert(image.total() != 0); @@ -389,8 +414,9 @@ static void _detectCandidates(InputArray _image, vector< vector< Point2f > >& ca _reorderCandidatesCorners(candidates); /// 4. FILTER OUT NEAR CANDIDATE PAIRS - _filterTooCloseCandidates(candidates, candidatesOut, contours, contoursOut, - _params->minMarkerDistanceRate); + // save the outter/inner border (i.e. potential candidates) + _filterTooCloseCandidates(candidates, candidatesSetOut, contours, contoursSetOut, + _params->minMarkerDistanceRate, _params->detectInvertedMarker); } @@ -493,15 +519,19 @@ static int _getBorderErrors(const Mat &bits, int markerSize, int borderSize) { /** * @brief Tries to identify one candidate given the dictionary + * @return candidate typ. zero if the candidate is not valid, + * 1 if the candidate is a black candidate (default candidate) + * 2 if the candidate is a white candidate */ -static bool _identifyOneCandidate(const Ptr& dictionary, InputArray _image, +static uint8_t _identifyOneCandidate(const Ptr& dictionary, InputArray _image, vector& _corners, int& idx, - const Ptr& params) + const Ptr& params, int& rotation) { CV_Assert(_corners.size() == 4); CV_Assert(_image.getMat().total() != 0); CV_Assert(params->markerBorderBits > 0); + uint8_t typ=1; // get bits Mat candidateBits = _extractBits(_image, _corners, dictionary->markerSize, params->markerBorderBits, @@ -523,9 +553,10 @@ static bool _identifyOneCandidate(const Ptr& dictionary, InputArray if(invBError maximumErrorsInBorder) return false; + if(borderErrors > maximumErrorsInBorder) return 0; // border is wrong // take only inner bits Mat onlyBits = @@ -534,57 +565,12 @@ static bool _identifyOneCandidate(const Ptr& dictionary, InputArray .colRange(params->markerBorderBits, candidateBits.rows - params->markerBorderBits); // try to indentify the marker - int rotation; if(!dictionary->identify(onlyBits, idx, rotation, params->errorCorrectionRate)) - return false; + return 0; - // shift corner positions to the correct rotation - if(rotation != 0) { - std::rotate(_corners.begin(), _corners.begin() + 4 - rotation, _corners.end()); - } - return true; + return typ; } - -/** - * ParallelLoopBody class for the parallelization of the marker identification step - * Called from function _identifyCandidates() - */ -class IdentifyCandidatesParallel : public ParallelLoopBody { - public: - IdentifyCandidatesParallel(const Mat& _grey, vector< vector< Point2f > >& _candidates, - const Ptr &_dictionary, - vector< int >& _idsTmp, vector< char >& _validCandidates, - const Ptr &_params) - : grey(_grey), candidates(_candidates), dictionary(_dictionary), - idsTmp(_idsTmp), validCandidates(_validCandidates), params(_params) {} - - void operator()(const Range &range) const CV_OVERRIDE { - const int begin = range.start; - const int end = range.end; - - for(int i = begin; i < end; i++) { - int currId; - if(_identifyOneCandidate(dictionary, grey, candidates[i], currId, params)) { - validCandidates[i] = 1; - idsTmp[i] = currId; - } - } - } - - private: - IdentifyCandidatesParallel &operator=(const IdentifyCandidatesParallel &); // to quiet MSVC - - const Mat &grey; - vector< vector< Point2f > >& candidates; - const Ptr &dictionary; - vector< int > &idsTmp; - vector< char > &validCandidates; - const Ptr ¶ms; -}; - - - /** * @brief Copy the contents of a corners vector to an OutputArray, settings its size. */ @@ -618,19 +604,23 @@ static void _copyVector2Output(vector< vector< Point2f > > &vec, OutputArrayOfAr } } - +/** + * @brief rotate the initial corner to get to the right position + */ +static void correctCornerPosition( vector< Point2f >& _candidate, int rotate){ + std::rotate(_candidate.begin(), _candidate.begin() + 4 - rotate, _candidate.end()); +} /** * @brief Identify square candidates according to a marker dictionary */ -static void _identifyCandidates(InputArray _image, vector< vector< Point2f > >& _candidates, - vector< vector >& _contours, const Ptr &_dictionary, - vector< vector< Point2f > >& _accepted, vector< int >& ids, +static void _identifyCandidates(InputArray _image, vector< vector< vector< Point2f > > >& _candidatesSet, + vector< vector< vector > >& _contoursSet, const Ptr &_dictionary, + vector< vector< Point2f > >& _accepted, vector< vector >& _contours, vector< int >& ids, const Ptr ¶ms, OutputArrayOfArrays _rejected = noArray()) { - int ncandidates = (int)_candidates.size(); - + int ncandidates = (int)_candidatesSet[0].size(); vector< vector< Point2f > > accepted; vector< vector< Point2f > > rejected; @@ -642,120 +632,58 @@ static void _identifyCandidates(InputArray _image, vector< vector< Point2f > >& _convertToGrey(_image.getMat(), grey); vector< int > idsTmp(ncandidates, -1); - vector< char > validCandidates(ncandidates, 0); + vector< int > rotated(ncandidates, 0); + vector< uint8_t > validCandidates(ncandidates, 0); //// Analyze each of the candidates - // for (int i = 0; i < ncandidates; i++) { - // int currId = i; - // Mat currentCandidate = _candidates.getMat(i); - // if (_identifyOneCandidate(dictionary, grey, currentCandidate, currId, params)) { - // validCandidates[i] = 1; - // idsTmp[i] = currId; - // } - //} - - // this is the parallel call for the previous commented loop (result is equivalent) - parallel_for_(Range(0, ncandidates), - IdentifyCandidatesParallel(grey, _candidates, _dictionary, idsTmp, - validCandidates, params)); + parallel_for_(Range(0, ncandidates), [&](const Range &range) { + const int begin = range.start; + const int end = range.end; - for(int i = 0; i < ncandidates; i++) { - if(validCandidates[i] == 1) { - accepted.push_back(_candidates[i]); - ids.push_back(idsTmp[i]); + vector< vector< Point2f > >& candidates = params->detectInvertedMarker ? _candidatesSet[1] : _candidatesSet[0]; - contours.push_back(_contours[i]); + for(int i = begin; i < end; i++) { + int currId; + validCandidates[i] = _identifyOneCandidate(_dictionary, grey, candidates[i], currId, params, rotated[i]); - } else { - rejected.push_back(_candidates[i]); + if(validCandidates[i] > 0) + idsTmp[i] = currId; } - } + }); - // parse output - _accepted = accepted; + for(int i = 0; i < ncandidates; i++) { + if(validCandidates[i] > 0) { + // to choose the right set of candidates :: 0 for default, 1 for white markers + uint8_t set = validCandidates[i]-1; - _contours= contours; + // shift corner positions to the correct rotation + correctCornerPosition(_candidatesSet[set][i], rotated[i]); - if(_rejected.needed()) { - _copyVector2Output(rejected, _rejected); - } -} + if( !params->detectInvertedMarker && validCandidates[i] == 2 ) + continue; + // add valid candidate + accepted.push_back(_candidatesSet[set][i]); + ids.push_back(idsTmp[i]); -/** - * @brief Final filter of markers after its identification - */ -static void _filterDetectedMarkers(vector< vector< Point2f > >& _corners, vector< int >& _ids, vector< vector< Point> >& _contours) { - - CV_Assert(_corners.size() == _ids.size()); - if(_corners.empty()) return; - - // mark markers that will be removed - vector< bool > toRemove(_corners.size(), false); - bool atLeastOneRemove = false; - - // remove repeated markers with same id, if one contains the other (doble border bug) - for(unsigned int i = 0; i < _corners.size() - 1; i++) { - for(unsigned int j = i + 1; j < _corners.size(); j++) { - if(_ids[i] != _ids[j]) continue; - - // check if first marker is inside second - bool inside = true; - for(unsigned int p = 0; p < 4; p++) { - Point2f point = _corners[j][p]; - if(pointPolygonTest(_corners[i], point, false) < 0) { - inside = false; - break; - } - } - if(inside) { - toRemove[j] = true; - atLeastOneRemove = true; - continue; - } + contours.push_back(_contoursSet[set][i]); - // check the second marker - inside = true; - for(unsigned int p = 0; p < 4; p++) { - Point2f point = _corners[i][p]; - if(pointPolygonTest(_corners[j], point, false) < 0) { - inside = false; - break; - } - } - if(inside) { - toRemove[i] = true; - atLeastOneRemove = true; - continue; - } + } else { + rejected.push_back(_candidatesSet[0][i]); } } // parse output - if(atLeastOneRemove) { - vector< vector< Point2f > >::iterator filteredCorners = _corners.begin(); - vector< int >::iterator filteredIds = _ids.begin(); - - vector< vector< Point > >::iterator filteredContours = _contours.begin(); - - for(unsigned int i = 0; i < toRemove.size(); i++) { - if(!toRemove[i]) { - *filteredCorners++ = _corners[i]; - *filteredIds++ = _ids[i]; - - *filteredContours++ = _contours[i]; - } - } + _accepted = accepted; - _ids.erase(filteredIds, _ids.end()); - _corners.erase(filteredCorners, _corners.end()); + _contours= contours; - _contours.erase(filteredContours, _contours.end()); + if(_rejected.needed()) { + _copyVector2Output(rejected, _rejected); } } - /** * @brief Return object points for the system centered in a single marker, given the marker length */ @@ -772,40 +700,6 @@ static void _getSingleMarkerObjectPoints(float markerLength, OutputArray _objPoi objPoints.ptr< Vec3f >(0)[3] = Vec3f(-markerLength / 2.f, -markerLength / 2.f, 0); } - - - -/** - * ParallelLoopBody class for the parallelization of the marker corner subpixel refinement - * Called from function detectMarkers() - */ -class MarkerSubpixelParallel : public ParallelLoopBody { - public: - MarkerSubpixelParallel(const Mat *_grey, OutputArrayOfArrays _corners, - const Ptr &_params) - : grey(_grey), corners(_corners), params(_params) {} - - void operator()(const Range &range) const CV_OVERRIDE { - const int begin = range.start; - const int end = range.end; - - for(int i = begin; i < end; i++) { - cornerSubPix(*grey, corners.getMat(i), - Size(params->cornerRefinementWinSize, params->cornerRefinementWinSize), - Size(-1, -1), TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS, - params->cornerRefinementMaxIterations, - params->cornerRefinementMinAccuracy)); - } - } - - private: - MarkerSubpixelParallel &operator=(const MarkerSubpixelParallel &); // to quiet MSVC - - const Mat *grey; - OutputArrayOfArrays corners; - const Ptr ¶ms; -}; - /** * Line fitting A * B = C :: Called from function refineCandidateLines * @param nContours, contour-container @@ -948,34 +842,6 @@ static void _refineCandidateLines(std::vector& nContours, std::vector >& _contours, vector< vector< Point2f > >& _candidates, const Mat& _camMatrix, const Mat& _distCoeff) - : contours(_contours), candidates(_candidates), camMatrix(_camMatrix), distCoeff(_distCoeff){} - - void operator()(const Range &range) const CV_OVERRIDE { - - for(int i = range.start; i < range.end; i++) { - _refineCandidateLines(contours[i], candidates[i], camMatrix, distCoeff); - } - } - - private: - MarkerContourParallel &operator=(const MarkerContourParallel &){ - return *this; - } - - vector< vector< Point > >& contours; - vector< vector< Point2f > >& candidates; - const Mat& camMatrix; - const Mat& distCoeff; -}; - #ifdef APRIL_DEBUG static void _darken(const Mat &im){ for (int y = 0; y < im.rows; y++) { @@ -1127,51 +993,61 @@ void detectMarkers(InputArray _image, const Ptr &_dictionary, Output vector< vector< Point > > contours; vector< int > ids; + vector< vector< vector< Point2f > > > candidatesSet; + vector< vector< vector< Point > > > contoursSet; /// STEP 1.a Detect marker candidates :: using AprilTag - if(_params->cornerRefinementMethod == CORNER_REFINE_APRILTAG) + if(_params->cornerRefinementMethod == CORNER_REFINE_APRILTAG){ _apriltag(grey, _params, candidates, contours); + candidatesSet.push_back(candidates); + contoursSet.push_back(contours); + } + /// STEP 1.b Detect marker candidates :: traditional way else - _detectCandidates(grey, candidates, contours, _params); + _detectCandidates(grey, candidatesSet, contoursSet, _params); /// STEP 2: Check candidate codification (identify markers) - _identifyCandidates(grey, candidates, contours, _dictionary, candidates, ids, _params, + _identifyCandidates(grey, candidatesSet, contoursSet, _dictionary, candidates, contours, ids, _params, _rejectedImgPoints); - /// STEP 3: Filter detected markers; - _filterDetectedMarkers(candidates, ids, contours); - // copy to output arrays _copyVector2Output(candidates, _corners); Mat(ids).copyTo(_ids); - /// STEP 4: Corner refinement :: use corner subpix + /// STEP 3: Corner refinement :: use corner subpix if( _params->cornerRefinementMethod == CORNER_REFINE_SUBPIX ) { CV_Assert(_params->cornerRefinementWinSize > 0 && _params->cornerRefinementMaxIterations > 0 && _params->cornerRefinementMinAccuracy > 0); //// do corner refinement for each of the detected markers - // for (unsigned int i = 0; i < _corners.cols(); i++) { - // cornerSubPix(grey, _corners.getMat(i), - // Size(params.cornerRefinementWinSize, params.cornerRefinementWinSize), - // Size(-1, -1), TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS, - // params.cornerRefinementMaxIterations, - // params.cornerRefinementMinAccuracy)); - //} - - // this is the parallel call for the previous commented loop (result is equivalent) - parallel_for_(Range(0, _corners.cols()), - MarkerSubpixelParallel(&grey, _corners, _params)); + parallel_for_(Range(0, _corners.cols()), [&](const Range& range) { + const int begin = range.start; + const int end = range.end; + + for (int i = begin; i < end; i++) { + cornerSubPix(grey, _corners.getMat(i), + Size(_params->cornerRefinementWinSize, _params->cornerRefinementWinSize), + Size(-1, -1), + TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS, + _params->cornerRefinementMaxIterations, + _params->cornerRefinementMinAccuracy)); + } + }); } - /// STEP 4, Optional : Corner refinement :: use contour container + /// STEP 3, Optional : Corner refinement :: use contour container if( _params->cornerRefinementMethod == CORNER_REFINE_CONTOUR){ if(! _ids.empty()){ // do corner refinement using the contours for each detected markers - parallel_for_(Range(0, _corners.cols()), MarkerContourParallel(contours, candidates, camMatrix.getMat(), distCoeff.getMat())); + parallel_for_(Range(0, _corners.cols()), [&](const Range& range) { + for (int i = range.start; i < range.end; i++) { + _refineCandidateLines(contours[i], candidates[i], camMatrix.getMat(), + distCoeff.getMat()); + } + }); // copy the corners to the output array _copyVector2Output(candidates, _corners); @@ -1179,42 +1055,6 @@ void detectMarkers(InputArray _image, const Ptr &_dictionary, Output } } - - -/** - * ParallelLoopBody class for the parallelization of the single markers pose estimation - * Called from function estimatePoseSingleMarkers() - */ -class SinglePoseEstimationParallel : public ParallelLoopBody { - public: - SinglePoseEstimationParallel(Mat& _markerObjPoints, InputArrayOfArrays _corners, - InputArray _cameraMatrix, InputArray _distCoeffs, - Mat& _rvecs, Mat& _tvecs) - : markerObjPoints(_markerObjPoints), corners(_corners), cameraMatrix(_cameraMatrix), - distCoeffs(_distCoeffs), rvecs(_rvecs), tvecs(_tvecs) {} - - void operator()(const Range &range) const CV_OVERRIDE { - const int begin = range.start; - const int end = range.end; - - for(int i = begin; i < end; i++) { - solvePnP(markerObjPoints, corners.getMat(i), cameraMatrix, distCoeffs, - rvecs.at(i), tvecs.at(i)); - } - } - - private: - SinglePoseEstimationParallel &operator=(const SinglePoseEstimationParallel &); // to quiet MSVC - - Mat& markerObjPoints; - InputArrayOfArrays corners; - InputArray cameraMatrix, distCoeffs; - Mat& rvecs, tvecs; -}; - - - - /** */ void estimatePoseSingleMarkers(InputArrayOfArrays _corners, float markerLength, @@ -1232,15 +1072,16 @@ void estimatePoseSingleMarkers(InputArrayOfArrays _corners, float markerLength, Mat rvecs = _rvecs.getMat(), tvecs = _tvecs.getMat(); //// for each marker, calculate its pose - // for (int i = 0; i < nMarkers; i++) { - // solvePnP(markerObjPoints, _corners.getMat(i), _cameraMatrix, _distCoeffs, - // _rvecs.getMat(i), _tvecs.getMat(i)); - //} - - // this is the parallel call for the previous commented loop (result is equivalent) - parallel_for_(Range(0, nMarkers), - SinglePoseEstimationParallel(markerObjPoints, _corners, _cameraMatrix, - _distCoeffs, rvecs, tvecs)); + parallel_for_(Range(0, nMarkers), [&](const Range& range) { + const int begin = range.start; + const int end = range.end; + + for (int i = begin; i < end; i++) { + solvePnP(markerObjPoints, _corners.getMat(i), _cameraMatrix, _distCoeffs, rvecs.at(i), + tvecs.at(i)); + } + }); + if(_objPoints.needed()){ markerObjPoints.convertTo(_objPoints, -1); } @@ -1592,8 +1433,8 @@ void refineDetectedMarkers(InputArray _image, const Ptr &_board, /** */ int estimatePoseBoard(InputArrayOfArrays _corners, InputArray _ids, const Ptr &board, - InputArray _cameraMatrix, InputArray _distCoeffs, OutputArray _rvec, - OutputArray _tvec, bool useExtrinsicGuess) { + InputArray _cameraMatrix, InputArray _distCoeffs, InputOutputArray _rvec, + InputOutputArray _tvec, bool useExtrinsicGuess) { CV_Assert(_corners.total() == _ids.total()); diff --git a/modules/aruco/src/charuco.cpp b/modules/aruco/src/charuco.cpp index 5e5f01a9a71..e9fc59599ce 100644 --- a/modules/aruco/src/charuco.cpp +++ b/modules/aruco/src/charuco.cpp @@ -270,50 +270,6 @@ static int _filterCornersWithoutMinMarkers(const Ptr &_board, return (int)_filteredCharucoIds.total(); } - -/** - * ParallelLoopBody class for the parallelization of the charuco corners subpixel refinement - * Called from function _selectAndRefineChessboardCorners() - */ -class CharucoSubpixelParallel : public ParallelLoopBody { - public: - CharucoSubpixelParallel(const Mat *_grey, vector< Point2f > *_filteredChessboardImgPoints, - vector< Size > *_filteredWinSizes, const Ptr &_params) - : grey(_grey), filteredChessboardImgPoints(_filteredChessboardImgPoints), - filteredWinSizes(_filteredWinSizes), params(_params) {} - - void operator()(const Range &range) const CV_OVERRIDE { - const int begin = range.start; - const int end = range.end; - - for(int i = begin; i < end; i++) { - vector< Point2f > in; - in.push_back((*filteredChessboardImgPoints)[i]); - Size winSize = (*filteredWinSizes)[i]; - if(winSize.height == -1 || winSize.width == -1) - winSize = Size(params->cornerRefinementWinSize, params->cornerRefinementWinSize); - - cornerSubPix(*grey, in, winSize, Size(), - TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS, - params->cornerRefinementMaxIterations, - params->cornerRefinementMinAccuracy)); - - (*filteredChessboardImgPoints)[i] = in[0]; - } - } - - private: - CharucoSubpixelParallel &operator=(const CharucoSubpixelParallel &); // to quiet MSVC - - const Mat *grey; - vector< Point2f > *filteredChessboardImgPoints; - vector< Size > *filteredWinSizes; - const Ptr ¶ms; -}; - - - - /** * @brief From all projected chessboard corners, select those inside the image and apply subpixel * refinement. Returns number of valid corners. @@ -353,23 +309,25 @@ static int _selectAndRefineChessboardCorners(InputArray _allCorners, InputArray const Ptr params = DetectorParameters::create(); // use default params for corner refinement //// For each of the charuco corners, apply subpixel refinement using its correspondind winSize - // for(unsigned int i=0; i in; - // in.push_back(filteredChessboardImgPoints[i]); - // Size winSize = filteredWinSizes[i]; - // if(winSize.height == -1 || winSize.width == -1) - // winSize = Size(params.cornerRefinementWinSize, params.cornerRefinementWinSize); - // cornerSubPix(grey, in, winSize, Size(), - // TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS, - // params->cornerRefinementMaxIterations, - // params->cornerRefinementMinAccuracy)); - // filteredChessboardImgPoints[i] = in[0]; - //} - - // this is the parallel call for the previous commented loop (result is equivalent) - parallel_for_( - Range(0, (int)filteredChessboardImgPoints.size()), - CharucoSubpixelParallel(&grey, &filteredChessboardImgPoints, &filteredWinSizes, params)); + parallel_for_(Range(0, (int)filteredChessboardImgPoints.size()), [&](const Range& range) { + const int begin = range.start; + const int end = range.end; + + for (int i = begin; i < end; i++) { + vector in; + in.push_back(filteredChessboardImgPoints[i]); + Size winSize = filteredWinSizes[i]; + if (winSize.height == -1 || winSize.width == -1) + winSize = Size(params->cornerRefinementWinSize, params->cornerRefinementWinSize); + + cornerSubPix(grey, in, winSize, Size(), + TermCriteria(TermCriteria::MAX_ITER | TermCriteria::EPS, + params->cornerRefinementMaxIterations, + params->cornerRefinementMinAccuracy)); + + filteredChessboardImgPoints[i] = in[0]; + } + }); // parse output Mat(filteredChessboardImgPoints).copyTo(_selectedCorners); @@ -656,7 +614,7 @@ static bool _arePointsEnoughForPoseEstimation(const vector< Point3f > &points) { */ bool estimatePoseCharucoBoard(InputArray _charucoCorners, InputArray _charucoIds, const Ptr &_board, InputArray _cameraMatrix, InputArray _distCoeffs, - OutputArray _rvec, OutputArray _tvec, bool useExtrinsicGuess) { + InputOutputArray _rvec, InputOutputArray _tvec, bool useExtrinsicGuess) { CV_Assert((_charucoCorners.getMat().total() == _charucoIds.getMat().total())); diff --git a/modules/aruco/test/test_arucodetection.cpp b/modules/aruco/test/test_arucodetection.cpp index 3d860dc05e9..e56fdf81252 100644 --- a/modules/aruco/test/test_arucodetection.cpp +++ b/modules/aruco/test/test_arucodetection.cpp @@ -322,24 +322,10 @@ void CV_ArucoDetectionPerspective::run(int tryWith) { } for(int c = 0; c < 4; c++) { double dist = cv::norm(groundTruthCorners[c] - corners[0][c]); // TODO cvtest - if(CV_ArucoDetectionPerspective::DETECT_INVERTED_MARKER == tryWith){ - if(szEnclosed && dist > 3){ + if(dist > 5) { ts->printf(cvtest::TS::LOG, "Incorrect marker corners position"); ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); return; - } - if(!szEnclosed && dist >15){ - ts->printf(cvtest::TS::LOG, "Incorrect marker corners position"); - ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); - return; - } - } - else{ - if(dist > 5) { - ts->printf(cvtest::TS::LOG, "Incorrect marker corners position"); - ts->set_failed_test_info(cvtest::TS::FAIL_BAD_ACCURACY); - return; - } } } } diff --git a/modules/bioinspired/src/retina.cpp b/modules/bioinspired/src/retina.cpp index 32a9ab47afd..8694a3479c3 100644 --- a/modules/bioinspired/src/retina.cpp +++ b/modules/bioinspired/src/retina.cpp @@ -562,7 +562,7 @@ bool RetinaImpl::ocl_run(InputArray inputMatToConvert) void RetinaImpl::run(InputArray inputMatToConvert) { - CV_OCL_RUN((_ocl_retina != 0 && inputMatToConvert.isUMat()), ocl_run(inputMatToConvert)); + CV_OCL_RUN((_ocl_retina && inputMatToConvert.isUMat()), ocl_run(inputMatToConvert)); _wasOCLRunCalled = false; // first convert input image to the compatible format : std::valarray @@ -830,7 +830,7 @@ bool RetinaImpl::_convertCvMat2ValarrayBuffer(InputArray inputMat, std::valarray void RetinaImpl::clearBuffers() { #ifdef HAVE_OPENCL - if (_ocl_retina != 0) + if (_ocl_retina) _ocl_retina->clearBuffers(); #endif diff --git a/modules/ccalib/include/opencv2/ccalib.hpp b/modules/ccalib/include/opencv2/ccalib.hpp index c9b9391cb48..538ec0f81b6 100644 --- a/modules/ccalib/include/opencv2/ccalib.hpp +++ b/modules/ccalib/include/opencv2/ccalib.hpp @@ -96,21 +96,21 @@ class CV_EXPORTS CustomPattern : public Algorithm Calls the calirateCamera function with the same inputs. */ - bool findRt(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, - OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE); - bool findRt(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, - OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE); + bool findRt(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE); + bool findRt(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int flags = SOLVEPNP_ITERATIVE); /**< Uses solvePnP to find the rotation and translation of the pattern with respect to the camera frame. */ - bool findRtRANSAC(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, - OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100, - float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE); - bool findRtRANSAC(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, - OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100, - float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE); + bool findRtRANSAC(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100, + float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE); + bool findRtRANSAC(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, + InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess = false, int iterationsCount = 100, + float reprojectionError = 8.0, int minInliersCount = 100, OutputArray inliers = noArray(), int flags = SOLVEPNP_ITERATIVE); /**< Uses solvePnPRansac() */ diff --git a/modules/ccalib/src/ccalib.cpp b/modules/ccalib/src/ccalib.cpp index 249d5b14a5c..03f1414d8e6 100644 --- a/modules/ccalib/src/ccalib.cpp +++ b/modules/ccalib/src/ccalib.cpp @@ -425,13 +425,13 @@ double CustomPattern::calibrate(InputArrayOfArrays objectPoints, InputArrayOfArr } bool CustomPattern::findRt(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, - InputArray distCoeffs, OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess, int flags) + InputArray distCoeffs, InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess, int flags) { return solvePnP(objectPoints, imagePoints, cameraMatrix, distCoeffs, rvec, tvec, useExtrinsicGuess, flags); } bool CustomPattern::findRt(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, - OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess, int flags) + InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess, int flags) { vector imagePoints; vector objectPoints; @@ -442,17 +442,17 @@ bool CustomPattern::findRt(InputArray image, InputArray cameraMatrix, InputArray } bool CustomPattern::findRtRANSAC(InputArray objectPoints, InputArray imagePoints, InputArray cameraMatrix, InputArray distCoeffs, - OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess, int iterationsCount, - float reprojectionError, int minInliersCount, OutputArray inliers, int flags) + InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess, int iterationsCount, + float reprojectionError, int minInliersCount, OutputArray inliers, int flags) { solvePnPRansac(objectPoints, imagePoints, cameraMatrix, distCoeffs, rvec, tvec, useExtrinsicGuess, - iterationsCount, reprojectionError, minInliersCount, inliers, flags); + iterationsCount, reprojectionError, minInliersCount, inliers, flags); return true; // for consistency with the other methods } bool CustomPattern::findRtRANSAC(InputArray image, InputArray cameraMatrix, InputArray distCoeffs, - OutputArray rvec, OutputArray tvec, bool useExtrinsicGuess, int iterationsCount, - float reprojectionError, int minInliersCount, OutputArray inliers, int flags) + InputOutputArray rvec, InputOutputArray tvec, bool useExtrinsicGuess, int iterationsCount, + float reprojectionError, int minInliersCount, OutputArray inliers, int flags) { vector imagePoints; vector objectPoints; @@ -460,7 +460,7 @@ bool CustomPattern::findRtRANSAC(InputArray image, InputArray cameraMatrix, Inpu if (!findPattern(image, imagePoints, objectPoints)) return false; solvePnPRansac(objectPoints, imagePoints, cameraMatrix, distCoeffs, rvec, tvec, useExtrinsicGuess, - iterationsCount, reprojectionError, minInliersCount, inliers, flags); + iterationsCount, reprojectionError, minInliersCount, inliers, flags); return true; } diff --git a/modules/cudaarithm/CMakeLists.txt b/modules/cudaarithm/CMakeLists.txt new file mode 100644 index 00000000000..d552bb4ebe9 --- /dev/null +++ b/modules/cudaarithm/CMakeLists.txt @@ -0,0 +1,27 @@ +if(IOS OR WINRT OR (NOT HAVE_CUDA AND NOT BUILD_CUDA_STUBS)) + ocv_module_disable(cudaarithm) +endif() + +set(the_description "CUDA-accelerated Operations on Matrices") + +ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow) + +ocv_add_module(cudaarithm opencv_core OPTIONAL opencv_cudev WRAP python) + +ocv_module_include_directories() +ocv_glob_module_sources() + +set(extra_libs "") + +if(HAVE_CUBLAS) + list(APPEND extra_libs ${CUDA_cublas_LIBRARY}) +endif() + +if(HAVE_CUFFT) + list(APPEND extra_libs ${CUDA_cufft_LIBRARY}) +endif() + +ocv_create_module(${extra_libs}) + +ocv_add_accuracy_tests(DEPENDS_ON opencv_imgproc) +ocv_add_perf_tests(DEPENDS_ON opencv_imgproc) diff --git a/modules/cudaarithm/include/opencv2/cudaarithm.hpp b/modules/cudaarithm/include/opencv2/cudaarithm.hpp new file mode 100644 index 00000000000..5b19ea0f646 --- /dev/null +++ b/modules/cudaarithm/include/opencv2/cudaarithm.hpp @@ -0,0 +1,878 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_CUDAARITHM_HPP +#define OPENCV_CUDAARITHM_HPP + +#ifndef __cplusplus +# error cudaarithm.hpp header must be compiled as C++ +#endif + +#include "opencv2/core/cuda.hpp" + +/** + @addtogroup cuda + @{ + @defgroup cudaarithm Operations on Matrices + @{ + @defgroup cudaarithm_core Core Operations on Matrices + @defgroup cudaarithm_elem Per-element Operations + @defgroup cudaarithm_reduce Matrix Reductions + @defgroup cudaarithm_arithm Arithm Operations on Matrices + @} + @} + */ + +namespace cv { namespace cuda { + +//! @addtogroup cudaarithm +//! @{ + +//! @addtogroup cudaarithm_elem +//! @{ + +/** @brief Computes a matrix-matrix or matrix-scalar sum. + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . +@param dst Destination matrix that has the same size and number of channels as the input array(s). +The depth is defined by dtype or src1 depth. +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +@sa add + */ +CV_EXPORTS_W void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()); + +/** @brief Computes a matrix-matrix or matrix-scalar difference. + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. Matrix should have the same size and type as src1 . +@param dst Destination matrix that has the same size and number of channels as the input array(s). +The depth is defined by dtype or src1 depth. +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +@sa subtract + */ +CV_EXPORTS_W void subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()); + +/** @brief Computes a matrix-matrix or matrix-scalar per-element product. + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and number of channels as the input array(s). +The depth is defined by dtype or src1 depth. +@param scale Optional scale factor. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +@sa multiply + */ +CV_EXPORTS_W void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); + +/** @brief Computes a matrix-matrix or matrix-scalar division. + +@param src1 First source matrix or a scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and number of channels as the input array(s). +The depth is defined by dtype or src1 depth. +@param scale Optional scale factor. +@param dtype Optional depth of the output array. +@param stream Stream for the asynchronous version. + +This function, in contrast to divide, uses a round-down rounding mode. + +@sa divide + */ +CV_EXPORTS_W void divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); + +/** @brief Computes per-element absolute difference of two matrices (or of a matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param stream Stream for the asynchronous version. + +@sa absdiff + */ +CV_EXPORTS_W void absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes an absolute value of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa abs + */ +CV_EXPORTS_W void abs(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes a square value of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void sqr(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes a square root of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa sqrt + */ +CV_EXPORTS_W void sqrt(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes an exponent of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa exp + */ +CV_EXPORTS_W void exp(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes a natural logarithm of absolute value of each matrix element. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +@sa log + */ +CV_EXPORTS_W void log(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Raises every matrix element to a power. + +@param src Source matrix. +@param power Exponent of power. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + +The function pow raises every element of the input matrix to power : + +\f[\texttt{dst} (I) = \fork{\texttt{src}(I)^power}{if \texttt{power} is integer}{|\texttt{src}(I)|^power}{otherwise}\f] + +@sa pow + */ +CV_EXPORTS_W void pow(InputArray src, double power, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Compares elements of two matrices (or of a matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size as the input array(s) and type CV_8U. +@param cmpop Flag specifying the relation between the elements to be checked: +- **CMP_EQ:** a(.) == b(.) +- **CMP_GT:** a(.) \> b(.) +- **CMP_GE:** a(.) \>= b(.) +- **CMP_LT:** a(.) \< b(.) +- **CMP_LE:** a(.) \<= b(.) +- **CMP_NE:** a(.) != b(.) +@param stream Stream for the asynchronous version. + +@sa compare + */ +CV_EXPORTS_W void compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null()); + +/** @brief Performs a per-element bitwise inversion. + +@param src Source matrix. +@param dst Destination matrix with the same size and type as src . +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void bitwise_not(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Performs a per-element bitwise disjunction of two matrices (or of matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Performs a per-element bitwise conjunction of two matrices (or of matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Performs a per-element bitwise exclusive or operation of two matrices (or of matrix and scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param mask Optional operation mask, 8-bit single channel array, that specifies elements of the +destination array to be changed. The mask can be used only with single channel images. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Performs pixel by pixel right shift of an image by a constant value. + +@param src Source matrix. Supports 1, 3 and 4 channels images with integers elements. +@param val Constant values, one per channel. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS void rshift(InputArray src, Scalar_ val, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Performs pixel by pixel right left of an image by a constant value. + +@param src Source matrix. Supports 1, 3 and 4 channels images with CV_8U , CV_16U or CV_32S +depth. +@param val Constant values, one per channel. +@param dst Destination matrix with the same size and type as src . +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS void lshift(InputArray src, Scalar_ val, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes the per-element minimum of two matrices (or a matrix and a scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param stream Stream for the asynchronous version. + +@sa min + */ +CV_EXPORTS_W void min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes the per-element maximum of two matrices (or a matrix and a scalar). + +@param src1 First source matrix or scalar. +@param src2 Second source matrix or scalar. +@param dst Destination matrix that has the same size and type as the input array(s). +@param stream Stream for the asynchronous version. + +@sa max + */ +CV_EXPORTS_W void max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes the weighted sum of two arrays. + +@param src1 First source array. +@param alpha Weight for the first array elements. +@param src2 Second source array of the same size and channel number as src1 . +@param beta Weight for the second array elements. +@param dst Destination array that has the same size and number of channels as the input arrays. +@param gamma Scalar added to each sum. +@param dtype Optional depth of the destination array. When both input arrays have the same depth, +dtype can be set to -1, which will be equivalent to src1.depth(). +@param stream Stream for the asynchronous version. + +The function addWeighted calculates the weighted sum of two arrays as follows: + +\f[\texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I)* \texttt{alpha} + \texttt{src2} (I)* \texttt{beta} + \texttt{gamma} )\f] + +where I is a multi-dimensional index of array elements. In case of multi-channel arrays, each +channel is processed independently. + +@sa addWeighted + */ +CV_EXPORTS_W void addWeighted(InputArray src1, double alpha, InputArray src2, double beta, double gamma, OutputArray dst, + int dtype = -1, Stream& stream = Stream::Null()); + +//! adds scaled array to another one (dst = alpha*src1 + src2) +static inline void scaleAdd(InputArray src1, double alpha, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()) +{ + addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); +} + +/** @brief Applies a fixed-level threshold to each array element. + +@param src Source array (single-channel). +@param dst Destination array with the same size and type as src . +@param thresh Threshold value. +@param maxval Maximum value to use with THRESH_BINARY and THRESH_BINARY_INV threshold types. +@param type Threshold type. For details, see threshold . The THRESH_OTSU and THRESH_TRIANGLE +threshold types are not supported. +@param stream Stream for the asynchronous version. + +@sa threshold + */ +CV_EXPORTS_W double threshold(InputArray src, OutputArray dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()); + +/** @brief Computes magnitudes of complex matrix elements. + +@param xy Source complex matrix in the interleaved format ( CV_32FC2 ). +@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ). +@param stream Stream for the asynchronous version. + +@sa magnitude + */ +CV_EXPORTS_W void magnitude(InputArray xy, OutputArray magnitude, Stream& stream = Stream::Null()); + +/** @brief Computes squared magnitudes of complex matrix elements. + +@param xy Source complex matrix in the interleaved format ( CV_32FC2 ). +@param magnitude Destination matrix of float magnitude squares ( CV_32FC1 ). +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void magnitudeSqr(InputArray xy, OutputArray magnitude, Stream& stream = Stream::Null()); + +/** @overload + computes magnitude of each (x(i), y(i)) vector + supports only floating-point source +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ). +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void magnitude(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null()); + +/** @overload + computes squared magnitude of each (x(i), y(i)) vector + supports only floating-point source +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param magnitude Destination matrix of float magnitude squares ( CV_32FC1 ). +@param stream Stream for the asynchronous version. +*/ +CV_EXPORTS_W void magnitudeSqr(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null()); + +/** @brief Computes polar angles of complex matrix elements. + +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param angle Destination matrix of angles ( CV_32FC1 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa phase + */ +CV_EXPORTS_W void phase(InputArray x, InputArray y, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +/** @brief Converts Cartesian coordinates into polar. + +@param x Source matrix containing real components ( CV_32FC1 ). +@param y Source matrix containing imaginary components ( CV_32FC1 ). +@param magnitude Destination matrix of float magnitudes ( CV_32FC1 ). +@param angle Destination matrix of angles ( CV_32FC1 ). +@param angleInDegrees Flag for angles that must be evaluated in degrees. +@param stream Stream for the asynchronous version. + +@sa cartToPolar + */ +CV_EXPORTS_W void cartToPolar(InputArray x, InputArray y, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +/** @brief Converts polar coordinates into Cartesian. + +@param magnitude Source matrix containing magnitudes ( CV_32FC1 or CV_64FC1 ). +@param angle Source matrix containing angles ( same type as magnitude ). +@param x Destination matrix of real components ( same type as magnitude ). +@param y Destination matrix of imaginary components ( same type as magnitude ). +@param angleInDegrees Flag that indicates angles in degrees. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void polarToCart(InputArray magnitude, InputArray angle, OutputArray x, OutputArray y, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +//! @} cudaarithm_elem + +//! @addtogroup cudaarithm_core +//! @{ + +/** @brief Makes a multi-channel matrix out of several single-channel matrices. + +@param src Array/vector of source matrices. +@param n Number of source matrices. +@param dst Destination matrix. +@param stream Stream for the asynchronous version. + +@sa merge + */ +CV_EXPORTS_W void merge(const GpuMat* src, size_t n, OutputArray dst, Stream& stream = Stream::Null()); +/** @overload */ +CV_EXPORTS_W void merge(const std::vector& src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Copies each plane of a multi-channel matrix into an array. + +@param src Source matrix. +@param dst Destination array/vector of single-channel matrices. +@param stream Stream for the asynchronous version. + +@sa split + */ +CV_EXPORTS_W void split(InputArray src, GpuMat* dst, Stream& stream = Stream::Null()); +/** @overload */ +CV_EXPORTS_W void split(InputArray src, std::vector& dst, Stream& stream = Stream::Null()); + +/** @brief Transposes a matrix. + +@param src1 Source matrix. 1-, 4-, 8-byte element sizes are supported for now. +@param dst Destination matrix. +@param stream Stream for the asynchronous version. + +@sa transpose + */ +CV_EXPORTS_W void transpose(InputArray src1, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Flips a 2D matrix around vertical, horizontal, or both axes. + +@param src Source matrix. Supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or +CV_32F depth. +@param dst Destination matrix. +@param flipCode Flip mode for the source: +- 0 Flips around x-axis. +- \> 0 Flips around y-axis. +- \< 0 Flips around both axes. +@param stream Stream for the asynchronous version. + +@sa flip + */ +CV_EXPORTS_W void flip(InputArray src, OutputArray dst, int flipCode, Stream& stream = Stream::Null()); + +/** @brief Base class for transform using lookup table. + */ +class CV_EXPORTS_W LookUpTable : public Algorithm +{ +public: + /** @brief Transforms the source matrix into the destination matrix using the given look-up table: + dst(I) = lut(src(I)) . + + @param src Source matrix. CV_8UC1 and CV_8UC3 matrices are supported for now. + @param dst Destination matrix. + @param stream Stream for the asynchronous version. + */ + CV_WRAP virtual void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0; +}; + +/** @brief Creates implementation for cuda::LookUpTable . + +@param lut Look-up table of 256 elements. It is a continuous CV_8U matrix. + */ +CV_EXPORTS_W Ptr createLookUpTable(InputArray lut); + +/** @brief Forms a border around an image. + +@param src Source image. CV_8UC1 , CV_8UC4 , CV_32SC1 , and CV_32FC1 types are supported. +@param dst Destination image with the same type as src. The size is +Size(src.cols+left+right, src.rows+top+bottom) . +@param top Number of top pixels +@param bottom Number of bottom pixels +@param left Number of left pixels +@param right Number of pixels in each direction from the source image rectangle to extrapolate. +For example: top=1, bottom=1, left=1, right=1 mean that 1 pixel-wide border needs to be built. +@param borderType Border type. See borderInterpolate for details. BORDER_REFLECT101 , +BORDER_REPLICATE , BORDER_CONSTANT , BORDER_REFLECT and BORDER_WRAP are supported for now. +@param value Border value. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void copyMakeBorder(InputArray src, OutputArray dst, int top, int bottom, int left, int right, int borderType, + Scalar value = Scalar(), Stream& stream = Stream::Null()); + +//! @} cudaarithm_core + +//! @addtogroup cudaarithm_reduce +//! @{ + +/** @brief Returns the norm of a matrix (or difference of two matrices). + +@param src1 Source matrix. Any matrices except 64F are supported. +@param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now. +@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. + +@sa norm + */ +CV_EXPORTS_W double norm(InputArray src1, int normType, InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void calcNorm(InputArray src, OutputArray dst, int normType, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Returns the difference of two matrices. + +@param src1 Source matrix. Any matrices except 64F are supported. +@param src2 Second source matrix (if any) with the same size and type as src1. +@param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now. + +@sa norm + */ +CV_EXPORTS_W double norm(InputArray src1, InputArray src2, int normType=NORM_L2); +/** @overload */ +CV_EXPORTS_W void calcNormDiff(InputArray src1, InputArray src2, OutputArray dst, int normType=NORM_L2, Stream& stream = Stream::Null()); + +/** @brief Returns the sum of matrix elements. + +@param src Source image of any depth except for CV_64F . +@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. + +@sa sum + */ +CV_EXPORTS_W Scalar sum(InputArray src, InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void calcSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Returns the sum of absolute values for matrix elements. + +@param src Source image of any depth except for CV_64F . +@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. + */ +CV_EXPORTS_W Scalar absSum(InputArray src, InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void calcAbsSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Returns the squared sum of matrix elements. + +@param src Source image of any depth except for CV_64F . +@param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type. + */ +CV_EXPORTS_W Scalar sqrSum(InputArray src, InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void calcSqrSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Finds global minimum and maximum matrix elements and returns their values. + +@param src Single-channel source image. +@param minVal Pointer to the returned minimum value. Use NULL if not required. +@param maxVal Pointer to the returned maximum value. Use NULL if not required. +@param mask Optional mask to select a sub-matrix. + +The function does not work with CV_64F images on GPUs with the compute capability \< 1.3. + +@sa minMaxLoc + */ +CV_EXPORTS_W void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void findMinMax(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Finds global minimum and maximum matrix elements and returns their values with locations. + +@param src Single-channel source image. +@param minVal Pointer to the returned minimum value. Use NULL if not required. +@param maxVal Pointer to the returned maximum value. Use NULL if not required. +@param minLoc Pointer to the returned minimum location. Use NULL if not required. +@param maxLoc Pointer to the returned maximum location. Use NULL if not required. +@param mask Optional mask to select a sub-matrix. + +The function does not work with CV_64F images on GPU with the compute capability \< 1.3. + +@sa minMaxLoc + */ +CV_EXPORTS_W void minMaxLoc(InputArray src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, + InputArray mask = noArray()); +/** @overload */ +CV_EXPORTS_W void findMinMaxLoc(InputArray src, OutputArray minMaxVals, OutputArray loc, + InputArray mask = noArray(), Stream& stream = Stream::Null()); + +/** @brief Counts non-zero matrix elements. + +@param src Single-channel source image. + +The function does not work with CV_64F images on GPUs with the compute capability \< 1.3. + +@sa countNonZero + */ +CV_EXPORTS_W int countNonZero(InputArray src); +/** @overload */ +CV_EXPORTS_W void countNonZero(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Reduces a matrix to a vector. + +@param mtx Source 2D matrix. +@param vec Destination vector. Its size and type is defined by dim and dtype parameters. +@param dim Dimension index along which the matrix is reduced. 0 means that the matrix is reduced +to a single row. 1 means that the matrix is reduced to a single column. +@param reduceOp Reduction operation that could be one of the following: +- **CV_REDUCE_SUM** The output is the sum of all rows/columns of the matrix. +- **CV_REDUCE_AVG** The output is the mean vector of all rows/columns of the matrix. +- **CV_REDUCE_MAX** The output is the maximum (column/row-wise) of all rows/columns of the +matrix. +- **CV_REDUCE_MIN** The output is the minimum (column/row-wise) of all rows/columns of the +matrix. +@param dtype When it is negative, the destination vector will have the same type as the source +matrix. Otherwise, its type will be CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), mtx.channels()) . +@param stream Stream for the asynchronous version. + +The function reduce reduces the matrix to a vector by treating the matrix rows/columns as a set of +1D vectors and performing the specified operation on the vectors until a single row/column is +obtained. For example, the function can be used to compute horizontal and vertical projections of a +raster image. In case of CV_REDUCE_SUM and CV_REDUCE_AVG , the output may have a larger element +bit-depth to preserve accuracy. And multi-channel arrays are also supported in these two reduction +modes. + +@sa reduce + */ +CV_EXPORTS_W void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null()); + +/** @brief Computes a mean value and a standard deviation of matrix elements. + +@param mtx Source matrix. CV_8UC1 matrices are supported for now. +@param mean Mean value. +@param stddev Standard deviation value. + +@sa meanStdDev + */ +CV_EXPORTS_W void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev); +/** @overload */ +CV_EXPORTS_W void meanStdDev(InputArray mtx, OutputArray dst, Stream& stream = Stream::Null()); + +/** @brief Computes a standard deviation of integral images. + +@param src Source image. Only the CV_32SC1 type is supported. +@param sqr Squared source image. Only the CV_32FC1 type is supported. +@param dst Destination image with the same type and size as src . +@param rect Rectangular window. +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void rectStdDev(InputArray src, InputArray sqr, OutputArray dst, Rect rect, Stream& stream = Stream::Null()); + +/** @brief Normalizes the norm or value range of an array. + +@param src Input array. +@param dst Output array of the same size as src . +@param alpha Norm value to normalize to or the lower range boundary in case of the range +normalization. +@param beta Upper range boundary in case of the range normalization; it is not used for the norm +normalization. +@param norm_type Normalization type ( NORM_MINMAX , NORM_L2 , NORM_L1 or NORM_INF ). +@param dtype When negative, the output array has the same type as src; otherwise, it has the same +number of channels as src and the depth =CV_MAT_DEPTH(dtype). +@param mask Optional operation mask. +@param stream Stream for the asynchronous version. + +@sa normalize + */ +CV_EXPORTS_W void normalize(InputArray src, OutputArray dst, double alpha, double beta, + int norm_type, int dtype, InputArray mask = noArray(), + Stream& stream = Stream::Null()); + +/** @brief Computes an integral image. + +@param src Source image. Only CV_8UC1 images are supported for now. +@param sum Integral image containing 32-bit unsigned integer values packed into CV_32SC1 . +@param stream Stream for the asynchronous version. + +@sa integral + */ +CV_EXPORTS_W void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null()); + +/** @brief Computes a squared integral image. + +@param src Source image. Only CV_8UC1 images are supported for now. +@param sqsum Squared integral image containing 64-bit unsigned integer values packed into +CV_64FC1 . +@param stream Stream for the asynchronous version. + */ +CV_EXPORTS_W void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null()); + +//! @} cudaarithm_reduce + +//! @addtogroup cudaarithm_arithm +//! @{ + +/** @brief Performs generalized matrix multiplication. + +@param src1 First multiplied input matrix that should have CV_32FC1 , CV_64FC1 , CV_32FC2 , or +CV_64FC2 type. +@param src2 Second multiplied input matrix of the same type as src1 . +@param alpha Weight of the matrix product. +@param src3 Third optional delta matrix added to the matrix product. It should have the same type +as src1 and src2 . +@param beta Weight of src3 . +@param dst Destination matrix. It has the proper size and the same type as input matrices. +@param flags Operation flags: +- **GEMM_1_T** transpose src1 +- **GEMM_2_T** transpose src2 +- **GEMM_3_T** transpose src3 +@param stream Stream for the asynchronous version. + +The function performs generalized matrix multiplication similar to the gemm functions in BLAS level +3. For example, gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T) corresponds to + +\f[\texttt{dst} = \texttt{alpha} \cdot \texttt{src1} ^T \cdot \texttt{src2} + \texttt{beta} \cdot \texttt{src3} ^T\f] + +@note Transposition operation doesn't support CV_64FC2 input type. + +@sa gemm + */ +CV_EXPORTS_W void gemm(InputArray src1, InputArray src2, double alpha, + InputArray src3, double beta, OutputArray dst, int flags = 0, Stream& stream = Stream::Null()); + +/** @brief Performs a per-element multiplication of two Fourier spectrums. + +@param src1 First spectrum. +@param src2 Second spectrum with the same size and type as a . +@param dst Destination spectrum. +@param flags Mock parameter used for CPU/CUDA interfaces similarity. +@param conjB Optional flag to specify if the second spectrum needs to be conjugated before the +multiplication. +@param stream Stream for the asynchronous version. + +Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now. + +@sa mulSpectrums + */ +CV_EXPORTS_W void mulSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, bool conjB=false, Stream& stream = Stream::Null()); + +/** @brief Performs a per-element multiplication of two Fourier spectrums and scales the result. + +@param src1 First spectrum. +@param src2 Second spectrum with the same size and type as a . +@param dst Destination spectrum. +@param flags Mock parameter used for CPU/CUDA interfaces similarity, simply add a `0` value. +@param scale Scale constant. +@param conjB Optional flag to specify if the second spectrum needs to be conjugated before the +multiplication. +@param stream Stream for the asynchronous version. + +Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now. + +@sa mulSpectrums + */ +CV_EXPORTS_W void mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null()); + +/** @brief Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix. + +@param src Source matrix (real or complex). +@param dst Destination matrix (real or complex). +@param dft_size Size of a discrete Fourier transform. +@param flags Optional flags: +- **DFT_ROWS** transforms each individual row of the source matrix. +- **DFT_SCALE** scales the result: divide it by the number of elements in the transform +(obtained from dft_size ). +- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real +cases are always forward and inverse, respectively). +- **DFT_COMPLEX_INPUT** Specifies that input is complex input with 2 channels. +- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of +real-complex transform, so the destination matrix must be real. +@param stream Stream for the asynchronous version. + +Use to handle real matrices ( CV32FC1 ) and complex matrices in the interleaved format ( CV32FC2 ). + +The source matrix should be continuous, otherwise reallocation and data copying is performed. The +function chooses an operation mode depending on the flags, size, and channel count of the source +matrix: + +- If the source matrix is complex and the output is not specified as real, the destination +matrix is complex and has the dft_size size and CV_32FC2 type. The destination matrix +contains a full result of the DFT (forward or inverse). +- If the source matrix is complex and the output is specified as real, the function assumes that +its input is the result of the forward transform (see the next item). The destination matrix +has the dft_size size and CV_32FC1 type. It contains the result of the inverse DFT. +- If the source matrix is real (its type is CV_32FC1 ), forward DFT is performed. The result of +the DFT is packed into complex ( CV_32FC2 ) matrix. So, the width of the destination matrix +is dft_size.width / 2 + 1 . But if the source is a single column, the height is reduced +instead of the width. + +@sa dft + */ +CV_EXPORTS_W void dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null()); + +/** @brief Base class for DFT operator as a cv::Algorithm. : + */ +class CV_EXPORTS_W DFT : public Algorithm +{ +public: + /** @brief Computes an FFT of a given image. + + @param image Source image. Only CV_32FC1 images are supported for now. + @param result Result image. + @param stream Stream for the asynchronous version. + */ + CV_WRAP virtual void compute(InputArray image, OutputArray result, Stream& stream = Stream::Null()) = 0; +}; + +/** @brief Creates implementation for cuda::DFT. + +@param dft_size The image size. +@param flags Optional flags: +- **DFT_ROWS** transforms each individual row of the source matrix. +- **DFT_SCALE** scales the result: divide it by the number of elements in the transform +(obtained from dft_size ). +- **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real +cases are always forward and inverse, respectively). +- **DFT_COMPLEX_INPUT** Specifies that inputs will be complex with 2 channels. +- **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of +real-complex transform, so the destination matrix must be real. + */ +CV_EXPORTS_W Ptr createDFT(Size dft_size, int flags); + +/** @brief Base class for convolution (or cross-correlation) operator. : + */ +class CV_EXPORTS_W Convolution : public Algorithm +{ +public: + /** @brief Computes a convolution (or cross-correlation) of two images. + + @param image Source image. Only CV_32FC1 images are supported for now. + @param templ Template image. The size is not greater than the image size. The type is the same as + image . + @param result Result image. If image is *W x H* and templ is *w x h*, then result must be *W-w+1 x + H-h+1*. + @param ccorr Flags to evaluate cross-correlation instead of convolution. + @param stream Stream for the asynchronous version. + */ + virtual void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) = 0; +}; + +/** @brief Creates implementation for cuda::Convolution . + +@param user_block_size Block size. If you leave default value Size(0,0) then automatic +estimation of block size will be used (which is optimized for speed). By varying user_block_size +you can reduce memory requirements at the cost of speed. + */ +CV_EXPORTS_W Ptr createConvolution(Size user_block_size = Size()); + +//! @} cudaarithm_arithm + +//! @} cudaarithm + +}} // namespace cv { namespace cuda { + +#endif /* OPENCV_CUDAARITHM_HPP */ diff --git a/modules/cudaarithm/perf/perf_arithm.cpp b/modules/cudaarithm/perf/perf_arithm.cpp new file mode 100644 index 00000000000..ca23e19dc14 --- /dev/null +++ b/modules/cudaarithm/perf/perf_arithm.cpp @@ -0,0 +1,254 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +////////////////////////////////////////////////////////////////////// +// GEMM + +#ifdef HAVE_CUBLAS + +CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T) +#define ALL_GEMM_FLAGS Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), \ + GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T)) + +DEF_PARAM_TEST(Sz_Type_Flags, cv::Size, MatType, GemmFlags); + +PERF_TEST_P(Sz_Type_Flags, GEMM, + Combine(Values(cv::Size(512, 512), cv::Size(1024, 1024)), + Values(CV_32FC1, CV_32FC2, CV_64FC1), + ALL_GEMM_FLAGS)) +{ + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + const int flags = GET_PARAM(2); + + cv::Mat src1(size, type); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, type); + declare.in(src2, WARMUP_RNG); + + cv::Mat src3(size, type); + declare.in(src3, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + declare.time(5.0); + + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + const cv::cuda::GpuMat d_src3(src3); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, dst, flags); + + CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); + } + else + { + declare.time(50.0); + + cv::Mat dst; + + TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst, flags); + + CPU_SANITY_CHECK(dst); + } +} + +#endif + +////////////////////////////////////////////////////////////////////// +// MulSpectrums + +CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT) + +DEF_PARAM_TEST(Sz_Flags, cv::Size, DftFlags); + +PERF_TEST_P(Sz_Flags, MulSpectrums, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(0, DftFlags(cv::DFT_ROWS)))) +{ + const cv::Size size = GET_PARAM(0); + const int flag = GET_PARAM(1); + + cv::Mat a(size, CV_32FC2); + cv::Mat b(size, CV_32FC2); + declare.in(a, b, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_a(a); + const cv::cuda::GpuMat d_b(b); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::mulSpectrums(d_a, d_b, dst, flag); + + CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::mulSpectrums(a, b, dst, flag); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MulAndScaleSpectrums + +PERF_TEST_P(Sz, MulAndScaleSpectrums, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + const float scale = 1.f / size.area(); + + cv::Mat src1(size, CV_32FC2); + cv::Mat src2(size, CV_32FC2); + declare.in(src1,src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::mulAndScaleSpectrums(d_src1, d_src2, dst, cv::DFT_ROWS, scale, false); + + CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// Dft + +PERF_TEST_P(Sz_Flags, Dft, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(0, DftFlags(cv::DFT_ROWS), DftFlags(cv::DFT_INVERSE)))) +{ + declare.time(10.0); + + const cv::Size size = GET_PARAM(0); + const int flag = GET_PARAM(1); + + cv::Mat src(size, CV_32FC2); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::dft(d_src, dst, size, flag); + + CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::dft(src, dst, flag); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Convolve + +DEF_PARAM_TEST(Sz_KernelSz_Ccorr, cv::Size, int, bool); + +PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(17, 27, 32, 64), + Bool())) +{ + declare.time(10.0); + + const cv::Size size = GET_PARAM(0); + const int templ_size = GET_PARAM(1); + const bool ccorr = GET_PARAM(2); + + const cv::Mat image(size, CV_32FC1); + const cv::Mat templ(templ_size, templ_size, CV_32FC1); + declare.in(image, templ, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + cv::cuda::GpuMat d_image = cv::cuda::createContinuous(size, CV_32FC1); + d_image.upload(image); + + cv::cuda::GpuMat d_templ = cv::cuda::createContinuous(templ_size, templ_size, CV_32FC1); + d_templ.upload(templ); + + cv::Ptr convolution = cv::cuda::createConvolution(); + + cv::cuda::GpuMat dst; + + TEST_CYCLE() convolution->convolve(d_image, d_templ, dst, ccorr); + + CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); + } + else + { + if (ccorr) + FAIL_NO_CPU(); + + cv::Mat dst; + + TEST_CYCLE() cv::filter2D(image, dst, image.depth(), templ); + + CPU_SANITY_CHECK(dst); + } +} + +}} // namespace diff --git a/modules/cudaarithm/perf/perf_core.cpp b/modules/cudaarithm/perf/perf_core.cpp new file mode 100644 index 00000000000..bc9f0e2f715 --- /dev/null +++ b/modules/cudaarithm/perf/perf_core.cpp @@ -0,0 +1,323 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +#define ARITHM_MAT_DEPTH Values(CV_8U, CV_16U, CV_32F, CV_64F) + +////////////////////////////////////////////////////////////////////// +// Merge + +DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn); + +PERF_TEST_P(Sz_Depth_Cn, Merge, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH, + Values(2, 3, 4))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + std::vector src(channels); + for (int i = 0; i < channels; ++i) + { + src[i].create(size, depth); + declare.in(src[i], WARMUP_RNG); + } + + if (PERF_RUN_CUDA()) + { + std::vector d_src(channels); + for (int i = 0; i < channels; ++i) + d_src[i].upload(src[i]); + + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::merge(d_src, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::merge(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Split + +PERF_TEST_P(Sz_Depth_Cn, Split, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH, + Values(2, 3, 4))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + cv::Mat src(size, CV_MAKE_TYPE(depth, channels)); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + std::vector dst; + + TEST_CYCLE() cv::cuda::split(d_src, dst); + + const cv::cuda::GpuMat& dst0 = dst[0]; + const cv::cuda::GpuMat& dst1 = dst[1]; + + CUDA_SANITY_CHECK(dst0, 1e-10); + CUDA_SANITY_CHECK(dst1, 1e-10); + } + else + { + std::vector dst; + + TEST_CYCLE() cv::split(src, dst); + + const cv::Mat& dst0 = dst[0]; + const cv::Mat& dst1 = dst[1]; + + CPU_SANITY_CHECK(dst0); + CPU_SANITY_CHECK(dst1); + } +} + +////////////////////////////////////////////////////////////////////// +// Transpose + +PERF_TEST_P(Sz_Type, Transpose, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32SC2, CV_64FC1))) +{ + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::transpose(d_src, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::transpose(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Flip + +enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; +CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y) + +DEF_PARAM_TEST(Sz_Depth_Cn_Code, cv::Size, MatDepth, MatCn, FlipCode); + +PERF_TEST_P(Sz_Depth_Cn_Code, Flip, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + CUDA_CHANNELS_1_3_4, + FlipCode::all())) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + const int flipCode = GET_PARAM(3); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::flip(d_src, dst, flipCode); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::flip(src, dst, flipCode); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// LutOneChannel + +PERF_TEST_P(Sz_Type, LutOneChannel, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8UC1, CV_8UC3))) +{ + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Mat lut(1, 256, CV_8UC1); + declare.in(lut, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + cv::Ptr lutAlg = cv::cuda::createLookUpTable(lut); + + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() lutAlg->transform(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::LUT(src, lut, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// LutMultiChannel + +PERF_TEST_P(Sz_Type, LutMultiChannel, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8UC3))) +{ + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Mat lut(1, 256, CV_MAKE_TYPE(CV_8U, src.channels())); + declare.in(lut, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + cv::Ptr lutAlg = cv::cuda::createLookUpTable(lut); + + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() lutAlg->transform(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::LUT(src, lut, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// CopyMakeBorder + +DEF_PARAM_TEST(Sz_Depth_Cn_Border, cv::Size, MatDepth, MatCn, BorderMode); + +PERF_TEST_P(Sz_Depth_Cn_Border, CopyMakeBorder, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + CUDA_CHANNELS_1_3_4, + ALL_BORDER_MODES)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + const int borderMode = GET_PARAM(3); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::copyMakeBorder(d_src, dst, 5, 5, 5, 5, borderMode); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::copyMakeBorder(src, dst, 5, 5, 5, 5, borderMode); + + CPU_SANITY_CHECK(dst); + } +} + +}} // namespace diff --git a/modules/cudaarithm/perf/perf_element_operations.cpp b/modules/cudaarithm/perf/perf_element_operations.cpp new file mode 100644 index 00000000000..9aa2d4e4e0f --- /dev/null +++ b/modules/cudaarithm/perf/perf_element_operations.cpp @@ -0,0 +1,1504 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +#define ARITHM_MAT_DEPTH Values(CV_8U, CV_16U, CV_32F, CV_64F) + +////////////////////////////////////////////////////////////////////// +// AddMat + +DEF_PARAM_TEST(Sz_Depth, cv::Size, MatDepth); + +PERF_TEST_P(Sz_Depth, AddMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::add(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::add(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// AddScalar + +PERF_TEST_P(Sz_Depth, AddScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::add(d_src, s, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::add(src, s, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// SubtractMat + +PERF_TEST_P(Sz_Depth, SubtractMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::subtract(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::subtract(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// SubtractScalar + +PERF_TEST_P(Sz_Depth, SubtractScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::subtract(d_src, s, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::subtract(src, s, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MultiplyMat + +PERF_TEST_P(Sz_Depth, MultiplyMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::multiply(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::multiply(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MultiplyScalar + +PERF_TEST_P(Sz_Depth, MultiplyScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::multiply(d_src, s, dst); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::multiply(src, s, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// DivideMat + +PERF_TEST_P(Sz_Depth, DivideMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::divide(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::divide(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// DivideScalar + +PERF_TEST_P(Sz_Depth, DivideScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::divide(d_src, s, dst); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::divide(src, s, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// DivideScalarInv + +PERF_TEST_P(Sz_Depth, DivideScalarInv, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::divide(s[0], d_src, dst); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::divide(s, src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// AbsDiffMat + +PERF_TEST_P(Sz_Depth, AbsDiffMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::absdiff(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::absdiff(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// AbsDiffScalar + +PERF_TEST_P(Sz_Depth, AbsDiffScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::absdiff(d_src, s, dst); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::absdiff(src, s, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Abs + +PERF_TEST_P(Sz_Depth, Abs, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_16S, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::abs(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// Sqr + +PERF_TEST_P(Sz_Depth, Sqr, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::sqr(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// Sqrt + +PERF_TEST_P(Sz_Depth, Sqrt, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + cv::randu(src, 0, 100000); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::sqrt(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::sqrt(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Log + +PERF_TEST_P(Sz_Depth, Log, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + cv::randu(src, 0, 100000); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::log(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::log(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Exp + +PERF_TEST_P(Sz_Depth, Exp, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + cv::randu(src, 0, 10); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::exp(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::exp(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Pow + +DEF_PARAM_TEST(Sz_Depth_Power, cv::Size, MatDepth, double); + +PERF_TEST_P(Sz_Depth_Power, Pow, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16S, CV_32F), + Values(0.3, 2.0, 2.4))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const double power = GET_PARAM(2); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::pow(d_src, power, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::pow(src, power, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// CompareMat + +CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE) + +DEF_PARAM_TEST(Sz_Depth_Code, cv::Size, MatDepth, CmpCode); + +PERF_TEST_P(Sz_Depth_Code, CompareMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH, + CmpCode::all())) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int cmp_code = GET_PARAM(2); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::compare(d_src1, d_src2, dst, cmp_code); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::compare(src1, src2, dst, cmp_code); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// CompareScalar + +PERF_TEST_P(Sz_Depth_Code, CompareScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + ARITHM_MAT_DEPTH, + CmpCode::all())) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int cmp_code = GET_PARAM(2); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::compare(d_src, s, dst, cmp_code); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::compare(src, s, dst, cmp_code); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseNot + +PERF_TEST_P(Sz_Depth, BitwiseNot, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_not(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_not(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseAndMat + +PERF_TEST_P(Sz_Depth, BitwiseAndMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_and(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_and(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseAndScalar + +DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn); + +PERF_TEST_P(Sz_Depth_Cn, BitwiseAndScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + cv::Scalar_ is = s; + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_and(d_src, is, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_and(src, is, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseOrMat + +PERF_TEST_P(Sz_Depth, BitwiseOrMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_or(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_or(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseOrScalar + +PERF_TEST_P(Sz_Depth_Cn, BitwiseOrScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + cv::Scalar_ is = s; + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_or(d_src, is, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_or(src, is, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseXorMat + +PERF_TEST_P(Sz_Depth, BitwiseXorMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_xor(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_xor(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// BitwiseXorScalar + +PERF_TEST_P(Sz_Depth_Cn, BitwiseXorScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Scalar s; + declare.in(s, WARMUP_RNG); + cv::Scalar_ is = s; + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::bitwise_xor(d_src, is, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::bitwise_xor(src, is, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// RShift + +PERF_TEST_P(Sz_Depth_Cn, RShift, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + const cv::Scalar_ val = cv::Scalar_::all(4); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::rshift(d_src, val, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// LShift + +PERF_TEST_P(Sz_Depth_Cn, LShift, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + const cv::Scalar_ val = cv::Scalar_::all(4); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::lshift(d_src, val, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// MinMat + +PERF_TEST_P(Sz_Depth, MinMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::min(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::min(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MinScalar + +PERF_TEST_P(Sz_Depth, MinScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar val; + declare.in(val, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::min(d_src, val[0], dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::min(src, val[0], dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MaxMat + +PERF_TEST_P(Sz_Depth, MaxMat, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src1(size, depth); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::max(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::max(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MaxScalar + +PERF_TEST_P(Sz_Depth, MaxScalar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + cv::Scalar val; + declare.in(val, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::max(d_src, val[0], dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::max(src, val[0], dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// AddWeighted + +DEF_PARAM_TEST(Sz_3Depth, cv::Size, MatDepth, MatDepth, MatDepth); + +PERF_TEST_P(Sz_3Depth, AddWeighted, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(CV_8U, CV_16U, CV_32F, CV_64F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth1 = GET_PARAM(1); + const int depth2 = GET_PARAM(2); + const int dst_depth = GET_PARAM(3); + + cv::Mat src1(size, depth1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, depth2); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::addWeighted(d_src1, 0.5, d_src2, 0.5, 10.0, dst, dst_depth); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::addWeighted(src1, 0.5, src2, 0.5, 10.0, dst, dst_depth); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MagnitudeComplex + +PERF_TEST_P(Sz, MagnitudeComplex, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_32FC2); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::magnitude(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat xy[2]; + cv::split(src, xy); + + cv::Mat dst; + + TEST_CYCLE() cv::magnitude(xy[0], xy[1], dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MagnitudeSqrComplex + +PERF_TEST_P(Sz, MagnitudeSqrComplex, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_32FC2); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::magnitudeSqr(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// Magnitude + +PERF_TEST_P(Sz, Magnitude, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src1(size, CV_32FC1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, CV_32FC1); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::magnitude(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::magnitude(src1, src2, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MagnitudeSqr + +PERF_TEST_P(Sz, MagnitudeSqr, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src1(size, CV_32FC1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, CV_32FC1); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::magnitudeSqr(d_src1, d_src2, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// Phase + +DEF_PARAM_TEST(Sz_AngleInDegrees, cv::Size, bool); +DEF_PARAM_TEST(Sz_Type_AngleInDegrees, cv::Size, MatType, bool); + +PERF_TEST_P(Sz_AngleInDegrees, Phase, + Combine(CUDA_TYPICAL_MAT_SIZES, + Bool())) +{ + const cv::Size size = GET_PARAM(0); + const bool angleInDegrees = GET_PARAM(1); + + cv::Mat src1(size, CV_32FC1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, CV_32FC1); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::phase(d_src1, d_src2, dst, angleInDegrees); + + CUDA_SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::phase(src1, src2, dst, angleInDegrees); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// CartToPolar + +PERF_TEST_P(Sz_AngleInDegrees, CartToPolar, + Combine(CUDA_TYPICAL_MAT_SIZES, + Bool())) +{ + const cv::Size size = GET_PARAM(0); + const bool angleInDegrees = GET_PARAM(1); + + cv::Mat src1(size, CV_32FC1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, CV_32FC1); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + cv::cuda::GpuMat magnitude; + cv::cuda::GpuMat angle; + + TEST_CYCLE() cv::cuda::cartToPolar(d_src1, d_src2, magnitude, angle, angleInDegrees); + + CUDA_SANITY_CHECK(magnitude); + CUDA_SANITY_CHECK(angle, 1e-6, ERROR_RELATIVE); + } + else + { + cv::Mat magnitude; + cv::Mat angle; + + TEST_CYCLE() cv::cartToPolar(src1, src2, magnitude, angle, angleInDegrees); + + CPU_SANITY_CHECK(magnitude); + CPU_SANITY_CHECK(angle); + } +} + +////////////////////////////////////////////////////////////////////// +// PolarToCart + +PERF_TEST_P(Sz_Type_AngleInDegrees, PolarToCart, + Combine(CUDA_TYPICAL_MAT_SIZES, + testing::Values(CV_32FC1, CV_64FC1), + Bool())) +{ + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + const bool angleInDegrees = GET_PARAM(2); + + cv::Mat magnitude(size, type); + declare.in(magnitude, WARMUP_RNG); + + cv::Mat angle(size, type); + declare.in(angle, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_magnitude(magnitude); + const cv::cuda::GpuMat d_angle(angle); + cv::cuda::GpuMat x; + cv::cuda::GpuMat y; + + TEST_CYCLE() cv::cuda::polarToCart(d_magnitude, d_angle, x, y, angleInDegrees); + + CUDA_SANITY_CHECK(x); + CUDA_SANITY_CHECK(y); + } + else + { + cv::Mat x; + cv::Mat y; + + TEST_CYCLE() cv::polarToCart(magnitude, angle, x, y, angleInDegrees); + + CPU_SANITY_CHECK(x); + CPU_SANITY_CHECK(y); + } +} + +////////////////////////////////////////////////////////////////////// +// Threshold + +CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV) + +DEF_PARAM_TEST(Sz_Depth_Op, cv::Size, MatDepth, ThreshOp); + +PERF_TEST_P(Sz_Depth_Op, Threshold, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + ThreshOp::all())) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int threshOp = GET_PARAM(2); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::threshold(d_src, dst, 100.0, 255.0, threshOp); + + CUDA_SANITY_CHECK(dst, 1e-10); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::threshold(src, dst, 100.0, 255.0, threshOp); + + CPU_SANITY_CHECK(dst); + } +} + +}} // namespace diff --git a/modules/cudaarithm/perf/perf_main.cpp b/modules/cudaarithm/perf/perf_main.cpp new file mode 100644 index 00000000000..118d7596ac2 --- /dev/null +++ b/modules/cudaarithm/perf/perf_main.cpp @@ -0,0 +1,47 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" + +using namespace perf; + +CV_PERF_TEST_CUDA_MAIN(cudaarithm) diff --git a/modules/cudaarithm/perf/perf_precomp.hpp b/modules/cudaarithm/perf/perf_precomp.hpp new file mode 100644 index 00000000000..071ac946537 --- /dev/null +++ b/modules/cudaarithm/perf/perf_precomp.hpp @@ -0,0 +1,55 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#ifndef __OPENCV_PERF_PRECOMP_HPP__ +#define __OPENCV_PERF_PRECOMP_HPP__ + +#include "opencv2/ts.hpp" +#include "opencv2/ts/cuda_perf.hpp" + +#include "opencv2/cudaarithm.hpp" + +namespace opencv_test { +using namespace perf; +using namespace testing; +} + +#endif diff --git a/modules/cudaarithm/perf/perf_reductions.cpp b/modules/cudaarithm/perf/perf_reductions.cpp new file mode 100644 index 00000000000..71bb5524a63 --- /dev/null +++ b/modules/cudaarithm/perf/perf_reductions.cpp @@ -0,0 +1,520 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { namespace { + +////////////////////////////////////////////////////////////////////// +// Norm + +DEF_PARAM_TEST(Sz_Depth_Norm, cv::Size, MatDepth, NormType); + +PERF_TEST_P(Sz_Depth_Norm, Norm, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32S, CV_32F), + Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int normType = GET_PARAM(2); + + cv::Mat src(size, depth); + if (depth == CV_8U) + cv::randu(src, 0, 254); + else + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat d_buf; + double gpu_dst; + + TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src, normType, d_buf); + + SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE); + } + else + { + double cpu_dst; + + TEST_CYCLE() cpu_dst = cv::norm(src, normType); + + SANITY_CHECK(cpu_dst, 1e-6, ERROR_RELATIVE); + } +} + +////////////////////////////////////////////////////////////////////// +// NormDiff + +DEF_PARAM_TEST(Sz_Norm, cv::Size, NormType); + +PERF_TEST_P(Sz_Norm, NormDiff, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(NormType(cv::NORM_INF), NormType(cv::NORM_L1), NormType(cv::NORM_L2)))) +{ + const cv::Size size = GET_PARAM(0); + const int normType = GET_PARAM(1); + + cv::Mat src1(size, CV_8UC1); + declare.in(src1, WARMUP_RNG); + + cv::Mat src2(size, CV_8UC1); + declare.in(src2, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src1(src1); + const cv::cuda::GpuMat d_src2(src2); + double gpu_dst; + + TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src1, d_src2, normType); + + SANITY_CHECK(gpu_dst); + + } + else + { + double cpu_dst; + + TEST_CYCLE() cpu_dst = cv::norm(src1, src2, normType); + + SANITY_CHECK(cpu_dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Sum + +DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, MatCn); + +PERF_TEST_P(Sz_Depth_Cn, Sum, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::Scalar gpu_dst; + + TEST_CYCLE() gpu_dst = cv::cuda::sum(d_src); + + SANITY_CHECK(gpu_dst, 1e-5, ERROR_RELATIVE); + } + else + { + cv::Scalar cpu_dst; + + TEST_CYCLE() cpu_dst = cv::sum(src); + + SANITY_CHECK(cpu_dst, 1e-6, ERROR_RELATIVE); + } +} + +////////////////////////////////////////////////////////////////////// +// SumAbs + +PERF_TEST_P(Sz_Depth_Cn, SumAbs, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::Scalar gpu_dst; + + TEST_CYCLE() gpu_dst = cv::cuda::absSum(d_src); + + SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// SumSqr + +PERF_TEST_P(Sz_Depth_Cn, SumSqr, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F), + CUDA_CHANNELS_1_3_4)) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::Scalar gpu_dst; + + TEST_CYCLE() gpu_dst = cv::cuda::sqrSum(d_src); + + SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE); + } + else + { + FAIL_NO_CPU(); + } +} + +////////////////////////////////////////////////////////////////////// +// MinMax + +DEF_PARAM_TEST(Sz_Depth, cv::Size, MatDepth); + +PERF_TEST_P(Sz_Depth, MinMax, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + if (depth == CV_8U) + cv::randu(src, 0, 254); + else + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + double gpu_minVal, gpu_maxVal; + + TEST_CYCLE() cv::cuda::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::cuda::GpuMat()); + + SANITY_CHECK(gpu_minVal, 1e-10); + SANITY_CHECK(gpu_maxVal, 1e-10); + } + else + { + double cpu_minVal, cpu_maxVal; + + TEST_CYCLE() cv::minMaxLoc(src, &cpu_minVal, &cpu_maxVal); + + SANITY_CHECK(cpu_minVal); + SANITY_CHECK(cpu_maxVal); + } +} + +////////////////////////////////////////////////////////////////////// +// MinMaxLoc + +PERF_TEST_P(Sz_Depth, MinMaxLoc, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + if (depth == CV_8U) + cv::randu(src, 0, 254); + else + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + double gpu_minVal, gpu_maxVal; + cv::Point gpu_minLoc, gpu_maxLoc; + + TEST_CYCLE() cv::cuda::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc); + + SANITY_CHECK(gpu_minVal, 1e-10); + SANITY_CHECK(gpu_maxVal, 1e-10); + } + else + { + double cpu_minVal, cpu_maxVal; + cv::Point cpu_minLoc, cpu_maxLoc; + + TEST_CYCLE() cv::minMaxLoc(src, &cpu_minVal, &cpu_maxVal, &cpu_minLoc, &cpu_maxLoc); + + SANITY_CHECK(cpu_minVal); + SANITY_CHECK(cpu_maxVal); + } +} + +////////////////////////////////////////////////////////////////////// +// CountNonZero + +PERF_TEST_P(Sz_Depth, CountNonZero, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F))) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + + cv::Mat src(size, depth); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + int gpu_dst = 0; + + TEST_CYCLE() gpu_dst = cv::cuda::countNonZero(d_src); + + SANITY_CHECK(gpu_dst); + } + else + { + int cpu_dst = 0; + + TEST_CYCLE() cpu_dst = cv::countNonZero(src); + + SANITY_CHECK(cpu_dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Reduce + +CV_ENUM(ReduceCode, REDUCE_SUM, REDUCE_AVG, REDUCE_MAX, REDUCE_MIN) + +enum {Rows = 0, Cols = 1}; +CV_ENUM(ReduceDim, Rows, Cols) + +DEF_PARAM_TEST(Sz_Depth_Cn_Code_Dim, cv::Size, MatDepth, MatCn, ReduceCode, ReduceDim); + +PERF_TEST_P(Sz_Depth_Cn_Code_Dim, Reduce, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_16S, CV_32F), + Values(1, 2, 3, 4), + ReduceCode::all(), + ReduceDim::all())) +{ + const cv::Size size = GET_PARAM(0); + const int depth = GET_PARAM(1); + const int channels = GET_PARAM(2); + const int reduceOp = GET_PARAM(3); + const int dim = GET_PARAM(4); + + const int type = CV_MAKE_TYPE(depth, channels); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::reduce(d_src, dst, dim, reduceOp, CV_32F); + + dst = dst.reshape(dst.channels(), 1); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::reduce(src, dst, dim, reduceOp, CV_32F); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// Normalize + +DEF_PARAM_TEST(Sz_Depth_NormType, cv::Size, MatDepth, NormType); + +PERF_TEST_P(Sz_Depth_NormType, Normalize, + Combine(CUDA_TYPICAL_MAT_SIZES, + Values(CV_8U, CV_16U, CV_32F, CV_64F), + Values(NormType(cv::NORM_INF), + NormType(cv::NORM_L1), + NormType(cv::NORM_L2), + NormType(cv::NORM_MINMAX)))) +{ + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + const int norm_type = GET_PARAM(2); + + const double alpha = 1; + const double beta = 0; + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::normalize(d_src, dst, alpha, beta, norm_type, type, cv::cuda::GpuMat()); + + CUDA_SANITY_CHECK(dst, 1e-6); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::normalize(src, dst, alpha, beta, norm_type, type); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// MeanStdDev + +PERF_TEST_P(Sz, MeanStdDev, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_8UC1); + declare.in(src, WARMUP_RNG); + + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::Scalar gpu_mean; + cv::Scalar gpu_stddev; + + TEST_CYCLE() cv::cuda::meanStdDev(d_src, gpu_mean, gpu_stddev); + + SANITY_CHECK(gpu_mean); + SANITY_CHECK(gpu_stddev); + } + else + { + cv::Scalar cpu_mean; + cv::Scalar cpu_stddev; + + TEST_CYCLE() cv::meanStdDev(src, cpu_mean, cpu_stddev); + + SANITY_CHECK(cpu_mean); + SANITY_CHECK(cpu_stddev); + } +} + +////////////////////////////////////////////////////////////////////// +// Integral + +PERF_TEST_P(Sz, Integral, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_8UC1); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::integral(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::integral(src, dst); + + CPU_SANITY_CHECK(dst); + } +} + +////////////////////////////////////////////////////////////////////// +// IntegralSqr + +PERF_TEST_P(Sz, IntegralSqr, + CUDA_TYPICAL_MAT_SIZES) +{ + const cv::Size size = GetParam(); + + cv::Mat src(size, CV_8UC1); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_CUDA()) + { + const cv::cuda::GpuMat d_src(src); + cv::cuda::GpuMat dst; + + TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst); + + CUDA_SANITY_CHECK(dst); + } + else + { + FAIL_NO_CPU(); + } +} + +}} // namespace diff --git a/modules/cudaarithm/src/arithm.cpp b/modules/cudaarithm/src/arithm.cpp new file mode 100644 index 00000000000..381580cff43 --- /dev/null +++ b/modules/cudaarithm/src/arithm.cpp @@ -0,0 +1,582 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::cuda; + +#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) + +void cv::cuda::gemm(InputArray, InputArray, double, InputArray, double, OutputArray, int, Stream&) { throw_no_cuda(); } + +void cv::cuda::mulSpectrums(InputArray, InputArray, OutputArray, int, bool, Stream&) { throw_no_cuda(); } +void cv::cuda::mulAndScaleSpectrums(InputArray, InputArray, OutputArray, int, float, bool, Stream&) { throw_no_cuda(); } + +void cv::cuda::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); } + +Ptr cv::cuda::createConvolution(Size) { throw_no_cuda(); return Ptr(); } + +#else /* !defined (HAVE_CUDA) */ + +namespace +{ + #define error_entry(entry) { entry, #entry } + + struct ErrorEntry + { + int code; + const char* str; + }; + + struct ErrorEntryComparer + { + int code; + ErrorEntryComparer(int code_) : code(code_) {} + bool operator()(const ErrorEntry& e) const { return e.code == code; } + }; + + String getErrorString(int code, const ErrorEntry* errors, size_t n) + { + size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors; + + const char* msg = (idx != n) ? errors[idx].str : "Unknown error code"; + String str = cv::format("%s [Code = %d]", msg, code); + + return str; + } +} + +#ifdef HAVE_CUBLAS + namespace + { + const ErrorEntry cublas_errors[] = + { + error_entry( CUBLAS_STATUS_SUCCESS ), + error_entry( CUBLAS_STATUS_NOT_INITIALIZED ), + error_entry( CUBLAS_STATUS_ALLOC_FAILED ), + error_entry( CUBLAS_STATUS_INVALID_VALUE ), + error_entry( CUBLAS_STATUS_ARCH_MISMATCH ), + error_entry( CUBLAS_STATUS_MAPPING_ERROR ), + error_entry( CUBLAS_STATUS_EXECUTION_FAILED ), + error_entry( CUBLAS_STATUS_INTERNAL_ERROR ) + }; + + const size_t cublas_error_num = sizeof(cublas_errors) / sizeof(cublas_errors[0]); + + static inline void ___cublasSafeCall(cublasStatus_t err, const char* file, const int line, const char* func) + { + if (CUBLAS_STATUS_SUCCESS != err) + { + String msg = getErrorString(err, cublas_errors, cublas_error_num); + cv::error(cv::Error::GpuApiCallError, msg, func, file, line); + } + } + } + + #define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__, CV_Func) +#endif // HAVE_CUBLAS + +#ifdef HAVE_CUFFT + namespace + { + ////////////////////////////////////////////////////////////////////////// + // CUFFT errors + + const ErrorEntry cufft_errors[] = + { + error_entry( CUFFT_INVALID_PLAN ), + error_entry( CUFFT_ALLOC_FAILED ), + error_entry( CUFFT_INVALID_TYPE ), + error_entry( CUFFT_INVALID_VALUE ), + error_entry( CUFFT_INTERNAL_ERROR ), + error_entry( CUFFT_EXEC_FAILED ), + error_entry( CUFFT_SETUP_FAILED ), + error_entry( CUFFT_INVALID_SIZE ), + error_entry( CUFFT_UNALIGNED_DATA ) + }; + + const int cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]); + + void ___cufftSafeCall(int err, const char* file, const int line, const char* func) + { + if (CUFFT_SUCCESS != err) + { + String msg = getErrorString(err, cufft_errors, cufft_error_num); + cv::error(cv::Error::GpuApiCallError, msg, func, file, line); + } + } + } + + #define cufftSafeCall(expr) ___cufftSafeCall(expr, __FILE__, __LINE__, CV_Func) + +#endif + +//////////////////////////////////////////////////////////////////////// +// gemm + +void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray _src3, double beta, OutputArray _dst, int flags, Stream& stream) +{ +#ifndef HAVE_CUBLAS + CV_UNUSED(_src1); + CV_UNUSED(_src2); + CV_UNUSED(alpha); + CV_UNUSED(_src3); + CV_UNUSED(beta); + CV_UNUSED(_dst); + CV_UNUSED(flags); + CV_UNUSED(stream); + CV_Error(Error::StsNotImplemented, "The library was build without CUBLAS"); +#else + // CUBLAS works with column-major matrices + + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); + GpuMat src3 = getInputMat(_src3, stream); + + CV_Assert( src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2 ); + CV_Assert( src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()) ); + + if (src1.depth() == CV_64F) + { + if (!deviceSupports(NATIVE_DOUBLE)) + CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + } + + bool tr1 = (flags & GEMM_1_T) != 0; + bool tr2 = (flags & GEMM_2_T) != 0; + bool tr3 = (flags & GEMM_3_T) != 0; + + if (src1.type() == CV_64FC2) + { + if (tr1 || tr2 || tr3) + CV_Error(cv::Error::StsNotImplemented, "transpose operation doesn't implemented for CV_64FC2 type"); + } + + Size src1Size = tr1 ? Size(src1.rows, src1.cols) : src1.size(); + Size src2Size = tr2 ? Size(src2.rows, src2.cols) : src2.size(); + Size src3Size = tr3 ? Size(src3.rows, src3.cols) : src3.size(); + Size dstSize(src2Size.width, src1Size.height); + + CV_Assert( src1Size.width == src2Size.height ); + CV_Assert( src3.empty() || src3Size == dstSize ); + + GpuMat dst = getOutputMat(_dst, dstSize, src1.type(), stream); + + if (beta != 0) + { + if (src3.empty()) + { + dst.setTo(Scalar::all(0), stream); + } + else + { + if (tr3) + { + cuda::transpose(src3, dst, stream); + } + else + { + src3.copyTo(dst, stream); + } + } + } + + cublasHandle_t handle; + cublasSafeCall( cublasCreate_v2(&handle) ); + + cublasSafeCall( cublasSetStream_v2(handle, StreamAccessor::getStream(stream)) ); + + cublasSafeCall( cublasSetPointerMode_v2(handle, CUBLAS_POINTER_MODE_HOST) ); + + const float alphaf = static_cast(alpha); + const float betaf = static_cast(beta); + + const cuComplex alphacf = make_cuComplex(alphaf, 0); + const cuComplex betacf = make_cuComplex(betaf, 0); + + const cuDoubleComplex alphac = make_cuDoubleComplex(alpha, 0); + const cuDoubleComplex betac = make_cuDoubleComplex(beta, 0); + + cublasOperation_t transa = tr2 ? CUBLAS_OP_T : CUBLAS_OP_N; + cublasOperation_t transb = tr1 ? CUBLAS_OP_T : CUBLAS_OP_N; + + switch (src1.type()) + { + case CV_32FC1: + cublasSafeCall( cublasSgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows, + &alphaf, + src2.ptr(), static_cast(src2.step / sizeof(float)), + src1.ptr(), static_cast(src1.step / sizeof(float)), + &betaf, + dst.ptr(), static_cast(dst.step / sizeof(float))) ); + break; + + case CV_64FC1: + cublasSafeCall( cublasDgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows, + &alpha, + src2.ptr(), static_cast(src2.step / sizeof(double)), + src1.ptr(), static_cast(src1.step / sizeof(double)), + &beta, + dst.ptr(), static_cast(dst.step / sizeof(double))) ); + break; + + case CV_32FC2: + cublasSafeCall( cublasCgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows, + &alphacf, + src2.ptr(), static_cast(src2.step / sizeof(cuComplex)), + src1.ptr(), static_cast(src1.step / sizeof(cuComplex)), + &betacf, + dst.ptr(), static_cast(dst.step / sizeof(cuComplex))) ); + break; + + case CV_64FC2: + cublasSafeCall( cublasZgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows, + &alphac, + src2.ptr(), static_cast(src2.step / sizeof(cuDoubleComplex)), + src1.ptr(), static_cast(src1.step / sizeof(cuDoubleComplex)), + &betac, + dst.ptr(), static_cast(dst.step / sizeof(cuDoubleComplex))) ); + break; + } + + cublasSafeCall( cublasDestroy_v2(handle) ); + + syncOutput(dst, _dst, stream); +#endif +} + +////////////////////////////////////////////////////////////////////////////// +// DFT function + +void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream) +{ + if (getInputMat(_src, stream).channels() == 2) + flags |= DFT_COMPLEX_INPUT; + + Ptr dft = createDFT(dft_size, flags); + dft->compute(_src, _dst, stream); +} + +////////////////////////////////////////////////////////////////////////////// +// DFT algorithm + +#ifdef HAVE_CUFFT + +namespace +{ + + class DFTImpl : public DFT + { + Size dft_size, dft_size_opt; + bool is_1d_input, is_row_dft, is_scaled_dft, is_inverse, is_complex_input, is_complex_output; + + cufftType dft_type; + cufftHandle plan; + + public: + DFTImpl(Size dft_size, int flags) + : dft_size(dft_size), + dft_size_opt(dft_size), + is_1d_input((dft_size.height == 1) || (dft_size.width == 1)), + is_row_dft((flags & DFT_ROWS) != 0), + is_scaled_dft((flags & DFT_SCALE) != 0), + is_inverse((flags & DFT_INVERSE) != 0), + is_complex_input((flags & DFT_COMPLEX_INPUT) != 0), + is_complex_output(!(flags & DFT_REAL_OUTPUT)), + dft_type(!is_complex_input ? CUFFT_R2C : (is_complex_output ? CUFFT_C2C : CUFFT_C2R)) + { + // We don't support unpacked output (in the case of real input) + CV_Assert( !(flags & DFT_COMPLEX_OUTPUT) ); + + // We don't support real-to-real transform + CV_Assert( is_complex_input || is_complex_output ); + + if (is_1d_input && !is_row_dft) + { + // If the source matrix is single column handle it as single row + dft_size_opt.width = std::max(dft_size.width, dft_size.height); + dft_size_opt.height = std::min(dft_size.width, dft_size.height); + } + + CV_Assert( dft_size_opt.width > 1 ); + + if (is_1d_input || is_row_dft) + cufftSafeCall( cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height) ); + else + cufftSafeCall( cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type) ); + } + + ~DFTImpl() + { + cufftSafeCall( cufftDestroy(plan) ); + } + + void compute(InputArray _src, OutputArray _dst, Stream& stream) + { + GpuMat src = getInputMat(_src, stream); + + CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 ); + CV_Assert( is_complex_input == (src.channels() == 2) ); + + // Make sure here we work with the continuous input, + // as CUFFT can't handle gaps + GpuMat src_cont; + if (src.isContinuous()) + { + src_cont = src; + } + else + { + BufferPool pool(stream); + src_cont.allocator = pool.getAllocator(); + createContinuous(src.rows, src.cols, src.type(), src_cont); + src.copyTo(src_cont, stream); + } + + cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) ); + + if (is_complex_input) + { + if (is_complex_output) + { + createContinuous(dft_size, CV_32FC2, _dst); + GpuMat dst = _dst.getGpuMat(); + + cufftSafeCall(cufftExecC2C( + plan, src_cont.ptr(), dst.ptr(), + is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD)); + } + else + { + createContinuous(dft_size, CV_32F, _dst); + GpuMat dst = _dst.getGpuMat(); + + cufftSafeCall(cufftExecC2R( + plan, src_cont.ptr(), dst.ptr())); + } + } + else + { + // We could swap dft_size for efficiency. Here we must reflect it + if (dft_size == dft_size_opt) + createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, _dst); + else + createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, _dst); + + GpuMat dst = _dst.getGpuMat(); + + cufftSafeCall(cufftExecR2C( + plan, src_cont.ptr(), dst.ptr())); + } + + if (is_scaled_dft) + cuda::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream); + } + }; +} + +#endif + +Ptr cv::cuda::createDFT(Size dft_size, int flags) +{ +#ifndef HAVE_CUFFT + CV_UNUSED(dft_size); + CV_UNUSED(flags); + CV_Error(Error::StsNotImplemented, "The library was build without CUFFT"); + return Ptr(); +#else + return makePtr(dft_size, flags); +#endif +} + +////////////////////////////////////////////////////////////////////////////// +// Convolution + +#ifdef HAVE_CUFFT + +namespace +{ + class ConvolutionImpl : public Convolution + { + public: + explicit ConvolutionImpl(Size user_block_size_) : user_block_size(user_block_size_) {} + + void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()); + + private: + void create(Size image_size, Size templ_size); + static Size estimateBlockSize(Size result_size); + + Size result_size; + Size block_size; + Size user_block_size; + Size dft_size; + + GpuMat image_spect, templ_spect, result_spect; + GpuMat image_block, templ_block, result_data; + }; + + void ConvolutionImpl::create(Size image_size, Size templ_size) + { + result_size = Size(image_size.width - templ_size.width + 1, + image_size.height - templ_size.height + 1); + + block_size = user_block_size; + if (user_block_size.width == 0 || user_block_size.height == 0) + block_size = estimateBlockSize(result_size); + + dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.))); + dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) / std::log(2.))); + + // CUFFT has hard-coded kernels for power-of-2 sizes (up to 8192), + // see CUDA Toolkit 4.1 CUFFT Library Programming Guide + if (dft_size.width > 8192) + dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1); + if (dft_size.height > 8192) + dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1); + + // To avoid wasting time doing small DFTs + dft_size.width = std::max(dft_size.width, 512); + dft_size.height = std::max(dft_size.height, 512); + + createContinuous(dft_size, CV_32F, image_block); + createContinuous(dft_size, CV_32F, templ_block); + createContinuous(dft_size, CV_32F, result_data); + + int spect_len = dft_size.height * (dft_size.width / 2 + 1); + createContinuous(1, spect_len, CV_32FC2, image_spect); + createContinuous(1, spect_len, CV_32FC2, templ_spect); + createContinuous(1, spect_len, CV_32FC2, result_spect); + + // Use maximum result matrix block size for the estimated DFT block size + block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width); + block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height); + } + + Size ConvolutionImpl::estimateBlockSize(Size result_size) + { + int width = (result_size.width + 2) / 3; + int height = (result_size.height + 2) / 3; + width = std::min(width, result_size.width); + height = std::min(height, result_size.height); + return Size(width, height); + } + + void ConvolutionImpl::convolve(InputArray _image, InputArray _templ, OutputArray _result, bool ccorr, Stream& _stream) + { + GpuMat image = getInputMat(_image, _stream); + GpuMat templ = getInputMat(_templ, _stream); + + CV_Assert( image.type() == CV_32FC1 ); + CV_Assert( templ.type() == CV_32FC1 ); + + create(image.size(), templ.size()); + + GpuMat result = getOutputMat(_result, result_size, CV_32FC1, _stream); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + + cufftHandle planR2C, planC2R; + cufftSafeCall( cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R) ); + cufftSafeCall( cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C) ); + + cufftSafeCall( cufftSetStream(planR2C, stream) ); + cufftSafeCall( cufftSetStream(planC2R, stream) ); + + GpuMat templ_roi(templ.size(), CV_32FC1, templ.data, templ.step); + cuda::copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, + templ_block.cols - templ_roi.cols, 0, Scalar(), _stream); + + cufftSafeCall( cufftExecR2C(planR2C, templ_block.ptr(), templ_spect.ptr()) ); + + // Process all blocks of the result matrix + for (int y = 0; y < result.rows; y += block_size.height) + { + for (int x = 0; x < result.cols; x += block_size.width) + { + Size image_roi_size(std::min(x + dft_size.width, image.cols) - x, + std::min(y + dft_size.height, image.rows) - y); + GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr(y) + x), + image.step); + cuda::copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows, + 0, image_block.cols - image_roi.cols, 0, Scalar(), _stream); + + cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr(), + image_spect.ptr())); + cuda::mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0, + 1.f / dft_size.area(), ccorr, _stream); + cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr(), + result_data.ptr())); + + Size result_roi_size(std::min(x + block_size.width, result.cols) - x, + std::min(y + block_size.height, result.rows) - y); + GpuMat result_roi(result_roi_size, result.type(), + (void*)(result.ptr(y) + x), result.step); + GpuMat result_block(result_roi_size, result_data.type(), + result_data.ptr(), result_data.step); + + result_block.copyTo(result_roi, _stream); + } + } + + cufftSafeCall( cufftDestroy(planR2C) ); + cufftSafeCall( cufftDestroy(planC2R) ); + + syncOutput(result, _result, _stream); + } +} + +#endif + +Ptr cv::cuda::createConvolution(Size user_block_size) +{ +#ifndef HAVE_CUFFT + CV_UNUSED(user_block_size); + CV_Error(Error::StsNotImplemented, "The library was build without CUFFT"); + return Ptr(); +#else + return makePtr(user_block_size); +#endif +} + +#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudaarithm/src/core.cpp b/modules/cudaarithm/src/core.cpp new file mode 100644 index 00000000000..6d97e15dbbd --- /dev/null +++ b/modules/cudaarithm/src/core.cpp @@ -0,0 +1,133 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::cuda; + +#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) + +void cv::cuda::merge(const GpuMat*, size_t, OutputArray, Stream&) { throw_no_cuda(); } +void cv::cuda::merge(const std::vector&, OutputArray, Stream&) { throw_no_cuda(); } + +void cv::cuda::split(InputArray, GpuMat*, Stream&) { throw_no_cuda(); } +void cv::cuda::split(InputArray, std::vector&, Stream&) { throw_no_cuda(); } + +void cv::cuda::transpose(InputArray, OutputArray, Stream&) { throw_no_cuda(); } + +void cv::cuda::flip(InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } + +void cv::cuda::copyMakeBorder(InputArray, OutputArray, int, int, int, int, int, Scalar, Stream&) { throw_no_cuda(); } + +#else /* !defined (HAVE_CUDA) */ + +//////////////////////////////////////////////////////////////////////// +// flip + +namespace +{ + template struct NppTypeTraits; + template<> struct NppTypeTraits { typedef Npp8u npp_t; }; + template<> struct NppTypeTraits { typedef Npp8s npp_t; }; + template<> struct NppTypeTraits { typedef Npp16u npp_t; }; + template<> struct NppTypeTraits { typedef Npp16s npp_t; }; + template<> struct NppTypeTraits { typedef Npp32s npp_t; }; + template<> struct NppTypeTraits { typedef Npp32f npp_t; }; + template<> struct NppTypeTraits { typedef Npp64f npp_t; }; + + template struct NppMirrorFunc + { + typedef typename NppTypeTraits::npp_t npp_t; + + typedef NppStatus (*func_t)(const npp_t* pSrc, int nSrcStep, npp_t* pDst, int nDstStep, NppiSize oROI, NppiAxis flip); + }; + + template ::func_t func> struct NppMirror + { + typedef typename NppMirrorFunc::npp_t npp_t; + + static void call(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream) + { + NppStreamHandler h(stream); + + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + + nppSafeCall( func(src.ptr(), static_cast(src.step), + dst.ptr(), static_cast(dst.step), sz, + (flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; +} + +void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& stream) +{ + typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream); + static const func_t funcs[6][4] = + { + {NppMirror::call, 0, NppMirror::call, NppMirror::call}, + {0,0,0,0}, + {NppMirror::call, 0, NppMirror::call, NppMirror::call}, + {0,0,0,0}, + {NppMirror::call, 0, NppMirror::call, NppMirror::call}, + {NppMirror::call, 0, NppMirror::call, NppMirror::call} + }; + + GpuMat src = getInputMat(_src, stream); + + CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F); + CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4); + + _dst.create(src.size(), src.type()); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); + + funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream)); + + syncOutput(dst, _dst, stream); +} + +#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudaarithm/src/cuda/absdiff_mat.cu b/modules/cudaarithm/src/cuda/absdiff_mat.cu new file mode 100644 index 00000000000..ec04f122845 --- /dev/null +++ b/modules/cudaarithm/src/cuda/absdiff_mat.cu @@ -0,0 +1,188 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudev.hpp" + +using namespace cv::cudev; + +void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int); + +namespace +{ + __device__ __forceinline__ int _abs(int a) + { + return ::abs(a); + } + __device__ __forceinline__ float _abs(float a) + { + return ::fabsf(a); + } + __device__ __forceinline__ double _abs(double a) + { + return ::fabs(a); + } + + template struct AbsDiffOp1 : binary_function + { + __device__ __forceinline__ T operator ()(T a, T b) const + { + return saturate_cast(_abs(a - b)); + } + }; + + template struct TransformPolicy : DefaultTransformPolicy + { + }; + template <> struct TransformPolicy : DefaultTransformPolicy + { + enum { + shift = 1 + }; + }; + + template + void absDiffMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) + { + gridTransformBinary_< TransformPolicy >(globPtr(src1), globPtr(src2), globPtr(dst), AbsDiffOp1(), stream); + } + + struct AbsDiffOp2 : binary_function + { + __device__ __forceinline__ uint operator ()(uint a, uint b) const + { + return vabsdiff2(a, b); + } + }; + + void absDiffMat_v2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) + { + const int vcols = src1.cols >> 1; + + GlobPtrSz src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols); + GlobPtrSz src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols); + GlobPtrSz dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols); + + gridTransformBinary(src1_, src2_, dst_, AbsDiffOp2(), stream); + } + + struct AbsDiffOp4 : binary_function + { + __device__ __forceinline__ uint operator ()(uint a, uint b) const + { + return vabsdiff4(a, b); + } + }; + + void absDiffMat_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) + { + const int vcols = src1.cols >> 2; + + GlobPtrSz src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols); + GlobPtrSz src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols); + GlobPtrSz dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols); + + gridTransformBinary(src1_, src2_, dst_, AbsDiffOp4(), stream); + } +} + +void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int) +{ + typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream); + static const func_t funcs[] = + { + absDiffMat_v1, + absDiffMat_v1, + absDiffMat_v1, + absDiffMat_v1, + absDiffMat_v1, + absDiffMat_v1, + absDiffMat_v1 + }; + + const int depth = src1.depth(); + + CV_DbgAssert( depth <= CV_64F ); + + GpuMat src1_ = src1.reshape(1); + GpuMat src2_ = src2.reshape(1); + GpuMat dst_ = dst.reshape(1); + + if (depth == CV_8U || depth == CV_16U) + { + const intptr_t src1ptr = reinterpret_cast(src1_.data); + const intptr_t src2ptr = reinterpret_cast(src2_.data); + const intptr_t dstptr = reinterpret_cast(dst_.data); + + const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0; + + if (isAllAligned) + { + if (depth == CV_8U && (src1_.cols & 3) == 0) + { + absDiffMat_v4(src1_, src2_, dst_, stream); + return; + } + else if (depth == CV_16U && (src1_.cols & 1) == 0) + { + absDiffMat_v2(src1_, src2_, dst_, stream); + return; + } + } + } + + const func_t func = funcs[depth]; + + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src1_, src2_, dst_, stream); +} + +#endif diff --git a/modules/cudaarithm/src/cuda/absdiff_scalar.cu b/modules/cudaarithm/src/cuda/absdiff_scalar.cu new file mode 100644 index 00000000000..0955e40c8b1 --- /dev/null +++ b/modules/cudaarithm/src/cuda/absdiff_scalar.cu @@ -0,0 +1,133 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudev.hpp" + +using namespace cv::cudev; + +void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int); + +namespace +{ + template struct AbsDiffScalarOp : unary_function + { + ScalarType val; + + __device__ __forceinline__ DstType operator ()(SrcType a) const + { + abs_func f; + return saturate_cast(f(saturate_cast(a) - val)); + } + }; + + template struct TransformPolicy : DefaultTransformPolicy + { + }; + template <> struct TransformPolicy : DefaultTransformPolicy + { + enum { + shift = 1 + }; + }; + + template + void absDiffScalarImpl(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream) + { + typedef typename MakeVec::cn>::type ScalarType; + + cv::Scalar_ value_ = value; + + AbsDiffScalarOp op; + op.val = VecTraits::make(value_.val); + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), op, stream); + } +} + +void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int) +{ + typedef void (*func_t)(const GpuMat& src, cv::Scalar val, GpuMat& dst, Stream& stream); + static const func_t funcs[7][4] = + { + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + }, + { + absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl + } + }; + + const int sdepth = src.depth(); + const int cn = src.channels(); + + CV_DbgAssert( sdepth <= CV_64F && cn <= 4 && src.type() == dst.type()); + + const func_t func = funcs[sdepth][cn - 1]; + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src, val, dst, stream); +} + +#endif diff --git a/modules/cudaarithm/src/cuda/add_mat.cu b/modules/cudaarithm/src/cuda/add_mat.cu new file mode 100644 index 00000000000..4166cc104e0 --- /dev/null +++ b/modules/cudaarithm/src/cuda/add_mat.cu @@ -0,0 +1,225 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudev.hpp" + +using namespace cv::cudev; + +void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int); + +namespace +{ + template struct AddOp1 : binary_function + { + __device__ __forceinline__ D operator ()(T a, T b) const + { + return saturate_cast(a + b); + } + }; + + template + void addMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream) + { + if (mask.data) + gridTransformBinary(globPtr(src1), globPtr(src2), globPtr(dst), AddOp1(), globPtr(mask), stream); + else + gridTransformBinary(globPtr(src1), globPtr(src2), globPtr(dst), AddOp1(), stream); + } + + struct AddOp2 : binary_function + { + __device__ __forceinline__ uint operator ()(uint a, uint b) const + { + return vadd2(a, b); + } + }; + + void addMat_v2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) + { + const int vcols = src1.cols >> 1; + + GlobPtrSz src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols); + GlobPtrSz src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols); + GlobPtrSz dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols); + + gridTransformBinary(src1_, src2_, dst_, AddOp2(), stream); + } + + struct AddOp4 : binary_function + { + __device__ __forceinline__ uint operator ()(uint a, uint b) const + { + return vadd4(a, b); + } + }; + + void addMat_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream) + { + const int vcols = src1.cols >> 2; + + GlobPtrSz src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols); + GlobPtrSz src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols); + GlobPtrSz dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols); + + gridTransformBinary(src1_, src2_, dst_, AddOp4(), stream); + } +} + +void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int) +{ + typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream); + static const func_t funcs[7][7] = + { + { + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1 + }, + { + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1 + }, + { + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1 + }, + { + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1, + addMat_v1 + }, + { + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + addMat_v1, + addMat_v1, + addMat_v1 + }, + { + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + addMat_v1, + addMat_v1 + }, + { + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + 0 /*addMat_v1*/, + addMat_v1 + } + }; + + const int sdepth = src1.depth(); + const int ddepth = dst.depth(); + + CV_DbgAssert( sdepth <= CV_64F && ddepth <= CV_64F ); + + GpuMat src1_ = src1.reshape(1); + GpuMat src2_ = src2.reshape(1); + GpuMat dst_ = dst.reshape(1); + + if (mask.empty() && (sdepth == CV_8U || sdepth == CV_16U) && ddepth == sdepth) + { + const intptr_t src1ptr = reinterpret_cast(src1_.data); + const intptr_t src2ptr = reinterpret_cast(src2_.data); + const intptr_t dstptr = reinterpret_cast(dst_.data); + + const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0; + + if (isAllAligned) + { + if (sdepth == CV_8U && (src1_.cols & 3) == 0) + { + addMat_v4(src1_, src2_, dst_, stream); + return; + } + else if (sdepth == CV_16U && (src1_.cols & 1) == 0) + { + addMat_v2(src1_, src2_, dst_, stream); + return; + } + } + } + + const func_t func = funcs[sdepth][ddepth]; + + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src1_, src2_, dst_, mask, stream); +} + +#endif diff --git a/modules/cudaarithm/src/cuda/add_scalar.cu b/modules/cudaarithm/src/cuda/add_scalar.cu new file mode 100644 index 00000000000..92838a2a57d --- /dev/null +++ b/modules/cudaarithm/src/cuda/add_scalar.cu @@ -0,0 +1,180 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudev.hpp" + +using namespace cv::cudev; + +void addScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int); + +namespace +{ + template struct AddScalarOp : unary_function + { + ScalarType val; + + __device__ __forceinline__ DstType operator ()(SrcType a) const + { + return saturate_cast(saturate_cast(a) + val); + } + }; + + template struct TransformPolicy : DefaultTransformPolicy + { + }; + template <> struct TransformPolicy : DefaultTransformPolicy + { + enum { + shift = 1 + }; + }; + + template + void addScalarImpl(const GpuMat& src, cv::Scalar value, GpuMat& dst, const GpuMat& mask, Stream& stream) + { + typedef typename MakeVec::cn>::type ScalarType; + + cv::Scalar_ value_ = value; + + AddScalarOp op; + op.val = VecTraits::make(value_.val); + + if (mask.data) + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), op, globPtr(mask), stream); + else + gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), op, stream); + } +} + +void addScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int) +{ + typedef void (*func_t)(const GpuMat& src, cv::Scalar val, GpuMat& dst, const GpuMat& mask, Stream& stream); + static const func_t funcs[7][7][4] = + { + { + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + }, + { + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/, 0 /*addScalarImpl*/}, + {addScalarImpl, addScalarImpl, addScalarImpl, addScalarImpl} + } + }; + + const int sdepth = src.depth(); + const int ddepth = dst.depth(); + const int cn = src.channels(); + + CV_DbgAssert( sdepth <= CV_64F && ddepth <= CV_64F && cn <= 4 ); + + const func_t func = funcs[sdepth][ddepth][cn - 1]; + + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src, val, dst, mask, stream); +} + +#endif diff --git a/modules/cudaarithm/src/cuda/add_weighted.cu b/modules/cudaarithm/src/cuda/add_weighted.cu new file mode 100644 index 00000000000..929301076d3 --- /dev/null +++ b/modules/cudaarithm/src/cuda/add_weighted.cu @@ -0,0 +1,596 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudaarithm.hpp" +#include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" + +using namespace cv; +using namespace cv::cuda; +using namespace cv::cudev; + +namespace +{ + template struct AddWeightedOp : binary_function + { + S alpha; + S beta; + S gamma; + + __device__ __forceinline__ D operator ()(T1 a, T2 b) const + { + return cudev::saturate_cast(a * alpha + b * beta + gamma); + } + }; + + template struct TransformPolicy : DefaultTransformPolicy + { + }; + template <> struct TransformPolicy : DefaultTransformPolicy + { + enum { + shift = 1 + }; + }; + + template + void addWeightedImpl(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, Stream& stream) + { + typedef typename LargerType::type larger_type1; + typedef typename LargerType::type larger_type2; + typedef typename LargerType::type scalar_type; + + AddWeightedOp op; + op.alpha = static_cast(alpha); + op.beta = static_cast(beta); + op.gamma = static_cast(gamma); + + gridTransformBinary_< TransformPolicy >(globPtr(src1), globPtr(src2), globPtr(dst), op, stream); + } +} + +void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, double beta, double gamma, OutputArray _dst, int ddepth, Stream& stream) +{ + typedef void (*func_t)(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, Stream& stream); + static const func_t funcs[7][7][7] = + { + { + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + }, + { + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/, + 0/*addWeightedImpl*/ + }, + { + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl, + addWeightedImpl + } + } + }; + + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); + + int sdepth1 = src1.depth(); + int sdepth2 = src2.depth(); + + ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2); + const int cn = src1.channels(); + + CV_Assert( src2.size() == src1.size() && src2.channels() == cn ); + CV_Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F ); + + GpuMat dst = getOutputMat(_dst, src1.size(), CV_MAKE_TYPE(ddepth, cn), stream); + + GpuMat src1_single = src1.reshape(1); + GpuMat src2_single = src2.reshape(1); + GpuMat dst_single = dst.reshape(1); + + if (sdepth1 > sdepth2) + { + src1_single.swap(src2_single); + std::swap(alpha, beta); + std::swap(sdepth1, sdepth2); + } + + const func_t func = funcs[sdepth1][sdepth2][ddepth]; + + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src1_single, alpha, src2_single, beta, gamma, dst_single, stream); + + syncOutput(dst, _dst, stream); +} + +#endif diff --git a/modules/cudaarithm/src/cuda/bitwise_mat.cu b/modules/cudaarithm/src/cuda/bitwise_mat.cu new file mode 100644 index 00000000000..f151c1a4862 --- /dev/null +++ b/modules/cudaarithm/src/cuda/bitwise_mat.cu @@ -0,0 +1,230 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "opencv2/opencv_modules.hpp" + +#ifndef HAVE_OPENCV_CUDEV + +#error "opencv_cudev is required" + +#else + +#include "opencv2/cudaarithm.hpp" +#include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" + +using namespace cv; +using namespace cv::cuda; +using namespace cv::cudev; + +void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op); + +////////////////////////////////////////////////////////////////////////////// +/// bitwise_not + +void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream) +{ + GpuMat src = getInputMat(_src, stream); + GpuMat mask = getInputMat(_mask, stream); + + const int depth = src.depth(); + + CV_DbgAssert( depth <= CV_32F ); + CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); + + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); + + if (mask.empty()) + { + const int bcols = (int) (src.cols * src.elemSize()); + + if ((bcols & 3) == 0) + { + const int vcols = bcols >> 2; + + GlobPtrSz vsrc = globPtr((uint*) src.data, src.step, src.rows, vcols); + GlobPtrSz vdst = globPtr((uint*) dst.data, dst.step, src.rows, vcols); + + gridTransformUnary(vsrc, vdst, bit_not(), stream); + } + else if ((bcols & 1) == 0) + { + const int vcols = bcols >> 1; + + GlobPtrSz vsrc = globPtr((ushort*) src.data, src.step, src.rows, vcols); + GlobPtrSz vdst = globPtr((ushort*) dst.data, dst.step, src.rows, vcols); + + gridTransformUnary(vsrc, vdst, bit_not(), stream); + } + else + { + GlobPtrSz vsrc = globPtr((uchar*) src.data, src.step, src.rows, bcols); + GlobPtrSz vdst = globPtr((uchar*) dst.data, dst.step, src.rows, bcols); + + gridTransformUnary(vsrc, vdst, bit_not(), stream); + } + } + else + { + if (depth == CV_32F || depth == CV_32S) + { + GlobPtrSz vsrc = globPtr((uint*) src.data, src.step, src.rows, src.cols * src.channels()); + GlobPtrSz vdst = globPtr((uint*) dst.data, dst.step, src.rows, src.cols * src.channels()); + + gridTransformUnary(vsrc, vdst, bit_not(), singleMaskChannels(globPtr(mask), src.channels()), stream); + } + else if (depth == CV_16S || depth == CV_16U) + { + GlobPtrSz vsrc = globPtr((ushort*) src.data, src.step, src.rows, src.cols * src.channels()); + GlobPtrSz vdst = globPtr((ushort*) dst.data, dst.step, src.rows, src.cols * src.channels()); + + gridTransformUnary(vsrc, vdst, bit_not(), singleMaskChannels(globPtr(mask), src.channels()), stream); + } + else + { + GlobPtrSz vsrc = globPtr((uchar*) src.data, src.step, src.rows, src.cols * src.channels()); + GlobPtrSz vdst = globPtr((uchar*) dst.data, dst.step, src.rows, src.cols * src.channels()); + + gridTransformUnary(vsrc, vdst, bit_not(), singleMaskChannels(globPtr(mask), src.channels()), stream); + } + } + + syncOutput(dst, _dst, stream); +} + +////////////////////////////////////////////////////////////////////////////// +/// Binary bitwise logical operations + +namespace +{ + template