From 557aad1e401362b5d81cb4b7c68c8187da671248 Mon Sep 17 00:00:00 2001 From: OKA Motofumi Date: Tue, 23 Aug 2016 13:21:20 +0900 Subject: [PATCH 001/120] Turn: Add RGB48, RGB64 and planar with alpha formats support. --- avs_core/filters/turn.cpp | 129 +++++++++++++++++++++++++++++++++++--- avs_core/filters/turn.h | 12 +++- 2 files changed, 130 insertions(+), 11 deletions(-) diff --git a/avs_core/filters/turn.cpp b/avs_core/filters/turn.cpp index 2ba108634..ed2930711 100644 --- a/avs_core/filters/turn.cpp +++ b/avs_core/filters/turn.cpp @@ -1,4 +1,4 @@ -// Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. +// Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. // http://www.avisynth.org // This program is free software; you can redistribute it and/or modify @@ -34,7 +34,7 @@ /* ** Turn. version 0.1 -** (c) 2003 - Ernst PechÚ +** (c) 2003 - Ernst Peché ** */ @@ -367,6 +367,83 @@ void turn_right_rgb24(const BYTE* srcp, BYTE* dstp, int src_rowsize, int src_hei } +struct Rgb48 { + uint16_t b, g, r; +}; + + +void turn_left_rgb48(const BYTE* srcp, BYTE* dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch) +{ + turn_right_plane_c(srcp, dstp, src_rowsize, src_height, src_pitch, dst_pitch); +} + + +void turn_right_rgb48(const BYTE* srcp, BYTE* dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch) +{ + turn_right_plane_c(srcp + src_pitch * (src_height - 1), dstp + dst_pitch * (src_rowsize / 6 - 1), src_rowsize, src_height, -src_pitch, -dst_pitch); +} + + +void turn_left_rgb64_c(const BYTE* srcp, BYTE* dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch) +{ + turn_right_plane_c(srcp, dstp, src_rowsize, src_height, src_pitch, dst_pitch); +} + + +void turn_right_rgb64_c(const BYTE* srcp, BYTE* dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch) +{ + turn_right_plane_c(srcp + src_pitch * (src_height - 1), dstp + dst_pitch * (src_rowsize / 3 - 1), src_rowsize, src_height, -src_pitch, -dst_pitch); +} + + +static inline void turn_right_plane_64_sse2(const BYTE* srcp, BYTE* dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch) +{ + const BYTE* s0 = srcp + src_pitch * (src_height - 1); + int w = src_rowsize & ~15; + int h = src_height & ~1; + + for (int y = 0; y < h; y += 2) + { + BYTE* d0 = dstp + y * 8; + for (int x = 0; x < w; x += 16) + { + __m128i a01 = _mm_loadu_si128(reinterpret_cast(s0 + x)); + __m128i b01 = _mm_loadu_si128(reinterpret_cast(s0 + x - src_pitch)); + __m128i ab0 = _mm_unpacklo_epi64(a01, b01); + __m128i ab1 = _mm_unpacklo_epi64(a01, b01); + _mm_storeu_si128(reinterpret_cast<__m128i*>(d0), ab0); + _mm_storeu_si128(reinterpret_cast<__m128i*>(d0 + dst_pitch), ab1); + d0 += 2 * dst_pitch; + } + s0 -= 2 * src_pitch; + } + + if (src_rowsize != w) + { + turn_right_plane_c(srcp + w, dstp + w / 8 * dst_pitch, 8, src_height, src_pitch, dst_pitch); + } + + if (src_height != h) + { + turn_right_plane_c(srcp, dstp + h * 8, src_rowsize, 1, src_pitch, dst_pitch); + } +} + + +void turn_left_rgb64_sse2(const BYTE* srcp, BYTE* dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch) +{ + turn_right_plane_64_sse2(srcp, dstp, src_rowsize, src_height, src_pitch, dst_pitch); +} + + +void turn_right_rgb64_sse2(const BYTE* srcp, BYTE* dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch) +{ + turn_right_plane_64_sse2(srcp + src_pitch * (src_height - 1), dstp + dst_pitch * (src_rowsize / 8 - 1), src_rowsize, src_height, -src_pitch, -dst_pitch); +} + + + + static void turn_right_yuy2(const BYTE* srcp, 
BYTE* dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch) { dstp += (src_height - 2) * 2; @@ -445,7 +522,11 @@ static void turn_180_plane_xsse(const BYTE* srcp, BYTE* dstp, int src_rowsize, i for (int x = 0; x < w; x += 16) { __m128i src = _mm_loadu_si128(reinterpret_cast(s0 + x)); - if (sizeof(T) == 4) + if (sizeof(T) == 8) + { + src = _mm_shuffle_epi32(src, _MM_SHUFFLE(1, 0, 3, 2)); + } + else if (sizeof(T) == 4) { src = _mm_shuffle_epi32(src, _MM_SHUFFLE(0, 1, 2, 3)); } @@ -498,11 +579,18 @@ static void turn_180_yuy2(const BYTE* srcp, BYTE* dstp, int src_rowsize, int src Turn::Turn(PClip c, int direction, IScriptEnvironment* env) : GenericVideoFilter(c), u_source(nullptr), v_source(nullptr) { - num_planes = (vi.pixel_type & VideoInfo::CS_INTERLEAVED) ? 1 : 3; + if (vi.pixel_type & VideoInfo::CS_INTERLEAVED) { + num_planes = 1; + } else if (vi.IsPlanarRGBA() || vi.IsYUVA()) { + num_planes = 4; + } else { + num_planes = 3; + } splanes[0] = 0; - splanes[1] = PLANAR_U; - splanes[2] = PLANAR_V; + splanes[1] = vi.IsRGB() ? PLANAR_B : PLANAR_U; + splanes[2] = vi.IsRGB() ? PLANAR_R : PLANAR_V; + splanes[3] = PLANAR_A; if (direction != DIRECTION_180) { @@ -567,11 +655,26 @@ void Turn::SetTurnFunction(int direction, IScriptEnvironment* env) funcs[2] = t180; }; - if (vi.IsRGB32()) + if (vi.IsRGB64()) + { + if (cpu & CPUF_SSE2) + { + set_funcs(turn_left_rgb64_sse2, turn_right_rgb64_sse2, turn_180_plane_xsse); + } + else + { + set_funcs(turn_left_rgb64_c, turn_right_rgb64_c, turn_180_plane_c); + } + } + else if (vi.IsRGB48()) + { + set_funcs(turn_left_rgb48, turn_right_rgb48, turn_180_plane_c); + } + else if (vi.IsRGB32()) { if (cpu & CPUF_SSE2) { - set_funcs(turn_left_rgb32_sse2, turn_right_rgb32_sse2, turn_180_plane_xsse); + set_funcs(turn_left_rgb32_sse2, turn_right_rgb32_sse2, turn_180_plane_xsse); } else { @@ -632,15 +735,21 @@ int __stdcall Turn::SetCacheHints(int cachehints, int frame_range) PVideoFrame __stdcall Turn::GetFrame(int n, IScriptEnvironment* env) { - static const int dplanes[] = { 0, PLANAR_U, PLANAR_V }; + static const int dplanes[] = { + 0, + vi.IsRGB() ? PLANAR_B : PLANAR_U, + vi.IsRGB() ? PLANAR_R : PLANAR_V, + PLANAR_A, + }; auto src = child->GetFrame(n, env); auto dst = env->NewVideoFrame(vi); - PVideoFrame srcs[3] = { + PVideoFrame srcs[4] = { src, u_source ? u_source->GetFrame(n, env) : src, v_source ? 
v_source->GetFrame(n, env) : src, + src, }; for (int p = 0; p < num_planes; ++p) { diff --git a/avs_core/filters/turn.h b/avs_core/filters/turn.h index 0d274c75d..9946ffb8f 100644 --- a/avs_core/filters/turn.h +++ b/avs_core/filters/turn.h @@ -46,7 +46,7 @@ class Turn : public GenericVideoFilter { PClip v_source; int num_planes; - int splanes[3]; + int splanes[4]; void SetUVSource(int mul_h, int mul_v, IScriptEnvironment* env); void SetTurnFunction(int direction, IScriptEnvironment* env); @@ -79,6 +79,11 @@ void turn_left_rgb24(const BYTE *srcp, BYTE *dstp, int src_rowsize, int height, void turn_left_rgb32_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int height, int src_pitch, int dst_pitch); void turn_left_rgb32_sse2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch); +void turn_left_rgb48(const BYTE *srcp, BYTE *dstp, int src_rowsize, int height, int src_pitch, int dst_pitch); + +void turn_left_rgb64_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int height, int src_pitch, int dst_pitch); +void turn_left_rgb64_sse2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch); + void turn_right_plane_8_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int height, int src_pitch, int dst_pitch); void turn_right_plane_8_sse2(const BYTE* srcp, BYTE* dstp, int src_rowsize, int srcHeight, int src_pitch, int dst_pitch); @@ -93,4 +98,9 @@ void turn_right_rgb24(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_hei void turn_right_rgb32_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch); void turn_right_rgb32_sse2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch); +void turn_right_rgb48(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch); + +void turn_right_rgb64_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch); +void turn_right_rgb64_sse2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch); + #endif // _AVS_TURN_H From 3e0456ae7f1f0b4fd6bc9f12a7044c150151c096 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Tue, 23 Aug 2016 16:48:55 +0200 Subject: [PATCH 002/120] ConvertTo8/16/Float: true 10-12-14 bit range support. New parameters. 
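The conversions in this patch follow two distinct scaling rules: packed/planar RGB is treated as full range and rescaled against (2^bits)-1, while YUV keeps its limited-range coding and is only bit-shifted; the new truerange parameter chooses between honouring the nominal 10/12/14-bit depth and treating the data as 16-bit-scaled. A minimal standalone sketch of those two rules, with illustrative helper names that are not functions from this patch:

  // Sketch only: helper names are illustrative, not part of the patch.
  #include <cstdint>
  #include <cstdio>

  // RGB full range: rescale 0..(2^src - 1) to 0..(2^dst - 1), matching the
  // convert_rgb_* templates below (e.g. *255/1023 for 10->8 bits).
  static uint16_t scale_rgb_full_range(uint16_t v, int source_bits, int target_bits) {
    const int64_t src_max = (1 << source_bits) - 1;
    const int64_t dst_max = (1 << target_bits) - 1;
    return (uint16_t)(v * dst_max / src_max);
  }

  // YUV: plain bit shift, as convert_8_to_uint16_c / convert_uint16_to_8_c do.
  static uint16_t scale_yuv_shift(uint16_t v, int source_bits, int target_bits) {
    return target_bits >= source_bits ? (uint16_t)(v << (target_bits - source_bits))
                                      : (uint16_t)(v >> (source_bits - target_bits));
  }

  int main() {
    // 10-bit maximum taken to 16 bits: 65535 on the RGB path, 1023<<6 = 65472 on the YUV path.
    std::printf("%u %u\n", (unsigned)scale_rgb_full_range(1023, 10, 16),
                           (unsigned)scale_yuv_shift(1023, 10, 16));
    return 0;
  }

With truerange=false the rescale is skipped and only the pixel_type is relabelled, keeping the data at 16-bit scale.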
--- avs_core/convert/convert.cpp | 449 +++++++++++++++++++++++------- avs_core/convert/convert_planar.h | 16 +- 2 files changed, 356 insertions(+), 109 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 12f442042..355dd1422 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -64,9 +64,9 @@ extern const AVSFunction Convert_filters[] = { // matrix can be "rec601", { "ConvertToYUV420", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s[ChromaOutPlacement]s", ConvertToPlanarGeneric::CreateYUV420}, { "ConvertToYUV422", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToPlanarGeneric::CreateYUV422}, { "ConvertToYUV444", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToPlanarGeneric::CreateYUV444}, - { "ConvertTo8bit", BUILTIN_FUNC_PREFIX, "c[scale]f[dither]i", ConvertTo8bit::Create}, - { "ConvertTo16bit", BUILTIN_FUNC_PREFIX, "c[scale]f[dither]i[bits]i[modifyrange]b", ConvertTo16bit::Create}, - { "ConvertToFloat", BUILTIN_FUNC_PREFIX, "c[scale]f", ConvertToFloat::Create}, + { "ConvertTo8bit", BUILTIN_FUNC_PREFIX, "c[truerange]b[dither]i[scale]f", ConvertTo8bit::Create}, + { "ConvertTo16bit", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f", ConvertTo16bit::Create}, + { "ConvertToFloat", BUILTIN_FUNC_PREFIX, "c[truerange]b[scale]f", ConvertToFloat::Create}, { 0 } }; @@ -746,7 +746,8 @@ AVSValue __cdecl ConvertToYV12::Create(AVSValue args, void*, IScriptEnvironment* ****** Bitdepth conversions ***** **********************************/ -static void convert_rgb_16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +template +static void convert_rgb_uint16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { const uint16_t *srcp0 = reinterpret_cast(srcp); src_pitch = src_pitch / sizeof(uint16_t); @@ -755,7 +756,15 @@ static void convert_rgb_16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, { for (int x = 0; x < src_width; x++) { - dstp[x] = srcp0[x] / 257; // RGB: full range 0..255 <-> 0..65535 + // test + if(sourcebits==16) + dstp[x] = srcp0[x] / 257; // RGB: full range 0..255 <-> 0..65535 (*255 / 65535) + else if (sourcebits==14) + dstp[x] = srcp0[x] * 255 / 16383; // RGB: full range 0..255 <-> 0..16384-1 + else if (sourcebits==12) + dstp[x] = srcp0[x] * 255 / 4095; // RGB: full range 0..255 <-> 0..4096-1 + else if (sourcebits==10) + dstp[x] = srcp0[x] * 255 / 1023; // RGB: full range 0..255 <-> 0..1024-1 } dstp += dst_pitch; srcp0 += src_pitch; @@ -764,7 +773,9 @@ static void convert_rgb_16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, // YUV conversions (bit shifts) // BitDepthConvFuncPtr -static void convert_16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +// Conversion from 16-14-12-10 to 8 bits (bitshift: 8-6-4-2) +template +static void convert_uint16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { const uint16_t *srcp0 = reinterpret_cast(srcp); src_pitch = src_pitch / sizeof(uint16_t); @@ -773,15 +784,15 @@ static void convert_16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int { for (int x = 0; x < src_width; x++) { - dstp[x] = srcp0[x] >> 8; // no dithering, no range conversion, simply use 
msb + dstp[x] = srcp0[x] >> (sourcebits-8); // no dithering, no range conversion, simply shift } dstp += dst_pitch; srcp0 += src_pitch; } } -// float to 8 bit, float to 16 bit -template +// float to 8 bit, float to 10/12/14/16 bit +template static void convert_32_to_uintN_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { const float *srcp0 = reinterpret_cast(srcp); @@ -792,11 +803,7 @@ static void convert_32_to_uintN_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_width = src_rowsize / sizeof(float); - float max_dst_pixelvalue; - if(sizeof(pixel_t)==1) - max_dst_pixelvalue = 255.0f; - if(sizeof(pixel_t)==2) - max_dst_pixelvalue = 65535.0f; + float max_dst_pixelvalue = (float)((1< +static void convert_rgb_8_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { const uint8_t *srcp0 = reinterpret_cast(srcp); uint16_t *dstp0 = reinterpret_cast(dstp); @@ -827,14 +835,24 @@ static void convert_rgb_8_to_16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, { for (int x = 0; x < src_width; x++) { - dstp0[x] = srcp0[x] * 257; // hope that compiler recognizes n*256 +n + // test + if(targetbits==16) + dstp0[x] = srcp0[x] * 257; // RGB: full range 0..255 <-> 0..65535 (257 = 65535 / 255) + else if (targetbits==14) + dstp0[x] = srcp0[x] * 16383 / 255; // RGB: full range 0..255 <-> 0..16384-1 + else if (targetbits==12) + dstp0[x] = srcp0[x] * 4095 / 255; // RGB: full range 0..255 <-> 0..4096-1 + else if (targetbits==10) + dstp0[x] = srcp0[x] * 1023 / 255; // RGB: full range 0..255 <-> 0..1024-1 } dstp0 += dst_pitch; srcp0 += src_pitch; } } -static void convert_8_to_16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +// YUV: bit shift 8 to 10-12-14-16 bits +template +static void convert_8_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { const uint8_t *srcp0 = reinterpret_cast(srcp); uint16_t *dstp0 = reinterpret_cast(dstp); @@ -848,15 +866,72 @@ static void convert_8_to_16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int { for (int x = 0; x < src_width; x++) { - dstp0[x] = srcp0[x] << 8; + dstp0[x] = srcp0[x] << (targetbits-8); } dstp0 += dst_pitch; srcp0 += src_pitch; } } -// 8 bit to float, 16 bit to float -template +// RGB full range: 10-12-14 <=> 16 bits +template +static void convert_rgb_uint16_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +{ + const uint16_t *srcp0 = reinterpret_cast(srcp); + uint16_t *dstp0 = reinterpret_cast(dstp); + + src_pitch = src_pitch / sizeof(uint16_t); + dst_pitch = dst_pitch / sizeof(uint16_t); + + const int src_width = src_rowsize / sizeof(uint16_t); + + const uint16_t source_max = (1 << sourcebits) - 1; + const uint16_t target_max = (1 << targetbits) - 1; + + for(int y=0; y 16 bits +template +static void convert_uint16_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +{ + const uint16_t *srcp0 = reinterpret_cast(srcp); + uint16_t *dstp0 = reinterpret_cast(dstp); + + src_pitch = src_pitch / sizeof(uint16_t); + dst_pitch = dst_pitch / sizeof(uint16_t); + + const int src_width = src_rowsize / sizeof(uint16_t); + + for(int y=0; y> shiftbits; // reduce range + } + dstp0 += dst_pitch; + srcp0 += src_pitch; + } +} + +// 8 bit to float, 
16/14/12/10 bits to float +template static void convert_uintN_to_float_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { const pixel_t *srcp0 = reinterpret_cast(srcp); @@ -867,11 +942,7 @@ static void convert_uintN_to_float_c(const BYTE *srcp, BYTE *dstp, int src_rowsi int src_width = src_rowsize / sizeof(pixel_t); - float max_src_pixelvalue; - if (sizeof(pixel_t) == 1) - max_src_pixelvalue = 255.0; - if (sizeof(pixel_t) == 2) - max_src_pixelvalue = 65535.0; + float max_src_pixelvalue = (float)((1< 0..float_range @@ -886,22 +957,47 @@ static void convert_uintN_to_float_c(const BYTE *srcp, BYTE *dstp, int src_rowsi } } -ConvertTo8bit::ConvertTo8bit(PClip _child, const float _float_range, const int _dither_mode, IScriptEnvironment* env) : - GenericVideoFilter(_child), float_range(_float_range), dither_mode(_dither_mode) +ConvertTo8bit::ConvertTo8bit(PClip _child, const float _float_range, const int _dither_mode, const int _bitdepth, const int _truerange, IScriptEnvironment* env) : + GenericVideoFilter(_child), float_range(_float_range), dither_mode(_dither_mode), source_bitdepth(_bitdepth), truerange(_truerange) { - if (vi.ComponentSize() == 2) // 16->8 bit + if (vi.ComponentSize() == 2) // 16(,14,12,10)->8 bit { // for RGB scaling is not shift by 8 as in YUV but 0..65535->0..255 - if (vi.IsRGB48() || vi.IsRGB64() || vi.IsPlanarRGB() || vi.IsPlanarRGBA()) - conv_function = convert_rgb_16_to_8_c; - else if (vi.IsYUV() || vi.IsYUVA()) - conv_function = convert_16_to_8_c; - else + if (vi.IsRGB48() || vi.IsRGB64()) + conv_function = convert_rgb_uint16_to_8_c<16>; + else if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { + if(truerange) { + switch(source_bitdepth) + { + case 10: conv_function = convert_rgb_uint16_to_8_c<10>; break; + case 12: conv_function = convert_rgb_uint16_to_8_c<12>; break; + case 14: conv_function = convert_rgb_uint16_to_8_c<14>; break; + case 16: conv_function = convert_rgb_uint16_to_8_c<16>; break; + default: env->ThrowError("ConvertTo8bit: invalid source bitdepth"); + } + } else { + conv_function = convert_rgb_uint16_to_8_c<16>; + } + } else if (vi.IsYUV() || vi.IsYUVA()) + { + if(truerange) { + switch(source_bitdepth) + { + case 10: conv_function = convert_uint16_to_8_c<10>; break; + case 12: conv_function = convert_uint16_to_8_c<12>; break; + case 14: conv_function = convert_uint16_to_8_c<14>; break; + case 16: conv_function = convert_uint16_to_8_c<16>; break; + default: env->ThrowError("ConvertTo8bit: invalid source bitdepth"); + } + } else { + conv_function = convert_uint16_to_8_c<16>; // always convet from 16 bit scale + } + } else env->ThrowError("ConvertTo8bit: unsupported color space"); } else if (vi.ComponentSize() == 4) // 32->8 bit { - conv_function = convert_32_to_uintN_c; + conv_function = convert_32_to_uintN_c; } else env->ThrowError("ConvertTo8bit: unsupported bit depth"); @@ -931,6 +1027,8 @@ AVSValue __cdecl ConvertTo8bit::Create(AVSValue args, void*, IScriptEnvironment* const VideoInfo &vi = clip->GetVideoInfo(); + // c[truerange]b[dither]i[scale]f, + if (!vi.IsPlanar() && !vi.IsRGB()) env->ThrowError("ConvertTo8bit: Can only convert from Planar YUV/RGB or packed RGB."); @@ -952,7 +1050,17 @@ AVSValue __cdecl ConvertTo8bit::Create(AVSValue args, void*, IScriptEnvironment* // dither parameter rfu int dither_type = args[2].AsInt(-1); - return new ConvertTo8bit(clip, float_range, dither_type, env); + if ((!vi.IsPlanar() || vi.ComponentSize() != 2) && args[3].Defined()) + 
env->ThrowError("ConvertTo8bit: truerange specified for non-16bit or non-planar source"); + + int source_bitdepth = 16; // n/a + if (vi.IsPlanar() && vi.ComponentSize() == 2) + source_bitdepth = vi.BitsPerComponent(); + + // when converting from 10-16 bit formats, truerange=false indicates bitdepth of 16 bits regardless of the 10-12-14 bit format + int assume_truerange = args[3].AsBool(true); // n/a for non planar formats + + return new ConvertTo8bit(clip, float_range, dither_type, source_bitdepth, assume_truerange, env); } PVideoFrame __stdcall ConvertTo8bit::GetFrame(int n, IScriptEnvironment* env) { @@ -981,37 +1089,146 @@ PVideoFrame __stdcall ConvertTo8bit::GetFrame(int n, IScriptEnvironment* env) { return dst; } -// 16 bit -ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int _dither_mode, const int _bitdepth, bool _modify_range, IScriptEnvironment* env) : - GenericVideoFilter(_child), float_range(_float_range), dither_mode(_dither_mode), bitdepth(_bitdepth), modify_range(_modify_range) +// Conversion to uint16_t targets +// planar YUV(A) and RGB(A): +// from 8 bit -> 10/12/14/16 with strict range expansion or expansion to 16 +// from 10/12/14 -> 16 bit with strict source range (expansion from 10/12/14 to 16 bit) or just casting pixel_type +// from 16 bit -> 10/12/14 bit with strict target range (reducing range from 16 bit to 10/12/14 bits) or just casting pixel_type +// from float -> 10/12/14/16 with strict range expansion or expansion to 16 +// packed RGB: +// RGB24->RGB48, RGB32->RGB64 +ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int _dither_mode, const int _source_bitdepth, const int _target_bitdepth, bool _truerange, IScriptEnvironment* env) : + GenericVideoFilter(_child), float_range(_float_range), dither_mode(_dither_mode), source_bitdepth(_source_bitdepth), target_bitdepth(_target_bitdepth), truerange(_truerange) { - change_only_format = false; + change_only_format = false; - if (vi.ComponentSize() == 1) // 8->16 bit - { - // for RGB scaling is not shift by 8 as in YUV but 0..65535->0..255 - if (vi.IsPlanarRGB() || vi.IsPlanarRGBA() || vi.IsRGB24() || vi.IsRGB32()) - conv_function = convert_rgb_8_to_16_c; - else if(vi.IsYUV() || vi.IsYUVA()) - conv_function = convert_8_to_16_c; - else - env->ThrowError("ConvertTo16bit: unsupported color space"); - } else if (vi.ComponentSize() == 2) - { - // 10/12/14 -> 16 bit / 16 bit -> 10/12/14 bit - // range reducing or expansion, or just overriding the pixel_type - if(modify_range) - env->ThrowError("ConvertTo16bit: effective bit depth conversion within 16 bit is not yet implemented"); - else - change_only_format = true; - } else if (vi.ComponentSize() == 4) // 32->16 bit - { - conv_function = convert_32_to_uintN_c; - } else - env->ThrowError("ConvertTo16bit: unsupported bit depth"); + if (vi.ComponentSize() == 1) // 8->10-12-14-16 bit + { + // RGB scaling is not shift by 8 as in YUV but like 0..255->0..65535 + if (vi.IsRGB24() || vi.IsRGB32()) + conv_function = convert_rgb_8_to_uint16_c<16>; + else if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { + if (truerange) + { + switch (target_bitdepth) + { + case 10: conv_function = convert_rgb_8_to_uint16_c<10>; break; + case 12: conv_function = convert_rgb_8_to_uint16_c<12>; break; + case 14: conv_function = convert_rgb_8_to_uint16_c<14>; break; + case 16: conv_function = convert_rgb_8_to_uint16_c<16>; break; + default: env->ThrowError("ConvertTo16bit: unsupported bit depth"); + } + } + else { + conv_function = 
convert_rgb_8_to_uint16_c<16>; + } + } + else if (vi.IsYUV() || vi.IsYUVA()) { + if (truerange) + { + switch (target_bitdepth) + { + case 10: conv_function = convert_8_to_uint16_c<10>; break; + case 12: conv_function = convert_8_to_uint16_c<12>; break; + case 14: conv_function = convert_8_to_uint16_c<14>; break; + case 16: conv_function = convert_8_to_uint16_c<16>; break; + default: env->ThrowError("ConvertTo16bit: unsupported bit depth"); + } + } + else { + conv_function = convert_8_to_uint16_c<16>; // always 16 bit scale + } + } + else + env->ThrowError("ConvertTo16bit: unsupported color space"); + } + else if (vi.ComponentSize() == 2) + { + // 10/12/14 -> 16 bit or 16 bit -> 10/12/14 bit + // range reducing or expansion (truerange=true), or just overriding the pixel_type, keeping scale at 16 bits + if (truerange) { + // invalid combinations were already checked + if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { + if (source_bitdepth > target_bitdepth) // reduce range + { + if (source_bitdepth == 16) // 16->10/12/14 keep full range + switch (target_bitdepth) + { + case 10: conv_function = convert_rgb_uint16_to_uint16_c<16, 10>; break; + case 12: conv_function = convert_rgb_uint16_to_uint16_c<16, 12>; break; + case 14: conv_function = convert_rgb_uint16_to_uint16_c<16, 14>; break; + } + else if (source_bitdepth == 14) // 14->10/12 keep full range + switch (target_bitdepth) + { + case 10: conv_function = convert_rgb_uint16_to_uint16_c<14, 10>; break; + case 12: conv_function = convert_rgb_uint16_to_uint16_c<14, 12>; break; + } + else if (source_bitdepth == 12) // 14->10/12 keep full range + switch (target_bitdepth) + { + case 10: conv_function = convert_rgb_uint16_to_uint16_c<12, 10>; break; + } + } else {// expand + if (target_bitdepth == 16) // 10/12/14->16 keep full range + switch (source_bitdepth) + { + case 10: conv_function = convert_rgb_uint16_to_uint16_c<10, 16>; break; + case 12: conv_function = convert_rgb_uint16_to_uint16_c<12, 16>; break; + case 14: conv_function = convert_rgb_uint16_to_uint16_c<14, 16>; break; + } + else if (target_bitdepth == 14) // 10/12->14 keep full range + switch (source_bitdepth) + { + case 10: conv_function = convert_rgb_uint16_to_uint16_c<10, 14>; break; + case 12: conv_function = convert_rgb_uint16_to_uint16_c<12, 14>; break; + } + else if (target_bitdepth == 12) // 10->12 keep full range + switch (source_bitdepth) + { + case 10: conv_function = convert_rgb_uint16_to_uint16_c<10, 12>; break; + } + } + } + else if (vi.IsYUV() || vi.IsYUVA()) { + if (source_bitdepth > target_bitdepth) // reduce range 16->14/12/10 14->12/10 12->10. 
template: bitshift + switch (source_bitdepth - target_bitdepth) + { + case 2: conv_function = convert_uint16_to_uint16_c; break; + case 4: conv_function = convert_uint16_to_uint16_c; break; + case 6: conv_function = convert_uint16_to_uint16_c; break; + } + else // expand range + switch (target_bitdepth - source_bitdepth) + { + case 2: conv_function = convert_uint16_to_uint16_c; break; + case 4: conv_function = convert_uint16_to_uint16_c; break; + case 6: conv_function = convert_uint16_to_uint16_c; break; + } + } + } + else { // truerange==false + change_only_format = true; + } + } + else if (vi.ComponentSize() == 4) // 32->16 bit + { + if (truerange) { + switch(target_bitdepth) + { + case 10: conv_function = convert_32_to_uintN_c; break; + case 12: conv_function = convert_32_to_uintN_c; break; + case 14: conv_function = convert_32_to_uintN_c; break; + case 16: conv_function = convert_32_to_uintN_c; break; + } + } else { + conv_function = convert_32_to_uintN_c; + } + } else + env->ThrowError("ConvertTo16bit: unsupported bit depth"); if (vi.NumComponents() == 1) { - switch(bitdepth) + switch(target_bitdepth) { case 10: vi.pixel_type = VideoInfo::CS_Y10; break; case 12: vi.pixel_type = VideoInfo::CS_Y12; break; @@ -1021,7 +1238,7 @@ ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); } } else if (vi.Is420()) { - switch(bitdepth) + switch(target_bitdepth) { case 10: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA420P10 : VideoInfo::CS_YUV420P10; break; case 12: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA420P12 : VideoInfo::CS_YUV420P12; break; @@ -1031,7 +1248,7 @@ ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); } } else if (vi.Is422()) { - switch(bitdepth) + switch(target_bitdepth) { case 10: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA422P10 : VideoInfo::CS_YUV422P10; break; case 12: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA422P12 : VideoInfo::CS_YUV422P12; break; @@ -1041,7 +1258,7 @@ ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); } } else if (vi.Is444()) { - switch(bitdepth) + switch(target_bitdepth) { case 10: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA444P10 : VideoInfo::CS_YUV444P10; break; case 12: vi.pixel_type = vi.IsYUVA() ? 
VideoInfo::CS_YUVA444P12 : VideoInfo::CS_YUV444P12; break; @@ -1051,7 +1268,7 @@ ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); } } else if (vi.IsPlanarRGB()) { - switch(bitdepth) + switch(target_bitdepth) { case 10: vi.pixel_type = VideoInfo::CS_RGBP10; break; case 12: vi.pixel_type = VideoInfo::CS_RGBP12; break; @@ -1061,7 +1278,7 @@ ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); } } else if (vi.IsPlanarRGBA()) { - switch(bitdepth) + switch(target_bitdepth) { case 10: vi.pixel_type = VideoInfo::CS_RGBAP10; break; case 12: vi.pixel_type = VideoInfo::CS_RGBAP12; break; @@ -1071,15 +1288,15 @@ ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); } } else if(vi.IsRGB24()) { - if(bitdepth == 16) + if(target_bitdepth == 16) vi.pixel_type = VideoInfo::CS_BGR48; else - env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); + env->ThrowError("ConvertTo16bit: unsupported bit depth"); } else if(vi.IsRGB32()) { - if(bitdepth == 16) + if(target_bitdepth == 16) vi.pixel_type = VideoInfo::CS_BGR64; else - env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); + env->ThrowError("ConvertTo16bit: unsupported bit depth"); } else env->ThrowError("ConvertTo16bit: unsupported color space"); } @@ -1087,33 +1304,46 @@ ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int AVSValue __cdecl ConvertTo16bit::Create(AVSValue args, void*, IScriptEnvironment* env) { PClip clip = args[0].AsClip(); - //0 1 2 3 4 - //c[scale]f[dither]i[bitdepth]i[modifyrange]b + //0 1 2 3 4 + //c[bits]i[truerange]b[dither]i[scale]f const VideoInfo &vi = clip->GetVideoInfo(); + if (!vi.IsPlanar() && !vi.IsRGB24() && !vi.IsRGB32()) + env->ThrowError("ConvertTo16bit: Can only convert from Planar YUV/RGB or packed RGB."); + if (vi.ComponentSize() != 4 && args[4].Defined()) + env->ThrowError("ConvertTo16bit: Float range parameter not allowed for non float source"); // float range parameter - int target_bitdepth = args[3].AsInt(16); // default: 16 bit. can override with 10/12/14 bits - bool modify_range = args[4].AsBool(false); // for 10->16 conversion + float float_range = (float)args[4].AsFloat(1.0f); - if (vi.ComponentSize() == 2 && vi.BitsPerComponent() == target_bitdepth) - return clip; // 16 bit -> 16 bit: no conversion 16->10 e.g. conversion option with range reducing + // when converting from/true 10-16 bit formats, truerange=false indicates bitdepth of 16 bits regardless of the 10-12-14 bit format + bool assume_truerange = args[2].AsBool(true); // n/a for non planar formats + int target_bitdepth = args[1].AsInt(16); // default: 16 bit. 
can override with 10/12/14 bits + int source_bitdepth = vi.BitsPerComponent(); - if (!vi.IsPlanar() && !vi.IsRGB24() && !vi.IsRGB32()) - env->ThrowError("ConvertTo16bit: Can only convert from Planar YUV/RGB or packed RGB."); + if(target_bitdepth!=10 && target_bitdepth!=12 && target_bitdepth!=14 && target_bitdepth!=16) + env->ThrowError("ConvertTo16bit: invalid bit depth"); + if (!vi.IsPlanar() && args[2].Defined()) + env->ThrowError("ConvertTo16bit: truerange specified for non-planar source"); + if (vi.IsRGB24() || vi.IsRGB32()) { if (target_bitdepth != 16) env->ThrowError("ConvertTo16bit: only 16 bit allowed for packed RGB"); - if (modify_range) - env->ThrowError("ConvertTo16bit: range reducing parameter is not allowed for packed RGB"); } - if (vi.ComponentSize() != 4 && args[1].Defined()) - env->ThrowError("ConvertTo16bit: Float range parameter not allowed for non float source"); - - // float range parameter - float float_range = (float)args[1].AsFloat(1.0f); + // 10/12/14/16 -> 10/12/14/16 + if (vi.ComponentSize() == 2) + { + if((source_bitdepth == target_bitdepth) && assume_truerange) // 10->10 .. 16->16 + return clip; + // source_10_bit.ConvertTo16bit(truerange=true) : upscale range + // source_10_bit.ConvertTo16bit(truerange=false) : leaves data, only format conversion + // source_10_bit.ConvertTo16bit(bits=12,truerange=true) : upscale range from 10 to 12 + // source_10_bit.ConvertTo16bit(bits=12,truerange=false) : leaves data, only format conversion + // source_16_bit.ConvertTo16bit(bits=10, truerange=true) : downscale range + // source_16_bit.ConvertTo16bit(bits=10, truerange=false) : leaves data, only format conversion + } if (vi.ComponentSize() == 4) { if(float_range<=0.0) @@ -1122,9 +1352,9 @@ AVSValue __cdecl ConvertTo16bit::Create(AVSValue args, void*, IScriptEnvironment } // dither parameter, rfu - int dither_type = args[2].AsInt(-1); + int dither_type = args[3].AsInt(-1); - return new ConvertTo16bit(clip, float_range, dither_type, target_bitdepth, modify_range, env); + return new ConvertTo16bit(clip, float_range, dither_type, source_bitdepth, target_bitdepth, assume_truerange, env); } PVideoFrame __stdcall ConvertTo16bit::GetFrame(int n, IScriptEnvironment* env) { @@ -1165,18 +1395,29 @@ PVideoFrame __stdcall ConvertTo16bit::GetFrame(int n, IScriptEnvironment* env) { } - // float 32 bit -ConvertToFloat::ConvertToFloat(PClip _child, const float _float_range, const int _dither_mode, IScriptEnvironment* env) : - GenericVideoFilter(_child), float_range(_float_range), dither_mode(_dither_mode) +ConvertToFloat::ConvertToFloat(PClip _child, const float _float_range, const int _source_bitdepth, bool _truerange, IScriptEnvironment* env) : + GenericVideoFilter(_child), float_range(_float_range), source_bitdepth(_source_bitdepth), truerange(_truerange) { if (vi.ComponentSize() == 1) // 8->32 bit { - conv_function = convert_uintN_to_float_c; + conv_function = convert_uintN_to_float_c; } else if (vi.ComponentSize() == 2) // 16->32 bit { - conv_function = convert_uintN_to_float_c; + if (vi.IsPlanar() && truerange) + { + switch (source_bitdepth) + { + case 10: conv_function = convert_uintN_to_float_c; break; + case 12: conv_function = convert_uintN_to_float_c; break; + case 14: conv_function = convert_uintN_to_float_c; break; + case 16: conv_function = convert_uintN_to_float_c; break; + default: env->ThrowError("ConvertToFloat: unsupported bit depth"); + } + } else { + conv_function = convert_uintN_to_float_c; + } } else env->ThrowError("ConvertToFloat: unsupported bit depth"); @@ -1201,26 
+1442,28 @@ AVSValue __cdecl ConvertToFloat::Create(AVSValue args, void*, IScriptEnvironment PClip clip = args[0].AsClip(); const VideoInfo &vi = clip->GetVideoInfo(); + //0 1 2 + //c[truerange]b[scale]f if (!vi.IsPlanar()) - env->ThrowError("ConvertToFloat: Can only convert from Planar YUV."); + env->ThrowError("ConvertToFloat: Can only convert from Planar YUV(A) or RGB(A)."); if (vi.ComponentSize() == 4) return clip; // 32 bit -> 32 bit: no conversion // float range parameter - float float_range = (float)args[1].AsFloat(1.0f); + float float_range = (float)args[2].AsFloat(1.0f); - if (vi.ComponentSize() == 4) { - if(float_range<=0.0) + if(float_range<=0.0) env->ThrowError("ConvertToFloat: Float range parameter cannot be <= 0"); - // other checkings - } - // dither parameter - int dither_type = args[2].AsInt(-1); + bool assume_truerange = args[1].AsBool(true); // n/a for non planar formats + int source_bitdepth = vi.BitsPerComponent(); + + if (vi.ComponentSize() != 2 && args[1].Defined()) + env->ThrowError("ConvertToFloat: truerange specified for 8 bit source"); - return new ConvertToFloat(clip, float_range, dither_type, env); + return new ConvertToFloat(clip, float_range, source_bitdepth, assume_truerange, env); } PVideoFrame __stdcall ConvertToFloat::GetFrame(int n, IScriptEnvironment* env) { diff --git a/avs_core/convert/convert_planar.h b/avs_core/convert/convert_planar.h index 6eefac6a2..e8ff5c14e 100644 --- a/avs_core/convert/convert_planar.h +++ b/avs_core/convert/convert_planar.h @@ -175,7 +175,7 @@ typedef void (*BitDepthConvFuncPtr)(const BYTE *srcp, BYTE *dstp, int src_rowsiz class ConvertTo8bit : public GenericVideoFilter { public: - ConvertTo8bit(PClip _child, const float _float_range, const int _dither_mode, IScriptEnvironment* env); + ConvertTo8bit(PClip _child, const float _float_range, const int _dither_mode, const int _source_bitdepth, const int _truerange, IScriptEnvironment* env); PVideoFrame __stdcall GetFrame(int n,IScriptEnvironment* env); int __stdcall SetCacheHints(int cachehints, int frame_range) override { @@ -188,12 +188,14 @@ class ConvertTo8bit : public GenericVideoFilter float float_range; int dither_mode; int pixelsize; + int source_bitdepth; + int truerange; }; class ConvertTo16bit : public GenericVideoFilter { public: - ConvertTo16bit(PClip _child, const float _float_range, const int _dither_mode, const int _bitdepth, bool _modify_range, IScriptEnvironment* env); + ConvertTo16bit(PClip _child, const float _float_range, const int _dither_mode, const int _source_bitdepth, const int _target_bitdepth, bool _truerange, IScriptEnvironment* env); PVideoFrame __stdcall GetFrame(int n,IScriptEnvironment* env); int __stdcall SetCacheHints(int cachehints, int frame_range) override { @@ -206,15 +208,16 @@ class ConvertTo16bit : public GenericVideoFilter float float_range; int dither_mode; int pixelsize; - int bitdepth; // effective 10/12/14/16 bits within the 2 byte container - bool modify_range; // if 16->10 range reducing or e.g. 14->16 bit range expansion needed + int source_bitdepth; // effective 10/12/14/16 bits within the 2 byte container + int target_bitdepth; // effective 10/12/14/16 bits within the 2 byte container + bool truerange; // if 16->10 range reducing or e.g. 
14->16 bit range expansion needed bool change_only_format; // if 16->10 bit affects only pixel_type }; class ConvertToFloat : public GenericVideoFilter { public: - ConvertToFloat(PClip _child, const float _float_range, const int _dither_mode, IScriptEnvironment* env); + ConvertToFloat(PClip _child, const float _float_range, const int _source_bitdepth, bool _truerange, IScriptEnvironment* env); PVideoFrame __stdcall GetFrame(int n,IScriptEnvironment* env); int __stdcall SetCacheHints(int cachehints, int frame_range) override { @@ -225,7 +228,8 @@ class ConvertToFloat : public GenericVideoFilter private: BitDepthConvFuncPtr conv_function; float float_range; - int dither_mode; + int source_bitdepth; // effective 10/12/14/16 bits within the 2 byte container + bool truerange; // if 16->10 range reducing or e.g. 14->16 bit range expansion needed int pixelsize; }; From b09bc11ad347aaebe592a848f3d3ea19d7c68430 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Tue, 23 Aug 2016 16:51:07 +0200 Subject: [PATCH 003/120] New params for Info(): c[font]s[size]f[text_color]i[halo_color]i. Fix hardcoded dimensions. --- avs_core/filters/text-overlay.cpp | 101 ++++++++++++++++++++++-------- avs_core/filters/text-overlay.h | 6 +- 2 files changed, 80 insertions(+), 27 deletions(-) diff --git a/avs_core/filters/text-overlay.cpp b/avs_core/filters/text-overlay.cpp index 7002ac09a..47d1d087c 100644 --- a/avs_core/filters/text-overlay.cpp +++ b/avs_core/filters/text-overlay.cpp @@ -69,7 +69,7 @@ extern const AVSFunction Text_filters[] = { "c[offset_f]i[x]f[y]f[font]s[size]f[text_color]i[halo_color]i[font_width]f[font_angle]f", ShowSMPTE::CreateTime }, - { "Info", BUILTIN_FUNC_PREFIX, "c", FilterInfo::Create }, // clip + { "Info", BUILTIN_FUNC_PREFIX, "c[font]s[size]f[text_color]i[halo_color]i", FilterInfo::Create }, // clip { "Subtitle",BUILTIN_FUNC_PREFIX, "cs[x]f[y]f[first_frame]i[last_frame]i[font]s[size]f[text_color]i[halo_color]i" @@ -747,8 +747,8 @@ ShowFrameNumber::ShowFrameNumber(PClip _child, bool _scroll, int _offset, int _x int _size, int _textcolor, int _halocolor, int font_width, int font_angle, IScriptEnvironment* env) : GenericVideoFilter(_child), scroll(_scroll), offset(_offset), x(_x), y(_y), size(_size), antialiaser(vi.width, vi.height, _fontname, _size, - vi.IsYUV() ? RGB2YUV(_textcolor) : _textcolor, - vi.IsYUV() ? RGB2YUV(_halocolor) : _halocolor, + vi.IsYUV() || vi.IsYUVA() ? RGB2YUV(_textcolor) : _textcolor, + vi.IsYUV() || vi.IsYUVA() ? RGB2YUV(_halocolor) : _halocolor, font_width, font_angle) { } @@ -827,8 +827,8 @@ ShowSMPTE::ShowSMPTE(PClip _child, double _rate, const char* offset, int _offset int _size, int _textcolor, int _halocolor, int font_width, int font_angle, IScriptEnvironment* env) : GenericVideoFilter(_child), x(_x), y(_y), antialiaser(vi.width, vi.height, _fontname, _size, - vi.IsYUV() ? RGB2YUV(_textcolor) : _textcolor, - vi.IsYUV() ? RGB2YUV(_halocolor) : _halocolor, + vi.IsYUV() || vi.IsYUVA() ? RGB2YUV(_textcolor) : _textcolor, + vi.IsYUV() || vi.IsYUVA() ? RGB2YUV(_halocolor) : _halocolor, font_width, font_angle) { int off_f, off_sec, off_min, off_hour; @@ -1020,8 +1020,8 @@ Subtitle::Subtitle( PClip _child, const char _text[], int _x, int _y, int _first int _font_width, int _font_angle, bool _interlaced ) : GenericVideoFilter(_child), antialiaser(0), text(_text), x(_x), y(_y), firstframe(_firstframe), lastframe(_lastframe), fontname(_fontname), size(_size), - textcolor(vi.IsYUV() ? RGB2YUV(_textcolor) : _textcolor), - halocolor(vi.IsYUV() ? 
RGB2YUV(_halocolor) : _halocolor), + textcolor(vi.IsYUV() || vi.IsYUVA() ? RGB2YUV(_textcolor) : _textcolor), + halocolor(vi.IsYUV() || vi.IsYUVA() ? RGB2YUV(_halocolor) : _halocolor), align(_align), spc(_spc), multiline(_multiline), lsp(_lsp), font_width(_font_width), font_angle(_font_angle), interlaced(_interlaced) { @@ -1194,10 +1194,12 @@ inline int CalcFontSize(int w, int h) ******* FilterInfo Filter ****** **********************************/ - -FilterInfo::FilterInfo( PClip _child) -: GenericVideoFilter(_child), vii(AdjustVi()), - antialiaser(vi.width, vi.height, "Courier New", CalcFontSize(vi.width, vi.height), vi.IsYUV() ? 0xD21092 : 0xFFFF00, vi.IsYUV() ? 0x108080 : 0) { +FilterInfo::FilterInfo( PClip _child, bool _font_override, const char _fontname[], int _size, int _textcolor, int _halocolor, IScriptEnvironment* env) +: GenericVideoFilter(_child), vii(AdjustVi()), font_override(_font_override), size(_size), + antialiaser(vi.width, vi.height, _fontname, size, + vi.IsYUV() || vi.IsYUVA() ? RGB2YUV(_textcolor) : _textcolor, + vi.IsYUV() || vi.IsYUVA() ? RGB2YUV(_halocolor) : _halocolor) +{ } @@ -1349,7 +1351,7 @@ PVideoFrame FilterInfo::GetFrame(int n, IScriptEnvironment* env) PVideoFrame frame = vii.HasVideo() ? child->GetFrame(n, env) : env->NewVideoFrame(vi); if ( !vii.HasVideo() ) { - memset(frame->GetWritePtr(), 0, frame->GetPitch()*frame->GetHeight()); // Blank frame + memset(frame->GetWritePtr(), 0, frame->GetPitch()*frame->GetHeight()); // Blank frame } HDC hdcAntialias = antialiaser.GetDC(); @@ -1357,9 +1359,8 @@ PVideoFrame FilterInfo::GetFrame(int n, IScriptEnvironment* env) const char* c_space = "Unknown"; const char* s_type = t_NONE; const char* s_parity; - char text[512]; - int tlen; - RECT r= { 32, 16, min(3440,vi.width*8), 900*2 }; + char text[1024]; + int tlen; if (vii.HasVideo()) { if (vii.IsRGB24()) c_space=t_RGB24; @@ -1451,6 +1452,7 @@ PVideoFrame FilterInfo::GetFrame(int n, IScriptEnvironment* env) "Parity: %s\n" // 35=9+26 "Video Pitch: %5u bytes.\n" // 25 "Has Audio: %s\n" // 15=12+3 +// "123456789012345678901234567890123456789012345678901234567890\n" // test , n, vii.num_frames , (cPosInMsecs/(60*60*1000)), (cPosInMsecs/(60*1000))%60 ,(cPosInMsecs/1000)%60, cPosInMsecs%1000, (vLenInMsecs/(60*60*1000)), (vLenInMsecs/(60*1000))%60 ,(vLenInMsecs/1000)%60, vLenInMsecs%1000 @@ -1498,14 +1500,57 @@ PVideoFrame FilterInfo::GetFrame(int n, IScriptEnvironment* env) tlen += 1; } tlen += _snprintf(text+tlen, sizeof(text)-tlen, - "CPU detected: %s\n" // 60=15+45 + "CPU detected: %s\n" // 60=15+45 max line length=60(?) , GetCpuMsg(env).c_str() // 442 ); + + + // So far RECT dimensions were hardcoded: RECT r = { 32, 16, min(3440,vi.width * 8), 900*2 }; + // More flexible way: get text extent + RECT r; + if(!font_override) + { + // To prevent slowish full MxN rendering, we calculate a dummy + // 1xN sized vertical and a Mx1 sized horizontal line extent + // Assuming that we are using fixed font (e.g. default Courier New) + std::string s = text; + size_t n = std::count(s.begin(), s.end(), '\n'); + // create dummy vertical string + std::string s_vert; + for (size_t i=0; i max_line) max_line = counter; + counter = 0; + } + } + std::string s_horiz = std::string(max_line > 0 ? 
max_line : 1, ' '); // M*spaces + RECT r0_h = { 0, 0, 100, 100 }; // for output + DrawText(hdcAntialias, s_horiz.c_str(), -1, &r0_h, DT_CALCRECT); + // and use the width and height dimensions from the two results + r = { 32, 16, min(32+(int)r0_h.right,vi.width * 8-1), min(16+int(r0_v.bottom), vi.height*8-1) }; // do not crop if larger font is used + } else { + // font was overridden, may not be fixed type + RECT r0 = { 0, 0, 100, 100 }; // do not crop if larger font is used + DrawText(hdcAntialias, text, -1, &r0, DT_CALCRECT); + r = { 32, 16, min(32+(int)r0.right,vi.width * 8 -1), min(16+int(r0.bottom), vi.height*8-1) }; + } + + // RECT r = { 32, 16, min(3440,vi.width * 8), 900*2 }; + // original code. Values possibly experimented Courier New size 18 + knowing max. text length/line count + DrawText(hdcAntialias, text, -1, &r, 0); GdiFlush(); - env->MakeWritable(&frame); + env->MakeWritable(&frame); frame->GetWritePtr(); // Bump sequence_number int dst_pitch = frame->GetPitch(); antialiaser.Apply(vi, &frame, dst_pitch ); @@ -1516,19 +1561,23 @@ PVideoFrame FilterInfo::GetFrame(int n, IScriptEnvironment* env) AVSValue __cdecl FilterInfo::Create(AVSValue args, void*, IScriptEnvironment* env) { + // 0 1 2 3 4 + // c[font]s[size]f[text_color]i[halo_color]i PClip clip = args[0].AsClip(); - return new FilterInfo(clip); + // new parameters 20160823 + const char* font = args[1].AsString("Courier New"); + int size = int(args[2].AsFloat(0) * 8 + 0.5); + if (!args[2].Defined()) + size = CalcFontSize(clip->GetVideoInfo().width, clip->GetVideoInfo().height); + const int text_color = args[3].AsInt(0xFFFF00); + const int halo_color = args[4].AsInt(0); + + return new FilterInfo(clip, args[1].Defined(), font, size, text_color, halo_color, env); + //return new FilterInfo(clip); } - - - - - - - /************************************ ******* Compare Filter ******* ***********************************/ @@ -2346,7 +2395,7 @@ bool GetTextBoundingBox( const char* text, const char* fontname, int size, bool void ApplyMessage( PVideoFrame* frame, const VideoInfo& vi, const char* message, int size, int textcolor, int halocolor, int bgcolor, IScriptEnvironment* env ) { - if (vi.IsYUV()) { + if (vi.IsYUV() || vi.IsYUVA()) { textcolor = RGB2YUV(textcolor); halocolor = RGB2YUV(halocolor); } diff --git a/avs_core/filters/text-overlay.h b/avs_core/filters/text-overlay.h index fabe898f8..004e0a230 100644 --- a/avs_core/filters/text-overlay.h +++ b/avs_core/filters/text-overlay.h @@ -175,7 +175,7 @@ class FilterInfo : public GenericVideoFilter **/ { public: - FilterInfo( PClip _child); + FilterInfo( PClip _child, bool _font_override, const char _fontname[], int _size, int _textcolor, int _halocolor, IScriptEnvironment* env); virtual ~FilterInfo(void); PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); bool __stdcall GetParity(int n); @@ -191,6 +191,10 @@ class FilterInfo : public GenericVideoFilter const VideoInfo& AdjustVi(); const VideoInfo &vii; + + const int size; + const bool font_override; + Antialiaser antialiaser; }; From 06d02401a4f3e11d1cde816836218fa6bb785cc8 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 24 Aug 2016 18:47:47 +0200 Subject: [PATCH 004/120] ConvertToRGBxxx: 16 bit/float, RGB48/64,PlanarRGB(A), except PlanarRGBToPackedRGB --- avs_core/convert/convert.cpp | 107 +++++++++++-- avs_core/convert/convert.h | 6 +- avs_core/convert/convert_planar.cpp | 230 +++++++++++++++++++++++++--- avs_core/convert/convert_planar.h | 19 ++- avs_core/convert/convert_rgb.cpp | 127 +++++++++++++-- 
avs_core/convert/convert_rgb.h | 24 ++- 6 files changed, 456 insertions(+), 57 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 355dd1422..67eb45b0a 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -50,9 +50,12 @@ ********************************************************************/ extern const AVSFunction Convert_filters[] = { // matrix can be "rec601", rec709", "PC.601" or "PC.709" - { "ConvertToRGB", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create }, - { "ConvertToRGB24", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create24 }, - { "ConvertToRGB32", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create32 }, + { "ConvertToRGB", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)0 }, + { "ConvertToRGB24", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)24 }, + { "ConvertToRGB32", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)32 }, + { "ConvertToRGB48", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)48 }, + { "ConvertToRGB64", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)64 }, + { "ConvertToPlanarRGB", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)-1 }, { "ConvertToY8", BUILTIN_FUNC_PREFIX, "c[matrix]s", ConvertToY8::Create }, { "ConvertToYV12", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s[ChromaOutPlacement]s", ConvertToYV12::Create }, { "ConvertToYV24", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToPlanarGeneric::CreateYUV444}, @@ -126,6 +129,7 @@ int getMatrix( const char* matrix, IScriptEnvironment* env) { ******* Convert to RGB / RGBA ****** ***************************************/ +// YUY2 only ConvertToRGB::ConvertToRGB( PClip _child, bool rgb24, const char* matrix, IScriptEnvironment* env ) : GenericVideoFilter(_child) @@ -524,7 +528,7 @@ static void convert_yuy2_to_rgb_c(const BYTE *srcp, BYTE* dstp, int src_pitch, i } } - +// YUY2 only PVideoFrame __stdcall ConvertToRGB::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame src = child->GetFrame(n, env); @@ -573,30 +577,102 @@ PVideoFrame __stdcall ConvertToRGB::GetFrame(int n, IScriptEnvironment* env) return dst; } - -AVSValue __cdecl ConvertToRGB::Create(AVSValue args, void*, IScriptEnvironment* env) +// general for all colorspaces +// however class is constructed only for YUY2 input +AVSValue __cdecl ConvertToRGB::Create(AVSValue args, void* user_data, IScriptEnvironment* env) { const bool haveOpts = args[3].Defined() || args[4].Defined(); PClip clip = args[0].AsClip(); const char* const matrix = args[1].AsString(0); const VideoInfo& vi = clip->GetVideoInfo(); - if (vi.IsPlanar()) { + // todo bitdepth conversion on-the-fly + + // common Create for all CreateRGB24/32/48/64/Planar(-1) using user_data + int target_rgbtype = (int)reinterpret_cast(user_data); + // -1: Planar RGB + // 0: not specified (leave if input is packed RGB, convert to rgb32/64 input colorspace dependent) + // 24,32,48,64: RGB24/32/48/64 + 
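  // Worked example (illustration only, not lines from the patch): ConvertToRGB48 registers
  // user_data = 48, so a 16-bit YUV source is first taken to 4:4:4 by CreateYUV444 below,
  // the 16-bit component check passes, and ConvertYUV444ToRGB is constructed with
  // rgbtype_param = 6, i.e. 6 bytes per packed RGB48 pixel; ConvertToPlanarRGB uses -1
  // and keeps the source bit depth in the planar RGB(A) target.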
+ // planar YUV-like + if (vi.IsPlanar() && (vi.IsYUV() || vi.IsYUVA())) { AVSValue new_args[5] = { clip, args[2], args[1], args[3], args[4] }; + // conversion to planar or packed RGB is always from 444 clip = ConvertToPlanarGeneric::CreateYUV444(AVSValue(new_args, 5), NULL, env).AsClip(); - return new ConvertYV24ToRGB(clip, getMatrix(matrix, env), 4 , env); + if((target_rgbtype==24 || target_rgbtype==32) && vi.ComponentSize()!=1) + env->ThrowError("ConvertToRGB: conversion is allowed only from 8 bit colorspace"); + if((target_rgbtype==48 || target_rgbtype==64) && vi.ComponentSize()!=2) + env->ThrowError("ConvertToRGB: conversion is allowed only from 16 bit colorspace"); + if(target_rgbtype==0 && vi.ComponentSize()==4) + env->ThrowError("ConvertToRGB: conversion is allowed only from 8 or 16 bit colorspaces"); + int rgbtype_param; + switch (target_rgbtype) + { + case -1: + rgbtype_param = -1; break; // planar RGB(A) + case 0: + rgbtype_param = vi.ComponentSize() == 1 ? 4 : 8; break; // input bitdepth adaptive + case 24: + rgbtype_param = 3; break; // RGB24 + case 32: + rgbtype_param = 4; break; // RGB32 + case 48: + rgbtype_param = 6; break; // RGB48 + case 64: + rgbtype_param = 8; break; // RGB64 + } + return new ConvertYUV444ToRGB(clip, getMatrix(matrix, env), rgbtype_param , env); } if (haveOpts) env->ThrowError("ConvertToRGB: ChromaPlacement and ChromaResample options are not supported."); + + // planar RGB-like + if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) + { + if (target_rgbtype == -1) // planar to planar + return clip; + if(vi.ComponentSize() == 4) + env->ThrowError("ConvertToRGB: conversion from float colorspace is not supported."); + if((target_rgbtype==24 || target_rgbtype==32) && vi.ComponentSize()!=1) + env->ThrowError("ConvertToRGB: conversion is allowed only from 8 bit colorspace"); + if((target_rgbtype==48 || target_rgbtype==64) && vi.ComponentSize()!=2) + env->ThrowError("ConvertToRGB: conversion is allowed only from 16 bit colorspace"); + env->ThrowError("ConvertToRGB: Planar RGB to packed RGB conversion is not yet implemented"); + //return new PlanarRGBtoPackedRGB(clip); todo + } - if (vi.IsYUV()) - return new ConvertToRGB(clip, false, matrix, env); + // YUY2 + if (vi.IsYUV()) // at this point IsYUV means YUY2 (non-planar) + { + if (target_rgbtype==48 || target_rgbtype==64 || target_rgbtype==-1) + env->ThrowError("ConvertToRGB: conversion from YUY2 is allowed only to 8 bit packed RGB"); + return new ConvertToRGB(clip, target_rgbtype==24, matrix, env); + } + + // conversions from packed RGB + + if((target_rgbtype==24 || target_rgbtype==32) && vi.ComponentSize()!=1) + env->ThrowError("ConvertToRGB%d: conversion is allowed only from 8 bit colorspace",target_rgbtype); + if((target_rgbtype==48 || target_rgbtype==64) && vi.ComponentSize()!=2) + env->ThrowError("ConvertToRGB%d: conversion is allowed only from 16 bit colorspace",target_rgbtype); + + if(target_rgbtype==32 || target_rgbtype==64) + if (vi.IsRGB24() || vi.IsRGB48()) + return new RGBtoRGBA(clip); // also handles 48to64 + + if(target_rgbtype==24 || target_rgbtype==48) + if (vi.IsRGB32() || vi.IsRGB64()) + return new RGBAtoRGB(clip); // also handles 64to48 + + if (target_rgbtype == -1) + return new PackedRGBtoPlanarRGB(clip); return clip; } - +#if 0 +// merged into Create AVSValue __cdecl ConvertToRGB::Create32(AVSValue args, void*, IScriptEnvironment* env) { const bool haveOpts = args[3].Defined() || args[4].Defined(); @@ -607,7 +683,7 @@ AVSValue __cdecl ConvertToRGB::Create32(AVSValue args, void*, IScriptEnvironment 
if (vi.IsPlanar()) { AVSValue new_args[5] = { clip, args[2], args[1], args[3], args[4] }; clip = ConvertToPlanarGeneric::CreateYUV444(AVSValue(new_args, 5), NULL, env).AsClip(); - return new ConvertYV24ToRGB(clip, getMatrix(matrix, env), 4 , env); + return new ConvertYUV444ToRGB(clip, getMatrix(matrix, env), 4 , env); } if (haveOpts) @@ -621,8 +697,10 @@ AVSValue __cdecl ConvertToRGB::Create32(AVSValue args, void*, IScriptEnvironment return clip; } +#endif - +#if 0 +// merged into Create AVSValue __cdecl ConvertToRGB::Create24(AVSValue args, void*, IScriptEnvironment* env) { const bool haveOpts = args[3].Defined() || args[4].Defined(); @@ -633,7 +711,7 @@ AVSValue __cdecl ConvertToRGB::Create24(AVSValue args, void*, IScriptEnvironment if (vi.IsPlanar()) { AVSValue new_args[5] = { clip, args[2], args[1], args[3], args[4] }; clip = ConvertToPlanarGeneric::CreateYUV444(AVSValue(new_args, 5), NULL, env).AsClip(); - return new ConvertYV24ToRGB(clip, getMatrix(matrix, env), 3 , env); + return new ConvertYUV444ToRGB(clip, getMatrix(matrix, env), 3 , env); } if (haveOpts) @@ -647,6 +725,7 @@ AVSValue __cdecl ConvertToRGB::Create24(AVSValue args, void*, IScriptEnvironment return clip; } +#endif /********************************** ******* Convert to YV12 ****** diff --git a/avs_core/convert/convert.h b/avs_core/convert/convert.h index 1522f70d8..7dc76e1a1 100644 --- a/avs_core/convert/convert.h +++ b/avs_core/convert/convert.h @@ -80,9 +80,9 @@ class ConvertToRGB : public GenericVideoFilter return cachehints == CACHE_GET_MTMODE ? MT_NICE_FILTER : 0; } - static AVSValue __cdecl Create(AVSValue args, void*, IScriptEnvironment* env); - static AVSValue __cdecl Create32(AVSValue args, void*, IScriptEnvironment* env); - static AVSValue __cdecl Create24(AVSValue args, void*, IScriptEnvironment* env); + static AVSValue __cdecl Create(AVSValue args, void* user_data, IScriptEnvironment* env); +// static AVSValue __cdecl Create32(AVSValue args, void*, IScriptEnvironment* env); +// static AVSValue __cdecl Create24(AVSValue args, void*, IScriptEnvironment* env); private: int theMatrix; diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index 823beb219..43511e926 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -501,6 +501,17 @@ void ConvertRGBToYV24::BuildMatrix(double Kr, double Kb, int Sy, int Suv, int Oy matrix.v_g = (int16_t)(Suv * Kg/(Kr-1) * mulfac / Srgb + 0.5); matrix.v_r = (int16_t)(Suv * mulfac / Srgb + 0.5); matrix.offset_y = Oy; + + matrix.y_b_f = (float)(Sy/256.0 * Kb ); //B + matrix.y_g_f = (float)(Sy/256.0 * Kg ); //G + matrix.y_r_f = (float)(Sy/256.0 * Kr ); //R + matrix.u_b_f = (float)(Suv/256.0 ); + matrix.u_g_f = (float)(Suv/256.0 * Kg/(Kb-1) ); + matrix.u_r_f = (float)(Suv/256.0 * Kr/(Kb-1) ); + matrix.v_b_f = (float)(Suv/256.0 * Kb/(Kr-1) ); + matrix.v_g_f = (float)(Suv/256.0 * Kg/(Kr-1) ); + matrix.v_r_f = (float)(Suv/256.0 ); + } static void convert_rgb32_to_yv24_sse2(BYTE* dstY, BYTE* dstU, BYTE* dstV, const BYTE*srcp, size_t dst_pitch_y, size_t UVpitch, size_t src_pitch, size_t width, size_t height, const ConversionMatrix &matrix) { @@ -777,18 +788,37 @@ AVSValue __cdecl ConvertRGBToYV24::Create(AVSValue args, void*, IScriptEnvironme * ConvertYV24ToRGB * * (c) Klaus Post, 2005 + * Generic 4:4:4(:4), 16 bit and Planar RGB(A) support 2016 by PF ******************************************************/ -ConvertYV24ToRGB::ConvertYV24ToRGB(PClip src, int in_matrix, int _pixel_step, IScriptEnvironment* env) 
+ConvertYUV444ToRGB::ConvertYUV444ToRGB(PClip src, int in_matrix, int _pixel_step, IScriptEnvironment* env) : GenericVideoFilter(src), pixel_step(_pixel_step) { - if (!vi.IsYV24()) - env->ThrowError("ConvertYV24ToRGB: Only YV24 data input accepted"); + if (!vi.Is444()) + env->ThrowError("ConvertYUV444ToRGB: Only 4:4:4 data input accepted"); + + switch (pixel_step) + { + case -1: + switch (vi.BitsPerComponent()) + { + case 8: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAP : VideoInfo::CS_RGBP; break; + case 10: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAP10 : VideoInfo::CS_RGBP10; break; + case 12: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAP12 : VideoInfo::CS_RGBP12; break; + case 14: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAP14 : VideoInfo::CS_RGBP14; break; + case 16: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAP16 : VideoInfo::CS_RGBP16; break; + case 32: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAPS : VideoInfo::CS_RGBPS; break; + } + break; + case 3: vi.pixel_type = VideoInfo::CS_BGR24; break; + case 4: vi.pixel_type = VideoInfo::CS_BGR32; break; + case 6: vi.pixel_type = VideoInfo::CS_BGR48; break; + case 8: vi.pixel_type = VideoInfo::CS_BGR64; break; + } - vi.pixel_type = (pixel_step == 3) ? VideoInfo::CS_BGR24 : VideoInfo::CS_BGR32; - const int shift = 13; + const int shift = 13; // for integer arithmetic if (in_matrix == Rec601) { /* @@ -824,7 +854,7 @@ ConvertYV24ToRGB::ConvertYV24ToRGB(PClip src, int in_matrix, int _pixel_step, IS } } -void ConvertYV24ToRGB::BuildMatrix(double Kr, double Kb, int Sy, int Suv, int Oy, int shift) +void ConvertYUV444ToRGB::BuildMatrix(double Kr, double Kb, int Sy, int Suv, int Oy, int shift) { /* Kr = {0.299, 0.2126} @@ -863,6 +893,16 @@ void ConvertYV24ToRGB::BuildMatrix(double Kr, double Kb, int Sy, int Suv, int Oy matrix.u_r = (int16_t)(Srgb * 0.000 * mulfac / Suv + 0.5); matrix.v_r = (int16_t)(Srgb * (1-Kr) * mulfac / Suv + 0.5); matrix.offset_y = -Oy; + + matrix.y_b_f = (float)(1.000 / (Sy/256.0) ); //Y + matrix.u_b_f = (float)((1-Kb) / (Suv/256.0)); //U + matrix.v_b_f = (float)(0.000 / (Suv/256.0)); //V + matrix.y_g_f = (float)(1.000 / (Sy/256.0) ); + matrix.u_g_f = (float)((Kb-1)*Kb/Kg / (Suv/256.0)); + matrix.v_g_f = (float)((Kr-1)*Kr/Kg / (Suv/256.0)); + matrix.y_r_f = (float)(1.000 / (Sy/256.0) ); + matrix.u_r_f = (float)(0.000 / (Suv/256.0)); + matrix.v_r_f = (float)((1-Kr) / (Suv/256.0)); } static __forceinline __m128i convert_yuv_to_rgb_sse2_core(const __m128i &px01, const __m128i &px23, const __m128i &px45, const __m128i &px67, const __m128i& zero, const __m128i &matrix, const __m128i &round_mask) { @@ -1135,15 +1175,16 @@ static void convert_yv24_to_rgb_mmx(BYTE* dstp, const BYTE* srcY, const BYTE* sr #endif -PVideoFrame __stdcall ConvertYV24ToRGB::GetFrame(int n, IScriptEnvironment* env) +PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame src = child->GetFrame(n, env); - PVideoFrame dst = env->NewVideoFrame(vi, 8); - + //PVideoFrame dst = env->NewVideoFrame(vi, 8); // PF: why 8? 
A larger default Avisynth align should work fine + PVideoFrame dst = env->NewVideoFrame(vi); const BYTE* srcY = src->GetReadPtr(PLANAR_Y); const BYTE* srcU = src->GetReadPtr(PLANAR_U); const BYTE* srcV = src->GetReadPtr(PLANAR_V); + const BYTE* srcA = src->GetReadPtr(PLANAR_A); BYTE* dstp = dst->GetWritePtr(); @@ -1151,16 +1192,19 @@ PVideoFrame __stdcall ConvertYV24ToRGB::GetFrame(int n, IScriptEnvironment* env) const int src_pitch_y = src->GetPitch(PLANAR_Y); const int src_pitch_uv = src->GetPitch(PLANAR_U); + const int src_pitch_a = src->GetPitch(PLANAR_A); // zero if no Alpha const int dst_pitch = dst->GetPitch(); - if (pixel_step != 4 && pixel_step != 3) { + if (pixel_step != 4 && pixel_step != 3 && pixel_step != 8 && pixel_step != 6 && pixel_step != -1) { env->ThrowError("Invalid pixel step. This is a bug."); } - if (env->GetCPUFlags() & CPUF_SSE2) { + // todo: SSE for not only 8 bit RGB + if ((env->GetCPUFlags() & CPUF_SSE2) && (pixel_step==3 || pixel_step==4)) { //we load using movq so no need to check for alignment if (pixel_step == 4) { + // todo: move alpha channel from YUVA convert_yv24_to_rgb_ssex<4, CPUF_SSE2>(dstp, srcY, srcU, srcV, dst_pitch, src_pitch_y, src_pitch_uv, vi.width, vi.height, matrix); } else { if (env->GetCPUFlags() & CPUF_SSSE3) { @@ -1173,7 +1217,7 @@ PVideoFrame __stdcall ConvertYV24ToRGB::GetFrame(int n, IScriptEnvironment* env) } #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) { + if ((env->GetCPUFlags() & CPUF_MMX) && (pixel_step==3 || pixel_step==4)) { if (pixel_step == 4) { convert_yv24_to_rgb_mmx<4>(dstp, srcY, srcU, srcV, dst_pitch, src_pitch_y, src_pitch_uv, vi.width, vi.height, matrix); } else { @@ -1185,27 +1229,30 @@ PVideoFrame __stdcall ConvertYV24ToRGB::GetFrame(int n, IScriptEnvironment* env) //Slow C-code. - dstp += dst_pitch * (vi.height-1); // We start at last line + dstp += dst_pitch * (vi.height-1); // We start at last line. Not for Planar RGB + bool hasAlpha = (src_pitch_a != 0); if (pixel_step == 4) { for (int y = 0; y < vi.height; y++) { for (int x = 0; x < vi.width; x++) { int Y = srcY[x] + matrix.offset_y; int U = srcU[x] - 128; int V = srcV[x] - 128; + uint8_t a = hasAlpha ? srcA[x] : 255; // YUVA aware int b = (((int)matrix.y_b * Y + (int)matrix.u_b * U + (int)matrix.v_b * V + 4096)>>13); int g = (((int)matrix.y_g * Y + (int)matrix.u_g * U + (int)matrix.v_g * V + 4096)>>13); int r = (((int)matrix.y_r * Y + (int)matrix.u_r * U + (int)matrix.v_r * V + 4096)>>13); dstp[x*4+0] = PixelClip(b); // All the safety we can wish for. dstp[x*4+1] = PixelClip(g); // Probably needed here. dstp[x*4+2] = PixelClip(r); - dstp[x*4+3] = 255; // alpha + dstp[x*4+3] = a; // alpha } dstp -= dst_pitch; srcY += src_pitch_y; srcU += src_pitch_uv; srcV += src_pitch_uv; + srcA += src_pitch_a; } - } else { + } else if (pixel_step == 3) { const int Dstep = dst_pitch + (vi.width * pixel_step); for (int y = 0; y < vi.height; y++) { for (int x = 0; x < vi.width; x++) { @@ -1224,24 +1271,169 @@ PVideoFrame __stdcall ConvertYV24ToRGB::GetFrame(int n, IScriptEnvironment* env) srcY += src_pitch_y; srcU += src_pitch_uv; srcV += src_pitch_uv; + } + } else if (pixel_step == 8) { // RGB64 + for (int y = 0; y < vi.height; y++) { + for (int x = 0; x < vi.width; x++) { + int Y = reinterpret_cast(srcY)[x] + (matrix.offset_y << 8); + int U = reinterpret_cast(srcU)[x] - 32768; + int V = reinterpret_cast(srcV)[x] - 32768; + uint16_t a = hasAlpha ? 
reinterpret_cast(srcA)[x] : 65535; // YUVA aware + int b = (((__int64)matrix.y_b * Y + (__int64)matrix.u_b * U + (__int64)matrix.v_b * V + 4096)>>13); + int g = (((__int64)matrix.y_g * Y + (__int64)matrix.u_g * U + (__int64)matrix.v_g * V + 4096)>>13); + int r = (((__int64)matrix.y_r * Y + (__int64)matrix.u_r * U + (__int64)matrix.v_r * V + 4096)>>13); + reinterpret_cast(dstp)[x*4+0] = clamp(b,0,65535); // All the safety we can wish for. + reinterpret_cast(dstp)[x*4+1] = clamp(g,0,65535); // Probably needed here. + reinterpret_cast(dstp)[x*4+2] = clamp(r,0,65535); + reinterpret_cast(dstp)[x*4+3] = a; // alpha + } + dstp -= dst_pitch; + srcY += src_pitch_y; + srcU += src_pitch_uv; + srcV += src_pitch_uv; + srcA += src_pitch_a; + } + } else if (pixel_step == 6) { // RGB48 + const int Dstep = dst_pitch + (vi.width * pixel_step); + for (int y = 0; y < vi.height; y++) { + for (int x = 0; x < vi.width; x++) { + int Y = reinterpret_cast(srcY)[x] + (matrix.offset_y << 8); + int U = reinterpret_cast(srcU)[x] - 32768; + int V = reinterpret_cast(srcV)[x] - 32768; + int b = (((__int64)matrix.y_b * Y + (__int64)matrix.u_b * U + (__int64)matrix.v_b * V + 4096)>>13); + int g = (((__int64)matrix.y_g * Y + (__int64)matrix.u_g * U + (__int64)matrix.v_g * V + 4096)>>13); + int r = (((__int64)matrix.y_r * Y + (__int64)matrix.u_r * U + (__int64)matrix.v_r * V + 4096)>>13); + reinterpret_cast(dstp)[0] = clamp(b,0,65535); // All the safety we can wish for. + reinterpret_cast(dstp)[1] = clamp(g,0,65535); // Probably needed here. + reinterpret_cast(dstp)[2] = clamp(r,0,65535); + dstp += pixel_step; + } + dstp -= Dstep; + srcY += src_pitch_y; + srcU += src_pitch_uv; + srcV += src_pitch_uv; + } + } else if(pixel_step==-1) + { + // YUV444 -> PlanarRGB + // YUVA444 -> PlanarRGBA + BYTE *dstpG = dst->GetWritePtr(PLANAR_G); + BYTE *dstpB = dst->GetWritePtr(PLANAR_B); + BYTE *dstpR = dst->GetWritePtr(PLANAR_R); + BYTE *dstpA = dst->GetWritePtr(PLANAR_A); + + int dst_pitchG = dst->GetPitch(PLANAR_G); + int dst_pitchB = dst->GetPitch(PLANAR_B); + int dst_pitchR = dst->GetPitch(PLANAR_R); + int dst_pitchA = dst->GetPitch(PLANAR_A); + + int pixelsize = vi.ComponentSize(); + + // todo: template, maybe sse + if(pixelsize==1) + { + for (int y = 0; y < vi.height; y++) { + for (int x = 0; x < vi.width; x++) { + int Y = reinterpret_cast(srcY)[x] + (matrix.offset_y); + int U = reinterpret_cast(srcU)[x] - 128; + int V = reinterpret_cast(srcV)[x] - 128; + int A; + if(hasAlpha) + A = reinterpret_cast(srcA)[x]; + int b = (((__int64)matrix.y_b * Y + (__int64)matrix.u_b * U + (__int64)matrix.v_b * V + 4096)>>13); + int g = (((__int64)matrix.y_g * Y + (__int64)matrix.u_g * U + (__int64)matrix.v_g * V + 4096)>>13); + int r = (((__int64)matrix.y_r * Y + (__int64)matrix.u_r * U + (__int64)matrix.v_r * V + 4096)>>13); + reinterpret_cast(dstpB)[x] = clamp(b,0,255); // All the safety we can wish for. + reinterpret_cast(dstpG)[x] = clamp(g,0,255); // Probably needed here. 
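// [Editor's note, not part of the original patch] The integer paths above use 13-bit fixed-point
// arithmetic: BuildMatrix() pre-scales every coefficient by 2^13 (shift = 13), so each output
// channel is (y_coef*Y + u_coef*U + v_coef*V + 4096) >> 13, the added 4096 = 2^12 rounding to
// nearest before the shift. For example, with the usual limited-range setup the Y coefficient is
// about 255/219 * 8192 = 9539, and peak-white luma (235 - 16 = 219 after offset_y) gives
// (9539*219 + 4096) >> 13 = 255.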
+ reinterpret_cast(dstpR)[x] = clamp(r,0,255); + if(hasAlpha) + reinterpret_cast(dstpA)[x] = A; + } + dstpG += dst_pitchG; + dstpB += dst_pitchB; + dstpR += dst_pitchR; + if(hasAlpha) + dstpA += dst_pitchA; + srcY += src_pitch_y; + srcU += src_pitch_uv; + srcV += src_pitch_uv; + } + } else if (pixelsize==2) { + for (int y = 0; y < vi.height; y++) { + for (int x = 0; x < vi.width; x++) { + int Y = reinterpret_cast(srcY)[x] + (matrix.offset_y << 8); + int U = reinterpret_cast(srcU)[x] - 32768; + int V = reinterpret_cast(srcV)[x] - 32768; + int A; + if(hasAlpha) + A = reinterpret_cast(srcA)[x]; + int b = (((__int64)matrix.y_b * Y + (__int64)matrix.u_b * U + (__int64)matrix.v_b * V + 4096)>>13); + int g = (((__int64)matrix.y_g * Y + (__int64)matrix.u_g * U + (__int64)matrix.v_g * V + 4096)>>13); + int r = (((__int64)matrix.y_r * Y + (__int64)matrix.u_r * U + (__int64)matrix.v_r * V + 4096)>>13); + reinterpret_cast(dstpB)[x] = clamp(b,0,65535); // All the safety we can wish for. + reinterpret_cast(dstpG)[x] = clamp(g,0,65535); // Probably needed here. + reinterpret_cast(dstpR)[x] = clamp(r,0,65535); + if(hasAlpha) + reinterpret_cast(dstpA)[x] = A; + } + dstpG += dst_pitchG; + dstpB += dst_pitchB; + dstpR += dst_pitchR; + if(hasAlpha) + dstpA += dst_pitchA; + srcY += src_pitch_y; + srcU += src_pitch_uv; + srcV += src_pitch_uv; + } + } else { // pixelsize==4 float + for (int y = 0; y < vi.height; y++) { + for (int x = 0; x < vi.width; x++) { + float Y = reinterpret_cast(srcY)[x] + ((float)matrix.offset_y / 256.0f); + float U = reinterpret_cast(srcU)[x] - 0.5f; + float V = reinterpret_cast(srcV)[x] - 0.5f; + float A; + if(hasAlpha) + A = reinterpret_cast(srcA)[x]; + float b = matrix.y_b_f * Y + matrix.u_b_f * U + matrix.v_b_f * V; + float g = matrix.y_g_f * Y + matrix.u_g_f * U + matrix.v_g_f * V; + float r = matrix.y_r_f * Y + matrix.u_r_f * U + matrix.v_r_f * V; + reinterpret_cast(dstpB)[x] = clamp(b, 0.0f, 1.0f); // All the safety we can wish for. + reinterpret_cast(dstpG)[x] = clamp(g, 0.0f, 1.0f); // Probably needed here. 
+ reinterpret_cast(dstpR)[x] = clamp(r, 0.0f, 1.0f); + if(hasAlpha) + reinterpret_cast(dstpA)[x] = A; + } + dstpG += dst_pitchG; + dstpB += dst_pitchB; + dstpR += dst_pitchR; + if(hasAlpha) + dstpA += dst_pitchA; + srcY += src_pitch_y; + srcU += src_pitch_uv; + srcV += src_pitch_uv; + } } } return dst; } -AVSValue __cdecl ConvertYV24ToRGB::Create32(AVSValue args, void*, IScriptEnvironment* env) { +#if 0 +AVSValue __cdecl ConvertYUV444ToRGB::Create32(AVSValue args, void*, IScriptEnvironment* env) { PClip clip = args[0].AsClip(); if (clip->GetVideoInfo().IsRGB()) return clip; - return new ConvertYV24ToRGB(clip, getMatrix(args[1].AsString(0), env), 4, env); + return new ConvertYUV444ToRGB(clip, getMatrix(args[1].AsString(0), env), 4, env); } +#endif -AVSValue __cdecl ConvertYV24ToRGB::Create24(AVSValue args, void*, IScriptEnvironment* env) { +#if 0 +AVSValue __cdecl ConvertYUV444ToRGB::Create24(AVSValue args, void*, IScriptEnvironment* env) { PClip clip = args[0].AsClip(); if (clip->GetVideoInfo().IsRGB()) return clip; - return new ConvertYV24ToRGB(clip, getMatrix(args[1].AsString(0), env), 3, env); + return new ConvertYUV444ToRGB(clip, getMatrix(args[1].AsString(0), env), 3, env); } +#endif /************************************ * YUY2 to YV16 diff --git a/avs_core/convert/convert_planar.h b/avs_core/convert/convert_planar.h index e8ff5c14e..c65824356 100644 --- a/avs_core/convert/convert_planar.h +++ b/avs_core/convert/convert_planar.h @@ -77,6 +77,16 @@ struct ConversionMatrix { int16_t v_g; int16_t v_b; + float y_r_f; + float y_g_f; + float y_b_f; + float u_r_f; + float u_g_f; + float u_b_f; + float v_r_f; + float v_g_f; + float v_b_f; + int offset_y; }; @@ -110,18 +120,19 @@ class ConvertYUY2ToYV16 : public GenericVideoFilter static AVSValue __cdecl Create(AVSValue args, void*, IScriptEnvironment* env); }; -class ConvertYV24ToRGB : public GenericVideoFilter +// note for AVS16: renamed from ConvertYV24ToRGB (Convert444ToRGB is already used in Overlay) +class ConvertYUV444ToRGB : public GenericVideoFilter { public: - ConvertYV24ToRGB(PClip src, int matrix, int pixel_step, IScriptEnvironment* env); + ConvertYUV444ToRGB(PClip src, int matrix, int pixel_step, IScriptEnvironment* env); PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); int __stdcall SetCacheHints(int cachehints, int frame_range) override { return cachehints == CACHE_GET_MTMODE ? MT_NICE_FILTER : 0; } - static AVSValue __cdecl Create24(AVSValue args, void*, IScriptEnvironment* env); - static AVSValue __cdecl Create32(AVSValue args, void*, IScriptEnvironment* env); +// static AVSValue __cdecl Create24(AVSValue args, void*, IScriptEnvironment* env); +// static AVSValue __cdecl Create32(AVSValue args, void*, IScriptEnvironment* env); private: void BuildMatrix(double Kr, double Kb, int Sy, int Suv, int Oy, int shift); ConversionMatrix matrix; diff --git a/avs_core/convert/convert_rgb.cpp b/avs_core/convert/convert_rgb.cpp index bcce44e87..aad7e2b38 100644 --- a/avs_core/convert/convert_rgb.cpp +++ b/avs_core/convert/convert_rgb.cpp @@ -42,10 +42,10 @@ ******* RGB Helper Classes ****** *************************************/ -RGB24to32::RGB24to32(PClip src) +RGBtoRGBA::RGBtoRGBA(PClip src) : GenericVideoFilter(src) { - vi.pixel_type = VideoInfo::CS_BGR32; + vi.pixel_type = src->GetVideoInfo().ComponentSize() == 1 ? 
VideoInfo::CS_BGR32 : VideoInfo::CS_BGR64; } //todo: think how to port to sse2 without tons of shuffles or (un)packs @@ -139,8 +139,17 @@ static void convert_rgb24_to_rgb32_c(const BYTE *srcp, BYTE *dstp, size_t src_pi } } +static void convert_rgb48_to_rgb64_c(const BYTE *srcp, BYTE *dstp, size_t src_pitch, size_t dst_pitch, size_t width, size_t height) { + for (size_t y = height; y > 0; --y) { + for (size_t x = 0; x < width; ++x) { + *reinterpret_cast(dstp + x*8) = *reinterpret_cast(srcp+x*6) | 0xFFFF000000000000ULL; + } + srcp += src_pitch; + dstp += dst_pitch; + } +} -PVideoFrame __stdcall RGB24to32::GetFrame(int n, IScriptEnvironment* env) +PVideoFrame __stdcall RGBtoRGBA::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame src = child->GetFrame(n, env); PVideoFrame dst = env->NewVideoFrame(vi); @@ -149,19 +158,25 @@ PVideoFrame __stdcall RGB24to32::GetFrame(int n, IScriptEnvironment* env) const int src_pitch = src->GetPitch(); const int dst_pitch = dst->GetPitch(); - if ((env->GetCPUFlags() & CPUF_SSSE3) && IsPtrAligned(srcp, 16)) { + int pixelsize = vi.ComponentSize(); + + // todo sse for 16 bit + if (pixelsize==1 && (env->GetCPUFlags() & CPUF_SSSE3) && IsPtrAligned(srcp, 16)) { convert_rgb24_to_rgb32_ssse3(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); } else #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) + if (pixelsize==1 && (env->GetCPUFlags() & CPUF_MMX)) { convert_rgb24_to_rgb32_mmx(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); } else #endif { - convert_rgb24_to_rgb32_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + if (pixelsize == 1) + convert_rgb24_to_rgb32_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + else + convert_rgb48_to_rgb64_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); } return dst; } @@ -169,10 +184,10 @@ PVideoFrame __stdcall RGB24to32::GetFrame(int n, IScriptEnvironment* env) -RGB32to24::RGB32to24(PClip src) +RGBAtoRGB::RGBAtoRGB(PClip src) : GenericVideoFilter(src) { - vi.pixel_type = VideoInfo::CS_BGR24; + vi.pixel_type = src->GetVideoInfo().ComponentSize() == 1 ? VideoInfo::CS_BGR24 : VideoInfo::CS_BGR48; } //todo: think how to port to sse2 without tons of shuffles or (un)packs @@ -272,7 +287,23 @@ static void convert_rgb32_to_rgb24_c(const BYTE *srcp, BYTE *dstp, size_t src_pi } } -PVideoFrame __stdcall RGB32to24::GetFrame(int n, IScriptEnvironment* env) +static void convert_rgb64_to_rgb48_c(const BYTE *srcp, BYTE *dstp, size_t src_pitch, size_t dst_pitch, size_t width, size_t height) { + for (size_t y = height; y > 0; --y) { + size_t x; + for (x = 0; x < width-1; ++x) { // width-1 really! 
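// [Editor's note, not part of the original patch] width-1 because each iteration stores a full
// 8-byte uint64 at a 6-byte stride: the store covers pixel x plus the first two bytes of pixel
// x+1 (harmless, they are rewritten on the next iteration), but doing that for the final pixel
// would write past the end of the row, so the last pixel is copied component-wise below.
// The 48->64 routine above has the mirror situation on the read side (an 8-byte load per
// 6-byte source pixel), with the OR against 0xFFFF000000000000 forcing the alpha word to 65535.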
+ *reinterpret_cast(dstp+x*6) = *reinterpret_cast(srcp+x*8); + } + //last pixel + reinterpret_cast(dstp)[x*3+0] = reinterpret_cast(srcp)[x*4+0]; + reinterpret_cast(dstp)[x*3+1] = reinterpret_cast(srcp)[x*4+1]; + reinterpret_cast(dstp)[x*3+2] = reinterpret_cast(srcp)[x*4+2]; + + srcp += src_pitch; + dstp += dst_pitch; + } +} + +PVideoFrame __stdcall RGBAtoRGB::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame src = child->GetFrame(n, env); PVideoFrame dst = env->NewVideoFrame(vi); @@ -281,20 +312,92 @@ PVideoFrame __stdcall RGB32to24::GetFrame(int n, IScriptEnvironment* env) size_t src_pitch = src->GetPitch(); size_t dst_pitch = dst->GetPitch(); - if ((env->GetCPUFlags() & CPUF_SSSE3) && IsPtrAligned(srcp, 16)) { + int pixelsize = vi.ComponentSize(); + + // todo sse for 16 bit + if ((pixelsize==1) && (env->GetCPUFlags() & CPUF_SSSE3) && IsPtrAligned(srcp, 16)) { convert_rgb32_to_rgb24_ssse3(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); } else #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) + if ((pixelsize==1) && (env->GetCPUFlags() & CPUF_MMX)) { convert_rgb32_to_rgb24_mmx(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); } else #endif { - convert_rgb32_to_rgb24_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + if(pixelsize==1) + convert_rgb32_to_rgb24_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + else + convert_rgb64_to_rgb48_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); } return dst; } +PackedRGBtoPlanarRGB::PackedRGBtoPlanarRGB(PClip src) + : GenericVideoFilter(src) +{ + vi.pixel_type = src->GetVideoInfo().ComponentSize() == 1 ? + (src->GetVideoInfo().IsRGB24() ? VideoInfo::CS_RGBP : VideoInfo::CS_RGBAP) : // RGB24, RGB32 + (src->GetVideoInfo().IsRGB48() ? VideoInfo::CS_RGBP16 : VideoInfo::CS_RGBAP16); // RGB48, RGB64 +} + +template +static void convert_rgb_to_rgbp_c(const BYTE *srcp, BYTE * (&dstp)[4], int src_pitch, int (&dst_pitch)[4], size_t width, size_t height) { + for (size_t y = height; y > 0; --y) { + size_t x; + // not proud of it but it works + for (x = 0; x < width; ++x) { + pixel_t B = reinterpret_cast(srcp)[x*numcomponents + 0]; + pixel_t G = reinterpret_cast(srcp)[x*numcomponents + 1]; + pixel_t R = reinterpret_cast(srcp)[x*numcomponents + 2]; + pixel_t A; + if(numcomponents==4) + A = reinterpret_cast(srcp)[x*numcomponents + 3]; + reinterpret_cast(dstp[0])[x] = G; + reinterpret_cast(dstp[1])[x] = B; + reinterpret_cast(dstp[2])[x] = R; + if(numcomponents==4) + reinterpret_cast(dstp[3])[x] = A; + } + + srcp -= src_pitch; // source packed RGB is upside down + dstp[0] += dst_pitch[0]; + dstp[1] += dst_pitch[1]; + dstp[2] += dst_pitch[2]; + if (numcomponents == 4) + dstp[3] += dst_pitch[3]; + } +} + +PVideoFrame __stdcall PackedRGBtoPlanarRGB::GetFrame(int n, IScriptEnvironment* env) +{ + PVideoFrame src = child->GetFrame(n, env); + PVideoFrame dst = env->NewVideoFrame(vi); + int src_pitch = src->GetPitch(); + const BYTE *srcp = src->GetReadPtr(); + BYTE *dstp[4] = {dst->GetWritePtr(PLANAR_G),dst->GetWritePtr(PLANAR_B),dst->GetWritePtr(PLANAR_R),dst->GetWritePtr(PLANAR_A)}; + int dst_pitch[4] = {dst->GetPitch(PLANAR_G),dst->GetPitch(PLANAR_B),dst->GetPitch(PLANAR_R),dst->GetPitch(PLANAR_A)}; + + int pixelsize = vi.ComponentSize(); + + srcp += src_pitch * (vi.height - 1); // start from bottom: packed RGB is upside down + + // todo sse + if(pixelsize==1) + { + if(vi.IsPlanarRGB()) + convert_rgb_to_rgbp_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + else // RGBA + convert_rgb_to_rgbp_c(srcp, dstp, src_pitch, 
dst_pitch, vi.width, vi.height); + } else { + if(vi.IsPlanarRGB()) + convert_rgb_to_rgbp_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + else // RGBA + convert_rgb_to_rgbp_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + } + return dst; +} + + diff --git a/avs_core/convert/convert_rgb.h b/avs_core/convert/convert_rgb.h index ef5d039a8..8e0bbd28a 100644 --- a/avs_core/convert/convert_rgb.h +++ b/avs_core/convert/convert_rgb.h @@ -38,13 +38,13 @@ #include -class RGB24to32 : public GenericVideoFilter +class RGBtoRGBA : public GenericVideoFilter /** - * RGB -> RGBA, setting alpha channel to 255 + * RGB -> RGBA, setting alpha channel to 255/65535 */ { public: - RGB24to32(PClip src); + RGBtoRGBA(PClip src); PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); int __stdcall SetCacheHints(int cachehints, int frame_range) override { @@ -53,13 +53,27 @@ class RGB24to32 : public GenericVideoFilter }; -class RGB32to24 : public GenericVideoFilter +class RGBAtoRGB : public GenericVideoFilter /** * Class to strip alpha channel */ { public: - RGB32to24(PClip src); + RGBAtoRGB(PClip src); + PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); + + int __stdcall SetCacheHints(int cachehints, int frame_range) override { + return cachehints == CACHE_GET_MTMODE ? MT_NICE_FILTER : 0; + } +}; + +class PackedRGBtoPlanarRGB : public GenericVideoFilter + /** + * RGB(A) -> RGBP(A) + */ +{ +public: + PackedRGBtoPlanarRGB(PClip src); PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); int __stdcall SetCacheHints(int cachehints, int frame_range) override { From 3a2b8b3b311a28e02d7b5f3d34bb279484c7652e Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 25 Aug 2016 13:52:19 +0200 Subject: [PATCH 005/120] New: PlanarToRGBA. ToRGB conversions seem to be ready. 
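[Editor's note, not part of the original patch] At their core, the packed <-> planar RGB filters this series adds are a per-pixel de-interleave/re-interleave of BGR(A) samples, walking the packed buffer from the last row upward because packed RGB frames are stored bottom-up. A minimal single-row sketch of the packed-to-planar direction, assuming an 8-bit RGB32 (BGRA) source; the function and variable names are illustrative only and do not appear in the patch:

    #include <cstddef>
    #include <cstdint>

    // De-interleave one row of packed BGRA into four separate planes (illustration only).
    static void deinterleave_row_bgra8(const uint8_t* packed, size_t width,
                                       uint8_t* planeB, uint8_t* planeG,
                                       uint8_t* planeR, uint8_t* planeA)
    {
        for (size_t x = 0; x < width; ++x) {
            planeB[x] = packed[x * 4 + 0]; // packed pixel layout is B, G, R, A
            planeG[x] = packed[x * 4 + 1];
            planeR[x] = packed[x * 4 + 2];
            planeA[x] = packed[x * 4 + 3]; // dropped when the target is RGBP without alpha
        }
    }

The real convert_rgb_to_rgbp_c / convert_rgbp_to_rgb_c helpers are additionally templated on the pixel type (uint8_t or uint16_t) and on the packed component count, and substitute 255/65535 for alpha when the source has no alpha channel of its own.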
--- avs_core/convert/convert.cpp | 38 ++++--- avs_core/convert/convert_planar.cpp | 162 ++++++++++++++++++++-------- avs_core/convert/convert_rgb.cpp | 94 +++++++++++++--- avs_core/convert/convert_rgb.h | 19 +++- 4 files changed, 238 insertions(+), 75 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 67eb45b0a..049fe12df 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -55,7 +55,8 @@ extern const AVSFunction Convert_filters[] = { // matrix can be "rec601", { "ConvertToRGB32", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)32 }, { "ConvertToRGB48", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)48 }, { "ConvertToRGB64", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)64 }, - { "ConvertToPlanarRGB", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)-1 }, + { "ConvertToPlanarRGB", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)-1 }, + { "ConvertToPlanarRGBA", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)-2 }, { "ConvertToY8", BUILTIN_FUNC_PREFIX, "c[matrix]s", ConvertToY8::Create }, { "ConvertToYV12", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s[ChromaOutPlacement]s", ConvertToYV12::Create }, { "ConvertToYV24", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToPlanarGeneric::CreateYUV444}, @@ -588,9 +589,9 @@ AVSValue __cdecl ConvertToRGB::Create(AVSValue args, void* user_data, IScriptEnv // todo bitdepth conversion on-the-fly - // common Create for all CreateRGB24/32/48/64/Planar(-1) using user_data + // common Create for all CreateRGB24/32/48/64/Planar(RGBP:-1, RGPAP:-2) using user_data int target_rgbtype = (int)reinterpret_cast(user_data); - // -1: Planar RGB + // -1,-2: Planar RGB(A) // 0: not specified (leave if input is packed RGB, convert to rgb32/64 input colorspace dependent) // 24,32,48,64: RGB24/32/48/64 @@ -600,16 +601,16 @@ AVSValue __cdecl ConvertToRGB::Create(AVSValue args, void* user_data, IScriptEnv // conversion to planar or packed RGB is always from 444 clip = ConvertToPlanarGeneric::CreateYUV444(AVSValue(new_args, 5), NULL, env).AsClip(); if((target_rgbtype==24 || target_rgbtype==32) && vi.ComponentSize()!=1) - env->ThrowError("ConvertToRGB: conversion is allowed only from 8 bit colorspace"); + env->ThrowError("ConvertToRGB%d: conversion is allowed only from 8 bit colorspace",target_rgbtype); if((target_rgbtype==48 || target_rgbtype==64) && vi.ComponentSize()!=2) - env->ThrowError("ConvertToRGB: conversion is allowed only from 16 bit colorspace"); + env->ThrowError("ConvertToRGB%d: conversion is allowed only from 16 bit colorspace",target_rgbtype); if(target_rgbtype==0 && vi.ComponentSize()==4) env->ThrowError("ConvertToRGB: conversion is allowed only from 8 or 16 bit colorspaces"); int rgbtype_param; switch (target_rgbtype) { - case -1: - rgbtype_param = -1; break; // planar RGB(A) + case -1: case -2: + rgbtype_param = target_rgbtype; break; // planar RGB(A) case 0: rgbtype_param = vi.ComponentSize() == 1 ? 
4 : 8; break; // input bitdepth adaptive case 24: @@ -627,25 +628,28 @@ AVSValue __cdecl ConvertToRGB::Create(AVSValue args, void* user_data, IScriptEnv if (haveOpts) env->ThrowError("ConvertToRGB: ChromaPlacement and ChromaResample options are not supported."); - // planar RGB-like + // planar RGB-like source if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { - if (target_rgbtype == -1) // planar to planar - return clip; + if (target_rgbtype < 0) // planar to planar + { + if ((vi.IsPlanarRGB() && target_rgbtype==-1) || (vi.IsPlanarRGBA() && target_rgbtype==-2)) + return clip; + env->ThrowError("ConvertToPlanarRGB: cannot convert between RGBP and RGBAP"); + } if(vi.ComponentSize() == 4) env->ThrowError("ConvertToRGB: conversion from float colorspace is not supported."); if((target_rgbtype==24 || target_rgbtype==32) && vi.ComponentSize()!=1) env->ThrowError("ConvertToRGB: conversion is allowed only from 8 bit colorspace"); if((target_rgbtype==48 || target_rgbtype==64) && vi.ComponentSize()!=2) env->ThrowError("ConvertToRGB: conversion is allowed only from 16 bit colorspace"); - env->ThrowError("ConvertToRGB: Planar RGB to packed RGB conversion is not yet implemented"); - //return new PlanarRGBtoPackedRGB(clip); todo + return new PlanarRGBtoPackedRGB(clip, (target_rgbtype==32 || target_rgbtype==64)); } // YUY2 if (vi.IsYUV()) // at this point IsYUV means YUY2 (non-planar) { - if (target_rgbtype==48 || target_rgbtype==64 || target_rgbtype==-1) + if (target_rgbtype==48 || target_rgbtype==64 || target_rgbtype < 0) env->ThrowError("ConvertToRGB: conversion from YUY2 is allowed only to 8 bit packed RGB"); return new ConvertToRGB(clip, target_rgbtype==24, matrix, env); } @@ -659,14 +663,14 @@ AVSValue __cdecl ConvertToRGB::Create(AVSValue args, void* user_data, IScriptEnv if(target_rgbtype==32 || target_rgbtype==64) if (vi.IsRGB24() || vi.IsRGB48()) - return new RGBtoRGBA(clip); // also handles 48to64 + return new RGBtoRGBA(clip); if(target_rgbtype==24 || target_rgbtype==48) if (vi.IsRGB32() || vi.IsRGB64()) - return new RGBAtoRGB(clip); // also handles 64to48 + return new RGBAtoRGB(clip); - if (target_rgbtype == -1) - return new PackedRGBtoPlanarRGB(clip); + if (target_rgbtype < 0) + return new PackedRGBtoPlanarRGB(clip, target_rgbtype==-2); return clip; } diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index 43511e926..08ff2681d 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -801,21 +801,25 @@ ConvertYUV444ToRGB::ConvertYUV444ToRGB(PClip src, int in_matrix, int _pixel_step switch (pixel_step) { - case -1: + case -1: case -2: switch (vi.BitsPerComponent()) { - case 8: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAP : VideoInfo::CS_RGBP; break; - case 10: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAP10 : VideoInfo::CS_RGBP10; break; - case 12: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAP12 : VideoInfo::CS_RGBP12; break; - case 14: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAP14 : VideoInfo::CS_RGBP14; break; - case 16: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAP16 : VideoInfo::CS_RGBP16; break; - case 32: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_RGBAPS : VideoInfo::CS_RGBPS; break; + case 8: vi.pixel_type = pixel_step == -2 ? VideoInfo::CS_RGBAP : VideoInfo::CS_RGBP; break; + case 10: vi.pixel_type = pixel_step == -2 ? VideoInfo::CS_RGBAP10 : VideoInfo::CS_RGBP10; break; + case 12: vi.pixel_type = pixel_step == -2 ?
VideoInfo::CS_RGBAP12 : VideoInfo::CS_RGBP12; break; + case 14: vi.pixel_type = pixel_step == -2 ? VideoInfo::CS_RGBAP14 : VideoInfo::CS_RGBP14; break; + case 16: vi.pixel_type = pixel_step == -2 ? VideoInfo::CS_RGBAP16 : VideoInfo::CS_RGBP16; break; + case 32: vi.pixel_type = pixel_step == -2 ? VideoInfo::CS_RGBAPS : VideoInfo::CS_RGBPS; break; + default: + env->ThrowError("ConvertYUV444ToRGB: invalid vi.BitsPerComponent(): %d", vi.BitsPerComponent()); } break; case 3: vi.pixel_type = VideoInfo::CS_BGR24; break; case 4: vi.pixel_type = VideoInfo::CS_BGR32; break; case 6: vi.pixel_type = VideoInfo::CS_BGR48; break; case 8: vi.pixel_type = VideoInfo::CS_BGR64; break; + default: + env->ThrowError("ConvertYUV444ToRGB: invalid pixel step: %d", pixel_step); } const int shift = 13; // for integer arithmetic @@ -1196,9 +1200,9 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en const int dst_pitch = dst->GetPitch(); - if (pixel_step != 4 && pixel_step != 3 && pixel_step != 8 && pixel_step != 6 && pixel_step != -1) { + /*if (pixel_step != 4 && pixel_step != 3 && pixel_step != 8 && pixel_step != 6 && pixel_step != -1 && pixel_step != -2) { env->ThrowError("Invalid pixel step. This is a bug."); - } + }*/ // todo: SSE for not only 8 bit RGB if ((env->GetCPUFlags() & CPUF_SSE2) && (pixel_step==3 || pixel_step==4)) { @@ -1230,14 +1234,14 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en //Slow C-code. dstp += dst_pitch * (vi.height-1); // We start at last line. Not for Planar RGB - bool hasAlpha = (src_pitch_a != 0); + bool srcHasAlpha = (src_pitch_a != 0); if (pixel_step == 4) { for (int y = 0; y < vi.height; y++) { for (int x = 0; x < vi.width; x++) { int Y = srcY[x] + matrix.offset_y; int U = srcU[x] - 128; int V = srcV[x] - 128; - uint8_t a = hasAlpha ? srcA[x] : 255; // YUVA aware + uint8_t a = srcHasAlpha ? srcA[x] : 255; // YUVA aware int b = (((int)matrix.y_b * Y + (int)matrix.u_b * U + (int)matrix.v_b * V + 4096)>>13); int g = (((int)matrix.y_g * Y + (int)matrix.u_g * U + (int)matrix.v_g * V + 4096)>>13); int r = (((int)matrix.y_r * Y + (int)matrix.u_r * U + (int)matrix.v_r * V + 4096)>>13); @@ -1278,7 +1282,7 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en int Y = reinterpret_cast(srcY)[x] + (matrix.offset_y << 8); int U = reinterpret_cast(srcU)[x] - 32768; int V = reinterpret_cast(srcV)[x] - 32768; - uint16_t a = hasAlpha ? reinterpret_cast(srcA)[x] : 65535; // YUVA aware + uint16_t a = srcHasAlpha ? 
reinterpret_cast(srcA)[x] : 65535; // YUVA aware int b = (((__int64)matrix.y_b * Y + (__int64)matrix.u_b * U + (__int64)matrix.v_b * V + 4096)>>13); int g = (((__int64)matrix.y_g * Y + (__int64)matrix.u_g * U + (__int64)matrix.v_g * V + 4096)>>13); int r = (((__int64)matrix.y_r * Y + (__int64)matrix.u_r * U + (__int64)matrix.v_r * V + 4096)>>13); @@ -1313,14 +1317,18 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en srcU += src_pitch_uv; srcV += src_pitch_uv; } - } else if(pixel_step==-1) + } else if(pixel_step < 0) // -1: RGBP -2:RGBAP { // YUV444 -> PlanarRGB // YUVA444 -> PlanarRGBA + bool targetHasAlpha = pixel_step == -2; + BYTE *dstpG = dst->GetWritePtr(PLANAR_G); BYTE *dstpB = dst->GetWritePtr(PLANAR_B); BYTE *dstpR = dst->GetWritePtr(PLANAR_R); - BYTE *dstpA = dst->GetWritePtr(PLANAR_A); + BYTE *dstpA; + if(targetHasAlpha) + dstpA = dst->GetWritePtr(PLANAR_A); int dst_pitchG = dst->GetPitch(PLANAR_G); int dst_pitchB = dst->GetPitch(PLANAR_B); @@ -1329,7 +1337,7 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en int pixelsize = vi.ComponentSize(); - // todo: template, maybe sse + // todo: template for integers, maybe sse if(pixelsize==1) { for (int y = 0; y < vi.height; y++) { @@ -1338,21 +1346,21 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en int U = reinterpret_cast(srcU)[x] - 128; int V = reinterpret_cast(srcV)[x] - 128; int A; - if(hasAlpha) - A = reinterpret_cast(srcA)[x]; - int b = (((__int64)matrix.y_b * Y + (__int64)matrix.u_b * U + (__int64)matrix.v_b * V + 4096)>>13); - int g = (((__int64)matrix.y_g * Y + (__int64)matrix.u_g * U + (__int64)matrix.v_g * V + 4096)>>13); - int r = (((__int64)matrix.y_r * Y + (__int64)matrix.u_r * U + (__int64)matrix.v_r * V + 4096)>>13); + if(targetHasAlpha) + A = srcHasAlpha ? reinterpret_cast(srcA)[x] : 255; + int b = (((int)matrix.y_b * Y + (int)matrix.u_b * U + (int)matrix.v_b * V + 4096)>>13); + int g = (((int)matrix.y_g * Y + (int)matrix.u_g * U + (int)matrix.v_g * V + 4096)>>13); + int r = (((int)matrix.y_r * Y + (int)matrix.u_r * U + (int)matrix.v_r * V + 4096)>>13); reinterpret_cast(dstpB)[x] = clamp(b,0,255); // All the safety we can wish for. reinterpret_cast(dstpG)[x] = clamp(g,0,255); // Probably needed here. reinterpret_cast(dstpR)[x] = clamp(r,0,255); - if(hasAlpha) + if(targetHasAlpha) reinterpret_cast(dstpA)[x] = A; } dstpG += dst_pitchG; dstpB += dst_pitchB; dstpR += dst_pitchR; - if(hasAlpha) + if(targetHasAlpha) dstpA += dst_pitchA; srcY += src_pitch_y; srcU += src_pitch_uv; @@ -1365,21 +1373,22 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en int U = reinterpret_cast(srcU)[x] - 32768; int V = reinterpret_cast(srcV)[x] - 32768; int A; - if(hasAlpha) - A = reinterpret_cast(srcA)[x]; + if(targetHasAlpha) + A = srcHasAlpha ? reinterpret_cast(srcA)[x] : 65535; + // __int64 needed for 16 bit pixels int b = (((__int64)matrix.y_b * Y + (__int64)matrix.u_b * U + (__int64)matrix.v_b * V + 4096)>>13); int g = (((__int64)matrix.y_g * Y + (__int64)matrix.u_g * U + (__int64)matrix.v_g * V + 4096)>>13); int r = (((__int64)matrix.y_r * Y + (__int64)matrix.u_r * U + (__int64)matrix.v_r * V + 4096)>>13); reinterpret_cast(dstpB)[x] = clamp(b,0,65535); // All the safety we can wish for. reinterpret_cast(dstpG)[x] = clamp(g,0,65535); // Probably needed here. 
reinterpret_cast(dstpR)[x] = clamp(r,0,65535); - if(hasAlpha) + if(targetHasAlpha) reinterpret_cast(dstpA)[x] = A; } dstpG += dst_pitchG; dstpB += dst_pitchB; dstpR += dst_pitchR; - if(hasAlpha) + if(targetHasAlpha) dstpA += dst_pitchA; srcY += src_pitch_y; srcU += src_pitch_uv; @@ -1392,21 +1401,21 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en float U = reinterpret_cast(srcU)[x] - 0.5f; float V = reinterpret_cast(srcV)[x] - 0.5f; float A; - if(hasAlpha) - A = reinterpret_cast(srcA)[x]; + if(targetHasAlpha) + A = srcHasAlpha ? reinterpret_cast(srcA)[x] : 1.0f; float b = matrix.y_b_f * Y + matrix.u_b_f * U + matrix.v_b_f * V; float g = matrix.y_g_f * Y + matrix.u_g_f * U + matrix.v_g_f * V; float r = matrix.y_r_f * Y + matrix.u_r_f * U + matrix.v_r_f * V; reinterpret_cast(dstpB)[x] = clamp(b, 0.0f, 1.0f); // All the safety we can wish for. reinterpret_cast(dstpG)[x] = clamp(g, 0.0f, 1.0f); // Probably needed here. reinterpret_cast(dstpR)[x] = clamp(r, 0.0f, 1.0f); - if(hasAlpha) + if(targetHasAlpha) reinterpret_cast(dstpA)[x] = A; } dstpG += dst_pitchG; dstpB += dst_pitchB; dstpR += dst_pitchR; - if(hasAlpha) + if(targetHasAlpha) dstpA += dst_pitchA; srcY += src_pitch_y; srcU += src_pitch_uv; @@ -1705,7 +1714,9 @@ ConvertToPlanarGeneric::ConvertToPlanarGeneric(PClip src, int dst_space, bool in auto Is420 = [](int pix_type) { return pix_type == VideoInfo::CS_YV12 || pix_type == VideoInfo::CS_I420 || - pix_type == VideoInfo::CS_YUV420P16 || pix_type == VideoInfo::CS_YUV420PS; + pix_type == VideoInfo::CS_YUV420P10 || pix_type == VideoInfo::CS_YUV420P12 || + pix_type == VideoInfo::CS_YUV420P14 || pix_type == VideoInfo::CS_YUV420P16 || + pix_type == VideoInfo::CS_YUV420PS; }; if (!Is420(vi.pixel_type) && !Is420(dst_space)) @@ -1831,6 +1842,13 @@ static inline void fill_chroma(BYTE* dstp_u, BYTE* dstp_v, int height, int pitch std::fill_n(reinterpret_cast(dstp_v), size, val); } +template +static inline void fill_plane(BYTE* dstp, int height, int pitch, pixel_type val) +{ + size_t size = height * pitch / sizeof(pixel_type); + std::fill_n(reinterpret_cast(dstp), size, val); +} + PVideoFrame __stdcall ConvertToPlanarGeneric::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame src = child->GetFrame(n, env); PVideoFrame dst = env->NewVideoFrame(vi); @@ -1862,6 +1880,34 @@ PVideoFrame __stdcall ConvertToPlanarGeneric::GetFrame(int n, IScriptEnvironment src = Vsource->GetFrame(n, env); env->BitBlt(dstp_v, dst_pitch, src->GetReadPtr(PLANAR_Y), src->GetPitch(PLANAR_Y), src->GetRowSize(PLANAR_Y_ALIGNED), height); } + + // alpha. if pitch is zero -> no alpha channel + const int dst_pitchA = dst->GetPitch(PLANAR_A); + BYTE* dstp_a = (dst_pitchA == 0) ? 
nullptr : dst->GetWritePtr(PLANAR_A); + const int heightA = dst->GetHeight(PLANAR_A); + + if (dst_pitchA != 0) + { + if (src->GetPitch(PLANAR_A) != 0) + env->BitBlt(dstp_a, dst_pitchA, src->GetReadPtr(PLANAR_A), src->GetPitch(PLANAR_A), + src->GetRowSize(PLANAR_A_ALIGNED), src->GetHeight(PLANAR_A)); + else { + switch (vi.ComponentSize()) + { + case 1: + fill_plane(dstp_a, heightA, dst_pitchA, 255); + break; + case 2: + fill_plane(dstp_a, heightA, dst_pitchA, 65535); + break; + case 4: + fill_plane(dstp_a, heightA, dst_pitchA, 1.0f); + break; + } + + } + } + return dst; } @@ -1869,7 +1915,14 @@ AVSValue ConvertToPlanarGeneric::Create(AVSValue& args, const char* filter, IScr PClip clip = args[0].AsClip(); VideoInfo vi = clip->GetVideoInfo(); - if (vi.IsRGB()) { // 8 bit only + if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { + env->ThrowError("%s: Conversion from Planar RGB(A) is not implemented yet.", filter); + //clip = new ConvertPlanarRGBTo444(clip, getMatrix(args[2].AsString(0), env), env); + //vi = clip->GetVideoInfo(); + } + else if (vi.IsRGB()) { // 8 bit only, todo + if (vi.ComponentSize() != 1) + env->ThrowError("%s: Conversion from packed RGB > 8 bit is not implemented yet.", filter); clip = new ConvertRGBToYV24(clip, getMatrix(args[2].AsString(0), env), env); vi = clip->GetVideoInfo(); } @@ -1884,35 +1937,54 @@ AVSValue ConvertToPlanarGeneric::Create(AVSValue& args, const char* filter, IScr AVSValue outplacement = AVSValue(); if (strcmp(filter, "ConvertToYUV420") == 0) { - if (vi.IsYV12() || vi.IsColorSpace(VideoInfo::CS_YUV420P16) || vi.IsColorSpace(VideoInfo::CS_YUV420PS)) + if (vi.Is420()) if (getPlacement(args[3], env) == getPlacement(args[5], env)) return clip; outplacement = args[5]; - if (vi.ComponentSize() == 1) pixel_type = VideoInfo::CS_YV12; - else if (vi.ComponentSize() == 2) pixel_type = VideoInfo::CS_YUV420P16; - else if (vi.ComponentSize() == 4) pixel_type = VideoInfo::CS_YUV420PS; + switch (vi.BitsPerComponent()) + { + case 8: pixel_type = VideoInfo::CS_YV12; break; + case 10: pixel_type = VideoInfo::CS_YUV420P10; break; + case 12: pixel_type = VideoInfo::CS_YUV420P12; break; + case 14: pixel_type = VideoInfo::CS_YUV420P14; break; + case 16: pixel_type = VideoInfo::CS_YUV420P16; break; + case 32: pixel_type = VideoInfo::CS_YUV420PS; break; + } } else if (strcmp(filter, "ConvertToYUV422") == 0) { - if (vi.IsYV16() || vi.IsColorSpace(VideoInfo::CS_YUV422P16) || vi.IsColorSpace(VideoInfo::CS_YUV422PS)) + if (vi.Is422()) return clip; - if (vi.ComponentSize() == 1) pixel_type = VideoInfo::CS_YV16; - else if (vi.ComponentSize() == 2) pixel_type = VideoInfo::CS_YUV422P16; - else if (vi.ComponentSize() == 4) pixel_type = VideoInfo::CS_YUV422PS; + switch (vi.BitsPerComponent()) + { + case 8: pixel_type = VideoInfo::CS_YV16; break; + case 10: pixel_type = VideoInfo::CS_YUV422P10; break; + case 12: pixel_type = VideoInfo::CS_YUV422P12; break; + case 14: pixel_type = VideoInfo::CS_YUV422P14; break; + case 16: pixel_type = VideoInfo::CS_YUV422P16; break; + case 32: pixel_type = VideoInfo::CS_YUV422PS; break; + } } else if (strcmp(filter, "ConvertToYUV444") == 0) { - if (vi.IsYV24() || vi.IsColorSpace(VideoInfo::CS_YUV444P16) || vi.IsColorSpace(VideoInfo::CS_YUV444PS)) + if (vi.Is444()) return clip; - if (vi.ComponentSize() == 1) pixel_type = VideoInfo::CS_YV24; - else if (vi.ComponentSize() == 2) pixel_type = VideoInfo::CS_YUV444P16; - else if (vi.ComponentSize() == 4) pixel_type = VideoInfo::CS_YUV444PS; + switch (vi.BitsPerComponent()) + { + case 8: pixel_type = 
VideoInfo::CS_YV24; break; + case 10: pixel_type = VideoInfo::CS_YUV444P10; break; + case 12: pixel_type = VideoInfo::CS_YUV444P12; break; + case 14: pixel_type = VideoInfo::CS_YUV444P14; break; + case 16: pixel_type = VideoInfo::CS_YUV444P16; break; + case 32: pixel_type = VideoInfo::CS_YUV444PS; break; + } } else if (strcmp(filter, "ConvertToYV411") == 0) { if (vi.IsYV411()) return clip; + if(vi.ComponentSize()!=1) + env->ThrowError("%s: 8 bit only", filter); pixel_type = VideoInfo::CS_YV411; } else env->ThrowError("Convert: unknown filter '%s'.", filter); - if (pixel_type == VideoInfo::CS_UNKNOWN) env->ThrowError("%s: unsupported bit depth", filter); diff --git a/avs_core/convert/convert_rgb.cpp b/avs_core/convert/convert_rgb.cpp index aad7e2b38..38955efe4 100644 --- a/avs_core/convert/convert_rgb.cpp +++ b/avs_core/convert/convert_rgb.cpp @@ -335,30 +335,31 @@ PVideoFrame __stdcall RGBAtoRGB::GetFrame(int n, IScriptEnvironment* env) return dst; } -PackedRGBtoPlanarRGB::PackedRGBtoPlanarRGB(PClip src) - : GenericVideoFilter(src) +PackedRGBtoPlanarRGB::PackedRGBtoPlanarRGB(PClip src, bool _targetHasAlpha) + : GenericVideoFilter(src), targetHasAlpha(_targetHasAlpha) { vi.pixel_type = src->GetVideoInfo().ComponentSize() == 1 ? - (src->GetVideoInfo().IsRGB24() ? VideoInfo::CS_RGBP : VideoInfo::CS_RGBAP) : // RGB24, RGB32 - (src->GetVideoInfo().IsRGB48() ? VideoInfo::CS_RGBP16 : VideoInfo::CS_RGBAP16); // RGB48, RGB64 + (targetHasAlpha ? VideoInfo::CS_RGBAP : VideoInfo::CS_RGBP) : + (targetHasAlpha ? VideoInfo::CS_RGBAP16 : VideoInfo::CS_RGBP16); } -template +template static void convert_rgb_to_rgbp_c(const BYTE *srcp, BYTE * (&dstp)[4], int src_pitch, int (&dst_pitch)[4], size_t width, size_t height) { + bool targetHasAlpha = (dst_pitch[3] != 0); for (size_t y = height; y > 0; --y) { size_t x; // not proud of it but it works for (x = 0; x < width; ++x) { - pixel_t B = reinterpret_cast(srcp)[x*numcomponents + 0]; - pixel_t G = reinterpret_cast(srcp)[x*numcomponents + 1]; - pixel_t R = reinterpret_cast(srcp)[x*numcomponents + 2]; + pixel_t B = reinterpret_cast(srcp)[x*src_numcomponents + 0]; + pixel_t G = reinterpret_cast(srcp)[x*src_numcomponents + 1]; + pixel_t R = reinterpret_cast(srcp)[x*src_numcomponents + 2]; pixel_t A; - if(numcomponents==4) - A = reinterpret_cast(srcp)[x*numcomponents + 3]; + if(targetHasAlpha) + A = (src_numcomponents==4) ? reinterpret_cast(srcp)[x*src_numcomponents + 3] : (1<<(8*sizeof(pixel_t))) - 1; reinterpret_cast(dstp[0])[x] = G; reinterpret_cast(dstp[1])[x] = B; reinterpret_cast(dstp[2])[x] = R; - if(numcomponents==4) + if(targetHasAlpha) reinterpret_cast(dstp[3])[x] = A; } @@ -366,7 +367,7 @@ static void convert_rgb_to_rgbp_c(const BYTE *srcp, BYTE * (&dstp)[4], int src_p dstp[0] += dst_pitch[0]; dstp[1] += dst_pitch[1]; dstp[2] += dst_pitch[2]; - if (numcomponents == 4) + if (targetHasAlpha) dstp[3] += dst_pitch[3]; } } @@ -387,6 +388,7 @@ PVideoFrame __stdcall PackedRGBtoPlanarRGB::GetFrame(int n, IScriptEnvironment* // todo sse if(pixelsize==1) { + // targetHasAlpha decision in convert function if(vi.IsPlanarRGB()) convert_rgb_to_rgbp_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); else // RGBA @@ -400,4 +402,72 @@ PVideoFrame __stdcall PackedRGBtoPlanarRGB::GetFrame(int n, IScriptEnvironment* return dst; } +PlanarRGBtoPackedRGB::PlanarRGBtoPackedRGB(PClip src, bool _targetHasAlpha) + : GenericVideoFilter(src), targetHasAlpha(_targetHasAlpha) +{ + vi.pixel_type = src->GetVideoInfo().ComponentSize() == 1 ? + (targetHasAlpha ? 
VideoInfo::CS_BGR32 : VideoInfo::CS_BGR24) : // PlanarRGB(A)->RGB24/32 + (targetHasAlpha ? VideoInfo::CS_BGR64 : VideoInfo::CS_BGR48); // PlanarRGB(A)->RGB48/64 +} + +template +static void convert_rgbp_to_rgb_c(const BYTE *(&srcp)[4], BYTE * dstp, int (&src_pitch)[4], int dst_pitch, size_t width, size_t height) { + bool hasSrcAlpha = (src_pitch[3] != 0); + for (size_t y = 0; y < height; y++) { + size_t x; + // not proud of it but it works + for (x = 0; x < width; ++x) { + pixel_t G = reinterpret_cast(srcp[0])[x]; + pixel_t B = reinterpret_cast(srcp[1])[x]; + pixel_t R = reinterpret_cast(srcp[2])[x]; + pixel_t A; + if(target_numcomponents==4) // either from A channel or default opaque constant + A = hasSrcAlpha ? reinterpret_cast(srcp[3])[x] : (1<<(8*sizeof(pixel_t))) - 1; // 255/65535 + reinterpret_cast(dstp)[x*target_numcomponents+0] = B; + reinterpret_cast(dstp)[x*target_numcomponents+1] = G; + reinterpret_cast(dstp)[x*target_numcomponents+2] = R; + if(target_numcomponents==4) + reinterpret_cast(dstp)[x*target_numcomponents+3] = A; + } + + dstp -= dst_pitch; // packed RGB destination is upside down + srcp[0] += src_pitch[0]; + srcp[1] += src_pitch[1]; + srcp[2] += src_pitch[2]; + if (hasSrcAlpha) + srcp[3] += src_pitch[3]; + } +} + +PVideoFrame __stdcall PlanarRGBtoPackedRGB::GetFrame(int n, IScriptEnvironment* env) +{ + PVideoFrame src = child->GetFrame(n, env); + PVideoFrame dst = env->NewVideoFrame(vi); + int dst_pitch = dst->GetPitch(); + BYTE *dstp = dst->GetWritePtr(); + const BYTE *srcp[4] = {src->GetReadPtr(PLANAR_G),src->GetReadPtr(PLANAR_B),src->GetReadPtr(PLANAR_R),src->GetReadPtr(PLANAR_A)}; + int src_pitch[4] = {src->GetPitch(PLANAR_G),src->GetPitch(PLANAR_B),src->GetPitch(PLANAR_R),src->GetPitch(PLANAR_A)}; + + int pixelsize = vi.ComponentSize(); + + dstp += dst_pitch * (vi.height - 1); // start from bottom: packed RGB is upside down + + bool hasTargetAlpha = (vi.NumComponents() == 4); + // todo sse + if(pixelsize==1) + { + if(!hasTargetAlpha) + convert_rgbp_to_rgb_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + else // RGBA + convert_rgbp_to_rgb_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + } else { + if(!hasTargetAlpha) + convert_rgbp_to_rgb_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + else // RGBA + convert_rgbp_to_rgb_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + } + return dst; +} + + diff --git a/avs_core/convert/convert_rgb.h b/avs_core/convert/convert_rgb.h index 8e0bbd28a..9f9ac9466 100644 --- a/avs_core/convert/convert_rgb.h +++ b/avs_core/convert/convert_rgb.h @@ -73,13 +73,30 @@ class PackedRGBtoPlanarRGB : public GenericVideoFilter */ { public: - PackedRGBtoPlanarRGB(PClip src); + PackedRGBtoPlanarRGB(PClip src, bool _targetHasAlpha); PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); int __stdcall SetCacheHints(int cachehints, int frame_range) override { return cachehints == CACHE_GET_MTMODE ? MT_NICE_FILTER : 0; } + + const bool targetHasAlpha; }; + +class PlanarRGBtoPackedRGB : public GenericVideoFilter + /** + * RGBP(A) -> RGB(A) + */ +{ +public: + PlanarRGBtoPackedRGB(PClip src, bool _targetHasAlpha); + PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); + + int __stdcall SetCacheHints(int cachehints, int frame_range) override { + return cachehints == CACHE_GET_MTMODE ?
MT_NICE_FILTER : 0; + } + + const bool targetHasAlpha; +}; #endif // __Convert_RGB_H__ From 2301c2cf2a2846c56a389b01d53746fe9069f18c Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 25 Aug 2016 16:31:52 +0200 Subject: [PATCH 006/120] Fix: Convert RGB32->PlanarRGB (alpha->nonalpha) --- avs_core/convert/convert.cpp | 2 +- avs_core/convert/convert_rgb.cpp | 16 ++++++++-------- avs_core/convert/convert_rgb.h | 3 ++- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 049fe12df..3ce1eaf75 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -670,7 +670,7 @@ AVSValue __cdecl ConvertToRGB::Create(AVSValue args, void* user_data, IScriptEnv return new RGBAtoRGB(clip); if (target_rgbtype < 0) - return new PackedRGBtoPlanarRGB(clip, target_rgbtype==-2); + return new PackedRGBtoPlanarRGB(clip, vi.IsRGB32() || vi.IsRGB64(), target_rgbtype==-2); return clip; } diff --git a/avs_core/convert/convert_rgb.cpp b/avs_core/convert/convert_rgb.cpp index 38955efe4..7f22747d4 100644 --- a/avs_core/convert/convert_rgb.cpp +++ b/avs_core/convert/convert_rgb.cpp @@ -335,8 +335,8 @@ PVideoFrame __stdcall RGBAtoRGB::GetFrame(int n, IScriptEnvironment* env) return dst; } -PackedRGBtoPlanarRGB::PackedRGBtoPlanarRGB(PClip src, bool _targetHasAlpha) - : GenericVideoFilter(src), targetHasAlpha(_targetHasAlpha) +PackedRGBtoPlanarRGB::PackedRGBtoPlanarRGB(PClip src, bool _sourceHasAlpha, bool _targetHasAlpha) + : GenericVideoFilter(src), sourceHasAlpha(_sourceHasAlpha), targetHasAlpha(_targetHasAlpha) { vi.pixel_type = src->GetVideoInfo().ComponentSize() == 1 ? (targetHasAlpha ? VideoInfo::CS_RGBAP : VideoInfo::CS_RGBP) : @@ -389,15 +389,15 @@ PVideoFrame __stdcall PackedRGBtoPlanarRGB::GetFrame(int n, IScriptEnvironment* if(pixelsize==1) { // targetHasAlpha decision in convert function - if(vi.IsPlanarRGB()) - convert_rgb_to_rgbp_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); - else // RGBA + if(sourceHasAlpha) convert_rgb_to_rgbp_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); - } else { - if(vi.IsPlanarRGB()) - convert_rgb_to_rgbp_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); else // RGBA + convert_rgb_to_rgbp_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + } else { + if(sourceHasAlpha) convert_rgb_to_rgbp_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); + else // RGBA + convert_rgb_to_rgbp_c(srcp, dstp, src_pitch, dst_pitch, vi.width, vi.height); } return dst; } diff --git a/avs_core/convert/convert_rgb.h b/avs_core/convert/convert_rgb.h index 9f9ac9466..f8e17a41f 100644 --- a/avs_core/convert/convert_rgb.h +++ b/avs_core/convert/convert_rgb.h @@ -73,13 +73,14 @@ class PackedRGBtoPlanarRGB : public GenericVideoFilter */ { public: - PackedRGBtoPlanarRGB(PClip src, bool _targetHasAlpha); + PackedRGBtoPlanarRGB(PClip src, bool _sourceHasAlpha, bool _targetHasAlpha); PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); int __stdcall SetCacheHints(int cachehints, int frame_range) override { return cachehints == CACHE_GET_MTMODE ? 
MT_NICE_FILTER : 0; } + const bool sourceHasAlpha; const bool targetHasAlpha; }; From 976064b9fbdcda5c3b5166d3f900d84fb8976568 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 26 Aug 2016 08:04:10 +0200 Subject: [PATCH 007/120] Resizers: RGB48/64, PlanarRGB 8/16/Float --- avs_core/filters/resample.cpp | 98 +++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 22 deletions(-) diff --git a/avs_core/filters/resample.cpp b/avs_core/filters/resample.cpp index e0364c6f8..2ef43d5c1 100644 --- a/avs_core/filters/resample.cpp +++ b/avs_core/filters/resample.cpp @@ -718,13 +718,15 @@ FilteredResizeH::FilteredResizeH( PClip _child, double subrange_left, double sub dst_height = vi.height; pixelsize = vi.ComponentSize(); // AVS16 - grey = vi.IsY8() || vi.IsColorSpace(VideoInfo::CS_Y16) || vi.IsColorSpace(VideoInfo::CS_Y32); + grey = vi.IsY(); + + bool isRGBPfamily = vi.IsPlanarRGB() || vi.IsPlanarRGBA(); if (target_width <= 0) { env->ThrowError("Resize: Width must be greater than 0."); } - if (vi.IsPlanar() && !grey) { + if (vi.IsPlanar() && !grey && !isRGBPfamily) { const int mask = (1 << vi.GetPlaneWidthSubsampling(PLANAR_U)) - 1; if (target_width & mask) @@ -735,7 +737,7 @@ FilteredResizeH::FilteredResizeH( PClip _child, double subrange_left, double sub // Main resampling program resampling_program_luma = func->GetResamplingProgram(vi.width, subrange_left, subrange_width, target_width, env2); - if (vi.IsPlanar() && !grey) { + if (vi.IsPlanar() && !grey && !isRGBPfamily) { const int shift = vi.GetPlaneWidthSubsampling(PLANAR_U); const int shift_h = vi.GetPlaneHeightSubsampling(PLANAR_U); const int div = 1 << shift; @@ -750,7 +752,7 @@ FilteredResizeH::FilteredResizeH( PClip _child, double subrange_left, double sub } fast_resize = (env->GetCPUFlags() & CPUF_SSSE3) == CPUF_SSSE3 && vi.IsPlanar() && target_width%4 == 0; - if (fast_resize && vi.IsYUV() && !grey) { + if (fast_resize /*&& vi.IsYUV()*/ && !grey && !isRGBPfamily) { const int shift = vi.GetPlaneWidthSubsampling(PLANAR_U); const int dst_chroma_width = dst_width >> shift; @@ -760,6 +762,7 @@ FilteredResizeH::FilteredResizeH( PClip _child, double subrange_left, double sub } if (false && resampling_program_luma->filter_size == 1 && vi.IsPlanar()) { + // dead code? 
fast_resize = true; resampler_h_luma = resize_h_pointresize; resampler_h_chroma = resize_h_pointresize; @@ -768,7 +771,7 @@ FilteredResizeH::FilteredResizeH( PClip _child, double subrange_left, double sub src_pitch_table_luma = new int[vi.width]; resampler_luma = FilteredResizeV::GetResampler(env->GetCPUFlags(), true, pixelsize, filter_storage_luma, resampling_program_luma); - if (vi.IsPlanar() && !grey) { + if (vi.IsPlanar() && !grey && !isRGBPfamily) { resampler_chroma = FilteredResizeV::GetResampler(env->GetCPUFlags(), true, pixelsize, filter_storage_chroma, resampling_program_chroma); } @@ -792,6 +795,17 @@ FilteredResizeH::FilteredResizeH( PClip _child, double subrange_left, double sub turn_left = turn_left_rgb32_c; turn_right = turn_right_rgb32_c; } + } else if (vi.IsRGB48()) { + turn_left = turn_left_rgb48; // todo: _c suffix + turn_right = turn_right_rgb48; // todo: _c suffix + } else if (vi.IsRGB64()) { + if (has_sse2) { + turn_left = turn_left_rgb64_sse2; + turn_right = turn_right_rgb64_sse2; + } else { + turn_left = turn_left_rgb64_c; + turn_right = turn_right_rgb64_c; + } } else { switch (vi.ComponentSize()) {// AVS16 case 1: // 8 bit @@ -825,7 +839,7 @@ FilteredResizeH::FilteredResizeH( PClip _child, double subrange_left, double sub } else { // Plannar + SSSE3 = use new horizontal resizer routines resampler_h_luma = GetResampler(env->GetCPUFlags(), true, pixelsize, resampling_program_luma, env2); - if (!grey) { + if (!grey && !isRGBPfamily) { resampler_h_chroma = GetResampler(env->GetCPUFlags(), true, pixelsize, resampling_program_chroma, env2); } } @@ -841,6 +855,8 @@ PVideoFrame __stdcall FilteredResizeH::GetFrame(int n, IScriptEnvironment* env) auto env2 = static_cast(env); + bool isRGBPfamily = vi.IsPlanarRGB() || vi.IsPlanarRGBA(); + if (!fast_resize) { // e.g. 
not aligned, not mod4 // temp_1_pitch and temp_2_pitch is pixelsize-aware @@ -852,13 +868,23 @@ PVideoFrame __stdcall FilteredResizeH::GetFrame(int n, IScriptEnvironment* env) env->ThrowError("Could not reserve memory in a resampler."); } - if (!vi.IsRGB()) { - // Y Plane + if (!vi.IsRGB() || isRGBPfamily) { + // Y/G Plane turn_right(src->GetReadPtr(), temp_1, src_width * pixelsize, src_height, src->GetPitch(), temp_1_pitch); // * pixelsize: turn_right needs GetPlaneWidth full size resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, src_pitch_table_luma, filter_storage_luma); turn_left(temp_2, dst->GetWritePtr(), dst_height * pixelsize, dst_width, temp_2_pitch, dst->GetPitch()); - if(!grey) { + if (isRGBPfamily) + { + turn_right(src->GetReadPtr(PLANAR_B), temp_1, src_width * pixelsize, src_height, src->GetPitch(PLANAR_B), temp_1_pitch); // * pixelsize: turn_right needs GetPlaneWidth full size + resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, src_pitch_table_luma, filter_storage_luma); + turn_left(temp_2, dst->GetWritePtr(PLANAR_B), dst_height * pixelsize, dst_width, temp_2_pitch, dst->GetPitch(PLANAR_B)); + + turn_right(src->GetReadPtr(PLANAR_R), temp_1, src_width * pixelsize, src_height, src->GetPitch(PLANAR_R), temp_1_pitch); // * pixelsize: turn_right needs GetPlaneWidth full size + resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, src_pitch_table_luma, filter_storage_luma); + turn_left(temp_2, dst->GetWritePtr(PLANAR_R), dst_height * pixelsize, dst_width, temp_2_pitch, dst->GetPitch(PLANAR_R)); + } + else if(!grey) { const int shift = vi.GetPlaneWidthSubsampling(PLANAR_U); const int shift_h = vi.GetPlaneHeightSubsampling(PLANAR_U); @@ -879,20 +905,25 @@ PVideoFrame __stdcall FilteredResizeH::GetFrame(int n, IScriptEnvironment* env) turn_left(temp_2, dst->GetWritePtr(PLANAR_V), dst_chroma_height * pixelsize, dst_chroma_width, temp_2_pitch, dst->GetPitch(PLANAR_V)); } } else { - // RGB - // PF160510 first left, then right. Right+left shifts RGB24/RGB32 image to the opposite horizontal direction + // packed RGB + // First left, then right. Reason: packed RGB bottom to top. 
Right+left shifts RGB24/RGB32 image to the opposite horizontal direction turn_left(src->GetReadPtr(), temp_1, vi.BytesFromPixels(src_width), src_height, src->GetPitch(), temp_1_pitch); - resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, vi.BytesFromPixels(src_height), dst_width, src_pitch_table_luma, filter_storage_luma); + resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, vi.BytesFromPixels(src_height) / pixelsize, dst_width, src_pitch_table_luma, filter_storage_luma); turn_right(temp_2, dst->GetWritePtr(), vi.BytesFromPixels(dst_height), dst_width, temp_2_pitch, dst->GetPitch()); } env2->Free(temp_1); env2->Free(temp_2); } else { + // Y Plane resampler_h_luma(dst->GetWritePtr(), src->GetReadPtr(), dst->GetPitch(), src->GetPitch(), resampling_program_luma, dst_width, dst_height); - if (!grey) { + if (isRGBPfamily) { + resampler_h_luma(dst->GetWritePtr(PLANAR_B), src->GetReadPtr(PLANAR_B), dst->GetPitch(PLANAR_B), src->GetPitch(PLANAR_B), resampling_program_luma, dst_width, dst_height); + resampler_h_luma(dst->GetWritePtr(PLANAR_R), src->GetReadPtr(PLANAR_R), dst->GetPitch(PLANAR_R), src->GetPitch(PLANAR_R), resampling_program_luma, dst_width, dst_height); + } + else if (!grey) { const int dst_chroma_width = dst_width >> vi.GetPlaneWidthSubsampling(PLANAR_U); const int dst_chroma_height = dst_height >> vi.GetPlaneHeightSubsampling(PLANAR_U); @@ -951,9 +982,10 @@ FilteredResizeV::FilteredResizeV( PClip _child, double subrange_top, double subr env->ThrowError("Resize: Height must be greater than 0."); pixelsize = vi.ComponentSize(); // AVS16 - grey = vi.IsY8() || vi.IsColorSpace(VideoInfo::CS_Y16) || vi.IsColorSpace(VideoInfo::CS_Y32); + grey = vi.IsY(); + bool isRGBPfamily = vi.IsPlanarRGB() || vi.IsPlanarRGBA(); - if (vi.IsPlanar() && !grey) { + if (vi.IsPlanar() && !grey && !isRGBPfamily) { const int mask = (1 << vi.GetPlaneHeightSubsampling(PLANAR_U)) - 1; if (target_height & mask) @@ -962,7 +994,7 @@ FilteredResizeV::FilteredResizeV( PClip _child, double subrange_top, double subr auto env2 = static_cast(env); - if (vi.IsRGB()) + if (vi.IsRGB() && !isRGBPfamily) subrange_top = vi.height - subrange_top - subrange_height; // why? 
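    // why: presumably because packed RGB is stored bottom-up in memory (see the "RGB is upside-down"
    // handling in Crop/AddBorders below), so a vertical subrange given in picture coordinates has to be
    // mirrored into buffer coordinates before resampling. Quick check with made-up numbers: for
    // vi.height = 100, subrange_top = 10, subrange_height = 50 the region starts at buffer row
    // 100 - 10 - 50 = 40, counted from the bottom-up start of the frame.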
@@ -971,7 +1003,7 @@ FilteredResizeV::FilteredResizeV( PClip _child, double subrange_top, double subr resampler_luma_aligned = GetResampler(env->GetCPUFlags(), true , pixelsize, filter_storage_luma_aligned, resampling_program_luma); resampler_luma_unaligned = GetResampler(env->GetCPUFlags(), false, pixelsize, filter_storage_luma_unaligned, resampling_program_luma); - if (vi.IsPlanar() && !grey) { + if (vi.IsPlanar() && !grey && !isRGBPfamily) { const int shift = vi.GetPlaneHeightSubsampling(PLANAR_U); const int div = 1 << shift; @@ -1001,6 +1033,8 @@ PVideoFrame __stdcall FilteredResizeV::GetFrame(int n, IScriptEnvironment* env) auto env2 = static_cast(env); + bool isRGBPfamily = vi.IsPlanarRGB() || vi.IsPlanarRGBA(); + // Create pitch table int* src_pitch_table_luma = static_cast(env2->Allocate(sizeof(int) * src->GetHeight(), 16, AVS_POOLED_ALLOC)); if (!src_pitch_table_luma) { @@ -1011,7 +1045,7 @@ PVideoFrame __stdcall FilteredResizeV::GetFrame(int n, IScriptEnvironment* env) int* src_pitch_table_chromaU = NULL; int* src_pitch_table_chromaV = NULL; - if ((!grey && vi.IsPlanar())) { + if ((!grey && vi.IsPlanar() && !isRGBPfamily)) { src_pitch_table_chromaU = static_cast(env2->Allocate(sizeof(int) * src->GetHeight(PLANAR_U), 16, AVS_POOLED_ALLOC)); src_pitch_table_chromaV = static_cast(env2->Allocate(sizeof(int) * src->GetHeight(PLANAR_V), 16, AVS_POOLED_ALLOC)); if (!src_pitch_table_chromaU || !src_pitch_table_chromaV) { @@ -1025,13 +1059,33 @@ PVideoFrame __stdcall FilteredResizeV::GetFrame(int n, IScriptEnvironment* env) } // Do resizing - int work_width = vi.IsPlanar() ? vi.width : vi.BytesFromPixels(vi.width); + int work_width = vi.IsPlanar() ? vi.width : vi.BytesFromPixels(vi.width) / pixelsize; // packed RGB: or vi.width * vi.NumComponent() if (IsPtrAligned(srcp, 16) && (src_pitch & 15) == 0) resampler_luma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_aligned); else resampler_luma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_unaligned); - if (!grey && vi.IsPlanar()) { + if(isRGBPfamily) + { + src_pitch = src->GetPitch(PLANAR_B); + dst_pitch = dst->GetPitch(PLANAR_B); + srcp = src->GetReadPtr(PLANAR_B); + dstp = dst->GetWritePtr(PLANAR_B); + if (IsPtrAligned(srcp, 16) && (src_pitch & 15) == 0) + resampler_luma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_aligned); + else + resampler_luma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_unaligned); + + src_pitch = src->GetPitch(PLANAR_R); + dst_pitch = dst->GetPitch(PLANAR_R); + srcp = src->GetReadPtr(PLANAR_R); + dstp = dst->GetWritePtr(PLANAR_R); + if (IsPtrAligned(srcp, 16) && (src_pitch & 15) == 0) + resampler_luma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_aligned); + else + resampler_luma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_unaligned); + } + else if (!grey && vi.IsPlanar()) { int width = vi.width >> vi.GetPlaneWidthSubsampling(PLANAR_U); int height = vi.height >> vi.GetPlaneHeightSubsampling(PLANAR_U); @@ -1149,7 +1203,7 @@ PClip FilteredResize::CreateResizeH(PClip clip, double subrange_left, double sub 
if (subrange_left == int(subrange_left) && subrange_width == target_width && subrange_left >= 0 && subrange_left + subrange_width <= vi.width) { - const int mask = (vi.IsYUV() && !vi.IsY8() && !vi.IsColorSpace(VideoInfo::CS_Y16) && !vi.IsColorSpace(VideoInfo::CS_Y32)) ? (1 << vi.GetPlaneWidthSubsampling(PLANAR_U)) - 1 : 0; + const int mask = ((vi.IsYUV() || vi.IsYUVA()) && !vi.IsY()) ? (1 << vi.GetPlaneWidthSubsampling(PLANAR_U)) - 1 : 0; if (((int(subrange_left) | int(subrange_width)) & mask) == 0) return new Crop(int(subrange_left), 0, int(subrange_width), vi.height, 0, clip, env); @@ -1179,7 +1233,7 @@ PClip FilteredResize::CreateResizeV(PClip clip, double subrange_top, double subr if (subrange_top == int(subrange_top) && subrange_height == target_height && subrange_top >= 0 && subrange_top + subrange_height <= vi.height) { - const int mask = (vi.IsYUV() && !vi.IsY8() && !vi.IsColorSpace(VideoInfo::CS_Y16) && !vi.IsColorSpace(VideoInfo::CS_Y32)) ? (1 << vi.GetPlaneHeightSubsampling(PLANAR_U)) - 1 : 0; + const int mask = ((vi.IsYUV() || vi.IsYUVA()) && !vi.IsY()) ? (1 << vi.GetPlaneHeightSubsampling(PLANAR_U)) - 1 : 0; if (((int(subrange_top) | int(subrange_height)) & mask) == 0) return new Crop(0, int(subrange_top), vi.width, int(subrange_height), 0, clip, env); From 8a11a08e842be4f664aa4d9d6bf495beddba7c5e Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 26 Aug 2016 08:09:22 +0200 Subject: [PATCH 008/120] Resamplers: Alpha plane (YUVA, PlanarRGBA) --- avs_core/filters/resample.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/avs_core/filters/resample.cpp b/avs_core/filters/resample.cpp index 2ef43d5c1..aba731eee 100644 --- a/avs_core/filters/resample.cpp +++ b/avs_core/filters/resample.cpp @@ -904,6 +904,13 @@ PVideoFrame __stdcall FilteredResizeH::GetFrame(int n, IScriptEnvironment* env) resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_chroma, src_chroma_height, dst_chroma_width, src_pitch_table_luma, filter_storage_chroma); turn_left(temp_2, dst->GetWritePtr(PLANAR_V), dst_chroma_height * pixelsize, dst_chroma_width, temp_2_pitch, dst->GetPitch(PLANAR_V)); } + if (vi.IsYUVA() || vi.IsPlanarRGBA()) + { + turn_right(src->GetReadPtr(PLANAR_A), temp_1, src_width * pixelsize, src_height, src->GetPitch(PLANAR_A), temp_1_pitch); // * pixelsize: turn_right needs GetPlaneWidth full size + resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, src_pitch_table_luma, filter_storage_luma); + turn_left(temp_2, dst->GetWritePtr(PLANAR_A), dst_height * pixelsize, dst_width, temp_2_pitch, dst->GetPitch(PLANAR_A)); + } + } else { // packed RGB // First left, then right. Reason: packed RGB bottom to top. 
Right+left shifts RGB24/RGB32 image to the opposite horizontal direction @@ -933,6 +940,11 @@ PVideoFrame __stdcall FilteredResizeH::GetFrame(int n, IScriptEnvironment* env) // V Plane resampler_h_chroma(dst->GetWritePtr(PLANAR_V), src->GetReadPtr(PLANAR_V), dst->GetPitch(PLANAR_V), src->GetPitch(PLANAR_V), resampling_program_chroma, dst_chroma_width, dst_chroma_height); } + if (vi.IsYUVA() || vi.IsPlanarRGBA()) + { + resampler_h_luma(dst->GetWritePtr(PLANAR_A), src->GetReadPtr(PLANAR_A), dst->GetPitch(PLANAR_A), src->GetPitch(PLANAR_A), resampling_program_luma, dst_width, dst_height); + } + } return dst; From 46089bbe480e5cfa0033726db1ba73fa9ec0c5a5 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 26 Aug 2016 16:25:59 +0200 Subject: [PATCH 009/120] Resize: SSE2/4.1 16bit/float in resize_v_sseX_planar_16or32 with pure float internals --- avs_core/filters/resample.cpp | 151 +++++++++++++++++++++++++++++++++- 1 file changed, 147 insertions(+), 4 deletions(-) diff --git a/avs_core/filters/resample.cpp b/avs_core/filters/resample.cpp index aba731eee..d95c521a0 100644 --- a/avs_core/filters/resample.cpp +++ b/avs_core/filters/resample.cpp @@ -52,6 +52,7 @@ ***************************************/ typedef __m128i (SSELoader)(const __m128i*); +typedef __m128 (SSELoader_ps)(const float*); __forceinline __m128i simd_load_aligned(const __m128i* adr) { @@ -73,6 +74,28 @@ __forceinline __m128i simd_load_streaming(const __m128i* adr) return _mm_stream_load_si128(const_cast<__m128i*>(adr)); } +// float loaders +__forceinline __m128 simd_loadps_aligned(const float * adr) +{ + return _mm_load_ps(adr); +} + +__forceinline __m128 simd_loadps_unaligned(const float* adr) +{ + return _mm_loadu_ps(adr); +} + +// fake _mm_packus_epi32 (orig is SSE4.1 only) +static __forceinline __m128i _MM_PACKUS_EPI32( __m128i a, __m128i b ) +{ + a = _mm_slli_epi32 (a, 16); + a = _mm_srai_epi32 (a, 16); + b = _mm_slli_epi32 (b, 16); + b = _mm_srai_epi32 (b, 16); + a = _mm_packs_epi32 (a, b); + return a; +} + /*************************************** ***** Vertical Resizer Assembly ******* ***************************************/ @@ -349,6 +372,93 @@ static void resize_v_sse2_planar(BYTE* dst, const BYTE* src, int dst_pitch, int } } +// for uint16_t and float. 
Both uses float arithmetic and coefficients +template +static void resize_v_sseX_planar_16or32(BYTE* dst0, const BYTE* src0, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, const int* pitch_table, const void* storage) +{ + int filter_size = program->filter_size; + //short* current_coeff = program->pixel_coefficient; + float* current_coeff_float = program->pixel_coefficient_float; + + int wMod8 = (width / 8) * 8; // uint16/float: 8 at a time (byte was 16 byte at a time) + + __m128i zero = _mm_setzero_si128(); + + const pixel_t* src = (pixel_t *)src0; + pixel_t* dst = (pixel_t *)dst0; + dst_pitch = dst_pitch / sizeof(pixel_t); + src_pitch = src_pitch / sizeof(pixel_t); + + for (int y = 0; y < target_height; y++) { + int offset = program->pixel_offset[y]; + const pixel_t* src_ptr = src + pitch_table[offset]/sizeof(pixel_t); + + for (int x = 0; x < wMod8; x+=8) { + __m128 result_l_single = _mm_set1_ps(0.0f); + __m128 result_h_single = result_l_single; + + const pixel_t* src2_ptr = src_ptr+x; + + for (int i = 0; i < filter_size; i++) { + __m128 src_l_single; + __m128 src_h_single; + if(sizeof(pixel_t)==2) // word + { + // load is template-dependent + __m128i src_p = load(reinterpret_cast(src2_ptr)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero); // spread higher 4*uint16_t pixel value -> 4*32 bit + src_l_single = _mm_cvtepi32_ps (src_l); // Converts the four signed 32-bit integer values of a to single-precision, floating-point values. + src_h_single = _mm_cvtepi32_ps (src_h); + } + else { // float + // _mm_load_ps or _mm_loadu_ps template dependent + src_l_single = loadps(reinterpret_cast(src2_ptr)); // float 4*32=128 4 pixels at a time + src_h_single = loadps(reinterpret_cast(src2_ptr+4)); + } + __m128 coeff = _mm_load1_ps(reinterpret_cast(current_coeff_float+i)); // loads 1, fills all 4 floats + __m128 dst_l = _mm_mul_ps(src_l_single, coeff); // Multiply by coefficient (SSE4) + __m128 dst_h = _mm_mul_ps(src_h_single, coeff); // 4*(32bit*32bit=32bit) + result_l_single = _mm_add_ps(result_l_single, dst_l); // accumulate result. + result_h_single = _mm_add_ps(result_h_single, dst_h); + + src2_ptr += src_pitch; + } + + if(sizeof(pixel_t)==2) // word + { + // Converts the four single-precision, floating-point values of a to signed 32-bit integer values. + __m128i result_l = _mm_cvtps_epi32(result_l_single); + __m128i result_h = _mm_cvtps_epi32(result_h_single); + // Pack and store + // SIMD Extensions 4 (SSE4) packus or simulation + __m128i result = sse41 ? 
_mm_packus_epi32(result_l, result_h) : (_MM_PACKUS_EPI32(result_l, result_h)) ; // 4*32+4*32 = 8*16 + _mm_store_si128(reinterpret_cast<__m128i*>(dst+x), result); + } + else { // float + _mm_store_ps(reinterpret_cast(dst+x), result_l_single); + _mm_store_ps(reinterpret_cast(dst+x+4), result_h_single); + } + } + + // Leftover + for (int x = wMod8; x < width; x++) { + float result = 0; + for (int i = 0; i < filter_size; i++) { + result += (src_ptr+pitch_table[i]/sizeof(pixel_t))[x] * current_coeff_float[i]; + } + if (!std::is_floating_point::value) { // floats are unscaled and uncapped + result = clamp(result, 0.0f, 65535.0f); + } + dst[x] = (pixel_t) result; + } + + dst += dst_pitch; + current_coeff_float += filter_size; + } +} + + template static void resize_v_ssse3_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, const int* pitch_table, const void* storage) { @@ -474,6 +584,9 @@ static void resize_h_prepare_coeff_8(ResamplingProgram* p, IScriptEnvironment2* dst += filter_size; src += p->filter_size; + + dst_f += filter_size; + src_f += p->filter_size; } env->Free(p->pixel_coefficient); @@ -1184,12 +1297,42 @@ ResamplerV FilteredResizeV::GetResampler(int CPU, bool aligned, int pixelsize, v else { // C version return resize_v_c_planar; } - } // todo: sse + } else if (pixelsize == 2) { - return resize_v_c_planar; + if (CPU & CPUF_SSE4_1) { + if (aligned) { + return resize_v_sseX_planar_16or32; + } + else if (CPU & CPUF_SSE3) { // SSE3 lddqu + return resize_v_sseX_planar_16or32; + } + else { // unaligned + return resize_v_sseX_planar_16or32; + } + } + else if (CPU & CPUF_SSE2) { + if (aligned) { + return resize_v_sseX_planar_16or32; + } + else { + return resize_v_sseX_planar_16or32; + } + } else { // C version + return resize_v_c_planar; + } } - else { // if (pixelsize== 4) - return resize_v_c_planar; + else { // pixelsize== 4 + // no special integer loading difference, no special sse4 case + if (CPU & CPUF_SSE2) { + if (aligned) { + return resize_v_sseX_planar_16or32; + } + else { + return resize_v_sseX_planar_16or32; + } + } else { + return resize_v_c_planar; + } } } } From c97f54566aee4b037878149faf62fd48f9974730 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 26 Aug 2016 19:56:26 +0200 Subject: [PATCH 010/120] Remove unused code --- avs_core/convert/convert.cpp | 55 ------------------------------------ 1 file changed, 55 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 3ce1eaf75..76cf6b81b 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -675,61 +675,6 @@ AVSValue __cdecl ConvertToRGB::Create(AVSValue args, void* user_data, IScriptEnv return clip; } -#if 0 -// merged into Create -AVSValue __cdecl ConvertToRGB::Create32(AVSValue args, void*, IScriptEnvironment* env) -{ - const bool haveOpts = args[3].Defined() || args[4].Defined(); - PClip clip = args[0].AsClip(); - const char* const matrix = args[1].AsString(0); - const VideoInfo vi = clip->GetVideoInfo(); - - if (vi.IsPlanar()) { - AVSValue new_args[5] = { clip, args[2], args[1], args[3], args[4] }; - clip = ConvertToPlanarGeneric::CreateYUV444(AVSValue(new_args, 5), NULL, env).AsClip(); - return new ConvertYUV444ToRGB(clip, getMatrix(matrix, env), 4 , env); - } - - if (haveOpts) - env->ThrowError("ConvertToRGB32: ChromaPlacement and ChromaResample options are not supported."); - - if (vi.IsYUV()) - return new ConvertToRGB(clip, false, matrix, env); - - if (vi.IsRGB24()) - return new 
RGB24to32(clip); - - return clip; -} -#endif - -#if 0 -// merged into Create -AVSValue __cdecl ConvertToRGB::Create24(AVSValue args, void*, IScriptEnvironment* env) -{ - const bool haveOpts = args[3].Defined() || args[4].Defined(); - PClip clip = args[0].AsClip(); - const char* const matrix = args[1].AsString(0); - const VideoInfo& vi = clip->GetVideoInfo(); - - if (vi.IsPlanar()) { - AVSValue new_args[5] = { clip, args[2], args[1], args[3], args[4] }; - clip = ConvertToPlanarGeneric::CreateYUV444(AVSValue(new_args, 5), NULL, env).AsClip(); - return new ConvertYUV444ToRGB(clip, getMatrix(matrix, env), 3 , env); - } - - if (haveOpts) - env->ThrowError("ConvertToRGB24: ChromaPlacement and ChromaResample options are not supported."); - - if (vi.IsYUV()) - return new ConvertToRGB(clip, true, matrix, env); - - if (vi.IsRGB32()) - return new RGB32to24(clip); - - return clip; -} -#endif /********************************** ******* Convert to YV12 ****** From 721c29b25239f1de9295c74b2e0c2446f193ef7a Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 26 Aug 2016 19:58:05 +0200 Subject: [PATCH 011/120] Convert: RGB48/64,PlanarRGB(A)8/10-16/32 -> YUV(A)8/10-16/32 --- avs_core/convert/convert_planar.cpp | 175 ++++++++++++++++++++++++---- avs_core/convert/convert_planar.h | 2 + 2 files changed, 155 insertions(+), 22 deletions(-) diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index 08ff2681d..9704f6e09 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -424,8 +424,30 @@ ConvertRGBToYV24::ConvertRGBToYV24(PClip src, int in_matrix, IScriptEnvironment* if (!vi.IsRGB()) env->ThrowError("ConvertRGBToYV24: Only RGB data input accepted"); - pixel_step = vi.BytesFromPixels(1); - vi.pixel_type = VideoInfo::CS_YV24; + isPlanarRGBfamily = vi.IsPlanarRGB() || vi.IsPlanarRGBA(); + hasAlpha = vi.IsPlanarRGBA(); // for packed RGB always false (no YUVA target option) + if (isPlanarRGBfamily) + { + pixel_step = hasAlpha ? -2 : -1; + switch (vi.BitsPerComponent()) + { + case 8: vi.pixel_type = hasAlpha ? VideoInfo::CS_YUVA444 : VideoInfo::CS_YV24; break; + case 10: vi.pixel_type = hasAlpha ? VideoInfo::CS_YUVA444P10 : VideoInfo::CS_YUV444P10; break; + case 12: vi.pixel_type = hasAlpha ? VideoInfo::CS_YUVA444P12 : VideoInfo::CS_YUV444P12; break; + case 14: vi.pixel_type = hasAlpha ? VideoInfo::CS_YUVA444P14 : VideoInfo::CS_YUV444P14; break; + case 16: vi.pixel_type = hasAlpha ? VideoInfo::CS_YUVA444P16 : VideoInfo::CS_YUV444P16; break; + case 32: vi.pixel_type = hasAlpha ? 
VideoInfo::CS_YUVA444PS : VideoInfo::CS_YUV444PS; break; + } + } else { // packed RGB24/32/48/64 + pixel_step = vi.BytesFromPixels(1); // 3,4 for packed 8 bit, 6,8 for + switch(vi.ComponentSize()) + { + case 1: vi.pixel_type = VideoInfo::CS_YV24; break; + case 2: vi.pixel_type = VideoInfo::CS_YUV444P16; break; + case 4: vi.pixel_type = VideoInfo::CS_YUV444PS; break; // planar RGB + } + } + const int shift = 15; @@ -709,6 +731,58 @@ static void convert_rgb24_to_yv24_mmx(BYTE* dstY, BYTE* dstU, BYTE* dstV, const #endif +template +static void convert_planarrgb_to_yuv_int_c(BYTE *(&dstp)[3], int (&dstPitch)[3], const BYTE *(&srcp)[3], const int (&srcPitch)[3], int width, int height, const ConversionMatrix &m) +{ + const pixel_t half = 1 << (8 * sizeof(pixel_t) - 1 ); + typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type sum_t; + const int limit = (1 << (8 * sizeof(pixel_t))) - 1; + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + pixel_t g = reinterpret_cast(srcp[0])[x]; + pixel_t b = reinterpret_cast(srcp[1])[x]; + pixel_t r = reinterpret_cast(srcp[2])[x]; + int Y = (m.offset_y << 8) + (int)(((sum_t)m.y_b * b + (sum_t)m.y_g * g + (sum_t)m.y_r * r + 16384)>>15); + int U = half + (int)(((sum_t)m.u_b * b + (sum_t)m.u_g * g + (sum_t)m.u_r * r + 16384)>>15); + int V = half + (int)(((sum_t)m.v_b * b + (sum_t)m.v_g * g + (sum_t)m.v_r * r + 16384)>>15); + reinterpret_cast(dstp[0])[x] = (pixel_t)clamp(Y, 0, limit); + reinterpret_cast(dstp[1])[x] = (pixel_t)clamp(U, 0, limit); + reinterpret_cast(dstp[2])[x] = (pixel_t)clamp(V, 0, limit); + } + srcp[0] += srcPitch[0]; + srcp[1] += srcPitch[1]; + srcp[2] += srcPitch[2]; + dstp[0] += dstPitch[0]; + dstp[1] += dstPitch[1]; + dstp[2] += dstPitch[2]; + } +} + +static void convert_planarrgb_to_yuv_float_c(BYTE *(&dstp)[3], int (&dstPitch)[3], const BYTE *(&srcp)[3], const int (&srcPitch)[3], int width, int height, const ConversionMatrix &m) +{ + typedef float pixel_t; + const pixel_t limit = 1.0; // we clamp on RGB conversions for float + const pixel_t half = 0.5f; + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + pixel_t g = reinterpret_cast(srcp[0])[x]; + pixel_t b = reinterpret_cast(srcp[1])[x]; + pixel_t r = reinterpret_cast(srcp[2])[x]; + pixel_t Y = (m.offset_y / 256.0f) + (m.y_b_f * b + m.y_g_f * g + m.y_r_f * r); + pixel_t U = half + (m.u_b_f * b + m.u_g_f * g + m.u_r_f * r); + pixel_t V = half + (m.v_b_f * b + m.v_g_f * g + m.v_r_f * r); + reinterpret_cast(dstp[0])[x] = (pixel_t)clamp(Y, (pixel_t)0, limit);// All the safety we can wish for. + reinterpret_cast(dstp[1])[x] = (pixel_t)clamp(U, (pixel_t)0, limit); + reinterpret_cast(dstp[2])[x] = (pixel_t)clamp(V, (pixel_t)0, limit); + } + srcp[0] += srcPitch[0]; + srcp[1] += srcPitch[1]; + srcp[2] += srcPitch[2]; + dstp[0] += dstPitch[0]; + dstp[1] += dstPitch[1]; + dstp[2] += dstPitch[2]; + } +} PVideoFrame __stdcall ConvertRGBToYV24::GetFrame(int n, IScriptEnvironment* env) { @@ -726,11 +800,12 @@ PVideoFrame __stdcall ConvertRGBToYV24::GetFrame(int n, IScriptEnvironment* env) const int Ypitch = dst->GetPitch(PLANAR_Y); const int UVpitch = dst->GetPitch(PLANAR_U); - if (pixel_step != 4 && pixel_step != 3) { + if (pixel_step != 4 && pixel_step != 3 && pixel_step != 8 && pixel_step != 6 && pixel_step != -1 && pixel_step != -2) { env->ThrowError("Invalid pixel step. 
This is a bug.");
   }
 
-  if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16)) {
+  // sse2 for 8 bit only (pixel_step==3,4), todo
+  if (((pixel_step == 3) || (pixel_step == 4)) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16)) {
     if (pixel_step == 4) {
       convert_rgb32_to_yv24_sse2(dstY, dstU, dstV, srcp, Ypitch, UVpitch, Spitch, vi.width, vi.height, matrix);
     } else {
@@ -740,7 +815,7 @@ PVideoFrame __stdcall ConvertRGBToYV24::GetFrame(int n, IScriptEnvironment* env)
   }
 
 #ifdef X86_32
-  if ((env->GetCPUFlags() & CPUF_MMX)) {
+  if (((pixel_step == 3) || (pixel_step == 4)) && (env->GetCPUFlags() & CPUF_MMX)) {
     if (pixel_step == 4) {
       convert_rgb32_to_yv24_mmx(dstY, dstU, dstV, srcp, Ypitch, UVpitch, Spitch, vi.width, vi.height, matrix);
     } else {
@@ -755,23 +830,76 @@ PVideoFrame __stdcall ConvertRGBToYV24::GetFrame(int n, IScriptEnvironment* env)
   ConversionMatrix &m = matrix;
   srcp += Spitch * (vi.height-1); // We start at last line
   const int Sstep = Spitch + (vi.width * pixel_step);
-  for (int y = 0; y < vi.height; y++) {
-    for (int x = 0; x < vi.width; x++) {
-      int b = srcp[0];
-      int g = srcp[1];
-      int r = srcp[2];
-      int Y = m.offset_y + (((int)m.y_b * b + (int)m.y_g * g + (int)m.y_r * r + 16384)>>15);
-      int U = 128+(((int)m.u_b * b + (int)m.u_g * g + (int)m.u_r * r + 16384)>>15);
-      int V = 128+(((int)m.v_b * b + (int)m.v_g * g + (int)m.v_r * r + 16384)>>15);
-      *dstY++ = PixelClip(Y); // All the safety we can wish for.
-      *dstU++ = PixelClip(U);
-      *dstV++ = PixelClip(V);
-      srcp += pixel_step;
+
+  if(pixel_step==3 || pixel_step==4)
+  {
+    for (int y = 0; y < vi.height; y++) {
+      for (int x = 0; x < vi.width; x++) {
+        int b = srcp[0];
+        int g = srcp[1];
+        int r = srcp[2];
+        int Y = m.offset_y + (((int)m.y_b * b + (int)m.y_g * g + (int)m.y_r * r + 16384)>>15);
+        int U = 128+(((int)m.u_b * b + (int)m.u_g * g + (int)m.u_r * r + 16384)>>15);
+        int V = 128+(((int)m.v_b * b + (int)m.v_g * g + (int)m.v_r * r + 16384)>>15);
+        *dstY++ = PixelClip(Y); // All the safety we can wish for.
+        *dstU++ = PixelClip(U);
+        *dstV++ = PixelClip(V);
+        srcp += pixel_step;
+      }
+      srcp -= Sstep;
+      dstY += Ypitch - vi.width;
+      dstU += UVpitch - vi.width;
+      dstV += UVpitch - vi.width;
+    }
+  }
+  else if(pixel_step==6 || pixel_step==8){
+    // uint16: pixel_step==6,8
+    uint16_t *dstY16 = reinterpret_cast<uint16_t *>(dstY);
+    uint16_t *dstU16 = reinterpret_cast<uint16_t *>(dstU);
+    uint16_t *dstV16 = reinterpret_cast<uint16_t *>(dstV);
+    int Ypitch16 = Ypitch / sizeof(uint16_t);
+    int UVpitch16 = UVpitch / sizeof(uint16_t);
+    for (int y = 0; y < vi.height; y++) {
+      for (int x = 0; x < vi.width; x++) {
+        int b = reinterpret_cast<const uint16_t *>(srcp)[0];
+        int g = reinterpret_cast<const uint16_t *>(srcp)[1];
+        int r = reinterpret_cast<const uint16_t *>(srcp)[2];
+        int Y = (m.offset_y << 8) + (((__int64)m.y_b * b + (__int64)m.y_g * g + (__int64)m.y_r * r + 16384)>>15);
+        int U = 32768+(((__int64)m.u_b * b + (__int64)m.u_g * g + (__int64)m.u_r * r + 16384)>>15);
+        int V = 32768+(((__int64)m.v_b * b + (__int64)m.v_g * g + (__int64)m.v_r * r + 16384)>>15);
+        *dstY16++ = (uint16_t)clamp(Y, 0, 65535);// PixelClip(Y); // All the safety we can wish for.
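+        // note: the 8-bit conversion matrix is reused for this 16-bit path, so the luma offset and
+        // the 128 chroma midpoint are pre-scaled to the 16-bit range above (offset_y << 8, and 128 << 8 = 32768)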
+ *dstU16++ = (uint16_t)clamp(U, 0, 65535); + *dstV16++ = (uint16_t)clamp(V, 0, 65535); + srcp += pixel_step; + } + srcp -= Sstep; + dstY16 += Ypitch16 - vi.width; + dstU16 += UVpitch16 - vi.width; + dstV16 += UVpitch16 - vi.width; } - srcp -= Sstep; - dstY += Ypitch - vi.width; - dstU += UVpitch - vi.width; - dstV += UVpitch - vi.width; + } + else { + // isPlanarRGBfamily + if(hasAlpha) { + // simple copy + BYTE* dstA = dst->GetWritePtr(PLANAR_A); + const int Apitch = dst->GetPitch(PLANAR_A); + env->BitBlt(dstA, Apitch, src->GetReadPtr(PLANAR_A), src->GetPitch(PLANAR_A), src->GetRowSize(PLANAR_A_ALIGNED), src->GetHeight(PLANAR_A)); + } + int pixelsize = vi.ComponentSize(); + + const BYTE *srcp[3] = { src->GetReadPtr(PLANAR_G), src->GetReadPtr(PLANAR_B), src->GetReadPtr(PLANAR_R) }; + const int srcPitch[3] = { src->GetPitch(PLANAR_G), src->GetPitch(PLANAR_B), src->GetPitch(PLANAR_R) }; + + BYTE *dstp[3] = { dstY, dstU, dstV }; + int dstPitch[3] = { Ypitch, UVpitch, UVpitch }; + + if(pixelsize==1) + convert_planarrgb_to_yuv_int_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); + else if (pixelsize==2) + convert_planarrgb_to_yuv_int_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); + else // float + convert_planarrgb_to_yuv_float_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); } return dst; } @@ -1915,14 +2043,17 @@ AVSValue ConvertToPlanarGeneric::Create(AVSValue& args, const char* filter, IScr PClip clip = args[0].AsClip(); VideoInfo vi = clip->GetVideoInfo(); + /* if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { env->ThrowError("%s: Conversion from Planar RGB(A) is not implemented yet.", filter); //clip = new ConvertPlanarRGBTo444(clip, getMatrix(args[2].AsString(0), env), env); //vi = clip->GetVideoInfo(); } - else if (vi.IsRGB()) { // 8 bit only, todo + else*/ if (vi.IsRGB()) { // 8 bit only, todo + /* if (vi.ComponentSize() != 1) env->ThrowError("%s: Conversion from packed RGB > 8 bit is not implemented yet.", filter); + */ clip = new ConvertRGBToYV24(clip, getMatrix(args[2].AsString(0), env), env); vi = clip->GetVideoInfo(); } diff --git a/avs_core/convert/convert_planar.h b/avs_core/convert/convert_planar.h index c65824356..0f8f46f23 100644 --- a/avs_core/convert/convert_planar.h +++ b/avs_core/convert/convert_planar.h @@ -105,6 +105,8 @@ class ConvertRGBToYV24 : public GenericVideoFilter void BuildMatrix(double Kr, double Kb, int Sy, int Suv, int Oy, int shift); ConversionMatrix matrix; int pixel_step; + bool hasAlpha; + bool isPlanarRGBfamily; }; class ConvertYUY2ToYV16 : public GenericVideoFilter From 8579f2a309c3dde0c4d748880932101cf55b7c64 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 26 Aug 2016 19:59:46 +0200 Subject: [PATCH 012/120] Remove unused code --- avs_core/convert/convert.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/avs_core/convert/convert.h b/avs_core/convert/convert.h index 7dc76e1a1..d3f5c3ab6 100644 --- a/avs_core/convert/convert.h +++ b/avs_core/convert/convert.h @@ -81,8 +81,6 @@ class ConvertToRGB : public GenericVideoFilter } static AVSValue __cdecl Create(AVSValue args, void* user_data, IScriptEnvironment* env); -// static AVSValue __cdecl Create32(AVSValue args, void*, IScriptEnvironment* env); -// static AVSValue __cdecl Create24(AVSValue args, void*, IScriptEnvironment* env); private: int theMatrix; From 55057fae4b22b1cdbaa3036f165619f7add72a44 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 26 Aug 2016 20:12:51 +0200 Subject: [PATCH 013/120] Remove unused code --- avs_core/convert/convert_planar.cpp | 
30 +---------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index 9704f6e09..6d8eefca7 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -1554,24 +1554,6 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en return dst; } -#if 0 -AVSValue __cdecl ConvertYUV444ToRGB::Create32(AVSValue args, void*, IScriptEnvironment* env) { - PClip clip = args[0].AsClip(); - if (clip->GetVideoInfo().IsRGB()) - return clip; - return new ConvertYUV444ToRGB(clip, getMatrix(args[1].AsString(0), env), 4, env); -} -#endif - -#if 0 -AVSValue __cdecl ConvertYUV444ToRGB::Create24(AVSValue args, void*, IScriptEnvironment* env) { - PClip clip = args[0].AsClip(); - if (clip->GetVideoInfo().IsRGB()) - return clip; - return new ConvertYUV444ToRGB(clip, getMatrix(args[1].AsString(0), env), 3, env); -} -#endif - /************************************ * YUY2 to YV16 ************************************/ @@ -2043,17 +2025,7 @@ AVSValue ConvertToPlanarGeneric::Create(AVSValue& args, const char* filter, IScr PClip clip = args[0].AsClip(); VideoInfo vi = clip->GetVideoInfo(); - /* - if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { - env->ThrowError("%s: Conversion from Planar RGB(A) is not implemented yet.", filter); - //clip = new ConvertPlanarRGBTo444(clip, getMatrix(args[2].AsString(0), env), env); - //vi = clip->GetVideoInfo(); - } - else*/ if (vi.IsRGB()) { // 8 bit only, todo - /* - if (vi.ComponentSize() != 1) - env->ThrowError("%s: Conversion from packed RGB > 8 bit is not implemented yet.", filter); - */ + if (vi.IsRGB()) { // packed or planar clip = new ConvertRGBToYV24(clip, getMatrix(args[2].AsString(0), env), env); vi = clip->GetVideoInfo(); } From 7167307f6c1b370b6ab7b09820fdf60fae7e1b22 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 26 Aug 2016 20:26:00 +0200 Subject: [PATCH 014/120] Typo left from earlier trials --- avs_core/filters/resample.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avs_core/filters/resample.cpp b/avs_core/filters/resample.cpp index d95c521a0..2bc119027 100644 --- a/avs_core/filters/resample.cpp +++ b/avs_core/filters/resample.cpp @@ -417,7 +417,7 @@ static void resize_v_sseX_planar_16or32(BYTE* dst0, const BYTE* src0, int dst_pi src_h_single = loadps(reinterpret_cast(src2_ptr+4)); } __m128 coeff = _mm_load1_ps(reinterpret_cast(current_coeff_float+i)); // loads 1, fills all 4 floats - __m128 dst_l = _mm_mul_ps(src_l_single, coeff); // Multiply by coefficient (SSE4) + __m128 dst_l = _mm_mul_ps(src_l_single, coeff); // Multiply by coefficient __m128 dst_h = _mm_mul_ps(src_h_single, coeff); // 4*(32bit*32bit=32bit) result_l_single = _mm_add_ps(result_l_single, dst_l); // accumulate result. 
result_h_single = _mm_add_ps(result_h_single, dst_h); From 7998456313a094cbff945b62504b98f25f34ae86 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 26 Aug 2016 21:55:13 +0200 Subject: [PATCH 015/120] RGB48/64+PlanarRGB(A) for Crop(Bottom),AddBorders,LetterBox,FlipHorizontal,FlipVertical --- avs_core/filters/transform.cpp | 217 +++++++++++++++++++++++++-------- avs_core/filters/transform.h | 2 + 2 files changed, 166 insertions(+), 53 deletions(-) diff --git a/avs_core/filters/transform.cpp b/avs_core/filters/transform.cpp index f93c08e28..d1221b38e 100644 --- a/avs_core/filters/transform.cpp +++ b/avs_core/filters/transform.cpp @@ -72,16 +72,32 @@ PVideoFrame FlipVertical::GetFrame(int n, IScriptEnvironment* env) { int src_pitch = src->GetPitch(); int dst_pitch = dst->GetPitch(); env->BitBlt(dstp, dst_pitch, srcp + (vi.height-1) * src_pitch, -src_pitch, row_size, vi.height); - if (src->GetPitch(PLANAR_U)) { - srcp = src->GetReadPtr(PLANAR_U); - dstp = dst->GetWritePtr(PLANAR_U); - row_size = src->GetRowSize(PLANAR_U); - src_pitch = src->GetPitch(PLANAR_U); - dst_pitch = dst->GetPitch(PLANAR_U); - env->BitBlt(dstp, dst_pitch, srcp + (src->GetHeight(PLANAR_U)-1) * src_pitch, -src_pitch, row_size, src->GetHeight(PLANAR_U)); - srcp = src->GetReadPtr(PLANAR_V); - dstp = dst->GetWritePtr(PLANAR_V); - env->BitBlt(dstp, dst_pitch, srcp + (src->GetHeight(PLANAR_U)-1) * src_pitch, -src_pitch, row_size, src->GetHeight(PLANAR_U)); + + bool isRGBPfamily = vi.IsPlanarRGB() || vi.IsPlanarRGBA(); + int planeUB = isRGBPfamily ? PLANAR_B : PLANAR_U; + int planeVR = isRGBPfamily ? PLANAR_R : PLANAR_V; + + if (src->GetPitch(planeUB)) { + srcp = src->GetReadPtr(planeUB); + dstp = dst->GetWritePtr(planeUB); + row_size = src->GetRowSize(planeUB); + src_pitch = src->GetPitch(planeUB); + dst_pitch = dst->GetPitch(planeUB); + env->BitBlt(dstp, dst_pitch, srcp + (src->GetHeight(planeUB)-1) * src_pitch, -src_pitch, row_size, src->GetHeight(planeUB)); + + srcp = src->GetReadPtr(planeVR); + dstp = dst->GetWritePtr(planeVR); + env->BitBlt(dstp, dst_pitch, srcp + (src->GetHeight(planeVR)-1) * src_pitch, -src_pitch, row_size, src->GetHeight(planeVR)); + + if (vi.IsYUVA() || vi.IsPlanarRGBA()) + { + srcp = src->GetReadPtr(PLANAR_A); + dstp = dst->GetWritePtr(PLANAR_A); + row_size = src->GetRowSize(PLANAR_A); + src_pitch = src->GetPitch(PLANAR_A); + dst_pitch = dst->GetPitch(PLANAR_A); + env->BitBlt(dstp, dst_pitch, srcp + (src->GetHeight(PLANAR_A)-1) * src_pitch, -src_pitch, row_size, src->GetHeight(PLANAR_A)); + } } return dst; } @@ -146,25 +162,42 @@ PVideoFrame FlipHorizontal::GetFrame(int n, IScriptEnvironment* env) { flip_h_func = flip_horizontal_plane_c; break; } flip_h_func(dstp, srcp, dst_pitch, src_pitch, width, height); - if (src->GetPitch(PLANAR_U)) { - srcp = src->GetReadPtr(PLANAR_U); - dstp = dst->GetWritePtr(PLANAR_U); - width = src->GetRowSize(PLANAR_U); - src_pitch = src->GetPitch(PLANAR_U); - dst_pitch = dst->GetPitch(PLANAR_U); - height = src->GetHeight(PLANAR_U); + + bool isRGBPfamily = vi.IsPlanarRGB() || vi.IsPlanarRGBA(); + int planeUB = isRGBPfamily ? PLANAR_B : PLANAR_U; + int planeVR = isRGBPfamily ? 
PLANAR_R : PLANAR_V; + + if (src->GetPitch(planeUB)) { + srcp = src->GetReadPtr(planeUB); + dstp = dst->GetWritePtr(planeUB); + width = src->GetRowSize(planeUB); + src_pitch = src->GetPitch(planeUB); + dst_pitch = dst->GetPitch(planeUB); + height = src->GetHeight(planeUB); flip_h_func(dstp, srcp, dst_pitch, src_pitch, width, height); - srcp = src->GetReadPtr(PLANAR_V); - dstp = dst->GetWritePtr(PLANAR_V); + srcp = src->GetReadPtr(planeVR); + dstp = dst->GetWritePtr(planeVR); flip_h_func(dstp, srcp, dst_pitch, src_pitch, width, height); + + if (vi.IsYUVA() || vi.IsPlanarRGBA()) + { + srcp = src->GetReadPtr(PLANAR_A); + dstp = dst->GetWritePtr(PLANAR_A); + width = src->GetRowSize(PLANAR_A); + src_pitch = src->GetPitch(PLANAR_A); + dst_pitch = dst->GetPitch(PLANAR_A); + height = src->GetHeight(PLANAR_A); + flip_h_func(dstp, srcp, dst_pitch, src_pitch, width, height); + } } return dst; } + // width is GetRowSize srcp += width-bpp; - if (vi.IsRGB32()) { + if (vi.IsRGB32() || vi.IsRGB64()) { // fast method for (int y = 0; y(dstp))[x] = (reinterpret_cast(srcp))[-x]; @@ -175,7 +208,7 @@ PVideoFrame FlipHorizontal::GetFrame(int n, IScriptEnvironment* env) { return dst; } - //RGB24 + //RGB24/48 for (int y = 0; y vi.width || _top + _height > vi.height) env->ThrowError("Crop: you cannot use crop to enlarge or 'shift' a clip"); - if (vi.IsYUV()) { + isRGBPfamily = vi.IsPlanarRGB() || vi.IsPlanarRGBA(); + hasAlpha = vi.IsPlanarRGBA() || vi.IsYUVA(); + + if (vi.IsYUV() || vi.IsYUVA()) { if (vi.NumComponents() > 1) { xsub=vi.GetPlaneWidthSubsampling(PLANAR_U); ysub=vi.GetPlaneHeightSubsampling(PLANAR_U); @@ -244,7 +280,7 @@ Crop::Crop(int _left, int _top, int _width, int _height, bool _align, PClip _chi env->ThrowError("Crop: YUV image can only be cropped by Mod %d (top).", ymask+1); if (_height & ymask) env->ThrowError("Crop: YUV image can only be cropped by Mod %d (bottom).", ymask+1); - } else { + } else if (!isRGBPfamily) { // RGB is upside-down _top = vi.height - _height - _top; } @@ -261,39 +297,48 @@ PVideoFrame Crop::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame frame = child->GetFrame(n, env); - const BYTE* srcpY = frame->GetReadPtr(PLANAR_Y) + top * frame->GetPitch(PLANAR_Y) + left_bytes; - const BYTE* srcpU = frame->GetReadPtr(PLANAR_U) + (top>>ysub) * frame->GetPitch(PLANAR_U) + (left_bytes>>xsub); - const BYTE* srcpV = frame->GetReadPtr(PLANAR_V) + (top>>ysub) * frame->GetPitch(PLANAR_V) + (left_bytes>>xsub); + int planeYG = isRGBPfamily ? PLANAR_G : PLANAR_Y; + int planeUB = isRGBPfamily ? PLANAR_B : PLANAR_U; + int planeVR = isRGBPfamily ? 
PLANAR_R : PLANAR_V; + + const BYTE* srcpYG = frame->GetReadPtr(planeYG) + top * frame->GetPitch(planeYG) + left_bytes; + const BYTE* srcpUB = frame->GetReadPtr(planeUB) + (top>>ysub) * frame->GetPitch(planeUB) + (left_bytes>>xsub); + const BYTE* srcpVR = frame->GetReadPtr(planeVR) + (top>>ysub) * frame->GetPitch(planeVR) + (left_bytes>>xsub); size_t _align; - if (frame->GetPitch(PLANAR_U) && (!vi.IsYV12() || env->PlanarChromaAlignment(IScriptEnvironment::PlanarChromaAlignmentTest))) - _align = this->align & ((size_t)srcpY|(size_t)srcpU|(size_t)srcpV); + if (frame->GetPitch(planeUB) && (!vi.IsYV12() || env->PlanarChromaAlignment(IScriptEnvironment::PlanarChromaAlignmentTest))) + _align = this->align & ((size_t)srcpYG|(size_t)srcpUB|(size_t)srcpVR); else - _align = this->align & (size_t)srcpY; + _align = this->align & (size_t)srcpYG; - if (0 != _align) { + if (0 != _align || hasAlpha) { // no env->SubframePlanar with extra planar parameter PVideoFrame dst = env->NewVideoFrame(vi, (int)align+1); - env->BitBlt(dst->GetWritePtr(PLANAR_Y), dst->GetPitch(PLANAR_Y), srcpY, - frame->GetPitch(PLANAR_Y), dst->GetRowSize(PLANAR_Y), dst->GetHeight(PLANAR_Y)); + env->BitBlt(dst->GetWritePtr(planeYG), dst->GetPitch(planeYG), srcpYG, + frame->GetPitch(planeYG), dst->GetRowSize(planeYG), dst->GetHeight(planeYG)); + + env->BitBlt(dst->GetWritePtr(planeUB), dst->GetPitch(planeUB), srcpUB, + frame->GetPitch(planeUB), dst->GetRowSize(planeUB), dst->GetHeight(planeUB)); - env->BitBlt(dst->GetWritePtr(PLANAR_U), dst->GetPitch(PLANAR_U), srcpU, - frame->GetPitch(PLANAR_U), dst->GetRowSize(PLANAR_U), dst->GetHeight(PLANAR_U)); + env->BitBlt(dst->GetWritePtr(planeVR), dst->GetPitch(planeVR), srcpVR, + frame->GetPitch(planeVR), dst->GetRowSize(planeVR), dst->GetHeight(planeVR)); - env->BitBlt(dst->GetWritePtr(PLANAR_V), dst->GetPitch(PLANAR_V), srcpV, - frame->GetPitch(PLANAR_V), dst->GetRowSize(PLANAR_V), dst->GetHeight(PLANAR_V)); + if(hasAlpha) + env->BitBlt(dst->GetWritePtr(PLANAR_A), dst->GetPitch(PLANAR_A), frame->GetReadPtr(PLANAR_A) + top * frame->GetPitch(PLANAR_A) + left_bytes, + frame->GetPitch(PLANAR_A), dst->GetRowSize(PLANAR_A), dst->GetHeight(PLANAR_A)); return dst; } - if (!frame->GetPitch(PLANAR_U)) + if (!frame->GetPitch(planeUB)) return env->Subframe(frame, top * frame->GetPitch() + left_bytes, frame->GetPitch(), vi.RowSize(), vi.height); - else - return env->SubframePlanar(frame, top * frame->GetPitch() + left_bytes, frame->GetPitch(), vi.RowSize(), vi.height, - (top>>ysub) * frame->GetPitch(PLANAR_U) + (left_bytes>>xsub), - (top>>ysub) * frame->GetPitch(PLANAR_V) + (left_bytes>>xsub), - frame->GetPitch(PLANAR_U)); + else { + return env->SubframePlanar(frame, top * frame->GetPitch() + left_bytes, frame->GetPitch(), vi.RowSize(), vi.height, + (top>>ysub) * frame->GetPitch(planeUB) + (left_bytes>>xsub), + (top>>ysub) * frame->GetPitch(planeVR) + (left_bytes>>xsub), + frame->GetPitch(planeUB)); + } } @@ -333,7 +378,7 @@ AddBorders::AddBorders(int _left, int _top, int _right, int _bot, int _clr, PCli env->ThrowError("AddBorders: YUV image can only add by Mod %d (top).", ymask+1); if (_bot & ymask) env->ThrowError("AddBorders: YUV image can only add by Mod %d (bottom).", ymask+1); - } else { + } else if (!vi.IsPlanarRGB() && !vi.IsPlanarRGBA()){ // RGB is upside-down int t = top; top = bot; bot = t; } @@ -350,15 +395,20 @@ static inline pixel_t GetHbdColorFromByte(uint8_t color) } template -static void addborders_planar(PVideoFrame &dst, PVideoFrame &src, VideoInfo &vi, int top, int bot, int left, 
int right, int rgbcolor) +static void addborders_planar(PVideoFrame &dst, PVideoFrame &src, VideoInfo &vi, int top, int bot, int left, int right, int rgbcolor, bool isYUV) { - const unsigned int colr = RGB2YUV(rgbcolor); + const unsigned int colr = isYUV ? RGB2YUV(rgbcolor) : rgbcolor; const unsigned char YBlack=(unsigned char)((colr >> 16) & 0xff); const unsigned char UBlack=(unsigned char)((colr >> 8) & 0xff); const unsigned char VBlack=(unsigned char)((colr ) & 0xff); - - int planes[3] = { PLANAR_Y, PLANAR_U, PLANAR_V }; - uint8_t colors[3] = { YBlack, UBlack, VBlack }; + const unsigned char ABlack=(unsigned char)((colr >> 24) & 0xff); + + int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A }; + int planesRGB[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A }; + int *planes = isYUV ? planesYUV : planesRGB; + uint8_t colorsYUV[4] = { YBlack, UBlack, VBlack, ABlack }; + uint8_t colorsRGB[4] = { UBlack, VBlack, YBlack, ABlack }; // mapping for planar RGB + uint8_t *colors = isYUV ? colorsYUV : colorsRGB; for (int p = 0; p < vi.NumComponents(); p++) { int plane = planes[p]; @@ -405,11 +455,12 @@ PVideoFrame AddBorders::GetFrame(int n, IScriptEnvironment* env) PVideoFrame dst = env->NewVideoFrame(vi); if (vi.IsPlanar()) { + bool isYUV = vi.IsYUV() || vi.IsYUVA(); switch(vi.ComponentSize()) { - case 1: addborders_planar(dst, src, vi, top, bot, left, right, clr); break; - case 2: addborders_planar(dst, src, vi, top, bot, left, right, clr); break; + case 1: addborders_planar(dst, src, vi, top, bot, left, right, clr, isYUV); break; + case 2: addborders_planar(dst, src, vi, top, bot, left, right, clr, isYUV); break; default: //case 4: - addborders_planar(dst, src, vi, top, bot, left, right, clr); break; + addborders_planar(dst, src, vi, top, bot, left, right, clr, isYUV); break; } return dst; } @@ -497,7 +548,67 @@ PVideoFrame AddBorders::GetFrame(int n, IScriptEnvironment* env) for (int i = 0; i(clr & 0xFF); + uint32_t clr1 = + ((uint32_t)GetHbdColorFromByte((clr >> 16) & 0xFF) << (8 * 2)) + + ((uint32_t)GetHbdColorFromByte((clr >> 8) & 0xFF)); + const int leftbytes = vi.BytesFromPixels(left); + const int leftrow = src_row_size + leftbytes; + const int rightbytes = vi.BytesFromPixels(right); + const int rightrow = dst_pitch - dst_row_size + rightbytes; + + BitBlt(dstp+initial_black, dst_pitch, srcp, src_pitch, src_row_size, src_height); + /* Cannot use *_black optimisation as pitch may not be mod 3 */ + for (int y = top; y>0; --y) { + for (int i = 0; i0; --y) { + for (int i = 0; i0; --y) { + for (int i = 0; i((clr >> 24) & 0xFF) << (24 * 2)) + + ((uint64_t)GetHbdColorFromByte((clr >> 16) & 0xFF) << (16 * 2)) + + ((uint64_t)GetHbdColorFromByte((clr >> 8) & 0xFF) << (8 * 2)) + + ((uint64_t)GetHbdColorFromByte((clr) & 0xFF)); + + for (int i = 0; i0; --y) { + for (int i = 0; i=vi.width) // Must be >= otherwise it is interpreted wrong by crop() env->ThrowError("LetterBox: You cannot specify letterboxing that is bigger than the picture (width)."); - if (vi.IsYUV()) { + if (vi.IsYUV() || vi.IsYUVA()) { int xsub = 0; int ysub = 0; diff --git a/avs_core/filters/transform.h b/avs_core/filters/transform.h index 77aafc3d4..a054eb291 100644 --- a/avs_core/filters/transform.h +++ b/avs_core/filters/transform.h @@ -103,6 +103,8 @@ class Crop : public GenericVideoFilter private: /*const*/ int left_bytes, top, align; int xsub, ysub; + bool isRGBPfamily; + bool hasAlpha; }; From 6780ff4fa14bc6296b32e4d2c20a43db55846f38 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 29 Aug 2016 08:34:06 +0200 Subject: 
[PATCH 016/120] avisynth C: missing alpha fields in VideoFrame struct + planar R,G,B addons --- avs_core/core/avisynth_c.cpp | 26 +++++++++++++++----------- avs_core/include/avisynth_c.h | 8 ++++++-- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/avs_core/core/avisynth_c.cpp b/avs_core/core/avisynth_c.cpp index f3159be9a..548851ec0 100644 --- a/avs_core/core/avisynth_c.cpp +++ b/avs_core/core/avisynth_c.cpp @@ -204,8 +204,9 @@ extern "C" int AVSC_CC avs_get_pitch_p(const AVS_VideoFrame * p, int plane) { switch (plane) { - case AVS_PLANAR_U: case AVS_PLANAR_V: return p->pitchUV;} - return p->pitch; + case AVS_PLANAR_U: case AVS_PLANAR_V: return p->pitchUV; + case AVS_PLANAR_A: return p->pitchA;} + return p->pitch; // Y, G, B, R } extern "C" @@ -225,8 +226,9 @@ int AVSC_CC avs_get_row_size_p(const AVS_VideoFrame * p, int plane) else return 0; - case AVS_PLANAR_Y_ALIGNED: - r = (p->row_size+FRAME_ALIGN-1)&(~(FRAME_ALIGN-1)); // Aligned rowsize + case AVS_PLANAR_ALIGNED: case AVS_PLANAR_Y_ALIGNED: + case AVS_PLANAR_R_ALIGNED: case AVS_PLANAR_G_ALIGNED: case AVS_PLANAR_B_ALIGNED: case AVS_PLANAR_A_ALIGNED: + r = (p->row_size+FRAME_ALIGN-1)&(~(FRAME_ALIGN-1)); // Aligned rowsize return (r <= p->pitch) ? r : p->row_size; } return p->row_size; @@ -239,16 +241,17 @@ int AVSC_CC avs_get_height_p(const AVS_VideoFrame * p, int plane) case AVS_PLANAR_U: case AVS_PLANAR_V: return (p->pitchUV) ? p->heightUV : 0; } - return p->height; + return p->height; // Y, G, B, R, A } extern "C" const BYTE * AVSC_CC avs_get_read_ptr_p(const AVS_VideoFrame * p, int plane) { switch (plane) { - case AVS_PLANAR_U: return p->vfb->data + p->offsetU; - case AVS_PLANAR_V: return p->vfb->data + p->offsetV; - default: return p->vfb->data + p->offset;} + case AVS_PLANAR_U: case AVS_PLANAR_B: return p->vfb->data + p->offsetU; // G is first. Then B,R order like U,V + case AVS_PLANAR_V: case PLANAR_R: return p->vfb->data + p->offsetV; + case PLANAR_A: return p->vfb->data + p->offsetA; + default: return p->vfb->data + p->offset;} // PLANAR Y, PLANAR_G } extern "C" @@ -265,12 +268,13 @@ extern "C" BYTE * AVSC_CC avs_get_write_ptr_p(const AVS_VideoFrame * p, int plane) { switch (plane) { - case AVS_PLANAR_U: return p->vfb->data + p->offsetU; - case AVS_PLANAR_V: return p->vfb->data + p->offsetV; + case AVS_PLANAR_U: case AVS_PLANAR_B: return p->vfb->data + p->offsetU; + case AVS_PLANAR_V: case AVS_PLANAR_R: return p->vfb->data + p->offsetV; + case AVS_PLANAR_A: return p->vfb->data + p->offsetA; default: break; } if (avs_is_writable(p)) { - return p->vfb->data + p->offset; + return p->vfb->data + p->offset; // Y,G } return 0; } diff --git a/avs_core/include/avisynth_c.h b/avs_core/include/avisynth_c.h index a3db56701..fca600350 100644 --- a/avs_core/include/avisynth_c.h +++ b/avs_core/include/avisynth_c.h @@ -531,7 +531,11 @@ typedef struct AVS_VideoFrame { volatile long refcount; AVS_VideoFrameBuffer * vfb; int offset, pitch, row_size, height, offsetU, offsetV, pitchUV; // U&V offsets are from top of picture. - int row_sizeUV, heightUV; + int row_sizeUV, heightUV; // for Planar RGB offsetU, offsetV is for the 2nd and 3rd Plane. 
+                    // for Planar RGB pitchUV and row_sizeUV = 0, because when no VideoInfo (MakeWritable)
+                    // the decision on existence of UV is checked by zero pitch
+  // AVS+ extension, avisynth.h: class does not break plugins if appended here
+  int offsetA, pitchA, row_sizeA; // 4th alpha plane support, pitch and row_size are 0 if none
 } AVS_VideoFrame;
 
 // Access functions for AVS_VideoFrame
@@ -745,7 +749,7 @@ enum {
   AVS_CPUF_SSSE3     = 0x200,   //  Core 2
   AVS_CPUF_SSE4      = 0x400,   //  Penryn, Wolfdale, Yorkfield
   AVS_CPUF_SSE4_1    = 0x400,
-//AVS_CPUF_AVX       = 0x800,   //  Sandy Bridge, Bulldozer
+  AVS_CPUF_AVX       = 0x800,   //  Sandy Bridge, Bulldozer
   AVS_CPUF_SSE4_2    = 0x1000,  //  Nehalem
 //AVS_CPUF_AVX2      = 0x2000,  //  Haswell
 //AVS_CPUF_AVX512    = 0x4000,  //  Knights Landing

From b514645ba997a735b02a4da1737237fa252ec6bc Mon Sep 17 00:00:00 2001
From: Pinterf
Date: Mon, 29 Aug 2016 11:30:17 +0200
Subject: [PATCH 017/120] fix: Convert PlanarRGB->444 at 8 bit

---
 avs_core/convert/convert_planar.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp
index 6d8eefca7..468b0fb9f 100644
--- a/avs_core/convert/convert_planar.cpp
+++ b/avs_core/convert/convert_planar.cpp
@@ -742,9 +742,9 @@ static void convert_planarrgb_to_yuv_int_c(BYTE *(&dstp)[3], int (&dstPitch)[3],
       pixel_t g = reinterpret_cast<const pixel_t *>(srcp[0])[x];
       pixel_t b = reinterpret_cast<const pixel_t *>(srcp[1])[x];
       pixel_t r = reinterpret_cast<const pixel_t *>(srcp[2])[x];
-      int Y = (m.offset_y << 8) + (int)(((sum_t)m.y_b * b + (sum_t)m.y_g * g + (sum_t)m.y_r * r + 16384)>>15);
-      int U = half + (int)(((sum_t)m.u_b * b + (sum_t)m.u_g * g + (sum_t)m.u_r * r + 16384)>>15);
-      int V = half + (int)(((sum_t)m.v_b * b + (sum_t)m.v_g * g + (sum_t)m.v_r * r + 16384)>>15);
+      int Y = (sizeof(pixel_t)==1 ? m.offset_y : m.offset_y << 8) + (int)(((sum_t)m.y_b * b + (sum_t)m.y_g * g + (sum_t)m.y_r * r + 16384)>>15);
+      int U = half + (int)(((sum_t)m.u_b * b + (sum_t)m.u_g * g + (sum_t)m.u_r * r + 16384) >> 15);
+      int V = half + (int)(((sum_t)m.v_b * b + (sum_t)m.v_g * g + (sum_t)m.v_r * r + 16384) >> 15);
       reinterpret_cast<pixel_t *>(dstp[0])[x] = (pixel_t)clamp(Y, 0, limit);
       reinterpret_cast<pixel_t *>(dstp[1])[x] = (pixel_t)clamp(U, 0, limit);
       reinterpret_cast<pixel_t *>(dstp[2])[x] = (pixel_t)clamp(V, 0, limit);
@@ -906,7 +906,7 @@ PVideoFrame __stdcall ConvertRGBToYV24::GetFrame(int n, IScriptEnvironment* env)
 AVSValue __cdecl ConvertRGBToYV24::Create(AVSValue args, void*, IScriptEnvironment* env) {
   PClip clip = args[0].AsClip();
-  if (clip->GetVideoInfo().IsYV24())
+  if (clip->GetVideoInfo().Is444())
     return clip;
   return new ConvertRGBToYV24(clip, getMatrix(args[1].AsString(0), env), env);
 }

From 815d59df0a1cc1ba034725210f84f23580f0a532 Mon Sep 17 00:00:00 2001
From: Pinterf
Date: Mon, 29 Aug 2016 11:31:48 +0200
Subject: [PATCH 018/120] Horizontal/VerticalReduceBy2: PlanarRGB+alpha.
NoRGB48/64yet --- avs_core/filters/resize.cpp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/avs_core/filters/resize.cpp b/avs_core/filters/resize.cpp index b650fcfdc..5dd135b60 100644 --- a/avs_core/filters/resize.cpp +++ b/avs_core/filters/resize.cpp @@ -263,7 +263,7 @@ void vertical_reduce_core(BYTE* dstp, const BYTE* srcp, int dst_pitch, int src_p VerticalReduceBy2::VerticalReduceBy2(PClip _child, IScriptEnvironment* env) : GenericVideoFilter(_child) { - if (vi.IsPlanar() && (vi.NumComponents() > 1)) { + if (vi.IsPlanar() && (vi.IsYUV() || vi.IsYUVA()) && (vi.NumComponents() > 1)) { const int mod = 2 << vi.GetPlaneHeightSubsampling(PLANAR_U); const int mask = mod - 1; if (vi.height & mask) @@ -285,23 +285,24 @@ VerticalReduceBy2::VerticalReduceBy2(PClip _child, IScriptEnvironment* env) PVideoFrame VerticalReduceBy2::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame src = child->GetFrame(n, env); PVideoFrame dst = env->NewVideoFrame(vi); - int src_pitch = src->GetPitch(); - int dst_pitch = dst->GetPitch(); - int row_size = src->GetRowSize(); - BYTE* dstp = dst->GetWritePtr(); - const BYTE* srcp = src->GetReadPtr(); int pixelsize = vi.ComponentSize(); if (vi.IsPlanar()) { - vertical_reduce_core(dstp, srcp, dst_pitch, src_pitch, row_size, dst->GetHeight(PLANAR_Y), pixelsize, env); - if (vi.NumComponents() > 1) { - vertical_reduce_core(dst->GetWritePtr(PLANAR_U), src->GetReadPtr(PLANAR_U), dst->GetPitch(PLANAR_U), - src->GetPitch(PLANAR_U), dst->GetRowSize(PLANAR_U), dst->GetHeight(PLANAR_U), pixelsize, env); - vertical_reduce_core(dst->GetWritePtr(PLANAR_V), src->GetReadPtr(PLANAR_V), dst->GetPitch(PLANAR_V), - src->GetPitch(PLANAR_V), dst->GetRowSize(PLANAR_V), dst->GetHeight(PLANAR_V), pixelsize, env); + int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A }; + int planesRGB[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A }; + int *planes = vi.IsYUV() || vi.IsYUVA() ? planesYUV : planesRGB; + for (int p = 0; p < vi.NumComponents(); p++) + { + int plane = planes[p]; + vertical_reduce_core(dst->GetWritePtr(plane), src->GetReadPtr(plane), dst->GetPitch(plane), src->GetPitch(plane), dst->GetRowSize(plane), dst->GetHeight(plane), pixelsize, env); } } else { + int src_pitch = src->GetPitch(); + int dst_pitch = dst->GetPitch(); + int row_size = src->GetRowSize(); + BYTE* dstp = dst->GetWritePtr(); + const BYTE* srcp = src->GetReadPtr(); vertical_reduce_core(dstp, srcp, dst_pitch, src_pitch, row_size, vi.height, pixelsize, env); } return dst; @@ -367,8 +368,9 @@ PVideoFrame HorizontalReduceBy2::GetFrame(int n, IScriptEnvironment* env) if (vi.IsPlanar()) { int pixelsize = vi.ComponentSize(); - int planes[3] = { PLANAR_Y, PLANAR_U, PLANAR_V }; - + int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A }; + int planesRGB[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A }; + int *planes = vi.IsYUV() || vi.IsYUVA() ? 
planesYUV : planesRGB; for (int p = 0; p < vi.NumComponents(); p++) { int plane = planes[p]; @@ -453,6 +455,7 @@ PVideoFrame HorizontalReduceBy2::GetFrame(int n, IScriptEnvironment* env) } } } + // todo RGB48/RGB64 return dst; } From b84055d0cc45daca02c8404f6ad393f91fec3354 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 29 Aug 2016 11:32:49 +0200 Subject: [PATCH 019/120] StackHorizontal/Vertical: PlanarRGB, alpha --- avs_core/filters/combine.cpp | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/avs_core/filters/combine.cpp b/avs_core/filters/combine.cpp index ffaf45d0e..a998043b6 100644 --- a/avs_core/filters/combine.cpp +++ b/avs_core/filters/combine.cpp @@ -116,17 +116,22 @@ PVideoFrame __stdcall StackVertical::GetFrame(int n, IScriptEnvironment* env) if (vi.IsPlanar() && (vi.NumComponents() > 1)) { // Copy Planar - const int dst_pitchUV = dst->GetPitch(PLANAR_U); - const int row_sizeUV = dst->GetRowSize(PLANAR_U); + const int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A}; + const int planesRGB[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A}; + const int *planes = vi.IsYUV() || vi.IsYUVA() ? planesYUV : planesRGB; - for (const int& plane: { PLANAR_U, PLANAR_V }) - { + // first plane is already processed + for (int p = 1; p < vi.NumComponents(); p++) { + const int plane = planes[p]; dstp = dst->GetWritePtr(plane); + const int dst_pitch = dst->GetPitch(plane); + const int row_size = dst->GetRowSize(plane); + for (const auto& src: frames) { const int src_height = src->GetHeight(plane); - env->BitBlt(dstp, dst_pitchUV, src->GetReadPtr(plane), src->GetPitch(plane), row_sizeUV, src_height); - dstp += dst_pitchUV * src_height; + env->BitBlt(dstp, dst_pitch, src->GetReadPtr(plane), src->GetPitch(plane), row_size, src_height); + dstp += dst_pitch * src_height; } } } @@ -208,15 +213,22 @@ PVideoFrame __stdcall StackHorizontal::GetFrame(int n, IScriptEnvironment* env) if (vi.IsPlanar() && (vi.NumComponents() > 1)) { // Copy Planar - const int dst_pitchUV = dst->GetPitch(PLANAR_U); - const int heightUV = dst->GetHeight(PLANAR_U); - for (const int plane: { PLANAR_U, PLANAR_V }) { + const int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A}; + const int planesRGB[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A}; + const int *planes = vi.IsYUV() || vi.IsYUVA() ? 
planesYUV : planesRGB; + + // first plane is already processed + for (int p = 1; p < vi.NumComponents(); p++) { + const int plane = planes[p]; dstp = dst->GetWritePtr(plane); + const int dst_pitch = dst->GetPitch(plane); + const int height = dst->GetHeight(plane); + for (const auto& src: frames) { const int src_rowsize = src->GetRowSize(plane); - env->BitBlt(dstp, dst_pitchUV, src->GetReadPtr(plane), src->GetPitch(plane), src_rowsize, heightUV); + env->BitBlt(dstp, dst_pitch, src->GetReadPtr(plane), src->GetPitch(plane), src_rowsize, height); dstp += src_rowsize; } } @@ -285,7 +297,7 @@ PVideoFrame __stdcall ShowFiveVersions::GetFrame(int n, IScriptEnvironment* env) const int dst_pitchUV = dst->GetPitch(PLANAR_U); const int height = dst->GetHeight()/2; const int heightUV = dst->GetHeight(PLANAR_U)/2; - + // todo: >8 bits, planar RGB if (vi.IsYUV()) { const int wg = dst->GetRowSize()/6; for (int i=0; i Date: Mon, 29 Aug 2016 13:27:17 +0200 Subject: [PATCH 020/120] Merge(Chroma,Luma):Planar RGB, float type fix --- avs_core/filters/merge.cpp | 57 +++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/avs_core/filters/merge.cpp b/avs_core/filters/merge.cpp index 62f6c2ea5..2977903e6 100644 --- a/avs_core/filters/merge.cpp +++ b/avs_core/filters/merge.cpp @@ -427,13 +427,11 @@ static void average_plane_c(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch // for float static void average_plane_c_float(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height) { - float *fp1 = reinterpret_cast(p1); - const float *fp2 = reinterpret_cast(p2); size_t rs = rowsize / sizeof(float); for (int y = 0; y < height; ++y) { for (size_t x = 0; x < rs; ++x) { - fp1[x] = (fp1[x] + fp2[x]) / 2.0f; + reinterpret_cast(p1)[x] = (reinterpret_cast(p1)[x] + reinterpret_cast(p2)[x]) / 2.0f; } p1 += p1_pitch; p2 += p2_pitch; @@ -612,13 +610,11 @@ void weighted_merge_planar_c(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitc void weighted_merge_planar_c_float(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height, float weight, float invweight) { - float *fp1 = reinterpret_cast(p1); - const float *fp2 = reinterpret_cast(p2); size_t rs = rowsize / sizeof(float); for (int y = 0; y < height; ++y) { for (size_t x = 0; x < rs; ++x) { - fp1[x] = (fp1[x] * invweight + fp2[x] * weight) / 2.0f; + reinterpret_cast(p1)[x] = (reinterpret_cast(p1)[x] * invweight + reinterpret_cast(p2)[x] * weight) / 2.0f; } p1 += p1_pitch; p2 += p2_pitch; @@ -729,8 +725,8 @@ MergeChroma::MergeChroma(PClip _child, PClip _clip, float _weight, IScriptEnviro { const VideoInfo& vi2 = clip->GetVideoInfo(); - if (!vi.IsYUV() || !vi2.IsYUV()) - env->ThrowError("MergeChroma: YUV data only (no RGB); use ConvertToYUY2 or ConvertToYV12"); + if (!(vi.IsYUV() || vi.IsYUVA()) || !(vi2.IsYUV() || vi2.IsYUVA())) + env->ThrowError("MergeChroma: YUV data only (no RGB); use ConvertToYUY2, ConvertToYV12/16/24 or ConvertToYUVxxx"); if (!(vi.IsSameColorspace(vi2))) env->ThrowError("MergeChroma: YUV images must have same data type."); @@ -778,7 +774,7 @@ PVideoFrame __stdcall MergeChroma::GetFrame(int n, IScriptEnvironment* env) { weighted_merge_chroma_yuy2_c(srcp,chromap,src_pitch,chroma_pitch,w,h,(int)(weight*32768.0f),32768-(int)(weight*32768.0f)); } - } else { // Planar + } else { // Planar YUV env->MakeWritable(&src); src->GetWritePtr(PLANAR_Y); //Must be requested @@ -794,6 +790,10 @@ PVideoFrame __stdcall MergeChroma::GetFrame(int n, IScriptEnvironment* env) 
merge_plane(srcpU, chromapU, src_pitch_uv, chroma_pitch_uv, src_width_u, src_height_uv, weight, pixelsize, env); merge_plane(srcpV, chromapV, src_pitch_uv, chroma_pitch_uv, src_width_v, src_height_uv, weight, pixelsize, env); + + if(vi.IsYUVA()) + merge_plane(src->GetWritePtr(PLANAR_A), chroma->GetReadPtr(PLANAR_A), src->GetPitch(PLANAR_A), chroma->GetPitch(PLANAR_A), + src->GetRowSize(PLANAR_A_ALIGNED), src->GetHeight(PLANAR_A), weight, pixelsize, env); } } else { // weight == 1.0 if (vi.IsYUY2()) { @@ -826,6 +826,8 @@ PVideoFrame __stdcall MergeChroma::GetFrame(int n, IScriptEnvironment* env) src->GetWritePtr(PLANAR_Y); //Must be requested env->BitBlt(src->GetWritePtr(PLANAR_U),src->GetPitch(PLANAR_U),chroma->GetReadPtr(PLANAR_U),chroma->GetPitch(PLANAR_U),chroma->GetRowSize(PLANAR_U),chroma->GetHeight(PLANAR_U)); env->BitBlt(src->GetWritePtr(PLANAR_V),src->GetPitch(PLANAR_V),chroma->GetReadPtr(PLANAR_V),chroma->GetPitch(PLANAR_V),chroma->GetRowSize(PLANAR_V),chroma->GetHeight(PLANAR_V)); + if(vi.IsYUVA()) + env->BitBlt(src->GetWritePtr(PLANAR_A),src->GetPitch(PLANAR_A),chroma->GetReadPtr(PLANAR_A),chroma->GetPitch(PLANAR_A),chroma->GetRowSize(PLANAR_A),chroma->GetHeight(PLANAR_A)); } else { // avoid the cost of 2 chroma blits PVideoFrame dst = env->NewVideoFrame(vi); @@ -833,6 +835,9 @@ PVideoFrame __stdcall MergeChroma::GetFrame(int n, IScriptEnvironment* env) env->BitBlt(dst->GetWritePtr(PLANAR_Y),dst->GetPitch(PLANAR_Y),src->GetReadPtr(PLANAR_Y),src->GetPitch(PLANAR_Y),src->GetRowSize(PLANAR_Y),src->GetHeight(PLANAR_Y)); env->BitBlt(dst->GetWritePtr(PLANAR_U),dst->GetPitch(PLANAR_U),chroma->GetReadPtr(PLANAR_U),chroma->GetPitch(PLANAR_U),chroma->GetRowSize(PLANAR_U),chroma->GetHeight(PLANAR_U)); env->BitBlt(dst->GetWritePtr(PLANAR_V),dst->GetPitch(PLANAR_V),chroma->GetReadPtr(PLANAR_V),chroma->GetPitch(PLANAR_V),chroma->GetRowSize(PLANAR_V),chroma->GetHeight(PLANAR_V)); + if(vi.IsYUVA()) + env->BitBlt(dst->GetWritePtr(PLANAR_A),dst->GetPitch(PLANAR_A),chroma->GetReadPtr(PLANAR_A),chroma->GetPitch(PLANAR_A),chroma->GetRowSize(PLANAR_A),chroma->GetHeight(PLANAR_A)); + return dst; } } @@ -857,16 +862,16 @@ MergeLuma::MergeLuma(PClip _child, PClip _clip, float _weight, IScriptEnvironmen { const VideoInfo& vi2 = clip->GetVideoInfo(); - if (!vi.IsYUV() || !vi2.IsYUV()) - env->ThrowError("MergeLuma: YUV data only (no RGB); use ConvertToYUY2 or ConvertToYV12"); + if (!(vi.IsYUV() || vi.IsYUVA()) || !(vi2.IsYUV() || vi2.IsYUVA())) + env->ThrowError("MergeLuma: YUV data only (no RGB); use ConvertToYUY2, ConvertToYV12/16/24 or ConvertToYUVxxx"); - pixelsize = vi.BytesFromPixels(1); + pixelsize = vi.ComponentSize(); if (!vi.IsSameColorspace(vi2)) { // Since this is luma we allow all planar formats to be merged. if (!(vi.IsPlanar() && vi2.IsPlanar())) { env->ThrowError("MergeLuma: YUV data is not same type. 
YUY2 and planar images doesn't mix."); } - if (pixelsize != vi2.BytesFromPixels(1)) { + if (pixelsize != vi2.ComponentSize()) { env->ThrowError("MergeLuma: YUV data bit depth is not same."); } } @@ -938,6 +943,7 @@ PVideoFrame __stdcall MergeLuma::GetFrame(int n, IScriptEnvironment* env) return src; } // Planar if (weight>0.9961f) { + // 2nd clip weight is almost 100%: no merge, just copy const VideoInfo& vi2 = clip->GetVideoInfo(); if (luma->IsWritable() && vi.IsSameColorspace(vi2)) { if (luma->GetRowSize(PLANAR_U)) { @@ -945,6 +951,9 @@ PVideoFrame __stdcall MergeLuma::GetFrame(int n, IScriptEnvironment* env) env->BitBlt(luma->GetWritePtr(PLANAR_U),luma->GetPitch(PLANAR_U),src->GetReadPtr(PLANAR_U),src->GetPitch(PLANAR_U),src->GetRowSize(PLANAR_U),src->GetHeight(PLANAR_U)); env->BitBlt(luma->GetWritePtr(PLANAR_V),luma->GetPitch(PLANAR_V),src->GetReadPtr(PLANAR_V),src->GetPitch(PLANAR_V),src->GetRowSize(PLANAR_V),src->GetHeight(PLANAR_V)); } + if (luma->GetPitch(PLANAR_A)) // copy Alpha if exists + env->BitBlt(luma->GetWritePtr(PLANAR_A),luma->GetPitch(PLANAR_A),src->GetReadPtr(PLANAR_A),src->GetPitch(PLANAR_A),src->GetRowSize(PLANAR_A),src->GetHeight(PLANAR_A)); + return luma; } else { // avoid the cost of 2 chroma blits @@ -955,6 +964,9 @@ PVideoFrame __stdcall MergeLuma::GetFrame(int n, IScriptEnvironment* env) env->BitBlt(dst->GetWritePtr(PLANAR_U),dst->GetPitch(PLANAR_U),src->GetReadPtr(PLANAR_U),src->GetPitch(PLANAR_U),src->GetRowSize(PLANAR_U),src->GetHeight(PLANAR_U)); env->BitBlt(dst->GetWritePtr(PLANAR_V),dst->GetPitch(PLANAR_V),src->GetReadPtr(PLANAR_V),src->GetPitch(PLANAR_V),src->GetRowSize(PLANAR_V),src->GetHeight(PLANAR_V)); } + if (dst->GetPitch(PLANAR_A) && src->GetPitch(PLANAR_A)) // copy Alpha if in both clip exists + env->BitBlt(dst->GetWritePtr(PLANAR_A),dst->GetPitch(PLANAR_A),src->GetReadPtr(PLANAR_A),src->GetPitch(PLANAR_A),src->GetRowSize(PLANAR_A),src->GetHeight(PLANAR_A)); + return dst; } } else { // weight <= 0.9961f @@ -995,7 +1007,7 @@ MergeAll::MergeAll(PClip _child, PClip _clip, float _weight, IScriptEnvironment* if (vi.width!=vi2.width || vi.height!=vi2.height) env->ThrowError("Merge: Images must have same width and height!"); - pixelsize = vi.BytesFromPixels(1); + pixelsize = vi.ComponentSize(); if (weight<0.0f) weight=0.0f; if (weight>1.0f) weight=1.0f; @@ -1020,15 +1032,14 @@ PVideoFrame __stdcall MergeAll::GetFrame(int n, IScriptEnvironment* env) merge_plane(srcp, srcp2, src_pitch, src2->GetPitch(), src_rowsize, src->GetHeight(), weight, pixelsize, env); if (vi.IsPlanar()) { - BYTE* srcpU = (BYTE*)src->GetWritePtr(PLANAR_U); - BYTE* srcpV = (BYTE*)src->GetWritePtr(PLANAR_V); - BYTE* srcp2U = (BYTE*)src2->GetReadPtr(PLANAR_U); - BYTE* srcp2V = (BYTE*)src2->GetReadPtr(PLANAR_V); - - int src_rowsize = src->GetRowSize(PLANAR_U); - - merge_plane(srcpU, srcp2U, src->GetPitch(PLANAR_U), src2->GetPitch(PLANAR_U), src_rowsize, src->GetHeight(PLANAR_U), weight, pixelsize, env); - merge_plane(srcpV, srcp2V, src->GetPitch(PLANAR_V), src2->GetPitch(PLANAR_V), src_rowsize, src->GetHeight(PLANAR_V), weight, pixelsize, env); + const int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A}; + const int planesRGB[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A}; + const int *planes = (vi.IsYUV() || vi.IsYUVA()) ? 
planesYUV : planesRGB; + // first plane is already processed + for (int p = 1; p < vi.NumComponents(); p++) { + const int plane = planes[p]; + merge_plane(src->GetWritePtr(plane), src2->GetReadPtr(plane), src->GetPitch(plane), src2->GetPitch(plane), src->GetRowSize(plane), src->GetHeight(plane), weight, pixelsize, env); + } } return src; From 48eb6405027ea1a574fb64fafadd21deb577e43f Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 29 Aug 2016 13:50:17 +0200 Subject: [PATCH 021/120] fix: typo in default alpha value --- avs_core/convert/convert_rgb.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avs_core/convert/convert_rgb.cpp b/avs_core/convert/convert_rgb.cpp index 7f22747d4..b71e4702e 100644 --- a/avs_core/convert/convert_rgb.cpp +++ b/avs_core/convert/convert_rgb.cpp @@ -422,7 +422,7 @@ static void convert_rgbp_to_rgb_c(const BYTE *(&srcp)[4], BYTE * dstp, int (&src pixel_t R = reinterpret_cast(srcp[2])[x]; pixel_t A; if(target_numcomponents==4) // either from A channel or default transparent constant - A = hasSrcAlpha ? reinterpret_cast(srcp[3])[x] : (1<<(8*sizeof(pixel_t))) - -1; // 255/65535 + A = hasSrcAlpha ? reinterpret_cast(srcp[3])[x] : (1<<(8*sizeof(pixel_t))) -1; // 255/65535 reinterpret_cast(dstp)[x*target_numcomponents+0] = B; reinterpret_cast(dstp)[x*target_numcomponents+1] = G; reinterpret_cast(dstp)[x*target_numcomponents+2] = R; From 0b6c7ad384c00278ec1e9591079ecf8816cd1c5a Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 29 Aug 2016 19:26:21 +0200 Subject: [PATCH 022/120] make fill_plane and fill_chroma public --- avs_core/convert/convert_planar.cpp | 28 +++++++++++++++++++--------- avs_core/convert/convert_planar.h | 7 +++++++ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index 468b0fb9f..22ad7e754 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -1944,21 +1944,31 @@ ConvertToPlanarGeneric::ConvertToPlanarGeneric(PClip src, int dst_space, bool in delete filter; } -template -static inline void fill_chroma(BYTE* dstp_u, BYTE* dstp_v, int height, int pitch, pixel_type val) +// instantiate to let them access from other modules +template void fill_chroma(BYTE* dstp_u, BYTE* dstp_v, int height, int pitch, BYTE val); +template void fill_chroma(BYTE* dstp_u, BYTE* dstp_v, int height, int pitch, uint16_t val); +template void fill_chroma(BYTE* dstp_u, BYTE* dstp_v, int height, int pitch, float val); + +template void fill_plane(BYTE* dstp, int height, int pitch, BYTE val); +template void fill_plane(BYTE* dstp, int height, int pitch, uint16_t val); +template void fill_plane(BYTE* dstp, int height, int pitch, float val); + +template +inline void fill_chroma(BYTE* dstp_u, BYTE* dstp_v, int height, int pitch, pixel_t val) { - size_t size = height * pitch / sizeof(pixel_type); - std::fill_n(reinterpret_cast(dstp_u), size, val); - std::fill_n(reinterpret_cast(dstp_v), size, val); + size_t size = height * pitch / sizeof(pixel_t); + std::fill_n(reinterpret_cast(dstp_u), size, val); + std::fill_n(reinterpret_cast(dstp_v), size, val); } -template -static inline void fill_plane(BYTE* dstp, int height, int pitch, pixel_type val) +template +inline void fill_plane(BYTE* dstp, int height, int pitch, pixel_t val) { - size_t size = height * pitch / sizeof(pixel_type); - std::fill_n(reinterpret_cast(dstp), size, val); + size_t size = height * pitch / sizeof(pixel_t); + std::fill_n(reinterpret_cast(dstp), size, val); } + PVideoFrame 
__stdcall ConvertToPlanarGeneric::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame src = child->GetFrame(n, env); PVideoFrame dst = env->NewVideoFrame(vi); diff --git a/avs_core/convert/convert_planar.h b/avs_core/convert/convert_planar.h index 0f8f46f23..7e20f2e74 100644 --- a/avs_core/convert/convert_planar.h +++ b/avs_core/convert/convert_planar.h @@ -40,6 +40,13 @@ #include #include +// useful functions +template +inline void fill_chroma(BYTE* dstp_u, BYTE* dstp_v, int height, int pitch, pixel_t val); + +template +inline void fill_plane(BYTE* dstp, int height, int pitch, pixel_t val); + struct ChannelConversionMatrix { int16_t r; int16_t g; From 7f92e2c02325a1fa16916b69010c478196e878f6 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 29 Aug 2016 19:29:15 +0200 Subject: [PATCH 023/120] ShowRed/Green/Blue/Alpha() for RGB48/64 source. New targets RGB48/64/YV16/YV24/Y16/YUV420P16/YUV422P16/YUV444P16 --- avs_core/filters/layer.cpp | 320 +++++++++++++++++++++++++++---------- avs_core/filters/layer.h | 1 + 2 files changed, 237 insertions(+), 84 deletions(-) diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp index 57c880ef4..30891221f 100644 --- a/avs_core/filters/layer.cpp +++ b/avs_core/filters/layer.cpp @@ -43,7 +43,7 @@ #include #include "../core/internal.h" #include - +#include "../convert/convert_planar.h" /******************************************************************** @@ -663,32 +663,54 @@ AVSValue Invert::Create(AVSValue args, void*, IScriptEnvironment* env) ShowChannel::ShowChannel(PClip _child, const char * pixel_type, int _channel, IScriptEnvironment* env) - : GenericVideoFilter(_child), channel(_channel), input_type(_child->GetVideoInfo().pixel_type) + : GenericVideoFilter(_child), channel(_channel), input_type(_child->GetVideoInfo().pixel_type), pixelsize(_child->GetVideoInfo().ComponentSize()) { static const char * const ShowText[4] = {"Blue", "Green", "Red", "Alpha"}; - if ((channel == 3) && !vi.IsRGB32()) - env->ThrowError("ShowAlpha: RGB32 data only"); + if ((channel == 3) && !vi.IsRGB32() && !vi.IsRGB64()) + env->ThrowError("ShowAlpha: RGB32, RGB64 data only"); - if (!vi.IsRGB()) + if ((channel < 3) && !vi.IsRGB()) env->ThrowError("Show%s: RGB data only", ShowText[channel]); + if(vi.IsPlanarRGB() || vi.IsPlanarRGBA()) + env->ThrowError("Show%s: Planar RGB source is not supported", ShowText[channel]); + + int target_pixelsize; + if (!lstrcmpi(pixel_type, "rgb")) { - vi.pixel_type = VideoInfo::CS_BGR32; + switch(pixelsize) { + case 1: vi.pixel_type = VideoInfo::CS_BGR32; break; // bit-depth adaptive + case 2: vi.pixel_type = VideoInfo::CS_BGR64; break; + default: env->ThrowError("Show%s: source must be 8 or 16 bit", ShowText[channel]); + } + target_pixelsize = pixelsize; } else if (!lstrcmpi(pixel_type, "rgb32")) { + target_pixelsize = 1; vi.pixel_type = VideoInfo::CS_BGR32; } else if (!lstrcmpi(pixel_type, "rgb24")) { + target_pixelsize = 1; vi.pixel_type = VideoInfo::CS_BGR24; } + else if (!lstrcmpi(pixel_type, "rgb64")) { + target_pixelsize = 2; + vi.pixel_type = VideoInfo::CS_BGR64; + } + else if (!lstrcmpi(pixel_type, "rgb48")) { + target_pixelsize = 2; + vi.pixel_type = VideoInfo::CS_BGR48; + } else if (!lstrcmpi(pixel_type, "yuy2")) { + target_pixelsize = 1; if (vi.width & 1) { env->ThrowError("Show%s: width must be mod 2 for yuy2", ShowText[channel]); } vi.pixel_type = VideoInfo::CS_YUY2; } else if (!lstrcmpi(pixel_type, "yv12")) { + target_pixelsize = 1; if (vi.width & 1) { env->ThrowError("Show%s: width must be mod 2 for yv12", 
ShowText[channel]); } @@ -697,12 +719,51 @@ ShowChannel::ShowChannel(PClip _child, const char * pixel_type, int _channel, IS } vi.pixel_type = VideoInfo::CS_YV12; } + else if (!lstrcmpi(pixel_type, "yv16")) { + target_pixelsize = 1; + if (vi.width & 1) { + env->ThrowError("Show%s: width must be mod 2 for yv16", ShowText[channel]); + } + vi.pixel_type = VideoInfo::CS_YV16; + } + else if (!lstrcmpi(pixel_type, "yv24")) { + target_pixelsize = 1; + vi.pixel_type = VideoInfo::CS_YV24; + } + else if (!lstrcmpi(pixel_type, "yuv420p16")) { + target_pixelsize = 2; + if (vi.width & 1) { + env->ThrowError("Show%s: width must be mod 2 for YUV420P16", ShowText[channel]); + } + if (vi.height & 1) { + env->ThrowError("Show%s: height must be mod 2 for YUV420P16", ShowText[channel]); + } + vi.pixel_type = VideoInfo::CS_YUV420P16; + } + else if (!lstrcmpi(pixel_type, "yuv422p16")) { + target_pixelsize = 2; + if (vi.width & 1) { + env->ThrowError("Show%s: width must be mod 2 for YUV422P16", ShowText[channel]); + } + vi.pixel_type = VideoInfo::CS_YUV422P16; + } + else if (!lstrcmpi(pixel_type, "yuv444p16")) { + target_pixelsize = 2; + vi.pixel_type = VideoInfo::CS_YUV444P16; + } else if (!lstrcmpi(pixel_type, "y8")) { + target_pixelsize = 1; vi.pixel_type = VideoInfo::CS_Y8; } + else if (!lstrcmpi(pixel_type, "y16")) { + target_pixelsize = 2; + vi.pixel_type = VideoInfo::CS_Y16; + } else { - env->ThrowError("Show%s supports the following output pixel types: RGB, Y8, YUY2, or YV12", ShowText[channel]); + env->ThrowError("Show%s supports the following output pixel types: RGB, Y8, Y16, YUY2, or 8/16 bit YUV formats", ShowText[channel]); } + if(target_pixelsize != pixelsize) + env->ThrowError("Show%s: source must be %d bit for %s", ShowText[channel], pixelsize*8, pixel_type); } @@ -715,53 +776,89 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) const int pitch = f->GetPitch(); const int rowsize = f->GetRowSize(); - if (input_type == VideoInfo::CS_BGR32) { - if (vi.pixel_type == VideoInfo::CS_BGR32) + if (input_type == VideoInfo::CS_BGR32 || input_type == VideoInfo::CS_BGR64) { + if (vi.pixel_type == VideoInfo::CS_BGR32 || vi.pixel_type == VideoInfo::CS_BGR64) // RGB32->RGB32, RGB64->RGB64 { if (f->IsWritable()) { // we can do it in-place BYTE* dstp = f->GetWritePtr(); - for (int i=0; i(dstp); + dstp16[j + 0] = dstp16[j + 1] = dstp16[j + 2] = dstp16[j + channel]; + } + dstp += pitch; } - dstp += pitch; } return f; } - else { + else { // RGB32->RGB32 not in-place PVideoFrame dst = env->NewVideoFrame(vi); BYTE * dstp = dst->GetWritePtr(); const int dstpitch = dst->GetPitch(); - for (int i=0; i(dstp); + dstp16[j + 0] = dstp16[j + 1] = dstp16[j + 2] = reinterpret_cast(pf)[j + channel]; + dstp16[j + 3] = pf[j + 3]; + } + pf += pitch; + dstp += dstpitch; } - pf += pitch; - dstp += dstpitch; } return dst; } } - else if (vi.pixel_type == VideoInfo::CS_BGR24) + else if (vi.pixel_type == VideoInfo::CS_BGR24 || vi.pixel_type == VideoInfo::CS_BGR48) // RGB32->RGB24, RGB64->RGB48 { PVideoFrame dst = env->NewVideoFrame(vi); BYTE * dstp = dst->GetWritePtr(); const int dstpitch = dst->GetPitch(); - - for (int i=0; i(dstp); + dstp16[j*3 + 0] = dstp16[j*3 + 1] = dstp16[j*3 + 2] = reinterpret_cast(pf)[j*4 + channel]; + } + pf += pitch; + dstp += dstpitch; + } + } return dst; } - else if (vi.pixel_type == VideoInfo::CS_YUY2) + else if (vi.pixel_type == VideoInfo::CS_YUY2) // RGB32->YUY2 { PVideoFrame dst = env->NewVideoFrame(vi); BYTE * dstp = dst->GetWritePtr(); @@ -782,8 +879,9 @@ PVideoFrame 
ShowChannel::GetFrame(int n, IScriptEnvironment* env) return dst; } else - { - if ((vi.pixel_type == VideoInfo::CS_YV12) || (vi.pixel_type == VideoInfo::CS_Y8)) + { // RGB32->YV12/16/24/Y8 + 16bit + // 444, 422 support + 16 bits + if (vi.Is444() || vi.Is422() || vi.Is420() || vi.IsY()) // Y8, YV12, Y16, YUV420P16, etc. { PVideoFrame dst = env->NewVideoFrame(vi); BYTE * dstp = dst->GetWritePtr(); @@ -792,79 +890,125 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) // RGB is upside-down pf += (height-1) * pitch; - - for (int i=0; i(dstp)[j] = reinterpret_cast(pf)[j*4 + channel]; + } + pf -= pitch; + dstp += dstpitch; + } + } + if (!vi.IsY()) { dstpitch = dst->GetPitch(PLANAR_U); - dstrowsize = dst->GetRowSize(PLANAR_U_ALIGNED)/4; - const int dstheight = dst->GetHeight(PLANAR_U); - BYTE * dstpu = dst->GetWritePtr(PLANAR_U); - BYTE * dstpv = dst->GetWritePtr(PLANAR_V); - for (int i=0; iGetHeight(PLANAR_U); + BYTE * dstp_u = dst->GetWritePtr(PLANAR_U); + BYTE * dstp_v = dst->GetWritePtr(PLANAR_V); + switch (pixelsize) { + case 1: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, (BYTE)0x80); break; + case 2: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 0x8000); break; + case 4: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 0.5f); break; } } return dst; } } } - else if (input_type == VideoInfo::CS_BGR24) { - if (vi.pixel_type == VideoInfo::CS_BGR24) + else if (input_type == VideoInfo::CS_BGR24 || input_type == VideoInfo::CS_BGR48) { + if (vi.pixel_type == VideoInfo::CS_BGR24 || vi.pixel_type == VideoInfo::CS_BGR48) // RGB24->RGB24, RGB48->RGB48 { if (f->IsWritable()) { // we can do it in-place BYTE* dstp = f->GetWritePtr(); - for (int i=0; i(dstp); + dstp16[j + 0] = dstp16[j + 1] = dstp16[j + 2] = dstp16[j + channel]; + } + dstp += pitch; } - dstp += pitch; } return f; } - else { + else { // RGB24->RGB24 not in-place PVideoFrame dst = env->NewVideoFrame(vi); BYTE * dstp = dst->GetWritePtr(); const int dstpitch = dst->GetPitch(); - for (int i=0; i(dstp); + dstp16[j + 0] = dstp16[j + 1] = dstp16[j + 2] = reinterpret_cast(pf)[j + channel]; + } + pf += pitch; + dstp += dstpitch; } - pf += pitch; - dstp += dstpitch; } + + return dst; } } - else if (vi.pixel_type == VideoInfo::CS_BGR32) + else if (vi.pixel_type == VideoInfo::CS_BGR32 || vi.pixel_type == VideoInfo::CS_BGR64) // RGB24->RGB32 { PVideoFrame dst = env->NewVideoFrame(vi); BYTE * dstp = dst->GetWritePtr(); const int dstpitch = dst->GetPitch(); - for (int i=0; i(dstp); + dstp16[j*4 + 0] = dstp16[j*4 + 1] = dstp16[j*4 + 2] = dstp16[j*4 + 3] = reinterpret_cast(pf)[j*3 + channel]; + } + pf += pitch; + dstp += dstpitch; } - pf += pitch; - dstp += dstpitch; } return dst; } - else if (vi.pixel_type == VideoInfo::CS_YUY2) + else if (vi.pixel_type == VideoInfo::CS_YUY2) // RGB24->YUY2 { PVideoFrame dst = env->NewVideoFrame(vi); BYTE * dstp = dst->GetWritePtr(); @@ -885,8 +1029,8 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) return dst; } else - { - if ((vi.pixel_type == VideoInfo::CS_YV12) || (vi.pixel_type == VideoInfo::CS_Y8)) + { // // RGB24->YV12/16/24/Y8 + 16bit + if (vi.Is444() || vi.Is422() || vi.Is420() || vi.IsY()) // Y8, YV12, Y16, YUV420P16, etc. 
{ int i, j; // stupid VC6 @@ -898,26 +1042,34 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) // RGB is upside-down pf += (height-1) * pitch; - for (i=0; i(dstp)[j] = reinterpret_cast(pf)[j*3 + channel]; + } + pf -= pitch; + dstp += dstpitch; } - pf -= pitch; - dstp += dstpitch; } - if (vi.pixel_type == VideoInfo::CS_YV12) + if (!vi.IsY()) { dstpitch = dst->GetPitch(PLANAR_U); - dstrowsize = dst->GetRowSize(PLANAR_U_ALIGNED)/4; - const int dstheight = dst->GetHeight(PLANAR_U); - BYTE * dstpu = dst->GetWritePtr(PLANAR_U); - BYTE * dstpv = dst->GetWritePtr(PLANAR_V); - for (i=0; iGetHeight(PLANAR_U); + BYTE * dstp_u = dst->GetWritePtr(PLANAR_U); + BYTE * dstp_v = dst->GetWritePtr(PLANAR_V); + switch (pixelsize) { + case 1: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, (BYTE)0x80); break; + case 2: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 0x8000); break; + case 4: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 0.5f); break; } } return dst; diff --git a/avs_core/filters/layer.h b/avs_core/filters/layer.h index 2ea9f25be..f42e0997d 100644 --- a/avs_core/filters/layer.h +++ b/avs_core/filters/layer.h @@ -156,6 +156,7 @@ class ShowChannel : public GenericVideoFilter private: const int channel; const int input_type; + const int pixelsize; }; From 783fe2ed422011703c8c8b6301de2069c22ae309 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 29 Aug 2016 19:32:01 +0200 Subject: [PATCH 024/120] ShowRed/G/B/A parameter check message fix --- avs_core/filters/layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp index 30891221f..45e0ff036 100644 --- a/avs_core/filters/layer.cpp +++ b/avs_core/filters/layer.cpp @@ -763,7 +763,7 @@ ShowChannel::ShowChannel(PClip _child, const char * pixel_type, int _channel, IS env->ThrowError("Show%s supports the following output pixel types: RGB, Y8, Y16, YUY2, or 8/16 bit YUV formats", ShowText[channel]); } if(target_pixelsize != pixelsize) - env->ThrowError("Show%s: source must be %d bit for %s", ShowText[channel], pixelsize*8, pixel_type); + env->ThrowError("Show%s: source must be %d bit for %s", ShowText[channel], target_pixelsize*8, pixel_type); } From 98376c942bbe201c6b2bc589e7561f35ccd64851 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 29 Aug 2016 19:42:30 +0200 Subject: [PATCH 025/120] ShowChannel: kill warnings --- avs_core/filters/layer.cpp | 39 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp index 45e0ff036..ca5c07d0f 100644 --- a/avs_core/filters/layer.cpp +++ b/avs_core/filters/layer.cpp @@ -776,16 +776,17 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) const int pitch = f->GetPitch(); const int rowsize = f->GetRowSize(); + const int width = rowsize / pixelsize; + if (input_type == VideoInfo::CS_BGR32 || input_type == VideoInfo::CS_BGR64) { if (vi.pixel_type == VideoInfo::CS_BGR32 || vi.pixel_type == VideoInfo::CS_BGR64) // RGB32->RGB32, RGB64->RGB64 { if (f->IsWritable()) { // we can do it in-place BYTE* dstp = f->GetWritePtr(); - if(pixelsize==1) { for (int i=0; i(dstp); dstp16[j + 0] = dstp16[j + 1] = dstp16[j + 2] = dstp16[j + channel]; } @@ -809,7 +810,7 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) if(pixelsize==1) { for (int i=0; i(dstp); dstp16[j + 0] = dstp16[j + 1] = dstp16[j + 2] = reinterpret_cast(pf)[j + channel]; dstp16[j + 3] = pf[j + 3]; @@ -838,7 +839,7 @@ PVideoFrame 
ShowChannel::GetFrame(int n, IScriptEnvironment* env) const int dstpitch = dst->GetPitch(); if(pixelsize==1) { for (int i=0; i(dstp); dstp16[j*3 + 0] = dstp16[j*3 + 1] = dstp16[j*3 + 2] = reinterpret_cast(pf)[j*4 + channel]; } @@ -886,7 +887,7 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) PVideoFrame dst = env->NewVideoFrame(vi); BYTE * dstp = dst->GetWritePtr(); int dstpitch = dst->GetPitch(); - int dstrowsize = dst->GetRowSize(); + int dstwidth = dst->GetRowSize() / pixelsize; // RGB is upside-down pf += (height-1) * pitch; @@ -894,7 +895,7 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) // copy to luma if(pixelsize==1) { for (int i=0; i(dstp)[j] = reinterpret_cast(pf)[j*4 + channel]; } pf -= pitch; @@ -935,7 +936,7 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) if(pixelsize==1) { for (int i=0; i(dstp); dstp16[j + 0] = dstp16[j + 1] = dstp16[j + 2] = dstp16[j + channel]; } @@ -959,7 +960,7 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) if(pixelsize==1) { for (int i=0; i(dstp); dstp16[j + 0] = dstp16[j + 1] = dstp16[j + 2] = reinterpret_cast(pf)[j + channel]; } @@ -989,7 +990,7 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) if(pixelsize==1) { for (int i=0; i(dstp); dstp16[j*4 + 0] = dstp16[j*4 + 1] = dstp16[j*4 + 2] = dstp16[j*4 + 3] = reinterpret_cast(pf)[j*3 + channel]; } @@ -1037,14 +1038,14 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) PVideoFrame dst = env->NewVideoFrame(vi); BYTE * dstp = dst->GetWritePtr(); int dstpitch = dst->GetPitch(); - int dstrowsize = dst->GetRowSize(); + int dstwidth = dst->GetRowSize() / pixelsize; // RGB is upside-down pf += (height-1) * pitch; if(pixelsize==1) { for (i=0; i(dstp)[j] = reinterpret_cast(pf)[j*3 + channel]; } pf -= pitch; From ef2eb0df855bc5e5719d62f65f2a7be61734a6f0 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Tue, 30 Aug 2016 11:56:02 +0200 Subject: [PATCH 026/120] ConvertToY: RGB48/64, PlanarRGB8/16/float support --- avs_core/convert/convert_planar.cpp | 171 ++++++++++++++++++++++------ avs_core/convert/convert_planar.h | 12 +- 2 files changed, 148 insertions(+), 35 deletions(-) diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index 22ad7e754..f775d1c0e 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -53,64 +53,96 @@ static ResamplingFunction* getResampler( const char* resampler, IScriptEnvironme ConvertToY8::ConvertToY8(PClip src, int in_matrix, IScriptEnvironment* env) : GenericVideoFilter(src) { - yuy2_input = blit_luma_only = rgb_input = false; + yuy2_input = blit_luma_only = packed_rgb_input = planar_rgb_input = false; - if (vi.IsPlanar()) { + int target_pixel_type; + switch (vi.BitsPerComponent()) + { + case 8: target_pixel_type = VideoInfo::CS_Y8; break; + case 10: target_pixel_type = VideoInfo::CS_Y10; break; + case 12: target_pixel_type = VideoInfo::CS_Y12; break; + case 14: target_pixel_type = VideoInfo::CS_Y14; break; + case 16: target_pixel_type = VideoInfo::CS_Y16; break; + case 32: target_pixel_type = VideoInfo::CS_Y32; break; + default: + env->ThrowError("ConvertToY does not support %d-bit formats.", vi.BitsPerComponent()); + } + + pixelsize = vi.ComponentSize(); + + if (vi.IsPlanar() && (vi.IsYUV() || vi.IsYUVA())) { // not for Planar RGB blit_luma_only = true; - switch (vi.ComponentSize()) - { - case 1: vi.pixel_type = VideoInfo::CS_Y8; break; - case 2: vi.pixel_type = VideoInfo::CS_Y16; break; - case 4: 
vi.pixel_type = VideoInfo::CS_Y32; break; - default: - env->ThrowError("ConvertToY does not support %d-byte formats.", vi.ComponentSize()); - } + vi.pixel_type = target_pixel_type; return; } - if (vi.IsYUY2()) { + if (vi.IsYUY2()) { yuy2_input = true; - vi.pixel_type = VideoInfo::CS_Y8; + vi.pixel_type = target_pixel_type; return; } - if (vi.IsRGB()) { - rgb_input = true; - pixel_step = vi.BytesFromPixels(1); - vi.pixel_type = VideoInfo::CS_Y8; + if (vi.IsRGB()) { // also Planar RGB + if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) + planar_rgb_input = true; + else + packed_rgb_input = true; + pixel_step = vi.BytesFromPixels(1); // for packed RGB 3,4,6,8 + vi.pixel_type = target_pixel_type; + if (in_matrix == Rec601) { matrix.b = (int16_t)((219.0/255.0)*0.114*32768.0+0.5); //B matrix.g = (int16_t)((219.0/255.0)*0.587*32768.0+0.5); //G matrix.r = (int16_t)((219.0/255.0)*0.299*32768.0+0.5); //R - matrix.offset_y = 16; + matrix.b_f = (float)((219.0/255.0)*0.114); //B + matrix.g_f = (float)((219.0/255.0)*0.587); //G + matrix.r_f = (float)((219.0/255.0)*0.299); //R + matrix.offset_y = pixelsize == 1 ? 16 : 16*256; + matrix.offset_y_f = 16.0f / 256.0f; } else if (in_matrix == PC_601) { matrix.b = (int16_t)(0.114*32768.0+0.5); //B matrix.g = (int16_t)(0.587*32768.0+0.5); //G matrix.r = (int16_t)(0.299*32768.0+0.5); //R + matrix.b_f = 0.114f; //B + matrix.g_f = 0.587f; //G + matrix.r_f = 0.299f; //R matrix.offset_y = 0; + matrix.offset_y_f = 0; } else if (in_matrix == Rec709) { matrix.b = (int16_t)((219.0/255.0)*0.0722*32768.0+0.5); //B matrix.g = (int16_t)((219.0/255.0)*0.7152*32768.0+0.5); //G matrix.r = (int16_t)((219.0/255.0)*0.2126*32768.0+0.5); //R - matrix.offset_y = 16; + matrix.b_f = (float)((219.0/255.0)*0.0722); //B + matrix.g_f = (float)((219.0/255.0)*0.7152); //G + matrix.r_f = (float)((219.0/255.0)*0.2126); //R + matrix.offset_y = pixelsize == 1 ? 
16 : 16*256; + matrix.offset_y_f = 16.0f / 256.0f; } else if (in_matrix == PC_709) { matrix.b = (int16_t)(0.0722*32768.0+0.5); //B matrix.g = (int16_t)(0.7152*32768.0+0.5); //G matrix.r = (int16_t)(0.2126*32768.0+0.5); //R + matrix.b_f = 0.0722f; //B + matrix.g_f = 0.7152f; //G + matrix.r_f = 0.2126f; //R matrix.offset_y = 0; + matrix.offset_y_f = 0; } else if (in_matrix == AVERAGE) { matrix.b = (int16_t)(32768.0/3 + 0.5); //B matrix.g = (int16_t)(32768.0/3 + 0.5); //G matrix.r = (int16_t)(32768.0/3 + 0.5); //R + matrix.b_f = (float)(1.0/3); //B + matrix.g_f = (float)(1.0/3); //G + matrix.r_f = (float)(1.0/3); //R matrix.offset_y = 0; + matrix.offset_y_f = 0; } else { - env->ThrowError("ConvertToY8: Unknown matrix."); + env->ThrowError("ConvertToY: Unknown matrix."); } return; } - env->ThrowError("ConvertToY8: Unknown input format"); + env->ThrowError("ConvertToY: Unknown input format"); } @@ -346,7 +378,8 @@ PVideoFrame __stdcall ConvertToY8::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame dst = env->NewVideoFrame(vi); BYTE* dstp = dst->GetWritePtr(PLANAR_Y); const int dst_pitch = dst->GetPitch(PLANAR_Y); - int width = dst->GetRowSize(PLANAR_Y); + int rowsize = dst->GetRowSize(PLANAR_Y); + int width = rowsize / pixelsize; int height = dst->GetHeight(PLANAR_Y); if (yuy2_input) { @@ -370,20 +403,26 @@ PVideoFrame __stdcall ConvertToY8::GetFrame(int n, IScriptEnvironment* env) { return dst; } - if (rgb_input) { + if (packed_rgb_input) { srcp += src_pitch * (vi.height-1); // We start at last line - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16)) { + if ((pixelsize==1) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16)) { if (pixel_step == 4) { convert_rgb32_to_y8_sse2(srcp, dstp, src_pitch, dst_pitch, width, height, matrix); - } else { + } else if(pixel_step == 3) { convert_rgb24_to_y8_sse2(srcp, dstp, src_pitch, dst_pitch, width, height, matrix); + } else if(pixel_step == 8) { + //todo + //convert_rgb64_to_y8_sse2(srcp, dstp, src_pitch, dst_pitch, width, height, matrix); + } else if(pixel_step == 6) { + // todo + //convert_rgb48_to_y8_sse2(srcp, dstp, src_pitch, dst_pitch, width, height, matrix); } return dst; } #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) { + if ((pixelsize==1) && (env->GetCPUFlags() & CPUF_MMX)) { if (pixel_step == 4) { convert_rgb32_to_y8_mmx(srcp, dstp, src_pitch, dst_pitch, width, height, matrix); } else { @@ -393,15 +432,83 @@ PVideoFrame __stdcall ConvertToY8::GetFrame(int n, IScriptEnvironment* env) { } #endif + // Slow C const int srcMod = src_pitch + width * pixel_step; - for (int y=0; y> 15); - dstp[x] = PixelClip(Y); // All the safety we can wish for. - srcp += pixel_step; + if(pixelsize==1) { + for (int y=0; y> 15); + dstp[x] = PixelClip(Y); // All the safety we can wish for. + srcp += pixel_step; // 3,4 + } + srcp -= srcMod; + dstp += dst_pitch; + } + } + else { // pixelsize==2 + for (int y=0; y(srcp); + // int overflows! + const int Y = matrix.offset_y + (int)(((__int64)matrix.b * srcp16[0] + (__int64)matrix.g * srcp16[1] + (__int64)matrix.r * srcp16[2] + 16384) >> 15); + reinterpret_cast(dstp)[x] = clamp(Y,0,65535); // All the safety we can wish for. + + // __int64 version is a bit faster + //const float Y = matrix.offset_y_f + (matrix.b_f * srcp16[0] + matrix.g_f * srcp16[1] + matrix.r_f * srcp16[2]); + //reinterpret_cast(dstp)[x] = (uint16_t)clamp((int)Y,0,65535); // All the safety we can wish for. 
+ srcp += pixel_step; // 6,8 + } + srcp -= srcMod; + dstp += dst_pitch; + } + } + } + + if (planar_rgb_input) + { + const BYTE *srcpG = src->GetReadPtr(PLANAR_G); + const BYTE *srcpB = src->GetReadPtr(PLANAR_B); + const BYTE *srcpR = src->GetReadPtr(PLANAR_R); + const int pitchG = src->GetPitch(PLANAR_G); + const int pitchB = src->GetPitch(PLANAR_B); + const int pitchR = src->GetPitch(PLANAR_R); + if(pixelsize==1) { + for (int y=0; y> 15); + dstp[x] = PixelClip(Y); // All the safety we can wish for. + } + srcpG += pitchG; srcpB += pitchB; srcpR += pitchR; + dstp += dst_pitch; + } + } else if(pixelsize==2) { + for (int y=0; y(srcpB)[x] + + (__int64)matrix.g * reinterpret_cast(srcpG)[x] + + (__int64)matrix.r * reinterpret_cast(srcpR)[x] + + 16384) >> 15); + reinterpret_cast(dstp)[x] = (uint16_t)clamp(Y,0,65535); // All the safety we can wish for. + } + srcpG += pitchG; srcpB += pitchB; srcpR += pitchR; + dstp += dst_pitch; + } + } + else if (pixelsize==4) { + for (int y=0; y(srcpB)[x] + + matrix.g_f * reinterpret_cast(srcpG)[x] + + matrix.r_f * reinterpret_cast(srcpR)[x] + ); + reinterpret_cast(dstp)[x] = clamp(Y,0.0f,65535.0f); // All the safety we can wish for. + } + srcpG += pitchG; srcpB += pitchB; srcpR += pitchR; + dstp += dst_pitch; } - srcp -= srcMod; - dstp += dst_pitch; } } return dst; diff --git a/avs_core/convert/convert_planar.h b/avs_core/convert/convert_planar.h index 7e20f2e74..ace585f66 100644 --- a/avs_core/convert/convert_planar.h +++ b/avs_core/convert/convert_planar.h @@ -48,10 +48,14 @@ template inline void fill_plane(BYTE* dstp, int height, int pitch, pixel_t val); struct ChannelConversionMatrix { - int16_t r; + int16_t r; // for 15bit scaled integer arithmetic int16_t g; int16_t b; - int offset_y; + float r_f; // for float operation + float g_f; + float b_f; + int offset_y; // for 8 or 16 bit + float offset_y_f; // for float }; class ConvertToY8 : public GenericVideoFilter @@ -68,8 +72,10 @@ class ConvertToY8 : public GenericVideoFilter private: bool blit_luma_only; bool yuy2_input; - bool rgb_input; + bool packed_rgb_input; + bool planar_rgb_input; int pixel_step; + int pixelsize; ChannelConversionMatrix matrix; }; From 417282b2c639cc7b06afc811b42f0b17f7da3e6e Mon Sep 17 00:00:00 2001 From: Pinterf Date: Tue, 30 Aug 2016 12:53:54 +0200 Subject: [PATCH 027/120] HorizontalReduceBy2: RGB48/64. No todo left here. 
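The 8-bit and 16-bit branches added below apply the same per-channel 1-2-1 horizontal kernel; only the sample type (BYTE vs. uint16_t) and the channel count (3 for RGB24/48, 4 for RGB32/64) differ. As a rough sketch of that shared inner loop (illustrative only, not code added by this patch; pitch/gap bookkeeping and the odd-width tail are omitted, and reduce_row_by2 is a hypothetical name):

    template <typename sample_t, int channels>
    static void reduce_row_by2(const sample_t* srcp, sample_t* dstp, int src_width)
    {
      // one output pixel from three input pixels, weights 1-2-1, with rounding
      for (int x = (src_width - 1) >> 1; x; --x) {
        for (int c = 0; c < channels; ++c)
          dstp[c] = (sample_t)((srcp[c] + 2 * srcp[c + channels] + srcp[c + 2 * channels] + 2) >> 2);
        dstp += channels;     // advance one output pixel
        srcp += 2 * channels; // advance two input pixels
      }
    }
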
--- avs_core/filters/resize.cpp | 132 +++++++++++++++++++++++++----------- avs_core/filters/resize.h | 1 + 2 files changed, 95 insertions(+), 38 deletions(-) diff --git a/avs_core/filters/resize.cpp b/avs_core/filters/resize.cpp index 5dd135b60..48f5b24f2 100644 --- a/avs_core/filters/resize.cpp +++ b/avs_core/filters/resize.cpp @@ -316,7 +316,7 @@ PVideoFrame VerticalReduceBy2::GetFrame(int n, IScriptEnvironment* env) { HorizontalReduceBy2::HorizontalReduceBy2(PClip _child, IScriptEnvironment* env) : GenericVideoFilter(_child), mybuffer(0) { - if (vi.IsPlanar() && (vi.NumComponents() > 1)) { + if (vi.IsPlanar() && (vi.IsYUV() || vi.IsYUVA()) && (vi.NumComponents() > 1)) { const int mod = 2 << vi.GetPlaneWidthSubsampling(PLANAR_U); const int mask = mod - 1; if (vi.width & mask) @@ -329,6 +329,7 @@ HorizontalReduceBy2::HorizontalReduceBy2(PClip _child, IScriptEnvironment* env) if (vi.IsYUY2() && (vi.width & 3)) env->ThrowError("HorizontalReduceBy2: YUY2 output image width must be even"); + pixelsize = vi.ComponentSize(); source_width = vi.width; vi.width >>= 1; } @@ -410,52 +411,107 @@ PVideoFrame HorizontalReduceBy2::GetFrame(int n, IScriptEnvironment* env) srcp += src_gap+8; } - } else if (vi.IsRGB24()) { + } else if (vi.IsRGB24() || vi.IsRGB48()) { const BYTE* srcp = src->GetReadPtr(); - for (int y = vi.height; y>0; --y) { - for (int x = (source_width-1)>>1; x; --x) { - dstp[0] = (srcp[0] + 2*srcp[3] + srcp[6] + 2) >> 2; - dstp[1] = (srcp[1] + 2*srcp[4] + srcp[7] + 2) >> 2; - dstp[2] = (srcp[2] + 2*srcp[5] + srcp[8] + 2) >> 2; - dstp += 3; - srcp += 6; + if(pixelsize==1) { + for (int y = vi.height; y>0; --y) { + for (int x = (source_width-1)>>1; x; --x) { + dstp[0] = (srcp[0] + 2*srcp[3] + srcp[6] + 2) >> 2; + dstp[1] = (srcp[1] + 2*srcp[4] + srcp[7] + 2) >> 2; + dstp[2] = (srcp[2] + 2*srcp[5] + srcp[8] + 2) >> 2; + dstp += 3; + srcp += 6; + } + if (source_width&1) { + dstp += dst_gap; + srcp += src_gap+3; + } else { + dstp[0] = (srcp[0] + srcp[3] + 1) >> 1; + dstp[1] = (srcp[1] + srcp[4] + 1) >> 1; + dstp[2] = (srcp[2] + srcp[5] + 1) >> 1; + dstp += dst_gap+3; + srcp += src_gap+6; + } } - if (source_width&1) { - dstp += dst_gap; - srcp += src_gap+3; - } else { - dstp[0] = (srcp[0] + srcp[3] + 1) >> 1; - dstp[1] = (srcp[1] + srcp[4] + 1) >> 1; - dstp[2] = (srcp[2] + srcp[5] + 1) >> 1; - dstp += dst_gap+3; - srcp += src_gap+6; + } + else { // pixelsize==2 RGB48 + uint16_t *dstp16 = reinterpret_cast(dstp); + const uint16_t *srcp16 = reinterpret_cast(srcp); + dst_gap /= sizeof(uint16_t); + src_gap /= sizeof(uint16_t); + for (int y = vi.height; y>0; --y) { + for (int x = (source_width-1)>>1; x; --x) { + dstp16[0] = (srcp16[0] + 2*srcp16[3] + srcp16[6] + 2) >> 2; + dstp16[1] = (srcp16[1] + 2*srcp16[4] + srcp16[7] + 2) >> 2; + dstp16[2] = (srcp16[2] + 2*srcp16[5] + srcp16[8] + 2) >> 2; + dstp16 += 3; + srcp16 += 6; + } + if (source_width&1) { + dstp16 += dst_gap; + srcp16 += src_gap+3; + } else { + dstp16[0] = (srcp16[0] + srcp16[3] + 1) >> 1; + dstp16[1] = (srcp16[1] + srcp16[4] + 1) >> 1; + dstp16[2] = (srcp16[2] + srcp16[5] + 1) >> 1; + dstp16 += dst_gap+3; + srcp16 += src_gap+6; + } } } - } else if (vi.IsRGB32()) { //rgb32 + } else if (vi.IsRGB32() || vi.IsRGB64()) { //rgb32 const BYTE* srcp = src->GetReadPtr(); - for (int y = vi.height; y>0; --y) { - for (int x = (source_width-1)>>1; x; --x) { - dstp[0] = (srcp[0] + 2*srcp[4] + srcp[8] + 2) >> 2; - dstp[1] = (srcp[1] + 2*srcp[5] + srcp[9] + 2) >> 2; - dstp[2] = (srcp[2] + 2*srcp[6] + srcp[10] + 2) >> 2; - dstp[3] = (srcp[3] + 2*srcp[7] + 
srcp[11] + 2) >> 2; - dstp += 4; - srcp += 8; + if(pixelsize==1) { + for (int y = vi.height; y>0; --y) { + for (int x = (source_width-1)>>1; x; --x) { + dstp[0] = (srcp[0] + 2*srcp[4] + srcp[8] + 2) >> 2; + dstp[1] = (srcp[1] + 2*srcp[5] + srcp[9] + 2) >> 2; + dstp[2] = (srcp[2] + 2*srcp[6] + srcp[10] + 2) >> 2; + dstp[3] = (srcp[3] + 2*srcp[7] + srcp[11] + 2) >> 2; + dstp += 4; + srcp += 8; + } + if (source_width&1) { + dstp += dst_gap; + srcp += src_gap+4; + } else { + dstp[0] = (srcp[0] + srcp[4] + 1) >> 1; + dstp[1] = (srcp[1] + srcp[5] + 1) >> 1; + dstp[2] = (srcp[2] + srcp[6] + 1) >> 1; + dstp[3] = (srcp[3] + srcp[7] + 1) >> 1; + dstp += dst_gap+4; + srcp += src_gap+8; + } } - if (source_width&1) { - dstp += dst_gap; - srcp += src_gap+4; - } else { - dstp[0] = (srcp[0] + srcp[4] + 1) >> 1; - dstp[1] = (srcp[1] + srcp[5] + 1) >> 1; - dstp[2] = (srcp[2] + srcp[6] + 1) >> 1; - dstp[3] = (srcp[3] + srcp[7] + 1) >> 1; - dstp += dst_gap+4; - srcp += src_gap+8; + } + else { // pixelsize==2 rgb64 + uint16_t *dstp16 = reinterpret_cast(dstp); + const uint16_t *srcp16 = reinterpret_cast(srcp); + dst_gap /= sizeof(uint16_t); + src_gap /= sizeof(uint16_t); + for (int y = vi.height; y>0; --y) { + for (int x = (source_width-1)>>1; x; --x) { + dstp16[0] = (srcp16[0] + 2*srcp16[4] + srcp16[8] + 2) >> 2; + dstp16[1] = (srcp16[1] + 2*srcp16[5] + srcp16[9] + 2) >> 2; + dstp16[2] = (srcp16[2] + 2*srcp16[6] + srcp16[10] + 2) >> 2; + dstp16[3] = (srcp16[3] + 2*srcp16[7] + srcp16[11] + 2) >> 2; + dstp16 += 4; + srcp16 += 8; + } + if (source_width&1) { + dstp16 += dst_gap; + srcp16 += src_gap+4; + } else { + dstp16[0] = (srcp16[0] + srcp16[4] + 1) >> 1; + dstp16[1] = (srcp16[1] + srcp16[5] + 1) >> 1; + dstp16[2] = (srcp16[2] + srcp16[6] + 1) >> 1; + dstp16[3] = (srcp16[3] + srcp16[7] + 1) >> 1; + dstp16 += dst_gap+4; + srcp16 += src_gap+8; + } } } } - // todo RGB48/RGB64 return dst; } diff --git a/avs_core/filters/resize.h b/avs_core/filters/resize.h index 4ffe55b0b..df9f1b265 100644 --- a/avs_core/filters/resize.h +++ b/avs_core/filters/resize.h @@ -84,6 +84,7 @@ class HorizontalReduceBy2 : public GenericVideoFilter private: BYTE *mybuffer; int source_width; + int pixelsize; }; From 5b537c36695bf021d4b9834696271666ac7c4a09 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Tue, 30 Aug 2016 13:45:05 +0200 Subject: [PATCH 028/120] resize_h_c_planar: much faster by changing x-y loop order --- avs_core/filters/resample.cpp | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/avs_core/filters/resample.cpp b/avs_core/filters/resample.cpp index 2bc119027..c801912d0 100644 --- a/avs_core/filters/resample.cpp +++ b/avs_core/filters/resample.cpp @@ -46,6 +46,7 @@ #include // Intrinsics for SSE4.1, SSSE3, SSE3, SSE2, ISSE and MMX #include +#include /*************************************** ********* Templated SSE Loader ******** @@ -560,6 +561,7 @@ static void resize_h_pointresize(BYTE* dst, const BYTE* src, int dst_pitch, int } } +// make the resampling coefficient array mod8 friendly for simd, padding non-used coeffs with zeros static void resize_h_prepare_coeff_8(ResamplingProgram* p, IScriptEnvironment2* env) { int filter_size = AlignNumber(p->filter_size, 8); short* new_coeff = (short*) env->Allocate(sizeof(short) * p->target_size * filter_size, 64, AVS_NORMAL_ALLOC); @@ -571,9 +573,9 @@ static void resize_h_prepare_coeff_8(ResamplingProgram* p, IScriptEnvironment2* } memset(new_coeff, 0, sizeof(short) * p->target_size * filter_size); - memset(new_coeff_float, 0, sizeof(float) * 
p->target_size * filter_size); + std::fill_n(new_coeff_float, p->target_size * filter_size, 0.0f); - // Copy coeff + // Copy existing coeff short *dst = new_coeff, *src = p->pixel_coefficient; float *dst_f = new_coeff_float, *src_f = p->pixel_coefficient_float; for (int i = 0; i < p->target_size; i++) { @@ -602,11 +604,6 @@ static void resize_h_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src typedef typename std::conditional < std::is_floating_point::value, float, short>::type coeff_t; coeff_t *current_coeff; - if (!std::is_floating_point::value) - current_coeff = (coeff_t *)program->pixel_coefficient; - else - current_coeff = (coeff_t *)program->pixel_coefficient_float; - pixel_t limit = 0; if (!std::is_floating_point::value) { // floats are unscaled and uncapped if (sizeof(pixel_t) == 1) limit = 255; @@ -619,9 +616,14 @@ static void resize_h_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src pixel_t* src0 = (pixel_t*)src; pixel_t* dst0 = (pixel_t*)dst; - for (int x = 0; x < width; x++) { - int begin = program->pixel_offset[x]; - for (int y = 0; y < height; y++) { + // external loop y is much faster + for (int y = 0; y < height; y++) { + if (!std::is_floating_point::value) + current_coeff = (coeff_t *)program->pixel_coefficient; + else + current_coeff = (coeff_t *)program->pixel_coefficient_float; + for (int x = 0; x < width; x++) { + int begin = program->pixel_offset[x]; // todo: check whether int result is enough for 16 bit samples (can an int overflow because of 16384 scale or really need __int64?) typename std::conditional < sizeof(pixel_t) == 1, int, typename std::conditional < sizeof(pixel_t) == 2, __int64, float>::type >::type result; result = 0; @@ -633,8 +635,8 @@ static void resize_h_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src result = clamp(result, decltype(result)(0), decltype(result)(limit)); } (dst0 + y*dst_pitch)[x] = (pixel_t)result; + current_coeff += filter_size; } - current_coeff += filter_size; } } @@ -1068,6 +1070,7 @@ ResamplerH FilteredResizeH::GetResampler(int CPU, bool aligned, int pixelsize, R if (pixelsize == 1) { if (CPU & CPUF_SSSE3) { + // make the resampling coefficient array mod8 friendly for simd, padding non-used coeffs with zeros resize_h_prepare_coeff_8(program, env); if (program->filter_size > 8) return resizer_h_ssse3_generic; From 3e8f2f873b0cd87d80d79bdb152885d2a7bd42da Mon Sep 17 00:00:00 2001 From: Pinterf Date: Tue, 30 Aug 2016 15:55:54 +0200 Subject: [PATCH 029/120] resizer_h_ssse3_generic for 16bit/float. Resizers are superfast now for all bit depths. 
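The new SSSE3 path works in float for both the 16-bit and the float clip: 8 source samples are loaded per step, 16-bit samples are widened with unpack + _mm_cvtepi32_ps, multiplied by 8 aligned float coefficients, and the four per-pixel sums are folded with _mm_hadd_ps; uint16_t output is then clamped and packed (SSE4.1 _mm_packus_epi32, or the emulation without it) while float output is stored directly. A scalar reference of what each output pixel computes (sketch only; the function name is illustrative, and rounding is approximated with +0.5f where the SIMD code relies on _mm_cvtps_epi32):

    template <typename pixel_t>
    static pixel_t resample_h_one_pixel(const pixel_t* src, const float* coeff,
                                        int begin, int filter_size)
    {
      float acc = 0.0f;
      for (int i = 0; i < filter_size; ++i)
        acc += coeff[i] * (float)src[begin + i]; // data * coefficient, accumulated
      if (sizeof(pixel_t) == 2) {
        int v = (int)(acc + 0.5f);               // 16-bit result is clamped to 0..65535
        return (pixel_t)(v < 0 ? 0 : v > 65535 ? 65535 : v);
      }
      return (pixel_t)acc;                       // float result is stored unscaled
    }
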
--- avs_core/filters/resample.cpp | 190 ++++++++++++++++++++++++++++++++-- 1 file changed, 184 insertions(+), 6 deletions(-) diff --git a/avs_core/filters/resample.cpp b/avs_core/filters/resample.cpp index c801912d0..07a489bee 100644 --- a/avs_core/filters/resample.cpp +++ b/avs_core/filters/resample.cpp @@ -45,6 +45,7 @@ #include // Intrinsics for SSE4.1, SSSE3, SSE3, SSE2, ISSE and MMX +#include #include #include @@ -640,6 +641,168 @@ static void resize_h_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src } } +template +static void resizer_h_ssse3_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height) { + int filter_size = AlignNumber(program->filter_size, 8) / 8; + __m128i zero = _mm_setzero_si128(); + + const pixel_t *src = reinterpret_cast(src8); + pixel_t *dst = reinterpret_cast(dst8); + dst_pitch /= sizeof(pixel_t); + src_pitch /= sizeof(pixel_t); + + for (int y = 0; y < height; y++) { + float* current_coeff = program->pixel_coefficient_float; + for (int x = 0; x < width; x+=4) { + __m128 result1 = _mm_set1_ps(0.0f); + __m128 result2 = result1; + __m128 result3 = result1; + __m128 result4 = result1; + + int begin1 = program->pixel_offset[x+0]; + int begin2 = program->pixel_offset[x+1]; + int begin3 = program->pixel_offset[x+2]; + int begin4 = program->pixel_offset[x+3]; + + // begin1, result1 + for (int i = 0; i < filter_size; i++) { + __m128 data_l_single, data_h_single; + if(sizeof(pixel_t)==2) // word + { + // unaligned + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin1+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero); // spread higher 4*uint16_t pixel value -> 4*32 bit + data_l_single = _mm_cvtepi32_ps (src_l); // Converts the four signed 32-bit integer values of a to single-precision, floating-point values. + data_h_single = _mm_cvtepi32_ps (src_h); + } + else { // float + // unaligned + data_l_single = _mm_loadu_ps(reinterpret_cast(src+begin1+i*8)); // float 4*32=128 4 pixels at a time + data_h_single = _mm_loadu_ps(reinterpret_cast(src+begin1+i*8+4)); // float 4*32=128 4 pixels at a time + } + __m128 coeff_l = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m128 coeff_h = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff+4)); // always aligned + __m128 dst_l = _mm_mul_ps(data_l_single, coeff_l); // Multiply by coefficient + __m128 dst_h = _mm_mul_ps(data_h_single, coeff_h); // 4*(32bit*32bit=32bit) + result1 = _mm_add_ps(result1, dst_l); // accumulate result. + result1 = _mm_add_ps(result1, dst_h); + + current_coeff += 8; + } + + // begin2, result2 + for (int i = 0; i < filter_size; i++) { + __m128 data_l_single, data_h_single; + if(sizeof(pixel_t)==2) // word + { + // unaligned + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin2+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero); // spread higher 4*uint16_t pixel value -> 4*32 bit + data_l_single = _mm_cvtepi32_ps (src_l); // Converts the four signed 32-bit integer values of a to single-precision, floating-point values. 
+ data_h_single = _mm_cvtepi32_ps (src_h); + } + else { // float + // unaligned + data_l_single = _mm_loadu_ps(reinterpret_cast(src+begin2+i*8)); // float 4*32=128 4 pixels at a time + data_h_single = _mm_loadu_ps(reinterpret_cast(src+begin2+i*8+4)); // float 4*32=128 4 pixels at a time + } + __m128 coeff_l = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m128 coeff_h = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff+4)); // always aligned + __m128 dst_l = _mm_mul_ps(data_l_single, coeff_l); // Multiply by coefficient + __m128 dst_h = _mm_mul_ps(data_h_single, coeff_h); // 4*(32bit*32bit=32bit) + result2 = _mm_add_ps(result2, dst_l); // accumulate result. + result2 = _mm_add_ps(result2, dst_h); + + current_coeff += 8; + } + + // begin3, result3 + for (int i = 0; i < filter_size; i++) { + __m128 data_l_single, data_h_single; + if(sizeof(pixel_t)==2) // word + { + // unaligned + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin3+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero); // spread higher 4*uint16_t pixel value -> 4*32 bit + data_l_single = _mm_cvtepi32_ps (src_l); // Converts the four signed 32-bit integer values of a to single-precision, floating-point values. + data_h_single = _mm_cvtepi32_ps (src_h); + } + else { // float + // unaligned + data_l_single = _mm_loadu_ps(reinterpret_cast(src+begin3+i*8)); // float 4*32=128 4 pixels at a time + data_h_single = _mm_loadu_ps(reinterpret_cast(src+begin3+i*8+4)); // float 4*32=128 4 pixels at a time + } + __m128 coeff_l = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m128 coeff_h = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff+4)); // always aligned + __m128 dst_l = _mm_mul_ps(data_l_single, coeff_l); // Multiply by coefficient + __m128 dst_h = _mm_mul_ps(data_h_single, coeff_h); // 4*(32bit*32bit=32bit) + result3 = _mm_add_ps(result3, dst_l); // accumulate result. + result3 = _mm_add_ps(result3, dst_h); + + current_coeff += 8; + } + + // begin4, result4 + for (int i = 0; i < filter_size; i++) { + __m128 data_l_single, data_h_single; + if(sizeof(pixel_t)==2) // word + { + // unaligned + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin4+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero); // spread higher 4*uint16_t pixel value -> 4*32 bit + data_l_single = _mm_cvtepi32_ps (src_l); // Converts the four signed 32-bit integer values of a to single-precision, floating-point values. + data_h_single = _mm_cvtepi32_ps (src_h); + } + else { // float + // unaligned + data_l_single = _mm_loadu_ps(reinterpret_cast(src+begin4+i*8)); // float 4*32=128 4 pixels at a time + data_h_single = _mm_loadu_ps(reinterpret_cast(src+begin4+i*8+4)); // float 4*32=128 4 pixels at a time + } + __m128 coeff_l = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m128 coeff_h = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff+4)); // always aligned + __m128 dst_l = _mm_mul_ps(data_l_single, coeff_l); // Multiply by coefficient + __m128 dst_h = _mm_mul_ps(data_h_single, coeff_h); // 4*(32bit*32bit=32bit) + result4 = _mm_add_ps(result4, dst_l); // accumulate result. 
+ result4 = _mm_add_ps(result4, dst_h); + + current_coeff += 8; + } + + __m128 result; + + // this part needs ssse3 + __m128 result12 = _mm_hadd_ps(result1, result2); + __m128 result34 = _mm_hadd_ps(result3, result4); + result = _mm_hadd_ps(result12, result34); + + if(sizeof(pixel_t)==2) // word + { + // Converts the four single-precision, floating-point values of a to signed 32-bit integer values. + __m128i result_4x_int32 = _mm_cvtps_epi32(result); // 4 * 32 bit integers + // SIMD Extensions 4 (SSE4) packus or simulation + __m128i result_4x_uint16 = hasSSE41 ? _mm_packus_epi32(result_4x_int32, zero) : (_MM_PACKUS_EPI32(result_4x_int32, zero)) ; // 4*32+zeros = lower 4*16 OK +#ifdef X86_32 + *((uint64_t *)(dst + x)) = _mm_cvtsi128_si32(result_4x_uint16) + (((__int64)_mm_cvtsi128_si32(_mm_srli_si128(result_4x_uint16, 4))) << 32); +#else + *((uint64_t *)(dst + x)) = _mm_cvtsi128_si64(result_4x_uint16); // 64 bit only +#endif + } + else { // float + // aligned + _mm_store_ps(reinterpret_cast(dst+x), result); // 4 results at a time + } + + } + + dst += dst_pitch; + src += src_pitch; + } +} + static void resizer_h_ssse3_generic(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height) { int filter_size = AlignNumber(program->filter_size, 8) / 8; __m128i zero = _mm_setzero_si128(); @@ -659,9 +822,9 @@ static void resizer_h_ssse3_generic(BYTE* dst, const BYTE* src, int dst_pitch, i for (int i = 0; i < filter_size; i++) { __m128i data, coeff, current_result; - data = _mm_loadl_epi64(reinterpret_cast(src+begin1+i*8)); - data = _mm_unpacklo_epi8(data, zero); - coeff = _mm_load_si128(reinterpret_cast(current_coeff)); + data = _mm_loadl_epi64(reinterpret_cast(src+begin1+i*8)); // 8 * 8 bit pixels + data = _mm_unpacklo_epi8(data, zero); // make 8*16 bit pixels + coeff = _mm_load_si128(reinterpret_cast(current_coeff)); // 8 coeffs 14 bit scaled -> ushort OK current_result = _mm_madd_epi16(data, coeff); result1 = _mm_add_epi32(result1, current_result); @@ -1081,10 +1244,25 @@ ResamplerH FilteredResizeH::GetResampler(int CPU, bool aligned, int pixelsize, R return resize_h_c_planar; } } - else if (pixelsize == 2) { // todo: non_c - return resize_h_c_planar; + else if (pixelsize == 2) { + if (CPU & CPUF_SSSE3) { + resize_h_prepare_coeff_8(program, env); + if (CPU & CPUF_SSE4_1) + return resizer_h_ssse3_generic_int16_float; + else + return resizer_h_ssse3_generic_int16_float; + } else + return resize_h_c_planar; } else { //if (pixelsize == 4) - return resize_h_c_planar; + if (CPU & CPUF_SSSE3) { + resize_h_prepare_coeff_8(program, env); + //if (program->filter_size > 8) + if (CPU & CPUF_SSE4_1) + return resizer_h_ssse3_generic_int16_float; + else + return resizer_h_ssse3_generic_int16_float; + } else + return resize_h_c_planar; } } From cd782cac7a2b1ac0afc9c844e584671a57b5b628 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 1 Sep 2016 20:23:21 +0200 Subject: [PATCH 030/120] ColorYUV for 10-16 bit with native 10,12,14 bitdepth. New params: bits=10,12,14,16 for showyuv. 
bool showyuv_fullrange --- avs_core/filters/color.cpp | 407 ++++++++++++++++++++++++++----------- avs_core/filters/color.h | 7 +- 2 files changed, 295 insertions(+), 119 deletions(-) diff --git a/avs_core/filters/color.cpp b/avs_core/filters/color.cpp index 0d23d3dc9..0222434f7 100644 --- a/avs_core/filters/color.cpp +++ b/avs_core/filters/color.cpp @@ -41,64 +41,93 @@ #include #include #include "../core/internal.h" +#include -static void coloryuv_showyuv(BYTE* pY, BYTE* pU, BYTE* pV, int y_pitch, int u_pitch, int v_pitch, int frame, bool full_range) +static void coloryuv_showyuv(BYTE* pY, BYTE* pU, BYTE* pV, int y_pitch, int u_pitch, int v_pitch, int framenumber, bool full_range, int bits_per_pixel) { - const int frame_size = full_range ? 256 : 224; - const int luma_size = frame_size * 2; - const int chroma_offset = full_range ? 0 : 16; + int internal_bitdepth = bits_per_pixel == 8 ? 8 : 10; + int luma_min = (full_range ? 0 : 16) << (internal_bitdepth - 8); + int luma_max = ((full_range ? 256: 236) << (internal_bitdepth - 8)) - 1; - int luma; + int chroma_min = (full_range ? 0 : 16) << (internal_bitdepth - 8); + int chroma_max = ((full_range ? 256: 241) << (internal_bitdepth - 8)) - 1; + + int luma_range = luma_max - luma_min + 1; // 256/220 ,1024/880 + int chroma_range = chroma_max - chroma_min + 1; // 256/225 ,1024/900 + const int luma_size = chroma_range * 2; // YV12 subsampling + + int luma; // Calculate luma cycle - if (full_range) - { - luma = frame % 510; - if (luma > 255) - { - luma = 510 - luma; - } - } - else - { - luma = frame % 438; - if (luma > 219) - { - luma = 438 - luma; - } - luma += 16; - } + // 0,1..255,254,..1 = 2x256-2 + // 0,1..1023,1022,..1 = 2*1024-2 + luma = framenumber % (luma_range*2 -2); + if (luma > luma_range-1) + luma = (luma_range*2 -2) - luma; + luma += luma_min; // Set luma value for (int y = 0; y < luma_size; y++) { - memset(pY, luma, luma_size); - pY += y_pitch; + switch(bits_per_pixel) { + case 8: + memset(pY, luma, luma_size); break; + case 10: case 12: case 14: case 16: + std::fill_n((uint16_t *)pY, luma_size, luma << (bits_per_pixel-internal_bitdepth)); break; + case 32: + std::fill_n((float *)pY, luma_size, (float)luma / ((1 << internal_bitdepth) - 1)); break; + } + pY += y_pitch; } - // Set chroma - for (int y = 0; y < frame_size; y++) + for (int y = 0; y < chroma_range; y++) { - for (int x = 0; x < frame_size; x++) { - pU[x] = x + chroma_offset; + switch(bits_per_pixel) { + case 8: + for (int x = 0; x < chroma_range; x++) + pU[x] = x + chroma_min; + memset(pV, y + chroma_min, chroma_range); + break; + case 10: case 12: case 14: case 16: + for (int x = 0; x < chroma_range; x++) { + reinterpret_cast(pU)[x] = (x + chroma_min) << (bits_per_pixel - internal_bitdepth); } - - memset(pV, y + chroma_offset, frame_size); - - pU += u_pitch; - pV += v_pitch; + std::fill_n((uint16_t *)pV, chroma_range, (y + chroma_min) << (bits_per_pixel-internal_bitdepth)); + break; + case 32: + for (int x = 0; x < chroma_range; x++) { + reinterpret_cast(pU)[x] = (float)(x + chroma_min) / ((1 << internal_bitdepth) - 1); + } + std::fill_n((float *)pV, chroma_range, (float)(y + chroma_min) / ((1 << internal_bitdepth) - 1)); + break; + } + pU += u_pitch; + pV += v_pitch; } } -static void coloryuv_create_lut(BYTE* lut, const ColorYUVPlaneConfig* config) +// luts are only for integer bits 8/10/12/14/16. 
float will be realtime +template +static void coloryuv_create_lut(BYTE* lut8, const ColorYUVPlaneConfig* config, int bits_per_pixel, bool clamp_on_tv_range, bool scale_is_256) { - const double scale = 256; + pixel_t *lut = reinterpret_cast(lut8); + + // to be decided that parameters are scaled with 256 (legacy 8 bit behaviour) or + // bit-depth dependent + const double value_scale = (1 << bits_per_pixel); // scale is 256/1024/4096/16384/65536 + const double scale_param = scale_is_256 ? 256 : (1 << bits_per_pixel); // scale is 256/1024/4096/16384/65536 + + const int lookup_size = (1 << bits_per_pixel); // 256, 1024, 4096, 16384, 65536 + const int pixel_max = lookup_size - 1; + int tv_range_low = 16 << (bits_per_pixel - 8); + int tv_range_hi_chroma = ((240+1) << (bits_per_pixel - 8)) - 1; // 16-240,64–963, 256–3855,... 4096-61695 + int tv_range_hi_luma = ((235+1) << (bits_per_pixel - 8)) - 1; - double gain = config->gain / scale + 1.0; - double contrast = config->contrast / scale + 1.0; - double gamma = config->gamma / scale + 1.0; - double offset = config->offset / scale; + double gain = config->gain / scale_param + 1.0; + double contrast = config->contrast / scale_param + 1.0; + double gamma = config->gamma / scale_param + 1.0; + double offset = config->offset / scale_param; int range = config->range; if (range == COLORYUV_RANGE_PC_TVY) @@ -114,28 +143,30 @@ static void coloryuv_create_lut(BYTE* lut, const ColorYUVPlaneConfig* config) { if (config->plane == PLANAR_Y) { - range_factor = 219.0 / 255.0; + // 8 bit 219 = 235-16, 10 bit: 64–963 + range_factor = (tv_range_hi_luma - tv_range_low) / (double)pixel_max; // 219.0 / 255.0 } else { - range_factor = 224.0 / 255.0; + // 224 = 240-16 + range_factor = (tv_range_hi_chroma - tv_range_low) / (double)pixel_max; } } else { if (config->plane == PLANAR_Y) { - range_factor = 255.0 / 219.0; + range_factor = (double)pixel_max / (tv_range_hi_luma - tv_range_low); // 255.0 / 219.0 } else { - range_factor = 255.0 / 224.0; + range_factor = (double)pixel_max / (tv_range_hi_chroma - tv_range_low); // 255.0 / 224.0 } } } - for (int i = 0; i < 256; i++) { - double value = double(i) / 256.0; + for (int i = 0; i < lookup_size; i++) { + double value = double(i) / value_scale; // Applying gain value *= gain; @@ -152,36 +183,38 @@ static void coloryuv_create_lut(BYTE* lut, const ColorYUVPlaneConfig* config) value = pow(value, 1.0 / gamma); } - value *= 256.0; + value *= value_scale; // Range conversion if (range == COLORYUV_RANGE_PC_TV) { - value = value*range_factor + 16.0; + value = value*range_factor + tv_range_low; // v*range - 16 } else if (range == COLORYUV_RANGE_TV_PC) { - value = (value - 16.0) * range_factor; + value = (value - tv_range_low) * range_factor; // (v-16)*range } // Convert back to int int iValue = int(value); // Clamp - iValue = clamp(iValue, 0, 255); + iValue = clamp(iValue, 0, pixel_max); - if (config->clip_tv) + if (config->clip_tv && clamp_on_tv_range) // avs+: clamp on tv range { - iValue = clamp(iValue, 16, config->plane == PLANAR_Y ? 235 : 240); + //iValue = clamp(iValue, tv_range_low, config->plane == PLANAR_Y ? 
tv_range_hi_luma : tv_range_hi_chroma); } lut[i] = iValue; } } -static void coloryuv_analyse_core(const int* freq, const int pixel_num, ColorYUVPlaneData* data) +static void coloryuv_analyse_core(const int* freq, const int pixel_num, ColorYUVPlaneData* data, int bits_per_pixel) { - const int pixel_256th = pixel_num / 256; // For loose max/min + int pixel_value_count = 1 << bits_per_pixel; // size of freq table + + const int pixel_256th = pixel_num / 256; // For loose max/min yes, still 1/256! double avg = 0.0; data->real_min = -1; @@ -191,9 +224,9 @@ static void coloryuv_analyse_core(const int* freq, const int pixel_num, ColorYUV int px_min_c = 0, px_max_c = 0; - for (int i = 0; i < 256; i++) + for (int i = 0; i < pixel_value_count; i++) { - avg += freq[i] * i; + avg += freq[i] * double(i); if (freq[i] > 0 && data->real_min == -1) { @@ -210,18 +243,18 @@ static void coloryuv_analyse_core(const int* freq, const int pixel_num, ColorYUV } } - if (freq[255 - i] > 0 && data->real_max == -1) + if (freq[pixel_value_count-1 - i] > 0 && data->real_max == -1) { - data->real_max = 255 - i; + data->real_max = pixel_value_count-1 - i; } if (data->loose_max == -1) { - px_max_c += freq[255 - i]; + px_max_c += freq[pixel_value_count-1 - i]; if (px_max_c > pixel_256th) { - data->loose_max = 255 - i; + data->loose_max = pixel_value_count-1 - i; } } } @@ -230,22 +263,62 @@ static void coloryuv_analyse_core(const int* freq, const int pixel_num, ColorYUV data->average = avg; } -static void coloryuv_analyse_planar(const BYTE* pSrc, int src_pitch, int width, int height, ColorYUVPlaneData* data) +static void coloryuv_analyse_planar(const BYTE* pSrc, int src_pitch, int width, int height, ColorYUVPlaneData* data, int bits_per_pixel) { - int freq[256]; - memset(freq, 0, sizeof(freq)); + int bits_per_pixel_for_freq = bits_per_pixel <= 16 ? bits_per_pixel : 16; + int statistics_size = 1 << bits_per_pixel_for_freq; // float: 65536 + int *freq = new int[statistics_size]; + std::fill_n(freq, statistics_size, 0); + + if(bits_per_pixel==8) { + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + freq[pSrc[x]]++; + } + + pSrc += src_pitch; + } + } + else if (bits_per_pixel >= 10 && bits_per_pixel <= 14) { + uint16_t mask = statistics_size - 1; // e.g. 
0x3FF for 10 bit + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + freq[clamp(reinterpret_cast(pSrc)[x],(uint16_t)0,mask)]++; + } - for (int y = 0; y < height; y++) - { + pSrc += src_pitch; + } + } + else if (bits_per_pixel == 16) { + // no clamp, faster + for (int y = 0; y < height; y++) + { for (int x = 0; x < width; x++) { - freq[pSrc[x]]++; + freq[reinterpret_cast(pSrc)[x]]++; } pSrc += src_pitch; + } + } else if(bits_per_pixel==32) { + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + freq[clamp((int)(65535.0f*reinterpret_cast(pSrc)[x]), 0, 65535)]++; + } + + pSrc += src_pitch; + } } - coloryuv_analyse_core(freq, width*height, data); + coloryuv_analyse_core(freq, width*height, data, bits_per_pixel_for_freq); + + delete[] freq; } static void coloryuv_analyse_yuy2(const BYTE* pSrc, int src_pitch, int width, int height, ColorYUVPlaneData* dataY, ColorYUVPlaneData* dataU, ColorYUVPlaneData* dataV) @@ -268,42 +341,86 @@ static void coloryuv_analyse_yuy2(const BYTE* pSrc, int src_pitch, int width, in pSrc += src_pitch; } - coloryuv_analyse_core(freqY, width*height, dataY); - coloryuv_analyse_core(freqU, width*height/2, dataU); - coloryuv_analyse_core(freqV, width*height/2, dataV); + coloryuv_analyse_core(freqY, width*height, dataY, 1); + coloryuv_analyse_core(freqU, width*height/2, dataU, 1); + coloryuv_analyse_core(freqV, width*height/2, dataV, 1); } -static void coloryuv_autogain(const ColorYUVPlaneData* dY, const ColorYUVPlaneData* dU, const ColorYUVPlaneData* dV, ColorYUVPlaneConfig* cY, ColorYUVPlaneConfig* cU, ColorYUVPlaneConfig* cV) +static void coloryuv_autogain(const ColorYUVPlaneData* dY, const ColorYUVPlaneData* dU, const ColorYUVPlaneData* dV, ColorYUVPlaneConfig* cY, ColorYUVPlaneConfig* cU, ColorYUVPlaneConfig* cV, + int bits_per_pixel, bool scale_is_256) { - int maxY = min(dY->loose_max, 236); - int minY = max(dY->loose_min, 16); + const double scale_param = scale_is_256 ? 256 : (1 << bits_per_pixel); // scale is 256/1024/4096/16384/65536 + int bits_per_pixel_for_freq = bits_per_pixel <= 16 ? bits_per_pixel : 16; // for float: like uint16_t + // always 16..235 + int loose_max_limit = (235 + 1) << (bits_per_pixel_for_freq - 8); + int loose_min_limit = 16 << (bits_per_pixel_for_freq - 8); + double gain_corr = 1 << bits_per_pixel_for_freq; + int maxY = min(dY->loose_max, loose_max_limit); + int minY = max(dY->loose_min, loose_min_limit); int range = maxY - minY; if (range > 0) { - double scale = 220.0 / range; - cY->offset = 16.0 - scale*minY; - cY->gain = 256.0*scale - 256.0; + double scale = double(loose_max_limit - loose_min_limit) / range; + cY->offset = (loose_min_limit - scale*minY) / (1<<(bits_per_pixel-8)) * 256 / scale_param; + cY->gain = scale_param * (scale - 1.0); } } -static void coloryuv_autowhite(const ColorYUVPlaneData* dY, const ColorYUVPlaneData* dU, const ColorYUVPlaneData* dV, ColorYUVPlaneConfig* cY, ColorYUVPlaneConfig* cU, ColorYUVPlaneConfig* cV) +static void coloryuv_autowhite(const ColorYUVPlaneData* dY, const ColorYUVPlaneData* dU, const ColorYUVPlaneData* dV, ColorYUVPlaneConfig* cY, ColorYUVPlaneConfig* cU, ColorYUVPlaneConfig* cV, + int bits_per_pixel, bool scale_is_256) { - cU->offset = 127 - dU->average; - cV->offset = 127 - dV->average; + double middle; + const double scale_param = scale_is_256 ? 256 : (1 << bits_per_pixel); // scale is 256/1024/4096/16384/65536 + middle = (1 << (bits_per_pixel - 1)) - 1; // 128-1, 2048-1 ... 
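The offsets computed below are converted from the clip's native bit-depth domain back into ColorYUV's legacy 256-based parameter scale, so that coloryuv_create_lut later divides by scale_param and ends up with the same correction regardless of bit depth. A minimal stand-alone sketch of just that conversion, keeping the same meaning of the variables (the helper name is hypothetical):

    #include <cstdint>

    // Hypothetical helper: turn a difference measured at the native bit depth into
    // the 8-bit-based parameter units used by gain/offset (scale_param == 256 keeps
    // the legacy behaviour; a bit-depth-dependent scale would pass 1 << bits instead).
    static double to_param_scale(double native_diff, int bits_per_pixel, double scale_param)
    {
        return native_diff / (1 << (bits_per_pixel - 8)) * 256.0 / scale_param;
    }

    // Example: a 10-bit U plane averaging 530 against a mid-point of 511 gives
    // to_param_scale(511 - 530, 10, 256) == -4.75, i.e. a -4.75 offset in 8-bit units.
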
+ cU->offset = (middle - dU->average) / (1<<(bits_per_pixel-8)) * 256 / scale_param; + cV->offset = (middle - dV->average) / (1<<(bits_per_pixel-8)) * 256 / scale_param; } -static void coloryuv_apply_lut_planar(BYTE* pDst, const BYTE* pSrc, int dst_pitch, int src_pitch, int width, int height, const BYTE* lut) +// only for integer samples +static void coloryuv_apply_lut_planar(BYTE* pDst, const BYTE* pSrc, int dst_pitch, int src_pitch, int width, int height, const BYTE* lut, int bits_per_pixel) { - for (int y = 0; y < height; y++) + if(bits_per_pixel==8) { + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + pDst[x] = lut[pSrc[x]]; + } + + pSrc += src_pitch; + pDst += dst_pitch; + } + } + else if (bits_per_pixel >= 10 && bits_per_pixel <= 14) { + uint16_t max_pixel_value = (1 << bits_per_pixel) - 1; + // protection needed for lut + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + uint16_t pixel = reinterpret_cast(pSrc)[x]; + pixel = pixel <= max_pixel_value ? pixel : max_pixel_value; + reinterpret_cast(pDst)[x] = reinterpret_cast(lut)[pixel]; + } + + pSrc += src_pitch; + pDst += dst_pitch; + } + } + else if (bits_per_pixel == 16) { + // no protection, faster + for (int y = 0; y < height; y++) + { for (int x = 0; x < width; x++) { - pDst[x] = lut[pSrc[x]]; + reinterpret_cast(pDst)[x] = reinterpret_cast(lut)[reinterpret_cast(pSrc)[x]]; } pSrc += src_pitch; pDst += dst_pitch; + } } } @@ -356,19 +473,21 @@ static void coloryuv_read_conditional(IScriptEnvironment* env, ColorYUVPlaneConf #undef READ_CONDITIONAL ColorYUV::ColorYUV(PClip child, - double gain_y, double offset_y, double gamma_y, double contrast_y, - double gain_u, double offset_u, double gamma_u, double contrast_u, - double gain_v, double offset_v, double gamma_v, double contrast_v, - const char* level, const char* opt, - bool colorbar, bool analyse, bool autowhite, bool autogain, bool conditional, - IScriptEnvironment* env) + double gain_y, double offset_y, double gamma_y, double contrast_y, + double gain_u, double offset_u, double gamma_u, double contrast_u, + double gain_v, double offset_v, double gamma_v, double contrast_v, + const char* level, const char* opt, + bool showyuv, bool analyse, bool autowhite, bool autogain, bool conditional, + int bits, bool showyuv_fullrange, + IScriptEnvironment* env) : GenericVideoFilter(child), - colorbar(colorbar), analyse(analyse), autowhite(autowhite), autogain(autogain), conditional(conditional) + colorbar_bits(showyuv ? 
bits : 0), analyse(analyse), autowhite(autowhite), autogain(autogain), conditional(conditional), colorbar_fullrange(showyuv_fullrange) { - if (!vi.IsYUV()) - { + if (!vi.IsYUV() && !vi.IsYUVA()) env->ThrowError("ColorYUV: Only work with YUV colorspace."); - } + + if (vi.ComponentSize() == 4) + env->ThrowError("ColorYUV: float pixel type is not supported yet."); configY.gain = gain_y; configY.offset = offset_y; @@ -411,6 +530,10 @@ ColorYUV::ColorYUV(PClip child, { env->ThrowError("ColorYUV: invalid parameter : levels"); } + else { + // avs+: missing init to none + configV.range = configU.range = configY.range = COLORYUV_RANGE_NONE; + } // Option if (lstrcmpi(opt, "coring") == 0) @@ -424,31 +547,52 @@ ColorYUV::ColorYUV(PClip child, env->ThrowError("ColorYUV: invalid parameter : opt"); } - if (colorbar) + if(showyuv && colorbar_bits !=8 && colorbar_bits != 10 && colorbar_bits != 12 && colorbar_bits != 14 && colorbar_bits != 16) + env->ThrowError("ColorYUV: bits parameter for showyuv must be 8, 10, 12, 14 or 16"); + + if (colorbar_bits>0 && showyuv) { - vi.width = 224 * 2; - vi.height = 224 * 2; - vi.pixel_type = VideoInfo::CS_YV12; + // pre-avs+: coloryuv_showyuv is always called with full_range false + int chroma_range = colorbar_fullrange ? 256 : (240 - 16 + 1); // 0..255, 16..240 + // size limited to either 8 or 10 bits, independenly of 12/14/16 bit-depth + vi.width = (colorbar_bits == 8 ? chroma_range : (chroma_range*4)) * 2; + vi.height = vi.width; + switch (colorbar_bits) { + case 8: vi.pixel_type = VideoInfo::CS_YV12; break; + case 10: vi.pixel_type = VideoInfo::CS_YUV420P10; break; + case 12: vi.pixel_type = VideoInfo::CS_YUV420P12; break; + case 14: vi.pixel_type = VideoInfo::CS_YUV420P14; break; + case 16: vi.pixel_type = VideoInfo::CS_YUV420P16; break; + } } } PVideoFrame __stdcall ColorYUV::GetFrame(int n, IScriptEnvironment* env) { - if (colorbar) + if (colorbar_bits>0) { PVideoFrame dst = env->NewVideoFrame(vi); - coloryuv_showyuv(dst->GetWritePtr(), dst->GetWritePtr(PLANAR_U), dst->GetWritePtr(PLANAR_V), dst->GetPitch(), dst->GetPitch(PLANAR_U), dst->GetPitch(PLANAR_V), n, false); + // pre AVS+: full_range is always false + // AVS+: showyuv_fullrange bool parameter + // AVS+: bits parameter + coloryuv_showyuv(dst->GetWritePtr(), dst->GetWritePtr(PLANAR_U), dst->GetWritePtr(PLANAR_V), dst->GetPitch(), dst->GetPitch(PLANAR_U), dst->GetPitch(PLANAR_V), n, colorbar_fullrange, colorbar_bits); return dst; } PVideoFrame src = child->GetFrame(n, env); PVideoFrame dst = env->NewVideoFrame(vi); + int pixelsize = vi.ComponentSize(); + int bits_per_pixel = vi.BitsPerComponent(); + ColorYUVPlaneConfig // Yes, we copy these struct cY = configY, cU = configU, cV = configV; + bool clamp_on_tv_range = true; // rfu + bool param_scale_is_256 = true; //rfu + // for analysing data char text[512]; @@ -462,20 +606,20 @@ PVideoFrame __stdcall ColorYUV::GetFrame(int n, IScriptEnvironment* env) } else { - coloryuv_analyse_planar(src->GetReadPtr(), src->GetPitch(), vi.width, vi.height, &dY); - if (!vi.IsY8()) + coloryuv_analyse_planar(src->GetReadPtr(), src->GetPitch(), vi.width, vi.height, &dY, bits_per_pixel); + if (!vi.IsY()) { const int width = vi.width >> vi.GetPlaneWidthSubsampling(PLANAR_U); const int height = vi.height >> vi.GetPlaneHeightSubsampling(PLANAR_U); - coloryuv_analyse_planar(src->GetReadPtr(PLANAR_U), src->GetPitch(PLANAR_U), width, height, &dU); - coloryuv_analyse_planar(src->GetReadPtr(PLANAR_V), src->GetPitch(PLANAR_V), width, height, &dV); + 
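The per-plane analysis called here builds a frequency table with one bin per possible code value (1 << bits_per_pixel bins, with float folded into 16 bits) and then derives the "loose" extremes by discarding the darkest and brightest 1/256th of the pixels, just as the 8-bit original did. A minimal sketch of that loose-minimum rule in isolation (function and variable names are illustrative only):

    #include <vector>
    #include <cstddef>

    // Illustrative only: walk a histogram of size (1 << bits) and return the first
    // code value at which the running count exceeds 1/256th of all pixels.
    static int loose_min_from_histogram(const std::vector<int>& freq, size_t pixel_count)
    {
        const size_t threshold = pixel_count / 256;  // the 1/256 rule is kept at every bit depth
        size_t running = 0;
        for (size_t v = 0; v < freq.size(); ++v) {
            running += (size_t)freq[v];
            if (running > threshold)
                return (int)v;
        }
        return (int)freq.size() - 1;
    }
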
coloryuv_analyse_planar(src->GetReadPtr(PLANAR_U), src->GetPitch(PLANAR_U), width, height, &dU, bits_per_pixel); + coloryuv_analyse_planar(src->GetReadPtr(PLANAR_V), src->GetPitch(PLANAR_V), width, height, &dV, bits_per_pixel); } } if (analyse) { - if (!vi.IsY8()) + if (!vi.IsY()) { sprintf(text, " Frame: %-8u ( Luma Y / ChromaU / ChromaV )\n" @@ -511,27 +655,47 @@ PVideoFrame __stdcall ColorYUV::GetFrame(int n, IScriptEnvironment* env) } } - if (autowhite && !vi.IsY8()) + if (autowhite && !vi.IsY()) { - coloryuv_autowhite(&dY, &dU, &dV, &cY, &cU, &cV); + coloryuv_autowhite(&dY, &dU, &dV, &cY, &cU, &cV, bits_per_pixel, param_scale_is_256); } if (autogain) { - coloryuv_autogain(&dY, &dU, &dV, &cY, &cU, &cV); + coloryuv_autogain(&dY, &dU, &dV, &cY, &cU, &cV, bits_per_pixel, param_scale_is_256); } } // Read conditional variables coloryuv_read_conditional(env, &cY, &cU, &cV); - BYTE lutY[256], lutU[256], lutV[256]; + int lut_size = pixelsize*(1 << bits_per_pixel); // 256*1 / 1024*2 .. 65536*2 + BYTE *lutY = nullptr; + BYTE *lutU = nullptr; + BYTE *lutV = nullptr; + + if(pixelsize==1 || pixelsize==2) { + // no float lut. if ever, float will be realtime + lutY = new BYTE[lut_size]; + lutU = new BYTE[lut_size]; + lutV = new BYTE[lut_size]; + } - coloryuv_create_lut(lutY, &cY); - if (!vi.IsY8()) - { - coloryuv_create_lut(lutU, &cU); - coloryuv_create_lut(lutV, &cV); + if(pixelsize==1) { + coloryuv_create_lut(lutY, &cY, bits_per_pixel, clamp_on_tv_range, param_scale_is_256); + if (!vi.IsY()) + { + coloryuv_create_lut(lutU, &cU, bits_per_pixel, clamp_on_tv_range, param_scale_is_256); + coloryuv_create_lut(lutV, &cV, bits_per_pixel, clamp_on_tv_range, param_scale_is_256); + } + } + else if (pixelsize==2) { // pixelsize==2 + coloryuv_create_lut(lutY, &cY, bits_per_pixel, clamp_on_tv_range, param_scale_is_256); + if (!vi.IsY()) + { + coloryuv_create_lut(lutU, &cU, bits_per_pixel, clamp_on_tv_range, param_scale_is_256); + coloryuv_create_lut(lutV, &cV, bits_per_pixel, clamp_on_tv_range, param_scale_is_256); + } } if (vi.IsYUY2()) @@ -539,15 +703,18 @@ PVideoFrame __stdcall ColorYUV::GetFrame(int n, IScriptEnvironment* env) coloryuv_apply_lut_yuy2(dst->GetWritePtr(), src->GetReadPtr(), dst->GetPitch(), src->GetPitch(), vi.width, vi.height, lutY, lutU, lutV); } else - { - coloryuv_apply_lut_planar(dst->GetWritePtr(), src->GetReadPtr(), dst->GetPitch(), src->GetPitch(), vi.width, vi.height, lutY); - if (!vi.IsY8()) + { + coloryuv_apply_lut_planar(dst->GetWritePtr(), src->GetReadPtr(), dst->GetPitch(), src->GetPitch(), vi.width, vi.height, lutY, bits_per_pixel); + if (!vi.IsY()) { const int width = vi.width >> vi.GetPlaneWidthSubsampling(PLANAR_U); const int height = vi.height >> vi.GetPlaneHeightSubsampling(PLANAR_U); - coloryuv_apply_lut_planar(dst->GetWritePtr(PLANAR_U), src->GetReadPtr(PLANAR_U), dst->GetPitch(PLANAR_U), src->GetPitch(PLANAR_U), width, height, lutU); - coloryuv_apply_lut_planar(dst->GetWritePtr(PLANAR_V), src->GetReadPtr(PLANAR_V), dst->GetPitch(PLANAR_V), src->GetPitch(PLANAR_V), width, height, lutV); + coloryuv_apply_lut_planar(dst->GetWritePtr(PLANAR_U), src->GetReadPtr(PLANAR_U), dst->GetPitch(PLANAR_U), src->GetPitch(PLANAR_U), width, height, lutU, bits_per_pixel); + coloryuv_apply_lut_planar(dst->GetWritePtr(PLANAR_V), src->GetReadPtr(PLANAR_V), dst->GetPitch(PLANAR_V), src->GetPitch(PLANAR_V), width, height, lutV, bits_per_pixel); + } + if(vi.IsYUVA()) { + } } @@ -556,6 +723,9 @@ PVideoFrame __stdcall ColorYUV::GetFrame(int n, IScriptEnvironment* env) env->ApplyMessage(&dst, vi, 
text, vi.width / 4, 0xa0a0a0, 0, 0); } + if (lutY) delete[] lutY; + if (lutU) delete[] lutU; + if (lutV) delete[] lutV; return dst; } @@ -582,6 +752,8 @@ AVSValue __cdecl ColorYUV::Create(AVSValue args, void*, IScriptEnvironment* env) args[18].AsBool(false), // autowhite args[19].AsBool(false), // autogain args[20].AsBool(false), // conditional + args[21].AsInt(8), // bits avs+ + args[22].AsBool(false), // showyuv_fullrange avs+ env); } @@ -591,7 +763,8 @@ extern const AVSFunction Color_filters[] = { "[gain_u]f[off_u]f[gamma_u]f[cont_u]f" \ "[gain_v]f[off_v]f[gamma_v]f[cont_v]f" \ "[levels]s[opt]s[matrix]s[showyuv]b" \ - "[analyze]b[autowhite]b[autogain]b[conditional]b", + "[analyze]b[autowhite]b[autogain]b[conditional]" \ + "b[bits]i[showyuv_fullrange]b", ColorYUV::Create }, { 0 } }; diff --git a/avs_core/filters/color.h b/avs_core/filters/color.h index e8b178544..c270e5d65 100644 --- a/avs_core/filters/color.h +++ b/avs_core/filters/color.h @@ -68,7 +68,8 @@ class ColorYUV : public GenericVideoFilter double gain_u, double offset_u, double gamma_u, double contrast_u, double gain_v, double offset_v, double gamma_v, double contrast_v, const char* level, const char* opt, - bool colorbar, bool analyse, bool autowhite, bool autogain, bool conditional, + bool showyuv, bool analyse, bool autowhite, bool autogain, bool conditional, + int bits, bool showyuv_fullrange, // avs+ IScriptEnvironment* env); PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); @@ -82,7 +83,9 @@ class ColorYUV : public GenericVideoFilter private: ColorYUVPlaneConfig configY, configU, configV; - bool colorbar, analyse, autowhite, autogain, conditional; + int colorbar_bits; + bool colorbar_fullrange; + bool analyse, autowhite, autogain, conditional; }; #endif // __Color_h From fab87f47966a3536a6b903c8bd0d9b8102e7592c Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 1 Sep 2016 20:26:09 +0200 Subject: [PATCH 031/120] Text overlay native 10-12-14 bit-aware. Info() always calculates real rendering size Default font style change to FIXED if no font found by name --- avs_core/filters/text-overlay.cpp | 37 ++++++++++++++++--------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/avs_core/filters/text-overlay.cpp b/avs_core/filters/text-overlay.cpp index 47d1d087c..f031caafd 100644 --- a/avs_core/filters/text-overlay.cpp +++ b/avs_core/filters/text-overlay.cpp @@ -49,7 +49,8 @@ static HFONT LoadFont(const char name[], int size, bool bold, bool italic, int w { return CreateFont( size, width, angle, angle, bold ? 
FW_BOLD : FW_NORMAL, italic, FALSE, FALSE, DEFAULT_CHARSET, OUT_DEFAULT_PRECIS, - CLIP_DEFAULT_PRECIS, DEFAULT_QUALITY, FF_DONTCARE | DEFAULT_PITCH, name ); + CLIP_DEFAULT_PRECIS, DEFAULT_QUALITY, FF_DONTCARE | FIXED_PITCH /*FF_DONTCARE | DEFAULT_PITCH*/, name ); + // avs+: force fixed pitch when font is not found by name } /******************************************************************** @@ -184,7 +185,7 @@ void Antialiaser::Apply( const VideoInfo& vi, PVideoFrame* frame, int pitch) (*frame)->GetWritePtr(PLANAR_U), (*frame)->GetWritePtr(PLANAR_V) ); else if (vi.NumComponents() == 1) // Y8, Y16, Y32 - ApplyPlanar((*frame)->GetWritePtr(), pitch, 0, 0, 0, 0, 0, vi.ComponentSize()); + ApplyPlanar((*frame)->GetWritePtr(), pitch, 0, 0, 0, 0, 0, vi.BitsPerComponent()); else if (vi.IsPlanar()) { if(vi.IsPlanarRGB() || vi.IsPlanarRGBA()) // color are OK if plane order is sent as G R B @@ -194,15 +195,15 @@ void Antialiaser::Apply( const VideoInfo& vi, PVideoFrame* frame, int pitch) (*frame)->GetWritePtr(PLANAR_B), vi.GetPlaneWidthSubsampling(PLANAR_G), // no subsampling vi.GetPlaneHeightSubsampling(PLANAR_G), - vi.ComponentSize() ); + vi.BitsPerComponent() ); else ApplyPlanar((*frame)->GetWritePtr(), pitch, - (*frame)->GetPitch(PLANAR_U), - (*frame)->GetWritePtr(PLANAR_U), - (*frame)->GetWritePtr(PLANAR_V), - vi.GetPlaneWidthSubsampling(PLANAR_U), - vi.GetPlaneHeightSubsampling(PLANAR_U), - vi.ComponentSize()); + (*frame)->GetPitch(PLANAR_U), + (*frame)->GetWritePtr(PLANAR_U), + (*frame)->GetWritePtr(PLANAR_V), + vi.GetPlaneWidthSubsampling(PLANAR_U), + vi.GetPlaneHeightSubsampling(PLANAR_U), + vi.BitsPerComponent()); } } @@ -249,7 +250,7 @@ void Antialiaser::ApplyYV12(BYTE* buf, int pitch, int pitchUV, BYTE* bufU, BYTE* } -void Antialiaser::ApplyPlanar(BYTE* buf, int pitch, int pitchUV, BYTE* bufU, BYTE* bufV, int shiftX, int shiftY, int pixelsize) { +void Antialiaser::ApplyPlanar(BYTE* buf, int pitch, int pitchUV, BYTE* bufU, BYTE* bufV, int shiftX, int shiftY, int bits_per_pixel) { const int stepX = 1<= 10 && bits_per_pixel <= 16) { // uint16_t for (int y=yb; y<=yt; y+=1) { for (int x=xl; x<=xr; x+=1) { const int x4 = x<<2; const int basealpha = alpha[x4+0]; if (basealpha != 256) { - reinterpret_cast(buf)[x] = (uint16_t)((reinterpret_cast(buf)[x] * basealpha + ((int)alpha[x4 + 3] << 8)) >> 8); + reinterpret_cast(buf)[x] = (uint16_t)((reinterpret_cast(buf)[x] * basealpha + ((int)alpha[x4 + 3] << (bits_per_pixel-8))) >> 8); } } buf += pitch; @@ -317,7 +318,7 @@ void Antialiaser::ApplyPlanar(BYTE* buf, int pitch, int pitchUV, BYTE* bufU, BYT bufV += (pitchUV*yb)>>shiftY; // different paths for different bitdepth - if(pixelsize == 1) { + if(bits_per_pixel == 8) { for (int y=yb; y<=yt; y+=stepY) { for (int x=xl, xs=xlshiftX; x<=xr; x+=stepX, xs+=1) { unsigned short* UValpha = alpha + x*4; @@ -342,7 +343,7 @@ void Antialiaser::ApplyPlanar(BYTE* buf, int pitch, int pitchUV, BYTE* bufU, BYT alpha += UVw4; }//end for y } - else if (pixelsize == 2) { // uint16_t + else if (bits_per_pixel >= 10 && bits_per_pixel <= 16) { // uint16_t for (int y=yb; y<=yt; y+=stepY) { for (int x=xl, xs=xlshiftX; x<=xr; x+=stepX, xs+=1) { unsigned short* UValpha = alpha + x*4; @@ -358,8 +359,8 @@ void Antialiaser::ApplyPlanar(BYTE* buf, int pitch, int pitchUV, BYTE* bufU, BYT UValpha += w4; } if (basealphaUV != skipThresh) { - reinterpret_cast(bufU)[xs] = (uint16_t)((reinterpret_cast(bufU)[xs] * basealphaUV + (au << 8)) >> shifter); - reinterpret_cast(bufV)[xs] = (uint16_t)((reinterpret_cast(bufV)[xs] * basealphaUV + (av 
<< 8)) >> shifter); + reinterpret_cast(bufU)[xs] = (uint16_t)((reinterpret_cast(bufU)[xs] * basealphaUV + (au << (bits_per_pixel-8))) >> shifter); + reinterpret_cast(bufV)[xs] = (uint16_t)((reinterpret_cast(bufV)[xs] * basealphaUV + (av << (bits_per_pixel-8))) >> shifter); } }// end for x bufU += pitchUV; @@ -1509,7 +1510,7 @@ PVideoFrame FilterInfo::GetFrame(int n, IScriptEnvironment* env) // More flexible way: get text extent RECT r; - if(!font_override) + if(false && !font_override) { // To prevent slowish full MxN rendering, we calculate a dummy // 1xN sized vertical and a Mx1 sized horizontal line extent From 74116e7b79bf22454b4ba92ef231cdc2c51c39c5 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 1 Sep 2016 20:26:55 +0200 Subject: [PATCH 032/120] Blur/Sharpen: RGB48/64 and Planar RGB(A) --- avs_core/filters/focus.cpp | 193 ++++++++++++++++++++++--------------- 1 file changed, 115 insertions(+), 78 deletions(-) diff --git a/avs_core/filters/focus.cpp b/avs_core/filters/focus.cpp index d172e5199..9636515e8 100644 --- a/avs_core/filters/focus.cpp +++ b/avs_core/filters/focus.cpp @@ -320,10 +320,14 @@ PVideoFrame __stdcall AdjustFocusV::GetFrame(int n, IScriptEnvironment* env) env2->ThrowError("AdjustFocusV: Could not reserve memory."); } + int pixelsize = vi.ComponentSize(); + if (vi.IsPlanar()) { - int pixelsize = vi.ComponentSize(); - const int planes[3] = { PLANAR_Y, PLANAR_U, PLANAR_V }; - for (int cplane = 0; cplane < 3; cplane++) { + const int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A}; + const int planesRGB[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A}; + const int *planes = vi.IsYUV() || vi.IsYUVA() ? planesYUV : planesRGB; + + for (int cplane = 0; cplane < 3; cplane++) { int plane = planes[cplane]; BYTE* dstp = src->GetWritePtr(plane); int pitch = src->GetPitch(plane); @@ -345,8 +349,10 @@ PVideoFrame __stdcall AdjustFocusV::GetFrame(int n, IScriptEnvironment* env) int row_size = vi.RowSize(); int height = vi.height; memcpy(line_buf, dstp, row_size); // First row - map centre as upper - - af_vertical_process(line_buf, dstp, height, pitch, row_size, half_amount, env); + if (pixelsize == 1) + af_vertical_process(line_buf, dstp, height, pitch, row_size, half_amount, env); + else + af_vertical_process(line_buf, dstp, height, pitch, row_size, half_amount, env); } env2->Free(line_buf); @@ -364,41 +370,50 @@ AdjustFocusH::AdjustFocusH(double _amount, PClip _child) // Blur/Sharpen Horizontal RGB32 C++ Code // -------------------------------------- -static __forceinline void af_horizontal_rgb32_process_line_c(BYTE b_left, BYTE g_left, BYTE r_left, BYTE a_left, BYTE *dstp, size_t width, int center_weight, int outer_weight) { +template +static __forceinline void af_horizontal_rgb32_process_line_c(pixel_t b_left, pixel_t g_left, pixel_t r_left, pixel_t a_left, pixel_t *dstp, size_t width, weight_t center_weight, weight_t outer_weight) { size_t x; for (x = 0; x < width-1; ++x) { - BYTE b = ScaledPixelClip(dstp[x*4+0] * center_weight + (b_left + dstp[x*4+4]) * outer_weight); + pixel_t b = ScaledPixelClip((weight_t)(dstp[x*4+0] * center_weight + (b_left + dstp[x*4+4]) * outer_weight)); b_left = dstp[x*4+0]; dstp[x*4+0] = b; - BYTE g = ScaledPixelClip(dstp[x*4+1] * center_weight + (g_left + dstp[x*4+5]) * outer_weight); + pixel_t g = ScaledPixelClip((weight_t)(dstp[x*4+1] * center_weight + (g_left + dstp[x*4+5]) * outer_weight)); g_left = dstp[x*4+1]; dstp[x*4+1] = g; - BYTE r = ScaledPixelClip(dstp[x*4+2] * center_weight + (r_left + dstp[x*4+6]) * outer_weight); + pixel_t r = 
ScaledPixelClip((weight_t)(dstp[x*4+2] * center_weight + (r_left + dstp[x*4+6]) * outer_weight)); r_left = dstp[x*4+2]; dstp[x*4+2] = r; - BYTE a = ScaledPixelClip(dstp[x*4+3] * center_weight + (a_left + dstp[x*4+7]) * outer_weight); + pixel_t a = ScaledPixelClip((weight_t)(dstp[x*4+3] * center_weight + (a_left + dstp[x*4+7]) * outer_weight)); a_left = dstp[x*4+3]; dstp[x*4+3] = a; } - dstp[x*4+0] = ScaledPixelClip(dstp[x*4+0] * center_weight + (b_left + dstp[x*4+0]) * outer_weight); - dstp[x*4+1] = ScaledPixelClip(dstp[x*4+1] * center_weight + (g_left + dstp[x*4+1]) * outer_weight); - dstp[x*4+2] = ScaledPixelClip(dstp[x*4+2] * center_weight + (r_left + dstp[x*4+2]) * outer_weight); - dstp[x*4+3] = ScaledPixelClip(dstp[x*4+3] * center_weight + (a_left + dstp[x*4+3]) * outer_weight); + dstp[x*4+0] = ScaledPixelClip((weight_t)(dstp[x*4+0] * center_weight + (b_left + dstp[x*4+0]) * outer_weight)); + dstp[x*4+1] = ScaledPixelClip((weight_t)(dstp[x*4+1] * center_weight + (g_left + dstp[x*4+1]) * outer_weight)); + dstp[x*4+2] = ScaledPixelClip((weight_t)(dstp[x*4+2] * center_weight + (r_left + dstp[x*4+2]) * outer_weight)); + dstp[x*4+3] = ScaledPixelClip((weight_t)(dstp[x*4+3] * center_weight + (a_left + dstp[x*4+3]) * outer_weight)); } -static void af_horizontal_rgb32_c(BYTE* dstp, size_t height, size_t pitch, size_t width, size_t amount) { - int center_weight = int(amount*2); - int outer_weight = int(32768-amount); - for (size_t y = height; y>0; --y) +template +static void af_horizontal_rgb32_64_c(BYTE* dstp8, size_t height, size_t pitch8, size_t width, int half_amount) { + typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type weight_t; + // kernel:[(1-1/2^_amount)/2, 1/2^_amount, (1-1/2^_amount)/2] + weight_t center_weight = half_amount*2; // *2: 16 bit scaled arithmetic, but the converted amount parameter scaled is only 15 bits + weight_t outer_weight = 32768-half_amount; // (1-1/2^_amount)/2 32768 = 0.5 + + pixel_t* dstp = reinterpret_cast(dstp8); + int pitch = pitch8 / sizeof(pixel_t); + + for (int y = height; y>0; --y) { - BYTE b_left = dstp[0]; - BYTE g_left = dstp[1]; - BYTE r_left = dstp[2]; - BYTE a_left = dstp[3]; - af_horizontal_rgb32_process_line_c(b_left, g_left, r_left, a_left, dstp, width, center_weight, outer_weight); + pixel_t b_left = dstp[0]; + pixel_t g_left = dstp[1]; + pixel_t r_left = dstp[2]; + pixel_t a_left = dstp[3]; + af_horizontal_rgb32_process_line_c(b_left, g_left, r_left, a_left, dstp, width, center_weight, outer_weight); dstp += pitch; } + } @@ -756,29 +771,35 @@ static void af_horizontal_yuy2_mmx(BYTE* dstp, const BYTE* srcp, size_t dst_pitc // Blur/Sharpen Horizontal RGB24 C++ Code // -------------------------------------- -static void af_horizontal_rgb24_c(BYTE* p, int height, int pitch, int width, int amount) { - const int center_weight = amount*2; - const int outer_weight = 32768-amount; +template +static void af_horizontal_rgb24_48_c(BYTE* dstp8, int height, int pitch8, int width, int half_amount) { + typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type weight_t; + // kernel:[(1-1/2^_amount)/2, 1/2^_amount, (1-1/2^_amount)/2] + weight_t center_weight = half_amount*2; // *2: 16 bit scaled arithmetic, but the converted amount parameter scaled is only 15 bits + weight_t outer_weight = 32768-half_amount; // (1-1/2^_amount)/2 32768 = 0.5 + + pixel_t *dstp = reinterpret_cast(dstp8); + int pitch = pitch8 / sizeof(pixel_t); for (int y = height; y>0; --y) { - - BYTE bb = p[0]; - BYTE gg = p[1]; - BYTE rr = p[2]; + 
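The weights set up above put the [(1-1/2^amount)/2, 1/2^amount, (1-1/2^amount)/2] kernel into fixed point: center_weight + 2*outer_weight always sums to 65536, so the weighted sum is 16-bit scaled and ScaledPixelClip only has to bring it back down and clip. A tiny self-contained sketch of one filtered sample under those assumptions (the rounding shown is an assumption about ScaledPixelClip, and the function name is illustrative):

    #include <cstdint>
    #include <algorithm>

    // Illustrative only: one 3-tap horizontally filtered sample with the same
    // fixed-point weights as above; center_weight + 2 * outer_weight == 65536,
    // so the kernel is normalised to 1.0 in 16-bit fixed point.
    static uint8_t blur3_sample(uint8_t left, uint8_t center, uint8_t right, int half_amount)
    {
        const int center_weight = half_amount * 2;      // 1/2^amount share of the weight budget
        const int outer_weight  = 32768 - half_amount;  // (1 - 1/2^amount)/2 share
        const int sum = center * center_weight + (left + right) * outer_weight;
        const int scaled = (sum + 32768) >> 16;         // assumed rounding/rescale step
        return (uint8_t)std::clamp(scaled, 0, 255);
    }
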
pixel_t bb = dstp[0]; + pixel_t gg = dstp[1]; + pixel_t rr = dstp[2]; int x; for (x = 0; x < width-1; ++x) { - BYTE b = ScaledPixelClip(p[x*3+0] * center_weight + (bb + p[x*3+3]) * outer_weight); - bb = p[x*3+0]; p[x*3+0] = b; - BYTE g = ScaledPixelClip(p[x*3+1] * center_weight + (gg + p[x*3+4]) * outer_weight); - gg = p[x*3+1]; p[x*3+1] = g; - BYTE r = ScaledPixelClip(p[x*3+2] * center_weight + (rr + p[x*3+5]) * outer_weight); - rr = p[x*3+2]; p[x*3+2] = r; + // ScaledPixelClip has 2 overloads: BYTE/uint16_t (int/int64 i) + pixel_t b = ScaledPixelClip((weight_t)(dstp[x*3+0] * center_weight + (bb + dstp[x*3+3]) * outer_weight)); + bb = dstp[x*3+0]; dstp[x*3+0] = b; + pixel_t g = ScaledPixelClip((weight_t)(dstp[x*3+1] * center_weight + (gg + dstp[x*3+4]) * outer_weight)); + gg = dstp[x*3+1]; dstp[x*3+1] = g; + pixel_t r = ScaledPixelClip((weight_t)(dstp[x*3+2] * center_weight + (rr + dstp[x*3+5]) * outer_weight)); + rr = dstp[x*3+2]; dstp[x*3+2] = r; } - p[x*3+0] = ScaledPixelClip(p[x*3+0] * center_weight + (bb + p[x*3+0]) * outer_weight); - p[x*3+1] = ScaledPixelClip(p[x*3+1] * center_weight + (gg + p[x*3+1]) * outer_weight); - p[x*3+2] = ScaledPixelClip(p[x*3+2] * center_weight + (rr + p[x*3+2]) * outer_weight); - p += pitch; + dstp[x*3+0] = ScaledPixelClip((weight_t)(dstp[x*3+0] * center_weight + (bb + dstp[x*3+0]) * outer_weight)); + dstp[x*3+1] = ScaledPixelClip((weight_t)(dstp[x*3+1] * center_weight + (gg + dstp[x*3+1]) * outer_weight)); + dstp[x*3+2] = ScaledPixelClip((weight_t)(dstp[x*3+2] * center_weight + (rr + dstp[x*3+2]) * outer_weight)); + dstp += pitch; } } @@ -802,7 +823,7 @@ static __forceinline void af_horizontal_yv12_process_line_c(pixel_t left, BYTE * } template -static void af_horizontal_yv12_c(BYTE* dstp8, size_t height, size_t pitch8, size_t row_size, size_t half_amount) +static void af_horizontal_planar_c(BYTE* dstp8, size_t height, size_t pitch8, size_t row_size, size_t half_amount) { pixel_t* dstp = reinterpret_cast(dstp8); size_t pitch = pitch8 / sizeof(pixel_t); @@ -827,7 +848,7 @@ static __forceinline void af_horizontal_yv12_process_line_float_c(float left, fl dstp[x] = dstp[x] * center_weight + (left + dstp[x]) * outer_weight; } -static void af_horizontal_yv12_float_c(BYTE* dstp8, size_t height, size_t pitch8, size_t row_size, float amount) +static void af_horizontal_planar_float_c(BYTE* dstp8, size_t height, size_t pitch8, size_t row_size, float amount) { float* dstp = reinterpret_cast(dstp8); size_t pitch = pitch8 / sizeof(float); @@ -841,7 +862,7 @@ static void af_horizontal_yv12_float_c(BYTE* dstp8, size_t height, size_t pitch8 } } -static void af_horizontal_yv12_sse2(BYTE* dstp, size_t height, size_t pitch, size_t width, size_t amount) { +static void af_horizontal_planar_sse2(BYTE* dstp, size_t height, size_t pitch, size_t width, size_t amount) { size_t mod16_width = (width / 16) * 16; size_t sse_loop_limit = width == mod16_width ? mod16_width - 16 : mod16_width; int center_weight_c = int(amount*2); @@ -903,7 +924,7 @@ static void af_horizontal_yv12_sse2(BYTE* dstp, size_t height, size_t pitch, siz #ifdef X86_32 -static void af_horizontal_yv12_mmx(BYTE* dstp, size_t height, size_t pitch, size_t width, size_t amount) { +static void af_horizontal_planar_mmx(BYTE* dstp, size_t height, size_t pitch, size_t width, size_t amount) { size_t mod8_width = (width / 8) * 8; size_t mmx_loop_limit = width == mod8_width ? 
mod8_width - 8 : mod8_width; int center_weight_c = amount*2; @@ -965,16 +986,14 @@ static void af_horizontal_yv12_mmx(BYTE* dstp, size_t height, size_t pitch, size #endif -static void copy_frame(const PVideoFrame &src, PVideoFrame &dst, IScriptEnvironment *env) { - env->BitBlt(dst->GetWritePtr(), dst->GetPitch(), src->GetReadPtr(), src->GetPitch(), src->GetRowSize(), src->GetHeight()); - // Blit More planes (pitch, rowsize and height should be 0, if none is present) - env->BitBlt(dst->GetWritePtr(PLANAR_V), dst->GetPitch(PLANAR_V), src->GetReadPtr(PLANAR_V), - src->GetPitch(PLANAR_V), src->GetRowSize(PLANAR_V), src->GetHeight(PLANAR_V)); - env->BitBlt(dst->GetWritePtr(PLANAR_U), dst->GetPitch(PLANAR_U), src->GetReadPtr(PLANAR_U), - src->GetPitch(PLANAR_U), src->GetRowSize(PLANAR_U), src->GetHeight(PLANAR_U)); +static void copy_frame(const PVideoFrame &src, PVideoFrame &dst, IScriptEnvironment *env, const int *planes, int plane_count) { + for (int p = 0; p < plane_count; p++) { + int plane = planes[p]; + env->BitBlt(dst->GetWritePtr(plane), dst->GetPitch(plane), src->GetReadPtr(plane), + src->GetPitch(plane), src->GetRowSize(plane), src->GetHeight(plane)); + } } - // ---------------------------------- // Blur/Sharpen Horizontal GetFrame() // ---------------------------------- @@ -984,9 +1003,14 @@ PVideoFrame __stdcall AdjustFocusH::GetFrame(int n, IScriptEnvironment* env) PVideoFrame src = child->GetFrame(n, env); PVideoFrame dst = env->NewVideoFrame(vi); + const int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A}; + const int planesRGB[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A}; + const int *planes = vi.IsYUV() || vi.IsYUVA() ? planesYUV : planesRGB; + + int pixelsize = vi.ComponentSize(); + if (vi.IsPlanar()) { - const int planes[3] = { PLANAR_Y, PLANAR_U, PLANAR_V }; - copy_frame(src, dst, env); //planar processing is always in-place + copy_frame(src, dst, env, planes, vi.NumComponents() ); //planar processing is always in-place int pixelsize = vi.ComponentSize(); for(int cplane=0;cplane<3;cplane++) { int plane = planes[cplane]; @@ -995,19 +1019,19 @@ PVideoFrame __stdcall AdjustFocusH::GetFrame(int n, IScriptEnvironment* env) int pitch = dst->GetPitch(plane); int height = dst->GetHeight(plane); if (pixelsize==1 && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(q, 16) && row_size >= 16) { - af_horizontal_yv12_sse2(q, height, pitch, row_size, half_amount); + af_horizontal_planar_sse2(q, height, pitch, row_size, half_amount); } else #ifdef X86_32 if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_MMX) && row_size >= 8) { - af_horizontal_yv12_mmx(q,height,pitch,row_size,half_amount); + af_horizontal_planar_mmx(q,height,pitch,row_size,half_amount); } else #endif { switch (pixelsize) { - case 1: af_horizontal_yv12_c(q, height, pitch, row_size, half_amount); break; - case 2: af_horizontal_yv12_c(q, height, pitch, row_size, half_amount); break; + case 1: af_horizontal_planar_c(q, height, pitch, row_size, half_amount); break; + case 2: af_horizontal_planar_c(q, height, pitch, row_size, half_amount); break; default: // 4: float - af_horizontal_yv12_float_c(q, height, pitch, row_size, (float)amountd); break; + af_horizontal_planar_float_c(q, height, pitch, row_size, (float)amountd); break; } } @@ -1016,37 +1040,44 @@ PVideoFrame __stdcall AdjustFocusH::GetFrame(int n, IScriptEnvironment* env) if (vi.IsYUY2()) { BYTE* q = dst->GetWritePtr(); const int pitch = dst->GetPitch(); + // PF: sse2/mmx versions are not identical to C. 
Sharpen(1.0, 1.0) has ugly artifacts if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(src->GetReadPtr(), 16)) { af_horizontal_yuy2_sse2(dst->GetWritePtr(), src->GetReadPtr(), dst->GetPitch(), src->GetPitch(), vi.height, vi.width, half_amount); } else #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) { + if ((env->GetCPUFlags() & CPUF_MMX)) { af_horizontal_yuy2_mmx(dst->GetWritePtr(), src->GetReadPtr(), dst->GetPitch(), src->GetPitch(), vi.height, vi.width, half_amount); } else #endif { - copy_frame(src, dst, env); //in-place + copy_frame(src, dst, env, planesYUV, 1); //in-place af_horizontal_yuy2_c(q,vi.height,pitch,vi.width,half_amount); } } - else if (vi.IsRGB32()) { - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(src->GetReadPtr(), 16)) { + else if (vi.IsRGB32() || vi.IsRGB64()) { + if ((pixelsize==1) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(src->GetReadPtr(), 16)) { //this one is NOT in-place af_horizontal_rgb32_sse2(dst->GetWritePtr(), src->GetReadPtr(), dst->GetPitch(), src->GetPitch(), vi.height, vi.width, half_amount); } else #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) + if ((pixelsize==1) && (env->GetCPUFlags() & CPUF_MMX)) { //so as this one af_horizontal_rgb32_mmx(dst->GetWritePtr(), src->GetReadPtr(), dst->GetPitch(), src->GetPitch(), vi.height, vi.width, half_amount); } else #endif { - copy_frame(src, dst, env); - af_horizontal_rgb32_c(dst->GetWritePtr(), vi.height, dst->GetPitch(), vi.width, half_amount); + copy_frame(src, dst, env, planesYUV, 1); + if(pixelsize==1) + af_horizontal_rgb32_64_c(dst->GetWritePtr(), vi.height, dst->GetPitch(), vi.width, half_amount); + else + af_horizontal_rgb32_64_c(dst->GetWritePtr(), vi.height, dst->GetPitch(), vi.width, half_amount); } - } else { //rgb24 - copy_frame(src, dst, env); - af_horizontal_rgb24_c(dst->GetWritePtr(), vi.height, dst->GetPitch(), vi.width, half_amount); + } else if (vi.IsRGB24() || vi.IsRGB48()) { + copy_frame(src, dst, env, planesYUV, 1); + if(pixelsize==1) + af_horizontal_rgb24_48_c(dst->GetWritePtr(), vi.height, dst->GetPitch(), vi.width, half_amount); + else + af_horizontal_rgb24_48_c(dst->GetWritePtr(), vi.height, dst->GetPitch(), vi.width, half_amount); } } @@ -1127,12 +1158,16 @@ TemporalSoften::TemporalSoften( PClip _child, unsigned radius, unsigned luma_thr child->SetCacheHints(CACHE_WINDOW,kernel); - if (vi.IsRGB24()) { - env->ThrowError("TemporalSoften: RGB24 Not supported, use ConvertToRGB32()."); + if (vi.IsRGB24() || vi.IsRGB48()) { + env->ThrowError("TemporalSoften: RGB24/48 Not supported, use ConvertToRGB32/48()."); + } + + if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { + env->ThrowError("TemporalSoften: Planar RGB Not supported, use ConvertToRGB32/48()."); } - if ((vi.IsRGB32()) && (vi.width&1)) { - env->ThrowError("TemporalSoften: RGB32 source must be multiple of 2 in width."); + if ((vi.IsRGB32() || vi.IsRGB64()) && (vi.width&1)) { + env->ThrowError("TemporalSoften: RGB32/64 source must be multiple of 2 in width."); } if ((vi.IsYUY2()) && (vi.width&3)) { @@ -1142,19 +1177,21 @@ TemporalSoften::TemporalSoften( PClip _child, unsigned radius, unsigned luma_thr if (scenechange >= 255) { scenechange = 0; } - if (scenechange>0 && vi.IsRGB32()) { - env->ThrowError("TemporalSoften: Scenechange not available on RGB32"); + + if (scenechange>0 && (vi.IsRGB32() || vi.IsRGB64())) { + env->ThrowError("TemporalSoften: Scenechange not available on RGB32/64"); } + pixelsize = vi.ComponentSize(); + // original scenechange parameter always 0-255 int factor; - if (vi.IsPlanar()) // Y/YUV + if 
(vi.IsPlanar()) // Y/YUV, no Planar RGB here factor = 1; // bitdepth independent. sad normalizes else - factor = vi.BytesFromPixels(1); + factor = vi.BytesFromPixels(1) / pixelsize; // /pixelsize: correction for packed 16 bit rgb scenechange *= ((vi.width/32)*32)*vi.height*factor; // why /*32? - pixelsize = vi.ComponentSize(); int c = 0; if (vi.IsPlanar()) { From 4446dd45d0dd2967e693c6f7786a448f44080551 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 2 Sep 2016 15:35:26 +0200 Subject: [PATCH 033/120] Conditional (runtime) functions for YUV 16 bit/float and RGB64 and Planar RGB --- .../conditional/conditional_functions.cpp | 401 ++++++++++++------ .../conditional/conditional_functions.h | 21 +- 2 files changed, 300 insertions(+), 122 deletions(-) diff --git a/avs_core/filters/conditional/conditional_functions.cpp b/avs_core/filters/conditional/conditional_functions.cpp index 6d6d6473e..da52052ab 100644 --- a/avs_core/filters/conditional/conditional_functions.cpp +++ b/avs_core/filters/conditional/conditional_functions.cpp @@ -39,47 +39,72 @@ #include #include #include - +#include +#include extern const AVSFunction Conditional_funtions_filters[] = { - { "AverageLuma", BUILTIN_FUNC_PREFIX, "c[offset]i", AveragePlane::Create_y }, - { "AverageChromaU", BUILTIN_FUNC_PREFIX, "c[offset]i", AveragePlane::Create_u }, - { "AverageChromaV", BUILTIN_FUNC_PREFIX, "c[offset]i", AveragePlane::Create_v }, -//{ "AverageSat","c[offset]i", AverageSat::Create }, Sum(SatLookup[U,V])/N, SatLookup[U,V]=1.4087*sqrt((U-128)**2+(V-128)**2) + { "AverageLuma", BUILTIN_FUNC_PREFIX, "c[offset]i", AveragePlane::Create, (void *)PLANAR_Y }, + { "AverageChromaU", BUILTIN_FUNC_PREFIX, "c[offset]i", AveragePlane::Create, (void *)PLANAR_U }, + { "AverageChromaV", BUILTIN_FUNC_PREFIX, "c[offset]i", AveragePlane::Create, (void *)PLANAR_V }, + { "AverageR", BUILTIN_FUNC_PREFIX, "c[offset]i", AveragePlane::Create, (void *)PLANAR_R }, + { "AverageG", BUILTIN_FUNC_PREFIX, "c[offset]i", AveragePlane::Create, (void *)PLANAR_G }, + { "AverageB", BUILTIN_FUNC_PREFIX, "c[offset]i", AveragePlane::Create, (void *)PLANAR_B }, + //{ "AverageSat","c[offset]i", AverageSat::Create }, Sum(SatLookup[U,V])/N, SatLookup[U,V]=1.4087*sqrt((U-128)**2+(V-128)**2) //{ "AverageHue","c[offset]i", AverageHue::Create }, Sum(HueLookup[U,V])/N, HueLookup[U,V]=40.5845*Atan2(U-128,V-128) - { "RGBDifference", BUILTIN_FUNC_PREFIX, "cc", ComparePlane::Create_rgb }, - { "LumaDifference", BUILTIN_FUNC_PREFIX, "cc", ComparePlane::Create_y }, - { "ChromaUDifference", BUILTIN_FUNC_PREFIX, "cc", ComparePlane::Create_u }, - { "ChromaVDifference", BUILTIN_FUNC_PREFIX, "cc", ComparePlane::Create_v }, -//{ "SatDifference","cc", CompareSat::Create }, Sum(Abs(SatLookup[U1,V1]-SatLookup[U2,V2]))/N + { "RGBDifference", BUILTIN_FUNC_PREFIX, "cc", ComparePlane::Create, (void *)-1 }, + { "LumaDifference", BUILTIN_FUNC_PREFIX, "cc", ComparePlane::Create, (void *)PLANAR_Y }, + { "ChromaUDifference", BUILTIN_FUNC_PREFIX, "cc", ComparePlane::Create, (void *)PLANAR_U }, + { "ChromaVDifference", BUILTIN_FUNC_PREFIX, "cc", ComparePlane::Create, (void *)PLANAR_V }, + { "RDifference", BUILTIN_FUNC_PREFIX, "cc", ComparePlane::Create, (void *)PLANAR_R }, + { "GDifference", BUILTIN_FUNC_PREFIX, "cc", ComparePlane::Create, (void *)PLANAR_G }, + { "BDifference", BUILTIN_FUNC_PREFIX, "cc", ComparePlane::Create, (void *)PLANAR_B }, + //{ "SatDifference","cc", CompareSat::Create }, Sum(Abs(SatLookup[U1,V1]-SatLookup[U2,V2]))/N //{ "HueDifference","cc", CompareHue::Create }, 
Sum(Abs(HueLookup[U1,V1]-HueLookup[U2,V2]))/N - { "YDifferenceFromPrevious", BUILTIN_FUNC_PREFIX, "c", ComparePlane::Create_prev_y }, - { "UDifferenceFromPrevious", BUILTIN_FUNC_PREFIX, "c", ComparePlane::Create_prev_u }, - { "VDifferenceFromPrevious", BUILTIN_FUNC_PREFIX, "c", ComparePlane::Create_prev_v }, - { "RGBDifferenceFromPrevious", BUILTIN_FUNC_PREFIX, "c", ComparePlane::Create_prev_rgb }, -//{ "SatDifferenceFromPrevious","c", CompareSat::Create_prev }, + { "YDifferenceFromPrevious", BUILTIN_FUNC_PREFIX, "c", ComparePlane::Create_prev, (void *)PLANAR_Y }, + { "UDifferenceFromPrevious", BUILTIN_FUNC_PREFIX, "c", ComparePlane::Create_prev, (void *)PLANAR_U }, + { "VDifferenceFromPrevious", BUILTIN_FUNC_PREFIX, "c", ComparePlane::Create_prev, (void *)PLANAR_V }, + { "RGBDifferenceFromPrevious", BUILTIN_FUNC_PREFIX, "c", ComparePlane::Create_prev, (void *)-1 }, + { "RDifferenceFromPrevious", BUILTIN_FUNC_PREFIX, "c", ComparePlane::Create_prev, (void *)PLANAR_R }, + { "GDifferenceFromPrevious", BUILTIN_FUNC_PREFIX, "c", ComparePlane::Create_prev, (void *)PLANAR_G }, + { "BDifferenceFromPrevious", BUILTIN_FUNC_PREFIX, "c", ComparePlane::Create_prev, (void *)PLANAR_B }, + //{ "SatDifferenceFromPrevious","c", CompareSat::Create_prev }, //{ "HueDifferenceFromPrevious","c", CompareHue::Create_prev }, - { "YDifferenceToNext", BUILTIN_FUNC_PREFIX, "c[offset]i", ComparePlane::Create_next_y }, - { "UDifferenceToNext", BUILTIN_FUNC_PREFIX, "c[offset]i", ComparePlane::Create_next_u }, - { "VDifferenceToNext", BUILTIN_FUNC_PREFIX, "c[offset]i", ComparePlane::Create_next_v }, - { "RGBDifferenceToNext", BUILTIN_FUNC_PREFIX, "c[offset]i", ComparePlane::Create_next_rgb }, -//{ "SatDifferenceFromNext","c[offset]i", CompareSat::Create_next }, + { "YDifferenceToNext", BUILTIN_FUNC_PREFIX, "c[offset]i", ComparePlane::Create_next, (void *)PLANAR_Y }, + { "UDifferenceToNext", BUILTIN_FUNC_PREFIX, "c[offset]i", ComparePlane::Create_next, (void *)PLANAR_U }, + { "VDifferenceToNext", BUILTIN_FUNC_PREFIX, "c[offset]i", ComparePlane::Create_next, (void *)PLANAR_V }, + { "RGBDifferenceToNext", BUILTIN_FUNC_PREFIX, "c[offset]i", ComparePlane::Create_next, (void *)-1 }, + { "RDifferenceToNext", BUILTIN_FUNC_PREFIX, "c[offset]i", ComparePlane::Create_next, (void *)PLANAR_R }, + { "GDifferenceToNext", BUILTIN_FUNC_PREFIX, "c[offset]i", ComparePlane::Create_next, (void *)PLANAR_G }, + { "BDifferenceToNext", BUILTIN_FUNC_PREFIX, "c[offset]i", ComparePlane::Create_next, (void *)PLANAR_B }, + //{ "SatDifferenceFromNext","c[offset]i", CompareSat::Create_next }, //{ "HueDifferenceFromNext","c[offset]i", CompareHue::Create_next }, - { "YPlaneMax", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_max_y }, - { "YPlaneMin", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_min_y }, - { "YPlaneMedian", BUILTIN_FUNC_PREFIX, "c[offset]i", MinMaxPlane::Create_median_y }, - { "UPlaneMax", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_max_u }, - { "UPlaneMin", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_min_u }, - { "UPlaneMedian", BUILTIN_FUNC_PREFIX, "c[offset]i", MinMaxPlane::Create_median_u }, - { "VPlaneMax", BUILTIN_FUNC_PREFIX, "c[threshold]f", MinMaxPlane::Create_max_v }, - { "VPlaneMin", BUILTIN_FUNC_PREFIX, "c[threshold]f", MinMaxPlane::Create_min_v }, - { "VPlaneMedian", BUILTIN_FUNC_PREFIX, "c[offset]i", MinMaxPlane::Create_median_v }, - { "YPlaneMinMaxDifference", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_minmax_y }, 
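The rewritten registration table collapses the per-plane Create_y/Create_u/... factories into a single Create per family by carrying the plane constant in the AVSFunction user_data pointer and casting it back out inside the factory. A reduced sketch of that dispatch pattern with stand-in types (none of these names are the real AviSynth API):

    #include <cstdint>
    #include <cstdio>

    // Stand-in dispatch: the table stores a small integer id as a void*, and one
    // factory recovers it, instead of one near-identical factory per plane.
    enum PlaneId { PLANE_Y = 1, PLANE_U = 2, PLANE_V = 3 };

    static void create_average(void* user_data)
    {
        const int plane = (int)reinterpret_cast<intptr_t>(user_data);
        std::printf("constructing average filter for plane id %d\n", plane);
    }

    struct FunctionEntry { const char* name; void (*factory)(void*); void* user_data; };

    static const FunctionEntry entries[] = {
        { "AverageLuma",    create_average, (void*)(intptr_t)PLANE_Y },
        { "AverageChromaU", create_average, (void*)(intptr_t)PLANE_U },
        { "AverageChromaV", create_average, (void*)(intptr_t)PLANE_V },
    };
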
- { "UPlaneMinMaxDifference", BUILTIN_FUNC_PREFIX, "c[threshold]f", MinMaxPlane::Create_minmax_u }, - { "VPlaneMinMaxDifference", BUILTIN_FUNC_PREFIX, "c[threshold]f", MinMaxPlane::Create_minmax_v }, + { "YPlaneMax", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_max, (void *)PLANAR_Y }, + { "YPlaneMin", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_min, (void *)PLANAR_Y }, + { "YPlaneMedian", BUILTIN_FUNC_PREFIX, "c[offset]i", MinMaxPlane::Create_median, (void *)PLANAR_Y }, + { "UPlaneMax", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_max, (void *)PLANAR_U }, + { "UPlaneMin", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_min, (void *)PLANAR_U }, + { "UPlaneMedian", BUILTIN_FUNC_PREFIX, "c[offset]i", MinMaxPlane::Create_median, (void *)PLANAR_U }, + { "VPlaneMax", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_max, (void *)PLANAR_V }, // AVS+! was before: missing offset parameter + { "VPlaneMin", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_min, (void *)PLANAR_V }, // AVS+! was before: missing offset parameter + { "VPlaneMedian", BUILTIN_FUNC_PREFIX, "c[offset]i", MinMaxPlane::Create_median, (void *)PLANAR_V }, + { "RPlaneMax", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_max, (void *)PLANAR_R }, + { "RPlaneMin", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_min, (void *)PLANAR_R }, + { "RPlaneMedian", BUILTIN_FUNC_PREFIX, "c[offset]i", MinMaxPlane::Create_median, (void *)PLANAR_R }, + { "GPlaneMax", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_max, (void *)PLANAR_G }, + { "GPlaneMin", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_min, (void *)PLANAR_G }, + { "GPlaneMedian", BUILTIN_FUNC_PREFIX, "c[offset]i", MinMaxPlane::Create_median, (void *)PLANAR_G }, + { "BPlaneMax", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_max, (void *)PLANAR_B }, + { "BPlaneMin", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_min, (void *)PLANAR_B }, + { "BPlaneMedian", BUILTIN_FUNC_PREFIX, "c[offset]i", MinMaxPlane::Create_median, (void *)PLANAR_B }, + { "YPlaneMinMaxDifference", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_minmax, (void *)PLANAR_Y }, + { "UPlaneMinMaxDifference", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_minmax, (void *)PLANAR_U }, // AVS+! was before: missing offset parameter + { "VPlaneMinMaxDifference", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_minmax, (void *)PLANAR_V }, // AVS+! 
was before: missing offset parameter + { "RPlaneMinMaxDifference", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_minmax, (void *)PLANAR_R }, + { "GPlaneMinMaxDifference", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_minmax, (void *)PLANAR_G }, + { "BPlaneMinMaxDifference", BUILTIN_FUNC_PREFIX, "c[threshold]f[offset]i", MinMaxPlane::Create_minmax, (void *)PLANAR_B }, //{ "SatMax","c[threshold]f[offset]i", MinMaxPlane::Create_maxsat }, ++accum[SatLookup[U,V]] //{ "SatMin","c[threshold]f[offset]i", MinMaxPlane::Create_minsat }, @@ -95,6 +120,11 @@ extern const AVSFunction Conditional_funtions_filters[] = { }; +AVSValue AveragePlane::Create(AVSValue args, void* user_data, IScriptEnvironment* env) { + int plane = (int)reinterpret_cast(user_data); + return AvgPlane(args[0], user_data, plane, args[1].AsInt(0), env); +} +/* AVSValue AveragePlane::Create_y(AVSValue args, void* user_data, IScriptEnvironment* env) { return AvgPlane(args[0], user_data, PLANAR_Y, args[1].AsInt(0), env); } @@ -108,22 +138,27 @@ AVSValue AveragePlane::Create_u(AVSValue args, void* user_data, IScriptEnvironme AVSValue AveragePlane::Create_v(AVSValue args, void* user_data, IScriptEnvironment* env) { return AvgPlane(args[0], user_data, PLANAR_V, args[1].AsInt(0), env); } +*/ // Average plane -static size_t get_sum_of_pixels_c(const BYTE* srcp, size_t height, size_t width, size_t pitch) { - unsigned int accum = 0; +template +static double get_sum_of_pixels_c(const BYTE* srcp8, size_t height, size_t width, size_t pitch) { + typedef typename std::conditional < sizeof(pixel_t) == 4, double, __int64>::type sum_t; + sum_t accum = 0; // int32 holds sum of maximum 16 Mpixels for 8 bit, and 65536 pixels for uint16_t pixels + const pixel_t *srcp = reinterpret_cast(srcp8); + pitch /= sizeof(pixel_t); for (size_t y = 0; y < height; y++) { for (size_t x = 0; x < width; x++) { accum += srcp[x]; } srcp += pitch; } - return accum; + return (double)accum; } -static size_t get_sum_of_pixels_sse2(const BYTE* srcp, size_t height, size_t width, size_t pitch) { +static double get_sum_of_pixels_sse2(const BYTE* srcp, size_t height, size_t width, size_t pitch) { size_t mod16_width = width / 16 * 16; - int result = 0; + __int64 result = 0; __m128i sum = _mm_setzero_si128(); __m128i zero = _mm_setzero_si128(); @@ -143,13 +178,13 @@ static size_t get_sum_of_pixels_sse2(const BYTE* srcp, size_t height, size_t wid __m128i upper = _mm_castps_si128(_mm_movehl_ps(_mm_setzero_ps(), _mm_castsi128_ps(sum))); sum = _mm_add_epi32(sum, upper); result += _mm_cvtsi128_si32(sum); - return result; + return (double)result; } #ifdef X86_32 -static size_t get_sum_of_pixels_isse(const BYTE* srcp, size_t height, size_t width, size_t pitch) { +static double get_sum_of_pixels_isse(const BYTE* srcp, size_t height, size_t width, size_t pitch) { size_t mod8_width = width / 8 * 8; - int result = 0; + __int64 result = 0; __m64 sum = _mm_setzero_si64(); __m64 zero = _mm_setzero_si64(); @@ -168,7 +203,7 @@ static size_t get_sum_of_pixels_isse(const BYTE* srcp, size_t height, size_t wid } result += _mm_cvtsi64_si32(sum); _mm_empty(); - return result; + return (double)result; } #endif @@ -178,14 +213,12 @@ AVSValue AveragePlane::AvgPlane(AVSValue clip, void* user_data, int plane, int o { if (!clip.IsClip()) env->ThrowError("Average Plane: No clip supplied!"); - if (!(env->GetCPUFlags() & CPUF_INTEGER_SSE)) - env->ThrowError("Average Plane: Requires Integer SSE capable CPU."); PClip child = clip.AsClip(); VideoInfo vi = 
child->GetVideoInfo(); if (!vi.IsPlanar()) - env->ThrowError("Average Plane: Only planar images (as YV12) supported!"); + env->ThrowError("Average Plane: Only planar YUV or planar RGB images supported!"); AVSValue cn = env->GetVarDef("current_frame"); if (!cn.IsInt()) @@ -196,34 +229,54 @@ AVSValue AveragePlane::AvgPlane(AVSValue clip, void* user_data, int plane, int o PVideoFrame src = child->GetFrame(n,env); + int pixelsize = vi.ComponentSize(); + const BYTE* srcp = src->GetReadPtr(plane); int height = src->GetHeight(plane); - int width = src->GetRowSize(plane); + int width = src->GetRowSize(plane) / pixelsize; int pitch = src->GetPitch(plane); if (width == 0 || height == 0) - env->ThrowError("Average Plane: No chroma planes in Y8!"); + env->ThrowError("Average Plane: plane does not exist!"); - size_t sum = 0; + double sum = 0.0; - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && width >= 16) { + + int total_pixels = width*height; + bool sum_in_32bits; + if (pixelsize == 4) + sum_in_32bits = false; + else // worst case + sum_in_32bits = ((__int64)total_pixels * (pixelsize == 1 ? 255 : 65535)) <= std::numeric_limits::max(); + + if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && width >= 16) { sum = get_sum_of_pixels_sse2(srcp, height, width, pitch); } else #ifdef X86_32 - if ((env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { + if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { sum = get_sum_of_pixels_isse(srcp, height, width, pitch); } else #endif { - sum = get_sum_of_pixels_c(srcp, height, width, pitch); + if(pixelsize==1) + sum = get_sum_of_pixels_c(srcp, height, width, pitch); + else if(pixelsize==2) + sum = get_sum_of_pixels_c(srcp, height, width, pitch); + else // pixelsize==4 + sum = get_sum_of_pixels_c(srcp, height, width, pitch); } - float f = (float)((double)sum / (height * width)); + float f = (float)(sum / (height * width)); return (AVSValue)f; } +AVSValue ComparePlane::Create(AVSValue args, void* user_data, IScriptEnvironment* env) { + int plane = (int)reinterpret_cast(user_data); + return CmpPlane(args[0],args[1], user_data, plane, env); +} +/* AVSValue ComparePlane::Create_y(AVSValue args, void* user_data, IScriptEnvironment* env) { return CmpPlane(args[0],args[1], user_data, PLANAR_Y, env); } @@ -241,8 +294,14 @@ AVSValue ComparePlane::Create_v(AVSValue args, void* user_data, IScriptEnvironme AVSValue ComparePlane::Create_rgb(AVSValue args, void* user_data, IScriptEnvironment* env) { return CmpPlane(args[0],args[1], user_data, -1 , env); } +*/ +AVSValue ComparePlane::Create_prev(AVSValue args, void* user_data, IScriptEnvironment* env) { + int plane = (int)reinterpret_cast(user_data); + return CmpPlaneSame(args[0], user_data, -1, plane, env); +} +/* AVSValue ComparePlane::Create_prev_y(AVSValue args, void* user_data, IScriptEnvironment* env) { return CmpPlaneSame(args[0], user_data, -1, PLANAR_Y, env); } @@ -258,8 +317,15 @@ AVSValue ComparePlane::Create_prev_v(AVSValue args, void* user_data, IScriptEnvi AVSValue ComparePlane::Create_prev_rgb(AVSValue args, void* user_data, IScriptEnvironment* env) { return CmpPlaneSame(args[0], user_data, -1, -1, env); } +*/ +AVSValue ComparePlane::Create_next(AVSValue args, void* user_data, IScriptEnvironment* env) { + int plane = (int)reinterpret_cast(user_data); + return CmpPlaneSame(args[0], user_data, args[1].AsInt(1), plane, env); +} + +/* AVSValue ComparePlane::Create_next_y(AVSValue args, void* user_data, 
IScriptEnvironment* env) { return CmpPlaneSame(args[0], user_data, args[1].AsInt(1), PLANAR_Y, env); } @@ -275,34 +341,46 @@ AVSValue ComparePlane::Create_next_v(AVSValue args, void* user_data, IScriptEnvi AVSValue ComparePlane::Create_next_rgb(AVSValue args, void* user_data, IScriptEnvironment* env) { return CmpPlaneSame(args[0], user_data, args[1].AsInt(1), -1, env); } +*/ +template +static double get_sad_c(const BYTE* c_plane8, const BYTE* t_plane8, size_t height, size_t width, size_t c_pitch, size_t t_pitch) { + const pixel_t *c_plane = reinterpret_cast(c_plane8); + const pixel_t *t_plane = reinterpret_cast(t_plane8); + c_pitch /= sizeof(pixel_t); + t_pitch /= sizeof(pixel_t); + typedef typename std::conditional < sizeof(pixel_t) == 4, double, __int64>::type sum_t; + sum_t accum = 0; // int32 holds sum of maximum 16 Mpixels for 8 bit, and 65536 pixels for uint16_t pixels -static size_t get_sad_c(const BYTE* c_plane, const BYTE* tplane, size_t height, size_t width, size_t c_pitch, size_t t_pitch) { - size_t accum = 0; for (size_t y = 0; y < height; y++) { for (size_t x = 0; x < width; x++) { - accum += abs(tplane[x] - c_plane[x]); + accum += abs(t_plane[x] - c_plane[x]); } c_plane += c_pitch; - tplane += t_pitch; + t_plane += t_pitch; } - return accum; + return (double)accum; } -static size_t get_sad_rgb_c(const BYTE* c_plane, const BYTE* tplane, size_t height, size_t width, size_t c_pitch, size_t t_pitch) { - size_t accum = 0; +template +static double get_sad_rgb_c(const BYTE* c_plane8, const BYTE* t_plane8, size_t height, size_t width, size_t c_pitch, size_t t_pitch) { + const pixel_t *c_plane = reinterpret_cast(c_plane8); + const pixel_t *t_plane = reinterpret_cast(t_plane8); + c_pitch /= sizeof(pixel_t); + t_pitch /= sizeof(pixel_t); + __int64 accum = 0; // packed rgb: integer type only for (size_t y = 0; y < height; y++) { for (size_t x = 0; x < width; x+=4) { - accum += abs(tplane[x] - c_plane[x]); - accum += abs(tplane[x+1] - c_plane[x+1]); - accum += abs(tplane[x+2] - c_plane[x+2]); + accum += abs(t_plane[x] - c_plane[x]); + accum += abs(t_plane[x+1] - c_plane[x+1]); + accum += abs(t_plane[x+2] - c_plane[x+2]); } c_plane += c_pitch; - tplane += t_pitch; + t_plane += t_pitch; } - return accum; + return (double)accum; } @@ -431,11 +509,13 @@ AVSValue ComparePlane::CmpPlane(AVSValue clip, AVSValue clip2, void* user_data, PClip child2 = clip2.AsClip(); VideoInfo vi2 = child2->GetVideoInfo(); if (plane !=-1 ) { - if (!vi.IsPlanar()) - env->ThrowError("Plane Difference: Only planar images (as YV12) supported!"); - if (!vi2.IsPlanar()) - env->ThrowError("Plane Difference: Only planar images (as YV12) supported!"); + if (!vi.IsPlanar() || !vi2.IsPlanar()) + env->ThrowError("Plane Difference: Only planar YUV or planar RGB images supported!"); } else { + if(vi.IsPlanarRGB() || vi.IsPlanarRGBA()) + env->ThrowError("RGB Difference: Planar RGB is not supported here (clip 1)"); + if(vi2.IsPlanarRGB() || vi2.IsPlanarRGBA()) + env->ThrowError("RGB Difference: Planar RGB is not supported here (clip 2)"); if (!vi.IsRGB()) env->ThrowError("RGB Difference: RGB difference can only be tested on RGB images! 
(clip 1)"); if (!vi2.IsRGB()) @@ -443,7 +523,6 @@ AVSValue ComparePlane::CmpPlane(AVSValue clip, AVSValue clip2, void* user_data, plane = 0; } - AVSValue cn = env->GetVarDef("current_frame"); if (!cn.IsInt()) env->ThrowError("Plane Difference: This filter can only be used within run-time filters"); @@ -454,54 +533,75 @@ AVSValue ComparePlane::CmpPlane(AVSValue clip, AVSValue clip2, void* user_data, PVideoFrame src = child->GetFrame(n,env); PVideoFrame src2 = child2->GetFrame(n,env); + int pixelsize = vi.ComponentSize(); + const BYTE* srcp = src->GetReadPtr(plane); const BYTE* srcp2 = src2->GetReadPtr(plane); const int height = src->GetHeight(plane); - const int width = src->GetRowSize(plane); + const int width = src->GetRowSize(plane) / pixelsize; const int pitch = src->GetPitch(plane); const int height2 = src2->GetHeight(plane); - const int width2 = src2->GetRowSize(plane); + const int width2 = src2->GetRowSize(plane) / pixelsize; const int pitch2 = src2->GetPitch(plane); + if(vi.ComponentSize() != vi2.ComponentSize()) + env->ThrowError("Plane Difference: Bit-depth are not the same!"); + if (width == 0 || height == 0) - env->ThrowError("Plane Difference: No chroma planes in Y8!"); + env->ThrowError("Plane Difference: plane does not exist!"); if (height != height2 || width != width2) env->ThrowError("Plane Difference: Images are not the same size!"); - size_t sad = 0; - if (vi.IsRGB32()) { - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { + int total_pixels = width*height; + bool sum_in_32bits; + if (pixelsize == 4) + sum_in_32bits = false; + else // worst case + sum_in_32bits = ((__int64)total_pixels * (pixelsize == 1 ? 255 : 65535)) <= std::numeric_limits::max(); + + double sad = 0.0; + if (vi.IsRGB32() || vi.IsRGB64()) { + if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { sad = get_sad_rgb_sse2(srcp, srcp2, height, width, pitch, pitch2); } else #ifdef X86_32 - if ((env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { + if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { sad = get_sad_rgb_isse(srcp, srcp2, height, width, pitch, pitch2); } else #endif { - sad = get_sad_rgb_c(srcp, srcp2, height, width, pitch, pitch2); + if(pixelsize==1) + sad = get_sad_rgb_c(srcp, srcp2, height, width, pitch, pitch2); + else // pixelsize==2 + sad = get_sad_rgb_c(srcp, srcp2, height, width, pitch, pitch2); } } else { - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { + if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { sad = get_sad_sse2(srcp, srcp2, height, width, pitch, pitch2); } else #ifdef X86_32 - if ((env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { + if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { sad = get_sad_isse(srcp, srcp2, height, width, pitch, pitch2); } else #endif { - sad = get_sad_c(srcp, srcp2, height, width, pitch, pitch2); + if(pixelsize==1) + sad = get_sad_c(srcp, srcp2, height, width, pitch, pitch2); + else if(pixelsize==2) + sad = get_sad_c(srcp, srcp2, height, width, pitch, pitch2); + else // pixelsize==4 + sad = get_sad_c(srcp, srcp2, height, width, pitch, pitch2); + } } float f; - if (vi.IsRGB32()) - f = (float)((double)(sad * 4) / (height * width * 3)); + if (vi.IsRGB32() || 
vi.IsRGB64()) + f = (float)((sad * 4) / (height * width * 3)); else - f = (float)((double)sad / (height * width)); + f = (float)(sad / (height * width)); return (AVSValue)f; } @@ -512,17 +612,15 @@ AVSValue ComparePlane::CmpPlaneSame(AVSValue clip, void* user_data, int offset, if (!clip.IsClip()) env->ThrowError("Plane Difference: No clip supplied!"); - bool ISSE = !!(env->GetCPUFlags() & CPUF_INTEGER_SSE); - PClip child = clip.AsClip(); VideoInfo vi = child->GetVideoInfo(); if (plane ==-1 ) { - if (!vi.IsRGB()) - env->ThrowError("RGB Difference: RGB difference can only be calculated on RGB images"); + if (!vi.IsRGB() || vi.IsPlanarRGB() || vi.IsPlanarRGBA()) + env->ThrowError("RGB Difference: RGB difference can only be calculated on packed RGB images"); plane = 0; } else { if (!vi.IsPlanar()) - env->ThrowError("Plane Difference: Only planar images (as YV12) supported!"); + env->ThrowError("Plane Difference: Only planar YUV or planar RGB images images supported!"); } AVSValue cn = env->GetVarDef("current_frame"); @@ -536,54 +634,92 @@ AVSValue ComparePlane::CmpPlaneSame(AVSValue clip, void* user_data, int offset, PVideoFrame src = child->GetFrame(n,env); PVideoFrame src2 = child->GetFrame(n2,env); + int pixelsize = vi.ComponentSize(); + const BYTE* srcp = src->GetReadPtr(plane); const BYTE* srcp2 = src2->GetReadPtr(plane); int height = src->GetHeight(plane); - int width = src->GetRowSize(plane); + int width = src->GetRowSize(plane) / pixelsize; int pitch = src->GetPitch(plane); int pitch2 = src2->GetPitch(plane); if (width == 0 || height == 0) env->ThrowError("Plane Difference: No chroma planes in Y8!"); - size_t sad = 0; - if (vi.IsRGB32()) { - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { + int total_pixels = width*height; + bool sum_in_32bits; + if (pixelsize == 4) + sum_in_32bits = false; + else // worst case + sum_in_32bits = ((__int64)total_pixels * (pixelsize == 1 ? 
255 : 65535)) <= std::numeric_limits::max(); + + double sad = 0; + if (vi.IsRGB32() || vi.IsRGB64()) { + if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { sad = get_sad_rgb_sse2(srcp, srcp2, height, width, pitch, pitch2); } else #ifdef X86_32 - if ((env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { + if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { sad = get_sad_rgb_isse(srcp, srcp2, height, width, pitch, pitch2); } else #endif { - sad = get_sad_rgb_c(srcp, srcp2, height, width, pitch, pitch2); + if(pixelsize==1) + sad = get_sad_rgb_c(srcp, srcp2, height, width, pitch, pitch2); + else + sad = get_sad_rgb_c(srcp, srcp2, height, width, pitch, pitch2); } } else { - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { + if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { sad = get_sad_sse2(srcp, srcp2, height, width, pitch, pitch2); } else #ifdef X86_32 - if ((env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { + if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { sad = get_sad_isse(srcp, srcp2, height, width, pitch, pitch2); } else #endif { - sad = get_sad_c(srcp, srcp2, height, width, pitch, pitch2); + if(pixelsize==1) + sad = get_sad_c(srcp, srcp2, height, width, pitch, pitch2); + else if (pixelsize==2) + sad = get_sad_c(srcp, srcp2, height, width, pitch, pitch2); + else // pixelsize==4 + sad = get_sad_c(srcp, srcp2, height, width, pitch, pitch2); } } float f; - if (vi.IsRGB32()) - f = (float)((double)(sad * 4) / (height * width * 3)); + if (vi.IsRGB32() || vi.IsRGB64()) + f = (float)((sad * 4) / (height * width * 3)); else - f = (float)((double)sad / (height * width)); + f = (float)(sad / (height * width)); return (AVSValue)f; } +AVSValue MinMaxPlane::Create_max(AVSValue args, void* user_data, IScriptEnvironment* env) { + int plane = (int)reinterpret_cast(user_data); + return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), plane, MAX, env); +} + +AVSValue MinMaxPlane::Create_min(AVSValue args, void* user_data, IScriptEnvironment* env) { + int plane = (int)reinterpret_cast(user_data); + return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), plane, MIN, env); +} + +AVSValue MinMaxPlane::Create_median(AVSValue args, void* user_data, IScriptEnvironment* env) { + int plane = (int)reinterpret_cast(user_data); + return MinMax(args[0], user_data, 50.0, args[1].AsInt(0), plane, MIN, env); +} + +AVSValue MinMaxPlane::Create_minmax(AVSValue args, void* user_data, IScriptEnvironment* env) { + int plane = (int)reinterpret_cast(user_data); + return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), plane, MINMAX_DIFFERENCE, env); +} + +/* // Y Planes functions AVSValue MinMaxPlane::Create_max_y(AVSValue args, void* user_data, IScriptEnvironment* env) { @@ -636,10 +772,9 @@ AVSValue MinMaxPlane::Create_median_v(AVSValue args, void* user_data, IScriptEnv AVSValue MinMaxPlane::Create_minmax_v(AVSValue args, void* user_data, IScriptEnvironment* env) { return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), PLANAR_V, MINMAX_DIFFERENCE, env); } - +*/ AVSValue MinMaxPlane::MinMax(AVSValue clip, void* user_data, double threshold, int offset, int plane, int mode, IScriptEnvironment* env) { - unsigned int 
accum[256]; if (!clip.IsClip()) env->ThrowError("MinMax: No clip supplied!"); @@ -650,6 +785,10 @@ AVSValue MinMaxPlane::MinMax(AVSValue clip, void* user_data, double threshold, i if (!vi.IsPlanar()) env->ThrowError("MinMax: Image must be planar"); + int pixelsize = vi.ComponentSize(); + int buffersize = pixelsize == 1 ? 256 : 65536; // 65536 for float, too + uint32_t *accum_buf = new uint32_t[buffersize]; + // Get current frame number AVSValue cn = env->GetVarDef("current_frame"); if (!cn.IsInt()) @@ -663,23 +802,39 @@ AVSValue MinMaxPlane::MinMax(AVSValue clip, void* user_data, double threshold, i const BYTE* srcp = src->GetReadPtr(plane); int pitch = src->GetPitch(plane); - int w = src->GetRowSize(plane); + int w = src->GetRowSize(plane) / pixelsize; int h = src->GetHeight(plane); if (w == 0 || h == 0) - env->ThrowError("MinMax: No chroma planes in Y8!"); + env->ThrowError("MinMax: plane does not exist!"); // Reset accumulators - for (int i=0;i<256;i++) { - accum[i]=0; - } + std::fill_n(accum_buf, buffersize, 0); - // Count each component. - for (int y=0;y(srcp)[x]]++; + } + srcp+=pitch; + } + } else { //pixelsize==4 float + // for float results are always checked with 16 bit precision only + for (int y=0;y(srcp)[x] * 65535.0f), 0, 65535)]++; + } + srcp+=pitch; } - srcp+=pitch; } int pixels = w*h; @@ -691,18 +846,18 @@ AVSValue MinMaxPlane::MinMax(AVSValue clip, void* user_data, double threshold, i // Find the value we need. if (mode == MIN) { unsigned int counted=0; - for (int i = 0; i< 256;i++) { - counted += accum[i]; + for (int i = 0; i< buffersize;i++) { + counted += accum_buf[i]; if (counted>tpixels) return AVSValue(i); } - return AVSValue(255); + return AVSValue(buffersize-1); } if (mode == MAX) { unsigned int counted=0; - for (int i = 255; i>=0;i--) { - counted += accum[i]; + for (int i = buffersize-1; i>=0;i--) { + counted += accum_buf[i]; if (counted>tpixels) return AVSValue(i); } @@ -713,8 +868,8 @@ AVSValue MinMaxPlane::MinMax(AVSValue clip, void* user_data, double threshold, i unsigned int counted=0; int i, t_min = 0; // Find min - for (i = 0; i < 256;i++) { - counted += accum[i]; + for (i = 0; i < buffersize;i++) { + counted += accum_buf[i]; if (counted>tpixels) { t_min=i; break; @@ -723,16 +878,20 @@ AVSValue MinMaxPlane::MinMax(AVSValue clip, void* user_data, double threshold, i // Find max counted=0; - int t_max = 255; - for (i = 255; i>=0;i--) { - counted += accum[i]; + int t_max = buffersize-1; + for (i = buffersize-1; i>=0;i--) { + counted += accum_buf[i]; if (counted>tpixels) { t_max=i; break; } } + delete[] accum_buf; + return AVSValue(t_max-t_min); // results <0 will be returned if threshold > 50 } + + delete[] accum_buf; return AVSValue(-1); } diff --git a/avs_core/filters/conditional/conditional_functions.h b/avs_core/filters/conditional/conditional_functions.h index 596ba8cec..174a81fe9 100644 --- a/avs_core/filters/conditional/conditional_functions.h +++ b/avs_core/filters/conditional/conditional_functions.h @@ -39,9 +39,12 @@ class AveragePlane { public: + static AVSValue Create(AVSValue args, void* user_data, IScriptEnvironment* env); + /* static AVSValue Create_y(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_u(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_v(AVSValue args, void* user_data, IScriptEnvironment* env); + */ static AVSValue AvgPlane(AVSValue clip, void* user_data, int plane, int offset, IScriptEnvironment* env); }; @@ -51,18 +54,27 @@ class ComparePlane { static AVSValue 
CmpPlane(AVSValue clip, AVSValue clip2, void* user_data, int plane, IScriptEnvironment* env); static AVSValue CmpPlaneSame(AVSValue clip, void* user_data, int offset, int plane, IScriptEnvironment* env); + static AVSValue Create(AVSValue args, void* user_data, IScriptEnvironment* env); + /* static AVSValue Create_y(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_u(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_v(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_rgb(AVSValue args, void* user_data, IScriptEnvironment* env); + */ + static AVSValue Create_prev(AVSValue args, void* user_data, IScriptEnvironment* env); + /* static AVSValue Create_prev_y(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_prev_u(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_prev_v(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_prev_rgb(AVSValue args, void* user_data, IScriptEnvironment* env); + */ + static AVSValue Create_next(AVSValue args, void* user_data, IScriptEnvironment* env); + /* static AVSValue Create_next_y(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_next_u(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_next_v(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_next_rgb(AVSValue args, void* user_data, IScriptEnvironment* env); + */ }; @@ -70,6 +82,13 @@ class MinMaxPlane { public: static AVSValue MinMax(AVSValue clip, void* user_data, double threshold, int offset, int plane, int mode, IScriptEnvironment* env); + + static AVSValue Create_max(AVSValue args, void* user_data, IScriptEnvironment* env); + static AVSValue Create_min(AVSValue args, void* user_data, IScriptEnvironment* env); + static AVSValue Create_median(AVSValue args, void* user_data, IScriptEnvironment* env); + static AVSValue Create_minmax(AVSValue args, void* user_data, IScriptEnvironment* env); + + /* static AVSValue Create_max_y(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_min_y(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_median_y(AVSValue args, void* user_data, IScriptEnvironment* env); @@ -84,7 +103,7 @@ class MinMaxPlane { static AVSValue Create_min_v(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_median_v(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_minmax_v(AVSValue args, void* user_data, IScriptEnvironment* env); - + */ private: enum { MIN = 1, MAX = 2, MEDIAN = 3, MINMAX_DIFFERENCE = 4 }; From 3c6a1cd65a15818ffafe7619e5a3ec132043f837 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 2 Sep 2016 16:25:42 +0200 Subject: [PATCH 034/120] Conditional_functions: Remove refactored creates --- .../conditional/conditional_functions.cpp | 126 ------------------ .../conditional/conditional_functions.h | 40 ------ 2 files changed, 166 deletions(-) diff --git a/avs_core/filters/conditional/conditional_functions.cpp b/avs_core/filters/conditional/conditional_functions.cpp index da52052ab..880f55181 100644 --- a/avs_core/filters/conditional/conditional_functions.cpp +++ b/avs_core/filters/conditional/conditional_functions.cpp @@ -124,21 +124,6 @@ AVSValue AveragePlane::Create(AVSValue args, void* user_data, IScriptEnvironment int plane = (int)reinterpret_cast(user_data); return AvgPlane(args[0], user_data, plane, 
args[1].AsInt(0), env); } -/* -AVSValue AveragePlane::Create_y(AVSValue args, void* user_data, IScriptEnvironment* env) { - return AvgPlane(args[0], user_data, PLANAR_Y, args[1].AsInt(0), env); -} - - -AVSValue AveragePlane::Create_u(AVSValue args, void* user_data, IScriptEnvironment* env) { - return AvgPlane(args[0], user_data, PLANAR_U, args[1].AsInt(0), env); -} - - -AVSValue AveragePlane::Create_v(AVSValue args, void* user_data, IScriptEnvironment* env) { - return AvgPlane(args[0], user_data, PLANAR_V, args[1].AsInt(0), env); -} -*/ // Average plane template @@ -276,73 +261,16 @@ AVSValue ComparePlane::Create(AVSValue args, void* user_data, IScriptEnvironment return CmpPlane(args[0],args[1], user_data, plane, env); } -/* -AVSValue ComparePlane::Create_y(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlane(args[0],args[1], user_data, PLANAR_Y, env); -} - - -AVSValue ComparePlane::Create_u(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlane(args[0],args[1], user_data, PLANAR_U, env); -} - - -AVSValue ComparePlane::Create_v(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlane(args[0],args[1], user_data, PLANAR_V, env); -} - -AVSValue ComparePlane::Create_rgb(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlane(args[0],args[1], user_data, -1 , env); -} -*/ - AVSValue ComparePlane::Create_prev(AVSValue args, void* user_data, IScriptEnvironment* env) { int plane = (int)reinterpret_cast(user_data); return CmpPlaneSame(args[0], user_data, -1, plane, env); } -/* -AVSValue ComparePlane::Create_prev_y(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlaneSame(args[0], user_data, -1, PLANAR_Y, env); -} - -AVSValue ComparePlane::Create_prev_u(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlaneSame(args[0], user_data, -1, PLANAR_U, env); -} - -AVSValue ComparePlane::Create_prev_v(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlaneSame(args[0], user_data, -1, PLANAR_V, env); -} - -AVSValue ComparePlane::Create_prev_rgb(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlaneSame(args[0], user_data, -1, -1, env); -} -*/ - - AVSValue ComparePlane::Create_next(AVSValue args, void* user_data, IScriptEnvironment* env) { int plane = (int)reinterpret_cast(user_data); return CmpPlaneSame(args[0], user_data, args[1].AsInt(1), plane, env); } -/* -AVSValue ComparePlane::Create_next_y(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlaneSame(args[0], user_data, args[1].AsInt(1), PLANAR_Y, env); -} - -AVSValue ComparePlane::Create_next_u(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlaneSame(args[0], user_data, args[1].AsInt(1), PLANAR_U, env); -} - -AVSValue ComparePlane::Create_next_v(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlaneSame(args[0], user_data, args[1].AsInt(1), PLANAR_V, env); -} - -AVSValue ComparePlane::Create_next_rgb(AVSValue args, void* user_data, IScriptEnvironment* env) { - return CmpPlaneSame(args[0], user_data, args[1].AsInt(1), -1, env); -} -*/ - template static double get_sad_c(const BYTE* c_plane8, const BYTE* t_plane8, size_t height, size_t width, size_t c_pitch, size_t t_pitch) { @@ -719,60 +647,6 @@ AVSValue MinMaxPlane::Create_minmax(AVSValue args, void* user_data, IScriptEnvir return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), plane, MINMAX_DIFFERENCE, env); } -/* -// Y Planes functions - 
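// Editor's note: the commented-out per-plane factories removed below are all
// superseded by a single Create_* that recovers the plane id from the
// user_data pointer stored in the builtin-function table (see the
// "RPlaneMinMaxDifference" entries earlier in this series).  A minimal,
// self-contained sketch of that registration/dispatch pattern follows; the
// Entry struct and the dispatch signature are simplified stand-ins, not the
// real AviSynth AVSFunction/AVSValue types.
#include <cstdint>
#include <cstdio>

enum Plane { PLANAR_Y = 1, PLANAR_U = 2, PLANAR_V = 4 };

struct Entry {
    const char* name;
    int (*create)(void* user_data);  // stand-in for Create(args, user_data, env)
    void* user_data;                 // plane constant smuggled through the opaque pointer
};

static int Create_minmax(void* user_data) {
    // same trick as the patch: the pointer payload *is* the plane constant
    int plane = (int)reinterpret_cast<intptr_t>(user_data);
    return plane;                    // the real code forwards it to MinMax(...)
}

static const Entry table[] = {
    { "YPlaneMinMaxDifference", Create_minmax, (void*)(intptr_t)PLANAR_Y },
    { "UPlaneMinMaxDifference", Create_minmax, (void*)(intptr_t)PLANAR_U },
    { "VPlaneMinMaxDifference", Create_minmax, (void*)(intptr_t)PLANAR_V },
};

int main() {
    for (const Entry& e : table)
        std::printf("%s -> plane %d\n", e.name, e.create(e.user_data));
}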
-AVSValue MinMaxPlane::Create_max_y(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), PLANAR_Y, MAX, env); -} - -AVSValue MinMaxPlane::Create_min_y(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), PLANAR_Y, MIN, env); -} - -AVSValue MinMaxPlane::Create_median_y(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, 50.0, args[1].AsInt(0), PLANAR_Y, MIN, env); -} - -AVSValue MinMaxPlane::Create_minmax_y(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), PLANAR_Y, MINMAX_DIFFERENCE, env); -} - -// U Planes functions - -AVSValue MinMaxPlane::Create_max_u(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), PLANAR_U, MAX, env); -} - -AVSValue MinMaxPlane::Create_min_u(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), PLANAR_U, MIN, env); -} - -AVSValue MinMaxPlane::Create_median_u(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, 50.0, args[1].AsInt(0), PLANAR_U, MIN, env); -} - -AVSValue MinMaxPlane::Create_minmax_u(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), PLANAR_U, MINMAX_DIFFERENCE, env); -} -// V Planes functions - -AVSValue MinMaxPlane::Create_max_v(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), PLANAR_V, MAX, env); -} - -AVSValue MinMaxPlane::Create_min_v(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), PLANAR_V, MIN, env); -} - -AVSValue MinMaxPlane::Create_median_v(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, 50.0, args[1].AsInt(0), PLANAR_V, MIN, env); -} - -AVSValue MinMaxPlane::Create_minmax_v(AVSValue args, void* user_data, IScriptEnvironment* env) { - return MinMax(args[0], user_data, args[1].AsDblDef(0.0), args[2].AsInt(0), PLANAR_V, MINMAX_DIFFERENCE, env); -} -*/ AVSValue MinMaxPlane::MinMax(AVSValue clip, void* user_data, double threshold, int offset, int plane, int mode, IScriptEnvironment* env) { diff --git a/avs_core/filters/conditional/conditional_functions.h b/avs_core/filters/conditional/conditional_functions.h index 174a81fe9..cb5ffc78a 100644 --- a/avs_core/filters/conditional/conditional_functions.h +++ b/avs_core/filters/conditional/conditional_functions.h @@ -40,11 +40,6 @@ class AveragePlane { public: static AVSValue Create(AVSValue args, void* user_data, IScriptEnvironment* env); - /* - static AVSValue Create_y(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_u(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_v(AVSValue args, void* user_data, IScriptEnvironment* env); - */ static AVSValue AvgPlane(AVSValue clip, void* user_data, int plane, int offset, IScriptEnvironment* env); }; @@ -55,26 +50,8 @@ class ComparePlane { static AVSValue CmpPlaneSame(AVSValue clip, void* user_data, int offset, int plane, IScriptEnvironment* env); static AVSValue Create(AVSValue args, void* user_data, 
IScriptEnvironment* env); - /* - static AVSValue Create_y(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_u(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_v(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_rgb(AVSValue args, void* user_data, IScriptEnvironment* env); - */ static AVSValue Create_prev(AVSValue args, void* user_data, IScriptEnvironment* env); - /* - static AVSValue Create_prev_y(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_prev_u(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_prev_v(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_prev_rgb(AVSValue args, void* user_data, IScriptEnvironment* env); - */ static AVSValue Create_next(AVSValue args, void* user_data, IScriptEnvironment* env); - /* - static AVSValue Create_next_y(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_next_u(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_next_v(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_next_rgb(AVSValue args, void* user_data, IScriptEnvironment* env); - */ }; @@ -88,23 +65,6 @@ class MinMaxPlane { static AVSValue Create_median(AVSValue args, void* user_data, IScriptEnvironment* env); static AVSValue Create_minmax(AVSValue args, void* user_data, IScriptEnvironment* env); - /* - static AVSValue Create_max_y(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_min_y(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_median_y(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_minmax_y(AVSValue args, void* user_data, IScriptEnvironment* env); - - static AVSValue Create_max_u(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_min_u(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_median_u(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_minmax_u(AVSValue args, void* user_data, IScriptEnvironment* env); - - static AVSValue Create_max_v(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_min_v(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_median_v(AVSValue args, void* user_data, IScriptEnvironment* env); - static AVSValue Create_minmax_v(AVSValue args, void* user_data, IScriptEnvironment* env); - */ - private: enum { MIN = 1, MAX = 2, MEDIAN = 3, MINMAX_DIFFERENCE = 4 }; From 79cb2cd52954c2a74941c20c43e82e2129e0ee19 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 2 Sep 2016 16:33:43 +0200 Subject: [PATCH 035/120] conditional_functions: return from one common place --- .../conditional/conditional_functions.cpp | 37 ++++++++++--------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/avs_core/filters/conditional/conditional_functions.cpp b/avs_core/filters/conditional/conditional_functions.cpp index 880f55181..bfdb1008d 100644 --- a/avs_core/filters/conditional/conditional_functions.cpp +++ b/avs_core/filters/conditional/conditional_functions.cpp @@ -717,28 +717,30 @@ AVSValue MinMaxPlane::MinMax(AVSValue clip, void* user_data, double threshold, i unsigned int tpixels = (unsigned int)(pixels*threshold); - // Find the value we need. + int retval; + + // Find the value we need. 
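// Editor's note: the MIN / MAX / MINMAX_DIFFERENCE branches below all perform
// the same cumulative walk over the histogram until a requested fraction of
// the pixels has been counted.  A stand-alone sketch of that walk (toy
// histogram values; the real code uses accum_buf and the tpixels count
// computed above):
#include <cstdio>
#include <vector>

// lowest bin whose cumulative count exceeds threshold_percent of all pixels
static int loose_min(const std::vector<unsigned>& hist, double threshold_percent) {
    unsigned long long total = 0;
    for (unsigned v : hist) total += v;
    const unsigned long long tpixels =
        (unsigned long long)(total * threshold_percent / 100.0);
    unsigned long long counted = 0;
    for (size_t i = 0; i < hist.size(); ++i) {
        counted += hist[i];
        if (counted > tpixels) return (int)i;   // first bin past the threshold
    }
    return (int)hist.size() - 1;                // threshold never reached
}

int main() {
    std::vector<unsigned> hist(256, 0);
    hist[3] = 10; hist[16] = 500; hist[200] = 500;   // toy 8-bit histogram
    std::printf("loose min, 0%% threshold: %d\n", loose_min(hist, 0.0));   // -> 3
    std::printf("loose min, 5%% threshold: %d\n", loose_min(hist, 5.0));   // -> 16
}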
if (mode == MIN) { unsigned int counted=0; + retval = buffersize - 1; for (int i = 0; i< buffersize;i++) { counted += accum_buf[i]; - if (counted>tpixels) - return AVSValue(i); + if (counted>tpixels) { + retval = i; + break; + } } - return AVSValue(buffersize-1); - } - - if (mode == MAX) { + } else if (mode == MAX) { unsigned int counted=0; + retval = 0; for (int i = buffersize-1; i>=0;i--) { counted += accum_buf[i]; - if (counted>tpixels) - return AVSValue(i); + if (counted>tpixels) { + retval = i; + break; + } } - return AVSValue(0); - } - - if (mode == MINMAX_DIFFERENCE) { + } else if (mode == MINMAX_DIFFERENCE) { unsigned int counted=0; int i, t_min = 0; // Find min @@ -761,11 +763,12 @@ AVSValue MinMaxPlane::MinMax(AVSValue clip, void* user_data, double threshold, i } } - delete[] accum_buf; - - return AVSValue(t_max-t_min); // results <0 will be returned if threshold > 50 + retval = t_max - t_min; // results <0 will be returned if threshold > 50 + } + else { + retval = -1; } delete[] accum_buf; - return AVSValue(-1); + return AVSValue(retval); } From d97c5eee6bba3a233e5e3ed4a5d4802bea79cc71 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sun, 4 Sep 2016 18:24:48 +0200 Subject: [PATCH 036/120] Levels: 10-16 bit support for YUV(A), PlanarRGB(A), 16 bits for RGB48/64 --- avs_core/filters/levels.cpp | 363 ++++++++++++++++++++++++++++++------ avs_core/filters/levels.h | 4 + 2 files changed, 308 insertions(+), 59 deletions(-) diff --git a/avs_core/filters/levels.cpp b/avs_core/filters/levels.cpp index 8a801b869..a0f63bfbe 100644 --- a/avs_core/filters/levels.cpp +++ b/avs_core/filters/levels.cpp @@ -41,6 +41,7 @@ #include #include "../core/internal.h" #include +#include #define PI 3.141592653589793 @@ -130,34 +131,73 @@ Levels::Levels(PClip _child, int in_min, double gamma, int in_max, int out_min, { if (gamma <= 0.0) env->ThrowError("Levels: gamma must be positive"); - + gamma = 1/gamma; - int divisor = in_max - in_min + (in_max == in_min); + int divisor; + if (in_min == in_max) + divisor = 1; + else + divisor = in_max - in_min; + int scale = 1; double bias = 0.0; + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); // 8,10..16 + + if (pixelsize == 4) + env->ThrowError("Levels: cannot operate on float video formats"); + // No lookup for float. todo: slow on-the-fly realtime calculation + + int lookup_size = 1 << bits_per_pixel; // 256, 1024, 4096, 16384, 65536 + int real_lookup_size = (pixelsize == 1) ? 256 : 65536; // avoids lut overflow in case of non-standard content of a 10 bit clip + int pixel_max = lookup_size - 1; + + use_lut = bits_per_pixel != 32; // for float: realtime (todo) + + if (!use_lut) + dither = false; + + int tv_range_low = 16 << (bits_per_pixel - 8); // 16 + int tv_range_hi_luma = ((235+1) << (bits_per_pixel - 8)) - 1; // 16-235 + int range_luma = tv_range_hi_luma - tv_range_low; // 219 + + int tv_range_hi_chroma = ((240+1) << (bits_per_pixel - 8)) - 1; // 16-240,64–963, 256–3855,... 4096-61695 + int range_chroma = tv_range_hi_chroma - tv_range_low; // 224 + + int middle_chroma = 1 << (bits_per_pixel - 1); // 128 + if (dither) { - scale = 256; + // lut scale settings + scale = 256; // lower 256 is dither value divisor *= 256; in_min *= 256; - bias = -127.5; + bias = -((1 << bits_per_pixel) - 1) / 2; // -127.5 for 8 bit, scaling because of dithershift } - auto env2 = static_cast(env); - size_t num_map = vi.IsYUV() ? 
2 : 1; - map = static_cast(env2->Allocate(256 * scale * num_map, 8, AVS_NORMAL_ALLOC)); - if (!map) - env->ThrowError("Levels: Could not reserve memory."); - env->AtExit(free_buffer, map); + // one buffer for map and mapchroma + map = nullptr; + if(use_lut) { + auto env2 = static_cast(env); + size_t number_of_maps = vi.IsYUV() || vi.IsYUVA() ? 2 : 1; + int bufsize = pixelsize * real_lookup_size * scale * number_of_maps; + map = static_cast(env2->Allocate(bufsize , 16, AVS_NORMAL_ALLOC)); + if (!map) + env->ThrowError("Levels: Could not reserve memory."); + env->AtExit(free_buffer, map); + if(bits_per_pixel>=10 && bits_per_pixel<=14) + std::fill_n(map, bufsize, 0); // 8 and 16 bit fully overwrites + } - if (vi.IsYUV()) + if (vi.IsYUV() || vi.IsYUVA()) { - mapchroma = map + 256 * scale; + mapchroma = map + pixelsize * real_lookup_size * scale; - for (int i = 0; i<256*scale; ++i) { + for (int i = 0; i(map)[i] = (uint16_t)luma; + reinterpret_cast(mapchroma)[i] = (uint16_t)chroma; + } } } else if (vi.IsRGB()) { - for (int i = 0; i<256*scale; ++i) { - double p = (bias + i - in_min) / divisor; + // no coring option here + // lookup for packed and planar RGBs + for (int i = 0; i(map)[i] = (uint16_t)p; } } } @@ -213,27 +306,88 @@ PVideoFrame __stdcall Levels::GetFrame(int n, IScriptEnvironment* env) p += pitch; } } else if (vi.IsPlanar()) { - for (int y = 0; y(p)[x] = reinterpret_cast(map)[reinterpret_cast(p)[x]<<8 | ditherMap[(x&0x0f)|_y]]; + } + p += pitch; + } + } + const int UVpitch = frame->GetPitch(PLANAR_U); + const int w = frame->GetRowSize(PLANAR_U) / pixelsize; + const int h = frame->GetHeight(PLANAR_U); + p = frame->GetWritePtr(PLANAR_U); + BYTE* q = frame->GetWritePtr(PLANAR_V); + if(pixelsize==1) { + for (int y = 0; y(p)[x] = reinterpret_cast(mapchroma)[reinterpret_cast(p)[x]<<8 | _dither]; + reinterpret_cast(q)[x] = reinterpret_cast(mapchroma)[reinterpret_cast(q)[x]<<8 | _dither]; + } + p += UVpitch; + q += UVpitch; + } } - p += pitch; } - const int UVpitch = frame->GetPitch(PLANAR_U); - const int w = frame->GetRowSize(PLANAR_U); - const int h = frame->GetHeight(PLANAR_U); - p = frame->GetWritePtr(PLANAR_U); - BYTE* q = frame->GetWritePtr(PLANAR_V); - for (int y = 0; yGetWritePtr(PLANAR_B); + BYTE* r = frame->GetWritePtr(PLANAR_R); + const int pitch_b = frame->GetPitch(PLANAR_B); + const int pitch_r = frame->GetPitch(PLANAR_R); + if(pixelsize==1) { + for (int y = 0; y(p)[x] = reinterpret_cast(map)[reinterpret_cast(p)[x]<<8 | ditherMap[(x&0x0f)|_y]]; + reinterpret_cast(b)[x] = reinterpret_cast(map)[reinterpret_cast(b)[x]<<8 | ditherMap[(x&0x0f)|_y]]; + reinterpret_cast(r)[x] = reinterpret_cast(map)[reinterpret_cast(r)[x]<<8 | ditherMap[(x&0x0f)|_y]]; + } + p += pitch; + b += pitch_b; + r += pitch_r; + } } - p += UVpitch; - q += UVpitch; } } else if (vi.IsRGB32()) { for (int y = 0; y(p)[x*4+0] = reinterpret_cast(map)[reinterpret_cast(p)[x*4+0]<<8 | _dither]; + reinterpret_cast(p)[x*4+1] = reinterpret_cast(map)[reinterpret_cast(p)[x*4+1]<<8 | _dither]; + reinterpret_cast(p)[x*4+2] = reinterpret_cast(map)[reinterpret_cast(p)[x*4+2]<<8 | _dither]; + reinterpret_cast(p)[x*4+3] = reinterpret_cast(map)[reinterpret_cast(p)[x*4+3]<<8 | _dither]; + } + p += pitch; + } + } else if (vi.IsRGB48()) { + for (int y = 0; y(p)[x*3+0] = reinterpret_cast(map)[reinterpret_cast(p)[x*3+0]<<8 | _dither]; + reinterpret_cast(p)[x*3+1] = reinterpret_cast(map)[reinterpret_cast(p)[x*3+1]<<8 | _dither]; + reinterpret_cast(p)[x*3+2] = reinterpret_cast(map)[reinterpret_cast(p)[x*3+2]<<8 | _dither]; + } + p += pitch; 
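// Editor's note: every dithered lookup in this GetFrame path indexes the LUT
// with (value << 8) | ditherMap[...], i.e. the table is built over
// value*256 + d and the 0..255 dither index acts as a roughly centred sub-LSB
// offset on the input.  Averaged over the 16x16 ordered-dither cell this keeps
// the fractional output that plain rounding would discard.  A self-contained
// toy sketch of the idea with a simple gamma curve (not the exact Levels
// constants or ditherMap):
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    std::vector<unsigned char> lut(256 * 256);
    for (int i = 0; i < 256 * 256; ++i) {
        // pixel value plus a centred sub-LSB dither offset, normalised to 0..1
        double p = ((i >> 8) + ((i & 0xff) - 127.5) / 256.0) / 255.0;
        if (p < 0.0) p = 0.0;
        if (p > 1.0) p = 1.0;
        lut[i] = (unsigned char)(std::pow(p, 1.0 / 2.2) * 255.0 + 0.5);
    }
    int sum = 0;
    for (int d = 0; d < 256; ++d)
        sum += lut[30 << 8 | d];
    // plain rounding of this curve at pixel 30 always gives 96; the dithered
    // average lands near the exact value of about 96.4
    std::printf("dithered average for pixel 30: %.2f\n", sum / 256.0);
}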
+ } } - } else { + } else { // no dithering if (vi.IsYUY2()) { for (int y = 0; y(p)[x] = reinterpret_cast(map)[reinterpret_cast(p)[x]]; + } + p += pitch; + } } - p += pitch; - } - const int UVpitch = frame->GetPitch(PLANAR_U); - p = frame->GetWritePtr(PLANAR_U); - const int w = frame->GetRowSize(PLANAR_U); - const int h = frame->GetHeight(PLANAR_U); - for (int y = 0; yGetPitch(PLANAR_U); + p = frame->GetWritePtr(PLANAR_U); + const int w = frame->GetRowSize(PLANAR_U) / pixelsize; + const int h = frame->GetHeight(PLANAR_U); + if(pixelsize==1) { + for (int y = 0; yGetWritePtr(PLANAR_V); + for (int y = 0; y(p)[x] = reinterpret_cast(mapchroma)[reinterpret_cast(p)[x]]; + } + p += UVpitch; + } + p = frame->GetWritePtr(PLANAR_V); + for (int y = 0; y(p)[x] = reinterpret_cast(mapchroma)[reinterpret_cast(p)[x]]; + } + p += UVpitch; + } } - p += UVpitch; } - p = frame->GetWritePtr(PLANAR_V); - for (int y = 0; yGetWritePtr(PLANAR_B); + BYTE* r = frame->GetWritePtr(PLANAR_R); + const int pitch_b = frame->GetPitch(PLANAR_B); + const int pitch_r = frame->GetPitch(PLANAR_R); + if(pixelsize==1) { + for (int y = 0; y(p)[x] = reinterpret_cast(map)[reinterpret_cast(p)[x]]; + reinterpret_cast(b)[x] = reinterpret_cast(map)[reinterpret_cast(b)[x]]; + reinterpret_cast(r)[x] = reinterpret_cast(map)[reinterpret_cast(r)[x]]; + } + p += pitch; + b += pitch_b; + r += pitch_r; + } } - p += UVpitch; } } else if (vi.IsRGB()) { - const int row_size = frame->GetRowSize(); - for (int y = 0; yGetRowSize() / pixelsize; + if(pixelsize==1) { + for (int y = 0; y(p)[x] = reinterpret_cast(map)[reinterpret_cast(p)[x]]; + } + p += pitch; } - p += pitch; } } } diff --git a/avs_core/filters/levels.h b/avs_core/filters/levels.h index 596b9949b..bca83c9ef 100644 --- a/avs_core/filters/levels.h +++ b/avs_core/filters/levels.h @@ -63,6 +63,10 @@ class Levels : public GenericVideoFilter private: BYTE *map, *mapchroma; bool dither; + // avs+ + int pixelsize; + int bits_per_pixel; // 8,10..16 + bool use_lut; }; From 1b7e57fe8e7e5d0de657ea2bf070cebc2ea3211f Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sun, 4 Sep 2016 20:13:14 +0200 Subject: [PATCH 037/120] RGBAdjust: RGB48/64 and Planar RGB(A) 10-16 bit --- avs_core/filters/levels.cpp | 341 ++++++++++++++++++++++++++++++------ avs_core/filters/levels.h | 4 + 2 files changed, 290 insertions(+), 55 deletions(-) diff --git a/avs_core/filters/levels.cpp b/avs_core/filters/levels.cpp index a0f63bfbe..bd7ac1416 100644 --- a/avs_core/filters/levels.cpp +++ b/avs_core/filters/levels.cpp @@ -582,39 +582,85 @@ RGBAdjust::RGBAdjust(PClip _child, double r, double g, double b, double a, rg = 1 / rg; gg = 1 / gg; bg = 1 / bg; ag = 1 / ag; - auto env2 = static_cast(env); - size_t num_map = vi.IsRGB24() ? 3 : 4; - size_t map_size = dither ? 256 * 256 : 256; - - mapR = static_cast(env2->Allocate(map_size * num_map, 8, AVS_NORMAL_ALLOC)); - if (!mapR) - env->ThrowError("RGBAdjust: Could not reserve memory."); - env->AtExit(free_buffer, mapR); - - mapG = mapR + map_size; - mapB = mapG + map_size; - mapA = num_map == 4 ? 
mapB + map_size : nullptr; - - void(*set_map)(BYTE*, const double, const double, const double); - if (dither) { - set_map = [](BYTE* map, const double c0, const double c1, const double c2) { - for (int i = 0; i < 256 * 256; ++i) { - map[i] = BYTE(pow(clamp((c0 * 256 + i * c1 - 127.5) / (255.0 * 256), 0.0, 1.0), c2) * 255.0 + 0.5); + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); // 8,10..16 + + if (pixelsize == 4) + env->ThrowError("RGBAdjust: cannot operate on float video formats"); + // No lookup for float. todo: slow on-the-fly realtime calculation + + int lookup_size = 1 << bits_per_pixel; // 256, 1024, 4096, 16384, 65536 + int real_lookup_size = (pixelsize == 1) ? 256 : 65536; // avoids lut overflow in case of non-standard content of a 10 bit clip + int pixel_max = lookup_size - 1; + + use_lut = bits_per_pixel != 32; // for float: realtime (todo) + + if (!use_lut) + dither = false; + + // one buffer for all maps + mapR = nullptr; + + if(use_lut) { + auto env2 = static_cast(env); + size_t number_of_maps = (vi.IsRGB24() || vi.IsRGB48() || vi.IsPlanarRGB()) ? 3 : 4; + int one_bufsize = pixelsize * real_lookup_size; + if (dither) one_bufsize *= 256; + + mapR = static_cast(env2->Allocate(one_bufsize * number_of_maps, 16, AVS_NORMAL_ALLOC)); + if (!mapR) + env->ThrowError("RGBAdjust: Could not reserve memory."); + env->AtExit(free_buffer, mapR); + if(bits_per_pixel>=10 && bits_per_pixel<=14) + std::fill_n(mapR, one_bufsize * number_of_maps, 0); // 8 and 16 bit fully overwrites + mapG = mapR + one_bufsize; + mapB = mapG + one_bufsize; + mapA = number_of_maps == 4 ? mapB + one_bufsize : nullptr; + + void(*set_map)(BYTE*, int, int, const double, const double, const double); + if (dither) { + set_map = [](BYTE* map, int lookup_size, int bits_per_pixel, const double c0, const double c1, const double c2) { + double bias = -((1 << bits_per_pixel) - 1) / 2; // -127.5 for 8 bit, scaling because of dithershift + double pixel_max = (1 << bits_per_pixel) - 1; + if(bits_per_pixel == 8) { + for (int i = 0; i < lookup_size * 256; ++i) { + int i_base = i & ~0xFF; + int i_dithershift = (i & 0xFF) << (bits_per_pixel - 8); + int ii = ii = i_base + i_dithershift; // otherwise dither has no visible effect on 10..16 bit + map[i] = BYTE(pow(clamp((c0 * 256 + ii * c1 - bias) / (double(pixel_max) * 256), 0.0, 1.0), c2) * (double)pixel_max + 0.5); + } + } + else { + for (int i = 0; i < lookup_size * 256; ++i) { + int i_base = i & ~0xFF; + int i_dithershift = (i & 0xFF) << (bits_per_pixel - 8); + int ii = ii = i_base + i_dithershift; // otherwise dither has no visible effect on 10..16 bit + reinterpret_cast(map)[i] = uint16_t(pow(clamp((c0 * 256 + ii * c1 - bias) / (double(pixel_max) * 256), 0.0, 1.0), c2) * (double)pixel_max + 0.5); + } + } + }; + } else { + set_map = [](BYTE* map, int lookup_size, int bits_per_pixel, const double c0, const double c1, const double c2) { + double pixel_max = (1 << bits_per_pixel) - 1; + if(bits_per_pixel==8) { + for (int i = 0; i < lookup_size; ++i) { // fix of bug introduced in an earlier refactor was: i < 256 * 256 + map[i] = BYTE(pow(clamp((c0 + i * c1) / (double)pixel_max, 0.0, 1.0), c2) * double(pixel_max) + 0.5); + } } - }; - } else { - set_map = [](BYTE* map, const double c0, const double c1, const double c2) { - for (int i = 0; i < 256 * 256; ++i) { - map[i] = BYTE(pow(clamp((c0 + i * c1) / 255.0, 0.0, 1.0), c2) * 255.0 + 0.5); + else { + for (int i = 0; i < lookup_size; ++i) { // fix of bug introduced in an earlier refactor was: i < 256 * 256 + 
reinterpret_cast(map)[i] = uint16_t(pow(clamp((c0 + i * c1) / (double)pixel_max, 0.0, 1.0), c2) * double(pixel_max) + 0.5); + } } - }; - } + }; + } - set_map(mapR, rb, r, rg); - set_map(mapG, gb, g, gg); - set_map(mapB, bb, b, bg); - if (num_map == 4) - set_map(mapA, ab, a, ag); + set_map(mapR, lookup_size, bits_per_pixel, rb, r, rg); + set_map(mapG, lookup_size, bits_per_pixel, gb, g, gg); + set_map(mapB, lookup_size, bits_per_pixel, bb, b, bg); + if (number_of_maps == 4) + set_map(mapA, lookup_size, bits_per_pixel, ab, a, ag); + } } @@ -651,12 +697,82 @@ PVideoFrame __stdcall RGBAdjust::GetFrame(int n, IScriptEnvironment* env) p += pitch; } } + else if (vi.IsRGB64()) { + for (int y = 0; y < vi.height; ++y) { + const int _y = (y << 4) & 0xf0; + for (int x = 0; x < vi.width; ++x) { + const int _dither = ditherMap[(x & 0x0f) | _y]; + uint16_t *p16 = reinterpret_cast(p); + p16[x * 4 + 0] = reinterpret_cast(mapB)[p16[x * 4 + 0] << 8 | _dither]; + p16[x * 4 + 1] = reinterpret_cast(mapG)[p16[x * 4 + 1] << 8 | _dither]; + p16[x * 4 + 2] = reinterpret_cast(mapR)[p16[x * 4 + 2] << 8 | _dither]; + p16[x * 4 + 3] = reinterpret_cast(mapA)[p16[x * 4 + 3] << 8 | _dither]; + } + p += pitch; + } + } + else if (vi.IsRGB48()) { + for (int y = 0; y < vi.height; ++y) { + const int _y = (y << 4) & 0xf0; + for (int x = 0; x < vi.width; ++x) { + const int _dither = ditherMap[(x & 0x0f) | _y]; + uint16_t *p16 = reinterpret_cast(p); + p16[x * 3 + 0] = reinterpret_cast(mapB)[p16[x * 3 + 0] << 8 | _dither]; + p16[x * 3 + 1] = reinterpret_cast(mapG)[p16[x * 3 + 1] << 8 | _dither]; + p16[x * 3 + 2] = reinterpret_cast(mapR)[p16[x * 3 + 2] << 8 | _dither]; + } + p += pitch; + } + } + else { + // Planar RGB + bool hasAlpha = vi.IsPlanarRGBA(); + BYTE *srcpG = p; + BYTE *srcpB = frame->GetWritePtr(PLANAR_B); + BYTE *srcpR = frame->GetWritePtr(PLANAR_R); + BYTE *srcpA = frame->GetWritePtr(PLANAR_A); + const int pitchG = pitch; + const int pitchB = frame->GetPitch(PLANAR_B); + const int pitchR = frame->GetPitch(PLANAR_R); + const int pitchA = frame->GetPitch(PLANAR_A); + // no float support + if(pixelsize==1) { + for (int y=0; y(srcpG)[x] = reinterpret_cast(mapG)[reinterpret_cast(srcpG)[x] << 8 | _dither]; + reinterpret_cast(srcpB)[x] = reinterpret_cast(mapB)[reinterpret_cast(srcpB)[x] << 8 | _dither]; + reinterpret_cast(srcpR)[x] = reinterpret_cast(mapR)[reinterpret_cast(srcpR)[x] << 8 | _dither]; + if(hasAlpha) + reinterpret_cast(srcpA)[x] = reinterpret_cast(mapA)[reinterpret_cast(srcpA)[x] << 8 | _dither]; + } + srcpG += pitchG; srcpB += pitchB; srcpR += pitchR; + srcpA += pitchA; + } + } + } } else { + // no dither if (vi.IsRGB32()) { for (int y = 0; y < vi.height; ++y) { for (int x = 0; x < vi.width; ++x) { - p[x * 4] = mapB[p[x * 4]]; + p[x * 4 + 0] = mapB[p[x * 4]]; p[x * 4 + 1] = mapG[p[x * 4 + 1]]; p[x * 4 + 2] = mapR[p[x * 4 + 2]]; p[x * 4 + 3] = mapA[p[x * 4 + 3]]; @@ -665,41 +781,153 @@ PVideoFrame __stdcall RGBAdjust::GetFrame(int n, IScriptEnvironment* env) } } else if (vi.IsRGB24()) { - const int row_size = frame->GetRowSize(); for (int y = 0; y < vi.height; ++y) { - for (int x = 0; x < row_size; x += 3) { - p[x] = mapB[p[x]]; - p[x + 1] = mapG[p[x + 1]]; - p[x + 2] = mapR[p[x + 2]]; + for (int x = 0; x < vi.width; x += 3) { + p[x * 3 + 0] = mapB[p[x]]; + p[x * 3 + 1] = mapG[p[x + 1]]; + p[x * 3 + 2] = mapR[p[x + 2]]; } p += pitch; } + } else if (vi.IsRGB64()) { + for (int y = 0; y < vi.height; ++y) { + for (int x = 0; x < vi.width; ++x) { + uint16_t *p16 = reinterpret_cast(p); + p16[x * 4 + 0] = 
reinterpret_cast(mapB)[p16[x * 4]]; + p16[x * 4 + 1] = reinterpret_cast(mapG)[p16[x * 4 + 1]]; + p16[x * 4 + 2] = reinterpret_cast(mapR)[p16[x * 4 + 2]]; + p16[x * 4 + 3] = reinterpret_cast(mapA)[p16[x * 4 + 3]]; + } + p += pitch; + } + } else if (vi.IsRGB48()) { + for (int y = 0; y < vi.height; ++y) { + for (int x = 0; x < vi.width; ++x) { + uint16_t *p16 = reinterpret_cast(p); + p16[x * 3 + 0] = reinterpret_cast(mapB)[p16[x * 3]]; + p16[x * 3 + 1] = reinterpret_cast(mapG)[p16[x * 3 + 1]]; + p16[x * 3 + 2] = reinterpret_cast(mapR)[p16[x * 3 + 2]]; + } + p += pitch; + } + } + else { + // Planar RGB + bool hasAlpha = vi.IsPlanarRGBA(); + BYTE *srcpG = p; + BYTE *srcpB = frame->GetWritePtr(PLANAR_B); + BYTE *srcpR = frame->GetWritePtr(PLANAR_R); + BYTE *srcpA = frame->GetWritePtr(PLANAR_A); + const int pitchG = pitch; + const int pitchB = frame->GetPitch(PLANAR_B); + const int pitchR = frame->GetPitch(PLANAR_R); + const int pitchA = frame->GetPitch(PLANAR_A); + // no float support + if(pixelsize==1) { + for (int y=0; y(srcpG)[x] = reinterpret_cast(mapG)[reinterpret_cast(srcpG)[x]]; + reinterpret_cast(srcpB)[x] = reinterpret_cast(mapB)[reinterpret_cast(srcpB)[x]]; + reinterpret_cast(srcpR)[x] = reinterpret_cast(mapR)[reinterpret_cast(srcpR)[x]]; + if(hasAlpha) + reinterpret_cast(srcpA)[x] = reinterpret_cast(mapA)[reinterpret_cast(srcpA)[x]]; + } + srcpG += pitchG; srcpB += pitchB; srcpR += pitchR; + srcpA += pitchA; + } + } } } if (analyze) { - const int w = frame->GetRowSize(); + const int w = frame->GetRowSize() / pixelsize; const int h = frame->GetHeight(); - const int t = vi.IsRGB24() ? 3 : 4; - unsigned int accum_r[256], accum_g[256], accum_b[256]; - p = frame->GetWritePtr(); + int lookup_size = 1 << bits_per_pixel; // 256, 1024, 4096, 16384, 65536 + int real_lookup_size = (pixelsize == 1) ? 256 : 65536; // avoids lut overflow in case of non-standard content of a 10 bit clip + int pixel_max = lookup_size - 1; - {for (int i = 0; i < 256; i++) { - accum_r[i] = 0; - accum_g[i] = 0; - accum_b[i] = 0; - }} + // worst case + unsigned int accum_r[65536], accum_g[65536], accum_b[65536]; + + for (int i = 0; i < lookup_size; i++) { + accum_r[i] = 0; + accum_g[i] = 0; + accum_b[i] = 0; + } - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x += t) { + p = frame->GetWritePtr(); + if(vi.IsPlanarRGB() || vi.IsPlanarRGBA()) + { + const BYTE *p_g = p; + const BYTE *p_b = frame->GetReadPtr(PLANAR_B); + const BYTE *p_r = frame->GetReadPtr(PLANAR_R); + const int pitchG = pitch; + const int pitchB= frame->GetPitch(PLANAR_B); + const int pitchR= frame->GetPitch(PLANAR_R); + if(pixelsize==1) { + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + accum_r[p_r[x]]++; + accum_g[p_g[x]]++; + accum_b[p_b[x]]++; + } + p_g += pitchG; + p_b += pitchB; + p_r += pitchR; + } + } + else { + // pixelsize == 2 + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + accum_r[reinterpret_cast(p_r)[x]]++; + accum_g[reinterpret_cast(p_g)[x]]++; + accum_b[reinterpret_cast(p_b)[x]]++; + } + p_g += pitchG; + p_b += pitchB; + p_r += pitchR; + } + } + } else { + // packed RGB + const int pixel_step = vi.IsRGB24() || vi.IsRGB48() ? 
3 : 4; + if(pixelsize==1) { + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x += pixel_step) { accum_r[p[x + 2]]++; accum_g[p[x + 1]]++; accum_b[p[x]]++; + } + p += pitch; } - p += pitch; + } + else { // pixelsize==2 + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x += pixel_step) { + accum_r[reinterpret_cast(p)[x + 2]]++; + accum_g[reinterpret_cast(p)[x + 1]]++; + accum_b[reinterpret_cast(p)[x]]++; + } + p += pitch; + } + } } + int pixels = vi.width*vi.height; float avg_r = 0, avg_g = 0, avg_b = 0; float st_r = 0, st_g = 0, st_b = 0; @@ -713,7 +941,7 @@ PVideoFrame __stdcall RGBAdjust::GetFrame(int n, IScriptEnvironment* env) int At_256 = (pixels + 128) / 256; // When 1/256th of all pixels have been reached, trigger "Loose min/max" - {for (int i = 0; i < 256; i++) { + {for (int i = 0; i < lookup_size; i++) { avg_r += (float)accum_r[i] * (float)i; avg_g += (float)accum_g[i] * (float)i; avg_b += (float)accum_b[i] * (float)i; @@ -729,16 +957,16 @@ PVideoFrame __stdcall RGBAdjust::GetFrame(int n, IScriptEnvironment* env) if (!Ahit_ming) { Amin_g += accum_g[i]; if (Amin_g > At_256) { Ahit_ming = true; Amin_g = i; } } if (!Ahit_minb) { Amin_b += accum_b[i]; if (Amin_b > At_256) { Ahit_minb = true; Amin_b = i; } } - if (!Ahit_maxr) { Amax_r += accum_r[255 - i]; if (Amax_r > At_256) { Ahit_maxr = true; Amax_r = 255 - i; } } - if (!Ahit_maxg) { Amax_g += accum_g[255 - i]; if (Amax_g > At_256) { Ahit_maxg = true; Amax_g = 255 - i; } } - if (!Ahit_maxb) { Amax_b += accum_b[255 - i]; if (Amax_b > At_256) { Ahit_maxb = true; Amax_b = 255 - i; } } + if (!Ahit_maxr) { Amax_r += accum_r[pixel_max - i]; if (Amax_r > At_256) { Ahit_maxr = true; Amax_r = pixel_max - i; } } + if (!Ahit_maxg) { Amax_g += accum_g[pixel_max - i]; if (Amax_g > At_256) { Ahit_maxg = true; Amax_g = pixel_max - i; } } + if (!Ahit_maxb) { Amax_b += accum_b[pixel_max - i]; if (Amax_b > At_256) { Ahit_maxb = true; Amax_b = pixel_max - i; } } }} float Favg_r = avg_r / pixels; float Favg_g = avg_g / pixels; float Favg_b = avg_b / pixels; - {for (int i = 0; i < 256; i++) { + {for (int i = 0; i < lookup_size; i++) { st_r += (float)accum_r[i] * (float(i - Favg_r)*(i - Favg_r)); st_g += (float)accum_g[i] * (float(i - Favg_g)*(i - Favg_g)); st_b += (float)accum_b[i] * (float(i - Favg_b)*(i - Favg_b)); @@ -932,8 +1160,11 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con Sin = (int) (SIN * 4096 + 0.5); Cos = (int) (COS * 4096 + 0.5); - if (vi.IsPlanar() && (vi.ComponentSize() > 1)) - realcalc = true; // 16/32 bit: no lookup tables. + int bits_per_pixel = vi.BitsPerComponent(); // 8,10..16,32 + + if (vi.IsPlanar() && (bits_per_pixel > 8)) + realcalc = true; // 8bit: lut OK. 10+ bits: no lookup tables. 
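// Editor's note: the realcalc switch above (and the use_lut / real_lookup_size
// logic in Levels and RGBAdjust) is a memory-versus-precision trade-off: a
// per-value table needs 2^bits entries, 256x more when a dither sub-index is
// folded in, and the patch over-allocates 16-bit-sized tables for 10-14 bit
// clips to tolerate out-of-range content.  Rough sizing sketch for one map
// (assumed uint8_t entries at 8 bit, uint16_t above):
#include <cstdint>
#include <cstdio>

int main() {
    const int depths[] = {8, 10, 12, 14, 16};
    for (int bits : depths) {
        std::uint64_t entries    = 1ull << bits;
        std::uint64_t entry_size = (bits <= 8) ? 1 : 2;   // uint8_t vs uint16_t
        std::uint64_t plain      = entries * entry_size;  // 256 B at 8 bit ... 128 KiB at 16 bit
        std::uint64_t dithered   = plain * 256;           // 64 KiB at 8 bit ... 32 MiB at 16 bit
        std::printf("%2d bit: %10llu B plain, %12llu B dithered\n",
                    bits, (unsigned long long)plain, (unsigned long long)dithered);
    }
    // 32-bit float input has no practical table at all, hence the realtime
    // (non-LUT) path planned for float formats.
    return 0;
}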
+ // todo: 10 bit lut is still OK auto env2 = static_cast(env); diff --git a/avs_core/filters/levels.h b/avs_core/filters/levels.h index bca83c9ef..9b6d3603f 100644 --- a/avs_core/filters/levels.h +++ b/avs_core/filters/levels.h @@ -93,6 +93,10 @@ class RGBAdjust : public GenericVideoFilter bool analyze; bool dither; BYTE *mapR, *mapG, *mapB, *mapA; + // avs+ + int pixelsize; + int bits_per_pixel; // 8,10..16 + bool use_lut; }; From 440a2cdf3311cd547d75807c56bace67a221c244 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 5 Sep 2016 09:48:12 +0200 Subject: [PATCH 038/120] RGBAdjust: code cleanup to templates --- avs_core/filters/levels.cpp | 380 +++++++++++++++--------------------- 1 file changed, 157 insertions(+), 223 deletions(-) diff --git a/avs_core/filters/levels.cpp b/avs_core/filters/levels.cpp index bd7ac1416..8de6d8cfd 100644 --- a/avs_core/filters/levels.cpp +++ b/avs_core/filters/levels.cpp @@ -663,6 +663,93 @@ RGBAdjust::RGBAdjust(PClip _child, double r, double g, double b, double a, } } +template +static void fill_accum_rgb_planar_c(const BYTE *srcpR, const BYTE* srcpG, const BYTE* srcpB, int pitch, + unsigned int *accum_r, unsigned int *accum_g, unsigned int *accum_b, + int width, int height) { + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + accum_r[reinterpret_cast(srcpR)[x]]++; + accum_g[reinterpret_cast(srcpG)[x]]++; + accum_b[reinterpret_cast(srcpB)[x]]++; + } + srcpR += pitch; + srcpG += pitch; + srcpB += pitch; + } +} + +template +static void fill_accum_rgb_packed_c(const BYTE *srcp, int pitch, + unsigned int *accum_r, unsigned int *accum_g, unsigned int *accum_b, + int work_width, int height, int pixel_step) { + for (int y = 0; y < height; y++) { + for (int x = 0; x < work_width; x += pixel_step) { + accum_r[reinterpret_cast(srcp)[x + 2]]++; + accum_g[reinterpret_cast(srcp)[x + 1]]++; + accum_b[reinterpret_cast(srcp)[x + 0]]++; + } + srcp += pitch; + } +} + +template +static void apply_map_rgb_packed_c(BYTE *dstp8, int pitch, + BYTE *mapR, BYTE *mapG, BYTE *mapB, BYTE *mapA, + int width, int height) +{ + int _y = 0; + int _dither = 0; + pixel_t *dstp = reinterpret_cast(dstp8); + pitch /= sizeof(pixel_t); + + for (int y = 0; y < height; y++) { + if(dither) + _y = (y << 4) & 0xf0; + for (int x = 0; x < width; x++) { + if (dither) + _dither = ditherMap[(x & 0x0f) | _y]; + dstp[x * pixel_step + 0] = reinterpret_cast(mapB)[dither ? dstp[x * pixel_step + 0] << 8 | _dither : dstp[x * pixel_step + 0]]; + dstp[x * pixel_step + 1] = reinterpret_cast(mapG)[dither ? dstp[x * pixel_step + 1] << 8 | _dither : dstp[x * pixel_step + 1]]; + dstp[x * pixel_step + 2] = reinterpret_cast(mapR)[dither ? dstp[x * pixel_step + 2] << 8 | _dither : dstp[x * pixel_step + 2]]; + if(pixel_step == 4) + dstp[x * pixel_step + 3] = reinterpret_cast(mapA)[dither ? 
dstp[x * pixel_step + 3] << 8 | _dither : dstp[x * pixel_step + 3]]; + } + dstp += pitch; + } +} + +template +static void apply_map_rgb_planar_c(BYTE *dstpR8, BYTE *dstpG8, BYTE *dstpB8, BYTE *dstpA8, int pitch, + BYTE *mapR, BYTE *mapG, BYTE *mapB, BYTE *mapA, + int width, int height) +{ + int _y = 0; + int _dither = 0; + pixel_t *dstpR = reinterpret_cast(dstpR8); + pixel_t *dstpG = reinterpret_cast(dstpG8); + pixel_t *dstpB = reinterpret_cast(dstpB8); + pixel_t *dstpA = reinterpret_cast(dstpA8); + pitch /= sizeof(pixel_t); + + for (int y = 0; y < height; y++) { + if(dither) + _y = (y << 4) & 0xf0; + for (int x = 0; x < width; x++) { + if (dither) + _dither = ditherMap[(x & 0x0f) | _y]; + reinterpret_cast(dstpG)[x] = reinterpret_cast(mapG)[dither ? dstpG[x] << 8 | _dither : dstpG[x]]; + reinterpret_cast(dstpB)[x] = reinterpret_cast(mapB)[dither ? dstpB[x] << 8 | _dither : dstpB[x]]; + reinterpret_cast(dstpR)[x] = reinterpret_cast(mapR)[dither ? dstpR[x] << 8 | _dither : dstpR[x]]; + if(hasAlpha) + reinterpret_cast(dstpA)[x] = reinterpret_cast(mapA)[dither ? dstpA[x] << 8 | _dither : dstpA[x]]; + } + dstpG += pitch; dstpB += pitch; dstpR += pitch; + if(hasAlpha) + dstpA += pitch; + } +} + PVideoFrame __stdcall RGBAdjust::GetFrame(int n, IScriptEnvironment* env) { @@ -671,184 +758,71 @@ PVideoFrame __stdcall RGBAdjust::GetFrame(int n, IScriptEnvironment* env) BYTE* p = frame->GetWritePtr(); const int pitch = frame->GetPitch(); + int w = vi.width; + int h = vi.height; + if (dither) { - if (vi.IsRGB32()) { - for (int y = 0; y < vi.height; ++y) { - const int _y = (y << 4) & 0xf0; - for (int x = 0; x < vi.width; ++x) { - const int _dither = ditherMap[(x & 0x0f) | _y]; - p[x * 4 + 0] = mapB[p[x * 4 + 0] << 8 | _dither]; - p[x * 4 + 1] = mapG[p[x * 4 + 1] << 8 | _dither]; - p[x * 4 + 2] = mapR[p[x * 4 + 2] << 8 | _dither]; - p[x * 4 + 3] = mapA[p[x * 4 + 3] << 8 | _dither]; - } - p += pitch; - } - } - else if (vi.IsRGB24()) { - for (int y = 0; y < vi.height; ++y) { - const int _y = (y << 4) & 0xf0; - for (int x = 0; x < vi.width; ++x) { - const int _dither = ditherMap[(x & 0x0f) | _y]; - p[x * 3 + 0] = mapB[p[x * 3 + 0] << 8 | _dither]; - p[x * 3 + 1] = mapG[p[x * 3 + 1] << 8 | _dither]; - p[x * 3 + 2] = mapR[p[x * 3 + 2] << 8 | _dither]; - } - p += pitch; - } - } - else if (vi.IsRGB64()) { - for (int y = 0; y < vi.height; ++y) { - const int _y = (y << 4) & 0xf0; - for (int x = 0; x < vi.width; ++x) { - const int _dither = ditherMap[(x & 0x0f) | _y]; - uint16_t *p16 = reinterpret_cast(p); - p16[x * 4 + 0] = reinterpret_cast(mapB)[p16[x * 4 + 0] << 8 | _dither]; - p16[x * 4 + 1] = reinterpret_cast(mapG)[p16[x * 4 + 1] << 8 | _dither]; - p16[x * 4 + 2] = reinterpret_cast(mapR)[p16[x * 4 + 2] << 8 | _dither]; - p16[x * 4 + 3] = reinterpret_cast(mapA)[p16[x * 4 + 3] << 8 | _dither]; - } - p += pitch; - } - } - else if (vi.IsRGB48()) { - for (int y = 0; y < vi.height; ++y) { - const int _y = (y << 4) & 0xf0; - for (int x = 0; x < vi.width; ++x) { - const int _dither = ditherMap[(x & 0x0f) | _y]; - uint16_t *p16 = reinterpret_cast(p); - p16[x * 3 + 0] = reinterpret_cast(mapB)[p16[x * 3 + 0] << 8 | _dither]; - p16[x * 3 + 1] = reinterpret_cast(mapG)[p16[x * 3 + 1] << 8 | _dither]; - p16[x * 3 + 2] = reinterpret_cast(mapR)[p16[x * 3 + 2] << 8 | _dither]; - } - p += pitch; - } + if (vi.IsRGB32()) + apply_map_rgb_packed_c(p, pitch, mapR, mapG, mapB, mapA, w, h); + else if(vi.IsRGB24()) + apply_map_rgb_packed_c(p, pitch, mapR, mapG, mapB, mapA, w, h); + else if(vi.IsRGB64()) + apply_map_rgb_packed_c(p, 
pitch, mapR, mapG, mapB, mapA, w, h); + else if(vi.IsRGB48()) + apply_map_rgb_packed_c(p, pitch, mapR, mapG, mapB, mapA, w, h); + else { + // Planar RGB + bool hasAlpha = vi.IsPlanarRGBA(); + BYTE *p_g = p; + BYTE *p_b = frame->GetWritePtr(PLANAR_B); + BYTE *p_r = frame->GetWritePtr(PLANAR_R); + BYTE *p_a = frame->GetWritePtr(PLANAR_A); + // no float support + if(pixelsize==1) { + if(hasAlpha) + apply_map_rgb_planar_c(p_r, p_g, p_b, p_a, pitch, mapR, mapG, mapB, mapA, w, h); + else + apply_map_rgb_planar_c(p_r, p_g, p_b, p_a, pitch, mapR, mapG, mapB, mapA, w, h); } else { - // Planar RGB - bool hasAlpha = vi.IsPlanarRGBA(); - BYTE *srcpG = p; - BYTE *srcpB = frame->GetWritePtr(PLANAR_B); - BYTE *srcpR = frame->GetWritePtr(PLANAR_R); - BYTE *srcpA = frame->GetWritePtr(PLANAR_A); - const int pitchG = pitch; - const int pitchB = frame->GetPitch(PLANAR_B); - const int pitchR = frame->GetPitch(PLANAR_R); - const int pitchA = frame->GetPitch(PLANAR_A); - // no float support - if(pixelsize==1) { - for (int y=0; y(srcpG)[x] = reinterpret_cast(mapG)[reinterpret_cast(srcpG)[x] << 8 | _dither]; - reinterpret_cast(srcpB)[x] = reinterpret_cast(mapB)[reinterpret_cast(srcpB)[x] << 8 | _dither]; - reinterpret_cast(srcpR)[x] = reinterpret_cast(mapR)[reinterpret_cast(srcpR)[x] << 8 | _dither]; - if(hasAlpha) - reinterpret_cast(srcpA)[x] = reinterpret_cast(mapA)[reinterpret_cast(srcpA)[x] << 8 | _dither]; - } - srcpG += pitchG; srcpB += pitchB; srcpR += pitchR; - srcpA += pitchA; - } - } + if(hasAlpha) + apply_map_rgb_planar_c(p_r, p_g, p_b, p_a, pitch, mapR, mapG, mapB, mapA, w, h); + else + apply_map_rgb_planar_c(p_r, p_g, p_b, p_a, pitch, mapR, mapG, mapB, mapA, w, h); } + } } else { // no dither - if (vi.IsRGB32()) { - for (int y = 0; y < vi.height; ++y) { - for (int x = 0; x < vi.width; ++x) { - p[x * 4 + 0] = mapB[p[x * 4]]; - p[x * 4 + 1] = mapG[p[x * 4 + 1]]; - p[x * 4 + 2] = mapR[p[x * 4 + 2]]; - p[x * 4 + 3] = mapA[p[x * 4 + 3]]; - } - p += pitch; - } - } - else if (vi.IsRGB24()) { - for (int y = 0; y < vi.height; ++y) { - for (int x = 0; x < vi.width; x += 3) { - p[x * 3 + 0] = mapB[p[x]]; - p[x * 3 + 1] = mapG[p[x + 1]]; - p[x * 3 + 2] = mapR[p[x + 2]]; - } - p += pitch; - } - } else if (vi.IsRGB64()) { - for (int y = 0; y < vi.height; ++y) { - for (int x = 0; x < vi.width; ++x) { - uint16_t *p16 = reinterpret_cast(p); - p16[x * 4 + 0] = reinterpret_cast(mapB)[p16[x * 4]]; - p16[x * 4 + 1] = reinterpret_cast(mapG)[p16[x * 4 + 1]]; - p16[x * 4 + 2] = reinterpret_cast(mapR)[p16[x * 4 + 2]]; - p16[x * 4 + 3] = reinterpret_cast(mapA)[p16[x * 4 + 3]]; - } - p += pitch; - } - } else if (vi.IsRGB48()) { - for (int y = 0; y < vi.height; ++y) { - for (int x = 0; x < vi.width; ++x) { - uint16_t *p16 = reinterpret_cast(p); - p16[x * 3 + 0] = reinterpret_cast(mapB)[p16[x * 3]]; - p16[x * 3 + 1] = reinterpret_cast(mapG)[p16[x * 3 + 1]]; - p16[x * 3 + 2] = reinterpret_cast(mapR)[p16[x * 3 + 2]]; - } - p += pitch; - } + if (vi.IsRGB32()) + apply_map_rgb_packed_c(p, pitch, mapR, mapG, mapB, mapA, w, h); + else if(vi.IsRGB24()) + apply_map_rgb_packed_c(p, pitch, mapR, mapG, mapB, mapA, w, h); + else if(vi.IsRGB64()) + apply_map_rgb_packed_c(p, pitch, mapR, mapG, mapB, mapA, w, h); + else if(vi.IsRGB48()) + apply_map_rgb_packed_c(p, pitch, mapR, mapG, mapB, mapA, w, h); + else { + // Planar RGB + bool hasAlpha = vi.IsPlanarRGBA(); + BYTE *p_g = p; + BYTE *p_b = frame->GetWritePtr(PLANAR_B); + BYTE *p_r = frame->GetWritePtr(PLANAR_R); + BYTE *p_a = frame->GetWritePtr(PLANAR_A); + // no float support + 
if(pixelsize==1) { + if(hasAlpha) + apply_map_rgb_planar_c(p_r, p_g, p_b, p_a, pitch, mapR, mapG, mapB, mapA, w, h); + else + apply_map_rgb_planar_c(p_r, p_g, p_b, p_a, pitch, mapR, mapG, mapB, mapA, w, h); } else { - // Planar RGB - bool hasAlpha = vi.IsPlanarRGBA(); - BYTE *srcpG = p; - BYTE *srcpB = frame->GetWritePtr(PLANAR_B); - BYTE *srcpR = frame->GetWritePtr(PLANAR_R); - BYTE *srcpA = frame->GetWritePtr(PLANAR_A); - const int pitchG = pitch; - const int pitchB = frame->GetPitch(PLANAR_B); - const int pitchR = frame->GetPitch(PLANAR_R); - const int pitchA = frame->GetPitch(PLANAR_A); - // no float support - if(pixelsize==1) { - for (int y=0; y(srcpG)[x] = reinterpret_cast(mapG)[reinterpret_cast(srcpG)[x]]; - reinterpret_cast(srcpB)[x] = reinterpret_cast(mapB)[reinterpret_cast(srcpB)[x]]; - reinterpret_cast(srcpR)[x] = reinterpret_cast(mapR)[reinterpret_cast(srcpR)[x]]; - if(hasAlpha) - reinterpret_cast(srcpA)[x] = reinterpret_cast(mapA)[reinterpret_cast(srcpA)[x]]; - } - srcpG += pitchG; srcpB += pitchB; srcpR += pitchR; - srcpA += pitchA; - } - } + if(hasAlpha) + apply_map_rgb_planar_c(p_r, p_g, p_b, p_a, pitch, mapR, mapG, mapB, mapA, w, h); + else + apply_map_rgb_planar_c(p_r, p_g, p_b, p_a, pitch, mapR, mapG, mapB, mapA, w, h); } + } } if (analyze) { @@ -868,65 +842,25 @@ PVideoFrame __stdcall RGBAdjust::GetFrame(int n, IScriptEnvironment* env) accum_b[i] = 0; } - p = frame->GetWritePtr(); if(vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { - const BYTE *p_g = p; + const BYTE *p_g = frame->GetReadPtr(PLANAR_G);; const BYTE *p_b = frame->GetReadPtr(PLANAR_B); const BYTE *p_r = frame->GetReadPtr(PLANAR_R); - const int pitchG = pitch; - const int pitchB= frame->GetPitch(PLANAR_B); - const int pitchR= frame->GetPitch(PLANAR_R); - if(pixelsize==1) { - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { - accum_r[p_r[x]]++; - accum_g[p_g[x]]++; - accum_b[p_b[x]]++; - } - p_g += pitchG; - p_b += pitchB; - p_r += pitchR; - } - } - else { - // pixelsize == 2 - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { - accum_r[reinterpret_cast(p_r)[x]]++; - accum_g[reinterpret_cast(p_g)[x]]++; - accum_b[reinterpret_cast(p_b)[x]]++; - } - p_g += pitchG; - p_b += pitchB; - p_r += pitchR; - } - } + if (pixelsize == 1) + fill_accum_rgb_planar_c(p_r, p_g, p_b, pitch, accum_r, accum_g, accum_b, w, h); + else + fill_accum_rgb_planar_c(p_r, p_g, p_b, pitch, accum_r, accum_g, accum_b, w, h); } else { // packed RGB + const BYTE *srcp = frame->GetReadPtr(); const int pixel_step = vi.IsRGB24() || vi.IsRGB48() ? 
3 : 4; - if(pixelsize==1) { - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x += pixel_step) { - accum_r[p[x + 2]]++; - accum_g[p[x + 1]]++; - accum_b[p[x]]++; - } - p += pitch; - } - } - else { // pixelsize==2 - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x += pixel_step) { - accum_r[reinterpret_cast(p)[x + 2]]++; - accum_g[reinterpret_cast(p)[x + 1]]++; - accum_b[reinterpret_cast(p)[x]]++; - } - p += pitch; - } - } - } + if (pixelsize == 1) + fill_accum_rgb_packed_c(srcp, pitch, accum_r, accum_g, accum_b, w, h, pixel_step); + else + fill_accum_rgb_packed_c(srcp, pitch, accum_r, accum_g, accum_b, w, h, pixel_step); + } int pixels = vi.width*vi.height; float avg_r = 0, avg_g = 0, avg_b = 0; @@ -941,7 +875,7 @@ PVideoFrame __stdcall RGBAdjust::GetFrame(int n, IScriptEnvironment* env) int At_256 = (pixels + 128) / 256; // When 1/256th of all pixels have been reached, trigger "Loose min/max" - {for (int i = 0; i < lookup_size; i++) { + for (int i = 0; i < lookup_size; i++) { avg_r += (float)accum_r[i] * (float)i; avg_g += (float)accum_g[i] * (float)i; avg_b += (float)accum_b[i] * (float)i; @@ -960,17 +894,17 @@ PVideoFrame __stdcall RGBAdjust::GetFrame(int n, IScriptEnvironment* env) if (!Ahit_maxr) { Amax_r += accum_r[pixel_max - i]; if (Amax_r > At_256) { Ahit_maxr = true; Amax_r = pixel_max - i; } } if (!Ahit_maxg) { Amax_g += accum_g[pixel_max - i]; if (Amax_g > At_256) { Ahit_maxg = true; Amax_g = pixel_max - i; } } if (!Ahit_maxb) { Amax_b += accum_b[pixel_max - i]; if (Amax_b > At_256) { Ahit_maxb = true; Amax_b = pixel_max - i; } } - }} + } float Favg_r = avg_r / pixels; float Favg_g = avg_g / pixels; float Favg_b = avg_b / pixels; - {for (int i = 0; i < lookup_size; i++) { + for (int i = 0; i < lookup_size; i++) { st_r += (float)accum_r[i] * (float(i - Favg_r)*(i - Favg_r)); st_g += (float)accum_g[i] * (float(i - Favg_g)*(i - Favg_g)); st_b += (float)accum_b[i] * (float(i - Favg_b)*(i - Favg_b)); - }} + } float Fst_r = sqrt(st_r / pixels); float Fst_g = sqrt(st_g / pixels); From 8360ed62aa900adb6cf93ad795a32e5a75be4d00 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 5 Sep 2016 10:31:24 +0200 Subject: [PATCH 039/120] Fix Limiter parameter check (was never true) --- avs_core/filters/limiter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/avs_core/filters/limiter.cpp b/avs_core/filters/limiter.cpp index 9a48b069e..5460a0c39 100644 --- a/avs_core/filters/limiter.cpp +++ b/avs_core/filters/limiter.cpp @@ -94,10 +94,10 @@ Limiter::Limiter(PClip _child, int _min_luma, int _max_luma, int _min_chroma, in max_chroma(_max_chroma), show(show_e(_show)) { - if (!vi.IsYUV()) + if (!vi.IsYUV() && !vi.IsYUVA()) env->ThrowError("Limiter: Source must be YUV"); - if(show != show_none && vi.IsYUY2() && vi.IsYV24() && vi.IsYV12()) + if(show != show_none && !vi.IsYUY2() && !vi.IsYV24() && !vi.IsYV12()) env->ThrowError("Limiter: Source must be YV24, YV12 or YUY2 with show option."); if ((min_luma<0)||(min_luma>255)) From c32c9ec4f387c814d265a98c5b8218e19cad3010 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 5 Sep 2016 20:14:28 +0200 Subject: [PATCH 040/120] Allow plugins to use parameter type double It can be mapped to float in avs (case of Virtualdub 'ddddddddd' param specifier) --- avs_core/core/PluginManager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avs_core/core/PluginManager.cpp b/avs_core/core/PluginManager.cpp index 310f2b0be..288d65ea3 100644 --- a/avs_core/core/PluginManager.cpp +++ b/avs_core/core/PluginManager.cpp @@ 
-81,7 +81,7 @@ static std::string GetFullPathNameWrap(const std::string &f) static bool IsParameterTypeSpecifier(char c) { switch (c) { - case 'b': case 'i': case 'f': case 's': case 'c': case '.': + case 'b': case 'i': case 'f': case 's': case 'c': case '.': case 'd': return true; default: return false; From 263956714642806e0a299b3b54f2ed363eeb5a3c Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 5 Sep 2016 20:18:27 +0200 Subject: [PATCH 041/120] Limiter: 10-16 bit YUV support. Todo: show options for 10+ bits --- avs_core/filters/limiter.cpp | 195 +++++++++++++++++++++++++---------- avs_core/filters/limiter.h | 4 + 2 files changed, 146 insertions(+), 53 deletions(-) diff --git a/avs_core/filters/limiter.cpp b/avs_core/filters/limiter.cpp index 5460a0c39..e0f1981ae 100644 --- a/avs_core/filters/limiter.cpp +++ b/avs_core/filters/limiter.cpp @@ -54,6 +54,47 @@ inline void limit_plane_sse2(BYTE *ptr, int min_value, int max_value, int pitch, } } +static inline __m128i _mm_cmple_epu16 (__m128i x, __m128i y) +{ + // Returns 0xFFFF where x <= y: + return _mm_cmpeq_epi16(_mm_subs_epu16(x, y), _mm_setzero_si128()); +} + +static inline __m128i _mm_blendv_si128 (__m128i x, __m128i y, __m128i mask) +{ + // Replace bit in x with bit in y when matching bit in mask is set: + return _mm_or_si128(_mm_andnot_si128(mask, x), _mm_and_si128(mask, y)); +} + +static inline __m128i _mm_min_epu16 (__m128i x, __m128i y) +{ + // Returns x where x <= y, else y: + return _mm_blendv_si128(y, x, _mm_cmple_epu16(x, y)); +} + +// sse4.1 +static inline __m128i _mm_max_epu16 (__m128i x, __m128i y) +{ + // Returns x where x >= y, else y: + return _mm_blendv_si128(x, y, _mm_cmple_epu16(x, y)); +} + +//min and max values are 16-bit unsigned integers +inline void limit_plane_uint16_sse2(BYTE *ptr, unsigned int min_value, unsigned int max_value, int pitch, int height) { + __m128i min_vector = _mm_set1_epi16(min_value); + __m128i max_vector = _mm_set1_epi16(max_value); + BYTE* end_point = ptr + pitch * height; + + while(ptr < end_point) { + __m128i src = _mm_load_si128(reinterpret_cast(ptr)); + src = _mm_max_epu16(src, min_vector); + src = _mm_min_epu16(src, max_vector); + _mm_store_si128(reinterpret_cast<__m128i*>(ptr), src); + ptr += 16; + } +} + + #ifdef X86_32 //min and max values are 16-bit integers either max_plane|max_plane for planar or max_luma|max_chroma for yuy2 @@ -97,27 +138,64 @@ Limiter::Limiter(PClip _child, int _min_luma, int _max_luma, int _min_chroma, in if (!vi.IsYUV() && !vi.IsYUVA()) env->ThrowError("Limiter: Source must be YUV"); - if(show != show_none && !vi.IsYUY2() && !vi.IsYV24() && !vi.IsYV12()) - env->ThrowError("Limiter: Source must be YV24, YV12 or YUY2 with show option."); + if(show != show_none && !vi.IsYUY2() && !vi.Is444() && !vi.Is420()) + env->ThrowError("Limiter: Source must be YUV(A) 4:4:4, 4:2:0 or YUY2 with show option."); + + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); // 8,10..16 + int pixel_max = (1 << bits_per_pixel) - 1; // 255, 1023, 4095, 16383, 65535 + + int tv_range_low = 16 << (bits_per_pixel - 8); // 16 + int tv_range_hi_luma = ((235+1) << (bits_per_pixel - 8)) - 1; // 16-235 + int tv_range_hi_chroma = ((240+1) << (bits_per_pixel - 8)) - 1; // 16-240,64–963, 256–3855,... 
4096-61695 - if ((min_luma<0)||(min_luma>255)) + // default min and max values by bitdepths + if (min_luma == -9999) + min_luma = tv_range_low; + if (max_luma == -9999) + max_luma = tv_range_hi_luma; + if (min_chroma == -9999) + min_chroma = tv_range_low; + if (max_chroma == -9999) + max_chroma = tv_range_hi_chroma; + + if (pixelsize == 4) + env->ThrowError("Limiter: cannot operate on float video formats"); + + if ((min_luma<0)||(min_luma>pixel_max)) env->ThrowError("Limiter: Invalid minimum luma"); - if ((max_luma<0)||(max_luma>255)) + if ((max_luma<0)||(max_luma>pixel_max)) env->ThrowError("Limiter: Invalid maximum luma"); - if ((min_chroma<0)||(min_chroma>255)) + if ((min_chroma<0)||(min_chroma>pixel_max)) env->ThrowError("Limiter: Invalid minimum chroma"); - if ((max_chroma<0)||(max_chroma>255)) + if ((max_chroma<0)||(max_chroma>pixel_max)) env->ThrowError("Limiter: Invalid maximum chroma"); } +template +static void limit_plane_c(BYTE *srcp8, int pitch, int min, int max, int width, int height) { + pixel_t *srcp = reinterpret_cast(srcp8); + pitch /= sizeof(pixel_t); + for(int y = 0; y < height; y++) { + for(int x = 0; x < width; x++) { + if(srcp[x] < min ) + srcp[x] = (pixel_t)min; + else if(srcp[x] > max) + srcp[x] = (pixel_t)max; + } + srcp += pitch; + } +} + PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame frame = child->GetFrame(n, env); env->MakeWritable(&frame); unsigned char* srcp = frame->GetWritePtr(); int pitch = frame->GetPitch(); int row_size = frame->GetRowSize(); - int height = frame->GetHeight(); + int width = vi.width; + int height = vi.height; if (vi.IsYUY2()) { @@ -229,16 +307,17 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { srcp += pitch; } return frame; - } else if(vi.IsYV12()) { + // YUY end + } else if(vi.Is420()) { if (show == show_luma) { // Mark clamped pixels red/yellow/green over a colour image const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcn = srcp + pitch; + unsigned char* srcn = srcp + pitch; // next luma line unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); for (int h=0; h < height;h+=2) { - for (int x = 0; x < row_size; x+=2) { + for (int x = 0; x < width; x+=2) { int uv = 0; if (srcp[x ] < min_luma) { srcp[x ] = 81; uv |= 1;} else if (srcp[x ] > max_luma) { srcp[x ] = 145; uv |= 2;} @@ -251,12 +330,13 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { switch (uv) { case 1: srcpU[x/2] = 91; srcpV[x/2] = 240; break; // red: Y=81, U=91 and V=240 case 2: srcpU[x/2] = 54; srcpV[x/2] = 34; break; // green: Y=145, U=54 and V=34 + // this differs from show_luma_grey case 3: srcp[x]=srcp[x+2]=srcn[x]=srcn[x+2]=210; srcpU[x/2] = 16; srcpV[x/2] = 146; break; // yellow:Y=210, U=16 and V=146 default: break; } } - srcp += pitch*2; + srcp += pitch*2; // 2x2 pixels at a time (4:2:0 subsampling) srcn += pitch*2; srcpV += pitchUV; srcpU += pitchUV; @@ -265,12 +345,12 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { } else if (show == show_luma_grey) { // Mark clamped pixels coloured over a greyscaled image const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcn = srcp + pitch; + unsigned char* srcn = srcp + pitch; // next luma line unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); for (int h=0; h < height;h+=2) { - for (int x = 0; x < row_size; x+=2) { + for (int x = 0; x < width; x+=2) { int uv = 0; if 
(srcp[x ] < min_luma) { srcp[x ] = 81; uv |= 1;} else if (srcp[x ] > max_luma) { srcp[x ] = 145; uv |= 2;} @@ -283,6 +363,7 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { switch (uv) { case 1: srcpU[x/2] = 91; srcpV[x/2] = 240; break; // red: Y=81, U=91 and V=240 case 2: srcpU[x/2] = 54; srcpV[x/2] = 34; break; // green: Y=145, U=54 and V=34 + // this differs from show_luma case 3: srcpU[x/2] = 90; srcpV[x/2] = 134; break; // puke: Y=81, U=90 and V=134 default: srcpU[x/2] = srcpV[x/2] = 128; break; // olive: Y=145, U=90 and V=134 } @@ -296,12 +377,12 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { } else if (show == show_chroma) { // Mark clamped pixels yellow over a colour image const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcn = srcp + pitch; + unsigned char* srcn = srcp + pitch; // next luma line unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); for (int h=0; h < height;h+=2) { - for (int x = 0; x < row_size; x+=2) { + for (int x = 0; x < width; x+=2) { if ( (srcpU[x/2] < min_chroma) // U- || (srcpU[x/2] > max_chroma) // U+ || (srcpV[x/2] < min_chroma) // V- @@ -317,12 +398,12 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { } else if (show == show_chroma_grey) { // Mark clamped pixels coloured over a greyscaled image const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcn = srcp + pitch; + unsigned char* srcn = srcp + pitch; // next luma line unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); for (int h=0; h < height;h+=2) { - for (int x = 0; x < row_size; x+=2) { + for (int x = 0; x < width; x+=2) { int uv = 0; if (srcpU[x/2] < min_chroma) uv |= 1; // U- else if (srcpU[x/2] > max_chroma) uv |= 2; // U+ @@ -340,13 +421,14 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { default: srcpU[x/2] = srcpV[x/2] = 128; break; } } - srcp += pitch*2; + srcp += pitch*2; // this differs from YV24 (*2) srcn += pitch*2; srcpV += pitchUV; srcpU += pitchUV; } return frame; } + // YV12 (4:2:0) end } else if(vi.IsYV24()) { if (show == show_luma) { // Mark clamped pixels red/green over a colour image @@ -354,11 +436,12 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); - for (int h=0; h < height; h+=1) { - for (int x = 0; x < row_size; x+=1) { + for (int h=0; h < height; h+=1) { + for (int x = 0; x < width; x+=1) { if (srcp[x] < min_luma) { srcp[x] = 81; srcpU[x] = 91; srcpV[x] = 240; } // red: Y=81, U=91 and V=240 else if (srcp[x] > max_luma) { srcp[x] = 145; srcpU[x] = 54; srcpV[x] = 34; } // green: Y=145, U=54 and V=34 - } + // this differs from show_luma_grey (nothing here) + } srcp += pitch; srcpV += pitchUV; srcpU += pitchUV; @@ -371,9 +454,10 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); for (int h=0; h < height; h+=1) { - for (int x = 0; x < row_size; x+=1) { + for (int x = 0; x < width; x+=1) { if (srcp[x] < min_luma) { srcp[x] = 81; srcpU[x] = 91; srcpV[x] = 240; } // red: Y=81, U=91 and V=240 else if (srcp[x] > max_luma) { srcp[x] = 145; srcpU[x] = 54; srcpV[x] = 34; } // green: Y=145, U=54 and V=34 + // this differs from show_luma else { srcpU[x] = srcpV[x] = 128; } // grey } srcp += pitch; @@ -388,7 +472,7 @@ PVideoFrame 
__stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); for (int h=0; h < height; h+=1) { - for (int x = 0; x < row_size; x+=1) { + for (int x = 0; x < width; x+=1) { if ( (srcpU[x] < min_chroma) // U- || (srcpU[x] > max_chroma) // U+ || (srcpV[x] < min_chroma) // V- @@ -407,7 +491,7 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); for (int h=0; h < height; h+=1) { - for (int x = 0; x < row_size; x+=1) { + for (int x = 0; x < width; x+=1) { int uv = 0; if (srcpU[x] < min_chroma) uv |= 1; // U- else if (srcpU[x] > max_chroma) uv |= 2; // U+ @@ -431,11 +515,12 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { } return frame; } + // YV24 (4:4:4) end } if (vi.IsPlanar()) { //todo: separate to functions and use sse2 for aligned planes even if some are unaligned - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && + if ((pixelsize==1) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(frame->GetWritePtr(PLANAR_U), 16) && IsPtrAligned(frame->GetWritePtr(PLANAR_V), 16)) { limit_plane_sse2(srcp, min_luma | (min_luma << 8), max_luma | (max_luma << 8), pitch, row_size, height); @@ -449,7 +534,7 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { } #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_INTEGER_SSE) + if ((pixelsize==1) && (env->GetCPUFlags() & CPUF_INTEGER_SSE)) { limit_plane_isse(srcp, min_luma | (min_luma << 8), max_luma | (max_luma << 8), pitch, row_size, height); limit_plane_isse(frame->GetWritePtr(PLANAR_U), min_chroma | (min_chroma << 8), max_chroma | (max_chroma << 8), @@ -460,39 +545,43 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { return frame; } #endif - - for(int y = 0; y < height; y++) { - for(int x = 0; x < row_size; x++) { - if(srcp[x] < min_luma ) - srcp[x] = (unsigned char)min_luma; - else if(srcp[x] > max_luma) - srcp[x] = (unsigned char)max_luma; - } - srcp += pitch; + + if ((pixelsize==2) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && + IsPtrAligned(frame->GetWritePtr(PLANAR_U), 16) && IsPtrAligned(frame->GetWritePtr(PLANAR_V), 16)) + { + limit_plane_uint16_sse2(srcp, min_luma, max_luma, pitch, height); + + limit_plane_uint16_sse2(frame->GetWritePtr(PLANAR_U), min_chroma, max_chroma, + frame->GetPitch(PLANAR_U), frame->GetHeight(PLANAR_U)); + + limit_plane_uint16_sse2(frame->GetWritePtr(PLANAR_V), min_chroma, max_chroma, + frame->GetPitch(PLANAR_V), frame->GetHeight(PLANAR_V)); + + return frame; } - // Prepare for chroma + // C + + // luma + if(pixelsize == 1) + limit_plane_c(srcp, pitch, min_luma, max_luma, width, height); + else if(pixelsize == 2) + limit_plane_c(srcp, pitch, min_luma, max_luma, width, height); + + // chroma if exists srcp = frame->GetWritePtr(PLANAR_U); - unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); - row_size = frame->GetRowSize(PLANAR_U); + width = frame->GetRowSize(PLANAR_U) / pixelsize; height = frame->GetHeight(PLANAR_U); pitch = frame->GetPitch(PLANAR_U); if (!pitch) return frame; - - for(int y = 0; y < height; y++) { - for(int x = 0; x < row_size; x++) { - if(srcp[x] < min_chroma) - srcp[x] = (unsigned char)min_chroma; - else if(srcp[x] > max_chroma) - srcp[x] = (unsigned char)max_chroma; - if(srcpV[x] < min_chroma) - srcpV[x] = (unsigned char)min_chroma; - else if(srcpV[x] > max_chroma) - srcpV[x] = (unsigned char)max_chroma; - } - srcp += pitch; - srcpV += 
pitch; + BYTE* srcpV = frame->GetWritePtr(PLANAR_V); + if(pixelsize == 1) { + limit_plane_c(srcp, pitch, min_chroma, max_chroma, width, height); + limit_plane_c(srcpV, pitch, min_chroma, max_chroma, width, height); + } else if(pixelsize == 2) { + limit_plane_c(srcp, pitch, min_chroma, max_chroma, width, height); + limit_plane_c(srcpV, pitch, min_chroma, max_chroma, width, height); } } return frame; @@ -516,5 +605,5 @@ AVSValue __cdecl Limiter::Create(AVSValue args, void* user_data, IScriptEnvironm env->ThrowError("Limiter: show must be \"luma\", \"luma_grey\", \"chroma\" or \"chroma_grey\""); } - return new Limiter(args[0].AsClip(), args[1].AsInt(16), args[2].AsInt(235), args[3].AsInt(16), args[4].AsInt(240), show, env); + return new Limiter(args[0].AsClip(), args[1].AsInt(-9999), args[2].AsInt(-9999), args[3].AsInt(-9999), args[4].AsInt(-9999), show, env); } diff --git a/avs_core/filters/limiter.h b/avs_core/filters/limiter.h index 30f08facb..7e107f5be 100644 --- a/avs_core/filters/limiter.h +++ b/avs_core/filters/limiter.h @@ -57,6 +57,10 @@ class Limiter : public GenericVideoFilter enum show_e{show_none, show_luma, show_luma_grey, show_chroma, show_chroma_grey}; const show_e show; + + // avs+ + int pixelsize; + int bits_per_pixel; // 8,10..16 }; #endif // __Limiter_H__ From e536fb1b5e23f66536d78c2396f1dac4e2639781 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 5 Sep 2016 20:21:33 +0200 Subject: [PATCH 042/120] Convert sse2 for YUV 8->10-16, 16-10->8 bit (with shifts) plus misc temporary comments for simd --- avs_core/convert/convert.cpp | 160 +++++++++++++++++++++++++++++++---- 1 file changed, 142 insertions(+), 18 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 76cf6b81b..6e721c739 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -787,12 +787,69 @@ static void convert_rgb_uint16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rows // test if(sourcebits==16) dstp[x] = srcp0[x] / 257; // RGB: full range 0..255 <-> 0..65535 (*255 / 65535) + // hint for simd code writers: + // compilers are smart. Some divisions near 2^n can be performed by tricky multiplication + // such as x/257 + // 32 bit (x) * 0xFF00FF01 = edx_eax + // Result of /257 is in: (edx>>8) and &FF !! 
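+        //   worked example (illustrative, assuming 16-bit unsigned x): x = 65535
+        //     65535 * 0xFF00FF01 = 0x0000FF00000000FF  ->  edx = 0x0000FF00
+        //     (edx >> 8) & 0xFF = 255, which equals 65535 / 257
+        //   i.e. in C: (BYTE)(((uint64_t)x * 0xFF00FF01u) >> 40)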
+ // movzx edx, WORD PTR [esi+ecx*2] + // mov eax, -16711935 ; ff00ff01H + // mul edx + // shr edx, 8 + // mov BYTE PTR [ecx+edi], dl else if (sourcebits==14) dstp[x] = srcp0[x] * 255 / 16383; // RGB: full range 0..255 <-> 0..16384-1 + /* + movzx eax, WORD PTR [edi+ecx*2] + imul esi, eax, 255 + mov eax, -2147352567 ; 80020009H + imul esi + add edx, esi + sar edx, 13 ; 0000000dH + mov eax, edx + shr eax, 31 ; 0000001fH + add eax, edx + mov BYTE PTR [ecx+ebx], al + */ + /* + and w/o mul 255: byte_y = uint16_t_x/16383: + movzx ebx, WORD PTR [esi+ecx*2] + mov eax, 262161 ; 00040011H + mul ebx + sub ebx, edx + shr ebx, 1 + add ebx, edx + shr ebx, 13 ; 0000000dH + mov BYTE PTR [ecx+edi], bl + */ else if (sourcebits==12) dstp[x] = srcp0[x] * 255 / 4095; // RGB: full range 0..255 <-> 0..4096-1 + /* + movzx eax, WORD PTR [edi+ecx*2] + imul esi, eax, 255 + mov eax, -2146959231 ; 80080081H + imul esi + add edx, esi + sar edx, 11 ; 0000000bH + mov eax, edx + shr eax, 31 ; 0000001fH + add eax, edx + mov BYTE PTR [ecx+ebx], al + */ else if (sourcebits==10) dstp[x] = srcp0[x] * 255 / 1023; // RGB: full range 0..255 <-> 0..1024-1 + /* + movzx eax, WORD PTR [edi+ecx*2] + imul esi, eax, 255 + mov eax, -2145384445 ; 80200803H + imul esi + add edx, esi + sar edx, 9 + mov eax, edx + shr eax, 31 ; 0000001fH + add eax, edx + mov BYTE PTR [ecx+ebx], al + */ } dstp += dst_pitch; srcp0 += src_pitch; @@ -819,6 +876,37 @@ static void convert_uint16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, } } +template +static void convert_uint16_to_8_sse2(const BYTE *srcp8, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +{ + const uint16_t *srcp = reinterpret_cast(srcp8); + src_pitch = src_pitch / sizeof(uint16_t); + int src_width = src_rowsize / sizeof(uint16_t); + int wmod16 = (src_width / 16) * 16; + + __m128i zero = _mm_setzero_si128(); + // no dithering, no range conversion, simply shift + for(int y=0; y(srcp + x)); // 8* uint16 + __m128i src_hi = _mm_load_si128(reinterpret_cast(srcp + x+8)); + src_lo = _mm_srli_epi16(src_lo, (sourcebits - 8)); + src_hi = _mm_srli_epi16(src_hi, (sourcebits - 8)); + __m128i dst = _mm_packus_epi16(src_lo, src_hi); + _mm_store_si128(reinterpret_cast<__m128i*>(dstp+x), dst); + } + // rest + for (int x = wmod16; x < src_width; x++) + { + dstp[x] = srcp[x] >> (sourcebits-8); + } + dstp += dst_pitch; + srcp += src_pitch; + } +} + // float to 8 bit, float to 10/12/14/16 bit template static void convert_32_to_uintN_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) @@ -880,27 +968,60 @@ static void convert_rgb_8_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_rows // YUV: bit shift 8 to 10-12-14-16 bits template -static void convert_8_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +static void convert_8_to_uint16_c(const BYTE *srcp, BYTE *dstp8, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { - const uint8_t *srcp0 = reinterpret_cast(srcp); - uint16_t *dstp0 = reinterpret_cast(dstp); + uint16_t *dstp = reinterpret_cast(dstp8); - src_pitch = src_pitch / sizeof(uint8_t); dst_pitch = dst_pitch / sizeof(uint16_t); - int src_width = src_rowsize / sizeof(uint8_t); + int src_width = src_rowsize / sizeof(uint8_t); // intentional for(int y=0; y +static void convert_8_to_uint16_sse2(const BYTE *srcp, BYTE *dstp8, int src_rowsize, int src_height, int src_pitch, int dst_pitch, 
float float_range) +{ + uint16_t *dstp = reinterpret_cast(dstp8); + + dst_pitch = dst_pitch / sizeof(uint16_t); + + int src_width = src_rowsize / sizeof(uint8_t); + int wmod16 = (src_width / 16) * 16; + + __m128i zero = _mm_setzero_si128(); + + for(int y=0; y(srcp + x)); // 16 bytes + __m128i dst_lo = _mm_unpacklo_epi8(src, zero); + __m128i dst_hi = _mm_unpackhi_epi8(src, zero); + dst_lo = _mm_slli_epi16(dst_lo, (targetbits - 8)); + dst_hi = _mm_slli_epi16(dst_hi, (targetbits - 8)); + _mm_store_si128(reinterpret_cast<__m128i*>(dstp+x), dst_lo); + _mm_store_si128(reinterpret_cast<__m128i*>(dstp+x + 8), dst_hi); + } + // rest + for (int x = wmod16; x < src_width; x++) + { + dstp[x] = srcp[x] << (targetbits-8); + } + dstp += dst_pitch; + srcp += src_pitch; + } +} + + // RGB full range: 10-12-14 <=> 16 bits template static void convert_rgb_uint16_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) @@ -988,6 +1109,7 @@ static void convert_uintN_to_float_c(const BYTE *srcp, BYTE *dstp, int src_rowsi ConvertTo8bit::ConvertTo8bit(PClip _child, const float _float_range, const int _dither_mode, const int _bitdepth, const int _truerange, IScriptEnvironment* env) : GenericVideoFilter(_child), float_range(_float_range), dither_mode(_dither_mode), source_bitdepth(_bitdepth), truerange(_truerange) { + bool sse2 = !!(env->GetCPUFlags() & CPUF_SSE2); // frames are always 16 bit aligned if (vi.ComponentSize() == 2) // 16(,14,12,10)->8 bit { @@ -1012,14 +1134,14 @@ ConvertTo8bit::ConvertTo8bit(PClip _child, const float _float_range, const int _ if(truerange) { switch(source_bitdepth) { - case 10: conv_function = convert_uint16_to_8_c<10>; break; - case 12: conv_function = convert_uint16_to_8_c<12>; break; - case 14: conv_function = convert_uint16_to_8_c<14>; break; - case 16: conv_function = convert_uint16_to_8_c<16>; break; + case 10: conv_function = sse2 ? convert_uint16_to_8_sse2<10> : convert_uint16_to_8_c<10>; break; + case 12: conv_function = sse2 ? convert_uint16_to_8_sse2<12> : convert_uint16_to_8_c<12>; break; + case 14: conv_function = sse2 ? convert_uint16_to_8_sse2<14> : convert_uint16_to_8_c<14>; break; + case 16: conv_function = sse2 ? convert_uint16_to_8_sse2<16> : convert_uint16_to_8_c<16>; break; default: env->ThrowError("ConvertTo8bit: invalid source bitdepth"); } } else { - conv_function = convert_uint16_to_8_c<16>; // always convet from 16 bit scale + conv_function = sse2 ? convert_uint16_to_8_sse2<16> : convert_uint16_to_8_c<16>; // always convert from 16 bit scale } } else env->ThrowError("ConvertTo8bit: unsupported color space"); @@ -1129,6 +1251,8 @@ ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int GenericVideoFilter(_child), float_range(_float_range), dither_mode(_dither_mode), source_bitdepth(_source_bitdepth), target_bitdepth(_target_bitdepth), truerange(_truerange) { change_only_format = false; + + bool sse2 = !!(env->GetCPUFlags() & CPUF_SSE2); // frames are always 16 bit aligned if (vi.ComponentSize() == 1) // 8->10-12-14-16 bit { @@ -1156,15 +1280,15 @@ ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int { switch (target_bitdepth) { - case 10: conv_function = convert_8_to_uint16_c<10>; break; - case 12: conv_function = convert_8_to_uint16_c<12>; break; - case 14: conv_function = convert_8_to_uint16_c<14>; break; - case 16: conv_function = convert_8_to_uint16_c<16>; break; + case 10: conv_function = sse2 ? 
convert_8_to_uint16_sse2<10> : convert_8_to_uint16_c<10>; break; + case 12: conv_function = sse2 ? convert_8_to_uint16_sse2<12> : convert_8_to_uint16_c<12>; break; + case 14: conv_function = sse2 ? convert_8_to_uint16_sse2<14> : convert_8_to_uint16_c<14>; break; + case 16: conv_function = sse2 ? convert_8_to_uint16_sse2<16> : convert_8_to_uint16_c<16>; break; default: env->ThrowError("ConvertTo16bit: unsupported bit depth"); } } else { - conv_function = convert_8_to_uint16_c<16>; // always 16 bit scale + conv_function = sse2 ? convert_8_to_uint16_sse2<16> : convert_8_to_uint16_c<16>; // always 16 bit scale } } else From 21d0a9886c97fc720b5b26c2af46430209a1bbaa Mon Sep 17 00:00:00 2001 From: Pinterf Date: Tue, 6 Sep 2016 19:32:14 +0200 Subject: [PATCH 043/120] Limiter: port show option for 10-16 bits --- avs_core/filters/limiter.cpp | 272 ++++++++++++++++++++++++++++++++++- 1 file changed, 265 insertions(+), 7 deletions(-) diff --git a/avs_core/filters/limiter.cpp b/avs_core/filters/limiter.cpp index e0f1981ae..0b3b648ba 100644 --- a/avs_core/filters/limiter.cpp +++ b/avs_core/filters/limiter.cpp @@ -188,6 +188,205 @@ static void limit_plane_c(BYTE *srcp8, int pitch, int min, int max, int width, i } } +template +static void show_luma_with_grey_opt_yuv444(BYTE *srcp8, BYTE *srcpU8, BYTE *srcpV8, int pitch, int pitchUV, int width, int height, int min_luma, int max_luma, int bits_per_pixel) +{ + // show_luma Mark clamped pixels red/green over a colour image + // show_luma_grey Mark clamped pixels red/green over a greyscaled image + const int shift = sizeof(pixel_t) == 1 ? 0 : (bits_per_pixel - 8); + pixel_t *srcp = reinterpret_cast(srcp8); + pixel_t *srcpU = reinterpret_cast(srcpU8); + pixel_t *srcpV = reinterpret_cast(srcpV8); + pitch /= sizeof(pixel_t); + pitchUV /= sizeof(pixel_t); + + for (int h=0; h < height; h+=1) { + for (int x = 0; x < width; x+=1) { + if (srcp[x] < min_luma) { srcp[x] = 81 << shift; srcpU[x] = 91 << shift; srcpV[x] = 240 << shift; } // red: Y=81, U=91 and V=240 + else if (srcp[x] > max_luma) { srcp[x] = 145 << shift; srcpU[x] = 54 << shift; srcpV[x] = 34 << shift; } // green: Y=145, U=54 and V=34 + // this differs from show_luma + else if(show_luma_grey) { srcpU[x] = srcpV[x] = 128 << shift; } // grey + } + srcp += pitch; + srcpV += pitchUV; + srcpU += pitchUV; + } +} + +template +static void show_luma_with_grey_opt_yuv420(BYTE *srcp8, BYTE *srcpU8, BYTE *srcpV8, int pitch, int pitchUV, int width, int height, int min_luma, int max_luma, int bits_per_pixel) +{ + // show_luma Mark clamped pixels red/green over a colour image + // show_luma_grey Mark clamped pixels red/green over a greyscaled image + const int shift = sizeof(pixel_t) == 1 ? 
0 : (bits_per_pixel - 8); + pixel_t *srcp = reinterpret_cast(srcp8); + pixel_t *srcn = reinterpret_cast(srcp8 + pitch); // next line + pixel_t *srcpU = reinterpret_cast(srcpU8); + pixel_t *srcpV = reinterpret_cast(srcpV8); + pitch /= sizeof(pixel_t); + pitchUV /= sizeof(pixel_t); + + for (int h=0; h < height; h+=2) { + for (int x = 0; x < width; x+=2) { + int uv = 0; + if (srcp[x ] < min_luma) { srcp[x ] = 81 << shift; uv |= 1;} + else if (srcp[x ] > max_luma) { srcp[x ] = 145 << shift; uv |= 2;} + if (srcp[x+1] < min_luma) { srcp[x+1] = 81 << shift; uv |= 1;} + else if (srcp[x+1] > max_luma) { srcp[x+1] = 145 << shift; uv |= 2;} + if (srcn[x ] < min_luma) { srcn[x ] = 81 << shift; uv |= 1;} + else if (srcn[x ] > max_luma) { srcn[x ] = 145 << shift; uv |= 2;} + if (srcn[x+1] < min_luma) { srcn[x+1] = 81 << shift; uv |= 1;} + else if (srcn[x+1] > max_luma) { srcn[x+1] = 145 << shift; uv |= 2;} + switch (uv) { + case 1: srcpU[x/2] = 91 << shift; srcpV[x/2] = 240 << shift; break; // red: Y=81, U=91 and V=240 + case 2: srcpU[x/2] = 54 << shift; srcpV[x/2] = 34 << shift; break; // green: Y=145, U=54 and V=34 + // this differs from show_luma_grey + case 3: + if(show_luma_grey) { + srcpU[x/2] = 90 << shift; + srcpV[x/2] = 134 << shift; break; // puke: Y=81, U=90 and V=134 olive: Y=145, U=90 and V=134 + } else { + srcp[x]=srcp[x+2]=srcn[x]=srcn[x+2]=210 << shift; // yellow:Y=210, U=16 and V=146 + srcpU[x/2] = 16 << shift; + srcpV[x/2] = 146 << shift; + } + break; + default: + if(show_luma_grey) { + srcpU[x/2] = srcpV[x/2] = 128 << shift; // olive: Y=145, U=90 and V=134 + } + break; + } + } + srcp += pitch*2; // 2x2 pixels at a time (4:2:0 subsampling) + srcn += pitch*2; + srcpV += pitchUV; + srcpU += pitchUV; + } +} + +template +static void show_chroma_yuv444(BYTE *srcp8, BYTE *srcpU8, BYTE *srcpV8, int pitch, int pitchUV, int width, int height, int min_chroma, int max_chroma, int bits_per_pixel) +{ + const int shift = sizeof(pixel_t) == 1 ? 0 : (bits_per_pixel - 8); + pixel_t *srcp = reinterpret_cast(srcp8); + pixel_t *srcpU = reinterpret_cast(srcpU8); + pixel_t *srcpV = reinterpret_cast(srcpV8); + pitch /= sizeof(pixel_t); + pitchUV /= sizeof(pixel_t); + + for (int h=0; h < height; h+=1) { + for (int x = 0; x < width; x+=1) { + if ( (srcpU[x] < min_chroma) // U- + || (srcpU[x] > max_chroma) // U+ + || (srcpV[x] < min_chroma) // V- + || (srcpV[x] > max_chroma) )// V+ + { srcp[x]=210 << shift; srcpU[x]= 16 << shift; srcpV[x]=146 << shift; } // yellow:Y=210, U=16 and V=146 + } + srcp += pitch; + srcpV += pitchUV; + srcpU += pitchUV; + } +} + +template +static void show_chroma_yuv420(BYTE *srcp8, BYTE *srcpU8, BYTE *srcpV8, int pitch, int pitchUV, int width, int height, int min_chroma, int max_chroma, int bits_per_pixel) +{ + const int shift = sizeof(pixel_t) == 1 ? 
0 : (bits_per_pixel - 8); + pixel_t *srcp = reinterpret_cast(srcp8); + pixel_t *srcn = reinterpret_cast(srcp8 + pitch); // next line + pixel_t *srcpU = reinterpret_cast(srcpU8); + pixel_t *srcpV = reinterpret_cast(srcpV8); + pitch /= sizeof(pixel_t); + pitchUV /= sizeof(pixel_t); + + for (int h=0; h < height; h+=2) { + for (int x = 0; x < width; x+=2) { + if ( (srcpU[x/2] < min_chroma) // U- + || (srcpU[x/2] > max_chroma) // U+ + || (srcpV[x/2] < min_chroma) // V- + || (srcpV[x/2] > max_chroma) )// V+ + { srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=210 << shift; srcpU[x/2]= 16 << shift; srcpV[x/2]=146 << shift; } // yellow:Y=210, U=16 and V=146 + } + srcp += pitch*2; // 2x2 pixels at a time (4:2:0 subsampling) + srcn += pitch*2; + srcpV += pitchUV; + srcpU += pitchUV; + } +} + +template +static void show_chroma_grey_yuv444(BYTE *srcp8, BYTE *srcpU8, BYTE *srcpV8, int pitch, int pitchUV, int width, int height, int min_chroma, int max_chroma, int bits_per_pixel) +{ + const int shift = sizeof(pixel_t) == 1 ? 0 : (bits_per_pixel - 8); + pixel_t *srcp = reinterpret_cast(srcp8); + pixel_t *srcpU = reinterpret_cast(srcpU8); + pixel_t *srcpV = reinterpret_cast(srcpV8); + pitch /= sizeof(pixel_t); + pitchUV /= sizeof(pixel_t); + + for (int h=0; h < height; h+=1) { + for (int x = 0; x < width; x+=1) { + int uv = 0; + if (srcpU[x] < min_chroma) uv |= 1; // U- + else if (srcpU[x] > max_chroma) uv |= 2; // U+ + if (srcpV[x] < min_chroma) uv |= 4; // V- + else if (srcpV[x] > max_chroma) uv |= 8; // V+ + switch (uv) { + case 8: srcp[x]= 81 << shift; srcpU[x]= 91 << shift; srcpV[x]=240 << shift; break; // +V Red + case 9: srcp[x]=146 << shift; srcpU[x]= 53 << shift; srcpV[x]=193 << shift; break; // -U+V Orange + case 1: srcp[x]=210 << shift; srcpU[x]= 16 << shift; srcpV[x]=146 << shift; break; // -U Yellow + case 5: srcp[x]=153 << shift; srcpU[x]= 49 << shift; srcpV[x]= 49 << shift; break; // -U-V Green + case 4: srcp[x]=170 << shift; srcpU[x]=165 << shift; srcpV[x]= 16 << shift; break; // -V Cyan + case 6: srcp[x]=105 << shift; srcpU[x]=203 << shift; srcpV[x]= 63 << shift; break; // +U-V Teal + case 2: srcp[x]= 41 << shift; srcpU[x]=240 << shift; srcpV[x]=110 << shift; break; // +U Blue + case 10: srcp[x]=106 << shift; srcpU[x]=202 << shift; srcpV[x]=222 << shift; break; // +U+V Magenta + default: srcpU[x]= srcpV[x]=128 << shift; break; + } + } + srcp += pitch; + srcpV += pitchUV; + srcpU += pitchUV; + } +} + +template +static void show_chroma_grey_yuv420(BYTE *srcp8, BYTE *srcpU8, BYTE *srcpV8, int pitch, int pitchUV, int width, int height, int min_chroma, int max_chroma, int bits_per_pixel) +{ + const int shift = sizeof(pixel_t) == 1 ? 
0 : (bits_per_pixel - 8); + pixel_t *srcp = reinterpret_cast(srcp8); + pixel_t *srcn = reinterpret_cast(srcp8 + pitch); // next line + pixel_t *srcpU = reinterpret_cast(srcpU8); + pixel_t *srcpV = reinterpret_cast(srcpV8); + pitch /= sizeof(pixel_t); + pitchUV /= sizeof(pixel_t); + + for (int h=0; h < height; h+=2) { + for (int x = 0; x < width; x+=2) { + int uv = 0; + if (srcpU[x/2] < min_chroma) uv |= 1; // U- + else if (srcpU[x/2] > max_chroma) uv |= 2; // U+ + if (srcpV[x/2] < min_chroma) uv |= 4; // V- + else if (srcpV[x/2] > max_chroma) uv |= 8; // V+ + switch (uv) { + case 8: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]= 81 << shift; srcpU[x/2]= 91 << shift; srcpV[x/2]=240 << shift; break; // +V Red + case 9: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=146 << shift; srcpU[x/2]= 53 << shift; srcpV[x/2]=193 << shift; break; // -U+V Orange + case 1: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=210 << shift; srcpU[x/2]= 16 << shift; srcpV[x/2]=146 << shift; break; // -U Yellow + case 5: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=153 << shift; srcpU[x/2]= 49 << shift; srcpV[x/2]= 49 << shift; break; // -U-V Green + case 4: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=170 << shift; srcpU[x/2]=165 << shift; srcpV[x/2]= 16 << shift; break; // -V Cyan + case 6: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=105 << shift; srcpU[x/2]=203 << shift; srcpV[x/2]= 63 << shift; break; // +U-V Teal + case 2: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]= 41 << shift; srcpU[x/2]=240 << shift; srcpV[x/2]=110 << shift; break; // +U Blue + case 10: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=106 << shift; srcpU[x/2]=202 << shift; srcpV[x/2]=222 << shift; break; // +U+V Magenta + default: srcpU[x/2] = srcpV[x/2] = 128 << shift; break; + } + } + srcp += pitch*2; // 2x2 pixels at a time (4:2:0 subsampling) + srcn += pitch*2; + srcpV += pitchUV; + srcpU += pitchUV; + } +} + PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame frame = child->GetFrame(n, env); env->MakeWritable(&frame); @@ -310,6 +509,25 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { // YUY end } else if(vi.Is420()) { + if (show == show_luma || show == show_luma_grey) { // Mark clamped pixels red/yellow/green over a colour image + const int pitchUV = frame->GetPitch(PLANAR_U); + unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); + unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); + + if (pixelsize == 1) { + if (show == show_luma) + show_luma_with_grey_opt_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_luma, max_luma, bits_per_pixel); + else // show_luma_grey + show_luma_with_grey_opt_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_luma, max_luma, bits_per_pixel); + } else { // pixelsize == 2 + if (show == show_luma) + show_luma_with_grey_opt_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_luma, max_luma, bits_per_pixel); + else // show_luma_grey + show_luma_with_grey_opt_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_luma, max_luma, bits_per_pixel); + } + return frame; + } +#if 0 if (show == show_luma) { // Mark clamped pixels red/yellow/green over a colour image const int pitchUV = frame->GetPitch(PLANAR_U); unsigned char* srcn = srcp + pitch; // next luma line @@ -364,8 +582,8 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { case 1: srcpU[x/2] = 91; srcpV[x/2] = 240; break; // red: Y=81, U=91 and V=240 case 2: srcpU[x/2] = 54; srcpV[x/2] = 34; break; // green: Y=145, U=54 and V=34 // this differs from show_luma - case 3: srcpU[x/2] = 90; srcpV[x/2] = 134; 
break; // puke: Y=81, U=90 and V=134 - default: srcpU[x/2] = srcpV[x/2] = 128; break; // olive: Y=145, U=90 and V=134 + case 3: srcpU[x/2] = 90; srcpV[x/2] = 134; break; // puke: Y=81, U=90 and V=134 olive: Y=145, U=90 and V=134 + default: srcpU[x/2] = srcpV[x/2] = 128; break; } } srcp += pitch*2; @@ -375,12 +593,17 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { } return frame; } +#endif else if (show == show_chroma) { // Mark clamped pixels yellow over a colour image const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcn = srcp + pitch; // next luma line unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); + if (pixelsize == 1) + show_chroma_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); + else + show_chroma_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); +#if 0 for (int h=0; h < height;h+=2) { for (int x = 0; x < width; x+=2) { if ( (srcpU[x/2] < min_chroma) // U- @@ -394,14 +617,20 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { srcpV += pitchUV; srcpU += pitchUV; } +#endif return frame; } else if (show == show_chroma_grey) { // Mark clamped pixels coloured over a greyscaled image const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcn = srcp + pitch; // next luma line unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); + if (pixelsize == 1) + show_chroma_grey_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); + else + show_chroma_grey_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); + +#if 0 for (int h=0; h < height;h+=2) { for (int x = 0; x < width; x+=2) { int uv = 0; @@ -426,16 +655,32 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { srcpV += pitchUV; srcpU += pitchUV; } +#endif return frame; } // YV12 (4:2:0) end - } else if(vi.IsYV24()) { + } else if(vi.Is444()) { - if (show == show_luma) { // Mark clamped pixels red/green over a colour image + if (show == show_luma || show == show_luma_grey) { const int pitchUV = frame->GetPitch(PLANAR_U); unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); - + if (pixelsize == 1) { + if (show == show_luma) + show_luma_with_grey_opt_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_luma, max_luma, bits_per_pixel); + else // show_luma_grey + show_luma_with_grey_opt_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_luma, max_luma, bits_per_pixel); + } else { // pixelsize == 2 + if (show == show_luma) + show_luma_with_grey_opt_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_luma, max_luma, bits_per_pixel); + else // show_luma_grey + show_luma_with_grey_opt_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_luma, max_luma, bits_per_pixel); + } + return frame; + } +#if 0 + if (show == show_luma) { + // Mark clamped pixels red/green over a colour image for (int h=0; h < height; h+=1) { for (int x = 0; x < width; x+=1) { if (srcp[x] < min_luma) { srcp[x] = 81; srcpU[x] = 91; srcpV[x] = 240; } // red: Y=81, U=91 and V=240 @@ -466,11 +711,17 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { } return frame; } +#endif else if (show == show_chroma) { // Mark clamped pixels yellow over a colour image const int pitchUV 
= frame->GetPitch(PLANAR_U); unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); + if (pixelsize == 1) + show_chroma_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); + else + show_chroma_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); +#if 0 for (int h=0; h < height; h+=1) { for (int x = 0; x < width; x+=1) { if ( (srcpU[x] < min_chroma) // U- @@ -483,6 +734,7 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { srcpV += pitchUV; srcpU += pitchUV; } +#endif return frame; } else if (show == show_chroma_grey) { // Mark clamped pixels coloured over a greyscaled image @@ -490,6 +742,11 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); + if (pixelsize == 1) + show_chroma_grey_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); + else + show_chroma_grey_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); +#if 0 for (int h=0; h < height; h+=1) { for (int x = 0; x < width; x+=1) { int uv = 0; @@ -513,6 +770,7 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { srcpV += pitchUV; srcpU += pitchUV; } +#endif return frame; } // YV24 (4:4:4) end From 4f67b39a92ea00f1946c9e22d625f8b02eb7234f Mon Sep 17 00:00:00 2001 From: Pinterf Date: Tue, 6 Sep 2016 19:44:21 +0200 Subject: [PATCH 044/120] Limiter: Remove old non-templated 8 bit code --- avs_core/filters/limiter.cpp | 216 ++--------------------------------- 1 file changed, 9 insertions(+), 207 deletions(-) diff --git a/avs_core/filters/limiter.cpp b/avs_core/filters/limiter.cpp index 0b3b648ba..ddff07df0 100644 --- a/avs_core/filters/limiter.cpp +++ b/avs_core/filters/limiter.cpp @@ -507,13 +507,12 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { } return frame; // YUY end - } else if(vi.Is420()) { + } else if(vi.Is420() && show != show_none) { + const int pitchUV = frame->GetPitch(PLANAR_U); + unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); + unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); if (show == show_luma || show == show_luma_grey) { // Mark clamped pixels red/yellow/green over a colour image - const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); - unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); - if (pixelsize == 1) { if (show == show_luma) show_luma_with_grey_opt_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_luma, max_luma, bits_per_pixel); @@ -527,144 +526,28 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { } return frame; } -#if 0 - if (show == show_luma) { // Mark clamped pixels red/yellow/green over a colour image - const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcn = srcp + pitch; // next luma line - unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); - unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); - - for (int h=0; h < height;h+=2) { - for (int x = 0; x < width; x+=2) { - int uv = 0; - if (srcp[x ] < min_luma) { srcp[x ] = 81; uv |= 1;} - else if (srcp[x ] > max_luma) { srcp[x ] = 145; uv |= 2;} - if (srcp[x+1] < min_luma) { srcp[x+1] = 81; uv |= 1;} - else if (srcp[x+1] > max_luma) { srcp[x+1] = 145; uv |= 2;} - if (srcn[x ] < min_luma) { srcn[x ] = 81; 
uv |= 1;} - else if (srcn[x ] > max_luma) { srcn[x ] = 145; uv |= 2;} - if (srcn[x+1] < min_luma) { srcn[x+1] = 81; uv |= 1;} - else if (srcn[x+1] > max_luma) { srcn[x+1] = 145; uv |= 2;} - switch (uv) { - case 1: srcpU[x/2] = 91; srcpV[x/2] = 240; break; // red: Y=81, U=91 and V=240 - case 2: srcpU[x/2] = 54; srcpV[x/2] = 34; break; // green: Y=145, U=54 and V=34 - // this differs from show_luma_grey - case 3: srcp[x]=srcp[x+2]=srcn[x]=srcn[x+2]=210; - srcpU[x/2] = 16; srcpV[x/2] = 146; break; // yellow:Y=210, U=16 and V=146 - default: break; - } - } - srcp += pitch*2; // 2x2 pixels at a time (4:2:0 subsampling) - srcn += pitch*2; - srcpV += pitchUV; - srcpU += pitchUV; - } - return frame; - } - else if (show == show_luma_grey) { // Mark clamped pixels coloured over a greyscaled image - const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcn = srcp + pitch; // next luma line - unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); - unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); - - for (int h=0; h < height;h+=2) { - for (int x = 0; x < width; x+=2) { - int uv = 0; - if (srcp[x ] < min_luma) { srcp[x ] = 81; uv |= 1;} - else if (srcp[x ] > max_luma) { srcp[x ] = 145; uv |= 2;} - if (srcp[x+1] < min_luma) { srcp[x+1] = 81; uv |= 1;} - else if (srcp[x+1] > max_luma) { srcp[x+1] = 145; uv |= 2;} - if (srcn[x ] < min_luma) { srcn[x ] = 81; uv |= 1;} - else if (srcn[x ] > max_luma) { srcn[x ] = 145; uv |= 2;} - if (srcn[x+1] < min_luma) { srcn[x+1] = 81; uv |= 1;} - else if (srcn[x+1] > max_luma) { srcn[x+1] = 145; uv |= 2;} - switch (uv) { - case 1: srcpU[x/2] = 91; srcpV[x/2] = 240; break; // red: Y=81, U=91 and V=240 - case 2: srcpU[x/2] = 54; srcpV[x/2] = 34; break; // green: Y=145, U=54 and V=34 - // this differs from show_luma - case 3: srcpU[x/2] = 90; srcpV[x/2] = 134; break; // puke: Y=81, U=90 and V=134 olive: Y=145, U=90 and V=134 - default: srcpU[x/2] = srcpV[x/2] = 128; break; - } - } - srcp += pitch*2; - srcn += pitch*2; - srcpV += pitchUV; - srcpU += pitchUV; - } - return frame; - } -#endif else if (show == show_chroma) { // Mark clamped pixels yellow over a colour image - const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); - unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); - if (pixelsize == 1) show_chroma_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); else show_chroma_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); -#if 0 - for (int h=0; h < height;h+=2) { - for (int x = 0; x < width; x+=2) { - if ( (srcpU[x/2] < min_chroma) // U- - || (srcpU[x/2] > max_chroma) // U+ - || (srcpV[x/2] < min_chroma) // V- - || (srcpV[x/2] > max_chroma) )// V+ - { srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=210; srcpU[x/2]= 16; srcpV[x/2]=146; } // yellow:Y=210, U=16 and V=146 - } - srcp += pitch*2; - srcn += pitch*2; - srcpV += pitchUV; - srcpU += pitchUV; - } -#endif return frame; } else if (show == show_chroma_grey) { // Mark clamped pixels coloured over a greyscaled image - const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); - unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); - if (pixelsize == 1) show_chroma_grey_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); else show_chroma_grey_yuv420(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); - -#if 0 - for (int h=0; h < height;h+=2) { - 
for (int x = 0; x < width; x+=2) { - int uv = 0; - if (srcpU[x/2] < min_chroma) uv |= 1; // U- - else if (srcpU[x/2] > max_chroma) uv |= 2; // U+ - if (srcpV[x/2] < min_chroma) uv |= 4; // V- - else if (srcpV[x/2] > max_chroma) uv |= 8; // V+ - switch (uv) { - case 8: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]= 81; srcpU[x/2]= 91; srcpV[x/2]=240; break; // +V Red - case 9: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=146; srcpU[x/2]= 53; srcpV[x/2]=193; break; // -U+V Orange - case 1: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=210; srcpU[x/2]= 16; srcpV[x/2]=146; break; // -U Yellow - case 5: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=153; srcpU[x/2]= 49; srcpV[x/2]= 49; break; // -U-V Green - case 4: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=170; srcpU[x/2]=165; srcpV[x/2]= 16; break; // -V Cyan - case 6: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=105; srcpU[x/2]=203; srcpV[x/2]= 63; break; // +U-V Teal - case 2: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]= 41; srcpU[x/2]=240; srcpV[x/2]=110; break; // +U Blue - case 10: srcp[x]=srcp[x+1]=srcn[x]=srcn[x+1]=106; srcpU[x/2]=202; srcpV[x/2]=222; break; // +U+V Magenta - default: srcpU[x/2] = srcpV[x/2] = 128; break; - } - } - srcp += pitch*2; // this differs from YV24 (*2) - srcn += pitch*2; - srcpV += pitchUV; - srcpU += pitchUV; - } -#endif return frame; } // YV12 (4:2:0) end - } else if(vi.Is444()) { + } else if(vi.Is444() && show != show_none) { + + const int pitchUV = frame->GetPitch(PLANAR_U); + unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); + unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); if (show == show_luma || show == show_luma_grey) { - const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); - unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); if (pixelsize == 1) { if (show == show_luma) show_luma_with_grey_opt_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_luma, max_luma, bits_per_pixel); @@ -678,99 +561,18 @@ PVideoFrame __stdcall Limiter::GetFrame(int n, IScriptEnvironment* env) { } return frame; } -#if 0 - if (show == show_luma) { - // Mark clamped pixels red/green over a colour image - for (int h=0; h < height; h+=1) { - for (int x = 0; x < width; x+=1) { - if (srcp[x] < min_luma) { srcp[x] = 81; srcpU[x] = 91; srcpV[x] = 240; } // red: Y=81, U=91 and V=240 - else if (srcp[x] > max_luma) { srcp[x] = 145; srcpU[x] = 54; srcpV[x] = 34; } // green: Y=145, U=54 and V=34 - // this differs from show_luma_grey (nothing here) - } - srcp += pitch; - srcpV += pitchUV; - srcpU += pitchUV; - } - return frame; - } - else if (show == show_luma_grey) { // Mark clamped pixels red/green over a greyscaled image - const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); - unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); - - for (int h=0; h < height; h+=1) { - for (int x = 0; x < width; x+=1) { - if (srcp[x] < min_luma) { srcp[x] = 81; srcpU[x] = 91; srcpV[x] = 240; } // red: Y=81, U=91 and V=240 - else if (srcp[x] > max_luma) { srcp[x] = 145; srcpU[x] = 54; srcpV[x] = 34; } // green: Y=145, U=54 and V=34 - // this differs from show_luma - else { srcpU[x] = srcpV[x] = 128; } // grey - } - srcp += pitch; - srcpV += pitchUV; - srcpU += pitchUV; - } - return frame; - } -#endif else if (show == show_chroma) { // Mark clamped pixels yellow over a colour image - const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); - unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); - if (pixelsize == 1) show_chroma_yuv444(srcp, srcpU, srcpV, 
pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); else show_chroma_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); -#if 0 - for (int h=0; h < height; h+=1) { - for (int x = 0; x < width; x+=1) { - if ( (srcpU[x] < min_chroma) // U- - || (srcpU[x] > max_chroma) // U+ - || (srcpV[x] < min_chroma) // V- - || (srcpV[x] > max_chroma) )// V+ - { srcp[x]=210; srcpU[x]= 16; srcpV[x]=146; } // yellow:Y=210, U=16 and V=146 - } - srcp += pitch; - srcpV += pitchUV; - srcpU += pitchUV; - } -#endif return frame; } else if (show == show_chroma_grey) { // Mark clamped pixels coloured over a greyscaled image - const int pitchUV = frame->GetPitch(PLANAR_U); - unsigned char* srcpV = frame->GetWritePtr(PLANAR_V); - unsigned char* srcpU = frame->GetWritePtr(PLANAR_U); - if (pixelsize == 1) show_chroma_grey_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); else show_chroma_grey_yuv444(srcp, srcpU, srcpV, pitch, pitchUV, width, height, min_chroma, max_chroma, bits_per_pixel); -#if 0 - for (int h=0; h < height; h+=1) { - for (int x = 0; x < width; x+=1) { - int uv = 0; - if (srcpU[x] < min_chroma) uv |= 1; // U- - else if (srcpU[x] > max_chroma) uv |= 2; // U+ - if (srcpV[x] < min_chroma) uv |= 4; // V- - else if (srcpV[x] > max_chroma) uv |= 8; // V+ - switch (uv) { - case 8: srcp[x]= 81; srcpU[x]= 91; srcpV[x]=240; break; // +V Red - case 9: srcp[x]=146; srcpU[x]= 53; srcpV[x]=193; break; // -U+V Orange - case 1: srcp[x]=210; srcpU[x]= 16; srcpV[x]=146; break; // -U Yellow - case 5: srcp[x]=153; srcpU[x]= 49; srcpV[x]= 49; break; // -U-V Green - case 4: srcp[x]=170; srcpU[x]=165; srcpV[x]= 16; break; // -V Cyan - case 6: srcp[x]=105; srcpU[x]=203; srcpV[x]= 63; break; // +U-V Teal - case 2: srcp[x]= 41; srcpU[x]=240; srcpV[x]=110; break; // +U Blue - case 10: srcp[x]=106; srcpU[x]=202; srcpV[x]=222; break; // +U+V Magenta - default: srcpU[x]= srcpV[x]=128; break; - } - } - srcp += pitch; - srcpV += pitchUV; - srcpU += pitchUV; - } -#endif return frame; } // YV24 (4:4:4) end From 6c85f4c730206154b4815414d3cdc41865c55c18 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 7 Sep 2016 08:39:07 +0200 Subject: [PATCH 045/120] Revert a false idea (double type support in plugins e.g. 
 virtualdub)
---
 avs_core/core/PluginManager.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/avs_core/core/PluginManager.cpp b/avs_core/core/PluginManager.cpp
index 288d65ea3..071761185 100644
--- a/avs_core/core/PluginManager.cpp
+++ b/avs_core/core/PluginManager.cpp
@@ -81,7 +81,7 @@ static std::string GetFullPathNameWrap(const std::string &f)
 
 static bool IsParameterTypeSpecifier(char c) {
   switch (c) {
-    case 'b': case 'i': case 'f': case 's': case 'c': case '.': case 'd':
+    case 'b': case 'i': case 'f': case 's': case 'c': case '.':
       return true;
     default:
       return false;
@@ -277,7 +277,7 @@ bool AVSFunction::TypeMatch(const char* param_types, const AVSValue* args, size_
     }
 
     switch (*param_types) {
-      case 'b': case 'i': case 'f': case 's': case 'c':
+      case 'b': case 'i': case 'f': case 's': case 'c':
         if (   (!optional || args[i].Defined())
             && !SingleTypeMatch(*param_types, args[i], strict))
           return false;

From 34db747cf619faeed741f1de6a2b949734a62fe2 Mon Sep 17 00:00:00 2001
From: Pinterf
Date: Wed, 7 Sep 2016 11:07:11 +0200
Subject: [PATCH 046/120] Invert: YUV(A)/PlanarRGB(A) 8,10-16,32 bit, RGB48/64, with SSE2

---
 avs_core/filters/layer.cpp | 160 +++++++++++++++++++++++++++++++------
 avs_core/filters/layer.h   |   4 +
 2 files changed, 141 insertions(+), 23 deletions(-)

diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp
index ca5c07d0f..5031c66b6 100644
--- a/avs_core/filters/layer.cpp
+++ b/avs_core/filters/layer.cpp
@@ -450,7 +450,7 @@ Invert::Invert(PClip _child, const char * _channels, IScriptEnvironment* env)
       break;
     case 'A':
     case 'a':
-      doA = true;
+      doA = (vi.NumComponents() > 3);
       break;
     case 'Y':
     case 'y':
@@ -468,6 +468,8 @@
       break;
     }
   }
+  pixelsize = vi.ComponentSize();
+  bits_per_pixel = vi.BitsPerComponent();
   if (vi.IsYUY2()) {
     mask = doY ? 0x00ff00ff : 0;
     mask |= doU ? 0x0000ff00 : 0;
@@ -479,8 +481,17 @@
     mask |= doR ? 0x00ff0000 : 0;
     mask |= doA ? 0xff000000 : 0;
   }
+  else if (vi.IsRGB64()) {
+    mask64 = doB ? 0x000000000000ffffull : 0;
+    mask64 |= (doG ? 0x00000000ffff0000ull : 0);
+    mask64 |= (doR ? 0x0000ffff00000000ull : 0);
+    mask64 |= (doA ?
0xffff000000000000ull : 0); + } else { mask = 0xffffffff; + mask64 = (1 << bits_per_pixel) - 1; + mask64 |= (mask64 << 48) | (mask64 << 32) | (mask64 << 16); // works for 10 bit, too + // RGB24/48 is special case no use of this mask } } @@ -497,6 +508,19 @@ static void invert_frame_sse2(BYTE* frame, int pitch, int width, int height, int } } +static void invert_frame_uint16_sse2(BYTE* frame, int pitch, int width, int height, uint64_t mask64) { + __m128i maskv = _mm_set_epi32((uint32_t)(mask64 >> 32),(uint32_t)mask64,(uint32_t)(mask64 >> 32),(uint32_t)mask64); + + BYTE* endp = frame + pitch * height; + + while (frame < endp) { + __m128i src = _mm_load_si128(reinterpret_cast(frame)); + __m128i inv = _mm_xor_si128(src, maskv); + _mm_store_si128(reinterpret_cast<__m128i*>(frame), inv); + frame += 16; + } +} + #ifdef X86_32 //mod4 width (in bytes) is required @@ -560,8 +584,17 @@ static void invert_frame_c(BYTE* frame, int pitch, int width, int height, int ma } } -static void invert_plane_c(BYTE* frame, int pitch, int width, int height) { - int mod4_width = width / 4 * 4; +static void invert_frame_uint16_c(BYTE* frame, int pitch, int width, int height, uint64_t mask64) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width / 8; ++x) { + reinterpret_cast(frame)[x] = reinterpret_cast(frame)[x] ^ mask64; + } + frame += pitch; + } +} + +static void invert_plane_c(BYTE* frame, int pitch, int row_size, int height) { + int mod4_width = row_size / 4 * 4; for (int y = 0; y < height; ++y) { int* intptr = reinterpret_cast(frame); @@ -569,44 +602,97 @@ static void invert_plane_c(BYTE* frame, int pitch, int width, int height) { intptr[x] = intptr[x] ^ 0xFFFFFFFF; } - for (int x = mod4_width; x < width; ++x) { + for (int x = mod4_width; x < row_size; ++x) { frame[x] = frame[x] ^ 255; } frame += pitch; } } -static void invert_frame(BYTE* frame, int pitch, int rowsize, int height, int mask, IScriptEnvironment *env) { - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(frame, 16)) +static void invert_plane_uint16_c(BYTE* frame, int pitch, int row_size, int height, uint64_t mask64) { + int mod8_width = row_size / 8 * 8; + uint16_t mask16 = mask64 & 0xFFFF; // for planes, all 16 bit parts of 64 bit mask is the same + for (int y = 0; y < height; ++y) { + + for (int x = 0; x < mod8_width / 8; ++x) { + reinterpret_cast(frame)[x] ^= mask64; + } + + for (int x = mod8_width; x < row_size; ++x) { + reinterpret_cast(frame)[x] ^= mask16; + } + frame += pitch; + } +} + +static void invert_plane_float_c(BYTE* frame, int pitch, int row_size, int height) { + const int width = row_size / sizeof(float); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + reinterpret_cast(frame)[x] = 1.0f - reinterpret_cast(frame)[x]; + } + frame += pitch; + } +} + +static void invert_frame(BYTE* frame, int pitch, int rowsize, int height, int mask, uint64_t mask64, int pixelsize, IScriptEnvironment *env) { + if ((pixelsize == 1 || pixelsize == 2) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(frame, 16)) { - invert_frame_sse2(frame, pitch, rowsize, height, mask); + if(pixelsize == 1) + invert_frame_sse2(frame, pitch, rowsize, height, mask); + else + invert_frame_uint16_sse2(frame, pitch, rowsize, height, mask64); } #ifdef X86_32 - else if (env->GetCPUFlags() & CPUF_MMX) + else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_MMX)) { invert_frame_mmx(frame, pitch, rowsize, height, mask); } #endif else { - invert_frame_c(frame, pitch, rowsize, height, mask); + if(pixelsize == 1) + invert_frame_c(frame, 
pitch, rowsize, height, mask); + else + invert_frame_uint16_c(frame, pitch, rowsize, height, mask64); } } -static void invert_plane(BYTE* frame, int pitch, int rowsize, int height, IScriptEnvironment *env) { +static void invert_frame_uint16(BYTE* frame, int pitch, int rowsize, int height, uint64_t mask64, IScriptEnvironment *env) { if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(frame, 16)) { - invert_frame_sse2(frame, pitch, rowsize, height, 0xffffffff); + invert_frame_uint16_sse2(frame, pitch, rowsize, height, mask64); + } + else + { + invert_frame_uint16_c(frame, pitch, rowsize, height, mask64); + } +} + + +static void invert_plane(BYTE* frame, int pitch, int rowsize, int height, int pixelsize, uint64_t mask64, IScriptEnvironment *env) { + if ((pixelsize == 1 || pixelsize == 2) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(frame, 16)) + { + if(pixelsize == 1) + invert_frame_sse2(frame, pitch, rowsize, height, 0xffffffff); + else if(pixelsize == 2) + invert_frame_uint16_sse2(frame, pitch, rowsize, height, mask64); } #ifdef X86_32 - else if (env->GetCPUFlags() & CPUF_MMX) + else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_MMX)) { invert_plane_mmx(frame, pitch, rowsize, height); } #endif else { - invert_plane_c(frame, pitch, rowsize, height); + if(pixelsize == 1) + invert_plane_c(frame, pitch, rowsize, height); + else if (pixelsize == 2) + invert_plane_uint16_c(frame, pitch, rowsize, height, mask64); + else { + invert_plane_float_c(frame, pitch, rowsize, height); + } } } @@ -622,15 +708,30 @@ PVideoFrame Invert::GetFrame(int n, IScriptEnvironment* env) int height = f->GetHeight(); if (vi.IsPlanar()) { - if (doY) - invert_plane(pf, pitch, f->GetRowSize(PLANAR_Y_ALIGNED), height, env); - if (doU) - invert_plane(f->GetWritePtr(PLANAR_U), f->GetPitch(PLANAR_U), f->GetRowSize(PLANAR_U_ALIGNED), f->GetHeight(PLANAR_U), env); - if (doV) - invert_plane(f->GetWritePtr(PLANAR_V), f->GetPitch(PLANAR_V), f->GetRowSize(PLANAR_V_ALIGNED), f->GetHeight(PLANAR_V), env); - } - else if (vi.IsYUY2() || vi.IsRGB32()) { - invert_frame(pf, pitch, rowsize, height, mask, env); + // planar YUV + if (vi.IsYUV() || vi.IsYUVA()) { + if (doY) + invert_plane(pf, pitch, f->GetRowSize(PLANAR_Y_ALIGNED), height, pixelsize, mask64, env); + if (doU) + invert_plane(f->GetWritePtr(PLANAR_U), f->GetPitch(PLANAR_U), f->GetRowSize(PLANAR_U_ALIGNED), f->GetHeight(PLANAR_U), pixelsize, mask64, env); + if (doV) + invert_plane(f->GetWritePtr(PLANAR_V), f->GetPitch(PLANAR_V), f->GetRowSize(PLANAR_V_ALIGNED), f->GetHeight(PLANAR_V), pixelsize, mask64, env); + } + // planar RGB + if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { + if (doG) // first plane, GetWritePtr w/o parameters + invert_plane(pf, pitch, f->GetRowSize(PLANAR_G_ALIGNED), height, pixelsize, mask64, env); + if (doB) + invert_plane(f->GetWritePtr(PLANAR_B), f->GetPitch(PLANAR_B), f->GetRowSize(PLANAR_B_ALIGNED), f->GetHeight(PLANAR_B), pixelsize, mask64, env); + if (doR) + invert_plane(f->GetWritePtr(PLANAR_R), f->GetPitch(PLANAR_R), f->GetRowSize(PLANAR_R_ALIGNED), f->GetHeight(PLANAR_R), pixelsize, mask64, env); + } + // alpha + if (doA && vi.IsPlanarRGBA() || vi.IsYUVA()) + invert_plane(f->GetWritePtr(PLANAR_A), f->GetPitch(PLANAR_A), f->GetRowSize(PLANAR_A_ALIGNED), f->GetHeight(PLANAR_A), pixelsize, mask64, env); + } + else if (vi.IsYUY2() || vi.IsRGB32() || vi.IsRGB64()) { + invert_frame(pf, pitch, rowsize, height, mask, mask64, pixelsize, env); } else if (vi.IsRGB24()) { int rMask= doR ? 
0xff : 0; @@ -646,6 +747,19 @@ PVideoFrame Invert::GetFrame(int n, IScriptEnvironment* env) pf += pitch; } } + else if (vi.IsRGB48()) { + int rMask= doR ? 0xffff : 0; + int gMask= doG ? 0xffff : 0; + int bMask= doB ? 0xffff : 0; + for (int i=0; i(pf)[j+0] ^= bMask; + reinterpret_cast(pf)[j+1] ^= gMask; + reinterpret_cast(pf)[j+2] ^= rMask; + } + pf += pitch; + } + } return f; } @@ -653,7 +767,7 @@ PVideoFrame Invert::GetFrame(int n, IScriptEnvironment* env) AVSValue Invert::Create(AVSValue args, void*, IScriptEnvironment* env) { - return new Invert(args[0].AsClip(), args[0].AsClip()->GetVideoInfo().IsRGB() ? args[1].AsString("RGBA") : args[1].AsString("YUV"), env); + return new Invert(args[0].AsClip(), args[0].AsClip()->GetVideoInfo().IsRGB() ? args[1].AsString("RGBA") : args[1].AsString("YUVA"), env); } diff --git a/avs_core/filters/layer.h b/avs_core/filters/layer.h index f42e0997d..08ab78522 100644 --- a/avs_core/filters/layer.h +++ b/avs_core/filters/layer.h @@ -135,6 +135,10 @@ class Invert : public GenericVideoFilter int mask; bool doB, doG, doR, doA; bool doY, doU, doV; + + unsigned __int64 mask64; + int pixelsize; + int bits_per_pixel; // 8,10..16 }; From 34ea5515efd1814864b9a1af8026d8041ebafd6c Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 7 Sep 2016 12:40:34 +0200 Subject: [PATCH 047/120] Blur/Sharpen: 10,12,14 bit clamp, TemporalSoften: SAD scale ok for 10,12,14 bit --- avs_core/core/internal.h | 4 ++ avs_core/filters/focus.cpp | 87 +++++++++++++++++++++++++------------- avs_core/filters/focus.h | 1 + 3 files changed, 63 insertions(+), 29 deletions(-) diff --git a/avs_core/core/internal.h b/avs_core/core/internal.h index 101c798de..442a9dfb2 100644 --- a/avs_core/core/internal.h +++ b/avs_core/core/internal.h @@ -182,6 +182,10 @@ static __inline uint16_t ScaledPixelClip(__int64 i) { return (uint16_t)clamp((i + 32768) >> 16, 0LL, 65535LL); } +static __inline uint16_t ScaledPixelClipEx(__int64 i, int max_value) { + return (uint16_t)clamp((int)((i + 32768) >> 16), 0, max_value); +} + static __inline bool IsClose(int a, int b, unsigned threshold) { return (unsigned(a-b+threshold) <= threshold*2); } diff --git a/avs_core/filters/focus.cpp b/avs_core/filters/focus.cpp index 9636515e8..902e33910 100644 --- a/avs_core/filters/focus.cpp +++ b/avs_core/filters/focus.cpp @@ -75,11 +75,12 @@ AdjustFocusV::AdjustFocusV(double _amount, PClip _child) } template -static void af_vertical_c(BYTE* line_buf8, BYTE* dstp8, const int height, const int pitch8, const int width, const int half_amount) { +static void af_vertical_c(BYTE* line_buf8, BYTE* dstp8, const int height, const int pitch8, const int width, const int half_amount, int bits_per_pixel) { typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type weight_t; // kernel:[(1-1/2^_amount)/2, 1/2^_amount, (1-1/2^_amount)/2] weight_t center_weight = half_amount*2; // *2: 16 bit scaled arithmetic, but the converted amount parameter scaled is only 15 bits weight_t outer_weight = 32768-half_amount; // (1-1/2^_amount)/2 32768 = 0.5 + int max_pixel_value = (1 << bits_per_pixel) - 1; pixel_t * dstp = reinterpret_cast(dstp8); pixel_t * line_buf = reinterpret_cast(line_buf8); @@ -89,14 +90,20 @@ static void af_vertical_c(BYTE* line_buf8, BYTE* dstp8, const int height, const for (int x = 0; x < width; ++x) { pixel_t a; // Note: ScaledPixelClip is overloaded. 
With __int64 parameter and uint16_t result works for 16 bit - a = ScaledPixelClip((weight_t)(dstp[x] * center_weight + (line_buf[x] + dstp[x+pitch]) * outer_weight)); + if(sizeof(pixel_t) == 1) + a = ScaledPixelClip((weight_t)(dstp[x] * center_weight + (line_buf[x] + dstp[x+pitch]) * outer_weight)); + else + a = (pixel_t)ScaledPixelClipEx((weight_t)(dstp[x] * center_weight + (line_buf[x] + dstp[x+pitch]) * outer_weight), max_pixel_value); line_buf[x] = dstp[x]; dstp[x] = a; } dstp += pitch; } for (int x = 0; x < width; ++x) { // Last row - map centre as lower + if(sizeof(pixel_t) == 1) dstp[x] = ScaledPixelClip((weight_t)(dstp[x] * center_weight + (line_buf[x] + dstp[x]) * outer_weight)); + else + dstp[x] = (pixel_t)ScaledPixelClipEx((weight_t)(dstp[x] * center_weight + (line_buf[x] + dstp[x]) * outer_weight), max_pixel_value); } } @@ -269,7 +276,7 @@ static void af_vertical_mmx(BYTE* line_buf, BYTE* dstp, int height, int pitch, i #endif template -static void af_vertical_process(BYTE* line_buf, BYTE* dstp, size_t height, size_t pitch, size_t row_size, int half_amount, IScriptEnvironment* env) { +static void af_vertical_process(BYTE* line_buf, BYTE* dstp, size_t height, size_t pitch, size_t row_size, int half_amount, int bits_per_pixel, IScriptEnvironment* env) { size_t width = row_size / sizeof(pixel_t); // only for 8/16 bit, float separated // todo: sse2 for 16 @@ -284,12 +291,12 @@ static void af_vertical_process(BYTE* line_buf, BYTE* dstp, size_t height, size_ af_vertical_mmx(line_buf, dstp, height, pitch, mod8_width, half_amount); if (mod8_width != width) { //yes, this is bad for caching. MMX shouldn't be used these days anyway - af_vertical_c(line_buf, dstp + mod8_width, height, pitch, width - mod8_width, half_amount); + af_vertical_c(line_buf, dstp + mod8_width, height, pitch, width - mod8_width, half_amount, bits_per_pixel); } } else #endif { - af_vertical_c(line_buf, dstp, (int)height, (int)pitch, (int)width, half_amount); + af_vertical_c(line_buf, dstp, (int)height, (int)pitch, (int)width, half_amount, bits_per_pixel); } } @@ -321,6 +328,7 @@ PVideoFrame __stdcall AdjustFocusV::GetFrame(int n, IScriptEnvironment* env) } int pixelsize = vi.ComponentSize(); + int bits_per_pixel = vi.BitsPerComponent(); if (vi.IsPlanar()) { const int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A}; @@ -336,8 +344,8 @@ PVideoFrame __stdcall AdjustFocusV::GetFrame(int n, IScriptEnvironment* env) memcpy(line_buf, dstp, row_size); // First row - map centre as upper switch (pixelsize) { - case 1: af_vertical_process(line_buf, dstp, height, pitch, row_size, half_amount, env); break; - case 2: af_vertical_process(line_buf, dstp, height, pitch, row_size, half_amount, env); break; + case 1: af_vertical_process(line_buf, dstp, height, pitch, row_size, half_amount, bits_per_pixel, env); break; + case 2: af_vertical_process(line_buf, dstp, height, pitch, row_size, half_amount, bits_per_pixel, env); break; default: // 4: float af_vertical_process_float(line_buf, dstp, height, pitch, row_size, amountd, env); break; } @@ -350,9 +358,9 @@ PVideoFrame __stdcall AdjustFocusV::GetFrame(int n, IScriptEnvironment* env) int height = vi.height; memcpy(line_buf, dstp, row_size); // First row - map centre as upper if (pixelsize == 1) - af_vertical_process(line_buf, dstp, height, pitch, row_size, half_amount, env); + af_vertical_process(line_buf, dstp, height, pitch, row_size, half_amount, bits_per_pixel, env); else - af_vertical_process(line_buf, dstp, height, pitch, row_size, half_amount, env); + 
af_vertical_process(line_buf, dstp, height, pitch, row_size, half_amount, bits_per_pixel, env); } env2->Free(line_buf); @@ -822,8 +830,24 @@ static __forceinline void af_horizontal_yv12_process_line_c(pixel_t left, BYTE * dstp[x] = ScaledPixelClip((weight_t)(dstp[x] * (weight_t)center_weight + (left + dstp[x]) * (weight_t)outer_weight)); } +static __forceinline void af_horizontal_yv12_process_line_uint16_c(uint16_t left, BYTE *dstp8, size_t row_size, int center_weight, int outer_weight, int bits_per_pixel) { + size_t x; + typedef uint16_t pixel_t; + pixel_t* dstp = reinterpret_cast(dstp8); + const int max_pixel_value = (1 << bits_per_pixel) - 1; // clamping on 10-12-14-16 bitdepth + typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type weight_t; // for calling the right ScaledPixelClip() + size_t width = row_size / sizeof(pixel_t); + for (x = 0; x < width-1; ++x) { + pixel_t temp = (pixel_t)ScaledPixelClipEx((weight_t)(dstp[x] * (weight_t)center_weight + (left + dstp[x+1]) * (weight_t)outer_weight), max_pixel_value); + left = dstp[x]; + dstp[x] = temp; + } + // ScaledPixelClip has 2 overloads: BYTE/uint16_t (int/int64 i) + dstp[x] = ScaledPixelClipEx((weight_t)(dstp[x] * (weight_t)center_weight + (left + dstp[x]) * (weight_t)outer_weight), max_pixel_value); +} + template -static void af_horizontal_planar_c(BYTE* dstp8, size_t height, size_t pitch8, size_t row_size, size_t half_amount) +static void af_horizontal_planar_c(BYTE* dstp8, size_t height, size_t pitch8, size_t row_size, size_t half_amount, int bits_per_pixel) { pixel_t* dstp = reinterpret_cast(dstp8); size_t pitch = pitch8 / sizeof(pixel_t); @@ -832,7 +856,10 @@ static void af_horizontal_planar_c(BYTE* dstp8, size_t height, size_t pitch8, si pixel_t left; for (size_t y = height; y>0; --y) { left = dstp[0]; - af_horizontal_yv12_process_line_c(left, (BYTE *)dstp, row_size, center_weight, outer_weight); + if(sizeof(pixel_t) == 1) + af_horizontal_yv12_process_line_c(left, (BYTE *)dstp, row_size, center_weight, outer_weight); + else + af_horizontal_yv12_process_line_uint16_c(left, (BYTE *)dstp, row_size, center_weight, outer_weight, bits_per_pixel); dstp += pitch; } } @@ -1012,6 +1039,7 @@ PVideoFrame __stdcall AdjustFocusH::GetFrame(int n, IScriptEnvironment* env) if (vi.IsPlanar()) { copy_frame(src, dst, env, planes, vi.NumComponents() ); //planar processing is always in-place int pixelsize = vi.ComponentSize(); + int bits_per_pixel = vi.BitsPerComponent(); for(int cplane=0;cplane<3;cplane++) { int plane = planes[cplane]; int row_size = dst->GetRowSize(plane); @@ -1028,8 +1056,8 @@ PVideoFrame __stdcall AdjustFocusH::GetFrame(int n, IScriptEnvironment* env) #endif { switch (pixelsize) { - case 1: af_horizontal_planar_c(q, height, pitch, row_size, half_amount); break; - case 2: af_horizontal_planar_c(q, height, pitch, row_size, half_amount); break; + case 1: af_horizontal_planar_c(q, height, pitch, row_size, half_amount, bits_per_pixel); break; + case 2: af_horizontal_planar_c(q, height, pitch, row_size, half_amount, bits_per_pixel); break; default: // 4: float af_horizontal_planar_float_c(q, height, pitch, row_size, (float)amountd); break; } @@ -1183,6 +1211,7 @@ TemporalSoften::TemporalSoften( PClip _child, unsigned radius, unsigned luma_thr } pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); // original scenechange parameter always 0-255 int factor; @@ -1194,7 +1223,7 @@ TemporalSoften::TemporalSoften( PClip _child, unsigned radius, unsigned luma_thr int c = 0; - if (vi.IsPlanar()) { 
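
// Editor's sketch (not part of the patch): why this Blur/Sharpen path switches to
// ScaledPixelClipEx. The vertical kernel weights sum to 65536 (center_weight + 2*outer_weight),
// so the rounding ">> 16" brings the sum back to pixel scale, but the upper clamp has to be
// (1 << bits_per_pixel) - 1 rather than a hard-coded 65535, or a 10/12/14 bit clip could keep
// out-of-range samples. Standalone illustration for a 10 bit clip; names are local to the sketch.
#include <algorithm>
#include <cstdint>

static inline uint16_t scaled_clip_ex(int64_t i, int max_value) {
  return (uint16_t)std::clamp((int)((i + 32768) >> 16), 0, max_value);  // mirrors ScaledPixelClipEx
}

static uint16_t blur_tap_10bit(uint16_t above, uint16_t center, uint16_t below, int half_amount) {
  const int64_t center_weight = half_amount * 2;      // 16 bit scaled kernel centre
  const int64_t outer_weight  = 32768 - half_amount;  // the three weights sum to 65536
  const int max_pixel_value   = (1 << 10) - 1;        // 1023 for a 10 bit clip
  return scaled_clip_ex(center * center_weight + (above + below) * outer_weight,
                        max_pixel_value);
}
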
+ if (vi.IsPlanar() && (vi.IsYUV() || vi.IsYUVA())) { if (luma_thresh>0) {planes[c++] = PLANAR_Y; planes[c++] = luma_thresh;} if (chroma_thresh>0) { planes[c++] = PLANAR_V;planes[c++] =chroma_thresh; planes[c++] = PLANAR_U;planes[c++] = chroma_thresh;} } else if (vi.IsYUY2()) { @@ -1209,7 +1238,7 @@ TemporalSoften::TemporalSoften( PClip _child, unsigned radius, unsigned luma_thr //offset is the initial value of x. Used when C routine processes only parts of frames after SSE/MMX paths do their job. template -static void accumulate_line_c(BYTE* _c_plane, const BYTE** planeP, int planes, int offset, size_t rowsize, BYTE _threshold, int div) { +static void accumulate_line_c(BYTE* _c_plane, const BYTE** planeP, int planes, int offset, size_t rowsize, BYTE _threshold, int div, int bits_per_pixel) { pixel_t *c_plane = reinterpret_cast(_c_plane); typedef typename std::conditional < sizeof(pixel_t) == 1, unsigned int, typename std::conditional < sizeof(pixel_t) == 2, unsigned __int64, float>::type >::type sum_t; @@ -1221,7 +1250,7 @@ static void accumulate_line_c(BYTE* _c_plane, const BYTE** planeP, int planes, i if (std::is_floating_point::value) threshold = threshold / 256; // float else if (sizeof(pixel_t) == 2) - threshold = threshold * 256; // uint16_t + threshold = threshold * (1 << (bits_per_pixel - 8)); // uint16_t, 10 bit: *4 16bit: *256 for (size_t x = offset; x < width; ++x) { pixel_t current = c_plane[x]; @@ -1403,7 +1432,7 @@ static void accumulate_line_yuy2(BYTE* c_plane, const BYTE** planeP, int planes, accumulate_line_yuy2_c(c_plane, planeP, planes, width, threshold_luma, threshold_chroma, div); } -static void accumulate_line(BYTE* c_plane, const BYTE** planeP, int planes, size_t rowsize, BYTE threshold, int div, bool aligned16, int pixelsize, IScriptEnvironment* env) { +static void accumulate_line(BYTE* c_plane, const BYTE** planeP, int planes, size_t rowsize, BYTE threshold, int div, bool aligned16, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { // todo: sse for 16bit/float if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2) && aligned16 && rowsize >= 16) { accumulate_line_sse2(c_plane, planeP, planes, rowsize, threshold | (threshold << 8), div); @@ -1414,14 +1443,14 @@ static void accumulate_line(BYTE* c_plane, const BYTE** planeP, int planes, size accumulate_line_mmx(c_plane, planeP, planes, rowsize, threshold | (threshold << 8), div); if (mod8_width != rowsize) { - accumulate_line_c(c_plane, planeP, planes, mod8_width, rowsize - mod8_width, threshold, div); + accumulate_line_c(c_plane, planeP, planes, mod8_width, rowsize - mod8_width, threshold, div, bits_per_pixel); } } else #endif switch(pixelsize) { - case 1: accumulate_line_c(c_plane, planeP, planes, 0, rowsize, threshold, div); break; - case 2: accumulate_line_c(c_plane, planeP, planes, 0, rowsize, threshold, div); break; - case 4: accumulate_line_c(c_plane, planeP, planes, 0, rowsize, threshold, div); break; + case 1: accumulate_line_c(c_plane, planeP, planes, 0, rowsize, threshold, div, bits_per_pixel); break; + case 2: accumulate_line_c(c_plane, planeP, planes, 0, rowsize, threshold, div, bits_per_pixel); break; + case 4: accumulate_line_c(c_plane, planeP, planes, 0, rowsize, threshold, div, bits_per_pixel); break; } } @@ -1481,13 +1510,13 @@ static int calculate_sad_isse(const BYTE* cur_ptr, const BYTE* other_ptr, int cu #endif template -static size_t calculate_sad_c(const BYTE* cur_ptr, const BYTE* other_ptr, int cur_pitch, int other_pitch, size_t rowsize, size_t height) +static __int64 
calculate_sad_c(const BYTE* cur_ptr, const BYTE* other_ptr, int cur_pitch, int other_pitch, size_t rowsize, size_t height) { const pixel_t *ptr1 = reinterpret_cast(cur_ptr); const pixel_t *ptr2 = reinterpret_cast(other_ptr); size_t width = rowsize / sizeof(pixel_t); - typedef typename std::conditional < std::is_floating_point::value, float, size_t>::type sum_t; + typedef typename std::conditional < std::is_floating_point::value, float, __int64>::type sum_t; sum_t sum = 0; for (size_t y = 0; y < height; ++y) { @@ -1498,20 +1527,20 @@ static size_t calculate_sad_c(const BYTE* cur_ptr, const BYTE* other_ptr, int cu ptr2 += other_pitch / sizeof(pixel_t); } if (std::is_floating_point::value) - return (size_t)(sum * 256); // float defaulting to 0..1 range + return (__int64)(sum * 256); // float defaulting to 0..1 range else - return (size_t)sum; + return (__int64)sum; } // sum of byte-diffs. -static size_t calculate_sad(const BYTE* cur_ptr, const BYTE* other_ptr, int cur_pitch, int other_pitch, size_t rowsize, size_t height, int pixelsize, IScriptEnvironment* env) { +static __int64 calculate_sad(const BYTE* cur_ptr, const BYTE* other_ptr, int cur_pitch, int other_pitch, size_t rowsize, size_t height, int pixelsize, IScriptEnvironment* env) { // todo: sse for 16bit/float if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(cur_ptr, 16) && IsPtrAligned(other_ptr, 16) && rowsize >= 16) { - return calculate_sad_sse2(cur_ptr, other_ptr, cur_pitch, other_pitch, rowsize, height); + return (__int64)calculate_sad_sse2(cur_ptr, other_ptr, cur_pitch, other_pitch, rowsize, height); } #ifdef X86_32 if ((pixelsize ==1 ) && (env->GetCPUFlags() & CPUF_INTEGER_SSE) && rowsize >= 8) { - return calculate_sad_isse(cur_ptr, other_ptr, cur_pitch, other_pitch, rowsize, height); + return (__int64)calculate_sad_isse(cur_ptr, other_ptr, cur_pitch, other_pitch, rowsize, height); } #endif switch(pixelsize) { @@ -1646,7 +1675,7 @@ PVideoFrame TemporalSoften::GetFrame(int n, IScriptEnvironment* env) if (vi.IsYUY2()) { accumulate_line_yuy2(c_plane, planeP, d, rowsize, luma_threshold, chroma_threshold, c_div, aligned16, env); } else { - accumulate_line(c_plane, planeP, d, rowsize, c_thresh, c_div, aligned16, pixelsize, env); + accumulate_line(c_plane, planeP, d, rowsize, c_thresh, c_div, aligned16, pixelsize, bits_per_pixel, env); } for (int p = 0; p Date: Wed, 7 Sep 2016 17:14:38 +0200 Subject: [PATCH 048/120] PlaneSwap ToY: 10-12-14 bit aware --- avs_core/filters/planeswap.cpp | 55 +++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/avs_core/filters/planeswap.cpp b/avs_core/filters/planeswap.cpp index a2afcf317..fad5c89b5 100644 --- a/avs_core/filters/planeswap.cpp +++ b/avs_core/filters/planeswap.cpp @@ -157,7 +157,7 @@ PVideoFrame __stdcall SwapUV::GetFrame(int n, IScriptEnvironment* env) if (vi.IsPlanar()) { // Abuse subframe to flip the UV plane pointers -- extremely fast but a bit naughty! const int uvoffset = src->GetOffset(PLANAR_V) - src->GetOffset(PLANAR_U); // very naughty - don't do this at home!! - + // todo: check for YUVA??? env-> has no SubFramePlanar with alpha option! 
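
// Editor's sketch (not part of the patch): why the SAD accumulator in calculate_sad_c above
// was widened to __int64. With 16 bit samples a full-frame sum no longer fits in 32 bits,
// so a 32 bit size_t (Win32 build) would silently wrap. Compile-time check only; the
// frame size and helper name are illustrative, not taken from the source.
#include <cstdint>

constexpr uint64_t worst_case_sad(uint64_t width, uint64_t height, int bits_per_pixel) {
  return width * height * ((1ull << bits_per_pixel) - 1);  // every pixel maximally different
}

static_assert(worst_case_sad(3840, 2160, 16) > 0xFFFFFFFFull,
              "a 16 bit UHD plane can exceed 32 bits -> 64 bit accumulator needed");
static_assert(worst_case_sad(3840, 2160, 8) <= 0xFFFFFFFFull,
              "the old 8 bit worst case still fits in 32 bits");
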
return env->SubframePlanar(src, 0, src->GetPitch(PLANAR_Y), src->GetRowSize(PLANAR_Y), src->GetHeight(PLANAR_Y), uvoffset, -uvoffset, src->GetPitch(PLANAR_V)); } @@ -245,7 +245,7 @@ SwapUVToY::SwapUVToY(PClip _child, int _mode, IScriptEnvironment* env) env->ThrowError("PlaneToY: clip is not planar RGB!"); if (vi.NumComponents() == 1) - env->ThrowError("PlaneToY: There are no chroma channels in Y8/Y16/Y32!"); + env->ThrowError("PlaneToY: There are no chroma channels in greyscale clip!"); if(YUVmode) { vi.height >>= vi.GetPlaneHeightSubsampling(PLANAR_U); @@ -254,11 +254,14 @@ SwapUVToY::SwapUVToY(PClip _child, int _mode, IScriptEnvironment* env) if (mode == YToY8 || mode == UToY8 || mode == VToY8 || mode == YUY2UToY8 || mode == YUY2VToY8 || RGBmode || Alphamode) { - switch (vi.BytesFromPixels(1)) // although name is Y8, it means that greyscale stays in the same bitdepth + switch (vi.BitsPerComponent()) // although name is Y8, it means that greyscale stays in the same bitdepth { - case 1: vi.pixel_type = VideoInfo::CS_Y8; break; - case 2: vi.pixel_type = VideoInfo::CS_Y16; break; - case 4: vi.pixel_type = VideoInfo::CS_Y32; break; + case 8: vi.pixel_type = VideoInfo::CS_Y8; break; + case 10: vi.pixel_type = VideoInfo::CS_Y10; break; + case 12: vi.pixel_type = VideoInfo::CS_Y12; break; + case 14: vi.pixel_type = VideoInfo::CS_Y14; break; + case 16: vi.pixel_type = VideoInfo::CS_Y16; break; + case 32: vi.pixel_type = VideoInfo::CS_Y32; break; } } } @@ -398,8 +401,9 @@ PVideoFrame __stdcall SwapUVToY::GetFrame(int n, IScriptEnvironment* env) fill_plane(dstp_v, rowsize, height, pitch, 0x80); } else if (vi.ComponentSize() == 2) { // 16bit - fill_plane(dstp_u, rowsize, height, pitch, 0x8000); - fill_plane(dstp_v, rowsize, height, pitch, 0x8000); + uint16_t grey_val = 1 << (vi.BitsPerComponent() - 1); // 0x8000 for 16 bit + fill_plane(dstp_u, rowsize, height, pitch, grey_val); + fill_plane(dstp_v, rowsize, height, pitch, grey_val); } else { // 32bit(float) fill_plane(dstp_u, rowsize, height, pitch, 0.5f); @@ -438,12 +442,16 @@ SwapYToUV::SwapYToUV(PClip _child, PClip _clip, PClip _clipY, IScriptEnvironment if (!clipY) { if (vi.IsYUY2()) vi.width *= 2; - else if (vi.IsY8()) - vi.pixel_type = VideoInfo::CS_YV24; - else if (vi.IsColorSpace(VideoInfo::CS_Y16)) - vi.pixel_type = VideoInfo::CS_YUV444P16; - else if (vi.IsColorSpace(VideoInfo::CS_Y32)) - vi.pixel_type = VideoInfo::CS_YUV444PS; + else if (vi.IsY()) { + switch(vi.BitsPerComponent()) { + case 8: vi.pixel_type = VideoInfo::CS_YV24; break; + case 10: vi.pixel_type = VideoInfo::CS_YUV444P10; break; + case 12: vi.pixel_type = VideoInfo::CS_YUV444P12; break; + case 14: vi.pixel_type = VideoInfo::CS_YUV444P14; break; + case 16: vi.pixel_type = VideoInfo::CS_YUV444P16; break; + case 32: vi.pixel_type = VideoInfo::CS_YUV444PS; break; + } + } else { vi.height <<= vi.GetPlaneHeightSubsampling(PLANAR_U); vi.width <<= vi.GetPlaneWidthSubsampling(PLANAR_U); @@ -465,11 +473,14 @@ SwapYToUV::SwapYToUV(PClip _child, PClip _clip, PClip _clipY, IScriptEnvironment } // Autogenerate destination colorformat - switch (vi.ComponentSize()) + switch (vi.BitsPerComponent()) { - case 1: vi.pixel_type = VideoInfo::CS_YV12; break;// CS_Sub_Width_2 and CS_Sub_Height_2 are 0 - case 2: vi.pixel_type = VideoInfo::CS_YUV420P16; break; - case 4: vi.pixel_type = VideoInfo::CS_YUV420PS; break; + case 8: vi.pixel_type = VideoInfo::CS_YV12; break;// CS_Sub_Width_2 and CS_Sub_Height_2 are 0 + case 10: vi.pixel_type = VideoInfo::CS_YUV420P10; break; + case 12: vi.pixel_type = 
VideoInfo::CS_YUV420P12; break; + case 14: vi.pixel_type = VideoInfo::CS_YUV420P14; break; + case 16: vi.pixel_type = VideoInfo::CS_YUV420P16; break; + case 32: vi.pixel_type = VideoInfo::CS_YUV420PS; break; } if (vi3.width == vi.width) @@ -602,10 +613,12 @@ PVideoFrame __stdcall SwapYToUV::GetFrame(int n, IScriptEnvironment* env) { if (vi.ComponentSize() == 1) // 8bit fill_plane(dstp, rowsize, vi.height, pitch, 0x7e); - else if (vi.ComponentSize() == 2) // 16bit - fill_plane(dstp, rowsize, vi.height, pitch, 0x7e00); - else // 32bit(float) + else if (vi.ComponentSize() == 2) { // 16bit + uint16_t luma_val = 0x7e << (vi.BitsPerComponent() - 8); + fill_plane(dstp, rowsize, vi.height, pitch, luma_val); + } else { // 32bit(float) fill_plane(dstp, rowsize, vi.height, pitch, 126.0f / 256); + } return dst; } From 132b5f712df848243bc67d3c1aeef2c53c18e0f2 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 7 Sep 2016 17:16:04 +0200 Subject: [PATCH 049/120] ConvertToY8: 10-12-14 bit aware --- avs_core/convert/convert_planar.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index f775d1c0e..a7dd2fd7b 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -450,7 +450,8 @@ PVideoFrame __stdcall ConvertToY8::GetFrame(int n, IScriptEnvironment* env) { for (int x=0; x(srcp); // int overflows! - const int Y = matrix.offset_y + (int)(((__int64)matrix.b * srcp16[0] + (__int64)matrix.g * srcp16[1] + (__int64)matrix.r * srcp16[2] + 16384) >> 15); + // todo: does not overflow if matrix.g is converted to 32768 - (matrix.b + matrix.r!!!) (sum is not 32768!) + const int Y = matrix.offset_y + (int)(((__int64)(matrix.b * srcp16[0] + matrix.g * srcp16[1]) + (__int64)matrix.r * srcp16[2] + 16384) >> 15); reinterpret_cast(dstp)[x] = clamp(Y,0,65535); // All the safety we can wish for. // __int64 version is a bit faster @@ -458,7 +459,7 @@ PVideoFrame __stdcall ConvertToY8::GetFrame(int n, IScriptEnvironment* env) { //reinterpret_cast(dstp)[x] = (uint16_t)clamp((int)Y,0,65535); // All the safety we can wish for. srcp += pixel_step; // 6,8 } - srcp -= srcMod; + srcp -= src_pitch; dstp += dst_pitch; } } @@ -482,15 +483,17 @@ PVideoFrame __stdcall ConvertToY8::GetFrame(int n, IScriptEnvironment* env) { dstp += dst_pitch; } } else if(pixelsize==2) { + int max_pixel_value = (1 << vi.BitsPerComponent()) - 1; for (int y=0; y(srcpB)[x] + (__int64)matrix.g * reinterpret_cast(srcpG)[x] + (__int64)matrix.r * reinterpret_cast(srcpR)[x] + 16384) >> 15); - reinterpret_cast(dstp)[x] = (uint16_t)clamp(Y,0,65535); // All the safety we can wish for. + reinterpret_cast(dstp)[x] = (uint16_t)clamp(Y,0,max_pixel_value); // All the safety we can wish for. 
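
// Editor's sketch (not part of the patch): scalar model of the 16 bit RGB -> Y conversion
// above, clamped to max_pixel_value instead of a fixed 65535 so that 10/12/14 bit clips stay
// in their legal range. The coefficient values are the 15 bit scaled Rec.601 constants quoted
// elsewhere in this series and are illustrative here; the real ones come from the matrix struct.
#include <algorithm>
#include <cstdint>

static uint16_t rgb_to_y_15bit(uint16_t r, uint16_t g, uint16_t b,
                               int bits_per_pixel, int offset_y) {
  const int cyr = 9798, cyg = 19235, cyb = 3736;       // Rec.601, 15 bit scaled (sum = 32769)
  const int max_pixel_value = (1 << bits_per_pixel) - 1;
  const int64_t y = offset_y +
      ((int64_t(cyb) * b + int64_t(cyg) * g + int64_t(cyr) * r + 16384) >> 15);
  // at 16 bits the 32769 coefficient sum can land one step above 65535, which is exactly
  // why the clamp (or the g = 32768 - (b + r) correction from the todo note) is needed
  return (uint16_t)std::clamp<int64_t>(y, 0, max_pixel_value);
}
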
} srcpG += pitchG; srcpB += pitchB; srcpR += pitchR; dstp += dst_pitch; From 672d8bf04ea841e29135a68cf8348e53c6153660 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 7 Sep 2016 17:18:24 +0200 Subject: [PATCH 050/120] Mask: hbd alpha formats RGB64 and PlanarRGBA (8,10-16,float) support --- avs_core/filters/layer.cpp | 150 ++++++++++++++++++++++++++++++------- avs_core/filters/layer.h | 3 + 2 files changed, 128 insertions(+), 25 deletions(-) diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp index 5031c66b6..2b60593d9 100644 --- a/avs_core/filters/layer.cpp +++ b/avs_core/filters/layer.cpp @@ -81,10 +81,20 @@ Mask::Mask(PClip _child1, PClip _child2, IScriptEnvironment* env) const VideoInfo& vi2 = child2->GetVideoInfo(); if (vi1.width != vi2.width || vi1.height != vi2.height) env->ThrowError("Mask error: image dimensions don't match"); - if (!vi1.IsRGB32() | !vi2.IsRGB32()) - env->ThrowError("Mask error: sources must be RGB32"); + if (! ((vi1.IsRGB32() && vi2.IsRGB32()) || + (vi1.IsRGB64() && vi2.IsRGB64()) || + (vi1.IsPlanarRGBA() && vi2.IsPlanarRGBA())) + ) + env->ThrowError("Mask error: sources must be RGB32, RGB64 or Planar RGBA"); + + if(vi1.BitsPerComponent() != vi2.BitsPerComponent()) + env->ThrowError("Mask error: Components are not of the same bit depths"); vi = vi1; + + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); + mask_frames = vi2.num_frames; } @@ -196,7 +206,15 @@ static void mask_mmx(BYTE *srcp, const BYTE *alphap, int src_pitch, int alpha_pi #endif -static void mask_c(BYTE *srcp, const BYTE *alphap, int src_pitch, int alpha_pitch, size_t width, size_t height, int cyb, int cyg, int cyr) { + +template +static void mask_c(BYTE *srcp8, const BYTE *alphap8, int src_pitch, int alpha_pitch, size_t width, size_t height, int cyb, int cyg, int cyr) { + pixel_t *srcp = reinterpret_cast(srcp8); + const pixel_t *alphap = reinterpret_cast(alphap8); + + src_pitch /= sizeof(pixel_t); + alpha_pitch /= sizeof(pixel_t); + for (size_t y = 0; y < height; ++y) { for (size_t x = 0; x < width; ++x) { srcp[x*4+3] = (cyb*alphap[x*4+0] + cyg*alphap[x*4+1] + cyr*alphap[x*4+2] + 16384) >> 15; @@ -206,38 +224,120 @@ static void mask_c(BYTE *srcp, const BYTE *alphap, int src_pitch, int alpha_pitc } } +template +static void mask_planar_rgb_c(BYTE *dstp8, const BYTE *srcp_r8, const BYTE *srcp_g8, const BYTE *srcp_b8, int dst_pitch, int src_pitch, size_t width, size_t height, int cyb, int cyg, int cyr, int bits_per_pixel) { + // worst case uint16: 65535 * 19235 + 65535 * 9798 + 65535 * 3736 + 16384 = 65535 * 32769 + 16384 = 2147532799 = 8000BFFF -> int32 fail + // 2147532799 >> 15 = 65537 = 0x10001, needs clamping :( !!!!! 
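
// Editor's note (not part of the patch): the boundary constants quoted in the comment block
// above can be checked mechanically; purely illustrative compile-time assertions:
static_assert(65535ll * 32769 + 16384 == 0x8000BFFFll,
              "a coefficient sum of 32769 overflows signed 32 bit at 16 bit white");
static_assert(65535ll * 32768 + 16384 == 0x7FFFC000ll,
              "forcing the sum to 32768 keeps the worst case inside int32");
static_assert(((65535ll * 32768 + 16384) >> 15) == 65535,
              "and 16 bit white still maps back to 65535 without clamping");
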
+ // worst case uint16: 65535 * (!!!19234) + 65535 * 9798 + 65535 * 3736 + 16384 = 65535 * 32768 + 16384 = 2147467264 = 7FFFC000 -> int32 OK + // 2147467264 >> 15 = 65535 no need clamping + // worst case uint14: 16383*(19235+9798+3736) + 16384 = 16383*32769 + 16384 = 536870911 >> 15 = 16383 -> int is enough, and no clamping needed + // worst case uint12: 4095*(19235+9798+3736) + 16384 = 4095*32769 + 16384 = 134205439 >> 15 = 4095 -> int is enough, and no clamping needed + // worst case uint10: 1023*(19235+9798+3736) + 16384 = 1023*32769 + 16384 = 33539071 >> 15 = 1023 -> int is enough, and no clamping needed + // worst case uint8 : 255*(19235+9798+3736) + 16384 = 255*32769 + 16384 = 8372479 >> 15 = 255 -> int is enough, and no clamping needed + + pixel_t *dstp = reinterpret_cast(dstp8); + const pixel_t *srcp_r = reinterpret_cast(srcp_r8); + const pixel_t *srcp_g = reinterpret_cast(srcp_g8); + const pixel_t *srcp_b = reinterpret_cast(srcp_b8); + src_pitch /= sizeof(pixel_t); + dst_pitch /= sizeof(pixel_t); + + for (size_t y = 0; y < height; ++y) { + for (size_t x = 0; x < width; ++x) { + dstp[x] = ((cyb*srcp_b[x] + cyg*srcp_g[x] + cyr*srcp_r[x] + 16384) >> 15); + } + dstp += dst_pitch; + srcp_r += src_pitch; + srcp_g += src_pitch; + srcp_b += src_pitch; + } +} + +static void mask_planar_rgb_float_c(BYTE *dstp8, const BYTE *srcp_r8, const BYTE *srcp_g8, const BYTE *srcp_b8, int dst_pitch, int src_pitch, size_t width, size_t height, float cyb_f, float cyg_f, float cyr_f) { + + float *dstp = reinterpret_cast(dstp8); + const float *srcp_r = reinterpret_cast(srcp_r8); + const float *srcp_g = reinterpret_cast(srcp_g8); + const float *srcp_b = reinterpret_cast(srcp_b8); + src_pitch /= sizeof(float); + dst_pitch /= sizeof(float); + + for (size_t y = 0; y < height; ++y) { + for (size_t x = 0; x < width; ++x) { + dstp[x] = cyb_f*srcp_b[x] + cyg_f*srcp_g[x] + cyr_f*srcp_r[x]; + } + dstp += dst_pitch; + srcp_r += src_pitch; + srcp_g += src_pitch; + srcp_b += src_pitch; + } +} + PVideoFrame __stdcall Mask::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame src1 = child1->GetFrame(n, env); PVideoFrame src2 = child2->GetFrame(min(n,mask_frames-1), env); - env->MakeWritable(&src1); - BYTE* src1p = src1->GetWritePtr(); - const BYTE* src2p = src2->GetReadPtr(); + // unfortunately sum is not 32768, correction needed at 16 bit to avoid overflow + int cyb = int(0.114*32768+0.5); + int cyg = int(0.587*32768+0.5); + int cyr = int(0.299*32768+0.5); - const int src1_pitch = src1->GetPitch(); - const int src2_pitch = src2->GetPitch(); + if (bits_per_pixel == 16) + cyg = 32768 - (cyr + cyb); // avoid integer overflow and clamping at 16 bits - const int cyb = int(0.114*32768+0.5); - const int cyg = int(0.587*32768+0.5); - const int cyr = int(0.299*32768+0.5); + if (vi.IsPlanar()) { + // planar RGB + const float cyb_f = 0.114f; + const float cyg_f = 0.587f; + const float cyr_f = 0.299f; - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(src1p, 16) && IsPtrAligned(src2p, 16)) - { - mask_sse2(src1p, src2p, src1_pitch, src2_pitch, vi.width, vi.height, cyb, cyg, cyr); - } - else -#ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) - { - mask_mmx(src1p, src2p, src1_pitch, src2_pitch, vi.width, vi.height, cyb, cyg, cyr); - } - else -#endif - { - mask_c(src1p, src2p, src1_pitch, src2_pitch, vi.width, vi.height, cyb, cyg, cyr); + BYTE* dstp = src1->GetWritePtr(PLANAR_A); // destination Alpha plane + + const BYTE* srcp_g = src2->GetReadPtr(PLANAR_G); + const BYTE* srcp_b = src2->GetReadPtr(PLANAR_B); + const BYTE* srcp_r 
= src2->GetReadPtr(PLANAR_R); + + const int dst_pitch = src1->GetPitch(); + const int src_pitch = src2->GetPitch(); + + // clip1_alpha = greyscale(clip2) + if (pixelsize == 1) + mask_planar_rgb_c(dstp, srcp_r, srcp_g, srcp_b, dst_pitch, src_pitch, vi.width, vi.height, cyb, cyg, cyr, bits_per_pixel); + else if (pixelsize == 2) + mask_planar_rgb_c(dstp, srcp_r, srcp_g, srcp_b, dst_pitch, src_pitch, vi.width, vi.height, cyb, cyg, cyr, bits_per_pixel); + else + mask_planar_rgb_float_c(dstp, srcp_r, srcp_g, srcp_b, dst_pitch, src_pitch, vi.width, vi.height, cyb_f, cyg_f, cyr_f); + } else { + // Packed RGB32/64 + BYTE* src1p = src1->GetWritePtr(); + const BYTE* src2p = src2->GetReadPtr(); + + const int src1_pitch = src1->GetPitch(); + const int src2_pitch = src2->GetPitch(); + + // clip1_alpha = greyscale(clip2) + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(src1p, 16) && IsPtrAligned(src2p, 16)) + { + mask_sse2(src1p, src2p, src1_pitch, src2_pitch, vi.width, vi.height, cyb, cyg, cyr); + } + else + #ifdef X86_32 + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_MMX)) + { + mask_mmx(src1p, src2p, src1_pitch, src2_pitch, vi.width, vi.height, cyb, cyg, cyr); + } + else + #endif + { + if (pixelsize == 1) { + mask_c(src1p, src2p, src1_pitch, src2_pitch, vi.width, vi.height, cyb, cyg, cyr); + } else { // if (pixelsize == 2) + mask_c(src1p, src2p, src1_pitch, src2_pitch, vi.width, vi.height, cyb, cyg, cyr); + } + } } return src1; diff --git a/avs_core/filters/layer.h b/avs_core/filters/layer.h index 08ab78522..12f534649 100644 --- a/avs_core/filters/layer.h +++ b/avs_core/filters/layer.h @@ -74,6 +74,9 @@ class Mask : public IClip const PClip child1, child2; VideoInfo vi; int mask_frames; + int pixelsize; + int bits_per_pixel; + }; From 6c5d0b0ee1b4e7e97be13f2ea031c75c69da9d33 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 8 Sep 2016 14:50:08 +0200 Subject: [PATCH 051/120] GreyScale(): RGB64/PlanarRGB(A) + all 10-12-14 bit aware --- avs_core/convert/convert_planar.cpp | 2 +- avs_core/filters/greyscale.cpp | 295 +++++++++++++++++++++------- avs_core/filters/greyscale.h | 12 ++ avs_core/filters/layer.cpp | 20 +- 4 files changed, 250 insertions(+), 79 deletions(-) diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index a7dd2fd7b..38557f8d4 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -2098,7 +2098,7 @@ PVideoFrame __stdcall ConvertToPlanarGeneric::GetFrame(int n, IScriptEnvironment fill_chroma(dstp_u, dstp_v, height, dst_pitch, 0x80); break; case 2: - fill_chroma(dstp_u, dstp_v, height, dst_pitch, 0x8000); + fill_chroma(dstp_u, dstp_v, height, dst_pitch, 1 << (vi.BitsPerComponent() - 1)); break; case 4: fill_chroma(dstp_u, dstp_v, height, dst_pitch, 0.5f); diff --git a/avs_core/filters/greyscale.cpp b/avs_core/filters/greyscale.cpp index c9cb6d76c..aad0392f5 100644 --- a/avs_core/filters/greyscale.cpp +++ b/avs_core/filters/greyscale.cpp @@ -36,10 +36,12 @@ #include "greyscale.h" #include "../core/internal.h" #include +#include #include #include #include #include +#include "../convert/convert_planar.h" /************************************* @@ -68,6 +70,9 @@ Greyscale::Greyscale(PClip _child, const char* matrix, IScriptEnvironment* env) else env->ThrowError("GreyScale: invalid \"matrix\" parameter (must be matrix=\"Rec601\", \"Rec709\" or \"Average\")"); } + BuildGreyMatrix(); + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); } //this is not really faster than 
MMX but a lot cleaner @@ -124,6 +129,47 @@ static void greyscale_rgb32_sse2(BYTE *srcp, size_t /*width*/, size_t height, si } } +static void greyscale_rgb64_sse41(BYTE *srcp, size_t /*width*/, size_t height, size_t pitch, int cyb, int cyg, int cyr) { + __m128i matrix = _mm_set_epi32(0, cyr, cyg, cyb); + __m128i zero = _mm_setzero_si128(); + __m128i round_mask = _mm_set1_epi32(16384); + uint64_t mask64 = 0xFFFF000000000000ull; + __m128i alpha_mask = _mm_set_epi32((uint32_t)(mask64 >> 32),(uint32_t)mask64,(uint32_t)(mask64 >> 32),(uint32_t)mask64); + + BYTE* end_point = srcp + pitch * height; + + while(srcp < end_point) { + __m128i src = _mm_load_si128(reinterpret_cast(srcp)); // 2x64bit pixels + + __m128i srclo = _mm_unpacklo_epi16(src, zero); // pixel1 + __m128i mullo = _mm_mullo_epi32(srclo, matrix); // 0, mul_r1, mul_g1, mul_b1 // sse41 + + __m128i srchi = _mm_unpackhi_epi16(src, zero); // pixel2 + __m128i mulhi = _mm_mullo_epi32(srchi, matrix); // 0, mul_r2, mul_g2, mul_b2 // sse41 + + __m128i alpha = _mm_and_si128(src, alpha_mask); // put back later + + // ssse3 + __m128i result = _mm_hadd_epi32(mullo, mulhi); // 0+mul_r1 | mul_g1+mul_b1 | 0+mul_r2 | mul_g2+mul_b2 + result = _mm_hadd_epi32(result, zero); // 0+mul_r1+mul_g1+mul_b1 | 0+mul_r2+mul_g2+mul_b2 | 0 | 0 + + result = _mm_add_epi32(result, round_mask); + result = _mm_srli_epi32(result, 15); + // we have the greyscale value of two pixels as int32 0 0 | 0 0 | 0 p1 | 0 p0 + // we need 0 p1 p1 p1 0 p0 p0 p0 + + __m128i result1 = _mm_or_si128(_mm_slli_si128(result, 2), result); + // 0 0 | 0 0 | p1 p1 | p0 p0 + result = _mm_unpacklo_epi32(result1, result); + // 0 p1 | p1 p1 | 0 p0 | p0 p0 + + result = _mm_or_si128(alpha, result); // put back initial alpha + + _mm_store_si128(reinterpret_cast<__m128i*>(srcp), result); + + srcp += 16; + } +} #ifdef X86_32 static void greyscale_yuy2_mmx(BYTE *srcp, size_t width, size_t height, size_t pitch) { @@ -206,6 +252,123 @@ static void greyscale_rgb32_mmx(BYTE *srcp, size_t width, size_t height, size_t } #endif +template +static void greyscale_packed_rgb_c(BYTE *srcp8, int src_pitch, int width, int height, int cyb, int cyg, int cyr) { + pixel_t *srcp = reinterpret_cast(srcp8); + src_pitch /= sizeof(pixel_t); + + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + srcp[x*pixel_step+0] = srcp[x*pixel_step+1] = srcp[x*pixel_step+2] = + (cyb*srcp[x*pixel_step+0] + cyg*srcp[x*pixel_step+1] + cyr*srcp[x*pixel_step+2] + 16384) >> 15; + } + srcp += src_pitch; + } +} + +template +static void greyscale_planar_rgb_c(BYTE *srcp_r8, BYTE *srcp_g8, BYTE *srcp_b8, int src_pitch, int width, int height, int cyb, int cyg, int cyr) { + pixel_t *srcp_r = reinterpret_cast(srcp_r8); + pixel_t *srcp_g = reinterpret_cast(srcp_g8); + pixel_t *srcp_b = reinterpret_cast(srcp_b8); + src_pitch /= sizeof(pixel_t); + + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + srcp_b[x] = srcp_g[x] = srcp_r[x] = + ((cyb*srcp_b[x] + cyg*srcp_g[x] + cyr*srcp_r[x] + 16384) >> 15); + } + srcp_r += src_pitch; + srcp_g += src_pitch; + srcp_b += src_pitch; + } +} + +static void greyscale_planar_rgb_float_c(BYTE *srcp_r8, BYTE *srcp_g8, BYTE *srcp_b8, int src_pitch, int width, int height, float cyb, float cyg, float cyr) { + float *srcp_r = reinterpret_cast(srcp_r8); + float *srcp_g = reinterpret_cast(srcp_g8); + float *srcp_b = reinterpret_cast(srcp_b8); + src_pitch /= sizeof(float); + + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + srcp_b[x] = srcp_g[x] = srcp_r[x] = 
cyb*srcp_b[x] + cyg*srcp_g[x] + cyr*srcp_r[x]; + } + srcp_r += src_pitch; + srcp_g += src_pitch; + srcp_b += src_pitch; + } +} + +void Greyscale::BuildGreyMatrix() { + // 16 bit scaled + const int cyavb_sc16 = 21845; // const int cyav = int(0.333333*65536+0.5); + const int cyavg_sc16 = 21845; + const int cyavr_sc16 = 21845; + // 21845 sum: 65535 <= 65536 OK + + // const int cyb = int(0.114*65536+0.5); // 7471 + // const int cyg = int(0.587*65536+0.5); // 38470 + // const int cyr = int(0.299*65536+0.5); // 19595 + + const int cyb601_sc16 = 7471; // int(0.114*65536+0.5); // 7471 + const int cyg601_sc16 = 38470; // int(0.587*65536+0.5); // 38470 + const int cyr601_sc16 = 19595; // int(0.299*65536+0.5); // 19595 + // sum: 65536 OK + + const int cyb709_sc16 = 4732; // int(0.0722 * 65536 + 0.5); // 4732 + const int cyg709_sc16 = 46871; // int(0.7152 * 65536 + 0.5); // 46871 + const int cyr709_sc16 = 13933; // int(0.2126 * 65536 + 0.5); // 13933 + // Sum: 65536 OK + // This is the correct brigtness calculations (standardized in Rec. 709) + + // 15 bit scaled + // PF check: int32 overflow in 16 bits + // 32769 * 65535 + 16384 = 8000BFFF int32 overflow + // 32768 * 65535 + 16384 = 7FFFC000 OK + // Let's make correction + // --- Average + const int cybav_sc15 = 10923; // int(0.33333 * 32768 + 0.5); // 10923 + const int cygav_sc15 = 10923-1; // int(0.33333 * 32768 + 0.5); // 10923 + const int cyrav_sc15 = 10923; // int(0.33333 * 32768 + 0.5); // 10923 + // w/o correction 3*10923 = 32769! + const float cybav_f = 0.333333f; + const float cygav_f = 0.333333f; + const float cyrav_f = 0.333333f; + + // --- Rec601 + const int cyb601_sc15 = 3736; // int(0.114 * 32768 + 0.5); // 3736 + const int cyg601_sc15 = 19235-1; // int(0.587 * 32768 + 0.5); // 19235 + const int cyr601_sc15 = 9798; // int(0.299 * 32768 + 0.5); // 9798 + // w/o correction: 32769 + + const float cyb601_f = 0.114f; + const float cyg601_f = 0.587f; + const float cyr601_f = 0.299f; + + // --- Rec709 + const int cyb709_sc15 = 2366; // int(0.0722 * 32768 + 0.5); // 2366 + const int cyg709_sc15 = 23436; // int(0.7152 * 32768 + 0.5); // 23436 + const int cyr709_sc15 = 6966; // int(0.2126 * 32768 + 0.5); // 6966 + // sum: 32768 OK + const float cyb709_f = 0.0722f; + const float cyg709_f = 0.7152f; + const float cyr709_f = 0.2126f; + + if(matrix_ == Rec709) { + greyMatrix.b = cyb709_sc15; greyMatrix.g = cyg709_sc15; greyMatrix.r = cyr709_sc15; + greyMatrix.b_f = cyb709_f; greyMatrix.g_f = cyg709_f; greyMatrix.r_f = cyr709_f; + } else if (matrix_ == Average) { + greyMatrix.b = cybav_sc15; greyMatrix.g = cygav_sc15; greyMatrix.r = cyrav_sc15; + greyMatrix.b_f = cybav_f; greyMatrix.g_f = cygav_f; greyMatrix.r_f = cyrav_f; + } else if (matrix_ == Rec601) { + greyMatrix.b = cyb601_sc15; greyMatrix.g = cyg601_sc15; greyMatrix.r = cyr601_sc15; + greyMatrix.b_f = cyb601_f; greyMatrix.g_f = cyg601_f; greyMatrix.r_f = cyr601_f; + } else { + // n/a not valid matrix, checked earlier + } + +} PVideoFrame Greyscale::GetFrame(int n, IScriptEnvironment* env) { @@ -219,46 +382,24 @@ PVideoFrame Greyscale::GetFrame(int n, IScriptEnvironment* env) int height = vi.height; int width = vi.width; - if (vi.IsPlanar()) { - switch (vi.BytesFromPixels(1)) + if (vi.IsPlanar() && (vi.IsYUV() || vi.IsYUVA())) { + // planar YUV, set UV plane to neutral + BYTE* dstp_u = frame->GetWritePtr(PLANAR_U); + BYTE* dstp_v = frame->GetWritePtr(PLANAR_V); + const int height = frame->GetHeight(PLANAR_U); + const int dst_pitch = frame->GetPitch(PLANAR_U); + switch (vi.ComponentSize()) { 
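
// Editor's note (not part of the patch): quick sanity check of the 15 bit scaled matrices
// built in BuildGreyMatrix above - Rec709 already sums to 32768, while Rec601 and Average
// need the "-1 on green" correction so 16 bit white maps to white without the int32 overflow
// discussed earlier. Illustrative compile-time checks only.
static_assert(2366 + 23436 + 6966 == 32768, "Rec709 coefficients already sum to 32768");
static_assert(3736 + (19235 - 1) + 9798 == 32768, "Rec601 sums to 32768 after the green correction");
static_assert(10923 + (10923 - 1) + 10923 == 32768, "Average sums to 32768 after the correction");
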
case 1: - memset(frame->GetWritePtr(PLANAR_U), 0x80808080, frame->GetHeight(PLANAR_U) * frame->GetPitch(PLANAR_U)); // 0x80 byte would be enough - memset(frame->GetWritePtr(PLANAR_V), 0x80808080, frame->GetHeight(PLANAR_V) * frame->GetPitch(PLANAR_V)); + fill_chroma(dstp_u, dstp_v, height, dst_pitch, 0x80); // in convert_planar break; case 2: - { - int planes[2] = { PLANAR_U, PLANAR_V }; - for (int p = 0; p < 2; p++) - { - int plane = planes[p]; - uint16_t* dstp = reinterpret_cast(frame->GetWritePtr(plane)); - for (int y = 0; y < height >> vi.GetPlaneHeightSubsampling(plane); y++) - { - for (int x = 0; x < width >> vi.GetPlaneWidthSubsampling(plane); x++) - dstp[x] = 0x8000; - dstp += frame->GetPitch(plane) / sizeof(uint16_t); - } - } + fill_chroma(dstp_u, dstp_v, height, dst_pitch, 1 << (vi.BitsPerComponent() - 1)); break; - } - case 4: - { - int planes[2] = { PLANAR_U, PLANAR_V }; - for (int p = 0; p < 2; p++) - { - int plane = planes[p]; - float* dstp = reinterpret_cast(frame->GetWritePtr(plane)); - for (int y = 0; y < height >> vi.GetPlaneHeightSubsampling(plane); y++) - { - for (int x = 0; x < width >> vi.GetPlaneWidthSubsampling(plane); x++) - dstp[x] = 0.5f; - dstp += frame->GetPitch(plane) / sizeof(float); - } - } + case 4: + fill_chroma(dstp_u, dstp_v, height, dst_pitch, 0.5f); break; } - } return frame; } @@ -282,51 +423,66 @@ PVideoFrame Greyscale::GetFrame(int n, IScriptEnvironment* env) return frame; } - + + if(vi.IsRGB64()) { + if ((env->GetCPUFlags() & CPUF_SSE4_1) && IsPtrAligned(srcp, 16)) { + greyscale_rgb64_sse41(srcp, width, height, pitch, greyMatrix.b, greyMatrix.g, greyMatrix.r); + return frame; + } + } + if (vi.IsRGB32()) { - const int cyav = int(0.33333*32768+0.5); - - const int cyb = int(0.114*32768+0.5); - const int cyg = int(0.587*32768+0.5); - const int cyr = int(0.299*32768+0.5); - - const int cyb709 = int(0.0722*32768+0.5); - const int cyg709 = int(0.7152*32768+0.5); - const int cyr709 = int(0.2126*32768+0.5); - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16)) { - if (matrix_ == Rec709) { - greyscale_rgb32_sse2(srcp, width, height, pitch, cyb709, cyg709, cyr709); - } else if (matrix_ == Average) { - greyscale_rgb32_sse2(srcp, width, height, pitch, cyav, cyav, cyav); - } else { - greyscale_rgb32_sse2(srcp, width, height, pitch, cyb, cyg, cyr); - } + greyscale_rgb32_sse2(srcp, width, height, pitch, greyMatrix.b, greyMatrix.g, greyMatrix.r); return frame; } #ifdef X86_32 else if (env->GetCPUFlags() & CPUF_MMX) { - if (matrix_ == Rec709) { - greyscale_rgb32_mmx(srcp, width, height, pitch, cyb709, cyg709, cyr709); - } else if (matrix_ == Average) { - greyscale_rgb32_mmx(srcp, width, height, pitch, cyav, cyav, cyav); - } else { - greyscale_rgb32_mmx(srcp, width, height, pitch, cyb, cyg, cyr); - } + greyscale_rgb32_mmx(srcp, width, height, pitch, greyMatrix.b, greyMatrix.g, greyMatrix.r); return frame; } #endif } - + if (vi.IsRGB()) { // RGB C. 
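
// Editor's sketch (not part of the patch): scalar equivalent of one pixel in the
// greyscale_rgb64_sse41 path above - the three colour words get the same 15 bit weighted
// sum and the 16 bit alpha word is carried over untouched. Assumes the packed B,G,R,A
// (low to high word) RGB64 layout used by the masks in this series and coefficients that
// sum to at most 32768; the function name is local to this sketch.
#include <cstdint>

static uint64_t grey_rgb64_pixel(uint64_t bgra, int cyb, int cyg, int cyr) {
  const uint32_t b = (uint32_t)(bgra         & 0xFFFF);
  const uint32_t g = (uint32_t)((bgra >> 16) & 0xFFFF);
  const uint32_t r = (uint32_t)((bgra >> 32) & 0xFFFF);
  const uint64_t a = bgra & 0xFFFF000000000000ull;   // alpha word kept as-is
  const uint64_t grey = ((uint64_t)cyb * b + (uint64_t)cyg * g + (uint64_t)cyr * r + 16384) >> 15;
  return a | (grey << 32) | (grey << 16) | grey;
}
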
- BYTE* p_count = srcp; + if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) + { + BYTE* srcp_g = frame->GetWritePtr(PLANAR_G); + BYTE* srcp_b = frame->GetWritePtr(PLANAR_B); + BYTE* srcp_r = frame->GetWritePtr(PLANAR_R); + + const int src_pitch = frame->GetPitch(); // same for all planes + + if (pixelsize == 1) + greyscale_planar_rgb_c(srcp_r, srcp_g, srcp_b, src_pitch, vi.width, vi.height, greyMatrix.b, greyMatrix.g, greyMatrix.r); + else if (pixelsize == 2) + greyscale_planar_rgb_c(srcp_r, srcp_g, srcp_b, src_pitch, vi.width, vi.height, greyMatrix.b, greyMatrix.g, greyMatrix.r); + else + greyscale_planar_rgb_float_c(srcp_r, srcp_g, srcp_b, src_pitch, vi.width, vi.height, greyMatrix.b_f, greyMatrix.g_f, greyMatrix.r_f); + + return frame; + } + // packed RGB + + const int rgb_inc = vi.IsRGB32() || vi.IsRGB64() ? 4 : 3; + + if (pixelsize == 1) { // rgb24/32 + if (rgb_inc == 3) + greyscale_packed_rgb_c(srcp, pitch, vi.width, vi.height, greyMatrix.b, greyMatrix.g, greyMatrix.r); + else + greyscale_packed_rgb_c(srcp, pitch, vi.width, vi.height, greyMatrix.b, greyMatrix.g, greyMatrix.r); + } + else { // rgb48/64 + if (rgb_inc == 3) + greyscale_packed_rgb_c(srcp, pitch, vi.width, vi.height, greyMatrix.b, greyMatrix.g, greyMatrix.r); + else + greyscale_packed_rgb_c(srcp, pitch, vi.width, vi.height, greyMatrix.b, greyMatrix.g, greyMatrix.r); + } - const int rgb_inc = vi.IsRGB32() ? 4 : 3; +#if 0 + BYTE* p_count = srcp; if (matrix_ == Rec709) { - // const int cyb709 = int(0.0722*65536+0.5); // 4732 - // const int cyg709 = int(0.7152*65536+0.5); // 46871 - // const int cyr709 = int(0.2126*65536+0.5); // 13933 for (int y = 0; y>16; // This produces similar results as YUY2 (luma calculation) @@ -364,6 +520,7 @@ PVideoFrame Greyscale::GetFrame(int n, IScriptEnvironment* env) srcp = p_count; } } +#endif } return frame; } diff --git a/avs_core/filters/greyscale.h b/avs_core/filters/greyscale.h index 8b321fca1..2be9b7873 100644 --- a/avs_core/filters/greyscale.h +++ b/avs_core/filters/greyscale.h @@ -38,6 +38,14 @@ #include +struct GreyConversionMatrix { + int r; // for 15bit scaled integer arithmetic + int g; + int b; + float r_f; // for float operation + float g_f; + float b_f; +}; class Greyscale : public GenericVideoFilter /** @@ -55,8 +63,12 @@ class Greyscale : public GenericVideoFilter } private: + void BuildGreyMatrix(); + GreyConversionMatrix greyMatrix; int matrix_; enum {Rec601 = 0, Rec709, Average }; + int pixelsize; + int bits_per_pixel; }; diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp index 2b60593d9..584c0a986 100644 --- a/avs_core/filters/layer.cpp +++ b/avs_core/filters/layer.cpp @@ -280,13 +280,15 @@ PVideoFrame __stdcall Mask::GetFrame(int n, IScriptEnvironment* env) env->MakeWritable(&src1); - // unfortunately sum is not 32768, correction needed at 16 bit to avoid overflow - int cyb = int(0.114*32768+0.5); - int cyg = int(0.587*32768+0.5); - int cyr = int(0.299*32768+0.5); - - if (bits_per_pixel == 16) - cyg = 32768 - (cyr + cyb); // avoid integer overflow and clamping at 16 bits + // 15 bit scaled + // PF check: int32 overflow in 16 bits + // 32769 * 65535 + 16384 = 8000BFFF int32 overflow + // 32768 * 65535 + 16384 = 7FFFC000 OK + // Let's make correction + const int cyb = 3736; // int(0.114 * 32768 + 0.5); // 3736 + const int cyg = 19235-1; // int(0.587 * 32768 + 0.5); // 19235 + const int cyr = 9798; // int(0.299 * 32768 + 0.5); // 9798 + // w/o correction: 32769 if (vi.IsPlanar()) { // planar RGB @@ -1133,7 +1135,7 @@ PVideoFrame ShowChannel::GetFrame(int n, 
IScriptEnvironment* env) BYTE * dstp_v = dst->GetWritePtr(PLANAR_V); switch (pixelsize) { case 1: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, (BYTE)0x80); break; - case 2: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 0x8000); break; + case 2: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 1 << (vi.BitsPerComponent() - 1)); break; case 4: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 0.5f); break; } } @@ -1283,7 +1285,7 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) BYTE * dstp_v = dst->GetWritePtr(PLANAR_V); switch (pixelsize) { case 1: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, (BYTE)0x80); break; - case 2: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 0x8000); break; + case 2: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 1 << (vi.BitsPerComponent() - 1)); break; case 4: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 0.5f); break; } } From 04707112fdedae7e4dc27d0ab1da08d386581663 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 8 Sep 2016 15:19:36 +0200 Subject: [PATCH 052/120] ResetMask: RGB64, planars with alpha (RGBA, YUVA) --- avs_core/convert/convert_planar.cpp | 2 +- avs_core/filters/layer.cpp | 45 +++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index 38557f8d4..f78ea13de 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -2128,7 +2128,7 @@ PVideoFrame __stdcall ConvertToPlanarGeneric::GetFrame(int n, IScriptEnvironment fill_plane(dstp_a, heightA, dst_pitchA, 255); break; case 2: - fill_plane(dstp_a, heightA, dst_pitchA, 65535); + fill_plane(dstp_a, heightA, dst_pitchA, (1 << vi.BitsPerComponent()) - 1); break; case 4: fill_plane(dstp_a, heightA, dst_pitchA, 1.0f); diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp index 584c0a986..5ef231389 100644 --- a/avs_core/filters/layer.cpp +++ b/avs_core/filters/layer.cpp @@ -494,8 +494,8 @@ AVSValue __cdecl ColorKeyMask::Create(AVSValue args, void*, IScriptEnvironment* ResetMask::ResetMask(PClip _child, IScriptEnvironment* env) : GenericVideoFilter(_child) { - if (!vi.IsRGB32()) - env->ThrowError("ResetMask: RGB32 data only"); + if (!(vi.IsRGB32() || vi.IsRGB64() || vi.IsPlanarRGBA() || vi.IsYUVA())) + env->ThrowError("ResetMask: format has no alpha channel"); } @@ -504,16 +504,49 @@ PVideoFrame ResetMask::GetFrame(int n, IScriptEnvironment* env) PVideoFrame f = child->GetFrame(n, env); env->MakeWritable(&f); + if (vi.IsPlanarRGBA() || vi.IsYUVA()) { + const int dst_pitchA = f->GetPitch(PLANAR_A); + BYTE* dstp_a = f->GetWritePtr(PLANAR_A); + const int heightA = f->GetHeight(PLANAR_A); + + switch (vi.ComponentSize()) + { + case 1: + fill_plane(dstp_a, heightA, dst_pitchA, 255); + break; + case 2: + fill_plane(dstp_a, heightA, dst_pitchA, (1 << vi.BitsPerComponent()) - 1); + break; + case 4: + fill_plane(dstp_a, heightA, dst_pitchA, 1.0f); + break; + } + return f; + } + // RGB32 and RGB64 + BYTE* pf = f->GetWritePtr(); int pitch = f->GetPitch(); int rowsize = f->GetRowSize(); int height = f->GetHeight(); + int width = vi.width; - for (int y = 0; y(pf)[x] = 65535; + } + pf += pitch; } - pf += pitch; } return f; From ec2c88cf6f03526430f37d5885ee7a1d1f88f066 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 8 Sep 2016 19:00:23 +0200 Subject: [PATCH 053/120] Layer() for RGB64, ResetMask: new "mask" parameter. 
Default: max of pixel format --- avs_core/filters/layer.cpp | 347 ++++++++++++++++++++++++++----------- avs_core/filters/layer.h | 8 +- 2 files changed, 249 insertions(+), 106 deletions(-) diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp index 5ef231389..b919e2eb1 100644 --- a/avs_core/filters/layer.cpp +++ b/avs_core/filters/layer.cpp @@ -44,6 +44,7 @@ #include "../core/internal.h" #include #include "../convert/convert_planar.h" +#include /******************************************************************** @@ -53,7 +54,7 @@ extern const AVSFunction Layer_filters[] = { { "Mask", BUILTIN_FUNC_PREFIX, "cc", Mask::Create }, // clip, mask { "ColorKeyMask", BUILTIN_FUNC_PREFIX, "ci[]i[]i[]i", ColorKeyMask::Create }, // clip, color, tolerance[B, toleranceG, toleranceR] - { "ResetMask", BUILTIN_FUNC_PREFIX, "c", ResetMask::Create }, + { "ResetMask", BUILTIN_FUNC_PREFIX, "c[mask]f", ResetMask::Create }, { "Invert", BUILTIN_FUNC_PREFIX, "c[channels]s", Invert::Create }, { "ShowAlpha", BUILTIN_FUNC_PREFIX, "c[pixel_type]s", ShowChannel::Create, (void*)3 }, { "ShowRed", BUILTIN_FUNC_PREFIX, "c[pixel_type]s", ShowChannel::Create, (void*)2 }, @@ -491,11 +492,27 @@ AVSValue __cdecl ColorKeyMask::Create(AVSValue args, void*, IScriptEnvironment* ********************************/ -ResetMask::ResetMask(PClip _child, IScriptEnvironment* env) +ResetMask::ResetMask(PClip _child, float _mask_f, IScriptEnvironment* env) : GenericVideoFilter(_child) { if (!(vi.IsRGB32() || vi.IsRGB64() || vi.IsPlanarRGBA() || vi.IsYUVA())) env->ThrowError("ResetMask: format has no alpha channel"); + + // new: resetmask has parameter. If none->max transparency + + int max_pixel_value = (1 << vi.BitsPerComponent()) - 1; + if(_mask_f < 0) { + mask_f = 1.0f; + mask = max_pixel_value; + } + else { + mask_f = _mask_f; + if (mask_f < 0) mask_f = 0; + mask = (int)mask_f; + + mask = clamp(mask, 0, max_pixel_value); + mask_f = clamp(mask_f, 0.0f, 1.0f); + } } @@ -512,13 +529,13 @@ PVideoFrame ResetMask::GetFrame(int n, IScriptEnvironment* env) switch (vi.ComponentSize()) { case 1: - fill_plane(dstp_a, heightA, dst_pitchA, 255); + fill_plane(dstp_a, heightA, dst_pitchA, mask); break; case 2: - fill_plane(dstp_a, heightA, dst_pitchA, (1 << vi.BitsPerComponent()) - 1); + fill_plane(dstp_a, heightA, dst_pitchA, mask); break; case 4: - fill_plane(dstp_a, heightA, dst_pitchA, 1.0f); + fill_plane(dstp_a, heightA, dst_pitchA, mask_f); break; } return f; @@ -534,7 +551,7 @@ PVideoFrame ResetMask::GetFrame(int n, IScriptEnvironment* env) if(vi.IsRGB32()) { for (int y = 0; y(pf)[x] = 65535; + reinterpret_cast(pf)[x] = mask; } pf += pitch; } @@ -555,7 +572,7 @@ PVideoFrame ResetMask::GetFrame(int n, IScriptEnvironment* env) AVSValue ResetMask::Create(AVSValue args, void*, IScriptEnvironment* env) { - return new ResetMask(args[0].AsClip(), env); + return new ResetMask(args[0].AsClip(), (float)args[1].AsFloat(-1.0f), env); } @@ -1576,7 +1593,7 @@ AVSValue MergeRGB::Create(AVSValue args, void* mode, IScriptEnvironment* env) Layer::Layer( PClip _child1, PClip _child2, const char _op[], int _lev, int _x, int _y, int _t, bool _chroma, IScriptEnvironment* env ) : child1(_child1), child2(_child2), levelB(_lev), ofsX(_x), ofsY(_y), Op(_op), - T(_t), chroma(_chroma) + ThresholdParam(_t), chroma(_chroma) { const VideoInfo& vi1 = child1->GetVideoInfo(); const VideoInfo& vi2 = child2->GetVideoInfo(); @@ -1584,12 +1601,20 @@ Layer::Layer( PClip _child1, PClip _child2, const char _op[], int _lev, int _x, if (vi1.pixel_type != vi2.pixel_type) 
env->ThrowError("Layer: image formats don't match"); - if (! (vi1.IsRGB32() | vi1.IsYUY2()) ) - env->ThrowError("Layer only support RGB32 and YUY2 formats"); + if (! (vi1.IsRGB32() || vi1.IsYUY2() || vi1.IsRGB64()) ) + env->ThrowError("Layer only support RGB32, RGB64 and YUY2 formats"); vi = vi1; - if (vi.IsRGB32()) ofsY = vi.height-vi2.height-ofsY; //RGB is upside down + if (levelB == -1) { // default + if (vi.IsRGB64()) + levelB = 65537; // (65535* 65537 +1 ) / 65536 = 65536 + else + levelB = 257; // (65535* 257 +1 ) / 256 = 256 + } + + + if (vi.IsRGB32() || vi.IsRGB64()) ofsY = vi.height-vi2.height-ofsY; //RGB is upside down else ofsX = ofsX & 0xFFFFFFFE; //YUV must be aligned on even pixels xdest=(ofsX < 0)? 0: ofsX; @@ -1615,10 +1640,15 @@ Layer::Layer( PClip _child1, PClip _child2, const char _op[], int _lev, int _x, overlay_frames = vi2.num_frames; } - -const int cyb = int(0.114*32768+0.5); -const int cyg = int(0.587*32768+0.5); -const int cyr = int(0.299*32768+0.5); +// 15 bit scaled +// PF check: int32 overflow in 16 bits +// 32769 * 65535 + 16384 = 8000BFFF int32 overflow +// 32768 * 65535 + 16384 = 7FFFC000 OK +// Let's make correction +const int cyb = 3736; // int(0.114 * 32768 + 0.5); // 3736 +const int cyg = 19235-1; // int(0.587 * 32768 + 0.5); // 19235 +const int cyr = 9798; // int(0.299 * 32768 + 0.5); // 9798 +// w/o correction: 32769 enum { @@ -1913,7 +1943,13 @@ static void layer_yuy2_fast_isse(BYTE* dstp, const BYTE* ovrp, int dst_pitch, in } #endif -static void layer_yuy2_fast_c(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { +template +static void layer_yuy2_fast_c(BYTE* dstp8, const BYTE* ovrp8, int dst_pitch, int overlay_pitch, int width, int height, int level) { + pixel_t *dstp = reinterpret_cast(dstp8); + const pixel_t *ovrp = reinterpret_cast(ovrp8); + dst_pitch /= sizeof(pixel_t); + overlay_pitch /= sizeof(pixel_t); + for (int y = 0; y < height; ++y) { for (int x = 0; x < width*2; ++x) { dstp[x] = (dstp[x] + ovrp[x] + 1) / 2; @@ -2281,31 +2317,53 @@ static void layer_rgb32_mul_isse(BYTE* dstp, const BYTE* ovrp, int dst_pitch, in } #endif -static void layer_rgb32_mul_chroma_c(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { +// For Full Strenth: 8 bit Level must be 257, 16 bit must be 65537! +// in 8 bit: (255*257+1)/256 = (65535+1)/256 = 256 -> alpha_max = 256 +// in 16 bit: (65535*65537+1)/65536 = 65536, x=? 7FFFFFFF, x=65537 -> alpha_max = 65536 + +template +static void layer_rgb32_mul_chroma_c(BYTE* dstp8, const BYTE* ovrp8, int dst_pitch, int overlay_pitch, int width, int height, int level) { + pixel_t *dstp = reinterpret_cast(dstp8); + const pixel_t *ovrp = reinterpret_cast(ovrp8); + dst_pitch /= sizeof(pixel_t); + overlay_pitch /= sizeof(pixel_t); + const int SHIFT = sizeof(pixel_t) == 1 ? 
8 : 16; + + typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type calc_t; + for (int y = 0; y < height; ++y) { for (int x = 0; x < width ; ++x) { - int alpha = (ovrp[x*4+3] * level + 1) >> 8; + calc_t alpha = ((calc_t)ovrp[x*4+3] * level + 1) >> SHIFT; - dstp[x*4] = dstp[x*4] + (((((ovrp[x*4] * dstp[x*4]) >> 8) - dstp[x*4] ) * alpha) >> 8); - dstp[x*4+1] = dstp[x*4+1] + (((((ovrp[x*4+1] * dstp[x*4+1]) >> 8) - dstp[x*4+1]) * alpha) >> 8); - dstp[x*4+2] = dstp[x*4+2] + (((((ovrp[x*4+2] * dstp[x*4+2]) >> 8) - dstp[x*4+2]) * alpha) >> 8); - dstp[x*4+3] = dstp[x*4+3] + (((((ovrp[x*4+3] * dstp[x*4+3]) >> 8) - dstp[x*4+3]) * alpha) >> 8); + dstp[x*4+0] = (pixel_t)(dstp[x*4+0] + ((((((calc_t)ovrp[x*4+0] * dstp[x*4+0]) >> SHIFT) - dstp[x*4+0]) * alpha) >> SHIFT)); + dstp[x*4+1] = (pixel_t)(dstp[x*4+1] + ((((((calc_t)ovrp[x*4+1] * dstp[x*4+1]) >> SHIFT) - dstp[x*4+1]) * alpha) >> SHIFT)); + dstp[x*4+2] = (pixel_t)(dstp[x*4+2] + ((((((calc_t)ovrp[x*4+2] * dstp[x*4+2]) >> SHIFT) - dstp[x*4+2]) * alpha) >> SHIFT)); + dstp[x*4+3] = (pixel_t)(dstp[x*4+3] + ((((((calc_t)ovrp[x*4+3] * dstp[x*4+3]) >> SHIFT) - dstp[x*4+3]) * alpha) >> SHIFT)); } dstp += dst_pitch; ovrp += overlay_pitch; } } -static void layer_rgb32_mul_c(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { +template +static void layer_rgb32_mul_c(BYTE* dstp8, const BYTE* ovrp8, int dst_pitch, int overlay_pitch, int width, int height, int level) { + pixel_t *dstp = reinterpret_cast(dstp8); + const pixel_t *ovrp = reinterpret_cast(ovrp8); + dst_pitch /= sizeof(pixel_t); + overlay_pitch /= sizeof(pixel_t); + const int SHIFT = sizeof(pixel_t) == 1 ? 8 : 16; + + typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type calc_t; + for (int y = 0; y < height; ++y) { for (int x = 0; x < width ; ++x) { - int alpha = (ovrp[x*4+3] * level + 1) >> 8; - int luma = (cyb * ovrp[x*4] + cyg * ovrp[x*4+1] + cyr * ovrp[x*4+2]) >> 15; + calc_t alpha = ((calc_t)ovrp[x*4+3] * level + 1) >> SHIFT; + calc_t luma = (cyb * ovrp[x*4] + cyg * ovrp[x*4+1] + cyr * ovrp[x*4+2]) >> 15; - dstp[x*4] = dstp[x*4] + (((((luma * dstp[x*4]) >> 8) - dstp[x*4] ) * alpha) >> 8); - dstp[x*4+1] = dstp[x*4+1] + (((((luma * dstp[x*4+1]) >> 8) - dstp[x*4+1]) * alpha) >> 8); - dstp[x*4+2] = dstp[x*4+2] + (((((luma * dstp[x*4+2]) >> 8) - dstp[x*4+2]) * alpha) >> 8); - dstp[x*4+3] = dstp[x*4+3] + (((((luma * dstp[x*4+3]) >> 8) - dstp[x*4+3]) * alpha) >> 8); + dstp[x*4+0] = (pixel_t)(dstp[x*4+0] + (((((luma * dstp[x*4+0]) >> SHIFT) - dstp[x*4+0]) * alpha) >> SHIFT)); + dstp[x*4+1] = (pixel_t)(dstp[x*4+1] + (((((luma * dstp[x*4+1]) >> SHIFT) - dstp[x*4+1]) * alpha) >> SHIFT)); + dstp[x*4+2] = (pixel_t)(dstp[x*4+2] + (((((luma * dstp[x*4+2]) >> SHIFT) - dstp[x*4+2]) * alpha) >> SHIFT)); + dstp[x*4+3] = (pixel_t)(dstp[x*4+3] + (((((luma * dstp[x*4+3]) >> SHIFT) - dstp[x*4+3]) * alpha) >> SHIFT)); } dstp += dst_pitch; ovrp += overlay_pitch; @@ -2314,7 +2372,7 @@ static void layer_rgb32_mul_c(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int o template -static void layer_rgb32_add_sse2(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { +static void layer_rgb32_add_sse2(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { int mod2_width = width / 2 * 2; __m128i zero = _mm_setzero_si128(); @@ -2375,7 +2433,7 @@ static void layer_rgb32_add_sse2(BYTE* dstp, const BYTE* ovrp, int dst_pitch, in #ifdef X86_32 template 
-static void layer_rgb32_add_isse(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { +static void layer_rgb32_add_isse(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { __m64 zero = _mm_setzero_si64(); __m64 rgb_coeffs = _mm_set_pi16(0, cyr, cyg, cyb); @@ -2413,31 +2471,49 @@ static void layer_rgb32_add_isse(BYTE* dstp, const BYTE* ovrp, int dst_pitch, in } #endif -static void layer_rgb32_add_chroma_c(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { +template +static void layer_rgb32_add_chroma_c(BYTE* dstp8, const BYTE* ovrp8, int dst_pitch, int overlay_pitch, int width, int height, int level) { + pixel_t *dstp = reinterpret_cast(dstp8); + const pixel_t *ovrp = reinterpret_cast(ovrp8); + dst_pitch /= sizeof(pixel_t); + overlay_pitch /= sizeof(pixel_t); + const int SHIFT = sizeof(pixel_t) == 1 ? 8 : 16; + + typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type calc_t; + for (int y = 0; y < height; ++y) { for (int x = 0; x < width ; ++x) { - int alpha = (ovrp[x*4+3] * level + 1) >> 8; + calc_t alpha = ((calc_t)ovrp[x*4+3] * level + 1) >> SHIFT; - dstp[x*4] = dstp[x*4] + (((ovrp[x*4] - dstp[x*4]) * alpha) >> 8); - dstp[x*4+1] = dstp[x*4+1] + (((ovrp[x*4+1] - dstp[x*4+1]) * alpha) >> 8); - dstp[x*4+2] = dstp[x*4+2] + (((ovrp[x*4+2] - dstp[x*4+2]) * alpha) >> 8); - dstp[x*4+3] = dstp[x*4+3] + (((ovrp[x*4+3] - dstp[x*4+3]) * alpha) >> 8); + dstp[x*4] = (pixel_t)(dstp[x*4] + ((((calc_t)ovrp[x*4] - dstp[x*4]) * alpha) >> SHIFT)); + dstp[x*4+1] = (pixel_t)(dstp[x*4+1] + ((((calc_t)ovrp[x*4+1] - dstp[x*4+1]) * alpha) >> SHIFT)); + dstp[x*4+2] = (pixel_t)(dstp[x*4+2] + ((((calc_t)ovrp[x*4+2] - dstp[x*4+2]) * alpha) >> SHIFT)); + dstp[x*4+3] = (pixel_t)(dstp[x*4+3] + ((((calc_t)ovrp[x*4+3] - dstp[x*4+3]) * alpha) >> SHIFT)); } dstp += dst_pitch; ovrp += overlay_pitch; } } -static void layer_rgb32_add_c(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { +template +static void layer_rgb32_add_c(BYTE* dstp8, const BYTE* ovrp8, int dst_pitch, int overlay_pitch, int width, int height, int level) { + pixel_t *dstp = reinterpret_cast(dstp8); + const pixel_t *ovrp = reinterpret_cast(ovrp8); + dst_pitch /= sizeof(pixel_t); + overlay_pitch /= sizeof(pixel_t); + const int SHIFT = sizeof(pixel_t) == 1 ? 
8 : 16; + + typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type calc_t; + for (int y = 0; y < height; ++y) { for (int x = 0; x < width ; ++x) { - int alpha = (ovrp[x*4+3] * level + 1) >> 8; - int luma = (cyb * ovrp[x*4] + cyg * ovrp[x*4+1] + cyr * ovrp[x*4+2]) >> 15; + calc_t alpha = ((calc_t)ovrp[x*4+3] * level + 1) >> 8; + calc_t luma = (cyb * ovrp[x*4] + cyg * ovrp[x*4+1] + cyr * ovrp[x*4+2]) >> 15; - dstp[x*4] = dstp[x*4] + (((luma - dstp[x*4]) * alpha) >> 8); - dstp[x*4+1] = dstp[x*4+1] + (((luma - dstp[x*4+1]) * alpha) >> 8); - dstp[x*4+2] = dstp[x*4+2] + (((luma - dstp[x*4+2]) * alpha) >> 8); - dstp[x*4+3] = dstp[x*4+3] + (((luma - dstp[x*4+3]) * alpha) >> 8); + dstp[x*4] = (pixel_t)(dstp[x*4] + (((luma - dstp[x*4]) * alpha) >> SHIFT)); + dstp[x*4+1] = (pixel_t)(dstp[x*4+1] + (((luma - dstp[x*4+1]) * alpha) >> SHIFT)); + dstp[x*4+2] = (pixel_t)(dstp[x*4+2] + (((luma - dstp[x*4+2]) * alpha) >> SHIFT)); + dstp[x*4+3] = (pixel_t)(dstp[x*4+3] + (((luma - dstp[x*4+3]) * alpha) >> SHIFT)); } dstp += dst_pitch; ovrp += overlay_pitch; @@ -2455,13 +2531,14 @@ static void layer_rgb32_fast_isse(BYTE* dstp, const BYTE* ovrp, int dst_pitch, i } #endif +template static void layer_rgb32_fast_c(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { - layer_yuy2_fast_c(dstp, ovrp, dst_pitch, overlay_pitch, width*2, height, level); + layer_yuy2_fast_c(dstp, ovrp, dst_pitch, overlay_pitch, width*2, height, level); } template -static void layer_rgb32_subtract_sse2(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { +static void layer_rgb32_subtract_sse2(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { int mod2_width = width / 2 * 2; __m128i zero = _mm_setzero_si128(); @@ -2523,7 +2600,7 @@ static void layer_rgb32_subtract_sse2(BYTE* dstp, const BYTE* ovrp, int dst_pitc #ifdef X86_32 template -static void layer_rgb32_subtract_isse(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { +static void layer_rgb32_subtract_isse(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { __m64 zero = _mm_setzero_si64(); __m64 rgb_coeffs = _mm_set_pi16(0, cyr, cyg, cyb); __m64 ff = _mm_set1_pi16(0x00FF); @@ -2562,31 +2639,53 @@ static void layer_rgb32_subtract_isse(BYTE* dstp, const BYTE* ovrp, int dst_pitc } #endif -static void layer_rgb32_subtract_chroma_c(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { +template +static void layer_rgb32_subtract_chroma_c(BYTE* dstp8, const BYTE* ovrp8, int dst_pitch, int overlay_pitch, int width, int height, int level) { + pixel_t *dstp = reinterpret_cast(dstp8); + const pixel_t *ovrp = reinterpret_cast(ovrp8); + dst_pitch /= sizeof(pixel_t); + overlay_pitch /= sizeof(pixel_t); + const int SHIFT = sizeof(pixel_t) == 1 ? 8 : 16; + + typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type calc_t; + + const calc_t MAX_PIXEL_VALUE = sizeof(pixel_t) == 1 ? 
255 : 65535; + for (int y = 0; y < height; ++y) { for (int x = 0; x < width ; ++x) { - int alpha = (ovrp[x*4+3] * level + 1) >> 8; + calc_t alpha = ((calc_t)ovrp[x*4+3] * level + 1) >> SHIFT; - dstp[x*4] = dstp[x*4] + (((255 - ovrp[x*4] - dstp[x*4]) * alpha) >> 8); - dstp[x*4+1] = dstp[x*4+1] + (((255 - ovrp[x*4+1] - dstp[x*4+1]) * alpha) >> 8); - dstp[x*4+2] = dstp[x*4+2] + (((255 - ovrp[x*4+2] - dstp[x*4+2]) * alpha) >> 8); - dstp[x*4+3] = dstp[x*4+3] + (((255 - ovrp[x*4+3] - dstp[x*4+3]) * alpha) >> 8); + dstp[x*4] = (pixel_t)(dstp[x*4] + (((MAX_PIXEL_VALUE - ovrp[x*4] - dstp[x*4]) * alpha) >> SHIFT)); + dstp[x*4+1] = (pixel_t)(dstp[x*4+1] + (((MAX_PIXEL_VALUE - ovrp[x*4+1] - dstp[x*4+1]) * alpha) >> SHIFT)); + dstp[x*4+2] = (pixel_t)(dstp[x*4+2] + (((MAX_PIXEL_VALUE - ovrp[x*4+2] - dstp[x*4+2]) * alpha) >> SHIFT)); + dstp[x*4+3] = (pixel_t)(dstp[x*4+3] + (((MAX_PIXEL_VALUE - ovrp[x*4+3] - dstp[x*4+3]) * alpha) >> SHIFT)); } dstp += dst_pitch; ovrp += overlay_pitch; } } -static void layer_rgb32_subtract_c(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level) { +template +static void layer_rgb32_subtract_c(BYTE* dstp8, const BYTE* ovrp8, int dst_pitch, int overlay_pitch, int width, int height, int level) { + pixel_t *dstp = reinterpret_cast(dstp8); + const pixel_t *ovrp = reinterpret_cast(ovrp8); + dst_pitch /= sizeof(pixel_t); + overlay_pitch /= sizeof(pixel_t); + const int SHIFT = sizeof(pixel_t) == 1 ? 8 : 16; + + typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type calc_t; + + const calc_t MAX_PIXEL_VALUE = sizeof(pixel_t) == 1 ? 255 : 65535; + for (int y = 0; y < height; ++y) { for (int x = 0; x < width ; ++x) { - int alpha = (ovrp[x*4+3] * level + 1) >> 8; - int luma = (cyb * (255 - ovrp[x*4]) + cyg * (255 - ovrp[x*4+1]) + cyr * (255 - ovrp[x*4+2])) >> 15; + calc_t alpha = ((calc_t)ovrp[x*4+3] * level + 1) >> SHIFT; + calc_t luma = (cyb * (MAX_PIXEL_VALUE - ovrp[x*4]) + cyg * (MAX_PIXEL_VALUE - ovrp[x*4+1]) + cyr * (MAX_PIXEL_VALUE - ovrp[x*4+2])) >> 15; - dstp[x*4] = dstp[x*4] + (((luma - dstp[x*4]) * alpha) >> 8); - dstp[x*4+1] = dstp[x*4+1] + (((luma - dstp[x*4+1]) * alpha) >> 8); - dstp[x*4+2] = dstp[x*4+2] + (((luma - dstp[x*4+2]) * alpha) >> 8); - dstp[x*4+3] = dstp[x*4+3] + (((luma - dstp[x*4+3]) * alpha) >> 8); + dstp[x*4] = (pixel_t)(dstp[x*4] + (((luma - dstp[x*4]) * alpha) >> SHIFT)); + dstp[x*4+1] = (pixel_t)(dstp[x*4+1] + (((luma - dstp[x*4+1]) * alpha) >> SHIFT)); + dstp[x*4+2] = (pixel_t)(dstp[x*4+2] + (((luma - dstp[x*4+2]) * alpha) >> SHIFT)); + dstp[x*4+3] = (pixel_t)(dstp[x*4+3] + (((luma - dstp[x*4+3]) * alpha) >> SHIFT)); } dstp += dst_pitch; ovrp += overlay_pitch; @@ -2707,11 +2806,21 @@ static void layer_rgb32_lighten_darken_isse(BYTE* dstp, const BYTE* ovrp, int ds } #endif -template -static void layer_rgb32_lighten_darken_c(BYTE* dstp, const BYTE* ovrp, int dst_pitch, int overlay_pitch, int width, int height, int level, int thresh) { +template +static void layer_rgb32_lighten_darken_c(BYTE* dstp8, const BYTE* ovrp8, int dst_pitch, int overlay_pitch, int width, int height, int level, int thresh) { + pixel_t *dstp = reinterpret_cast(dstp8); + const pixel_t *ovrp = reinterpret_cast(ovrp8); + dst_pitch /= sizeof(pixel_t); + overlay_pitch /= sizeof(pixel_t); + const int SHIFT = sizeof(pixel_t) == 1 ? 8 : 16; + + typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type calc_t; + + const calc_t MAX_PIXEL_VALUE = sizeof(pixel_t) == 1 ? 
255 : 65535; + for (int y = 0; y < height; ++y) { for (int x = 0; x < width ; ++x) { - int alpha = (ovrp[x*4+3] * level + 1) >> 8; + calc_t alpha = ((calc_t)ovrp[x*4+3] * level + 1) >> SHIFT; int luma_ovr = (cyb * ovrp[x*4] + cyg * ovrp[x*4+1] + cyr * ovrp[x*4+2]) >> 15; int luma_src = (cyb * dstp[x*4] + cyg * dstp[x*4+1] + cyr * dstp[x*4+2]) >> 15; @@ -2721,10 +2830,10 @@ static void layer_rgb32_lighten_darken_c(BYTE* dstp, const BYTE* ovrp, int dst_p alpha = luma_ovr < thresh + luma_src ? alpha : 0; } - dstp[x*4] = dstp[x*4] + (((ovrp[x*4] - dstp[x*4]) * alpha) >> 8); - dstp[x*4+1] = dstp[x*4+1] + (((ovrp[x*4+1] - dstp[x*4+1]) * alpha) >> 8); - dstp[x*4+2] = dstp[x*4+2] + (((ovrp[x*4+2] - dstp[x*4+2]) * alpha) >> 8); - dstp[x*4+3] = dstp[x*4+3] + (((ovrp[x*4+3] - dstp[x*4+3]) * alpha) >> 8); + dstp[x*4] = (pixel_t)(dstp[x*4] + ((((calc_t)ovrp[x*4] - dstp[x*4]) * alpha) >> SHIFT)); + dstp[x*4+1] = (pixel_t)(dstp[x*4+1] + ((((calc_t)ovrp[x*4+1] - dstp[x*4+1]) * alpha) >> SHIFT)); + dstp[x*4+2] = (pixel_t)(dstp[x*4+2] + ((((calc_t)ovrp[x*4+2] - dstp[x*4+2]) * alpha) >> SHIFT)); + dstp[x*4+3] = (pixel_t)(dstp[x*4+3] + ((((calc_t)ovrp[x*4+3] - dstp[x*4+3]) * alpha) >> SHIFT)); } dstp += dst_pitch; ovrp += overlay_pitch; @@ -2756,7 +2865,7 @@ PVideoFrame __stdcall Layer::GetFrame(int n, IScriptEnvironment* env) src1p += (src1_pitch * ydest) + (xdest * 2); src2p += (src2_pitch * ysrc) + (xsrc * 2); - int thresh= ((T & 0xFF) <<16)| (T & 0xFF); + int thresh= ((ThresholdParam & 0xFF) <<16)| (ThresholdParam & 0xFF); if (!lstrcmpi(Op, "Mul")) { @@ -2848,7 +2957,7 @@ PVideoFrame __stdcall Layer::GetFrame(int n, IScriptEnvironment* env) #endif else { - layer_yuy2_fast_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + layer_yuy2_fast_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } } else @@ -2940,47 +3049,56 @@ PVideoFrame __stdcall Layer::GetFrame(int n, IScriptEnvironment* env) } } } - else if (vi.IsRGB32()) + else if (vi.IsRGB32() || vi.IsRGB64()) { - src1p += (src1_pitch * ydest) + (xdest * 4); - src2p += (src2_pitch * ysrc) + (xsrc * 4); + int rgb_step = vi.BytesFromPixels(1); // 4 or 8 + int pixelsize = vi.ComponentSize(); + + src1p += (src1_pitch * ydest) + (xdest * rgb_step); + src2p += (src2_pitch * ysrc) + (xsrc * rgb_step); - int thresh = T & 0xFF; + int thresh = ThresholdParam & (pixelsize == 1 ? 
0xFF : 0xFFFF); if (!lstrcmpi(Op, "Mul")) { if (chroma) { - if (env->GetCPUFlags() & CPUF_SSE2) + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2)) { layer_rgb32_mul_sse2(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #ifdef X86_32 - else if (env->GetCPUFlags() & CPUF_INTEGER_SSE) + else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_INTEGER_SSE)) { layer_rgb32_mul_isse(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #endif else { - layer_rgb32_mul_chroma_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + if(pixelsize == 1) + layer_rgb32_mul_chroma_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + else + layer_rgb32_mul_chroma_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } } - else + else // Mul, chroma==false { - if (env->GetCPUFlags() & CPUF_SSE2) + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2)) { layer_rgb32_mul_sse2(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #ifdef X86_32 - else if (env->GetCPUFlags() & CPUF_INTEGER_SSE) + else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_INTEGER_SSE)) { layer_rgb32_mul_isse(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #endif else { - layer_rgb32_mul_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + if(pixelsize == 1) + layer_rgb32_mul_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + else + layer_rgb32_mul_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } } } @@ -2988,36 +3106,42 @@ PVideoFrame __stdcall Layer::GetFrame(int n, IScriptEnvironment* env) { if (chroma) { - if (env->GetCPUFlags() & CPUF_SSE2) + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2)) { layer_rgb32_add_sse2(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #ifdef X86_32 - else if (env->GetCPUFlags() & CPUF_INTEGER_SSE) + else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_INTEGER_SSE)) { layer_rgb32_add_isse(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #endif else { - layer_rgb32_add_chroma_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + if(pixelsize == 1) + layer_rgb32_add_chroma_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + else + layer_rgb32_add_chroma_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } } - else + else // Add, chroma == false { - if (env->GetCPUFlags() & CPUF_SSE2) + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2)) { layer_rgb32_add_sse2(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #ifdef X86_32 - else if (env->GetCPUFlags() & CPUF_INTEGER_SSE) + else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_INTEGER_SSE)) { layer_rgb32_add_isse(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #endif else { - layer_rgb32_add_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + if(pixelsize == 1) + layer_rgb32_add_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + else + layer_rgb32_add_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } } } @@ -3025,19 +3149,22 @@ PVideoFrame __stdcall Layer::GetFrame(int n, IScriptEnvironment* env) { if (chroma) { - if (env->GetCPUFlags() & CPUF_SSE2) + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2)) { layer_rgb32_lighten_darken_sse2(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel, thresh); } #ifdef X86_32 - else if (env->GetCPUFlags() & CPUF_INTEGER_SSE) + else if ((pixelsize == 1) && (env->GetCPUFlags() & 
CPUF_INTEGER_SSE)) { layer_rgb32_lighten_darken_isse(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel, thresh); } #endif else { - layer_rgb32_lighten_darken_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel, thresh); + if(pixelsize==1) + layer_rgb32_lighten_darken_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel, thresh); + else + layer_rgb32_lighten_darken_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel, thresh); } } else @@ -3049,19 +3176,22 @@ PVideoFrame __stdcall Layer::GetFrame(int n, IScriptEnvironment* env) { if (chroma) { - if (env->GetCPUFlags() & CPUF_SSE2) + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2)) { layer_rgb32_lighten_darken_sse2(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel, thresh); } #ifdef X86_32 - else if (env->GetCPUFlags() & CPUF_INTEGER_SSE) + else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_INTEGER_SSE)) { layer_rgb32_lighten_darken_isse(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel, thresh); } #endif else { - layer_rgb32_lighten_darken_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel, thresh); + if (pixelsize==1) + layer_rgb32_lighten_darken_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel, thresh); + else + layer_rgb32_lighten_darken_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel, thresh); } } else @@ -3073,19 +3203,22 @@ PVideoFrame __stdcall Layer::GetFrame(int n, IScriptEnvironment* env) { if (chroma) { - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(src1p, 16) && IsPtrAligned(src2p, 16)) + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(src1p, 16) && IsPtrAligned(src2p, 16)) { layer_rgb32_fast_sse2(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #ifdef X86_32 - else if (env->GetCPUFlags() & CPUF_INTEGER_SSE) + else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_INTEGER_SSE)) { layer_rgb32_fast_isse(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #endif else { - layer_rgb32_fast_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + if(pixelsize==1) + layer_rgb32_fast_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + else + layer_rgb32_fast_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } } else @@ -3097,36 +3230,42 @@ PVideoFrame __stdcall Layer::GetFrame(int n, IScriptEnvironment* env) { if (chroma) { - if (env->GetCPUFlags() & CPUF_SSE2) + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2)) { layer_rgb32_subtract_sse2(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #ifdef X86_32 - else if (env->GetCPUFlags() & CPUF_INTEGER_SSE) + else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_INTEGER_SSE)) { layer_rgb32_subtract_isse(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #endif else { - layer_rgb32_subtract_chroma_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + if(pixelsize==1) + layer_rgb32_subtract_chroma_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + else + layer_rgb32_subtract_chroma_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } } else { - if (env->GetCPUFlags() & CPUF_SSE2) + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2)) { layer_rgb32_subtract_sse2(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #ifdef X86_32 - else if (env->GetCPUFlags() & CPUF_INTEGER_SSE) + else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_INTEGER_SSE)) { 
layer_rgb32_subtract_isse(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } #endif else { - layer_rgb32_subtract_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + if(pixelsize==1) + layer_rgb32_subtract_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); + else + layer_rgb32_subtract_c(src1p, src2p, src1_pitch, src2_pitch, width, height, mylevel); } } } @@ -3137,7 +3276,7 @@ PVideoFrame __stdcall Layer::GetFrame(int n, IScriptEnvironment* env) AVSValue __cdecl Layer::Create(AVSValue args, void*, IScriptEnvironment* env) { - return new Layer( args[0].AsClip(), args[1].AsClip(), args[2].AsString("Add"), args[3].AsInt(257), + return new Layer( args[0].AsClip(), args[1].AsClip(), args[2].AsString("Add"), args[3].AsInt(-1), args[4].AsInt(0), args[5].AsInt(0), args[6].AsInt(0), args[7].AsBool(true), env ); } diff --git a/avs_core/filters/layer.h b/avs_core/filters/layer.h index 12f534649..27831bb2c 100644 --- a/avs_core/filters/layer.h +++ b/avs_core/filters/layer.h @@ -108,7 +108,7 @@ class ResetMask : public GenericVideoFilter **/ { public: - ResetMask(PClip _child, IScriptEnvironment* env); + ResetMask(PClip _child, float _mask_f, IScriptEnvironment* env); PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); int __stdcall SetCacheHints(int cachehints, int frame_range) override { @@ -116,6 +116,10 @@ class ResetMask : public GenericVideoFilter } static AVSValue __cdecl Create(AVSValue args, void*, IScriptEnvironment* env); + +private: + float mask_f; + int mask; }; @@ -220,7 +224,7 @@ class Layer: public IClip const PClip child1, child2; VideoInfo vi; const char* Op; - const int levelB, T; + int levelB, ThresholdParam; int ydest, xdest, ysrc, xsrc, ofsX, ofsY, ycount, xcount, overlay_frames; const bool chroma; From 63402388cb81c387b086837b4565879a764c22c4 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 8 Sep 2016 19:26:27 +0200 Subject: [PATCH 054/120] Info(): missing 8 bit YUVA color space descriptions --- avs_core/filters/text-overlay.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/avs_core/filters/text-overlay.cpp b/avs_core/filters/text-overlay.cpp index f031caafd..82f6b620d 100644 --- a/avs_core/filters/text-overlay.cpp +++ b/avs_core/filters/text-overlay.cpp @@ -1254,6 +1254,9 @@ const char* const t_YUV422PS="YUV422PS"; const char* const t_YUV444PS="YUV444PS"; const char* const t_Y32="Y32"; +const char* const t_YUVA420="YUVA420"; +const char* const t_YUVA422="YUVA422"; +const char* const t_YUVA444="YUVA444"; const char* const t_YUVA420P10="YUVA420P10"; const char* const t_YUVA422P10="YUVA422P10"; const char* const t_YUVA444P10="YUVA444P10"; @@ -1393,6 +1396,9 @@ PVideoFrame FilterInfo::GetFrame(int n, IScriptEnvironment* env) else if (vii.IsColorSpace(VideoInfo::CS_YUV444PS)) c_space=t_YUV444PS; else if (vii.IsColorSpace(VideoInfo::CS_Y32)) c_space=t_Y32; + else if (vii.IsColorSpace(VideoInfo::CS_YUVA420)) c_space=t_YUVA420; + else if (vii.IsColorSpace(VideoInfo::CS_YUVA422)) c_space=t_YUVA422; + else if (vii.IsColorSpace(VideoInfo::CS_YUVA444)) c_space=t_YUVA444; else if (vii.IsColorSpace(VideoInfo::CS_YUVA420P10)) c_space=t_YUVA420P10; else if (vii.IsColorSpace(VideoInfo::CS_YUVA422P10)) c_space=t_YUVA422P10; else if (vii.IsColorSpace(VideoInfo::CS_YUVA444P10)) c_space=t_YUVA444P10; From 3967091b6051085999577ef3de3f2e24b66b03f3 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 8 Sep 2016 19:28:53 +0200 Subject: [PATCH 055/120] Planar RGBA -> 422/420 conversion chain results YUVA as it works for 444. 
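When the source carries an alpha plane (planar RGBA or YUVA), the 4:2:0 and
4:2:2 conversions now pick the matching YUVA output format instead of silently
dropping the alpha channel; 4:4:4 already behaved this way. A minimal sketch of
the selection rule, built on the hasAlpha test introduced below (the helper name
is illustrative only; the real logic is the switch in
ConvertToPlanarGeneric::Create):

  // Sketch: choose the 4:2:0 target by bit depth, keeping alpha when present.
  static int pick_yuv420_pixel_type(const VideoInfo &vi)
  {
    const bool hasAlpha = vi.NumComponents() == 4;
    switch (vi.BitsPerComponent()) {
    case 8:  return hasAlpha ? VideoInfo::CS_YUVA420    : VideoInfo::CS_YV12;
    case 10: return hasAlpha ? VideoInfo::CS_YUVA420P10 : VideoInfo::CS_YUV420P10;
    case 16: return hasAlpha ? VideoInfo::CS_YUVA420P16 : VideoInfo::CS_YUV420P16;
    case 32: return hasAlpha ? VideoInfo::CS_YUVA420PS  : VideoInfo::CS_YUV420PS;
    default: return VideoInfo::CS_UNKNOWN; // 12/14 bit follow the same pattern
    }
  }

So, for example, an RGBAP16 source run through ConvertToYUV420() now yields
YUVA420P16 rather than YUV420P16.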
--- avs_core/convert/convert_planar.cpp | 38 +++++++++++++++-------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index f78ea13de..49202f626 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -2159,6 +2159,8 @@ AVSValue ConvertToPlanarGeneric::Create(AVSValue& args, const char* filter, IScr int pixel_type = VideoInfo::CS_UNKNOWN; AVSValue outplacement = AVSValue(); + bool hasAlpha = vi.NumComponents() == 4; + if (strcmp(filter, "ConvertToYUV420") == 0) { if (vi.Is420()) if (getPlacement(args[3], env) == getPlacement(args[5], env)) @@ -2166,12 +2168,12 @@ AVSValue ConvertToPlanarGeneric::Create(AVSValue& args, const char* filter, IScr outplacement = args[5]; switch (vi.BitsPerComponent()) { - case 8: pixel_type = VideoInfo::CS_YV12; break; - case 10: pixel_type = VideoInfo::CS_YUV420P10; break; - case 12: pixel_type = VideoInfo::CS_YUV420P12; break; - case 14: pixel_type = VideoInfo::CS_YUV420P14; break; - case 16: pixel_type = VideoInfo::CS_YUV420P16; break; - case 32: pixel_type = VideoInfo::CS_YUV420PS; break; + case 8 : pixel_type = hasAlpha ? VideoInfo::CS_YUVA420 : VideoInfo::CS_YV12; break; + case 10: pixel_type = hasAlpha ? VideoInfo::CS_YUVA420P10 : VideoInfo::CS_YUV420P10; break; + case 12: pixel_type = hasAlpha ? VideoInfo::CS_YUVA420P12 : VideoInfo::CS_YUV420P12; break; + case 14: pixel_type = hasAlpha ? VideoInfo::CS_YUVA420P14 : VideoInfo::CS_YUV420P14; break; + case 16: pixel_type = hasAlpha ? VideoInfo::CS_YUVA420P16 : VideoInfo::CS_YUV420P16; break; + case 32: pixel_type = hasAlpha ? VideoInfo::CS_YUVA420PS : VideoInfo::CS_YUV420PS; break; } } else if (strcmp(filter, "ConvertToYUV422") == 0) { @@ -2179,12 +2181,12 @@ AVSValue ConvertToPlanarGeneric::Create(AVSValue& args, const char* filter, IScr return clip; switch (vi.BitsPerComponent()) { - case 8: pixel_type = VideoInfo::CS_YV16; break; - case 10: pixel_type = VideoInfo::CS_YUV422P10; break; - case 12: pixel_type = VideoInfo::CS_YUV422P12; break; - case 14: pixel_type = VideoInfo::CS_YUV422P14; break; - case 16: pixel_type = VideoInfo::CS_YUV422P16; break; - case 32: pixel_type = VideoInfo::CS_YUV422PS; break; + case 8 : pixel_type = hasAlpha ? VideoInfo::CS_YUVA422 : VideoInfo::CS_YV16; break; + case 10: pixel_type = hasAlpha ? VideoInfo::CS_YUVA422P10 : VideoInfo::CS_YUV422P10; break; + case 12: pixel_type = hasAlpha ? VideoInfo::CS_YUVA422P12 : VideoInfo::CS_YUV422P12; break; + case 14: pixel_type = hasAlpha ? VideoInfo::CS_YUVA422P14 : VideoInfo::CS_YUV422P14; break; + case 16: pixel_type = hasAlpha ? VideoInfo::CS_YUVA422P16 : VideoInfo::CS_YUV422P16; break; + case 32: pixel_type = hasAlpha ? VideoInfo::CS_YUVA422PS : VideoInfo::CS_YUV422PS; break; } } else if (strcmp(filter, "ConvertToYUV444") == 0) { @@ -2192,12 +2194,12 @@ AVSValue ConvertToPlanarGeneric::Create(AVSValue& args, const char* filter, IScr return clip; switch (vi.BitsPerComponent()) { - case 8: pixel_type = VideoInfo::CS_YV24; break; - case 10: pixel_type = VideoInfo::CS_YUV444P10; break; - case 12: pixel_type = VideoInfo::CS_YUV444P12; break; - case 14: pixel_type = VideoInfo::CS_YUV444P14; break; - case 16: pixel_type = VideoInfo::CS_YUV444P16; break; - case 32: pixel_type = VideoInfo::CS_YUV444PS; break; + case 8 : pixel_type = hasAlpha ? VideoInfo::CS_YUVA444 : VideoInfo::CS_YV24; break; + case 10: pixel_type = hasAlpha ? 
VideoInfo::CS_YUVA444P10 : VideoInfo::CS_YUV444P10; break; + case 12: pixel_type = hasAlpha ? VideoInfo::CS_YUVA444P12 : VideoInfo::CS_YUV444P12; break; + case 14: pixel_type = hasAlpha ? VideoInfo::CS_YUVA444P14 : VideoInfo::CS_YUV444P14; break; + case 16: pixel_type = hasAlpha ? VideoInfo::CS_YUVA444P16 : VideoInfo::CS_YUV444P16; break; + case 32: pixel_type = hasAlpha ? VideoInfo::CS_YUVA444PS : VideoInfo::CS_YUV444PS; break; } } else if (strcmp(filter, "ConvertToYV411") == 0) { From 5c0be9bcab87c8474d4241308886eb6a7683599a Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 8 Sep 2016 19:56:33 +0200 Subject: [PATCH 056/120] ConvertToYUV411: alias for ConvertToYV411. For naming consistency --- avs_core/convert/convert.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 6e721c739..796323e47 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -65,6 +65,7 @@ extern const AVSFunction Convert_filters[] = { // matrix can be "rec601", { "ConvertToYUY2", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToYUY2::Create }, { "ConvertBackToYUY2", BUILTIN_FUNC_PREFIX, "c[matrix]s", ConvertBackToYUY2::Create }, { "ConvertToY", BUILTIN_FUNC_PREFIX, "c[matrix]s", ConvertToY8::Create }, + { "ConvertToYUV411", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToPlanarGeneric::CreateYV411}, // alias for ConvertToYV411 { "ConvertToYUV420", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s[ChromaOutPlacement]s", ConvertToPlanarGeneric::CreateYUV420}, { "ConvertToYUV422", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToPlanarGeneric::CreateYUV422}, { "ConvertToYUV444", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToPlanarGeneric::CreateYUV444}, From e780ae4b9bdc32cc207813bf1e30426af12e6072 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sat, 10 Sep 2016 22:53:20 +0200 Subject: [PATCH 057/120] Convert: Planar RGB <> YUV: 10-12-14 bit range --- avs_core/convert/convert_planar.cpp | 40 +++++++++++++++++------------ 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index 49202f626..bc93253c2 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -841,18 +841,18 @@ static void convert_rgb24_to_yv24_mmx(BYTE* dstY, BYTE* dstU, BYTE* dstV, const #endif -template +template static void convert_planarrgb_to_yuv_int_c(BYTE *(&dstp)[3], int (&dstPitch)[3], const BYTE *(&srcp)[3], const int (&srcPitch)[3], int width, int height, const ConversionMatrix &m) { - const pixel_t half = 1 << (8 * sizeof(pixel_t) - 1 ); + const pixel_t half = 1 << (bits_per_pixel - 1 ); typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type sum_t; - const int limit = (1 << (8 * sizeof(pixel_t))) - 1; + const int limit = (1 << bits_per_pixel) - 1; for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { pixel_t g = reinterpret_cast(srcp[0])[x]; pixel_t b = reinterpret_cast(srcp[1])[x]; pixel_t r = reinterpret_cast(srcp[2])[x]; - int Y = (sizeof(pixel_t)==1 ? m.offset_y : m.offset_y << 8) + (int)(((sum_t)m.y_b * b + (sum_t)m.y_g * g + (sum_t)m.y_r * r + 16384)>>15); + int Y = (sizeof(pixel_t)==1 ? 
m.offset_y : m.offset_y << (bits_per_pixel - 8)) + (int)(((sum_t)m.y_b * b + (sum_t)m.y_g * g + (sum_t)m.y_r * r + 16384)>>15); int U = half + (int)(((sum_t)m.u_b * b + (sum_t)m.u_g * g + (sum_t)m.u_r * r + 16384) >> 15); int V = half + (int)(((sum_t)m.v_b * b + (sum_t)m.v_g * g + (sum_t)m.v_r * r + 16384) >> 15); reinterpret_cast(dstp[0])[x] = (pixel_t)clamp(Y, 0, limit); @@ -997,6 +997,7 @@ PVideoFrame __stdcall ConvertRGBToYV24::GetFrame(int n, IScriptEnvironment* env) env->BitBlt(dstA, Apitch, src->GetReadPtr(PLANAR_A), src->GetPitch(PLANAR_A), src->GetRowSize(PLANAR_A_ALIGNED), src->GetHeight(PLANAR_A)); } int pixelsize = vi.ComponentSize(); + int bits_per_pixel = vi.BitsPerComponent(); const BYTE *srcp[3] = { src->GetReadPtr(PLANAR_G), src->GetReadPtr(PLANAR_B), src->GetReadPtr(PLANAR_R) }; const int srcPitch[3] = { src->GetPitch(PLANAR_G), src->GetPitch(PLANAR_B), src->GetPitch(PLANAR_R) }; @@ -1004,12 +1005,14 @@ PVideoFrame __stdcall ConvertRGBToYV24::GetFrame(int n, IScriptEnvironment* env) BYTE *dstp[3] = { dstY, dstU, dstV }; int dstPitch[3] = { Ypitch, UVpitch, UVpitch }; - if(pixelsize==1) - convert_planarrgb_to_yuv_int_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); - else if (pixelsize==2) - convert_planarrgb_to_yuv_int_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); - else // float - convert_planarrgb_to_yuv_float_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); + switch(bits_per_pixel) { + case 8: convert_planarrgb_to_yuv_int_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); break; + case 10: convert_planarrgb_to_yuv_int_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); break; + case 12: convert_planarrgb_to_yuv_int_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); break; + case 14: convert_planarrgb_to_yuv_int_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); break; + case 16: convert_planarrgb_to_yuv_int_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); break; + case 32: convert_planarrgb_to_yuv_float_c(dstp, dstPitch, srcp, srcPitch, vi.width, vi.height, matrix); break; + } } return dst; } @@ -1605,21 +1608,24 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en srcV += src_pitch_uv; } } else if (pixelsize==2) { + int bits_per_pixel = vi.BitsPerComponent(); + int half_pixel_value = 1 << (bits_per_pixel - 1); + int max_pixel_value = (1 << bits_per_pixel) - 1; for (int y = 0; y < vi.height; y++) { for (int x = 0; x < vi.width; x++) { - int Y = reinterpret_cast(srcY)[x] + (matrix.offset_y << 8); - int U = reinterpret_cast(srcU)[x] - 32768; - int V = reinterpret_cast(srcV)[x] - 32768; + int Y = reinterpret_cast(srcY)[x] + (matrix.offset_y << (bits_per_pixel - 8)); + int U = reinterpret_cast(srcU)[x] - half_pixel_value; + int V = reinterpret_cast(srcV)[x] - half_pixel_value; int A; if(targetHasAlpha) - A = srcHasAlpha ? reinterpret_cast(srcA)[x] : 65535; + A = srcHasAlpha ? reinterpret_cast(srcA)[x] : max_pixel_value; // __int64 needed for 16 bit pixels int b = (((__int64)matrix.y_b * Y + (__int64)matrix.u_b * U + (__int64)matrix.v_b * V + 4096)>>13); int g = (((__int64)matrix.y_g * Y + (__int64)matrix.u_g * U + (__int64)matrix.v_g * V + 4096)>>13); int r = (((__int64)matrix.y_r * Y + (__int64)matrix.u_r * U + (__int64)matrix.v_r * V + 4096)>>13); - reinterpret_cast(dstpB)[x] = clamp(b,0,65535); // All the safety we can wish for. - reinterpret_cast(dstpG)[x] = clamp(g,0,65535); // Probably needed here. 
- reinterpret_cast(dstpR)[x] = clamp(r,0,65535); + reinterpret_cast(dstpB)[x] = clamp(b,0,max_pixel_value); // All the safety we can wish for. + reinterpret_cast(dstpG)[x] = clamp(g,0,max_pixel_value); // Probably needed here. + reinterpret_cast(dstpR)[x] = clamp(r,0,max_pixel_value); if(targetHasAlpha) reinterpret_cast(dstpA)[x] = A; } From 808d9c9d2fa40c225c58da479c9bd45e3dab081d Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sat, 10 Sep 2016 22:58:32 +0200 Subject: [PATCH 058/120] StackVertical: don't reverse frame order for planar RGBs --- avs_core/filters/combine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avs_core/filters/combine.cpp b/avs_core/filters/combine.cpp index a998043b6..52e9fbc8f 100644 --- a/avs_core/filters/combine.cpp +++ b/avs_core/filters/combine.cpp @@ -87,7 +87,7 @@ StackVertical::StackVertical(const std::vector& child_array, IScriptEnvir } // reverse the order of the clips in RGB mode because it's upside-down - if (vi.IsRGB()) + if (vi.IsRGB() && !vi.IsPlanarRGB() && !vi.IsPlanarRGBA()) std::reverse(children.begin(), children.end()); } From 8211308f827c9a887cc5ebe51bceee38b29b4ca1 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sat, 10 Sep 2016 23:03:25 +0200 Subject: [PATCH 059/120] General functions for pixel_type<->pixel_type_name, aliases also accepted --- avs_core/core/internal.h | 2 + avs_core/core/parser/script.cpp | 171 ++++++++++++++++++++++---------- 2 files changed, 121 insertions(+), 52 deletions(-) diff --git a/avs_core/core/internal.h b/avs_core/core/internal.h index 442a9dfb2..5a0a0a23d 100644 --- a/avs_core/core/internal.h +++ b/avs_core/core/internal.h @@ -93,6 +93,8 @@ class AVSFunction { int RGB2YUV(int rgb); +const char *GetPixelTypeName(const int pixel_type); // in script.c +const int GetPixelTypeFromName(const char *pixeltypename); // in script.c PClip Create_MessageClip(const char* message, int width, int height, int pixel_type, bool shrink, int textcolor, int halocolor, int bgcolor, diff --git a/avs_core/core/parser/script.cpp b/avs_core/core/parser/script.cpp index 1fac8efa0..e2e86a486 100644 --- a/avs_core/core/parser/script.cpp +++ b/avs_core/core/parser/script.cpp @@ -47,6 +47,8 @@ #include "../internal.h" #include "../Prefetcher.h" #include "../InternalEnvironment.h" +#include + /******************************************************************** @@ -821,59 +823,124 @@ AVSValue Spline(AVSValue args, void*, IScriptEnvironment* env ) static inline const VideoInfo& VI(const AVSValue& arg) { return arg.AsClip()->GetVideoInfo(); } -AVSValue PixelType (AVSValue args, void*, IScriptEnvironment* env) { - switch (VI(args[0]).pixel_type) { - case VideoInfo::CS_BGR24 : - return "RGB24"; - case VideoInfo::CS_BGR32 : - return "RGB32"; - case VideoInfo::CS_YUY2 : - return "YUY2"; - case VideoInfo::CS_YV24 : - return "YV24"; - case VideoInfo::CS_YV16 : - return "YV16"; - case VideoInfo::CS_YV12 : - case VideoInfo::CS_I420 : - return "YV12"; - case VideoInfo::CS_YUV9 : - return "YUV9"; - case VideoInfo::CS_YV411 : - return "YV411"; - case VideoInfo::CS_Y8 : - return "Y8"; - case VideoInfo::CS_YUV420P10 : return "YUV420P10"; - case VideoInfo::CS_YUV422P10 : return "YUV422P10"; - case VideoInfo::CS_YUV444P10 : return "YUV444P10"; - case VideoInfo::CS_Y10 : return "Y10"; - case VideoInfo::CS_YUV420P12 : return "YUV420P12"; - case VideoInfo::CS_YUV422P12 : return "YUV422P12"; - case VideoInfo::CS_YUV444P12 : return "YUV444P12"; - case VideoInfo::CS_Y12 : return "Y12"; - case VideoInfo::CS_YUV420P14 : return "YUV420P14"; - case 
VideoInfo::CS_YUV422P14 : return "YUV422P14"; - case VideoInfo::CS_YUV444P14 : return "YUV444P14"; - case VideoInfo::CS_Y14 : return "Y14"; - case VideoInfo::CS_YUV420P16 : return "YUV420P16"; - case VideoInfo::CS_YUV422P16 : return "YUV422P16"; - case VideoInfo::CS_YUV444P16 : return "YUV444P16"; - case VideoInfo::CS_Y16 : return "Y16"; - case VideoInfo::CS_YUV420PS : return "YUV420PS"; - case VideoInfo::CS_YUV422PS : return "YUV422PS"; - case VideoInfo::CS_YUV444PS : return "YUV444PS"; - case VideoInfo::CS_Y32 : return "Y32"; - case VideoInfo::CS_BGR48 : return "RGB48"; - case VideoInfo::CS_BGR64 : return "RGB64"; - case VideoInfo::CS_RGBP : return "RGBP"; - case VideoInfo::CS_RGBP10 : return "RGBP10"; - case VideoInfo::CS_RGBP12 : return "RGBP12"; - case VideoInfo::CS_RGBP14 : return "RGBP14"; - case VideoInfo::CS_RGBP16 : return "RGBP16"; - case VideoInfo::CS_RGBPS : return "RGBPS"; - default: - break; +static const std::map pixel_format_table = +{ // names for lookup by pixel_type or name + {VideoInfo::CS_BGR24, "RGB24"}, + {VideoInfo::CS_BGR32, "RGB32"}, + {VideoInfo::CS_YUY2 , "YUY2"}, + {VideoInfo::CS_YV24 , "YV24"}, + {VideoInfo::CS_YV16 , "YV16"}, + {VideoInfo::CS_YV12 , "YV12"}, + {VideoInfo::CS_I420 , "YV12"}, + {VideoInfo::CS_YUV9 , "YUV9"}, + {VideoInfo::CS_YV411, "YV411"}, + {VideoInfo::CS_Y8 , "Y8"}, + + {VideoInfo::CS_YUV420P10, "YUV420P10"}, + {VideoInfo::CS_YUV422P10, "YUV422P10"}, + {VideoInfo::CS_YUV444P10, "YUV444P10"}, + {VideoInfo::CS_Y10 , "Y10"}, + {VideoInfo::CS_YUV420P12, "YUV420P12"}, + {VideoInfo::CS_YUV422P12, "YUV422P12"}, + {VideoInfo::CS_YUV444P12, "YUV444P12"}, + {VideoInfo::CS_Y12 , "Y12"}, + {VideoInfo::CS_YUV420P14, "YUV420P14"}, + {VideoInfo::CS_YUV422P14, "YUV422P14"}, + {VideoInfo::CS_YUV444P14, "YUV444P14"}, + {VideoInfo::CS_Y14 , "Y14"}, + {VideoInfo::CS_YUV420P16, "YUV420P16"}, + {VideoInfo::CS_YUV422P16, "YUV422P16"}, + {VideoInfo::CS_YUV444P16, "YUV444P16"}, + {VideoInfo::CS_Y16 , "Y16"}, + {VideoInfo::CS_YUV420PS , "YUV420PS"}, + {VideoInfo::CS_YUV422PS , "YUV422PS"}, + {VideoInfo::CS_YUV444PS , "YUV444PS"}, + {VideoInfo::CS_Y32 , "Y32"}, + + {VideoInfo::CS_BGR48 , "RGB48"}, + {VideoInfo::CS_BGR64 , "RGB64"}, + + {VideoInfo::CS_RGBP , "RGBP"}, + {VideoInfo::CS_RGBP10 , "RGBP10"}, + {VideoInfo::CS_RGBP12 , "RGBP12"}, + {VideoInfo::CS_RGBP14 , "RGBP14"}, + {VideoInfo::CS_RGBP16 , "RGBP16"}, + {VideoInfo::CS_RGBPS , "RGBPS"}, + + {VideoInfo::CS_YUVA420, "YUVA420"}, + {VideoInfo::CS_YUVA422, "YUVA422"}, + {VideoInfo::CS_YUVA444, "YUVA444"}, + {VideoInfo::CS_YUVA420P10, "YUVA420P10"}, + {VideoInfo::CS_YUVA422P10, "YUVA422P10"}, + {VideoInfo::CS_YUVA444P10, "YUVA444P10"}, + {VideoInfo::CS_YUVA420P12, "YUVA420P12"}, + {VideoInfo::CS_YUVA422P12, "YUVA422P12"}, + {VideoInfo::CS_YUVA444P12, "YUVA444P12"}, + {VideoInfo::CS_YUVA420P14, "YUVA420P14"}, + {VideoInfo::CS_YUVA422P14, "YUVA422P14"}, + {VideoInfo::CS_YUVA444P14, "YUVA444P14"}, + {VideoInfo::CS_YUVA420P16, "YUVA420P16"}, + {VideoInfo::CS_YUVA422P16, "YUVA422P16"}, + {VideoInfo::CS_YUVA444P16, "YUVA444P16"}, + {VideoInfo::CS_YUVA420PS , "YUVA420PS"}, + {VideoInfo::CS_YUVA422PS , "YUVA422PS"}, + {VideoInfo::CS_YUVA444PS , "YUVA444PS"}, + + {VideoInfo::CS_RGBAP , "RGBAP"}, + {VideoInfo::CS_RGBAP10 , "RGBAP10"}, + {VideoInfo::CS_RGBAP12 , "RGBAP12"}, + {VideoInfo::CS_RGBAP14 , "RGBAP14"}, + {VideoInfo::CS_RGBAP16 , "RGBAP16"}, + {VideoInfo::CS_RGBAPS , "RGBAPS"}, +}; + +static const std::multimap pixel_format_table_ex = +{ // alternative names for lookup by name (multimap!) 
+ {VideoInfo::CS_YV24 , "YUV444"}, + {VideoInfo::CS_YV16 , "YUV422"}, + {VideoInfo::CS_YV12 , "YUV420"}, + {VideoInfo::CS_YV411, "YUV411"}, + {VideoInfo::CS_RGBP , "RGBP8"}, + {VideoInfo::CS_RGBAP, "RGBAP8"}, + {VideoInfo::CS_YV24 , "YUV444P8"}, + {VideoInfo::CS_YV16 , "YUV422P8"}, + {VideoInfo::CS_YV12 , "YUV420P8"}, + {VideoInfo::CS_YV411, "YUV411P8"}, + {VideoInfo::CS_YUVA420, "YUVA420P8"}, + {VideoInfo::CS_YUVA422, "YUVA422P8"}, + {VideoInfo::CS_YUVA444, "YUVA444P8"}, +}; + +const char *GetPixelTypeName(const int pixel_type) +{ + const std::string name = ""; + auto it = pixel_format_table.find(pixel_type); + if (it == pixel_format_table.end()) + return ""; + return (it->second).c_str(); +} + +const int GetPixelTypeFromName(const char *pixeltypename) +{ + std::string name_to_find = pixeltypename; + for (auto & c: name_to_find) c = toupper(c); // uppercase input string + for (auto it = pixel_format_table.begin(); it != pixel_format_table.end(); it++) + { + if ((it->second).compare(name_to_find) == 0) + return it->first; } - return ""; + // find by alternative names e.g. YUV420 or YUV420P8 instead of YV12 + for (auto it = pixel_format_table_ex.begin(); it != pixel_format_table_ex.end(); it++) + { + if ((it->second).compare(name_to_find) == 0) + return it->first; + } + return VideoInfo::CS_UNKNOWN; +} + + +AVSValue PixelType (AVSValue args, void*, IScriptEnvironment* env) { + return GetPixelTypeName(VI(args[0]).pixel_type); } AVSValue Width(AVSValue args, void*, IScriptEnvironment* env) { return VI(args[0]).width; } From f55c7d4625d5f06b531cb1c359d169d931640abd Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sat, 10 Sep 2016 23:05:10 +0200 Subject: [PATCH 060/120] ShowChannel() to use GetPixelTypeFromName() --- avs_core/filters/layer.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp index b919e2eb1..67f5cb9f8 100644 --- a/avs_core/filters/layer.cpp +++ b/avs_core/filters/layer.cpp @@ -951,7 +951,36 @@ ShowChannel::ShowChannel(PClip _child, const char * pixel_type, int _channel, IS default: env->ThrowError("Show%s: source must be 8 or 16 bit", ShowText[channel]); } target_pixelsize = pixelsize; + } else { + int new_pixel_type = GetPixelTypeFromName(pixel_type); + if(new_pixel_type == VideoInfo::CS_UNKNOWN) + env->ThrowError("Show%s: invalid pixel_type!", ShowText[channel]); + vi.pixel_type = new_pixel_type; + if(vi.IsPlanarRGB() || vi.IsPlanarRGBA() || (vi.BitsPerComponent() !=8 && vi.BitsPerComponent() != 16) || vi.IsYUVA()) + env->ThrowError("Show%s supports the following output pixel types: RGB, Y8, Y16, YUY2, or 8/16 bit YUV formats", ShowText[channel]); + if (new_pixel_type == VideoInfo::CS_YUY2) { + if (vi.width & 1) { + env->ThrowError("Show%s: width must be mod 2 for yuy2", ShowText[channel]); + } + } + if (vi.Is420()) { + if (vi.width & 1) { + env->ThrowError("Show%s: width must be mod 2 for 4:2:0 source", ShowText[channel]); + } + if (vi.height & 1) { + env->ThrowError("Show%s: height must be mod 2 for 4:2:0 source", ShowText[channel]); + } + } + if(vi.Is422()) { + if (vi.width & 1) { + env->ThrowError("Show%s: width must be mod 2 for 4:2:2 source", ShowText[channel]); + } + } + + target_pixelsize = vi.ComponentSize(); } + +#if 0 else if (!lstrcmpi(pixel_type, "rgb32")) { target_pixelsize = 1; vi.pixel_type = VideoInfo::CS_BGR32; @@ -1028,6 +1057,7 @@ ShowChannel::ShowChannel(PClip _child, const char * pixel_type, int _channel, IS else { env->ThrowError("Show%s supports the following output 
pixel types: RGB, Y8, Y16, YUY2, or 8/16 bit YUV formats", ShowText[channel]); } +#endif if(target_pixelsize != pixelsize) env->ThrowError("Show%s: source must be %d bit for %s", ShowText[channel], target_pixelsize*8, pixel_type); } From 1827ec4dab90821a6e9a0e288cb67f4b43cd5492 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sat, 10 Sep 2016 23:08:18 +0200 Subject: [PATCH 061/120] BlankClip and Colorbars to use GetPixelTypeFromName() --- avs_core/filters/source.cpp | 70 ++----------------------------------- 1 file changed, 2 insertions(+), 68 deletions(-) diff --git a/avs_core/filters/source.cpp b/avs_core/filters/source.cpp index 6ece0e38f..cb11385d7 100644 --- a/avs_core/filters/source.cpp +++ b/avs_core/filters/source.cpp @@ -195,72 +195,6 @@ static PVideoFrame CreateBlankFrame(const VideoInfo& vi, int color, int mode, IS return frame; } -static int PixelTypeFromName(const char *pixel_type_string) { - if (!lstrcmpi(pixel_type_string, "YUY2")) return VideoInfo::CS_YUY2; - else if (!lstrcmpi(pixel_type_string, "YV12")) return VideoInfo::CS_YV12; - else if (!lstrcmpi(pixel_type_string, "YV24")) return VideoInfo::CS_YV24; - else if (!lstrcmpi(pixel_type_string, "YV16")) return VideoInfo::CS_YV16; - else if (!lstrcmpi(pixel_type_string, "Y8")) return VideoInfo::CS_Y8; - else if (!lstrcmpi(pixel_type_string, "YV411")) return VideoInfo::CS_YV411; - else if (!lstrcmpi(pixel_type_string, "RGB24")) return VideoInfo::CS_BGR24; - else if (!lstrcmpi(pixel_type_string, "RGB32")) return VideoInfo::CS_BGR32; - else if (!lstrcmpi(pixel_type_string, "YUV420P10")) return VideoInfo::CS_YUV420P10; - else if (!lstrcmpi(pixel_type_string, "YUV422P10")) return VideoInfo::CS_YUV422P10; - else if (!lstrcmpi(pixel_type_string, "YUV444P10")) return VideoInfo::CS_YUV444P10; - else if (!lstrcmpi(pixel_type_string, "Y10")) return VideoInfo::CS_Y10; - else if (!lstrcmpi(pixel_type_string, "YUV420P12")) return VideoInfo::CS_YUV420P12; - else if (!lstrcmpi(pixel_type_string, "YUV422P12")) return VideoInfo::CS_YUV422P12; - else if (!lstrcmpi(pixel_type_string, "YUV444P12")) return VideoInfo::CS_YUV444P12; - else if (!lstrcmpi(pixel_type_string, "Y12")) return VideoInfo::CS_Y12; - else if (!lstrcmpi(pixel_type_string, "YUV420P14")) return VideoInfo::CS_YUV420P14; - else if (!lstrcmpi(pixel_type_string, "YUV422P14")) return VideoInfo::CS_YUV422P14; - else if (!lstrcmpi(pixel_type_string, "YUV444P14")) return VideoInfo::CS_YUV444P14; - else if (!lstrcmpi(pixel_type_string, "Y14")) return VideoInfo::CS_Y14; - else if (!lstrcmpi(pixel_type_string, "YUV420P16")) return VideoInfo::CS_YUV420P16; - else if (!lstrcmpi(pixel_type_string, "YUV422P16")) return VideoInfo::CS_YUV422P16; - else if (!lstrcmpi(pixel_type_string, "YUV444P16")) return VideoInfo::CS_YUV444P16; - else if (!lstrcmpi(pixel_type_string, "Y16")) return VideoInfo::CS_Y16; - else if (!lstrcmpi(pixel_type_string, "YUV420PS")) return VideoInfo::CS_YUV420PS; - else if (!lstrcmpi(pixel_type_string, "YUV422PS")) return VideoInfo::CS_YUV422PS; - else if (!lstrcmpi(pixel_type_string, "YUV444PS")) return VideoInfo::CS_YUV444PS; - else if (!lstrcmpi(pixel_type_string, "Y32")) return VideoInfo::CS_Y32; - else if (!lstrcmpi(pixel_type_string, "RGB48")) return VideoInfo::CS_BGR48; - else if (!lstrcmpi(pixel_type_string, "RGB64")) return VideoInfo::CS_BGR64; - else if (!lstrcmpi(pixel_type_string, "RGBP")) return VideoInfo::CS_RGBP; - else if (!lstrcmpi(pixel_type_string, "RGBP10")) return VideoInfo::CS_RGBP10; - else if (!lstrcmpi(pixel_type_string, "RGBP12")) return 
VideoInfo::CS_RGBP12; - else if (!lstrcmpi(pixel_type_string, "RGBP14")) return VideoInfo::CS_RGBP14; - else if (!lstrcmpi(pixel_type_string, "RGBP16")) return VideoInfo::CS_RGBP16; - else if (!lstrcmpi(pixel_type_string, "RGBPS")) return VideoInfo::CS_RGBPS; - else if (!lstrcmpi(pixel_type_string, "RGBAP")) return VideoInfo::CS_RGBAP; - else if (!lstrcmpi(pixel_type_string, "RGBAP10")) return VideoInfo::CS_RGBAP10; - else if (!lstrcmpi(pixel_type_string, "RGBAP12")) return VideoInfo::CS_RGBAP12; - else if (!lstrcmpi(pixel_type_string, "RGBAP14")) return VideoInfo::CS_RGBAP14; - else if (!lstrcmpi(pixel_type_string, "RGBAP16")) return VideoInfo::CS_RGBAP16; - else if (!lstrcmpi(pixel_type_string, "RGBAPS")) return VideoInfo::CS_RGBAPS; - else if (!lstrcmpi(pixel_type_string, "YUVA420")) return VideoInfo::CS_YUVA420; - else if (!lstrcmpi(pixel_type_string, "YUVA420P10")) return VideoInfo::CS_YUVA420P10; - else if (!lstrcmpi(pixel_type_string, "YUVA420P12")) return VideoInfo::CS_YUVA420P12; - else if (!lstrcmpi(pixel_type_string, "YUVA420P14")) return VideoInfo::CS_YUVA420P14; - else if (!lstrcmpi(pixel_type_string, "YUVA420P16")) return VideoInfo::CS_YUVA420P16; - else if (!lstrcmpi(pixel_type_string, "YUVA420PS")) return VideoInfo::CS_YUVA420PS; - else if (!lstrcmpi(pixel_type_string, "YUVA422")) return VideoInfo::CS_YUVA422; - else if (!lstrcmpi(pixel_type_string, "YUVA422P10")) return VideoInfo::CS_YUVA422P10; - else if (!lstrcmpi(pixel_type_string, "YUVA422P12")) return VideoInfo::CS_YUVA422P12; - else if (!lstrcmpi(pixel_type_string, "YUVA422P14")) return VideoInfo::CS_YUVA422P14; - else if (!lstrcmpi(pixel_type_string, "YUVA422P16")) return VideoInfo::CS_YUVA422P16; - else if (!lstrcmpi(pixel_type_string, "YUVA422PS")) return VideoInfo::CS_YUVA422PS; - else if (!lstrcmpi(pixel_type_string, "YUVA444")) return VideoInfo::CS_YUVA444; - else if (!lstrcmpi(pixel_type_string, "YUVA444P10")) return VideoInfo::CS_YUVA444P10; - else if (!lstrcmpi(pixel_type_string, "YUVA444P12")) return VideoInfo::CS_YUVA444P12; - else if (!lstrcmpi(pixel_type_string, "YUVA444P14")) return VideoInfo::CS_YUVA444P14; - else if (!lstrcmpi(pixel_type_string, "YUVA444P16")) return VideoInfo::CS_YUVA444P16; - else if (!lstrcmpi(pixel_type_string, "YUVA444PS")) return VideoInfo::CS_YUVA444PS; - else { - return VideoInfo::CS_UNKNOWN; - } -} - static AVSValue __cdecl Create_BlankClip(AVSValue args, void*, IScriptEnvironment* env) { VideoInfo vi_default; @@ -325,7 +259,7 @@ static AVSValue __cdecl Create_BlankClip(AVSValue args, void*, IScriptEnvironmen vi.height = args[3].AsInt(vi_default.height); if (args[4].Defined()) { - int pixel_type = PixelTypeFromName(args[4].AsString()); + int pixel_type = GetPixelTypeFromName(args[4].AsString()); if(pixel_type == VideoInfo::CS_UNKNOWN) { env->ThrowError("BlankClip: pixel_type must be \"RGB32\", \"RGB24\", \"YV12\", \"YV24\", \"YV16\", \"Y8\", \n"\ @@ -489,7 +423,7 @@ class ColorBars : public IClip { vi.fps_numerator = 30000; vi.fps_denominator = 1001; vi.num_frames = 107892; // 1 hour - int i_pixel_type = PixelTypeFromName(pixel_type); + int i_pixel_type = GetPixelTypeFromName(pixel_type); if (type) { // ColorbarsHD if (i_pixel_type != VideoInfo::CS_YV24) From 712f3424ab0159407d1cd29524b92be3ab836bf0 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sat, 10 Sep 2016 23:10:36 +0200 Subject: [PATCH 062/120] FilterInfo to use GetPixelTypeName() --- avs_core/filters/text-overlay.cpp | 134 +----------------------------- 1 file changed, 3 insertions(+), 131 deletions(-) diff --git 
a/avs_core/filters/text-overlay.cpp b/avs_core/filters/text-overlay.cpp index 82f6b620d..1c40824d8 100644 --- a/avs_core/filters/text-overlay.cpp +++ b/avs_core/filters/text-overlay.cpp @@ -1224,72 +1224,6 @@ const VideoInfo& FilterInfo::AdjustVi() } -const char* const t_YV12="YV12"; -const char* const t_YUY2="YUY2"; -const char* const t_RGB32="RGB32"; -const char* const t_RGB24="RGB24"; -const char* const t_YV24="YV24"; -const char* const t_Y8="Y8"; -const char* const t_YV16="YV16"; -const char* const t_Y41P="YUV 411 Planar"; - -const char* const t_YUV420P10="YUV420P10"; -const char* const t_YUV422P10="YUV422P10"; -const char* const t_YUV444P10="YUV444P10"; -const char* const t_Y10="Y10"; -const char* const t_YUV420P12="YUV420P12"; -const char* const t_YUV422P12="YUV422P12"; -const char* const t_YUV444P12="YUV444P12"; -const char* const t_Y12="Y12"; -const char* const t_YUV420P14="YUV420P14"; -const char* const t_YUV422P14="YUV422P14"; -const char* const t_YUV444P14="YUV444P14"; -const char* const t_Y14="Y14"; -const char* const t_YUV420P16="YUV420P16"; -const char* const t_YUV422P16="YUV422P16"; -const char* const t_YUV444P16="YUV444P16"; -const char* const t_Y16="Y16"; -const char* const t_YUV420PS="YUV420PS"; -const char* const t_YUV422PS="YUV422PS"; -const char* const t_YUV444PS="YUV444PS"; -const char* const t_Y32="Y32"; - -const char* const t_YUVA420="YUVA420"; -const char* const t_YUVA422="YUVA422"; -const char* const t_YUVA444="YUVA444"; -const char* const t_YUVA420P10="YUVA420P10"; -const char* const t_YUVA422P10="YUVA422P10"; -const char* const t_YUVA444P10="YUVA444P10"; -const char* const t_YUVA420P12="YUVA420P12"; -const char* const t_YUVA422P12="YUVA422P12"; -const char* const t_YUVA444P12="YUVA444P12"; -const char* const t_YUVA420P14="YUVA420P14"; -const char* const t_YUVA422P14="YUVA422P14"; -const char* const t_YUVA444P14="YUVA444P14"; -const char* const t_YUVA420P16="YUVA420P16"; -const char* const t_YUVA422P16="YUVA422P16"; -const char* const t_YUVA444P16="YUVA444P16"; -const char* const t_YUVA420PS="YUVA420PS"; -const char* const t_YUVA422PS="YUVA422PS"; -const char* const t_YUVA444PS="YUVA444PS"; - -const char* const t_RGB48="RGB48"; -const char* const t_RGB64="RGB64"; - -const char* const t_RGBP="RGBP"; -const char* const t_RGBP10="RGBP10"; -const char* const t_RGBP12="RGBP12"; -const char* const t_RGBP14="RGBP14"; -const char* const t_RGBP16="RGBP16"; -const char* const t_RGBPS="RGBPS"; - -const char* const t_RGBAP="RGBAP"; -const char* const t_RGBAP10="RGBAP10"; -const char* const t_RGBAP12="RGBAP12"; -const char* const t_RGBAP14="RGBAP14"; -const char* const t_RGBAP16="RGBAP16"; -const char* const t_RGBAPS="RGBAPS"; - const char* const t_INT8="Integer 8 bit"; const char* const t_INT16="Integer 16 bit"; const char* const t_INT24="Integer 24 bit"; @@ -1367,71 +1301,9 @@ PVideoFrame FilterInfo::GetFrame(int n, IScriptEnvironment* env) int tlen; if (vii.HasVideo()) { - if (vii.IsRGB24()) c_space=t_RGB24; - else if (vii.IsRGB32()) c_space=t_RGB32; - else if (vii.IsYV12()) c_space=t_YV12; - else if (vii.IsYUY2()) c_space=t_YUY2; - else if (vii.IsYV24()) c_space=t_YV24; - else if (vii.IsY8()) c_space=t_Y8; - else if (vii.IsYV16()) c_space=t_YV16; - else if (vii.IsYV411()) c_space=t_Y41P; - else if (vii.IsColorSpace(VideoInfo::CS_YUV420P10)) c_space=t_YUV420P10; - else if (vii.IsColorSpace(VideoInfo::CS_YUV422P10)) c_space=t_YUV422P10; - else if (vii.IsColorSpace(VideoInfo::CS_YUV444P10)) c_space=t_YUV444P10; - else if (vii.IsColorSpace(VideoInfo::CS_Y10)) c_space=t_Y10; - 
else if (vii.IsColorSpace(VideoInfo::CS_YUV420P12)) c_space=t_YUV420P12; - else if (vii.IsColorSpace(VideoInfo::CS_YUV422P12)) c_space=t_YUV422P12; - else if (vii.IsColorSpace(VideoInfo::CS_YUV444P12)) c_space=t_YUV444P12; - else if (vii.IsColorSpace(VideoInfo::CS_Y12)) c_space=t_Y12; - else if (vii.IsColorSpace(VideoInfo::CS_YUV420P14)) c_space=t_YUV420P14; - else if (vii.IsColorSpace(VideoInfo::CS_YUV422P14)) c_space=t_YUV422P14; - else if (vii.IsColorSpace(VideoInfo::CS_YUV444P14)) c_space=t_YUV444P14; - else if (vii.IsColorSpace(VideoInfo::CS_Y14)) c_space=t_Y14; - else if (vii.IsColorSpace(VideoInfo::CS_YUV420P16)) c_space=t_YUV420P16; - else if (vii.IsColorSpace(VideoInfo::CS_YUV422P16)) c_space=t_YUV422P16; - else if (vii.IsColorSpace(VideoInfo::CS_YUV444P16)) c_space=t_YUV444P16; - else if (vii.IsColorSpace(VideoInfo::CS_Y16)) c_space=t_Y16; - else if (vii.IsColorSpace(VideoInfo::CS_YUV420PS)) c_space=t_YUV420PS; - else if (vii.IsColorSpace(VideoInfo::CS_YUV422PS)) c_space=t_YUV422PS; - else if (vii.IsColorSpace(VideoInfo::CS_YUV444PS)) c_space=t_YUV444PS; - else if (vii.IsColorSpace(VideoInfo::CS_Y32)) c_space=t_Y32; - - else if (vii.IsColorSpace(VideoInfo::CS_YUVA420)) c_space=t_YUVA420; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA422)) c_space=t_YUVA422; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA444)) c_space=t_YUVA444; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA420P10)) c_space=t_YUVA420P10; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA422P10)) c_space=t_YUVA422P10; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA444P10)) c_space=t_YUVA444P10; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA420P12)) c_space=t_YUVA420P12; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA422P12)) c_space=t_YUVA422P12; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA444P12)) c_space=t_YUVA444P12; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA420P14)) c_space=t_YUVA420P14; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA422P14)) c_space=t_YUVA422P14; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA444P14)) c_space=t_YUVA444P14; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA420P16)) c_space=t_YUVA420P16; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA422P16)) c_space=t_YUVA422P16; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA444P16)) c_space=t_YUVA444P16; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA420PS)) c_space=t_YUVA420PS; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA422PS)) c_space=t_YUVA422PS; - else if (vii.IsColorSpace(VideoInfo::CS_YUVA444PS)) c_space=t_YUVA444PS; - - else if (vii.IsColorSpace(VideoInfo::CS_BGR48)) c_space=t_RGB48; - else if (vii.IsColorSpace(VideoInfo::CS_BGR64)) c_space=t_RGB64; - - else if (vii.IsColorSpace(VideoInfo::CS_RGBP)) c_space=t_RGBP; - else if (vii.IsColorSpace(VideoInfo::CS_RGBP10)) c_space=t_RGBP10; - else if (vii.IsColorSpace(VideoInfo::CS_RGBP12)) c_space=t_RGBP12; - else if (vii.IsColorSpace(VideoInfo::CS_RGBP14)) c_space=t_RGBP14; - else if (vii.IsColorSpace(VideoInfo::CS_RGBP16)) c_space=t_RGBP16; - else if (vii.IsColorSpace(VideoInfo::CS_RGBPS)) c_space=t_RGBPS; - - else if (vii.IsColorSpace(VideoInfo::CS_RGBAP)) c_space=t_RGBAP; - else if (vii.IsColorSpace(VideoInfo::CS_RGBAP10)) c_space=t_RGBAP10; - else if (vii.IsColorSpace(VideoInfo::CS_RGBAP12)) c_space=t_RGBAP12; - else if (vii.IsColorSpace(VideoInfo::CS_RGBAP14)) c_space=t_RGBAP14; - else if (vii.IsColorSpace(VideoInfo::CS_RGBAP16)) c_space=t_RGBAP16; - else if (vii.IsColorSpace(VideoInfo::CS_RGBAPS)) c_space=t_RGBAPS; - + c_space = GetPixelTypeName(vii.pixel_type); + if (*c_space == '\0') + 
c_space = "Unknown"; if (vii.IsFieldBased()) { if (child->GetParity(n)) { s_parity = t_STFF; From b2216fda4e35466d85e2dd06c17a9ba1a693bc35 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 12 Sep 2016 18:26:17 +0200 Subject: [PATCH 063/120] frame->GetRowSize() and GetHeight() to return 0 if no Alpha plane (for BitBlt's exists-plane? decision) --- avs_core/core/avisynth_c.cpp | 13 ++++++++++++- avs_core/core/interface.cpp | 22 ++++++++++++++++++++-- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/avs_core/core/avisynth_c.cpp b/avs_core/core/avisynth_c.cpp index 548851ec0..c8842131e 100644 --- a/avs_core/core/avisynth_c.cpp +++ b/avs_core/core/avisynth_c.cpp @@ -227,9 +227,18 @@ int AVSC_CC avs_get_row_size_p(const AVS_VideoFrame * p, int plane) return 0; case AVS_PLANAR_ALIGNED: case AVS_PLANAR_Y_ALIGNED: - case AVS_PLANAR_R_ALIGNED: case AVS_PLANAR_G_ALIGNED: case AVS_PLANAR_B_ALIGNED: case AVS_PLANAR_A_ALIGNED: + case AVS_PLANAR_R_ALIGNED: case AVS_PLANAR_G_ALIGNED: case AVS_PLANAR_B_ALIGNED: r = (p->row_size+FRAME_ALIGN-1)&(~(FRAME_ALIGN-1)); // Aligned rowsize return (r <= p->pitch) ? r : p->row_size; + case AVS_PLANAR_A: + return (p->pitchA) ? p->row_sizeA : 0; + case AVS_PLANAR_A_ALIGNED: + if (p->pitchA) { + r = (p->row_sizeA + FRAME_ALIGN - 1)&(~(FRAME_ALIGN - 1)); // Aligned rowsize + return (r <= p->pitchA) ? r : p->row_sizeA; + } + else + return 0; } return p->row_size; } @@ -240,6 +249,8 @@ int AVSC_CC avs_get_height_p(const AVS_VideoFrame * p, int plane) switch (plane) { case AVS_PLANAR_U: case AVS_PLANAR_V: return (p->pitchUV) ? p->heightUV : 0; + case AVS_PLANAR_A: + return (p->pitchA) ? p->height : 0; } return p->height; // Y, G, B, R, A } diff --git a/avs_core/core/interface.cpp b/avs_core/core/interface.cpp index 643abe576..2b6220f4b 100644 --- a/avs_core/core/interface.cpp +++ b/avs_core/core/interface.cpp @@ -473,16 +473,34 @@ int VideoFrame::GetRowSize(int plane) const { } else return 0; case PLANAR_ALIGNED: case PLANAR_Y_ALIGNED: - case PLANAR_R_ALIGNED: case PLANAR_G_ALIGNED: case PLANAR_B_ALIGNED: case PLANAR_A_ALIGNED: + case PLANAR_R_ALIGNED: case PLANAR_G_ALIGNED: case PLANAR_B_ALIGNED: + { const int r = (row_size+FRAME_ALIGN-1)&(~(FRAME_ALIGN-1)); // Aligned rowsize if (r<=pitch) return r; return row_size; } + case PLANAR_A: + if (pitchA) return row_sizeA; else return 0; + case PLANAR_A_ALIGNED: + if(pitchA) { + const int r = (row_sizeA+FRAME_ALIGN-1)&(~(FRAME_ALIGN-1)); // Aligned rowsize + if (r<=pitchA) + return r; + return row_sizeA; + } + else return 0; + } return row_size; // PLANAR_Y, PLANAR_G, PLANAR_B, PLANAR_R } -int VideoFrame::GetHeight(int plane) const { switch (plane) {case PLANAR_U: case PLANAR_V: if (pitchUV) return heightUV; return 0;} return height; } +int VideoFrame::GetHeight(int plane) const { + switch (plane) { + case PLANAR_U: case PLANAR_V: if (pitchUV) return heightUV; return 0; + case PLANAR_A: if (pitchA) return height; return 0; + } + return height; +} // Generally you should not be using these two VideoFrameBuffer* VideoFrame::GetFrameBuffer() const { return vfb; } From 8f0e2bfe0039348e553f132f0e9843e2b0056f8b Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 12 Sep 2016 18:27:24 +0200 Subject: [PATCH 064/120] Comment in order to not mislead me again --- avs_core/convert/convert.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 796323e47..152174fd7 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -681,7 
+681,8 @@ AVSValue __cdecl ConvertToRGB::Create(AVSValue args, void* user_data, IScriptEnv ******* Convert to YV12 ****** *********************************/ - +// for YUY2->YV12 only +// all other sources use ConvertToPlanarGeneric ConvertToYV12::ConvertToYV12(PClip _child, bool _interlaced, IScriptEnvironment* env) : GenericVideoFilter(_child), interlaced(_interlaced) From 37173f18b3a8352214facd216141181cbe9c907f Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 12 Sep 2016 18:32:05 +0200 Subject: [PATCH 065/120] ConvertToY: make rgb matrix offset_y always 8 bit, conversion later --- avs_core/convert/convert_planar.cpp | 20 ++++++++++++-------- avs_core/convert/convert_planar.h | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index bc93253c2..14c1f9d6e 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -97,7 +97,7 @@ ConvertToY8::ConvertToY8(PClip src, int in_matrix, IScriptEnvironment* env) : Ge matrix.b_f = (float)((219.0/255.0)*0.114); //B matrix.g_f = (float)((219.0/255.0)*0.587); //G matrix.r_f = (float)((219.0/255.0)*0.299); //R - matrix.offset_y = pixelsize == 1 ? 16 : 16*256; + matrix.offset_y = 16; matrix.offset_y_f = 16.0f / 256.0f; } else if (in_matrix == PC_601) { matrix.b = (int16_t)(0.114*32768.0+0.5); //B @@ -115,7 +115,7 @@ ConvertToY8::ConvertToY8(PClip src, int in_matrix, IScriptEnvironment* env) : Ge matrix.b_f = (float)((219.0/255.0)*0.0722); //B matrix.g_f = (float)((219.0/255.0)*0.7152); //G matrix.r_f = (float)((219.0/255.0)*0.2126); //R - matrix.offset_y = pixelsize == 1 ? 16 : 16*256; + matrix.offset_y = 16; matrix.offset_y_f = 16.0f / 256.0f; } else if (in_matrix == PC_709) { matrix.b = (int16_t)(0.0722*32768.0+0.5); //B @@ -138,6 +138,9 @@ ConvertToY8::ConvertToY8(PClip src, int in_matrix, IScriptEnvironment* env) : Ge } else { env->ThrowError("ConvertToY: Unknown matrix."); } + // Anti-Overflow correction + if (matrix.g + matrix.r + matrix.b != 32768) + matrix.g = 32768 - (matrix.r + matrix.b); return; } @@ -446,13 +449,13 @@ PVideoFrame __stdcall ConvertToY8::GetFrame(int n, IScriptEnvironment* env) { } } else { // pixelsize==2 + int offset_y_bitsperpixel_corrected = matrix.offset_y << (vi.BitsPerComponent() - 8); for (int y=0; y(srcp); // int overflows! - // todo: does not overflow if matrix.g is converted to 32768 - (matrix.b + matrix.r!!!) (sum is not 32768!) - const int Y = matrix.offset_y + (int)(((__int64)(matrix.b * srcp16[0] + matrix.g * srcp16[1]) + (__int64)matrix.r * srcp16[2] + 16384) >> 15); - reinterpret_cast(dstp)[x] = clamp(Y,0,65535); // All the safety we can wish for. + const int Y = offset_y_bitsperpixel_corrected + (int)(((__int64)(matrix.b * srcp16[0] + matrix.g * srcp16[1]) + (__int64)matrix.r * srcp16[2] + 16384) >> 15); + reinterpret_cast(dstp)[x] = clamp(Y,0,65535); // All the safety we can wish for. 
packed RGB 65535 // __int64 version is a bit faster //const float Y = matrix.offset_y_f + (matrix.b_f * srcp16[0] + matrix.g_f * srcp16[1] + matrix.r_f * srcp16[2]); @@ -483,12 +486,13 @@ PVideoFrame __stdcall ConvertToY8::GetFrame(int n, IScriptEnvironment* env) { dstp += dst_pitch; } } else if(pixelsize==2) { + int offset_y_bitsperpixel_corrected = matrix.offset_y << (vi.BitsPerComponent() - 8); int max_pixel_value = (1 << vi.BitsPerComponent()) - 1; for (int y=0; y(srcpB)[x] + (__int64)matrix.g * reinterpret_cast(srcpG)[x] + (__int64)matrix.r * reinterpret_cast(srcpR)[x] + @@ -1476,7 +1480,7 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en dstp += dst_pitch * (vi.height-1); // We start at last line. Not for Planar RGB bool srcHasAlpha = (src_pitch_a != 0); - if (pixel_step == 4) { + if (pixel_step == 4) { // RGB32 for (int y = 0; y < vi.height; y++) { for (int x = 0; x < vi.width; x++) { int Y = srcY[x] + matrix.offset_y; @@ -1497,7 +1501,7 @@ PVideoFrame __stdcall ConvertYUV444ToRGB::GetFrame(int n, IScriptEnvironment* en srcV += src_pitch_uv; srcA += src_pitch_a; } - } else if (pixel_step == 3) { + } else if (pixel_step == 3) { // RGB24 const int Dstep = dst_pitch + (vi.width * pixel_step); for (int y = 0; y < vi.height; y++) { for (int x = 0; x < vi.width; x++) { diff --git a/avs_core/convert/convert_planar.h b/avs_core/convert/convert_planar.h index ace585f66..06d17fab3 100644 --- a/avs_core/convert/convert_planar.h +++ b/avs_core/convert/convert_planar.h @@ -54,7 +54,7 @@ struct ChannelConversionMatrix { float r_f; // for float operation float g_f; float b_f; - int offset_y; // for 8 or 16 bit + int offset_y; // always 8 bit float offset_y_f; // for float }; From 53f46a50df2cc1e96ef103cc39cfde3d923f0852 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 12 Sep 2016 18:35:02 +0200 Subject: [PATCH 066/120] Memo why AVSValue will never get 64 bit types in Avisynth(32) --- avs_core/include/avisynth.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/avs_core/include/avisynth.h b/avs_core/include/avisynth.h index 7da99878b..13e613be1 100644 --- a/avs_core/include/avisynth.h +++ b/avs_core/include/avisynth.h @@ -1018,7 +1018,7 @@ class AVSValue { private: - short type; // 'a'rray, 'c'lip, 'b'ool, 'i'nt, 'f'loat, 's'tring, 'v'oid, or 'l'ong + short type; // 'a'rray, 'c'lip, 'b'ool, 'i'nt, 'f'loat, 's'tring, 'v'oid, or RFU: 'l'ong ('d'ouble) short array_size; union { IClip* clip; @@ -1027,7 +1027,11 @@ class AVSValue { float floating_pt; const char* string; const AVSValue* array; -// __int64 longlong; + #ifdef X86_64 + // if ever, only x64 will support. It breaks struct size on 32 bit + __int64 longlong; // 8 bytes + double double_pt // 8 bytes + #endif }; void Assign(const AVSValue* src, bool init); From 7a340644f7178ecb4660458c715b72e3bf474d67 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 12 Sep 2016 18:38:28 +0200 Subject: [PATCH 067/120] Overlay! Working "add" method for 16 bit input. Big changes, no cleanup yet. 
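
For orientation before the diff: the simplest case handled by the new templated add path (full opacity, no mask) boils down to the per-pixel arithmetic below. This is a minimal standalone sketch only — the names add_blend_pixel and Pixel are illustrative and are not part of this patch, which instead templates OL_AddImage::BlendImageMask over pixel_t and maskMode in OF_add.cpp:

  // Sketch only (not patch code): bit-depth-generic "add" blend,
  // no-mask, full-opacity case, components already widened to int.
  #include <algorithm>

  struct Pixel { int y, u, v; };

  static Pixel add_blend_pixel(Pixel base, Pixel over, int bits_per_pixel)
  {
    const int max_value  = (1 << bits_per_pixel) - 1;   // 255 at 8 bit ... 65535 at 16 bit
    const int half_value = 1 << (bits_per_pixel - 1);   // neutral chroma, 128 at 8 bit
    const int SHIFT      = 5 + (bits_per_pixel - 8);    // rolloff window scales with depth
    const int over32     = 1 << SHIFT;                  // 32 at 8 bit, 8192 at 16 bit

    int Y = base.y + over.y;
    int U = base.u + over.u - half_value;
    int V = base.v + over.v - half_value;

    if (Y > max_value) {                                // overbright: pull chroma towards neutral
      const int m = std::max(0, (max_value + 1) + over32 - Y);  // 288 - Y at 8 bit, range 0..over32
      U = (U * m + half_value * (over32 - m)) >> SHIFT;
      V = (V * m + half_value * (over32 - m)) >> SHIFT;
      Y = max_value;
    }
    return { Y, std::clamp(U, 0, max_value), std::clamp(V, 0, max_value) };
  }

With 16-bit pixels this no-mask case still fits in 32-bit ints, but the masked and opacity branches multiply two 16-bit quantities, which is why the code below widens the intermediate to __int64 (result_t) before shifting back down; opacity stays on the 0..256 scale (OPACITY_SHIFT) while the mask plane is rescaled through MASK_CORR_SHIFT = bits_per_pixel.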
--- avs_core/filters/overlay/444convert.cpp | 309 ++++++++++- avs_core/filters/overlay/444convert.h | 39 +- avs_core/filters/overlay/OF_add.cpp | 154 ++++-- avs_core/filters/overlay/OF_blend.cpp | 23 + avs_core/filters/overlay/OF_darken.cpp | 23 + avs_core/filters/overlay/OF_difference.cpp | 23 + avs_core/filters/overlay/OF_exclusion.cpp | 23 + avs_core/filters/overlay/OF_lighten.cpp | 23 + avs_core/filters/overlay/OF_lumachroma.cpp | 41 ++ avs_core/filters/overlay/OF_multiply.cpp | 23 + avs_core/filters/overlay/OF_softhardlight.cpp | 42 ++ avs_core/filters/overlay/OF_subtract.cpp | 22 + avs_core/filters/overlay/imghelpers.h | 112 +++- avs_core/filters/overlay/overlay.cpp | 517 ++++++++++++++++-- avs_core/filters/overlay/overlay.h | 15 +- avs_core/filters/overlay/overlayfunctions.h | 56 +- 16 files changed, 1305 insertions(+), 140 deletions(-) diff --git a/avs_core/filters/overlay/444convert.cpp b/avs_core/filters/overlay/444convert.cpp index 6b6f44e49..064e7d42f 100644 --- a/avs_core/filters/overlay/444convert.cpp +++ b/avs_core/filters/overlay/444convert.cpp @@ -39,7 +39,6 @@ #include #include - //this isn't really faster than mmx static void convert_yv12_chroma_to_yv24_sse2(BYTE *dstp, const BYTE *srcp, int dst_pitch, int src_pitch, int src_width, int src_height) { int mod8_width = src_width / 8 * 8; @@ -97,7 +96,12 @@ static void convert_yv12_chroma_to_yv24_mmx(BYTE *dstp, const BYTE *srcp, int ds #endif // X86_32 -static void convert_yv12_chroma_to_yv24_c(BYTE *dstp, const BYTE *srcp, int dst_pitch, int src_pitch, int src_width, int src_height) { +template +static void convert_yv12_chroma_to_yv24_c(BYTE *dstp8, const BYTE *srcp8, int dst_pitch, int src_pitch, int src_width, int src_height) { + pixel_t *dstp = reinterpret_cast(dstp8); + const pixel_t *srcp = reinterpret_cast(srcp8); + dst_pitch /= sizeof(pixel_t); + src_pitch /= sizeof(pixel_t); for (int y = 0; y < src_height; ++y) { for (int x = 0; x < src_width; ++x) { dstp[x*2] = srcp[x]; @@ -110,9 +114,61 @@ static void convert_yv12_chroma_to_yv24_c(BYTE *dstp, const BYTE *srcp, int dst_ } } -void Convert444FromYV12::ConvertImage(PVideoFrame src, Image444* dst, IScriptEnvironment* env) +//void Convert444FromYV12::ConvertImage(PVideoFrame src, Image444* dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) +void Convert444FromYV12(PVideoFrame &src, PVideoFrame &dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { + env->BitBlt(dst->GetWritePtr(PLANAR_Y), dst->GetPitch(PLANAR_Y), src->GetReadPtr(PLANAR_Y),src->GetPitch(PLANAR_Y), src->GetRowSize(PLANAR_Y), src->GetHeight()); + + const BYTE* srcU = src->GetReadPtr(PLANAR_U); + const BYTE* srcV = src->GetReadPtr(PLANAR_V); + + int srcUVpitch = src->GetPitch(PLANAR_U); + + BYTE* dstU = dst->GetWritePtr(PLANAR_U); + BYTE* dstV = dst->GetWritePtr(PLANAR_V); + + int dstUVpitch = dst->GetPitch(PLANAR_U); + + int width = src->GetRowSize(PLANAR_U) / pixelsize; + int height = src->GetHeight(PLANAR_U); + + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(dstU, 16) && IsPtrAligned(dstV, 16)) + { + convert_yv12_chroma_to_yv24_sse2(dstU, srcU, dstUVpitch, srcUVpitch, width, height); + convert_yv12_chroma_to_yv24_sse2(dstV, srcV, dstUVpitch, srcUVpitch, width, height); + } + else +#ifdef X86_32 + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_MMX)) + { + convert_yv12_chroma_to_yv24_mmx(dstU, srcU, dstUVpitch, srcUVpitch, width, height); + convert_yv12_chroma_to_yv24_mmx(dstV, srcV, dstUVpitch, srcUVpitch, width, height); + } + else +#endif + { + 
if (pixelsize == 1) { + convert_yv12_chroma_to_yv24_c(dstU, srcU, dstUVpitch, srcUVpitch, width, height); + convert_yv12_chroma_to_yv24_c(dstV, srcV, dstUVpitch, srcUVpitch, width, height); + } else if(pixelsize == 2) { + convert_yv12_chroma_to_yv24_c(dstU, srcU, dstUVpitch, srcUVpitch, width, height); + convert_yv12_chroma_to_yv24_c(dstV, srcV, dstUVpitch, srcUVpitch, width, height); + } + else { + convert_yv12_chroma_to_yv24_c(dstU, srcU, dstUVpitch, srcUVpitch, width, height); + convert_yv12_chroma_to_yv24_c(dstV, srcV, dstUVpitch, srcUVpitch, width, height); + } + } + + env->BitBlt(dst->GetWritePtr(PLANAR_A), dst->GetPitch(PLANAR_A), + src->GetReadPtr(PLANAR_A), src->GetPitch(PLANAR_A), dst->GetRowSize(PLANAR_A), dst->GetHeight(PLANAR_A)); + +} + +#if 0 +void Convert444FromYV12::ConvertImage(PVideoFrame src, Image444* dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) +{ env->BitBlt(dst->GetPtr(PLANAR_Y), dst->pitch, src->GetReadPtr(PLANAR_Y),src->GetPitch(PLANAR_Y), src->GetRowSize(PLANAR_Y), src->GetHeight()); const BYTE* srcU = src->GetReadPtr(PLANAR_U); @@ -148,42 +204,99 @@ void Convert444FromYV12::ConvertImage(PVideoFrame src, Image444* dst, IScriptEnv } } -void Convert444FromYV12::ConvertImageLumaOnly(PVideoFrame src, Image444* dst, IScriptEnvironment* env) { +void Convert444FromYV12::ConvertImageLumaOnly(PVideoFrame src, Image444* dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { env->BitBlt(dst->GetPtr(PLANAR_Y), dst->pitch, src->GetReadPtr(PLANAR_Y),src->GetPitch(PLANAR_Y), src->GetRowSize(PLANAR_Y), src->GetHeight()); } +#endif +#if 0 +void CopyToImage444(PVideoFrame src, Image444* dst, IScriptEnvironment* env) +{ + env->BitBlt(dst->GetPtr(PLANAR_Y), dst->GetPitch(PLANAR_Y), + src->GetReadPtr(PLANAR_Y),src->GetPitch(PLANAR_Y), src->GetRowSize(PLANAR_Y), src->GetHeight(PLANAR_Y)); + env->BitBlt(dst->GetPtr(PLANAR_U), dst->GetPitch(PLANAR_U), + src->GetReadPtr(PLANAR_U),src->GetPitch(PLANAR_U), src->GetRowSize(PLANAR_U), src->GetHeight(PLANAR_U)); + env->BitBlt(dst->GetPtr(PLANAR_V), dst->GetPitch(PLANAR_V), + src->GetReadPtr(PLANAR_V),src->GetPitch(PLANAR_V), src->GetRowSize(PLANAR_V), src->GetHeight(PLANAR_V)); + //env->BitBlt(dst->GetPtr(PLANAR_A), dst->GetPitch(PLANAR_A), + // src->GetReadPtr(PLANAR_A),src->GetPitch(PLANAR_A), src->GetRowSize(PLANAR_A), src->GetHeight(PLANAR_A)); +} +void CopyToImage444LumaOnly(PVideoFrame src, Image444* dst, IScriptEnvironment* env) +{ + env->BitBlt(dst->GetPtr(PLANAR_Y), dst->GetPitch(PLANAR_Y), + src->GetReadPtr(PLANAR_Y),src->GetPitch(PLANAR_Y), src->GetRowSize(PLANAR_Y), src->GetHeight(PLANAR_Y)); +} +#endif + +#if 0 +// we are using only Convert444FromYV24, other conversions will be treated in Avisynth's standard ConvertToYUV444 void Convert444FromYV24::ConvertImage(PVideoFrame src, Image444* dst, IScriptEnvironment* env) { - env->BitBlt(dst->GetPtr(PLANAR_Y), dst->pitch, + env->BitBlt(dst->GetPtr(PLANAR_Y), dst->GetPitch(PLANAR_Y), src->GetReadPtr(PLANAR_Y),src->GetPitch(PLANAR_Y), src->GetRowSize(PLANAR_Y), src->GetHeight(PLANAR_Y)); - env->BitBlt(dst->GetPtr(PLANAR_U), dst->pitch, + env->BitBlt(dst->GetPtr(PLANAR_U), dst->GetPitch(PLANAR_U), src->GetReadPtr(PLANAR_U),src->GetPitch(PLANAR_U), src->GetRowSize(PLANAR_U), src->GetHeight(PLANAR_U)); - env->BitBlt(dst->GetPtr(PLANAR_V), dst->pitch, + env->BitBlt(dst->GetPtr(PLANAR_V), dst->GetPitch(PLANAR_V), src->GetReadPtr(PLANAR_V),src->GetPitch(PLANAR_V), src->GetRowSize(PLANAR_V), src->GetHeight(PLANAR_V)); + env->BitBlt(dst->GetPtr(PLANAR_A), 
dst->GetPitch(PLANAR_A), + src->GetReadPtr(PLANAR_A),src->GetPitch(PLANAR_A), src->GetRowSize(PLANAR_A), src->GetHeight(PLANAR_A)); } void Convert444FromYV24::ConvertImageLumaOnly(PVideoFrame src, Image444* dst, IScriptEnvironment* env) { - env->BitBlt(dst->GetPtr(PLANAR_Y), dst->pitch, + env->BitBlt(dst->GetPtr(PLANAR_Y), dst->GetPitch(PLANAR_Y), src->GetReadPtr(PLANAR_Y),src->GetPitch(PLANAR_Y), src->GetRowSize(PLANAR_Y), src->GetHeight(PLANAR_Y)); } +#endif -void Convert444FromY8::ConvertImage(PVideoFrame src, Image444* dst, IScriptEnvironment* env) { +#if 0 +void Convert444FromY8::ConvertImage(PVideoFrame src, Image444* dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { env->BitBlt(dst->GetPtr(PLANAR_Y), dst->pitch, src->GetReadPtr(PLANAR_Y),src->GetPitch(PLANAR_Y), src->GetRowSize(PLANAR_Y), src->GetHeight(PLANAR_Y)); memset((void *)dst->GetPtr(PLANAR_U), 0x80, dst->pitch * dst->h()); memset((void *)dst->GetPtr(PLANAR_V), 0x80, dst->pitch * dst->h()); } -void Convert444FromY8::ConvertImageLumaOnly(PVideoFrame src, Image444* dst, IScriptEnvironment* env) { +void Convert444FromY8::ConvertImageLumaOnly(PVideoFrame src, Image444* dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { env->BitBlt(dst->GetPtr(PLANAR_Y), dst->pitch, src->GetReadPtr(PLANAR_Y),src->GetPitch(PLANAR_Y), src->GetRowSize(PLANAR_Y), src->GetHeight(PLANAR_Y)); } - +#endif /***** YUY2 -> YUV 4:4:4 ******/ +void Convert444FromYUY2(PVideoFrame &src, PVideoFrame &dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { + + const BYTE* srcP = src->GetReadPtr(); + int srcPitch = src->GetPitch(); + + BYTE* dstY = dst->GetWritePtr(PLANAR_Y); + BYTE* dstU = dst->GetWritePtr(PLANAR_U); + BYTE* dstV = dst->GetWritePtr(PLANAR_V); + + int dstPitch = dst->GetPitch(); -void Convert444FromYUY2::ConvertImage(PVideoFrame src, Image444* dst, IScriptEnvironment* env) { + int w = src->GetRowSize() / 2; + int h = src->GetHeight(); + + for (int y=0; yGetReadPtr(); int srcPitch = src->GetPitch(); @@ -214,7 +327,7 @@ void Convert444FromYUY2::ConvertImage(PVideoFrame src, Image444* dst, IScriptEnv } -void Convert444FromYUY2::ConvertImageLumaOnly(PVideoFrame src, Image444* dst, IScriptEnvironment* env) { +void Convert444FromYUY2::ConvertImageLumaOnly(PVideoFrame src, Image444* dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { const BYTE* srcP = src->GetReadPtr(); int srcPitch = src->GetPitch(); @@ -236,7 +349,44 @@ void Convert444FromYUY2::ConvertImageLumaOnly(PVideoFrame src, Image444* dst, IS dstY+=dstPitch; } } +#endif + +#if 0 +// original format, source, target, +PVideoFrame Convert444ToOriginal(VideoInfo *vi, Image444 *src, PVideoFrame dst, bool rgb_full_range, IScriptEnvironment *env) +{ + if(vi->Is444()) { + // always! 
+ env->MakeWritable(&dst); + + env->BitBlt(dst->GetWritePtr(PLANAR_Y), dst->GetPitch(PLANAR_Y), + src->GetPtr(PLANAR_Y), src->GetPitch(PLANAR_Y), dst->GetRowSize(PLANAR_Y), dst->GetHeight(PLANAR_Y)); + env->BitBlt(dst->GetWritePtr(PLANAR_U), dst->GetPitch(PLANAR_U), + src->GetPtr(PLANAR_U), src->GetPitch(PLANAR_U), dst->GetRowSize(PLANAR_U), dst->GetHeight(PLANAR_U)); + env->BitBlt(dst->GetWritePtr(PLANAR_V), dst->GetPitch(PLANAR_V), + src->GetPtr(PLANAR_V), src->GetPitch(PLANAR_V), dst->GetRowSize(PLANAR_V), dst->GetHeight(PLANAR_V)); + //env->BitBlt(dst->GetWritePtr(PLANAR_A), dst->GetPitch(PLANAR_A), + // src->GetPtr(PLANAR_A), src->GetPitch(PLANAR_A), dst->GetRowSize(PLANAR_A), dst->GetHeight(PLANAR_A)); + + return dst; + } + /* + if (vi->IsY()) { + env->MakeWritable(&dst); + + env->BitBlt(dst->GetWritePtr(PLANAR_Y), dst->GetPitch(PLANAR_Y), + src->GetPtr(PLANAR_Y), src->pitch, dst->GetRowSize(PLANAR_Y), dst->GetHeight(PLANAR_Y)); + return dst; + } + */ + env->ThrowError("Overlay: Output Colorspace is not 444, must be a bug"); + // invoke Avisynth's converters + // ... + return dst; +} +#endif +#if 0 /****** YUV 4:4:4 -> YUV 4:4:4 - Perhaps the easiest job in the world ;) *****/ PVideoFrame Convert444ToYV24::ConvertImage(Image444* src, PVideoFrame dst, IScriptEnvironment* env) { env->MakeWritable(&dst); @@ -247,16 +397,22 @@ PVideoFrame Convert444ToYV24::ConvertImage(Image444* src, PVideoFrame dst, IScri src->GetPtr(PLANAR_U), src->pitch, dst->GetRowSize(PLANAR_U), dst->GetHeight(PLANAR_U)); env->BitBlt(dst->GetWritePtr(PLANAR_V), dst->GetPitch(PLANAR_V), src->GetPtr(PLANAR_V), src->pitch, dst->GetRowSize(PLANAR_V), dst->GetHeight(PLANAR_V)); + env->BitBlt(dst->GetWritePtr(PLANAR_A), dst->GetPitch(PLANAR_A), + src->GetPtr(PLANAR_A), src->pitch, dst->GetRowSize(PLANAR_A), dst->GetHeight(PLANAR_A)); return dst; } +#endif -PVideoFrame Convert444ToY8::ConvertImage(Image444* src, PVideoFrame dst, IScriptEnvironment* env) { +#if 0 +PVideoFrame Convert444ToY8::ConvertImage(Image444* src, PVideoFrame dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { env->MakeWritable(&dst); env->BitBlt(dst->GetWritePtr(PLANAR_Y), dst->GetPitch(PLANAR_Y), src->GetPtr(PLANAR_Y), src->pitch, dst->GetRowSize(PLANAR_Y), dst->GetHeight()); return dst; } +#endif + static __forceinline __m128i convert_yv24_chroma_block_to_yv12_sse2(const __m128i &src_line0_p0, const __m128i &src_line1_p0, const __m128i &src_line0_p1, const __m128i &src_line1_p1, const __m128i &ffff, const __m128i &mask) { __m128i avg1 = _mm_avg_epu8(src_line0_p0, src_line1_p0); @@ -377,17 +533,79 @@ static void convert_yv24_chroma_to_yv12_isse(BYTE *dstp, const BYTE *srcp, int d #endif // X86_32 -static void convert_yv24_chroma_to_yv12_c(BYTE *dstp, const BYTE *srcp, int dst_pitch, int src_pitch, int dst_width, const int dst_hegiht) { - for (int y=0; y < dst_hegiht; y++) { +template +static void convert_yv24_chroma_to_yv12_c(BYTE *dstp8, const BYTE *srcp8, int dst_pitch, int src_pitch, int dst_width, const int dst_height) { + const pixel_t *srcp = reinterpret_cast(srcp8); + pixel_t *dstp = reinterpret_cast(dstp8); + dst_pitch /= sizeof(pixel_t); + src_pitch /= sizeof(pixel_t); + for (int y=0; y < dst_height; y++) { for (int x=0; x < dst_width; x++) { - dstp[x] = (srcp[x*2] + srcp[x*2+1] + srcp[x*2+src_pitch] + srcp[x*2+src_pitch+1] + 2)>>2; + dstp[x] = (srcp[x*2] + srcp[x*2+1] + srcp[x*2+src_pitch] + srcp[x*2+src_pitch+1] + 2) / 4; // >> 2 } srcp+=src_pitch*2; dstp+=dst_pitch; } } -PVideoFrame Convert444ToYV12::ConvertImage(Image444* 
src, PVideoFrame dst, IScriptEnvironment* env) +void Convert444ToYV12(PVideoFrame &src, PVideoFrame &dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) +{ +// env->MakeWritable(&dst); already writeable + + env->BitBlt(dst->GetWritePtr(PLANAR_Y), dst->GetPitch(PLANAR_Y), + src->GetReadPtr(PLANAR_Y), src->GetPitch(), dst->GetRowSize(PLANAR_Y), dst->GetHeight()); + + const BYTE* srcU = src->GetReadPtr(PLANAR_U); + const BYTE* srcV = src->GetReadPtr(PLANAR_V); + + int srcUVpitch = src->GetPitch(PLANAR_U); + + BYTE* dstU = dst->GetWritePtr(PLANAR_U); + BYTE* dstV = dst->GetWritePtr(PLANAR_V); + + int dstUVpitch = dst->GetPitch(PLANAR_U); + + int w = dst->GetRowSize(PLANAR_U); + int h = dst->GetHeight(PLANAR_U); + + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcU, 16) && IsPtrAligned(srcV, 16) && IsPtrAligned(dstU, 16) && IsPtrAligned(dstV, 16)) + { + convert_yv24_chroma_to_yv12_sse2(dstU, srcU, dstUVpitch, srcUVpitch, w, h); + convert_yv24_chroma_to_yv12_sse2(dstV, srcV, dstUVpitch, srcUVpitch, w, h); + } + else { +#ifdef X86_32 + if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_INTEGER_SSE)) + { + convert_yv24_chroma_to_yv12_isse(dstU, srcU, dstUVpitch, srcUVpitch, w, h); + convert_yv24_chroma_to_yv12_isse(dstV, srcV, dstUVpitch, srcUVpitch, w, h); + } + else { +#endif + { + if(pixelsize==1) { + convert_yv24_chroma_to_yv12_c(dstU, srcU, dstUVpitch, srcUVpitch, w, h); + convert_yv24_chroma_to_yv12_c(dstV, srcV, dstUVpitch, srcUVpitch, w, h); + } + else if (pixelsize == 2) { + convert_yv24_chroma_to_yv12_c(dstU, srcU, dstUVpitch, srcUVpitch, w, h); + convert_yv24_chroma_to_yv12_c(dstV, srcV, dstUVpitch, srcUVpitch, w, h); + } + else // if (pixelsize == 4) + convert_yv24_chroma_to_yv12_c(dstU, srcU, dstUVpitch, srcUVpitch, w, h); + convert_yv24_chroma_to_yv12_c(dstV, srcV, dstUVpitch, srcUVpitch, w, h); + } + } + } + + env->BitBlt(dst->GetWritePtr(PLANAR_A), dst->GetPitch(PLANAR_A), + src->GetReadPtr(PLANAR_A), src->GetPitch(PLANAR_A), dst->GetRowSize(PLANAR_A), dst->GetHeight(PLANAR_A)); + +//return dst; +} + +#if 0 +PVideoFrame Convert444ToYV12::ConvertImage(Image444* src, PVideoFrame dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { env->MakeWritable(&dst); @@ -427,11 +645,42 @@ PVideoFrame Convert444ToYV12::ConvertImage(Image444* src, PVideoFrame dst, IScri } return dst; } +#endif // if 0 +/***** YUV 4:4:4 -> YUY2 *******/ +void Convert444ToYUY2(PVideoFrame &src, PVideoFrame &dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { -/***** YUV 4:4:4 -> YUY2 *******/ + const BYTE* srcY = src->GetReadPtr(PLANAR_Y); + const BYTE* srcU = src->GetReadPtr(PLANAR_U); + const BYTE* srcV = src->GetReadPtr(PLANAR_V); + + int srcPitch = src->GetPitch(); + + BYTE* dstP = dst->GetWritePtr(); + + int dstPitch = dst->GetPitch(); -PVideoFrame Convert444ToYUY2::ConvertImage(Image444* src, PVideoFrame dst, IScriptEnvironment* env) { + int w = src->GetRowSize() / pixelsize; + int h = src->GetHeight(); + + for (int y=0; y>1; + dstP[x2+2] = srcY[x+1]; + dstP[x2+3] = (srcV[x] + srcV[x+1] + 1)>>1; + } + srcY+=srcPitch; + srcU+=srcPitch; + srcV+=srcPitch; + dstP+=dstPitch; + } +// return dst; +} + +#if 0 +PVideoFrame Convert444ToYUY2::ConvertImage(Image444* src, PVideoFrame dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { env->MakeWritable(&dst); const BYTE* srcY = src->GetPtr(PLANAR_Y); @@ -462,6 +711,7 @@ PVideoFrame Convert444ToYUY2::ConvertImage(Image444* src, PVideoFrame dst, IScri } return dst; } +#endif /***** YUV 4:4:4 
-> RGB24/32 *******/ @@ -469,7 +719,8 @@ PVideoFrame Convert444ToYUY2::ConvertImage(Image444* src, PVideoFrame dst, IScri #define Kg 0.587 #define Kb 0.114 -PVideoFrame Convert444ToRGB::ConvertImage(Image444* src, PVideoFrame dst, IScriptEnvironment* env) { +#if 0 +PVideoFrame Convert444ToRGB::ConvertImage(Image444* src, PVideoFrame dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { const int crv = int(2*(1-Kr) * 255.0/224.0 * 65536+0.5); const int cgv = int(2*(1-Kr)*Kr/Kg * 255.0/224.0 * 65536+0.5); const int cgu = int(2*(1-Kb)*Kb/Kg * 255.0/224.0 * 65536+0.5); @@ -511,8 +762,11 @@ PVideoFrame Convert444ToRGB::ConvertImage(Image444* src, PVideoFrame dst, IScrip } return dst; } +#endif + +#if 0 -PVideoFrame Convert444NonCCIRToRGB::ConvertImage(Image444* src, PVideoFrame dst, IScriptEnvironment* env) { +PVideoFrame Convert444NonCCIRToRGB::ConvertImage(Image444* src, PVideoFrame dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { const int crv = int(2*(1-Kr) * 65536+0.5); const int cgv = int(2*(1-Kr)*Kr/Kg * 65536+0.5); const int cgu = int(2*(1-Kb)*Kb/Kg * 65536+0.5); @@ -554,11 +808,11 @@ PVideoFrame Convert444NonCCIRToRGB::ConvertImage(Image444* src, PVideoFrame dst, } return dst; } +#endif - +#if 0 /******* RGB 24/32 -> YUV444 *******/ - -void Convert444FromRGB::ConvertImageLumaOnly(PVideoFrame src, Image444* dst, IScriptEnvironment* env) { +void Convert444FromRGB::ConvertImageLumaOnly(PVideoFrame src, Image444* dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { const BYTE* srcP = src->GetReadPtr(); int srcPitch = src->GetPitch(); @@ -584,7 +838,7 @@ void Convert444FromRGB::ConvertImageLumaOnly(PVideoFrame src, Image444* dst, ISc } } -void Convert444FromRGB::ConvertImage(PVideoFrame src, Image444* dst, IScriptEnvironment* env) { +void Convert444FromRGB::ConvertImage(PVideoFrame src, Image444* dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { const int cyr = int(Kr * 219/255 * 65536 + 0.5); const int cyg = int(Kg * 219/255 * 65536 + 0.5); const int cyb = int(Kb * 219/255 * 65536 + 0.5); @@ -634,7 +888,7 @@ void Convert444FromRGB::ConvertImage(PVideoFrame src, Image444* dst, IScriptEnvi } } -void Convert444NonCCIRFromRGB::ConvertImage(PVideoFrame src, Image444* dst, IScriptEnvironment* env) { +void Convert444NonCCIRFromRGB::ConvertImage(PVideoFrame src, Image444* dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { const int cyb = int(Kb * 65536 + 0.5); const int cyg = int(Kg * 65536 + 0.5); const int cyr = int(Kr * 65536 + 0.5); @@ -679,4 +933,5 @@ void Convert444NonCCIRFromRGB::ConvertImage(PVideoFrame src, Image444* dst, IScr dstV+=dstPitch; } } +#endif diff --git a/avs_core/filters/overlay/444convert.h b/avs_core/filters/overlay/444convert.h index 80788230f..f65915291 100644 --- a/avs_core/filters/overlay/444convert.h +++ b/avs_core/filters/overlay/444convert.h @@ -40,12 +40,20 @@ #include #include "imghelpers.h" +void Convert444FromYV12(PVideoFrame &src, PVideoFrame &dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env); +void Convert444FromYUY2(PVideoFrame &src, PVideoFrame &dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env); +void Convert444ToYV12(PVideoFrame &src, PVideoFrame &dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env); +void Convert444ToYUY2(PVideoFrame &src, PVideoFrame &dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env); + +#if 0 class ConvertTo444 { private: - VideoInfo* inputVi; + VideoInfo* inputVi; + int pixelsize; + int bits_per_pixel; 
public: - ConvertTo444() {inputVi = 0; } + ConvertTo444() { inputVi = 0; } virtual void ConvertImage(PVideoFrame src_frame, Image444* dst_frame, IScriptEnvironment* env) { env->ThrowError("Overlay: Unable to convert input image."); } @@ -54,43 +62,62 @@ class ConvertTo444 { } void SetVideoInfo(VideoInfo* in_vi) { inputVi = in_vi; + pixelsize = inputVi->ComponentSize(); + bits_per_pixel = inputVi->BitsPerComponent(); } }; +#endif +#if 0 class ConvertFrom444 { public: - ConvertFrom444() {} - virtual PVideoFrame ConvertImage(Image444* src_frame, PVideoFrame dst_frame, IScriptEnvironment* env) { + ConvertFrom444() { } + virtual PVideoFrame ConvertImage(Image444* src_frame, PVideoFrame dst_frame, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { env->ThrowError("Overlay: Unable to convert output image."); return 0; } }; +// avs+ we are using this +// simple Blts +void CopyToImage444(PVideoFrame src, Image444* dst, IScriptEnvironment* env); +void CopyToImage444LumaOnly(PVideoFrame src, Image444* dst, IScriptEnvironment* env); +// Simple BLTs for YUV444 and Y, or else invoke Avisynth's converters +PVideoFrame Convert444ToOriginal(VideoInfo *vi, Image444 *src, PVideoFrame dst, bool rgb_full_range, IScriptEnvironment *env); +#if 0 class Convert444FromYV24 : public ConvertTo444 { public: void ConvertImage(PVideoFrame src_frame, Image444* dst_frame, IScriptEnvironment* env); void ConvertImageLumaOnly(PVideoFrame src_frame, Image444* dst_frame, IScriptEnvironment* env); }; +#endif +#if 0 class Convert444FromY8 : public ConvertTo444 { public: void ConvertImage(PVideoFrame src_frame, Image444* dst_frame, IScriptEnvironment* env); void ConvertImageLumaOnly(PVideoFrame src_frame, Image444* dst_frame, IScriptEnvironment* env); }; +#endif +#if 0 class Convert444FromYV12 : public ConvertTo444 { public: void ConvertImage(PVideoFrame src_frame, Image444* dst_frame, IScriptEnvironment* env); void ConvertImageLumaOnly(PVideoFrame src_frame, Image444* dst_frame, IScriptEnvironment* env); }; +#endif +#if 0 class Convert444FromYUY2 : public ConvertTo444 { public: void ConvertImage(PVideoFrame src_frame, Image444* dst_frame, IScriptEnvironment* env); void ConvertImageLumaOnly(PVideoFrame src_frame, Image444* dst_frame, IScriptEnvironment* env); }; +#endif +#if 0 class Convert444FromRGB : public ConvertTo444 { private: @@ -105,7 +132,9 @@ class Convert444NonCCIRFromRGB : public Convert444FromRGB { public: void ConvertImage(PVideoFrame src_frame, Image444* dst_frame, IScriptEnvironment* env); }; +#endif +#if 0 class Convert444ToYV24 : public ConvertFrom444 { public: PVideoFrame ConvertImage(Image444* src_frame, PVideoFrame dst_frame, IScriptEnvironment* env); @@ -135,5 +164,7 @@ class Convert444NonCCIRToRGB : public ConvertFrom444 { public: PVideoFrame ConvertImage(Image444* src_frame, PVideoFrame dst_frame, IScriptEnvironment* env); }; +#endif +#endif #endif //444Convert \ No newline at end of file diff --git a/avs_core/filters/overlay/OF_add.cpp b/avs_core/filters/overlay/OF_add.cpp index b4c0de2cb..61e9fbe90 100644 --- a/avs_core/filters/overlay/OF_add.cpp +++ b/avs_core/filters/overlay/OF_add.cpp @@ -36,6 +36,29 @@ #include "overlayfunctions.h" +#include +#include + +void OL_AddImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); +} + +void 
OL_AddImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, nullptr); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, nullptr); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); +} + +/* +template void OL_AddImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -113,67 +136,106 @@ void OL_AddImage::BlendImageMask(Image444* base, Image444* overlay, Image444* ma } } - -void OL_AddImage::BlendImage(Image444* base, Image444* overlay) { +*/ +template +void OL_AddImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { - BYTE* baseY = base->GetPtr(PLANAR_Y); - BYTE* baseU = base->GetPtr(PLANAR_U); - BYTE* baseV = base->GetPtr(PLANAR_V); + pixel_t* baseY = reinterpret_cast(base->GetPtr(PLANAR_Y)); + pixel_t* baseU = reinterpret_cast(base->GetPtr(PLANAR_U)); + pixel_t* baseV = reinterpret_cast(base->GetPtr(PLANAR_V)); + + pixel_t* ovY = reinterpret_cast(overlay->GetPtr(PLANAR_Y)); + pixel_t* ovU = reinterpret_cast(overlay->GetPtr(PLANAR_U)); + pixel_t* ovV = reinterpret_cast(overlay->GetPtr(PLANAR_V)); + + pixel_t* maskY = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_Y)) : nullptr; + pixel_t* maskU = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_U)) : nullptr; + pixel_t* maskV = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_V)) : nullptr; + + const int half_pixel_value = (sizeof(pixel_t) == 1) ? 128 : (1 << (bits_per_pixel - 1)); + const int max_pixel_value = (sizeof(pixel_t) == 1) ? 255 : (1 << bits_per_pixel) - 1; + const int pixel_range = max_pixel_value + 1; + const int SHIFT = (sizeof(pixel_t) == 1) ? 5 : 5 + (bits_per_pixel - 8); + const int MASK_CORR_SHIFT = (sizeof(pixel_t) == 1) ? 8 : bits_per_pixel; + const int OPACITY_SHIFT = 8; // opacity always max 0..256 + const int over32 = (1 << SHIFT); // 32 + const int basepitch = (base->pitch) / sizeof(pixel_t); + const int overlaypitch = (overlay->pitch) / sizeof(pixel_t); + const int maskpitch = maskMode ? (mask->pitch) / sizeof(pixel_t) : 0; + + // avoid "uint16*uint16 can't get into int32" overflows + typedef std::conditional < sizeof(pixel_t) == 1, int, typename std::conditional < sizeof(pixel_t) == 2, __int64, float>::type >::type result_t; - BYTE* ovY = overlay->GetPtr(PLANAR_Y); - BYTE* ovU = overlay->GetPtr(PLANAR_U); - BYTE* ovV = overlay->GetPtr(PLANAR_V); - int w = base->w(); int h = base->h(); if (opacity == 256) { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int Y = baseY[x] + ovY[x]; - int U = baseU[x] + ovU[x] - 128; - int V = baseV[x] + ovV[x] - 128; - if (Y>255) { // Apply overbrightness to UV - int multiplier = max(0,288-Y); // 0 to 32 - U = ((U*multiplier) + (128*(32-multiplier)))>>5; - V = ((V*multiplier) + (128*(32-multiplier)))>>5; - Y = 255; + int Y = baseY[x] + (maskMode ? (((result_t)ovY[x] * maskY[x]) >> MASK_CORR_SHIFT) : ovY[x]); + int U = baseU[x] + (int)(maskMode ? ((((result_t)half_pixel_value*(pixel_range - maskU[x])) + ((result_t)maskU[x] * ovU[x])) >> MASK_CORR_SHIFT) : ovU[x]) - half_pixel_value; + int V = baseV[x] + (int)(maskMode ? 
((((result_t)half_pixel_value*(pixel_range - maskV[x])) + ((result_t)maskV[x] * ovV[x])) >> MASK_CORR_SHIFT) : ovV[x]) - half_pixel_value; + if (Y>max_pixel_value) { // Apply overbrightness to UV + int multiplier = max(0,pixel_range + over32 -Y); // 0 to 32 + U = ((U*multiplier) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + V = ((V*multiplier) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + Y = max_pixel_value; } - baseU[x] = (BYTE)clamp(U, 0, 255); - baseV[x] = (BYTE)clamp(V, 0, 255); - baseY[x] = (BYTE)Y; + baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); + baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); + baseY[x] = (pixel_t)Y; + } + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; + + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; + + if(maskMode) { + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; - - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; - } } else { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int Y = baseY[x] + ((opacity*ovY[x])>>8); - int U = baseU[x] + (((128*inv_opacity)+(opacity*(ovU[x])))>>8) - 128; - int V = baseV[x] + (((128*inv_opacity)+(opacity*(ovV[x])))>>8) - 128; - if (Y>255) { // Apply overbrightness to UV - int multiplier = max(0,288-Y); // 0 to 32 - U = ((U*multiplier) + (128*(32-multiplier)))>>5; - V = ((V*multiplier) + (128*(32-multiplier)))>>5; - Y = 255; + int Y = baseY[x] + (maskMode ? (((result_t)maskY[x] * opacity*ovY[x]) >> (OPACITY_SHIFT + MASK_CORR_SHIFT)) : ((opacity*ovY[x]) >> OPACITY_SHIFT)); + int U, V; + if (maskMode) { + result_t mU = (maskU[x] * opacity) >> OPACITY_SHIFT; + result_t mV = (maskV[x] * opacity) >> OPACITY_SHIFT; + U = baseU[x] + (int)(((half_pixel_value*(pixel_range - mU)) + (mU*ovU[x])) >> MASK_CORR_SHIFT) - half_pixel_value; + V = baseV[x] + (int)(((half_pixel_value*(pixel_range - mV)) + (mV*ovV[x])) >> MASK_CORR_SHIFT) - half_pixel_value; } - baseU[x] = (BYTE)clamp(U, 0, 255); - baseV[x] = (BYTE)clamp(V, 0, 255); - baseY[x] = (BYTE)Y; + else { + U = baseU[x] + (((half_pixel_value*inv_opacity)+(opacity*(ovU[x])))>>OPACITY_SHIFT) - half_pixel_value; + V = baseV[x] + (((half_pixel_value*inv_opacity)+(opacity*(ovV[x])))>>OPACITY_SHIFT) - half_pixel_value; + } + if (Y>max_pixel_value) { // Apply overbrightness to UV + int multiplier = max(0,(max_pixel_value + 1) + over32 - Y); // 288-Y : 0 to 32 + U = ((U*multiplier) + (half_pixel_value*(over32 - multiplier))) >> SHIFT; + V = ((V*multiplier) + (half_pixel_value*(over32 - multiplier))) >> SHIFT; + Y = max_pixel_value; + } + baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); + baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); + baseY[x] = (pixel_t)Y; + } + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; + + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; + + if(maskMode) { + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; - - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; } } } diff --git a/avs_core/filters/overlay/OF_blend.cpp b/avs_core/filters/overlay/OF_blend.cpp index 0dbe9286b..b8b6d5bad 100644 --- a/avs_core/filters/overlay/OF_blend.cpp +++ b/avs_core/filters/overlay/OF_blend.cpp @@ -37,6 +37,28 @@ #include "overlayfunctions.h" #include +#include + +void OL_BlendImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { + if 
(bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + else if(bits_per_pixel == 16) + BlendImageMask(base, overlay, mask); +} + +void OL_BlendImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImage(base, overlay); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + else if(bits_per_pixel == 16) + BlendImage(base, overlay); +} + + +template void OL_BlendImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -94,6 +116,7 @@ void OL_BlendImage::BlendImageMask(Image444* base, Image444* overlay, Image444* } } +template void OL_BlendImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); diff --git a/avs_core/filters/overlay/OF_darken.cpp b/avs_core/filters/overlay/OF_darken.cpp index 6c51dae4c..089f4945c 100644 --- a/avs_core/filters/overlay/OF_darken.cpp +++ b/avs_core/filters/overlay/OF_darken.cpp @@ -36,6 +36,28 @@ #include "overlayfunctions.h" +#include + +void OL_DarkenImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + else if(bits_per_pixel == 16) + BlendImageMask(base, overlay, mask); +} + +void OL_DarkenImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImage(base, overlay); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + else if(bits_per_pixel == 16) + BlendImage(base, overlay); +} + + +template void OL_DarkenImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -100,6 +122,7 @@ void OL_DarkenImage::BlendImageMask(Image444* base, Image444* overlay, Image444* } +template void OL_DarkenImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); diff --git a/avs_core/filters/overlay/OF_difference.cpp b/avs_core/filters/overlay/OF_difference.cpp index 5d3b62f78..9983b1ce9 100644 --- a/avs_core/filters/overlay/OF_difference.cpp +++ b/avs_core/filters/overlay/OF_difference.cpp @@ -38,6 +38,28 @@ #include #include +#include + +void OL_DifferenceImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + else if(bits_per_pixel == 16) + BlendImageMask(base, overlay, mask); +} + +void OL_DifferenceImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImage(base, overlay); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + else if(bits_per_pixel == 16) + BlendImage(base, overlay); +} + + +template void OL_DifferenceImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -135,6 +157,7 @@ void OL_DifferenceImage::BlendImageMask(Image444* base, Image444* overlay, Image } // for y } +template void OL_DifferenceImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); diff --git a/avs_core/filters/overlay/OF_exclusion.cpp 
b/avs_core/filters/overlay/OF_exclusion.cpp index ff18163b6..39baf4eec 100644 --- a/avs_core/filters/overlay/OF_exclusion.cpp +++ b/avs_core/filters/overlay/OF_exclusion.cpp @@ -37,6 +37,28 @@ #include "overlayfunctions.h" #include +#include + +void OL_ExclusionImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + else if(bits_per_pixel == 16) + BlendImageMask(base, overlay, mask); +} + +void OL_ExclusionImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImage(base, overlay); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + else if(bits_per_pixel == 16) + BlendImage(base, overlay); +} + + +template void OL_ExclusionImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -134,6 +156,7 @@ void OL_ExclusionImage::BlendImageMask(Image444* base, Image444* overlay, Image4 } // for y } +template void OL_ExclusionImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); diff --git a/avs_core/filters/overlay/OF_lighten.cpp b/avs_core/filters/overlay/OF_lighten.cpp index 495d1b81c..1aa105fff 100644 --- a/avs_core/filters/overlay/OF_lighten.cpp +++ b/avs_core/filters/overlay/OF_lighten.cpp @@ -36,6 +36,28 @@ #include "overlayfunctions.h" +#include + +void OL_LightenImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + else if(bits_per_pixel == 16) + BlendImageMask(base, overlay, mask); +} + +void OL_LightenImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImage(base, overlay); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + else if(bits_per_pixel == 16) + BlendImage(base, overlay); +} + + +template void OL_LightenImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -100,6 +122,7 @@ void OL_LightenImage::BlendImageMask(Image444* base, Image444* overlay, Image444 } +template void OL_LightenImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); diff --git a/avs_core/filters/overlay/OF_lumachroma.cpp b/avs_core/filters/overlay/OF_lumachroma.cpp index e2efaa0be..5d873d596 100644 --- a/avs_core/filters/overlay/OF_lumachroma.cpp +++ b/avs_core/filters/overlay/OF_lumachroma.cpp @@ -37,7 +37,45 @@ #include "overlayfunctions.h" #include +#include + +void OL_BlendLumaImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + else if(bits_per_pixel == 16) + BlendImageMask(base, overlay, mask); +} + +void OL_BlendLumaImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImage(base, overlay); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + else if(bits_per_pixel == 16) + BlendImage(base, overlay); +} + +void OL_BlendChromaImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { + if (bits_per_pixel == 8) + BlendImageMask(base, 
overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + else if(bits_per_pixel == 16) + BlendImageMask(base, overlay, mask); +} + +void OL_BlendChromaImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImage(base, overlay); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + else if(bits_per_pixel == 16) + BlendImage(base, overlay); +} +template void OL_BlendLumaImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); @@ -78,6 +116,7 @@ void OL_BlendLumaImage::BlendImageMask(Image444* base, Image444* overlay, Image4 } +template void OL_BlendLumaImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); @@ -106,6 +145,7 @@ void OL_BlendLumaImage::BlendImage(Image444* base, Image444* overlay) { +template void OL_BlendChromaImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseU = base->GetPtr(PLANAR_U); BYTE* baseV = base->GetPtr(PLANAR_V); @@ -154,6 +194,7 @@ void OL_BlendChromaImage::BlendImageMask(Image444* base, Image444* overlay, Imag } } +template void OL_BlendChromaImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseU = base->GetPtr(PLANAR_U); BYTE* baseV = base->GetPtr(PLANAR_V); diff --git a/avs_core/filters/overlay/OF_multiply.cpp b/avs_core/filters/overlay/OF_multiply.cpp index ca650d9fa..3980713e9 100644 --- a/avs_core/filters/overlay/OF_multiply.cpp +++ b/avs_core/filters/overlay/OF_multiply.cpp @@ -36,6 +36,28 @@ #include "overlayfunctions.h" +#include + +void OL_MultiplyImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + +} + +void OL_MultiplyImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImage(base, overlay); + else if(bits_per_pixel <= 16) + BlendImage(base, overlay); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); +} + +template void OL_MultiplyImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -117,6 +139,7 @@ void OL_MultiplyImage::BlendImageMask(Image444* base, Image444* overlay, Image44 } +template void OL_MultiplyImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); diff --git a/avs_core/filters/overlay/OF_softhardlight.cpp b/avs_core/filters/overlay/OF_softhardlight.cpp index 7cccb7bb4..77eb774c2 100644 --- a/avs_core/filters/overlay/OF_softhardlight.cpp +++ b/avs_core/filters/overlay/OF_softhardlight.cpp @@ -36,6 +36,45 @@ #include "overlayfunctions.h" +#include + +void OL_SoftLightImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + else if(bits_per_pixel == 16) + BlendImageMask(base, overlay, mask); +} + +void OL_SoftLightImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImage(base, overlay); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + else if(bits_per_pixel == 16) + BlendImage(base, overlay); +} + +void OL_HardLightImage::DoBlendImageMask(Image444* base, Image444* overlay, 
Image444* mask) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + else if(bits_per_pixel == 16) + BlendImageMask(base, overlay, mask); +} + +void OL_HardLightImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImage(base, overlay); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + else if(bits_per_pixel == 16) + BlendImage(base, overlay); +} + +template void OL_SoftLightImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -134,6 +173,7 @@ void OL_SoftLightImage::BlendImageMask(Image444* base, Image444* overlay, Image4 } // for y } +template void OL_SoftLightImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -213,6 +253,7 @@ void OL_SoftLightImage::BlendImage(Image444* base, Image444* overlay) { /************* Hard Light ***************/ +template void OL_HardLightImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -311,6 +352,7 @@ void OL_HardLightImage::BlendImageMask(Image444* base, Image444* overlay, Image4 } // for y } +template void OL_HardLightImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); diff --git a/avs_core/filters/overlay/OF_subtract.cpp b/avs_core/filters/overlay/OF_subtract.cpp index 3a75e70f5..033c3b75c 100644 --- a/avs_core/filters/overlay/OF_subtract.cpp +++ b/avs_core/filters/overlay/OF_subtract.cpp @@ -37,6 +37,27 @@ #include "overlayfunctions.h" #include +#include + +void OL_SubtractImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + else if(bits_per_pixel == 16) + BlendImageMask(base, overlay, mask); +} + +void OL_SubtractImage::DoBlendImage(Image444* base, Image444* overlay) { + if (bits_per_pixel == 8) + BlendImage(base, overlay); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + else if(bits_per_pixel == 16) + BlendImage(base, overlay); +} + +template void OL_SubtractImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -116,6 +137,7 @@ void OL_SubtractImage::BlendImageMask(Image444* base, Image444* overlay, Image44 } +template void OL_SubtractImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); diff --git a/avs_core/filters/overlay/imghelpers.h b/avs_core/filters/overlay/imghelpers.h index 3fca208fd..1ad2479af 100644 --- a/avs_core/filters/overlay/imghelpers.h +++ b/avs_core/filters/overlay/imghelpers.h @@ -40,79 +40,137 @@ #include #include +#define USE_ORIG_FRAME + class Image444 { private: IScriptEnvironment2 * Env; + + PVideoFrame &frame; + BYTE* Y_plane; BYTE* U_plane; BYTE* V_plane; + BYTE* A_plane; BYTE* fake_Y_plane; BYTE* fake_U_plane; BYTE* fake_V_plane; + BYTE* fake_A_plane; int fake_w; int fake_h; const int _w; const int _h; + const int _bits_per_pixel; + const bool hasAlpha; bool return_original; public: int pitch; + int pitchUV; + int pitchA; - Image444(IScriptEnvironment* env) : Env(static_cast(env)), _w(0), _h(0) {} 
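
As a quick reference for the hunks that follow: Image444 now carries a separate pitch per plane (pitch for Y, pitchUV for U/V, pitchA for alpha), so a kernel is expected to walk each plane with the pitch that belongs to it. A minimal sketch of that usage, assuming 8-bit data and using only the accessors declared in this header (GetPtr, GetPitch, w, h); it is an illustration, not code from the patch:

    // Clear one plane of an Image444, honoring that plane's own pitch.
    static void clear_plane(Image444* img, int plane) {      // plane = PLANAR_Y/U/V/A
      BYTE* p = img->GetPtr(plane);
      const int pitch = img->GetPitch(plane);                 // pitch, pitchUV or pitchA
      for (int y = 0; y < img->h(); ++y) {
        for (int x = 0; x < img->w(); ++x)
          p[x] = 0;                                           // 8-bit case; a 10-16 bit plane is written as uint16_t
        p += pitch;
      }
    }
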
+ //Image444(IScriptEnvironment* env) : Env(static_cast(env)), _w(0), _h(0), _bits_per_pixel(8), hasAlpha(false) {} - Image444(Image444* img, IScriptEnvironment* env) : Env(static_cast(env)), _w(img->w()), _h(img->h()), pitch(img->pitch) { + /*Image444(Image444* img, IScriptEnvironment* env) : Env(static_cast(env)), + _w(img->w()), _h(img->h()), pitch(img->pitch), pitchUV(img->pitchUV), pitchA(img->pitchA), _bits_per_pixel(img->_bits_per_pixel), hasAlpha(img->hasAlpha) { Y_plane = img->GetPtr(PLANAR_Y); U_plane = img->GetPtr(PLANAR_U); V_plane = img->GetPtr(PLANAR_V); + A_plane = img->GetPtr(PLANAR_A); ResetFake(); } + */ + + Image444( +#ifdef USE_ORIG_FRAME + PVideoFrame &_frame, +#endif + int _inw, int _inh, int _in_bits_per_pixel, bool _hasAlpha, IScriptEnvironment* env) : + Env(static_cast(env)), +#ifdef USE_ORIG_FRAME + frame(_frame), +#endif + _w(_inw), _h(_inh), _bits_per_pixel(_in_bits_per_pixel), hasAlpha(_hasAlpha) { - Image444(int _inw, int _inh, IScriptEnvironment* env) : Env(static_cast(env)), _w(_inw), _h(_inh) { - pitch = (_w+15)&(~15); + int pixelsize; + if (_bits_per_pixel == 8) pixelsize = 1; + else if (_bits_per_pixel <= 16) pixelsize = 2; + else pixelsize = 4; - Y_plane = (BYTE*)Env->Allocate(pitch*_h, 64, AVS_POOLED_ALLOC); + +#ifdef USE_ORIG_FRAME + pitch = frame->GetPitch(PLANAR_Y); + pitchUV = frame->GetPitch(PLANAR_U); + pitchA = frame->GetPitch(PLANAR_A); + + Y_plane = (BYTE*) frame->GetReadPtr(PLANAR_Y); + U_plane = (BYTE*) frame->GetReadPtr(PLANAR_U); + V_plane = (BYTE*) frame->GetReadPtr(PLANAR_V); + A_plane = (BYTE*) frame->GetReadPtr(PLANAR_A); +#else + const int INTERNAL_ALIGN = 16; + pitch = (_w * pixelsize +(INTERNAL_ALIGN-1))&(~(INTERNAL_ALIGN-1)); + pitchA = hasAlpha ? pitch : 0; + + Y_plane = (BYTE*) Env->Allocate(pitch*_h, 64, AVS_POOLED_ALLOC); U_plane = (BYTE*) Env->Allocate(pitch*_h, 64, AVS_POOLED_ALLOC); V_plane = (BYTE*) Env->Allocate(pitch*_h, 64, AVS_POOLED_ALLOC); - if (!Y_plane || !U_plane || !V_plane) { + A_plane = hasAlpha ? (BYTE*) Env->Allocate(pitch*_h, 64, AVS_POOLED_ALLOC) : nullptr; + if (!Y_plane || !U_plane || !V_plane || (hasAlpha && !A_plane)) { Env->Free(Y_plane); Env->Free(U_plane); Env->Free(V_plane); - Env->ThrowError("Image444: Could not reserve memory."); + Env->Free(A_plane); + Env->ThrowError("Image444: Could not reserve memory."); } +#endif ResetFake(); } - Image444(BYTE* Y, BYTE* U, BYTE* V, int _inw, int _inh, int _pitch, IScriptEnvironment* env) : Env(static_cast(env)), _w(_inw), _h(_inh) { + /* + Image444(BYTE* Y, BYTE* U, BYTE* V, BYTE *A, int _inw, int _inh, int _pitch, int _pitchUV, int _pitchA, int _in_bits_per_pixel, bool _hasAlpha, IScriptEnvironment* env) : + Env(static_cast(env)), _w(_inw), _h(_inh), _bits_per_pixel(_in_bits_per_pixel), hasAlpha(_hasAlpha) { if (!(_w && _h)) { _RPT0(1,"Image444: Height or Width is 0"); } Y_plane = Y; U_plane = U; V_plane = V; + A_plane = A; pitch = _pitch; + pitchUV = _pitchUV; + pitchA = _pitchA; ResetFake(); } + */ void free_chroma() { +#ifndef USE_ORIG_FRAME Env->Free(U_plane); Env->Free(V_plane); +#endif } void free_luma() { +#ifndef USE_ORIG_FRAME Env->Free(Y_plane); + Env->Free(A_plane); +#endif } void free_all() { +#ifndef USE_ORIG_FRAME if (!(_w && _h)) { _RPT0(1,"Image444: Height or Width is 0"); } free_luma(); free_chroma(); +#endif } __inline int w() { return (return_original) ? _w : fake_w; } @@ -129,6 +187,8 @@ class Image444 { return (return_original) ? U_plane : fake_U_plane; case PLANAR_V: return (return_original) ? 
V_plane : fake_V_plane; + case PLANAR_A: + return (return_original) ? A_plane : fake_A_plane; } return Y_plane; } @@ -147,17 +207,44 @@ class Image444 { case PLANAR_V: fake_Y_plane = V_plane = ptr; break; + case PLANAR_A: + fake_A_plane = A_plane = ptr; + break; + } + } + + int GetPitch(int plane) { + if (!(_w && _h)) { + _RPT0(1,"Image444: Height or Width is 0"); + } + switch (plane) { + case PLANAR_Y: + return pitch; + case PLANAR_U: + case PLANAR_V: + return pitchUV; + case PLANAR_A: + return pitchA; } + return pitch; } void SubFrame(int x, int y, int new_w, int new_h) { new_w = min(new_w, w()-x); new_h = min(new_h, h()-y); - fake_Y_plane = GetPtr(PLANAR_Y) + x + (y*pitch); - fake_U_plane = GetPtr(PLANAR_U) + x + (y*pitch); - fake_V_plane = GetPtr(PLANAR_V) + x + (y*pitch); - + int pixelsize; + switch(_bits_per_pixel) { + case 8: pixelsize = 1; break; + case 32: pixelsize = 4; break; + default: pixelsize = 2; + } + + fake_Y_plane = GetPtr(PLANAR_Y) + x*pixelsize + (y*pitch); + fake_U_plane = GetPtr(PLANAR_U) + x*pixelsize + (y*pitchUV); + fake_V_plane = GetPtr(PLANAR_V) + x*pixelsize + (y*pitchUV); + fake_A_plane = pitchA > 0 ? GetPtr(PLANAR_A) + x*pixelsize + (y*pitchA) : nullptr; + fake_w = new_w; fake_h = new_h; } @@ -178,6 +265,7 @@ class Image444 { fake_Y_plane = Y_plane; fake_U_plane = U_plane; fake_V_plane = V_plane; + fake_A_plane = A_plane; fake_w = _w; fake_h = _h; } diff --git a/avs_core/filters/overlay/overlay.cpp b/avs_core/filters/overlay/overlay.cpp index d82de740a..5a7787918 100644 --- a/avs_core/filters/overlay/overlay.cpp +++ b/avs_core/filters/overlay/overlay.cpp @@ -81,29 +81,50 @@ GenericVideoFilter(_child) { // Make copy of the VideoInfo inputVi = (VideoInfo*)malloc(sizeof(VideoInfo)); memcpy(inputVi, &vi, sizeof(VideoInfo)); + outputVi = (VideoInfo*)malloc(sizeof(VideoInfo)); + memcpy(outputVi, &vi, sizeof(VideoInfo)); + vi444 = (VideoInfo*)malloc(sizeof(VideoInfo)); + memcpy(vi444, &vi, sizeof(VideoInfo)); mask = 0; - opacity = (int)(256.0*args[ARG_OPACITY].AsDblDef(1.0)+0.5); + opacity_f = (float)args[ARG_OPACITY].AsDblDef(1.0); // rfu, if once overlay gets float support + opacity = (int)(256.0*opacity_f + 0.5); // range is converted to 256 for all all bit_depth offset_x = args[ARG_X].AsInt(0); offset_y = args[ARG_Y].AsInt(0); + if (!args[ARG_OVERLAY].IsClip()) + env->ThrowError("Overlay: Overlay parameter is not a clip"); + overlay = args[ARG_OVERLAY].AsClip(); + overlayVi = overlay->GetVideoInfo(); +#if 0 + // we omit this phase. use ConvertToYUV444 instead of Overlay's Convert444fromXXX functions + // we only use Convert444fromYV24 overlayConv = SelectInputCS(&overlayVi, env, full_range); +#endif +#if 0 // we are converting in GetFrame on-the-fly if (!overlayConv) { AVSValue new_args[3] = { overlay, false, (full_range) ? "PC.601" : "rec601" }; try { - overlay = env->Invoke("ConvertToYV24", AVSValue(new_args, 3)).AsClip(); + overlay = env->Invoke("ConvertToYUV444", AVSValue(new_args, 3)).AsClip(); } catch (...) 
{} overlayVi = overlay->GetVideoInfo(); +#if 1 + // overlay here is surely 444 format, SelectInputCS will return this: + overlayConv = new Convert444FromYV24(pixelsize, bits_per_pixel); + overlayConv->SetVideoInfo(&overlayVi); +#else overlayConv = SelectInputCS(&overlayVi, env, full_range); +#endif if (!overlayConv) { // ok - now we've tried everything ;) env->ThrowError("Overlay: Overlay image colorspace not supported."); } } +#endif greymask = args[ARG_GREYMASK].AsBool(true); // Grey mask, default true ignore_conditional = args[ARG_IGNORE_CONDITIONAL].AsBool(false); // Don't ignore conditionals by default @@ -118,40 +139,119 @@ GenericVideoFilter(_child) { env->ThrowError("Overlay: Mask and overlay must have the same image size! (Height is not the same)"); } + +#if 0 // we are converting in GetFrame on-the-fly + +#if 0 maskConv = SelectInputCS(&maskVi, env, full_range); +#else + maskConv = nullptr; // we are using avisynth's format converters +#endif if (!maskConv) { AVSValue new_args[3] = { mask, false, (full_range) ? "PC.601" : "rec601" }; try { - mask = env->Invoke((greymask) ? "ConvertToY8" : "ConvertToYV24", AVSValue(new_args, 3)).AsClip(); + mask = env->Invoke((greymask) ? "ConvertToY" : "ConvertToYUV444", AVSValue(new_args, 3)).AsClip(); } catch (...) {} maskVi = mask->GetVideoInfo(); +#if 1 + // overlay here is surely 444 or Y format, SelectInputCS will return either of this: + maskConv = new Convert444FromYV24(); + // no need for new Convert444FromY8() because + // if greyMask, only FromLuma will be called in GetFrame + // they are simple Blts indeed + maskConv->SetVideoInfo(&maskVi); +#else maskConv = SelectInputCS(&maskVi, env, full_range); +#endif if (!maskConv) { env->ThrowError("Overlay: Mask image colorspace not supported."); } } - +#endif } inputCS = vi.pixel_type; + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); +#if 0 // we are converting in GetFrame on-the-fly + +#if 0 inputConv = SelectInputCS(inputVi, env, full_range); +#else + inputConv = nullptr; +#endif if (!inputConv) { AVSValue new_args[3] = { child, false, (full_range) ? "PC.601" : "rec601" }; try { - child = env->Invoke("ConvertToYV24", AVSValue(new_args, 3)).AsClip(); + child = env->Invoke("ConvertToYUV444", AVSValue(new_args, 3)).AsClip(); } catch (...) {} vi = child->GetVideoInfo(); memcpy(inputVi, &vi, sizeof(VideoInfo)); +#if 1 + inputConv = new Convert444FromYV24(); +#else inputConv = SelectInputCS(inputVi, env, full_range); +#endif if (!inputConv) { env->ThrowError("Overlay: Colorspace not supported."); } } +#endif +#if 0 outputConv = SelectOutputCS(args[ARG_OUTPUT].AsString(0),env); +#else + vi = child->GetVideoInfo(); + // parse and check output format override vi + const char *output_pixel_format_override = args[ARG_OUTPUT].AsString(0); + if(output_pixel_format_override) { + int output_pixel_type = GetPixelTypeFromName(output_pixel_format_override); + if(output_pixel_type == VideoInfo::CS_UNKNOWN) + env->ThrowError("Overlay: invalid pixel_type!"); + + outputVi->pixel_type = output_pixel_type; // override output pixel format + if(outputVi->BitsPerComponent() != inputVi->BitsPerComponent()) + env->ThrowError("Overlay: output bitdepth should be the same as input's!"); + } + + bool hasAlpha = vi.IsYUVA() || vi.IsPlanarRGBA(); + + // fill yuv 444 template + switch(bits_per_pixel) { + case 8: vi444->pixel_type = hasAlpha ? VideoInfo::CS_YUVA444 : VideoInfo::CS_YV24; break; + case 10: vi444->pixel_type = hasAlpha ? 
VideoInfo::CS_YUVA444P10 : VideoInfo::CS_YUV444P10; break; + case 12: vi444->pixel_type = hasAlpha ? VideoInfo::CS_YUVA444P12 : VideoInfo::CS_YUV444P12; break; + case 14: vi444->pixel_type = hasAlpha ? VideoInfo::CS_YUVA444P14 : VideoInfo::CS_YUV444P14; break; + case 16: vi444->pixel_type = hasAlpha ? VideoInfo::CS_YUVA444P16 : VideoInfo::CS_YUV444P16; break; + case 32: vi444->pixel_type = hasAlpha ? VideoInfo::CS_YUVA444PS : VideoInfo::CS_YUV444PS; break; + } + + // Set GetFrame's real output format + // We have fast conversions for YV12 and YUY2 + if(pixelsize==1 && outputVi->Is420()) + { + // on-the-fly fast conversion at the end of GetFrame + // check the pixelsize == 1 condition, should be the same as there + switch(bits_per_pixel) { + case 8: vi.pixel_type = hasAlpha ? VideoInfo::CS_YUVA420 : VideoInfo::CS_YV12; break; + case 10: vi.pixel_type = hasAlpha ? VideoInfo::CS_YUVA420P10 : VideoInfo::CS_YUV420P10; break; + case 12: vi.pixel_type = hasAlpha ? VideoInfo::CS_YUVA420P12 : VideoInfo::CS_YUV420P12; break; + case 14: vi.pixel_type = hasAlpha ? VideoInfo::CS_YUVA420P14 : VideoInfo::CS_YUV420P14; break; + case 16: vi.pixel_type = hasAlpha ? VideoInfo::CS_YUVA420P16 : VideoInfo::CS_YUV420P16; break; + case 32: vi.pixel_type = hasAlpha ? VideoInfo::CS_YUVA420PS : VideoInfo::CS_YUV420PS; break; + } + } else if (outputVi->IsYUY2()) + { + // on-the-fly fast conversion at the end of GetFrame + vi.pixel_type = VideoInfo::CS_YUY2; + } else { + vi.pixel_type = vi444->pixel_type; + } + +#endif name = args[ARG_MODE].AsString("Blend"); @@ -160,9 +260,13 @@ GenericVideoFilter(_child) { Overlay::~Overlay() { free(inputVi); + free(outputVi); + free(vi444); +#if 0 delete outputConv; delete inputConv; delete overlayConv; +#endif } PVideoFrame __stdcall Overlay::GetFrame(int n, IScriptEnvironment *env) { @@ -172,51 +276,207 @@ PVideoFrame __stdcall Overlay::GetFrame(int n, IScriptEnvironment *env) { int con_y_offset; FetchConditionals(env, &op_offset, &con_x_offset, &con_y_offset, ignore_conditional); - // Output frame - PVideoFrame f = env->NewVideoFrame(vi); - // Fetch current frame and convert it. +#if 0 + // Fetch current frame and convert it. PVideoFrame frame = child->GetFrame(n, env); +#endif +#ifndef USE_ORIG_FRAME // Image444 initialization - Image444* img = new Image444(vi.width, vi.height, env); - Image444* overlayImg = new Image444(overlayVi.width, overlayVi.height, env); Image444* maskImg = NULL; - if (mask) { - maskImg = new Image444(maskVi.width, maskVi.height, env); + maskImg = new Image444(maskVi.width, maskVi.height, bits_per_pixel, mask->GetVideoInfo().IsYUVA() || mask->GetVideoInfo().IsPlanarRGBA(), env); if (greymask) { maskImg->free_chroma(); maskImg->SetPtr(maskImg->GetPtr(PLANAR_Y), PLANAR_U); maskImg->SetPtr(maskImg->GetPtr(PLANAR_Y), PLANAR_V); } } +#endif - inputConv->ConvertImage(frame, img, env); +#if 1 + // always use avisynth converters + AVSValue child2; + PVideoFrame frame; + if (inputVi->Is444()) + { + frame = child->GetFrame(n, env); + } else if(pixelsize == 1 && inputVi->Is420()) { + // use blazing fast YV12 -> YV24 converter + PVideoFrame frameSrc420 = child->GetFrame(n, env); + frame = env->NewVideoFrame(*vi444); + // no fancy options for chroma resampler, etc.. 
simply fast + Convert444FromYV12(frameSrc420, frame, pixelsize, bits_per_pixel, env); + // convert frameSrc420 -> frame + } else if(pixelsize == 1 && inputVi->IsYUY2()) { + // use blazing fast YUY2 -> YV24 converter + PVideoFrame frameSrcYUY2 = child->GetFrame(n, env); + frame = env->NewVideoFrame(*vi444); + Convert444FromYUY2(frameSrcYUY2, frame, pixelsize, bits_per_pixel, env); + } else if(inputVi->IsRGB()) { + // generic Avisynth conversion + AVSValue new_args[3] = { child, false, (full_range) ? "PC.601" : "rec601" }; + child2 = env->Invoke("ConvertToYUV444", AVSValue(new_args, 3)).AsClip(); + frame = child2.AsClip()->GetFrame(n, env); + //internal_working_format = child2.AsClip()->GetVideoInfo().pixel_type; + } else { + AVSValue new_args[2] = { child, false}; + child2 = env->Invoke("ConvertToYUV444", AVSValue(new_args, 2)).AsClip(); + frame = child2.AsClip()->GetFrame(n, env); + //internal_working_format = child2.AsClip()->GetVideoInfo().pixel_type; + } + // Fetch current frame and convert it to internal format + Image444* img = new Image444(frame, vi.width, vi.height, bits_per_pixel, child->GetVideoInfo().IsYUVA() || child->GetVideoInfo().IsPlanarRGBA(), env); +#ifndef USE_ORIG_FRAME + CopyToImage444(frame, img, env); +#endif +#else + PVideoFrame frame = child.AsClip()->GetFrame(n, env); + inputConv->ConvertImage(frame, img, env); // always ConvertFrom444 +#endif + +#if 1 + // always use avisynth converters + PVideoFrame Oframe; + AVSValue overlay2; + + Image444* maskImg = NULL; + + if(overlayVi.Is444() +// || (overlayVi.pixel_type == internal_working_format) + ) + // don't convert is input and overlay is the same formats + // so we can work in YUV420 or YUV422 directly besides YUV444 + { + Oframe = overlay->GetFrame(n, env); + } else if(pixelsize == 1 && overlayVi.Is420()) { + // use blazing fast YV12 -> YV24 converter + PVideoFrame frameSrc420 = overlay->GetFrame(n, env); + Oframe = env->NewVideoFrame(*vi444); + // no fancy options for chroma resampler, etc.. simply fast + Convert444FromYV12(frameSrc420, Oframe, pixelsize, bits_per_pixel, env); + // convert frameSrc420 -> frame + } else if(pixelsize == 1 && overlayVi.IsYUY2()) { + // use blazing fast YUY2 -> YV24 converter + PVideoFrame frameSrcYUY2 = overlay->GetFrame(n, env); + Oframe = env->NewVideoFrame(*vi444); + Convert444FromYUY2(frameSrcYUY2, Oframe, pixelsize, bits_per_pixel, env); + } else if(overlayVi.IsRGB()) { + AVSValue new_args[3] = { overlay, false, (full_range) ? 
"PC.601" : "rec601" }; + overlay2 = env->Invoke("ConvertToYUV444", AVSValue(new_args, 3)).AsClip(); + Oframe = overlay2.AsClip()->GetFrame(n, env); + } else { + AVSValue new_args[2] = { overlay, false }; + overlay2 = env->Invoke("ConvertToYUV444", AVSValue(new_args, 2)).AsClip(); + Oframe = overlay2.AsClip()->GetFrame(n, env); + } + // Fetch current overlay and convert it to internal format + Image444* overlayImg = new Image444(Oframe, overlayVi.width, overlayVi.height, bits_per_pixel, overlay->GetVideoInfo().IsYUVA() || overlay->GetVideoInfo().IsPlanarRGBA(), env); + #ifndef USE_ORIG_FRAME + CopyToImage444(Oframe, overlayImg, env); + #endif +#else // Fetch current overlay and convert it PVideoFrame Oframe = overlay->GetFrame(n, env); overlayConv->ConvertImage(Oframe, overlayImg, env); +#endif // Clip overlay to original image ClipFrames(img, overlayImg, offset_x + con_x_offset, offset_y + con_y_offset); if (overlayImg->IsSizeZero()) { // Nothing to overlay +#ifndef USE_ORIG_FRAME // Convert output image back img->ReturnOriginal(true); overlayImg->ReturnOriginal(true); // Convert output image back - f = outputConv->ConvertImage(img, f, env); +#if 1 + frameOutput = Convert444ToOriginal(&vi, // original or overridden format + img, // Image444 * (source) + frameOutput, // PVideoFrame target, + full_range, // for RGB + env + ); +#else + frameOutput = outputConv->ConvertImage(img, frameOutput, pixelsize, bits_per_pixel, env); +#endif +#else + // frame remains as-is +#endif } else { + // from Avisynth's doc + /* + Inputting RGB for mask clip + An RGB mask clip may behave a bit oddly if it contains color information. + If you use a greyscale mask, or if you leave greymask=true, you will get the result you would expect. + Note that mask values are never scaled, so it will automatically be in 0-255 range, directly copied from the RGB values. + */ + // This last sentence is not true. Mask is converted from RGB the same way as input and overlay clips. + // fetch current mask (if given) if (mask) { - PVideoFrame Mframe = mask->GetFrame(n, env); - if (greymask) - maskConv->ConvertImageLumaOnly(Mframe, maskImg, env); - else - maskConv->ConvertImage(Mframe, maskImg, env); +#if 1 + AVSValue mask2; + PVideoFrame Mframe; + + if(maskVi.Is444() || (greymask && maskVi.IsY())) { + Mframe = mask->GetFrame(n, env); + } else if(pixelsize == 1 && maskVi.Is420()) { + // use blazing fast YV12 -> YV24 converter + PVideoFrame frameSrc420 = mask->GetFrame(n, env); + Mframe = env->NewVideoFrame(*vi444); + // no fancy options for chroma resampler, etc.. simply fast + Convert444FromYV12(frameSrc420, Mframe, pixelsize, bits_per_pixel, env); + // convert frameSrc420 -> frame + } else if(pixelsize == 1 && maskVi.IsYUY2()) { + // use blazing fast YUY2 -> YV24 converter + PVideoFrame frameSrcYUY2 = mask->GetFrame(n, env); + Mframe = env->NewVideoFrame(*vi444); + Convert444FromYUY2(frameSrcYUY2, Mframe, pixelsize, bits_per_pixel, env); + } else if(maskVi.IsRGB()) { + if(greymask) { + AVSValue new_args[2] = { mask, (full_range) ? "PC.601" : "rec601" }; + mask2 = env->Invoke("ConvertToY", AVSValue(new_args, 2)).AsClip(); + } else { + AVSValue new_args[3] = { mask, false, (full_range) ? 
"PC.601" : "rec601" }; + mask2 = env->Invoke("ConvertToYUV444", AVSValue(new_args, 3)).AsClip(); + } + Mframe = mask2.AsClip()->GetFrame(n, env); + } else { + if(greymask) { + AVSValue new_args[2] = { mask }; + mask2 = env->Invoke("ConvertToY", AVSValue(new_args, 1)).AsClip(); + } else { + AVSValue new_args[2] = { mask, false }; + mask2 = env->Invoke("ConvertToYUV444", AVSValue(new_args, 2)).AsClip(); + } + Mframe = mask2.AsClip()->GetFrame(n, env); + } + maskImg = new Image444(Mframe, maskVi.width, maskVi.height, bits_per_pixel, mask->GetVideoInfo().IsYUVA() || mask->GetVideoInfo().IsPlanarRGBA(), env); + if (greymask) { +#ifndef USE_ORIG_FRAME + maskImg->free_chroma(); +#endif + maskImg->SetPtr(maskImg->GetPtr(PLANAR_Y), PLANAR_U); + maskImg->SetPtr(maskImg->GetPtr(PLANAR_Y), PLANAR_V); + } + #ifndef USE_ORIG_FRAME + if (greymask) + CopyToImage444LumaOnly(Mframe, maskImg, env); + else + CopyToImage444(Mframe, maskImg, env); + #endif +#else + PVideoFrame Mframe = mask->GetFrame(n, env); + if (greymask) + maskConv->ConvertImageLumaOnly(Mframe, maskImg, env); // uses Convert444FromYV24's method, for luma it is the same as Convert444FromY8 + else + maskConv->ConvertImage(Mframe, maskImg, env); +#endif img->ReturnOriginal(true); ClipFrames(img, maskImg, offset_x + con_x_offset, offset_y + con_y_offset); @@ -225,13 +485,18 @@ PVideoFrame __stdcall Overlay::GetFrame(int n, IScriptEnvironment *env) { OverlayFunction* func = SelectFunction(name, env); // Process the image + func->setBitsPerPixel(bits_per_pixel); func->setOpacity(opacity + op_offset); func->setEnv(env); +#ifdef USE_ORIG_FRAME + env->MakeWritable(&frame); // == PVideoFrame &img->frame +#else +#endif if (!mask) { - func->BlendImage(img, overlayImg); + func->DoBlendImage(img, overlayImg); } else { - func->BlendImageMask(img, overlayImg, maskImg); + func->DoBlendImageMask(img, overlayImg, maskImg); } delete func; @@ -242,9 +507,31 @@ PVideoFrame __stdcall Overlay::GetFrame(int n, IScriptEnvironment *env) { if (mask) maskImg->ReturnOriginal(true); - f = outputConv->ConvertImage(img, f, env); - +#ifndef USE_ORIG_FRAME +#if 1 + frameOutput = Convert444ToOriginal(&vi, // original or overridden format + img, // Image444 * (source) + frameOutput, // PVideoFrame target, + full_range, // for RGB + env + ); +#else + f = outputConv->ConvertImage(img, f, pixelsize, bits_per_pixel, env); +#endif +#endif } + + // here img->frame is 444 + // apply fast conversion + if((pixelsize==1) && outputVi->Is420()) + { + PVideoFrame outputFrame = env->NewVideoFrame(*outputVi); + Convert444ToYV12(frame, outputFrame, pixelsize, bits_per_pixel, env); + } else if(outputVi->IsYUY2()) { + PVideoFrame outputFrame = env->NewVideoFrame(*outputVi); + Convert444ToYUY2(frame, outputFrame, pixelsize, bits_per_pixel, env); + } + // all other cases return 4:4:4 // Cleanup if (mask) { @@ -260,8 +547,11 @@ PVideoFrame __stdcall Overlay::GetFrame(int n, IScriptEnvironment *env) { img->free_all(); delete img; } - - return f; +#ifndef USE_ORIG_FRAME + return frameOutput; +#else + return frame; +#endif } @@ -319,17 +609,18 @@ OverlayFunction* Overlay::SelectFunction(const char* name, IScriptEnvironment* e // matching the current VideoInfo (vi) //////////////////////////////// +#if 0 ConvertFrom444* Overlay::SelectOutputCS(const char* name, IScriptEnvironment* env) { if (!name) { - if (vi.IsYV12()) { + if (vi.Is420()) { return new Convert444ToYV12(); - } else if (vi.IsYV24()) { + } else if (vi.Is444()) { return new Convert444ToYV24(); - } else if (vi.IsY8()) { + } else if 
(vi.IsY()) { return new Convert444ToY8(); } else if (vi.IsYUY2()) { return new Convert444ToYUY2(); - } else if (vi.IsRGB()) { + } else if (vi.IsRGB() && !vi.IsPlanarRGB() && !vi.IsPlanarRGBA()) { if (full_range) { return new Convert444NonCCIRToRGB(); } @@ -345,49 +636,118 @@ ConvertFrom444* Overlay::SelectOutputCS(const char* name, IScriptEnvironment* en } if (!lstrcmpi(name, "YUY2")) { + if(pixelsize != 1) + env->ThrowError("Overlay: Source must be 8 bits."); vi.pixel_type = VideoInfo::CS_YUY2; return new Convert444ToYUY2(); } if (!lstrcmpi(name, "YV12")) { + if(pixelsize != 1) + env->ThrowError("Overlay: Source must be 8 bits."); vi.pixel_type = VideoInfo::CS_YV12; return new Convert444ToYV12(); } if (!lstrcmpi(name, "YV24")) { + if(pixelsize != 1) + env->ThrowError("Overlay: Source must be 8 bits."); vi.pixel_type = VideoInfo::CS_YV24; return new Convert444ToYV24(); } if (!lstrcmpi(name, "Y8")) { + if(pixelsize != 1) + env->ThrowError("Overlay: Source must be 8 bits."); vi.pixel_type = VideoInfo::CS_Y8; return new Convert444ToY8(); } + if (!lstrcmpi(name, "YUV420")) { + switch(bits_per_pixel) { + case 8: vi.pixel_type = VideoInfo::CS_YV12; break; + case 10: vi.pixel_type = VideoInfo::CS_YUV420P10; break; + case 12: vi.pixel_type = VideoInfo::CS_YUV420P12; break; + case 14: vi.pixel_type = VideoInfo::CS_YUV420P14; break; + case 16: vi.pixel_type = VideoInfo::CS_YUV420P16; break; + case 32: vi.pixel_type = VideoInfo::CS_YUV420PS; break; + } + return new Convert444ToYV12(pixelsize, bits_per_pixel); + } + + if (!lstrcmpi(name, "YUV444")) { + switch(bits_per_pixel) { + case 8: vi.pixel_type = VideoInfo::CS_YV24; break; + case 10: vi.pixel_type = VideoInfo::CS_YUV444P10; break; + case 12: vi.pixel_type = VideoInfo::CS_YUV444P12; break; + case 14: vi.pixel_type = VideoInfo::CS_YUV444P14; break; + case 16: vi.pixel_type = VideoInfo::CS_YUV444P16; break; + case 32: vi.pixel_type = VideoInfo::CS_YUV444PS; break; + } + return new Convert444ToYV24(pixelsize, bits_per_pixel); + } + + if (!lstrcmpi(name, "Y")) { + switch(bits_per_pixel) { + case 8: vi.pixel_type = VideoInfo::CS_Y8; break; + case 10: vi.pixel_type = VideoInfo::CS_Y10; break; + case 12: vi.pixel_type = VideoInfo::CS_Y12; break; + case 14: vi.pixel_type = VideoInfo::CS_Y14; break; + case 16: vi.pixel_type = VideoInfo::CS_Y16; break; + case 32: vi.pixel_type = VideoInfo::CS_Y32; break; + } + return new Convert444ToY8(pixelsize, bits_per_pixel); + } + if (!lstrcmpi(name, "RGB")) { + if(pixelsize != 1) + env->ThrowError("Overlay: Source must be 8 bits."); vi.pixel_type = VideoInfo::CS_BGR32; if (full_range) - return new Convert444NonCCIRToRGB(); - return new Convert444ToRGB(); + return new Convert444NonCCIRToRGB(pixelsize, bits_per_pixel); + return new Convert444ToRGB(pixelsize, bits_per_pixel); } if (!lstrcmpi(name, "RGB32")) { + if(pixelsize != 1) + env->ThrowError("Overlay: Source must be 8 bits."); vi.pixel_type = VideoInfo::CS_BGR32; if (full_range) - return new Convert444NonCCIRToRGB(); - return new Convert444ToRGB(); + return new Convert444NonCCIRToRGB(pixelsize, bits_per_pixel); + return new Convert444ToRGB(pixelsize, bits_per_pixel); + } + + if (!lstrcmpi(name, "RGB64")) { + if(pixelsize != 2) + env->ThrowError("Overlay: Source must be 16 bits."); + vi.pixel_type = VideoInfo::CS_BGR64; + if (full_range) + return new Convert444NonCCIRToRGB(pixelsize, bits_per_pixel); + return new Convert444ToRGB(pixelsize, bits_per_pixel); } if (!lstrcmpi(name, "RGB24")) { + if(pixelsize != 1) + env->ThrowError("Overlay: Source must be 8 bits."); 
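
The "YUV420", "YUV444" and "Y" output names above all repeat the same bit-depth to pixel_type mapping. For illustration only (this helper is not part of the patch), the 4:2:0 case reduces to:

    static int yuv420_pixel_type_for(int bits_per_pixel) {
      switch (bits_per_pixel) {
        case 8:  return VideoInfo::CS_YV12;
        case 10: return VideoInfo::CS_YUV420P10;
        case 12: return VideoInfo::CS_YUV420P12;
        case 14: return VideoInfo::CS_YUV420P14;
        case 16: return VideoInfo::CS_YUV420P16;
        default: return VideoInfo::CS_YUV420PS;   // 32-bit float
      }
    }
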
vi.pixel_type = VideoInfo::CS_BGR24; if (full_range) - return new Convert444NonCCIRToRGB(); - return new Convert444ToRGB(); + return new Convert444NonCCIRToRGB(pixelsize, bits_per_pixel); + return new Convert444ToRGB(pixelsize, bits_per_pixel); + } + + if (!lstrcmpi(name, "RGB48")) { + if(pixelsize != 2) + env->ThrowError("Overlay: Source must be 16 bits."); + vi.pixel_type = VideoInfo::CS_BGR48; + if (full_range) + return new Convert444NonCCIRToRGB(pixelsize, bits_per_pixel); + return new Convert444ToRGB(pixelsize, bits_per_pixel); } env->ThrowError("Overlay: Invalid 'output' colorspace specified."); return 0; } +#endif /////////////////////////////////// // Note: Instead of throwing an @@ -396,35 +756,38 @@ ConvertFrom444* Overlay::SelectOutputCS(const char* name, IScriptEnvironment* en // for more accurate error reporting /////////////////////////////////// +#if 0 +// let's use Avisynth's converters + ConvertTo444* Overlay::SelectInputCS(VideoInfo* VidI, IScriptEnvironment* env, bool full_range) { - if (VidI->IsYV12()) { - ConvertTo444* c = new Convert444FromYV12(); + if (VidI->Is420()) { + ConvertTo444* c = new Convert444FromYV12(pixelsize, bits_per_pixel); c->SetVideoInfo(VidI); return c; - } else if (VidI->IsYV24()) { - ConvertTo444* c = new Convert444FromYV24(); + } else if (VidI->Is444()) { + ConvertTo444* c = new Convert444FromYV24(pixelsize, bits_per_pixel); c->SetVideoInfo(VidI); return c; - } else if (VidI->IsY8()) { - ConvertTo444* c = new Convert444FromY8(); + } else if (VidI->IsY()) { + ConvertTo444* c = new Convert444FromY8(pixelsize, bits_per_pixel); c->SetVideoInfo(VidI); return c; } else if (VidI->IsYUY2()) { - ConvertTo444* c = new Convert444FromYUY2(); + ConvertTo444* c = new Convert444FromYUY2(pixelsize, bits_per_pixel); c->SetVideoInfo(VidI); return c; - } else if (VidI->IsRGB()) { + } else if (VidI->IsRGB() && !VidI->IsPlanarRGB() && !VidI->IsPlanarRGBA()) { ConvertTo444* c; if (full_range) - c = new Convert444NonCCIRFromRGB(); + c = new Convert444NonCCIRFromRGB(pixelsize, bits_per_pixel); else - c = new Convert444FromRGB(); + c = new Convert444FromRGB(pixelsize, bits_per_pixel); c->SetVideoInfo(VidI); return c; } return 0; } - +#endif void Overlay::ClipFrames(Image444* input, Image444* overlay, int x, int y) { @@ -485,5 +848,65 @@ void Overlay::FetchConditionals(IScriptEnvironment* env, int* op_offset, int* co AVSValue __cdecl Overlay::Create(AVSValue args, void*, IScriptEnvironment* env) { - return new Overlay(args[0].AsClip(), args, env); + //return new Overlay(args[0].AsClip(), args, env); + + Overlay* Result = new Overlay(args[0].AsClip(), args, env); + if (Result->GetVideoInfo().pixel_type == Result->outputVi->pixel_type) + return Result; + // c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s + // chromaresample = 'bicubic' default + // chromaresample = 'point' is faster + if(Result->outputVi->Is444()) { + // never can be + AVSValue new_args[2] = { Result, false}; + return env->Invoke("ConvertToYUV444", AVSValue(new_args, 2)).AsClip(); + } + if(Result->outputVi->Is422()) { + AVSValue new_args[2] = { Result, false}; + return env->Invoke("ConvertToYUV422", AVSValue(new_args, 2)).AsClip(); + } + if(Result->outputVi->Is420()) { + AVSValue new_args[2] = { Result, false}; + return env->Invoke("ConvertToYUV420", AVSValue(new_args, 2)).AsClip(); + // old overlay 444->YV12 direct converter is much faster than avisynth's internal converter + // because it simply averages chroma and put it back directly + // Avisynth's version is generic, uses generic 
resamplers for chroma + /* + AVSValue new_args[5] = { Result, "bicubic"}; + static const char* const arg_names[2] = { 0, "chromaresample" }; + return env->Invoke("ConvertToYUV420", AVSValue(new_args, 2), arg_names).AsClip(); + */ + } + if(Result->outputVi->IsYUY2()) { + AVSValue new_args[2] = { Result, false}; + return env->Invoke("ConvertToYUY2", AVSValue(new_args, 2)).AsClip(); + } + if(Result->outputVi->IsY()) { + AVSValue new_args[1] = { Result}; + return env->Invoke("ConvertToY", AVSValue(new_args, 1)).AsClip(); + } + if(Result->outputVi->IsRGB()) { + // c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s + AVSValue new_args[3] = { Result, (Result->full_range) ? "PC.601" : "rec601", false}; + if(Result->outputVi->IsPlanarRGB()) { + return env->Invoke("ConvertToPlanarRGB", AVSValue(new_args, 3)).AsClip(); + } + if(Result->outputVi->IsPlanarRGBA()) { + return env->Invoke("ConvertToPlanarRGBA", AVSValue(new_args, 3)).AsClip(); + } + if(Result->outputVi->IsRGB24()) { + return env->Invoke("ConvertToRGB24", AVSValue(new_args, 3)).AsClip(); + } + if(Result->outputVi->IsRGB32()) { + return env->Invoke("ConvertToRGB32", AVSValue(new_args, 3)).AsClip(); + } + if(Result->outputVi->IsRGB48()) { + return env->Invoke("ConvertToRGB48", AVSValue(new_args, 3)).AsClip(); + } + if(Result->outputVi->IsRGB64()) { + return env->Invoke("ConvertToRGB64", AVSValue(new_args, 3)).AsClip(); + } + } + env->ThrowError("Overlay: Invalid output format."); + return Result; } diff --git a/avs_core/filters/overlay/overlay.h b/avs_core/filters/overlay/overlay.h index ec93a6cfe..9389dcbcb 100644 --- a/avs_core/filters/overlay/overlay.h +++ b/avs_core/filters/overlay/overlay.h @@ -61,29 +61,42 @@ class Overlay : public GenericVideoFilter private: static OverlayFunction* SelectFunction(const char* name, IScriptEnvironment* env); +#if 0 ConvertFrom444* SelectOutputCS(const char* name, IScriptEnvironment* env); static ConvertTo444* SelectInputCS(VideoInfo* VidI, IScriptEnvironment* env, bool full_range); +#endif static void ClipFrames(Image444* input, Image444* overlay, int x, int y); static void FetchConditionals(IScriptEnvironment* env, int*, int*, int*, bool); VideoInfo overlayVi; VideoInfo maskVi; VideoInfo* inputVi; + VideoInfo* outputVi; + VideoInfo* vi444; + // AVS+: these are obsolate +#if 0 ConvertFrom444* outputConv; + ConvertTo444* inputConv; ConvertTo444* overlayConv; ConvertTo444* maskConv; +#endif + PClip overlay; PClip mask; int opacity; + float opacity_f; bool greymask; bool ignore_conditional; bool full_range; int offset_x, offset_y; int inputCS; - const char* name; + const char* name; // Blend parameter + + int pixelsize; + int bits_per_pixel; }; diff --git a/avs_core/filters/overlay/overlayfunctions.h b/avs_core/filters/overlay/overlayfunctions.h index 4c5877ed9..ed8eacd22 100644 --- a/avs_core/filters/overlay/overlayfunctions.h +++ b/avs_core/filters/overlay/overlayfunctions.h @@ -48,74 +48,124 @@ class OverlayFunction { } void setOpacity(int _opacity) { opacity = clamp(_opacity,0,256); inv_opacity = 256-opacity; } void setEnv(IScriptEnvironment *_env) { env = _env;} - virtual void BlendImage(Image444* base, Image444* overlay) = 0; - virtual void BlendImageMask(Image444* base, Image444* overlay, Image444* mask) = 0; + void setBitsPerPixel(int _bits_per_pixel) { bits_per_pixel = _bits_per_pixel; } + virtual void DoBlendImage(Image444* base, Image444* overlay) = 0; + virtual void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) = 0; protected: int opacity; int inv_opacity; + int 
bits_per_pixel; IScriptEnvironment *env; }; class OL_BlendImage : public OverlayFunction { + void DoBlendImage(Image444* base, Image444* overlay); + void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); + template void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); private: }; class OL_AddImage : public OverlayFunction { - void BlendImage(Image444* base, Image444* overlay); + void DoBlendImage(Image444* base, Image444* overlay); + void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); + //template + //void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; class OL_SubtractImage : public OverlayFunction { + void DoBlendImage(Image444* base, Image444* overlay); + void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); + template void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; class OL_MultiplyImage : public OverlayFunction { + void DoBlendImage(Image444* base, Image444* overlay); + void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); + template void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; class OL_BlendLumaImage : public OverlayFunction { + void DoBlendImage(Image444* base, Image444* overlay); + void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); + template void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); private: }; class OL_BlendChromaImage : public OverlayFunction { + void DoBlendImage(Image444* base, Image444* overlay); + void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); + template void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); private: }; class OL_LightenImage : public OverlayFunction { + void DoBlendImage(Image444* base, Image444* overlay); + void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); + template void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; class OL_DarkenImage : public OverlayFunction { + void DoBlendImage(Image444* base, Image444* overlay); + void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); + template void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; class OL_SoftLightImage : public OverlayFunction { + void DoBlendImage(Image444* base, Image444* overlay); + void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); + template void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; class OL_HardLightImage : public OverlayFunction { + void DoBlendImage(Image444* base, Image444* overlay); + void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); + template void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; class OL_DifferenceImage : public OverlayFunction { + void DoBlendImage(Image444* base, Image444* overlay); + void 
DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask);
+  template<typename pixel_t> void BlendImage(Image444* base, Image444* overlay);
+  template<typename pixel_t> void BlendImageMask(Image444* base, Image444* overlay, Image444* mask);
 };
 
 class OL_ExclusionImage : public OverlayFunction {
+  void DoBlendImage(Image444* base, Image444* overlay);
+  void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask);
+  template<typename pixel_t> void BlendImage(Image444* base, Image444* overlay);
+  template<typename pixel_t> void BlendImageMask(Image444* base, Image444* overlay, Image444* mask);
 };

From 3b603a273e15002d146124fdf1608ae12156e0ac Mon Sep 17 00:00:00 2001
From: Pinterf
Date: Tue, 13 Sep 2016 09:01:24 +0200
Subject: [PATCH 068/120] AddBorders: 10-12-14 bit aware, stretch 8 bit colors for RGB (10-16)

---
 avs_core/filters/transform.cpp | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/avs_core/filters/transform.cpp b/avs_core/filters/transform.cpp
index d1221b38e..e9345ea8a 100644
--- a/avs_core/filters/transform.cpp
+++ b/avs_core/filters/transform.cpp
@@ -387,15 +387,15 @@ AddBorders::AddBorders(int _left, int _top, int _right, int _bot, int _clr, PCli
 }
 
 template<typename pixel_t>
-static inline pixel_t GetHbdColorFromByte(uint8_t color)
+static inline pixel_t GetHbdColorFromByte(uint8_t color, bool fullscale, int bits_per_pixel)
 {
   if (sizeof(pixel_t) == 1) return color;
-  else if (sizeof(pixel_t) == 2) return (pixel_t)color * 256;
+  else if (sizeof(pixel_t) == 2) return (pixel_t)(fullscale ? (color * ((1 << bits_per_pixel)-1)) / 255 : (int)color << (bits_per_pixel - 8));
   else return (pixel_t)color / 256; // float, scale to [0..1) 128=0.5f
 }
 
 template<typename pixel_t>
-static void addborders_planar(PVideoFrame &dst, PVideoFrame &src, VideoInfo &vi, int top, int bot, int left, int right, int rgbcolor, bool isYUV)
+static void addborders_planar(PVideoFrame &dst, PVideoFrame &src, VideoInfo &vi, int top, int bot, int left, int right, int rgbcolor, bool isYUV, int bits_per_pixel)
 {
   const unsigned int colr = isYUV ?
RGB2YUV(rgbcolor) : rgbcolor; const unsigned char YBlack=(unsigned char)((colr >> 16) & 0xff); @@ -426,7 +426,7 @@ static void addborders_planar(PVideoFrame &dst, PVideoFrame &src, VideoInfo &vi, const int final_black = (bot >> ysub) * dst_pitch + vi.BytesFromPixels(right >> xsub) + (dst_pitch - dst->GetRowSize(plane)); - pixel_t current_color = GetHbdColorFromByte(colors[p]); + pixel_t current_color = GetHbdColorFromByte(colors[p], !isYUV, bits_per_pixel); BYTE *dstp = dst->GetWritePtr(plane); // copy original @@ -455,12 +455,13 @@ PVideoFrame AddBorders::GetFrame(int n, IScriptEnvironment* env) PVideoFrame dst = env->NewVideoFrame(vi); if (vi.IsPlanar()) { + int bits_per_pixel = vi.BitsPerComponent(); bool isYUV = vi.IsYUV() || vi.IsYUVA(); switch(vi.ComponentSize()) { - case 1: addborders_planar(dst, src, vi, top, bot, left, right, clr, isYUV); break; - case 2: addborders_planar(dst, src, vi, top, bot, left, right, clr, isYUV); break; + case 1: addborders_planar(dst, src, vi, top, bot, left, right, clr, isYUV, bits_per_pixel); break; + case 2: addborders_planar(dst, src, vi, top, bot, left, right, clr, isYUV, bits_per_pixel); break; default: //case 4: - addborders_planar(dst, src, vi, top, bot, left, right, clr, isYUV); break; + addborders_planar(dst, src, vi, top, bot, left, right, clr, isYUV, bits_per_pixel); break; } return dst; } @@ -549,10 +550,10 @@ PVideoFrame AddBorders::GetFrame(int n, IScriptEnvironment* env) *(unsigned __int32*)(dstp+i) = clr; } } else if (vi.IsRGB48()) { - const uint16_t clr0 = GetHbdColorFromByte(clr & 0xFF); + const uint16_t clr0 = GetHbdColorFromByte(clr & 0xFF, true, 16); uint32_t clr1 = - ((uint32_t)GetHbdColorFromByte((clr >> 16) & 0xFF) << (8 * 2)) + - ((uint32_t)GetHbdColorFromByte((clr >> 8) & 0xFF)); + ((uint32_t)GetHbdColorFromByte((clr >> 16) & 0xFF, true, 16) << (8 * 2)) + + ((uint32_t)GetHbdColorFromByte((clr >> 8) & 0xFF, true, 16)); const int leftbytes = vi.BytesFromPixels(left); const int leftrow = src_row_size + leftbytes; const int rightbytes = vi.BytesFromPixels(right); @@ -590,10 +591,10 @@ PVideoFrame AddBorders::GetFrame(int n, IScriptEnvironment* env) BitBlt(dstp+initial_black, dst_pitch, srcp, src_pitch, src_row_size, src_height); uint64_t clr64 = - ((uint64_t)GetHbdColorFromByte((clr >> 24) & 0xFF) << (24 * 2)) + - ((uint64_t)GetHbdColorFromByte((clr >> 16) & 0xFF) << (16 * 2)) + - ((uint64_t)GetHbdColorFromByte((clr >> 8) & 0xFF) << (8 * 2)) + - ((uint64_t)GetHbdColorFromByte((clr) & 0xFF)); + ((uint64_t)GetHbdColorFromByte((clr >> 24) & 0xFF, true, 16) << (24 * 2)) + + ((uint64_t)GetHbdColorFromByte((clr >> 16) & 0xFF, true, 16) << (16 * 2)) + + ((uint64_t)GetHbdColorFromByte((clr >> 8) & 0xFF, true, 16) << (8 * 2)) + + ((uint64_t)GetHbdColorFromByte((clr) & 0xFF, true, 16)); for (int i = 0; iGetHeight(PLANAR_Y); Ydata = &Ydata[src->GetRowSize(PLANAR_Y)-1]; - {for (int y=0; yGetPitch(PLANAR_Y); - }} + } fillp=src->GetRowSize(PLANAR_U_ALIGNED) - src->GetRowSize(PLANAR_U); Udata = &Udata[src->GetRowSize(PLANAR_U)-1]; Vdata = &Vdata[src->GetRowSize(PLANAR_V)-1]; h=src->GetHeight(PLANAR_U); - {for (int y=0; yGetPitch(PLANAR_U); Vdata+=src->GetPitch(PLANAR_V); - }} + } return src; } From c7c585cad6a633be9c37bb41143f4466a5a03b5c Mon Sep 17 00:00:00 2001 From: Pinterf Date: Tue, 13 Sep 2016 09:01:52 +0200 Subject: [PATCH 069/120] BlankClip: 10-12-14 bit aware --- avs_core/filters/source.cpp | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/avs_core/filters/source.cpp 
b/avs_core/filters/source.cpp index cb11385d7..838a70788 100644 --- a/avs_core/filters/source.cpp +++ b/avs_core/filters/source.cpp @@ -89,12 +89,14 @@ static PVideoFrame CreateBlankFrame(const VideoInfo& vi, int color, int mode, IS PVideoFrame frame = env->NewVideoFrame(vi); - // RGB 8->16 bit: not << 8 like YUV but 0..255 -> 0..65535 - auto rgbcolor8to16 = [](uint8_t color8) { return (uint16_t)color8 * 257; }; + // RGB 8->16 bit: not << 8 like YUV but 0..255 -> 0..65535 or 0..1023 for 10 bit + int pixelsize = vi.ComponentSize(); + int bits_per_pixel = vi.BitsPerComponent(); + int max_pixel_value = (1 << bits_per_pixel) - 1; + auto rgbcolor8to16 = [](uint8_t color8, int max_pixel_value) { return (uint16_t)(color8 * max_pixel_value / 255); }; - if (vi.IsPlanar()) { - int pixelsize = vi.ComponentSize(); + if (vi.IsPlanar()) { int color_yuv = (mode == COLOR_MODE_YUV) ? color : RGB2YUV(color); @@ -120,6 +122,8 @@ static PVideoFrame CreateBlankFrame(const VideoInfo& vi, int color, int mode, IS case PLANAR_U: Cval.i = (color_yuv >> 8) & 0xff; break; case PLANAR_V: Cval.i = color_yuv & 0xff; break; } + if(bits_per_pixel != 32) + Cval.i = Cval.i << (bits_per_pixel - 8); } else { // planar RGB switch(plane) { @@ -128,6 +132,8 @@ static PVideoFrame CreateBlankFrame(const VideoInfo& vi, int color, int mode, IS case PLANAR_G: Cval.i = (color >> 8) & 0xff; break; case PLANAR_B: Cval.i = color & 0xff; break; } + if(bits_per_pixel != 32) + Cval.i = rgbcolor8to16(Cval.i, max_pixel_value); } @@ -136,7 +142,7 @@ static PVideoFrame CreateBlankFrame(const VideoInfo& vi, int color, int mode, IS switch(pixelsize) { case 1: Cval.i |= (Cval.i << 8) | (Cval.i << 16) | (Cval.i << 24); break; // 4 pixels at a time - case 2: Cval.i = (Cval.i << 8) | (Cval.i << 24); break; // 2 pixels at a time + case 2: Cval.i |= (Cval.i << 16); break; // 2 pixels at a time default: // case 4: Cval.f = float(Cval.i) / 256.0f; // 32 bit float 128=0.5 } @@ -170,9 +176,9 @@ static PVideoFrame CreateBlankFrame(const VideoInfo& vi, int color, int mode, IS for (int i=0; i> 16) & 0xFF); - uint16_t g = rgbcolor8to16((color >> 8 ) & 0xFF); + const uint16_t clr0 = rgbcolor8to16(color & 0xFF, max_pixel_value); + uint16_t r = rgbcolor8to16((color >> 16) & 0xFF, max_pixel_value); + uint16_t g = rgbcolor8to16((color >> 8 ) & 0xFF, max_pixel_value); const uint32_t clr1 = (r << 16) + (g); const int gr = frame->GetRowSize() / sizeof(uint16_t); const int gp = frame->GetPitch() / sizeof(uint16_t); @@ -185,10 +191,10 @@ static PVideoFrame CreateBlankFrame(const VideoInfo& vi, int color, int mode, IS p16 += gp; } } else if (vi.IsRGB64()) { - uint64_t r = rgbcolor8to16((color >> 16) & 0xFF); - uint64_t g = rgbcolor8to16((color >> 8 ) & 0xFF); - uint64_t b = rgbcolor8to16((color ) & 0xFF); - uint64_t a = rgbcolor8to16((color >> 24) & 0xFF); + uint64_t r = rgbcolor8to16((color >> 16) & 0xFF, max_pixel_value); + uint64_t g = rgbcolor8to16((color >> 8 ) & 0xFF, max_pixel_value); + uint64_t b = rgbcolor8to16((color ) & 0xFF, max_pixel_value); + uint64_t a = rgbcolor8to16((color >> 24) & 0xFF, max_pixel_value); uint64_t color64 = (a << 48) + (r << 32) + (g << 16) + (b); std::fill_n(reinterpret_cast(p), size / sizeof(uint64_t), color64); } From f15cb76eaffb565f98f2cbd43e3bf377ec1cfef2 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 14 Sep 2016 13:58:33 +0200 Subject: [PATCH 070/120] Tweak: Luma LUT for 10-16 bits, Chroma LUT for 10 bit (old: LUT 8 bit only) --- avs_core/filters/levels.cpp | 575 ++++++++++++++++++++++-------------- avs_core/filters/levels.h | 
38 ++- 2 files changed, 391 insertions(+), 222 deletions(-) diff --git a/avs_core/filters/levels.cpp b/avs_core/filters/levels.cpp index 8de6d8cfd..462455d7a 100644 --- a/avs_core/filters/levels.cpp +++ b/avs_core/filters/levels.cpp @@ -53,7 +53,7 @@ extern const AVSFunction Levels_filters[] = { { "Levels", BUILTIN_FUNC_PREFIX, "cifiii[coring]b[dither]b", Levels::Create }, // src_low, gamma, src_high, dst_low, dst_high { "RGBAdjust", BUILTIN_FUNC_PREFIX, "c[r]f[g]f[b]f[a]f[rb]f[gb]f[bb]f[ab]f[rg]f[gg]f[bg]f[ag]f[analyze]b[dither]b", RGBAdjust::Create }, - { "Tweak", BUILTIN_FUNC_PREFIX, "c[hue]f[sat]f[bright]f[cont]f[coring]b[sse]b[startHue]f[endHue]f[maxSat]f[minSat]f[interp]f[dither]b[realcalc]b", Tweak::Create }, + { "Tweak", BUILTIN_FUNC_PREFIX, "c[hue]f[sat]f[bright]f[cont]f[coring]b[sse]b[startHue]f[endHue]f[maxSat]f[minSat]f[interp]f[dither]b[realcalc]b[dither_strength]f", Tweak::Create }, { "MaskHS", BUILTIN_FUNC_PREFIX, "c[startHue]f[endHue]f[maxSat]f[minSat]f[coring]b", MaskHS::Create }, { "Limiter", BUILTIN_FUNC_PREFIX, "c[min_luma]i[max_luma]i[min_chroma]i[max_chroma]i[show]s", Limiter::Create }, { 0 } @@ -221,8 +221,8 @@ Levels::Levels(PClip _child, int in_min, double gamma, int in_max, int out_min, int ii; if(dither) { - int i_base = dither ? (i & ~0xFF) : i; - int i_dithershift = dither ? (i & 0xFF) << (bits_per_pixel - 8) : 0; + int i_base = i & ~0xFF; + int i_dithershift = (i & 0xFF) << (bits_per_pixel - 8); ii = i_base + i_dithershift; // otherwise dither has no visible effect on 10..16 bit } else { @@ -1039,17 +1039,102 @@ static bool ProcessPixelUnscaled(double X, double Y, double startHue, double end /********************** ****** Tweak ***** **********************/ +template +void Tweak::tweak_calc_luma(BYTE *srcp, int src_pitch, float minY, float maxY, int width, int height) +{ + float ditherval = 0.0f; + for (int y = 0; y < height; ++y) { + const int _y = (y << 4) & 0xf0; + for (int x = 0; x < width; ++x) { + if (dither) + ditherval = (ditherMap[(x & 0x0f) | _y] * dither_strength + bias_dither_luma) / (float)scale_dither_luma; // 0x00..0xFF -> -0.7F .. + 0.7F (+/- maxrange/512) + float y0 = reinterpret_cast(srcp)[x] - minY; + if(bpp10_14) + y0 = minY + (y0 + ditherval)*(float)dcont + (float)(1 << (bits_per_pixel - 8))*(float)dbright; // dbright parameter always 0..255. Scale to 0..255*4, 0.. 255*256 + else if(pixelsize == 2) + y0 = minY + (y0 + ditherval)*(float)dcont + 256.0f*(float)dbright; // dbright parameter always 0..255. Scale to 0..255*4, 0.. 255*256 + else if(pixelsize == 4) + y0 = minY + (y0 + ditherval)*(float)dcont + (float)dbright / 256.0f; // dbright parameter always 0..255, scale it to 0..1 + else // pixelsize == 1 + y0 = minY + ((y0 + ditherval)*(float)dcont + 1.0f*(float)dbright); // dbright parameter always 0..255. Scale to 0..255*4, 0.. 
255*256 + + reinterpret_cast(srcp)[x] = (pixel_t)clamp(y0, minY, maxY); + /* + int y = int(((ii - range_low * scale_dither_luma)*_cont + _bright * scale_dither_luma + bias_dither_luma) / scale_dither_luma + range_low + 0.5); // 256 _cont & _bright param range + // coring, dither: + // int y = int(((ii - 16 * 256)*_cont + _bright * 256 - 127.5) / 256 + 16.5); // 256 _cont & _bright param range + // coring, no dither: + // int y = int(((ii - 16)*_cont + _bright) + 16.5); // 256 _cont & _bright param range + // no coring, dither: + // int y = int((ii *_cont + _bright * 256 - 127.5) / 256 + 0.5 ); // 256 _cont & _bright param range + // no coring, no dither: + // int y = int((ii *_cont + _bright) + 0.5 ); // 256 _cont & _bright param range + */ + } + srcp += src_pitch; + } +} Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _cont, bool _coring, bool _sse, double _startHue, double _endHue, double _maxSat, double _minSat, double p, - bool _dither, bool _realcalc, IScriptEnvironment* env) + bool _dither, bool _realcalc, double _dither_strength, IScriptEnvironment* env) : GenericVideoFilter(_child), coring(_coring), sse(_sse), dither(_dither), realcalc(_realcalc), dhue(_hue), dsat(_sat), dbright(_bright), dcont(_cont), dstartHue(_startHue), dendHue(_endHue), - dmaxSat(_maxSat), dminSat(_minSat), dinterp(p) + dmaxSat(_maxSat), dminSat(_minSat), dinterp(p), dither_strength((float)_dither_strength) { if (vi.IsRGB()) env->ThrowError("Tweak: YUV data only (no RGB)"); + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); + max_pixel_value = (1 << bits_per_pixel) - 1; + lut_size = 1 << bits_per_pixel; + int safe_luma_lookup_size = (pixelsize == 1) ? 256 : 65536; // avoids lut overflow in case of non-standard content of a 10 bit clip + + if(bits_per_pixel < 32) { + tv_range_low = 16 << (bits_per_pixel - 8); // 16 + tv_range_hi_luma = ((235+1) << (bits_per_pixel - 8)) - 1; // 16-235 + range_luma = tv_range_hi_luma - tv_range_low; // 219 + + tv_range_hi_chroma = ((240+1) << (bits_per_pixel - 8)) - 1; // 16-240,64–963, 256–3855,... 4096-61695 + range_chroma = tv_range_hi_chroma - tv_range_low; // 224 + } + else { // float: range is 0..255 scaled later + tv_range_low = 16; // 16 + tv_range_hi_luma = 235; // 16-235 + range_luma = tv_range_hi_luma - tv_range_low; // 219 + + tv_range_hi_chroma = 240; // 16-240 + range_chroma = tv_range_hi_chroma - tv_range_low; // 224 + } + middle_chroma = 1 << (bits_per_pixel - 1); // 128 + + scale_dither_luma = 1; + divisor_dither_luma = 1; + bias_dither_luma = 0.0; + + scale_dither_chroma = 1; + divisor_dither_chroma = 1; + bias_dither_chroma = 0.0; + + if (pixelsize == 4) + dither_strength /= 256.0f; + else + dither_strength = (1 << (bits_per_pixel - 8)) * dither_strength; // base: 8-bit lookup + // make dither_strength = 4.0 for 10 bits, 256.0 for 16 bits in order to have same dither range as for 8 bit + // when 1.0 (default) is given as parameter + + if (dither) { + // lut scale settings + scale_dither_luma = 256; // lower 256 is dither value + divisor_dither_luma *= 256; + bias_dither_luma = -(256.0f * dither_strength - 1) / 2; // -127.5; + + scale_dither_chroma = 16; // lower 16 is dither value + divisor_dither_chroma *= 16; + bias_dither_chroma = -(16.0f * dither_strength - (pixelsize==4 ? 
1/256.0f : 1)) / 2; // -7.5 + } + // Flag to skip special processing if doing all pixels // If defaults, don't check for ranges, just do all const bool allPixels = (_startHue == 0.0 && _endHue == 360.0 && _maxSat == 150.0 && _minSat == 0.0); @@ -1094,55 +1179,55 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con Sin = (int) (SIN * 4096 + 0.5); Cos = (int) (COS * 4096 + 0.5); - int bits_per_pixel = vi.BitsPerComponent(); // 8,10..16,32 - - if (vi.IsPlanar() && (bits_per_pixel > 8)) - realcalc = true; // 8bit: lut OK. 10+ bits: no lookup tables. - // todo: 10 bit lut is still OK + realcalc_luma = realcalc; // from parameter + realcalc_chroma = realcalc; + if (vi.IsPlanar() && (bits_per_pixel > 10)) + realcalc_chroma = true; + // 8/10bit: chroma lut OK. 12+ bits: force no lookup tables. auto env2 = static_cast(env); - if(!(realcalc && vi.IsPlanar())) - { // fill brightness/constrast lookup tables - size_t map_size = dither ? 256 * 256 : 256; + // fill brightness/constrast lookup tables + if(!(realcalc_luma && vi.IsPlanar())) + { + size_t map_size = pixelsize * safe_luma_lookup_size * scale_dither_luma; + // for 10-16 bit with dither: 2 * 65536 * 256 = 33 MByte + // w/o dither: 2 * 65536 = 128 KByte map = static_cast(env2->Allocate(map_size, 8, AVS_NORMAL_ALLOC)); if (!map) env->ThrowError("Tweak: Could not reserve memory."); env->AtExit(free_buffer, map); - if (dither) { - if (coring) { - for (int i = 0; i < 256 * 256; i++) { - /* brightness and contrast */ - int y = int(((i - 16 * 256)*_cont + _bright * 256 - 127.5) / 256 + 16.5); - map[i] = (BYTE)clamp(y, 16, 235); + if(bits_per_pixel>8 && bits_per_pixel<16) // make lut table safe for 10-14 bit garbage + std::fill_n((uint16_t *)map, map_size / pixelsize, max_pixel_value); - } - } - else { - for (int i = 0; i < 256 * 256; i++) { - /* brightness and contrast */ - int y = int((i*_cont + _bright * 256 - 127.5) / 256 + 0.5); - map[i] = (BYTE)clamp(y, 0, 255); - } - } - } - else { - if (coring) { - for (int i = 0; i < 256; i++) { - /* brightness and contrast */ - int y = int((i - 16)*_cont + _bright + 16.5); - map[i] = (BYTE)clamp(y, 16, 235); + int range_low = coring ? tv_range_low : 0; + int range_high = coring ? 
tv_range_hi_luma : max_pixel_value; - } - } - else { - for (int i = 0; i < 256; i++) { - /* brightness and contrast */ - int y = int(i*_cont + _bright + 0.5); - map[i] = (BYTE)clamp(y, 0, 255); - } + // dither_scale_luma = 1 if no dither, 256 if dither + /* create luma lut for brightness and contrast */ + for (int i = 0; i < lut_size * scale_dither_luma; i++) { + int ii; + if(dither) { + ii = (i & 0xFFFFFF00) + (int)((i & 0xFF)*dither_strength); + } else { + ii = i; } + // _bright param range is accepted as 0..256 + int y = (int)(((ii - range_low * scale_dither_luma)*_cont + _bright * (1 << (bits_per_pixel - 8)) * scale_dither_luma + bias_dither_luma) / scale_dither_luma + range_low + 0.5); + + // coring, dither: + // int y = int(((ii - 16 * 256)*_cont + _bright * 256 - 127.5) / 256 + 16.5); // 256 _cont & _bright param range + // coring, no dither: + // int y = int(((ii - 16)*_cont + _bright) + 16.5); // 256 _cont & _bright param range + // no coring, dither: + // int y = int((ii *_cont + _bright * 256 - 127.5) / 256 + 0.5 ); // 256 _cont & _bright param range + // no coring, no dither: + // int y = int((ii *_cont + _bright) + 0.5 ); // 256 _cont & _bright param range + if(pixelsize==1) + map[i] = (BYTE)clamp(y, range_low, range_high); + else + reinterpret_cast(map)[i] = (uint16_t)clamp(y, range_low, range_high); } } // 100% equals sat=119 (= maximal saturation of valid RGB (R=255,G=B=0) @@ -1152,53 +1237,71 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con p *= 1.19; // Same units as minSat/maxSat - const int maxUV = coring ? 240 : 255; - const int minUV = coring ? 16 : 0; - - if (!(realcalc && vi.IsPlanar())) + if (!(realcalc_chroma && vi.IsPlanar())) { // fill lookup tables for UV - size_t map_size = 256 * 256 * sizeof(uint16_t) * (dither ? 16 : 1); - mapUV = static_cast(env2->Allocate(map_size, 8, AVS_NORMAL_ALLOC)); + size_t map_size = pixelsize * lut_size * lut_size * 2 * scale_dither_chroma; + // for 10 bit with dither: 2 * 1024 * 1024 * 2 * 4 = 4*64 MByte = 256M huh! 
+ // for 10 bit w/o dither: 2 * 1024 * 1024 * 2 = 64 MByte + + mapUV = static_cast(env2->Allocate(map_size, 8, AVS_NORMAL_ALLOC)); // uint16_t for (U+V bytes), casted to uint32_t for (U+V words in non-8 bit) if (!mapUV) env->ThrowError("Tweak: Could not reserve memory."); env->AtExit(free_buffer, mapUV); + int range_low = tv_range_low; + int range_high = tv_range_hi_chroma; + if (dither) { - for (int d = 0; d < 16; d++) { - for (int u = 0; u < 256; u++) { - const double destu = ((u << 4 | d) - 7.5) / 16.0 - 128.0; - for (int v = 0; v < 256; v++) { - const double destv = ((v << 4 | d) - 7.5) / 16.0 - 128.0; + // lut chroma, dither + for (int d = 0; d < scale_dither_chroma; d++) { // scale = 4 0..15 mini-dither + for (int u = 0; u < lut_size; u++) { + // dither_strength: optional correction for 8+ bit to have the same dither range as in 8 bits + const double destu = (((u << 4) + d*dither_strength) + bias_dither_chroma) / scale_dither_chroma - middle_chroma; // scale_dither_chroma: 16 + for (int v = 0; v < lut_size; v++) { + const double destv = (((v << 4) + d*dither_strength) + bias_dither_chroma) / scale_dither_chroma - middle_chroma; int iSat = Sat; if (allPixels || ProcessPixel(destv, destu, _startHue, _endHue, maxSat, minSat, p, iSat)) { - int du = int((destu*COS + destv*SIN) * iSat + 0x100) >> 9; // back from the extra 9 bits Sat precision - int dv = int((destv*COS - destu*SIN) * iSat + 0x100) >> 9; - du = clamp(du + 128, minUV, maxUV); - dv = clamp(dv + 128, minUV, maxUV); - mapUV[(u << 12) | (v << 4) | d] = (uint16_t)(du | (dv << 8)); + int du = (int)((destu*COS + destv*SIN) * iSat + 0x100) >> 9; // back from the extra 9 bits Sat precision + int dv = (int)((destv*COS - destu*SIN) * iSat + 0x100) >> 9; + du = clamp(du + middle_chroma, range_low, range_high); + dv = clamp(dv + middle_chroma, range_low, range_high); + if(pixelsize==1) + mapUV[(u << 12) | (v << 4) | d] = (uint16_t)(du | (dv << 8)); // U and V: two bytes + else + reinterpret_cast(mapUV)[(u << (4+bits_per_pixel)) | (v << 4) | d] = (uint32_t)(du | (dv << 16)); // U and V: two words } else { - mapUV[(u << 12) | (v << 4) | d] = (uint16_t)(clamp(u, minUV, maxUV) | (clamp(v, minUV, maxUV) << 8)); + if(pixelsize==1) + mapUV[(u << 12) | (v << 4) | d] = (uint16_t)(clamp(u, range_low, range_high) | (clamp(v, range_low, range_high) << 8)); // U and V: two bytes + else + reinterpret_cast(mapUV)[(u << (4+bits_per_pixel)) | (v << 4) | d] = (uint32_t)(clamp(u, range_low, range_high) | ((clamp(v, range_low, range_high) << 16))); // U and V: two words } } } } } else { - for (int u = 0; u < 256; u++) { - const double destu = u - 128; - for (int v = 0; v < 256; v++) { - const double destv = v - 128; + // lut chroma, no dither + for (int u = 0; u < lut_size; u++) { + const double destu = u - middle_chroma; + for (int v = 0; v < lut_size; v++) { + const double destv = v - middle_chroma; int iSat = Sat; if (allPixels || ProcessPixel(destv, destu, _startHue, _endHue, maxSat, minSat, p, iSat)) { int du = int((destu*COS + destv*SIN) * iSat) >> 9; // back from the extra 9 bits Sat precision int dv = int((destv*COS - destu*SIN) * iSat) >> 9; - du = clamp(du + 128, minUV, maxUV); - dv = clamp(dv + 128, minUV, maxUV); - mapUV[(u << 8) | v] = (uint16_t)(du | (dv << 8)); + du = clamp(du + middle_chroma, range_low, range_high); + dv = clamp(dv + middle_chroma, range_low, range_high); + if(pixelsize==1) + mapUV[(u << 8) | v] = (uint16_t)(du | (dv << 8)); // U and V: two bytes + else + reinterpret_cast(mapUV)[(u << bits_per_pixel) | v] = (uint32_t)(du | 
(dv << 16)); // U and V: two words } else { - mapUV[(u << 8) | v] = (uint16_t)(clamp(u, minUV, maxUV) | (clamp(v, minUV, maxUV) << 8)); + if(pixelsize==1) + mapUV[(u << 8) | v] = (uint16_t)(clamp(u, range_low, range_high) | (clamp(v, range_low, range_high) << 8)); // U and V: two bytes + else + reinterpret_cast(mapUV)[(u << bits_per_pixel) | v] = (uint32_t)(clamp(u, range_low, range_high) | (clamp(v, range_low, range_high) << 16)); // U and V: two words } } } @@ -1207,6 +1310,60 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con } +template +void Tweak::tweak_calc_chroma(BYTE *srcpu, BYTE *srcpv, int src_pitch, int width, int height, float minUV, float maxUV) +{ + // no lookup, alway true for 16/32 bit, optional for 8 bit + const double Hue = (dhue * PI) / 180.0; + // 100% equals sat=119 (= maximal saturation of valid RGB (R=255,G=B=0) + // 150% (=180) - 100% (=119) overshoot + const double minSat = 1.19 * dminSat; + const double maxSat = 1.19 * dmaxSat; + + const double p = dinterp * 1.19; // Same units as minSat/maxSat + + const int minUVi = (int)minUV; + const int maxUVi = (int)maxUV; + + float ditherval = 0.0; + float u, v; + const float cosHue = (float)cos(Hue); + const float sinHue = (float)sin(Hue); + // no lut, realcalc, float internals + const float pixel_range = sizeof(pixel_t) == 4 ? 1.0f : (float)(max_pixel_value + 1); + + for (int y = 0; y < height; ++y) { + const int _y = (y << 2) & 0xC; + for (int x = 0; x < width; ++x) { + if (dither) + ditherval = ((float(ditherMap4[(x & 0x3) | _y]) * dither_strength + bias_dither_chroma) / scale_dither_chroma); // +/-0.5 on 0..255 range + u = sizeof(pixel_t) == 4 ? (reinterpret_cast(srcpu)[x] - 0.5f) : (reinterpret_cast(srcpu)[x] - middle_chroma); + v = sizeof(pixel_t) == 4 ? (reinterpret_cast(srcpv)[x] - 0.5f) : (reinterpret_cast(srcpv)[x] - middle_chroma); + + u = (u + (dither ? ditherval : 0)) / (sizeof(pixel_t) == 4 ? 1.0f : pixel_range); // going from 0..1 to +/-0.5 + v = (v + (dither ? ditherval : 0)) / (sizeof(pixel_t) == 4 ? 1.0f : pixel_range); + + double dWorkSat = dsat; // init from original param + ProcessPixelUnscaled(v, u, dstartHue, dendHue, maxSat, minSat, p, dWorkSat); + + float du = ((u*cosHue + v*sinHue) * (float)dWorkSat) + 0.5f; // back to 0..1 + float dv = ((v*cosHue - u*sinHue) * (float)dWorkSat) + 0.5f; + + if(sizeof(pixel_t) == 4) { + reinterpret_cast(srcpu)[x] = (pixel_t)clamp(du, minUV, maxUV); + reinterpret_cast(srcpv)[x] = (pixel_t)clamp(dv, minUV, maxUV); + } else { + reinterpret_cast(srcpu)[x] = (pixel_t)clamp((int)(du * pixel_range), minUVi, maxUVi); + reinterpret_cast(srcpv)[x] = (pixel_t)clamp((int)(dv * pixel_range), minUVi, maxUVi); + } + } + srcpu += src_pitch; + srcpv += src_pitch; + } + +} + + PVideoFrame __stdcall Tweak::GetFrame(int n, IScriptEnvironment* env) { PVideoFrame src = child->GetFrame(n, env); @@ -1260,80 +1417,89 @@ PVideoFrame __stdcall Tweak::GetFrame(int n, IScriptEnvironment* env) srcp += src_pitch; } } + // YUY2 end } else if (vi.IsPlanar()) { // brightness and contrast - if (realcalc) // no lookup! alway true for 16/32 bit, optional for 8 bit + // no_lut and lut + int width = row_size / pixelsize; + if (realcalc_luma) { - int pixelsize = vi.ComponentSize(); - int width = row_size / pixelsize; + // no luma lookup! alway true for 32 bit, optional for 8-16 bits float maxY; float minY; float ditherval = 0.0f; // unique for each bit-depth, difference in the innermost loop (speed) - if (pixelsize == 1) { // uint8_t - maxY = coring ? 
235.0f : 255.0f; - minY = coring ? 16.0f : 0; - for (int y = 0; y < height; ++y) { - const int _y = (y << 4) & 0xf0; - for (int x = 0; x < width; ++x) { - if (dither) - ditherval = (ditherMap[(x & 0x0f) | _y] - 127.5f) / 256.0f; // 0x00..0xFF -> -0.5 .. + 0.5 (+/- maxrange/512) - float y0 = minY + ((srcp[x] - minY) + ditherval)*(float)dcont + (float)dbright; // dbright parameter always 0..255 - srcp[x] = (BYTE)clamp(y0, minY, maxY); - } - srcp += src_pitch; - } - - } else if (pixelsize == 2) { // uint16_t - maxY = coring ? 235.0f * 256 : 65535.0f; - minY = coring ? 16.0f * 256 : 0; - for (int y = 0; y < height; ++y) { - const int _y = (y << 4) & 0xf0; - for (int x = 0; x < width; ++x) { - if (dither) ditherval = (ditherMap[(x & 0x0f) | _y] - 127.5f); // 0x00..0xFF -> -0.7F .. + 0.7F (+/- maxrange/512) - float y0 = (reinterpret_cast(srcp)[x] - minY); - y0 = minY + ( (y0 + ditherval)*(float)dcont + 256.0f*(float)dbright); // dbright parameter always 0..255 - reinterpret_cast(srcp)[x] = (uint16_t)clamp(y0, minY, maxY); - } - srcp += src_pitch; - } - } else { // pixelsize 4: float - maxY = coring ? 235.0f / 256 : 1.0f; // scale into 0..1 range - minY = coring ? 16.0f / 256 : 0; - for (int y = 0; y < height; ++y) { - const int _y = (y << 4) & 0xf0; - for (int x = 0; x < width; ++x) { - if (dither) ditherval = (ditherMap[(x & 0x0f) | _y] - 127.5f) / 65536.0f; // 0x00..0xFF -> -0.5 .. + 0.5 (+/- maxrange/512) - float y0 = (reinterpret_cast(srcp)[x] - minY); - y0 = minY + (y0 + ditherval)*(float)dcont + (float)dbright / 256.0f; // dbright parameter always 0..255, scale it to 0..1 - reinterpret_cast(srcp)[x] = (float)clamp(y0, minY, maxY); - } - srcp += src_pitch; - } + maxY = (float)(coring ? tv_range_hi_luma : max_pixel_value); + minY = (float)(coring ? tv_range_low : 0); + + if(pixelsize == 1) { + if(dither) + tweak_calc_luma(srcp, src_pitch, minY, maxY, width, height); + else + tweak_calc_luma(srcp, src_pitch, minY, maxY, width, height); + } else if (bits_per_pixel < 16) { + if(dither) + tweak_calc_luma(srcp, src_pitch, minY, maxY, width, height); + else + tweak_calc_luma(srcp, src_pitch, minY, maxY, width, height); + } else if(bits_per_pixel == 16) { + if(dither) + tweak_calc_luma(srcp, src_pitch, minY, maxY, width, height); + else + tweak_calc_luma(srcp, src_pitch, minY, maxY, width, height); + } else { // float + maxY = coring ? 235.0f / 256 : 1.0f; // scale into 0..1 range + minY = coring ? 
16.0f / 256 : 0; + if(dither) + tweak_calc_luma(srcp, src_pitch, minY, maxY, width, height); + else + tweak_calc_luma(srcp, src_pitch, minY, maxY, width, height); } - } else { - // use lookup for 8 bit + /* brightness and contrast */ + // use luma lookup for 8-16 bits if (dither) { + if(pixelsize==1) { for (int y = 0; y < height; ++y) { const int _y = (y << 4) & 0xf0; - for (int x = 0; x < row_size; ++x) { + for (int x = 0; x < width; ++x) { /* brightness and contrast */ srcp[x] = map[srcp[x] << 8 | ditherMap[(x & 0x0f) | _y]]; } srcp += src_pitch; } + } + else { // pixelsize == 2 + for (int y = 0; y < height; ++y) { + const int _y = (y << 4) & 0xf0; + for (int x = 0; x < width; ++x) { + reinterpret_cast(srcp)[x] = reinterpret_cast(map)[reinterpret_cast(srcp)[x] << 8 | ditherMap[(x & 0x0f) | _y]]; + // no clamp, map is safely sized + } + srcp += src_pitch; + } + } } else { + if(pixelsize==1) { for (int y = 0; y < height; ++y) { - for (int x = 0; x < row_size; ++x) { - /* brightness and contrast */ + for (int x = 0; x < width; ++x) { srcp[x] = map[srcp[x]]; } srcp += src_pitch; } + } + else { // pixelsize == 2 + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + reinterpret_cast(srcp)[x] = reinterpret_cast(map)[reinterpret_cast(srcp)[x]]; + // no clamp, map is safely sized + } + srcp += src_pitch; + } + } } } // Y: brightness and contrast done @@ -1344,103 +1510,38 @@ PVideoFrame __stdcall Tweak::GetFrame(int n, IScriptEnvironment* env) BYTE * srcpv = src->GetWritePtr(PLANAR_V); row_size = src->GetRowSize(PLANAR_U); height = src->GetHeight(PLANAR_U); - - if (realcalc) { - // no lookup, alway true for 16/32 bit, optional for 8 bit - const double Hue = (dhue * PI) / 180.0; - // 100% equals sat=119 (= maximal saturation of valid RGB (R=255,G=B=0) - // 150% (=180) - 100% (=119) overshoot - const double minSat = 1.19 * dminSat; - const double maxSat = 1.19 * dmaxSat; - - double p = dinterp * 1.19; // Same units as minSat/maxSat - - int pixelsize = vi.ComponentSize(); - int width = row_size / pixelsize; - double maxUV = coring ? 240.0f : 255.0f; - double minUV = coring ? 
16.0f : 0; - double ditherval = 0.0; - double u, v; - double cosHue = cos(Hue); - double sinHue = sin(Hue); - - // unique for each bit-depth, difference in the innermost loop (speed) - if (pixelsize==1) - { - for (int y = 0; y < height; ++y) { - const int _y = (y << 2) & 0xC; - for (int x = 0; x < width; ++x) { - if (dither) - ditherval = ((double(ditherMap4[(x & 0x3) | _y]) - 7.5) / 16) / 256; - u = srcpu[x] / 256.0; - v = srcpv[x] / 256.0; - - u = u + ditherval - 0.5; // going from 0..1 to +/-0.5 - v = v + ditherval - 0.5; - double dWorkSat = dsat; // init from original param - ProcessPixelUnscaled(v, u, dstartHue, dendHue, dmaxSat, dminSat, p, dWorkSat); - double du = (u*cosHue + v*sinHue) * dWorkSat + 0.5f; // back to 0..1 - double dv = (v*cosHue - u*sinHue) * dWorkSat + 0.5f; - srcpu[x] = (BYTE)clamp(du * 256.0, minUV, maxUV); - srcpv[x] = (BYTE)clamp(dv * 256.0, minUV, maxUV); - } - srcpu += src_pitch; - srcpv += src_pitch; - } - } else if (pixelsize==2) { - maxUV *= 256; - minUV *= 256; - for (int y = 0; y < height; ++y) { - const int _y = (y << 2) & 0xC; - for (int x = 0; x < width; ++x) { - - if (dither) - ditherval = ((double(ditherMap4[(x & 0x3) | _y]) - 7.5) / 16) / 256; - u = reinterpret_cast(srcpu)[x] / 65536.0f; - v = reinterpret_cast(srcpv)[x] / 65536.0f; - - u = u + ditherval - 0.5; // going from 0..1 to +/-0.5 - v = v + ditherval - 0.5; - double dWorkSat = dsat; // init from original param - ProcessPixelUnscaled(v, u, dstartHue, dendHue, dmaxSat, dminSat, p, dWorkSat); - double du = ((u*cosHue + v*sinHue) * dWorkSat) + 0.5; // back to 0..1 - double dv = ((v*cosHue - u*sinHue) * dWorkSat) + 0.5; - reinterpret_cast(srcpu)[x] = (uint16_t)clamp(du * 65536.0, minUV, maxUV); - reinterpret_cast(srcpv)[x] = (uint16_t)clamp(dv * 65536.0, minUV, maxUV); - } - srcpu += src_pitch; - srcpv += src_pitch; - } - } else { // pixelsize==4 float - maxUV /= 256; - minUV /= 256; - for (int y = 0; y < height; ++y) { - const int _y = (y << 2) & 0xC; - for (int x = 0; x < width; ++x) { - if (dither) - ditherval = ((double(ditherMap4[(x & 0x3) | _y]) - 7.5) / 16) / 256; - u = reinterpret_cast(srcpu)[x]; - v = reinterpret_cast(srcpv)[x]; - - u = u + ditherval - 0.5; // going from 0..1 to +/-0.5 - v = v + ditherval - 0.5; - double dWorkSat = dsat; // init from original param - ProcessPixelUnscaled(v, u, dstartHue, dendHue, dmaxSat, dminSat, p, dWorkSat); - double du = ((u*cosHue + v*sinHue) * dWorkSat) + 0.5; // back to 0..1 - double dv = ((v*cosHue - u*sinHue) * dWorkSat) + 0.5; - reinterpret_cast(srcpu)[x] = (float)clamp(du/* * factor*/, minUV, maxUV); - reinterpret_cast(srcpv)[x] = (float)clamp(dv/* * factor*/, minUV, maxUV); - } - srcpu += src_pitch; - srcpv += src_pitch; - } - } + width = row_size / pixelsize; + + if (realcalc_chroma) { + // no lookup, alway true for > 10 bit, optional for 8/10 bit + float maxUV = (float)(coring ? tv_range_hi_chroma : max_pixel_value); + float minUV = (float)(coring ? 
tv_range_low : 0); + if(pixelsize == 1) { + if (dither) + tweak_calc_chroma(srcpu, srcpv, src_pitch, width, height, minUV, maxUV); + else + tweak_calc_chroma(srcpu, srcpv, src_pitch, width, height, minUV, maxUV); + } else if(pixelsize==2) { + if (dither) + tweak_calc_chroma(srcpu, srcpv, src_pitch, width, height, minUV, maxUV); + else + tweak_calc_chroma(srcpu, srcpv, src_pitch, width, height, minUV, maxUV); + } else { // pixelsize == 4 + maxUV /= 256.0f; + minUV /= 256.0f; + if (dither) + tweak_calc_chroma(srcpu, srcpv, src_pitch, width, height, minUV, maxUV); + else + tweak_calc_chroma(srcpu, srcpv, src_pitch, width, height, minUV, maxUV); + } } - else { + else { // lookup UV if (dither) { + // lut + dither + if(pixelsize==1) { for (int y = 0; y < height; ++y) { const int _y = (y << 2) & 0xC; - for (int x = 0; x < row_size; ++x) { + for (int x = 0; x < width; ++x) { const int _dither = ditherMap4[(x & 0x3) | _y]; /* hue and saturation */ const int u = srcpu[x]; @@ -1452,20 +1553,53 @@ PVideoFrame __stdcall Tweak::GetFrame(int n, IScriptEnvironment* env) srcpu += src_pitch; srcpv += src_pitch; } + } + else { // pixelsize == 2 + for (int y = 0; y < height; ++y) { + const int _y = (y << 2) & 0xC; + for (int x = 0; x < width; ++x) { + const int _dither = ditherMap4[(x & 0x3) | _y]; // 0..15 + /* hue and saturation */ + const int u = clamp(0,(int)reinterpret_cast(srcpu)[x], max_pixel_value); + const int v = clamp(0,(int)reinterpret_cast(srcpv)[x], max_pixel_value); + const unsigned int mapped = reinterpret_cast(mapUV)[(u << (4+bits_per_pixel)) | (v << 4) | _dither]; + reinterpret_cast(srcpu)[x] = (uint16_t)(mapped & 0xffff); + reinterpret_cast(srcpv)[x] = (uint16_t)(mapped >> 16); + } + srcpu += src_pitch; + srcpv += src_pitch; + } + } } else { + // lut + no dither + if(pixelsize==1) { for (int y = 0; y < height; ++y) { - for (int x = 0; x < row_size; ++x) { - /* hue and saturation */ - const int u = srcpu[x]; - const int v = srcpv[x]; - const int mapped = mapUV[(u << 8) | v]; - srcpu[x] = (BYTE)(mapped & 0xff); - srcpv[x] = (BYTE)(mapped >> 8); - } - srcpu += src_pitch; - srcpv += src_pitch; + for (int x = 0; x < width; ++x) { + /* hue and saturation */ + const int u = srcpu[x]; + const int v = srcpv[x]; + const int mapped = mapUV[(u << 8) | v]; + srcpu[x] = (BYTE)(mapped & 0xff); + srcpv[x] = (BYTE)(mapped >> 8); + } + srcpu += src_pitch; + srcpv += src_pitch; + } + } + else { // pixelsize == 2 + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + const int u = clamp(0,(int)reinterpret_cast(srcpu)[x], max_pixel_value); + const int v = clamp(0,(int)reinterpret_cast(srcpv)[x], max_pixel_value); + const unsigned int mapped = reinterpret_cast(mapUV)[(u << bits_per_pixel) | v]; + reinterpret_cast(srcpu)[x] = (uint16_t)(mapped & 0xffff); + reinterpret_cast(srcpv)[x] = (uint16_t)(mapped >> 16); + } + srcpu += src_pitch; + srcpv += src_pitch; } + } } } } @@ -1488,7 +1622,8 @@ AVSValue __cdecl Tweak::Create(AVSValue args, void* user_data, IScriptEnvironmen args[10].AsDblDef(0.0), // minSat args[11].AsDblDef(16.0 / 1.19),// interp args[12].AsBool(false), // dither - args[13].AsBool(false), // realcalc: force no-lookup (pure double calc/pixel) for 8 bit + args[13].AsBool(false), // realcalc: force no-lookup (pure float calculation pixel) + args[14].AsDblDef(1.0), // dither_strength 1.0 = +/-0.5 on the 0.255 range, scaled for others env); } diff --git a/avs_core/filters/levels.h b/avs_core/filters/levels.h index 9b6d3603f..bbd25c731 100644 --- a/avs_core/filters/levels.h +++ 
b/avs_core/filters/levels.h @@ -107,7 +107,7 @@ class Tweak : public GenericVideoFilter public: Tweak(PClip _child, double _hue, double _sat, double _bright, double _cont, bool _coring, bool _sse, double _startHue, double _endHue, double _maxSat, double _minSat, double _interp, - bool _dither, bool _realcalc, IScriptEnvironment* env); + bool _dither, bool _realcalc, double _dither_strength, IScriptEnvironment* env); PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); @@ -118,15 +118,49 @@ class Tweak : public GenericVideoFilter static AVSValue __cdecl Create(AVSValue args, void* user_data, IScriptEnvironment* env); private: + template + void tweak_calc_luma(BYTE *srcp, int src_pitch, float minY, float maxY, int width, int height); + + template + void tweak_calc_chroma(BYTE *srcpU, BYTE *srcpV, int src_pitch, int width, int height, float minUV, float maxUV); + int Sin, Cos; int Sat, Bright, Cont; bool coring, sse, dither; - bool realcalc; // no lookup, realtime calculation, always for 16/32 bits + const bool realcalc; // no lookup, realtime calculation, always for 16/32 bits double dhue, dsat, dbright, dcont, dstartHue, dendHue, dmaxSat, dminSat, dinterp; BYTE *map; uint16_t *mapUV; + // avs+ + bool realcalc_luma; + bool realcalc_chroma; + + int pixelsize; + int bits_per_pixel; // 8,10..16 + int max_pixel_value; + int lut_size; + int real_lookup_size; + + int tv_range_low; + int tv_range_hi_luma; + int range_luma; + + int tv_range_hi_chroma; + int range_chroma; + + int middle_chroma; + + int scale_dither_luma; + int divisor_dither_luma; + float bias_dither_luma; + + int scale_dither_chroma; + int divisor_dither_chroma; + float bias_dither_chroma; + + float dither_strength; }; From aaf897707d40f3ce36a71ef20ca4b0fa5af7211b Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 14 Sep 2016 18:03:51 +0200 Subject: [PATCH 071/120] Subtract: Planar RGB(A), YUV(A) 10-16,Float, RGB48 and RGB64 --- avs_core/filters/layer.cpp | 133 +++++++++++++++++++++++++++++-------- avs_core/filters/layer.h | 4 +- 2 files changed, 110 insertions(+), 27 deletions(-) diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp index 67f5cb9f8..88bf55997 100644 --- a/avs_core/filters/layer.cpp +++ b/avs_core/filters/layer.cpp @@ -3316,7 +3316,7 @@ AVSValue __cdecl Layer::Create(AVSValue args, void*, IScriptEnvironment* env) ******* Subtract Filter ****** *********************************/ bool Subtract::DiffFlag = false; -BYTE Subtract::Diff[513]; +BYTE Subtract::LUT_Diff8[513]; Subtract::Subtract(PClip _child1, PClip _child2, IScriptEnvironment* env) : child1(_child1), child2(_child2) @@ -3334,9 +3334,34 @@ Subtract::Subtract(PClip _child1, PClip _child2, IScriptEnvironment* env) vi.num_frames = max(vi1.num_frames, vi2.num_frames); vi.num_audio_samples = max(vi1.num_audio_samples, vi2.num_audio_samples); + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); + if (!DiffFlag) { // Init the global Diff table DiffFlag = true; - for (int i=0; i<=512; i++) Diff[i] = max(0,min(255,i-129)); + for (int i=0; i<=512; i++) LUT_Diff8[i] = max(0,min(255,i-129)); + // 0 .. 129 130 131 ... 255 256 257 258 384 ... 512 + // 0 .. 0 1 2 3 ... 126 127 128 129 ... 255 ... 255 + } +} + +template +static void subtract_plane(BYTE *src1p, const BYTE *src2p, int src1_pitch, int src2_pitch, int width, int height, int bits_per_pixel) +{ + typedef typename std::conditional < sizeof(pixel_t) == 4, float, int>::type limits_t; + + const limits_t limit = sizeof(pixel_t) == 1 ? 255 : sizeof(pixel_t) == 2 ? 
((1 << bits_per_pixel) - 1) : (limits_t)1.0f; + const limits_t equal_luma = sizeof(pixel_t) == 1 ? midpixel : sizeof(pixel_t) == 2 ? (midpixel << (bits_per_pixel - 8)) : (limits_t)(midpixel / 256.0f); + for (int y=0; y(src1p)[x] = + (pixel_t)clamp( + (limits_t)(reinterpret_cast(src1p)[x] - reinterpret_cast(src2p)[x] + equal_luma), // 126: luma of equality + (limits_t)0, + limit); + } + src1p += src1_pitch; + src2p += src2_pitch; } } @@ -3350,55 +3375,111 @@ PVideoFrame __stdcall Subtract::GetFrame(int n, IScriptEnvironment* env) BYTE* src1p = src1->GetWritePtr(); const BYTE* src2p = src2->GetReadPtr(); int row_size = src1->GetRowSize(); + int src1_pitch = src1->GetPitch(); + int src2_pitch = src2->GetPitch(); - if (vi.IsPlanar()) { - for (int y=0; y(src1p, src2p, src1_pitch, src2_pitch, width, height, bits_per_pixel); + // LUT is a bit faster than clamp version + for (int y=0; yGetPitch(); + src2p += src2->GetPitch(); } - src1p += src1->GetPitch(); - src2p += src2->GetPitch(); - } + } else if (pixelsize==2) + subtract_plane(src1p, src2p, src1_pitch, src2_pitch, width, height, bits_per_pixel); + else //if (pixelsize==4) + subtract_plane(src1p, src2p, src1_pitch, src2_pitch, width, height, bits_per_pixel); + // chroma row_size=src1->GetRowSize(PLANAR_U); - if (row_size) { + if (row_size) { + width = row_size / pixelsize; + height = src1->GetHeight(PLANAR_U); + src1_pitch = src1->GetPitch(PLANAR_U); + src2_pitch = src2->GetPitch(PLANAR_U); + // U_plane exists BYTE* src1p = src1->GetWritePtr(PLANAR_U); const BYTE* src2p = src2->GetReadPtr(PLANAR_U); BYTE* src1pV = src1->GetWritePtr(PLANAR_V); const BYTE* src2pV = src2->GetReadPtr(PLANAR_V); - for (int y=0; yGetHeight(PLANAR_U); y++) { - for (int x=0; x(src1p, src2p, src1_pitch, src2_pitch, width, height, bits_per_pixel); + //subtract_plane(src1pV, src2pV, src1_pitch, src2_pitch, width, height, bits_per_pixel); + + // LUT is a bit faster than clamp version + for (int y=0; yGetPitch(PLANAR_U); - src2p += src2->GetPitch(PLANAR_U); - src1pV += src1->GetPitch(PLANAR_V); - src2pV += src2->GetPitch(PLANAR_V); + } else if (pixelsize==2) { + subtract_plane(src1p, src2p, src1_pitch, src2_pitch, width, height, bits_per_pixel); + subtract_plane(src1pV, src2pV, src1_pitch, src2_pitch, width, height, bits_per_pixel); + } else { //if (pixelsize==4) + subtract_plane(src1p, src2p, src1_pitch, src2_pitch, width, height, bits_per_pixel); + subtract_plane(src1pV, src2pV, src1_pitch, src2_pitch, width, height, bits_per_pixel); } } return src1; - } // End planar + } // End planar YUV // For YUY2, 50% gray is about (126,128,128) instead of (128,128,128). Grr... if (vi.IsYUY2()) { for (int y=0; yGetPitch(); src2p += src2->GetPitch(); } } else { // RGB - for (int y=0; yGetWritePtr(plane); + src2p = src2->GetReadPtr(plane); + src1_pitch = src1->GetPitch(plane); + src2_pitch = src2->GetPitch(plane); + if(pixelsize==1) + subtract_plane(src1p, src2p, src1_pitch, src2_pitch, width, height, bits_per_pixel); + else if(pixelsize==2) + subtract_plane(src1p, src2p, src1_pitch, src2_pitch, width, height, bits_per_pixel); + else + subtract_plane(src1p, src2p, src1_pitch, src2_pitch, width, height, bits_per_pixel); + } + } else { // packed RGB + if(pixelsize == 1) { + for (int y=0; yGetPitch(); - src2p += src2->GetPitch(); + src1p += src1->GetPitch(); + src2p += src2->GetPitch(); + } + } + else { // pixelsize == 2: RGB48, RGB64 + // width is getrowsize based here: ok. 
+ subtract_plane(src1p, src2p, src1_pitch, src2_pitch, width, height, bits_per_pixel); + } } } return src1; diff --git a/avs_core/filters/layer.h b/avs_core/filters/layer.h index 27831bb2c..b4fadd51e 100644 --- a/avs_core/filters/layer.h +++ b/avs_core/filters/layer.h @@ -260,7 +260,9 @@ class Subtract : public IClip // Common to all instances static bool DiffFlag; - static BYTE Diff[513]; + static BYTE LUT_Diff8[513]; + int pixelsize; + int bits_per_pixel; }; From 1ded5d5442cc3f15adeb962166f1b5912b6ba62e Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 14 Sep 2016 19:24:22 +0200 Subject: [PATCH 072/120] ColorKeyMask: RGB64, Planar RGBA 8-16,float. --- avs_core/core/internal.h | 2 + avs_core/filters/layer.cpp | 133 +++++++++++++++++++++++++++++++------ avs_core/filters/layer.h | 7 ++ 3 files changed, 121 insertions(+), 21 deletions(-) diff --git a/avs_core/core/internal.h b/avs_core/core/internal.h index 5a0a0a23d..9c43d37ae 100644 --- a/avs_core/core/internal.h +++ b/avs_core/core/internal.h @@ -191,6 +191,8 @@ static __inline uint16_t ScaledPixelClipEx(__int64 i, int max_value) { static __inline bool IsClose(int a, int b, unsigned threshold) { return (unsigned(a-b+threshold) <= threshold*2); } +static __inline bool IsCloseFloat(float a, float b, float threshold) +{ return (a-b+threshold <= threshold*2); } diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp index 88bf55997..3a870e702 100644 --- a/avs_core/filters/layer.cpp +++ b/avs_core/filters/layer.cpp @@ -360,8 +360,22 @@ AVSValue __cdecl Mask::Create(AVSValue args, void*, IScriptEnvironment* env) ColorKeyMask::ColorKeyMask(PClip _child, int _color, int _tolB, int _tolG, int _tolR, IScriptEnvironment *env) : GenericVideoFilter(_child), color(_color & 0xffffff), tolB(_tolB & 0xff), tolG(_tolG & 0xff), tolR(_tolR & 0xff) { - if (!vi.IsRGB32()) - env->ThrowError("ColorKeyMask: requires RGB32 input"); + if (!vi.IsRGB32() && !vi.IsRGB64() && !vi.IsPlanarRGBA()) + env->ThrowError("ColorKeyMask: requires RGB32, RGB64 or Planar RGBA input"); + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); + max_pixel_value = (1 << bits_per_pixel) - 1; + + auto rgbcolor8to16 = [](uint8_t color8, int max_pixel_value) { return (uint16_t)(color8 * max_pixel_value / 255); }; + + uint64_t r = rgbcolor8to16((color >> 16) & 0xFF, max_pixel_value); + uint64_t g = rgbcolor8to16((color >> 8 ) & 0xFF, max_pixel_value); + uint64_t b = rgbcolor8to16((color ) & 0xFF, max_pixel_value); + uint64_t a = rgbcolor8to16((color >> 24) & 0xFF, max_pixel_value); + color64 = (a << 48) + (r << 32) + (g << 16) + (b); + tolR16 = rgbcolor8to16(tolR & 0xFF, max_pixel_value); // scale tolerance + tolG16 = rgbcolor8to16(tolG & 0xFF, max_pixel_value); + tolB16 = rgbcolor8to16(tolB & 0xFF, max_pixel_value); } static void colorkeymask_sse2(BYTE* pf, int pitch, int color, int height, int width, int tolB, int tolG, int tolR) { @@ -434,11 +448,11 @@ static void colorkeymask_mmx(BYTE* pf, int pitch, int color, int height, int wid #endif -static void colorkeymask_c(BYTE* pf, int pitch, int color, int height, int rowsize, int tolB, int tolG, int tolR) { - const int R = (color >> 16) & 0xff; - const int G = (color >> 8) & 0xff; - const int B = color & 0xff; - +template +static void colorkeymask_c(BYTE* pf8, int pitch, int R, int G, int B, int height, int rowsize, int tolB, int tolG, int tolR) { + pixel_t *pf = reinterpret_cast(pf8); + rowsize /= sizeof(pixel_t); + pitch /= sizeof(pixel_t); for (int y = 0; y< height; y++) { for (int x = 0; x < rowsize; 
x+=4) { if (IsClose(pf[x],B,tolB) && IsClose(pf[x+1],G,tolG) && IsClose(pf[x+2],R,tolR)) @@ -448,6 +462,45 @@ static void colorkeymask_c(BYTE* pf, int pitch, int color, int height, int rowsi } } +template +static void colorkeymask_planar_c(const BYTE* pfR8, const BYTE* pfG8, const BYTE* pfB8, BYTE* pfA8, int pitch, int R, int G, int B, int height, int width, int tolB, int tolG, int tolR) { + const pixel_t *pfR = reinterpret_cast(pfR8); + const pixel_t *pfG = reinterpret_cast(pfG8); + const pixel_t *pfB = reinterpret_cast(pfB8); + pixel_t *pfA = reinterpret_cast(pfA8); + pitch /= sizeof(pixel_t); + for (int y = 0; y< height; y++) { + for (int x = 0; x < width; x++) { + if (IsClose(pfB[x],B,tolB) && IsClose(pfG[x],G,tolG) && IsClose(pfR[x],R,tolR)) + pfA[x]=0; + } + pfR += pitch; + pfG += pitch; + pfB += pitch; + pfA += pitch; + } +} + +static void colorkeymask_planar_float_c(const BYTE* pfR8, const BYTE* pfG8, const BYTE* pfB8, BYTE* pfA8, int pitch, float R, float G, float B, int height, int width, float tolB, float tolG, float tolR) { + typedef float pixel_t; + const pixel_t *pfR = reinterpret_cast(pfR8); + const pixel_t *pfG = reinterpret_cast(pfG8); + const pixel_t *pfB = reinterpret_cast(pfB8); + pixel_t *pfA = reinterpret_cast(pfA8); + pitch /= sizeof(pixel_t); + for (int y = 0; y< height; y++) { + for (int x = 0; x < width; x++) { + if (IsCloseFloat(pfB[x],B,tolB) && IsCloseFloat(pfG[x],G,tolG) && IsCloseFloat(pfR[x],R,tolR)) + pfA[x]=0; + } + pfR += pitch; + pfG += pitch; + pfB += pitch; + pfA += pitch; + } +} + + PVideoFrame __stdcall ColorKeyMask::GetFrame(int n, IScriptEnvironment *env) { PVideoFrame frame = child->GetFrame(n, env); @@ -457,21 +510,59 @@ PVideoFrame __stdcall ColorKeyMask::GetFrame(int n, IScriptEnvironment *env) const int pitch = frame->GetPitch(); const int rowsize = frame->GetRowSize(); - if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(pf, 16)) - { - colorkeymask_sse2(pf, pitch, color, vi.height, rowsize, tolB, tolG, tolR); - } - else -#ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) - { - colorkeymask_mmx(pf, pitch, color, vi.height, rowsize, tolB, tolG, tolR); + if(vi.IsPlanarRGBA()) { + const BYTE* pf_g = frame->GetReadPtr(PLANAR_G); + const BYTE* pf_b = frame->GetReadPtr(PLANAR_B); + const BYTE* pf_r = frame->GetReadPtr(PLANAR_R); + BYTE* pf_a = frame->GetWritePtr(PLANAR_A); + + const int pitch = frame->GetPitch(); + const int width = vi.width; + + if(pixelsize == 1) { + const int R = (color >> 16) & 0xff; + const int G = (color >> 8) & 0xff; + const int B = color & 0xff; + colorkeymask_planar_c(pf_r, pf_g, pf_b, pf_a, pitch, R, G, B, vi.height, width, tolB, tolG, tolR); + } else if (pixelsize == 2) { + const int R = (color64 >> 32) & 0xffff; + const int G = (color64 >> 16) & 0xffff; + const int B = color64 & 0xffff; + colorkeymask_planar_c(pf_r, pf_g, pf_b, pf_a, pitch, R, G, B, vi.height, width, tolB16, tolG16, tolR16); + } else { // float + const float R = ((color >> 16) & 0xff) / 255.0f; + const float G = ((color >> 8) & 0xff) / 255.0f; + const float B = (color & 0xff) / 255.0f; + colorkeymask_planar_float_c(pf_r, pf_g, pf_b, pf_a, pitch, R, G, B, vi.height, width, tolB / 255.0f, tolG / 255.0f, tolR / 255.0f); + } + } else { + // RGB32, RGB64 + if ((pixelsize==1) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(pf, 16)) + { + colorkeymask_sse2(pf, pitch, color, vi.height, rowsize, tolB, tolG, tolR); + } + else + #ifdef X86_32 + if ((pixelsize==1) && (env->GetCPUFlags() & CPUF_MMX)) + { + colorkeymask_mmx(pf, pitch, color, vi.height, rowsize, 
tolB, tolG, tolR); + } + else + #endif + { + if(pixelsize == 1) { + const int R = (color >> 16) & 0xff; + const int G = (color >> 8) & 0xff; + const int B = color & 0xff; + colorkeymask_c(pf, pitch, R, G, B, vi.height, rowsize, tolB, tolG, tolR); + } else { + const int R = (color64 >> 32) & 0xffff; + const int G = (color64 >> 16) & 0xffff; + const int B = color64 & 0xffff; + colorkeymask_c(pf, pitch, R, G, B, vi.height, rowsize, tolB16, tolG16, tolR16); + } + } } - else -#endif - { - colorkeymask_c(pf, pitch, color, vi.height, rowsize, tolB, tolG, tolR); - } return frame; } diff --git a/avs_core/filters/layer.h b/avs_core/filters/layer.h index b4fadd51e..d16211027 100644 --- a/avs_core/filters/layer.h +++ b/avs_core/filters/layer.h @@ -43,6 +43,7 @@ #define __Layer_H__ #include +#include /******************************************************************** @@ -98,6 +99,12 @@ class ColorKeyMask : public GenericVideoFilter private: const int color, tolB, tolG, tolR; + uint64_t color64; + int tolB16, tolG16, tolR16; + int pixelsize; + int bits_per_pixel; + int max_pixel_value; + }; From 152f32851a9ccfaf871967a41e737bd5642008e9 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 15 Sep 2016 18:56:54 +0200 Subject: [PATCH 073/120] MergeChroma: fix regression for 8 bit (+variable renames) --- avs_core/filters/merge.cpp | 92 +++++++++++++++++++------------------- avs_core/filters/merge.h | 2 +- 2 files changed, 48 insertions(+), 46 deletions(-) diff --git a/avs_core/filters/merge.cpp b/avs_core/filters/merge.cpp index 2977903e6..584d3afcd 100644 --- a/avs_core/filters/merge.cpp +++ b/avs_core/filters/merge.cpp @@ -355,17 +355,17 @@ static void replace_luma_yuy2_c(BYTE *src, const BYTE *luma, int pitch, int luma * average_plane * ----------------------------------- */ -template -static void average_plane_sse2(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int width, int height) { +template +static void average_plane_sse2(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height) { // width is RowSize here - int mod16_width = width / 16 * 16; + int mod16_width = rowsize / 16 * 16; for(int y = 0; y < height; y++) { for(int x = 0; x < mod16_width; x+=16) { __m128i src1 = _mm_load_si128(reinterpret_cast(p1+x)); __m128i src2 = _mm_load_si128(reinterpret_cast(p2+x)); __m128i dst; - if(sizeof(pixel_size)==1) + if(sizeof(pixel_t)==1) dst = _mm_avg_epu8(src1, src2); // 8 pixels else // pixel_size == 2 dst = _mm_avg_epu16(src1, src2); // 4 pixels @@ -373,9 +373,9 @@ static void average_plane_sse2(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pi _mm_store_si128(reinterpret_cast<__m128i*>(p1+x), dst); } - if (mod16_width != width) { - for (size_t x = mod16_width / sizeof(pixel_size); x < width/sizeof(pixel_size); ++x) { - reinterpret_cast(p1)[x] = (int(reinterpret_cast(p1)[x]) + reinterpret_cast(p2)[x] + 1) >> 1; + if (mod16_width != rowsize) { + for (size_t x = mod16_width / sizeof(pixel_t); x < rowsize/sizeof(pixel_t); ++x) { + reinterpret_cast(p1)[x] = (int(reinterpret_cast(p1)[x]) + reinterpret_cast(p2)[x] + 1) >> 1; } } p1 += p1_pitch; @@ -384,26 +384,26 @@ static void average_plane_sse2(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pi } #ifdef X86_32 -template -static void average_plane_isse(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int width, int height) { +template +static void average_plane_isse(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height) { // width is RowSize here - int mod8_width = width / 8 * 8; + int mod8_width = rowsize / 
8 * 8; for(int y = 0; y < height; y++) { for(int x = 0; x < mod8_width; x+=8) { __m64 src1 = *reinterpret_cast(p1+x); __m64 src2 = *reinterpret_cast(p2+x); __m64 dst; - if(sizeof(pixel_size)==1) + if(sizeof(pixel_t)==1) dst = _mm_avg_pu8(src1, src2); // 8 pixels else // pixel_size == 2 dst = _mm_avg_pu16(src1, src2); // 4 pixels *reinterpret_cast<__m64*>(p1+x) = dst; } - if (mod8_width != width) { - for (size_t x = mod8_width / sizeof(pixel_size); x < width / sizeof(pixel_size); ++x) { - reinterpret_cast(p1)[x] = (int(reinterpret_cast(p1)[x]) + reinterpret_cast(p2)[x] + 1) >> 1; + if (mod8_width != rowsize) { + for (size_t x = mod8_width / sizeof(pixel_t); x < rowsize / sizeof(pixel_t); ++x) { + reinterpret_cast(p1)[x] = (int(reinterpret_cast(p1)[x]) + reinterpret_cast(p2)[x] + 1) >> 1; } } p1 += p1_pitch; @@ -414,11 +414,11 @@ static void average_plane_isse(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pi #endif // for uint8_t and uint16_t -template +template static void average_plane_c(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height) { for (int y = 0; y < height; ++y) { - for (size_t x = 0; x < rowsize / sizeof(pixel_size); ++x) { - reinterpret_cast(p1)[x] = (int(reinterpret_cast(p1)[x]) + reinterpret_cast(p2)[x] + 1) >> 1; + for (size_t x = 0; x < rowsize / sizeof(pixel_t); ++x) { + reinterpret_cast(p1)[x] = (int(reinterpret_cast(p1)[x]) + reinterpret_cast(p2)[x] + 1) >> 1; } p1 += p1_pitch; p2 += p2_pitch; @@ -442,13 +442,13 @@ static void average_plane_c_float(BYTE *p1, const BYTE *p2, int p1_pitch, int p2 * weighted_merge_planar * ----------------------------------- */ -void weighted_merge_planar_uint16_sse41(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int width, int height, int weight, int invweight) { +void weighted_merge_planar_uint16_sse41(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height, int weight, int invweight) { __m128i round_mask = _mm_set1_epi32(0x4000); __m128i zero = _mm_setzero_si128(); __m128i weightmask = _mm_set1_epi32(weight); __m128i invweightmask = _mm_set1_epi32(invweight); - int wMod16 = (width / 16) * 16; + int wMod16 = (rowsize / 16) * 16; for (int y = 0; y < height; y++) { for (int x = 0; x < wMod16; x += 16) { @@ -479,7 +479,7 @@ void weighted_merge_planar_uint16_sse41(BYTE *p1, const BYTE *p2, int p1_pitch, _mm_stream_si128(reinterpret_cast<__m128i*>(p1 + x), result); } - for (size_t x = wMod16 / sizeof(uint16_t); x < width / sizeof(uint16_t); x++) { + for (size_t x = wMod16 / sizeof(uint16_t); x < rowsize / sizeof(uint16_t); x++) { reinterpret_cast(p1)[x] = (reinterpret_cast(p1)[x] * invweight + reinterpret_cast(p2)[x] * weight + 16384) >> 15; } @@ -488,13 +488,13 @@ void weighted_merge_planar_uint16_sse41(BYTE *p1, const BYTE *p2, int p1_pitch, } } -void weighted_merge_planar_sse2(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int width, int height, int weight, int invweight) { +void weighted_merge_planar_sse2(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height, int weight, int invweight) { // 8 bit only. 
SSE2 has weak support for unsigned 16 bit __m128i round_mask = _mm_set1_epi32(0x4000); __m128i zero = _mm_setzero_si128(); __m128i mask = _mm_set_epi16(weight, invweight, weight, invweight, weight, invweight, weight, invweight); - int wMod16 = (width / 16) * 16; + int wMod16 = (rowsize / 16) * 16; for (int y = 0; y < height; y++) { for (int x = 0; x < wMod16; x += 16) { @@ -532,7 +532,7 @@ void weighted_merge_planar_sse2(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_p _mm_store_si128(reinterpret_cast<__m128i*>(p1 + x), result); } - for (size_t x = wMod16 / sizeof(uint8_t); x < width / sizeof(uint8_t); x++) { + for (size_t x = wMod16 / sizeof(uint8_t); x < rowsize / sizeof(uint8_t); x++) { reinterpret_cast(p1)[x] = (reinterpret_cast(p1)[x] * invweight + reinterpret_cast(p2)[x] * weight + 16384) >> 15; } @@ -542,12 +542,12 @@ void weighted_merge_planar_sse2(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_p } #ifdef X86_32 -void weighted_merge_planar_mmx(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int width, int height, int weight, int invweight) { +void weighted_merge_planar_mmx(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height, int weight, int invweight) { __m64 round_mask = _mm_set1_pi32(0x4000); __m64 zero = _mm_setzero_si64(); __m64 mask = _mm_set_pi16(weight, invweight, weight, invweight); - int wMod8 = (width/8) * 8; + int wMod8 = (rowsize/8) * 8; for (int y = 0; y < height; y++) { for (int x = 0; x < wMod8; x += 8) { @@ -585,7 +585,7 @@ void weighted_merge_planar_mmx(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pi *reinterpret_cast<__m64*>(p1+x) = result; } - for (int x = wMod8; x < width; x++) { + for (int x = wMod8; x < rowsize; x++) { p1[x] = (p1[x]*invweight + p2[x]*weight + 16384) >> 15; } @@ -597,11 +597,11 @@ void weighted_merge_planar_mmx(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pi #endif -template +template void weighted_merge_planar_c(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch,int rowsize, int height, int weight, int invweight) { for (int y=0;y(p1))[x] = ((reinterpret_cast(p1))[x]*invweight + (reinterpret_cast(p2))[x]*weight + 32768) >> 16; + for (size_t x=0;x(p1))[x] = ((reinterpret_cast(p1))[x]*invweight + (reinterpret_cast(p2))[x]*weight + 32768) >> 16; } p2+=p2_pitch; p1+=p1_pitch; @@ -634,7 +634,7 @@ extern const AVSFunction Merge_filters[] = { { 0 } }; -static void merge_plane(BYTE* srcp, const BYTE* otherp, int src_pitch, int other_pitch, int src_width, int src_height, float weight, int pixelsize, IScriptEnvironment *env) { +static void merge_plane(BYTE* srcp, const BYTE* otherp, int src_pitch, int other_pitch, int src_rowsize, int src_height, float weight, int pixelsize, IScriptEnvironment *env) { if ((weight > 0.4961f) && (weight < 0.5039f)) { //average of two planes @@ -642,30 +642,30 @@ static void merge_plane(BYTE* srcp, const BYTE* otherp, int src_pitch, int other { if ((env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(otherp, 16)) { if(pixelsize==1) - average_plane_sse2(srcp, otherp, src_pitch, other_pitch, src_width, src_height); + average_plane_sse2(srcp, otherp, src_pitch, other_pitch, src_rowsize, src_height); else // pixel_size==2 - average_plane_sse2(srcp, otherp, src_pitch, other_pitch, src_width, src_height); + average_plane_sse2(srcp, otherp, src_pitch, other_pitch, src_rowsize, src_height); } else #ifdef X86_32 if (env->GetCPUFlags() & CPUF_INTEGER_SSE) { if (pixelsize == 1) - average_plane_isse(srcp, otherp, src_pitch, other_pitch, src_width, src_height); + 
average_plane_isse(srcp, otherp, src_pitch, other_pitch, src_rowsize, src_height); else // pixel_size==2 - average_plane_isse(srcp, otherp, src_pitch, other_pitch, src_width, src_height); + average_plane_isse(srcp, otherp, src_pitch, other_pitch, src_rowsize, src_height); } else #endif { if (pixelsize == 1) - average_plane_c(srcp, otherp, src_pitch, other_pitch, src_width, src_height); + average_plane_c(srcp, otherp, src_pitch, other_pitch, src_rowsize, src_height); else // pixel_size==2 - average_plane_c(srcp, otherp, src_pitch, other_pitch, src_width, src_height); + average_plane_c(srcp, otherp, src_pitch, other_pitch, src_rowsize, src_height); } } else // if (pixelsize == 4) { // todo sse for float - average_plane_c_float(srcp, otherp, src_pitch, other_pitch, src_width, src_height); + average_plane_c_float(srcp, otherp, src_pitch, other_pitch, src_rowsize, src_height); } } @@ -705,13 +705,13 @@ static void merge_plane(BYTE* srcp, const BYTE* otherp, int src_pitch, int other weighted_merge_planar = &weighted_merge_planar_c; } } - weighted_merge_planar(srcp, otherp, src_pitch, other_pitch, src_width, src_height, iweight, invweight); + weighted_merge_planar(srcp, otherp, src_pitch, other_pitch, src_rowsize, src_height, iweight, invweight); } else // if (pixelsize == 4) { float fweight = weight; // intentional float finvweight = 1-fweight; - weighted_merge_planar_c_float(srcp, otherp, src_pitch, other_pitch, src_width, src_height, fweight, finvweight); + weighted_merge_planar_c_float(srcp, otherp, src_pitch, other_pitch, src_rowsize, src_height, fweight, finvweight); } } } @@ -736,6 +736,8 @@ MergeChroma::MergeChroma(PClip _child, PClip _clip, float _weight, IScriptEnviro if (weight<0.0f) weight=0.0f; if (weight>1.0f) weight=1.0f; + + pixelsize = vi.ComponentSize(); } @@ -784,12 +786,12 @@ PVideoFrame __stdcall MergeChroma::GetFrame(int n, IScriptEnvironment* env) BYTE* chromapV = (BYTE*)chroma->GetReadPtr(PLANAR_V); int src_pitch_uv = src->GetPitch(PLANAR_U); int chroma_pitch_uv = chroma->GetPitch(PLANAR_U); - int src_width_u = src->GetRowSize(PLANAR_U_ALIGNED); - int src_width_v = src->GetRowSize(PLANAR_V_ALIGNED); + int src_rowsize_u = src->GetRowSize(PLANAR_U_ALIGNED); + int src_rowsize_v = src->GetRowSize(PLANAR_V_ALIGNED); int src_height_uv = src->GetHeight(PLANAR_U); - merge_plane(srcpU, chromapU, src_pitch_uv, chroma_pitch_uv, src_width_u, src_height_uv, weight, pixelsize, env); - merge_plane(srcpV, chromapV, src_pitch_uv, chroma_pitch_uv, src_width_v, src_height_uv, weight, pixelsize, env); + merge_plane(srcpU, chromapU, src_pitch_uv, chroma_pitch_uv, src_rowsize_u, src_height_uv, weight, pixelsize, env); + merge_plane(srcpV, chromapV, src_pitch_uv, chroma_pitch_uv, src_rowsize_v, src_height_uv, weight, pixelsize, env); if(vi.IsYUVA()) merge_plane(src->GetWritePtr(PLANAR_A), chroma->GetReadPtr(PLANAR_A), src->GetPitch(PLANAR_A), chroma->GetPitch(PLANAR_A), @@ -975,10 +977,10 @@ PVideoFrame __stdcall MergeLuma::GetFrame(int n, IScriptEnvironment* env) BYTE* lumapY = (BYTE*)luma->GetReadPtr(PLANAR_Y); int src_pitch = src->GetPitch(PLANAR_Y); int luma_pitch = luma->GetPitch(PLANAR_Y); - int src_width = src->GetRowSize(PLANAR_Y); + int src_rowsize = src->GetRowSize(PLANAR_Y); int src_height = src->GetHeight(PLANAR_Y); - merge_plane(srcpY, lumapY, src_pitch, luma_pitch, src_width, src_height, weight, pixelsize, env); + merge_plane(srcpY, lumapY, src_pitch, luma_pitch, src_rowsize, src_height, weight, pixelsize, env); } return src; diff --git a/avs_core/filters/merge.h 
b/avs_core/filters/merge.h index f9224d8ab..3b2192d08 100644 --- a/avs_core/filters/merge.h +++ b/avs_core/filters/merge.h @@ -117,7 +117,7 @@ typedef void(*MergeFuncPtr) (BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitc void weighted_merge_planar_uint16_sse41(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int width, int height, int weight, int invweight); void weighted_merge_planar_sse2(BYTE *p1,const BYTE *p2, int p1_pitch, int p2_pitch,int rowsize, int height, int weight, int invweight); void weighted_merge_planar_mmx(BYTE *p1,const BYTE *p2, int p1_pitch, int p2_pitch,int rowsize, int height, int weight, int invweight); -template +template void weighted_merge_planar_c(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height, int weight, int invweight); void weighted_merge_planar_c_float(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height, float weight, float invweight); From 873f4f7d50498bd154ce609841bc1c9ca011880d Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 15 Sep 2016 21:25:18 +0200 Subject: [PATCH 074/120] MaskHS: 10-16bit,float. Tweak: fix using start/endHue, min/maxSat for 10+ bits MaskHS: LUT for 10/12 bits. LUT usage override when new parameter: realcalc=true --- avs_core/filters/levels.cpp | 239 ++++++++++++++++++++++++++++-------- avs_core/filters/levels.h | 34 ++++- 2 files changed, 217 insertions(+), 56 deletions(-) diff --git a/avs_core/filters/levels.cpp b/avs_core/filters/levels.cpp index 462455d7a..74a013c53 100644 --- a/avs_core/filters/levels.cpp +++ b/avs_core/filters/levels.cpp @@ -54,7 +54,7 @@ extern const AVSFunction Levels_filters[] = { { "Levels", BUILTIN_FUNC_PREFIX, "cifiii[coring]b[dither]b", Levels::Create }, // src_low, gamma, src_high, dst_low, dst_high { "RGBAdjust", BUILTIN_FUNC_PREFIX, "c[r]f[g]f[b]f[a]f[rb]f[gb]f[bb]f[ab]f[rg]f[gg]f[bg]f[ag]f[analyze]b[dither]b", RGBAdjust::Create }, { "Tweak", BUILTIN_FUNC_PREFIX, "c[hue]f[sat]f[bright]f[cont]f[coring]b[sse]b[startHue]f[endHue]f[maxSat]f[minSat]f[interp]f[dither]b[realcalc]b[dither_strength]f", Tweak::Create }, - { "MaskHS", BUILTIN_FUNC_PREFIX, "c[startHue]f[endHue]f[maxSat]f[minSat]f[coring]b", MaskHS::Create }, + { "MaskHS", BUILTIN_FUNC_PREFIX, "c[startHue]f[endHue]f[maxSat]f[minSat]f[coring]b[realcalc]b", MaskHS::Create }, { "Limiter", BUILTIN_FUNC_PREFIX, "c[min_luma]i[max_luma]i[min_chroma]i[max_chroma]i[show]s", Limiter::Create }, { 0 } }; @@ -1137,7 +1137,7 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con // Flag to skip special processing if doing all pixels // If defaults, don't check for ranges, just do all - const bool allPixels = (_startHue == 0.0 && _endHue == 360.0 && _maxSat == 150.0 && _minSat == 0.0); + allPixels = (_startHue == 0.0 && _endHue == 360.0 && _maxSat == 150.0 && _minSat == 0.0); // The new "mapping" C code is faster than the iSSE code on my 3GHz P4HT - Make it optional if (sse && (!allPixels || coring || dither || !vi.IsYUY2())) @@ -1248,8 +1248,10 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con env->ThrowError("Tweak: Could not reserve memory."); env->AtExit(free_buffer, mapUV); - int range_low = tv_range_low; - int range_high = tv_range_hi_chroma; + int range_low = coring ? tv_range_low : 0; + int range_high = coring ? 
tv_range_hi_chroma : max_pixel_value; + + double uv_range_corr = 1.0 / (1 << (bits_per_pixel - 8)); if (dither) { // lut chroma, dither @@ -1260,7 +1262,7 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con for (int v = 0; v < lut_size; v++) { const double destv = (((v << 4) + d*dither_strength) + bias_dither_chroma) / scale_dither_chroma - middle_chroma; int iSat = Sat; - if (allPixels || ProcessPixel(destv, destu, _startHue, _endHue, maxSat, minSat, p, iSat)) { + if (allPixels || ProcessPixel(destv * uv_range_corr, destu * uv_range_corr, _startHue, _endHue, maxSat, minSat, p, iSat)) { int du = (int)((destu*COS + destv*SIN) * iSat + 0x100) >> 9; // back from the extra 9 bits Sat precision int dv = (int)((destv*COS - destu*SIN) * iSat + 0x100) >> 9; du = clamp(du + middle_chroma, range_low, range_high); @@ -1287,7 +1289,7 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con for (int v = 0; v < lut_size; v++) { const double destv = v - middle_chroma; int iSat = Sat; - if (allPixels || ProcessPixel(destv, destu, _startHue, _endHue, maxSat, minSat, p, iSat)) { + if (allPixels || ProcessPixel(destv * uv_range_corr, destu * uv_range_corr, _startHue, _endHue, maxSat, minSat, p, iSat)) { int du = int((destu*COS + destv*SIN) * iSat) >> 9; // back from the extra 9 bits Sat precision int dv = int((destv*COS - destu*SIN) * iSat) >> 9; du = clamp(du + middle_chroma, range_low, range_high); @@ -1332,29 +1334,43 @@ void Tweak::tweak_calc_chroma(BYTE *srcpu, BYTE *srcpv, int src_pitch, int width // no lut, realcalc, float internals const float pixel_range = sizeof(pixel_t) == 4 ? 1.0f : (float)(max_pixel_value + 1); + double uv_range_corr = 255.0; + for (int y = 0; y < height; ++y) { const int _y = (y << 2) & 0xC; for (int x = 0; x < width; ++x) { if (dither) ditherval = ((float(ditherMap4[(x & 0x3) | _y]) * dither_strength + bias_dither_chroma) / scale_dither_chroma); // +/-0.5 on 0..255 range - u = sizeof(pixel_t) == 4 ? (reinterpret_cast(srcpu)[x] - 0.5f) : (reinterpret_cast(srcpu)[x] - middle_chroma); - v = sizeof(pixel_t) == 4 ? (reinterpret_cast(srcpv)[x] - 0.5f) : (reinterpret_cast(srcpv)[x] - middle_chroma); + pixel_t orig_u = reinterpret_cast(srcpu)[x]; + pixel_t orig_v = reinterpret_cast(srcpv)[x] ; + u = sizeof(pixel_t) == 4 ? (orig_u - 0.5f) : (orig_u - middle_chroma); + v = sizeof(pixel_t) == 4 ? (orig_v - 0.5f) : (orig_v - middle_chroma); u = (u + (dither ? ditherval : 0)) / (sizeof(pixel_t) == 4 ? 1.0f : pixel_range); // going from 0..1 to +/-0.5 v = (v + (dither ? ditherval : 0)) / (sizeof(pixel_t) == 4 ? 
1.0f : pixel_range); double dWorkSat = dsat; // init from original param - ProcessPixelUnscaled(v, u, dstartHue, dendHue, maxSat, minSat, p, dWorkSat); - - float du = ((u*cosHue + v*sinHue) * (float)dWorkSat) + 0.5f; // back to 0..1 - float dv = ((v*cosHue - u*sinHue) * (float)dWorkSat) + 0.5f; + if(allPixels || ProcessPixelUnscaled(v * uv_range_corr, u * uv_range_corr, dstartHue, dendHue, maxSat, minSat, p, dWorkSat)) + { + float du = ((u*cosHue + v*sinHue) * (float)dWorkSat) + 0.5f; // back to 0..1 + float dv = ((v*cosHue - u*sinHue) * (float)dWorkSat) + 0.5f; - if(sizeof(pixel_t) == 4) { - reinterpret_cast(srcpu)[x] = (pixel_t)clamp(du, minUV, maxUV); - reinterpret_cast(srcpv)[x] = (pixel_t)clamp(dv, minUV, maxUV); - } else { - reinterpret_cast(srcpu)[x] = (pixel_t)clamp((int)(du * pixel_range), minUVi, maxUVi); - reinterpret_cast(srcpv)[x] = (pixel_t)clamp((int)(dv * pixel_range), minUVi, maxUVi); + if(sizeof(pixel_t) == 4) { + reinterpret_cast(srcpu)[x] = (pixel_t)clamp(du, minUV, maxUV); + reinterpret_cast(srcpv)[x] = (pixel_t)clamp(dv, minUV, maxUV); + } else { + reinterpret_cast(srcpu)[x] = (pixel_t)clamp((int)(du * pixel_range), minUVi, maxUVi); + reinterpret_cast(srcpv)[x] = (pixel_t)clamp((int)(dv * pixel_range), minUVi, maxUVi); + } + } + else { + if(sizeof(pixel_t) == 4) { + reinterpret_cast(srcpu)[x] = (pixel_t)clamp((float)orig_u, minUV, maxUV); + reinterpret_cast(srcpv)[x] = (pixel_t)clamp((float)orig_v, minUV, maxUV); + } else { + reinterpret_cast(srcpu)[x] = (pixel_t)clamp((int)(orig_u), minUVi, maxUVi); + reinterpret_cast(srcpv)[x] = (pixel_t)clamp((int)(orig_v), minUVi, maxUVi); + } } } srcpu += src_pitch; @@ -1631,9 +1647,9 @@ AVSValue __cdecl Tweak::Create(AVSValue args, void* user_data, IScriptEnvironmen ****** MaskHS ***** **********************/ -MaskHS::MaskHS(PClip _child, double startHue, double endHue, double _maxSat, double _minSat, bool coring, +MaskHS::MaskHS(PClip _child, double _startHue, double _endHue, double _maxSat, double _minSat, bool _coring, bool _realcalc, IScriptEnvironment* env) - : GenericVideoFilter(_child) + : GenericVideoFilter(_child), dstartHue(_startHue), dendHue(_endHue), dmaxSat(_maxSat), dminSat(_minSat), coring(_coring), realcalc(_realcalc) { if (vi.IsRGB()) env->ThrowError("MaskHS: YUV data only (no RGB)"); @@ -1642,50 +1658,100 @@ MaskHS::MaskHS(PClip _child, double startHue, double endHue, double _maxSat, dou env->ThrowError("MaskHS: clip must contain chroma."); } - if (startHue < 0.0 || startHue >= 360.0) + if (dstartHue < 0.0 || dstartHue >= 360.0) env->ThrowError("MaskHS: startHue must be greater than or equal to 0.0 and less than 360.0"); - if (endHue <= 0.0 || endHue > 360.0) + if (dendHue <= 0.0 || dendHue > 360.0) env->ThrowError("MaskHS: endHue must be greater than 0.0 and less than or equal to 360.0"); - if (_minSat >= _maxSat) + if (dminSat >= dmaxSat) env->ThrowError("MaskHS: MinSat must be less than MaxSat"); - if (_minSat < 0.0 || _minSat >= 150.0) + if (dminSat < 0.0 || dminSat >= 150.0) env->ThrowError("MaskHS: minSat must be greater than or equal to 0 and less than 150."); - if (_maxSat <= 0.0 || _maxSat > 150.0) + if (dmaxSat <= 0.0 || dmaxSat > 150.0) env->ThrowError("MaskHS: maxSat must be greater than 0 and less than or equal to 150."); + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); + max_pixel_value = (1 << bits_per_pixel) - 1; + lut_size = 1 << bits_per_pixel; + + if(bits_per_pixel < 32) { + tv_range_low = 16 << (bits_per_pixel - 8); // 16 + tv_range_hi_luma = ((235+1) << 
(bits_per_pixel - 8)) - 1; // 16-235 + range_luma = tv_range_hi_luma - tv_range_low; // 219 + + tv_range_hi_chroma = ((240+1) << (bits_per_pixel - 8)) - 1; // 16-240,64–963, 256–3855,... 4096-61695 + range_chroma = tv_range_hi_chroma - tv_range_low; // 224 + } + else { // float: range is 0..255 scaled later + tv_range_low = 16; // 16 + tv_range_hi_luma = 235; // 16-235 + range_luma = tv_range_hi_luma - tv_range_low; // 219 + + tv_range_hi_chroma = 240; // 16-240 + range_chroma = tv_range_hi_chroma - tv_range_low; // 224 + + max_pixel_value = 255; + } + actual_chroma_range_low = coring ? tv_range_low : 0; + actual_chroma_range_high = coring ? tv_range_hi_chroma : max_pixel_value; - const BYTE maxY = coring ? 235 : 255; - const BYTE minY = coring ? 16 : 0; + middle_chroma = 1 << (bits_per_pixel - 1); // 128 + + realcalc_chroma = realcalc; + if (vi.IsPlanar() && (bits_per_pixel > 12)) // max bitdepth is 12 for lut + realcalc_chroma = true; // 100% equals sat=119 (= maximal saturation of valid RGB (R=255,G=B=0) // 150% (=180) - 100% (=119) overshoot - const double minSat = 1.19 * _minSat; - const double maxSat = 1.19 * _maxSat; - - // apply mask - for (int u = 0; u < 256; u++) { - const double destu = u - 128; - for (int v = 0; v < 256; v++) { - const double destv = v - 128; - int iSat = 0; // won't be used in MaskHS; interpolation is skipped since p==0: - if (ProcessPixel(destv, destu, startHue, endHue, maxSat, minSat, 0.0, iSat)) { - mapY[(u << 8) | v] = maxY; - } - else { - mapY[(u << 8) | v] = minY; - } - } - } + minSat = 1.19 * dminSat; + maxSat = 1.19 * dmaxSat; + + if (!(realcalc_chroma && vi.IsPlanar())) + { // fill lookup tables for UV + size_t map_size = pixelsize * lut_size * lut_size; + // for 8 bit : 1 * 256 * 256 = 65536 byte + // for 10 bit : 2 * 1024 * 1024 = 2 MByte + // for 12 bit : 2 * 4096 * 4096 = 32 MByte + auto env2 = static_cast(env); + + mapUV = static_cast(env2->Allocate(map_size, 8, AVS_NORMAL_ALLOC)); // uint16_t for (U+V bytes), casted to uint32_t for (U+V words in non-8 bit) + if (!mapUV) + env->ThrowError("Tweak: Could not reserve memory."); + env->AtExit(free_buffer, mapUV); + + // apply mask + double uv_range_corr = 1.0 / (1 << (bits_per_pixel - 8)); // no float here + for (int u = 0; u < lut_size; u++) { + const double destu = (u - middle_chroma) * uv_range_corr; // processpixel's minSat and maxSat is for 256 range + int ushift = u << bits_per_pixel; + for (int v = 0; v < lut_size; v++) { + const double destv = (v - middle_chroma) * uv_range_corr; + int iSat = 0; // won't be used in MaskHS; interpolation is skipped since p==0: + bool low = ProcessPixel(destv, destu, dstartHue, dendHue, maxSat, minSat, 0.0, iSat); + if(pixelsize==1) + mapUV[ushift | v] = low ? actual_chroma_range_low : actual_chroma_range_high; + else + reinterpret_cast(mapUV)[ushift | v] = low ? 
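The map_size comment above is worth spelling out: the mask LUT has one entry per (U,V) combination, so its footprint grows with the square of the number of code values, which is why the lookup path is capped at 12 bits and realcalc_chroma is forced beyond that. A small illustration of the arithmetic (compute_maskhs_lut_bytes is an illustrative helper, not in the source):

#include <cstddef>

// One entry per (U,V) pair, 1 or 2 bytes per entry depending on pixelsize.
//  8 bit:  1 * 256  * 256  =  64 KB
// 10 bit:  2 * 1024 * 1024 =   2 MB
// 12 bit:  2 * 4096 * 4096 =  32 MB
// 14 bit would already need 512 MB, hence the real-time fallback.
static size_t compute_maskhs_lut_bytes(int bits_per_pixel)
{
  const size_t lut_size  = size_t(1) << bits_per_pixel;
  const size_t pixelsize = (bits_per_pixel == 8) ? 1 : 2;
  return pixelsize * lut_size * lut_size;
}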
actual_chroma_range_low : actual_chroma_range_high; + } + } + } // end of lut calculation // #define MaskPointResizing #ifndef MaskPointResizing vi.width >>= vi.GetPlaneWidthSubsampling(PLANAR_U); vi.height >>= vi.GetPlaneHeightSubsampling(PLANAR_U); #endif - vi.pixel_type = VideoInfo::CS_Y8; + switch(bits_per_pixel) { + case 8: vi.pixel_type = VideoInfo::CS_Y8; break; + case 10: vi.pixel_type = VideoInfo::CS_Y10; break; + case 12: vi.pixel_type = VideoInfo::CS_Y12; break; + case 14: vi.pixel_type = VideoInfo::CS_Y14; break; + case 16: vi.pixel_type = VideoInfo::CS_Y16; break; + case 32: vi.pixel_type = VideoInfo::CS_Y32; break; + } } @@ -1709,7 +1775,7 @@ PVideoFrame __stdcall MaskHS::GetFrame(int n, IScriptEnvironment* env) for (int y = 0; y < height; y++) { for (int x = 0; x < row_size; x++) { - dstp[x] = mapY[((srcp[x * 4 + 1]) << 8) | srcp[x * 4 + 3]]; + dstp[x] = mapUV[((srcp[x * 4 + 1]) << 8) | srcp[x * 4 + 3]]; } srcp += src_pitch; dstp += dst_pitch; @@ -1732,17 +1798,81 @@ PVideoFrame __stdcall MaskHS::GetFrame(int n, IScriptEnvironment* env) const int srcu_pitch = src->GetPitch(PLANAR_U); const uint8_t* srcpu = src->GetReadPtr(PLANAR_U); const uint8_t* srcpv = src->GetReadPtr(PLANAR_V); - const int row_sizeu = src->GetRowSize(PLANAR_U); + const int width = src->GetRowSize(PLANAR_U) / pixelsize; const int heightu = src->GetHeight(PLANAR_U); #ifndef MaskPointResizing - for (int y = 0; y < heightu; ++y) { - for (int x = 0; x < row_sizeu; ++x) { - dstp[x] = mapY[((srcpu[x]) << 8) | srcpv[x]]; + if(realcalc_chroma) { + double uv_range_corr = (pixelsize == 4) ? 255.0 : 1.0 / (1 << (bits_per_pixel - 8)); + if(pixelsize == 1) { + for (int y = 0; y < heightu; ++y) { + for (int x = 0; x < width; ++x) { + const double destu = srcpu[x] - middle_chroma; + const double destv = srcpv[x] - middle_chroma; + int iSat = 0; // won't be used in MaskHS; interpolation is skipped since p==0: + bool low = ProcessPixel(destv * uv_range_corr, destu * uv_range_corr, dstartHue, dendHue, maxSat, minSat, 0.0, iSat); + dstp[x] = low ? actual_chroma_range_low : actual_chroma_range_high; + } + dstp += dst_pitch; + srcpu += srcu_pitch; + srcpv += srcu_pitch; } - dstp += dst_pitch; - srcpu += srcu_pitch; - srcpv += srcu_pitch; + } + else if (pixelsize == 2) { + double range_corr = 1 << (bits_per_pixel - 8); + for (int y = 0; y < heightu; ++y) { + for (int x = 0; x < width; ++x) { + const double destu = (reinterpret_cast(srcpu)[x] - middle_chroma); + const double destv = (reinterpret_cast(srcpv)[x] - middle_chroma); + int iSat = 0; // won't be used in MaskHS; interpolation is skipped since p==0: + bool low = ProcessPixel(destv * uv_range_corr, destu * uv_range_corr, dstartHue, dendHue, maxSat, minSat, 0.0, iSat); + reinterpret_cast(dstp)[x] = low ? 
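The fill loop above and the lookups that follow address the table with U in the high bits and V in the low bits, matching the lut_size x lut_size layout. A minimal sketch of the index, assuming the same packing:

#include <cstdint>

// Row/column index into the (U,V) mask table: (u << bits) | v.
// 8-bit data: (u << 8) | v; 10-bit data: (u << 10) | v, and so on.
static inline uint32_t uv_index(uint32_t u, uint32_t v, int bits_per_pixel)
{
  return (u << bits_per_pixel) | v;
}
// e.g. a 10-bit lookup would read:
//   mask = mapUV16[uv_index(u, v, 10)];   // mapUV16: the 16-bit view of mapUV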
actual_chroma_range_low : actual_chroma_range_high; + } + dstp += dst_pitch; + srcpu += srcu_pitch; + srcpv += srcu_pitch; + } + } else { // pixelsize == 4 + const float middle_chroma_f = 0.5f; + const float actual_chroma_range_low_f = actual_chroma_range_low / 255.0f; + const float actual_chroma_range_high_f = actual_chroma_range_high / 255.0f; + for (int y = 0; y < heightu; ++y) { + for (int x = 0; x < width; ++x) { + const double destu = (reinterpret_cast(srcpu)[x] - middle_chroma_f); + const double destv = (reinterpret_cast(srcpv)[x] - middle_chroma_f); + int iSat = 0; // won't be used in MaskHS; interpolation is skipped since p==0: + bool low = ProcessPixel(destv * uv_range_corr, destu * uv_range_corr, dstartHue, dendHue, maxSat, minSat, 0.0, iSat); + reinterpret_cast(dstp)[x] = low ? actual_chroma_range_low_f : actual_chroma_range_high_f; + } + dstp += dst_pitch; + srcpu += srcu_pitch; + srcpv += srcu_pitch; + } + } + + } else { + // use LUT + if(pixelsize==1) { + for (int y = 0; y < heightu; ++y) { + for (int x = 0; x < width; ++x) { + dstp[x] = mapUV[((srcpu[x]) << 8) | srcpv[x]]; + } + dstp += dst_pitch; + srcpu += srcu_pitch; + srcpv += srcu_pitch; + } + } + else if (pixelsize == 2) { + for (int y = 0; y < heightu; ++y) { + for (int x = 0; x < width; ++x) { + reinterpret_cast(dstp)[x] = + reinterpret_cast(mapUV)[((reinterpret_cast(srcpu)[x]) << bits_per_pixel) | reinterpret_cast(srcpv)[x]]; + } + dstp += dst_pitch; + srcpu += srcu_pitch; + srcpv += srcu_pitch; + } + } // no lut for float (and for 14-16 bit) } #else const int swidth = child->GetVideoInfo().GetPlaneWidthSubsampling(PLANAR_U); @@ -1783,6 +1913,7 @@ AVSValue __cdecl MaskHS::Create(AVSValue args, void* user_data, IScriptEnvironme args[3].AsDblDef(150.0), // maxSat args[4].AsDblDef(0.0), // minSat args[5].AsBool(false), // coring - env); + args[6].AsBool(false), // realcalc + env); } diff --git a/avs_core/filters/levels.h b/avs_core/filters/levels.h index bbd25c731..747f2f712 100644 --- a/avs_core/filters/levels.h +++ b/avs_core/filters/levels.h @@ -130,6 +130,8 @@ class Tweak : public GenericVideoFilter const bool realcalc; // no lookup, realtime calculation, always for 16/32 bits double dhue, dsat, dbright, dcont, dstartHue, dendHue, dmaxSat, dminSat, dinterp; + + bool allPixels; BYTE *map; uint16_t *mapUV; @@ -167,7 +169,7 @@ class Tweak : public GenericVideoFilter class MaskHS : public GenericVideoFilter { public: - MaskHS( PClip _child, double _startHue, double _endHue, double _maxSat, double _minSat, bool _coring, IScriptEnvironment* env ); + MaskHS( PClip _child, double _startHue, double _endHue, double _maxSat, double _minSat, bool _coring, bool _realcalc, IScriptEnvironment* env ); PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); @@ -178,7 +180,35 @@ class MaskHS : public GenericVideoFilter static AVSValue __cdecl Create(AVSValue args, void* user_data, IScriptEnvironment* env); private: - BYTE mapY[256*256]; + const double dstartHue, dendHue, dmaxSat, dminSat; + const bool coring; + const bool realcalc; + + double minSat, maxSat; // corrected values + + uint8_t *mapUV; + // avs+ + bool realcalc_luma; + bool realcalc_chroma; + + int pixelsize; + int bits_per_pixel; // 8,10..16 + int max_pixel_value; + int lut_size; + int real_lookup_size; + + int tv_range_low; + int tv_range_hi_luma; + int range_luma; + + int tv_range_hi_chroma; + int range_chroma; + + int middle_chroma; + + int actual_chroma_range_low; + int actual_chroma_range_high; + }; #endif // __Levels_H__ From 
cdab57eeb90fefafa3163c8aaf761a1b7c7dd7c9 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 15 Sep 2016 21:26:57 +0200 Subject: [PATCH 075/120] Apply template naming style --- avs_core/filters/resample.cpp | 12 ++++++------ avs_core/filters/transform.cpp | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/avs_core/filters/resample.cpp b/avs_core/filters/resample.cpp index 07a489bee..4c5a8eab8 100644 --- a/avs_core/filters/resample.cpp +++ b/avs_core/filters/resample.cpp @@ -102,20 +102,20 @@ static __forceinline __m128i _MM_PACKUS_EPI32( __m128i a, __m128i b ) ***** Vertical Resizer Assembly ******* ***************************************/ -template +template static void resize_v_planar_pointresize(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, const int* pitch_table, const void* storage) { int filter_size = program->filter_size; - pixel_size* src0 = (pixel_size *)src; - pixel_size* dst0 = (pixel_size *)dst; - dst_pitch = dst_pitch / sizeof(pixel_size); + pixel_t* src0 = (pixel_t *)src; + pixel_t* dst0 = (pixel_t *)dst; + dst_pitch = dst_pitch / sizeof(pixel_t); for (int y = 0; y < target_height; y++) { int offset = program->pixel_offset[y]; - const pixel_size* src_ptr = src0 + pitch_table[offset]/sizeof(pixel_size); + const pixel_t* src_ptr = src0 + pitch_table[offset]/sizeof(pixel_t); - memcpy(dst0, src_ptr, width*sizeof(pixel_size)); + memcpy(dst0, src_ptr, width*sizeof(pixel_t)); dst0 += dst_pitch; } diff --git a/avs_core/filters/transform.cpp b/avs_core/filters/transform.cpp index e9345ea8a..d4fc4e24c 100644 --- a/avs_core/filters/transform.cpp +++ b/avs_core/filters/transform.cpp @@ -113,13 +113,13 @@ AVSValue __cdecl FlipVertical::Create(AVSValue args, void*, IScriptEnvironment* ******* Flip Horizontal ****** ********************************/ -template +template static void flip_horizontal_plane_c(BYTE* dstp, const BYTE* srcp, int dst_pitch, int src_pitch, int width, int height) { - width = width / sizeof(pixel_size); // width is called with GetRowSize value - srcp += (width-1) * sizeof(pixel_size); + width = width / sizeof(pixel_t); // width is called with GetRowSize value + srcp += (width-1) * sizeof(pixel_t); for (int y = 0; y < height; y++) { // Loop planar luma. for (int x = 0; x < width; x++) { - (reinterpret_cast(dstp))[x] = (reinterpret_cast(srcp))[-x]; + (reinterpret_cast(dstp))[x] = (reinterpret_cast(srcp))[-x]; } srcp += src_pitch; dstp += dst_pitch; From 53890f5fa5a51a385706d9a5084381d75df8fa97 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 16 Sep 2016 19:25:57 +0200 Subject: [PATCH 076/120] Tweak dither strength back to base +/-0.5. Use env2->Allocate for RGBAdjust You can still, simulate set 8 bit dither range with dither_range=4.0 for 10 bit, 256.0 for 16 bit --- avs_core/filters/levels.cpp | 26 ++++++++++++++++++++++---- avs_core/filters/levels.h | 3 +++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/avs_core/filters/levels.cpp b/avs_core/filters/levels.cpp index 74a013c53..71a9bc109 100644 --- a/avs_core/filters/levels.cpp +++ b/avs_core/filters/levels.cpp @@ -833,8 +833,15 @@ PVideoFrame __stdcall RGBAdjust::GetFrame(int n, IScriptEnvironment* env) int real_lookup_size = (pixelsize == 1) ? 256 : 65536; // avoids lut overflow in case of non-standard content of a 10 bit clip int pixel_max = lookup_size - 1; - // worst case - unsigned int accum_r[65536], accum_g[65536], accum_b[65536]; + // worst case: 65536 for even 10 bits, too. 
Possible garbage + auto env2 = static_cast(env); + int bufsize = real_lookup_size * sizeof(uint32_t); + // allocate 3x bufsize for R. G and B will share it + accum_r = static_cast(env2->Allocate(bufsize*3 , 16, AVS_NORMAL_ALLOC)); + accum_g = accum_r + real_lookup_size; + accum_b = accum_g + real_lookup_size; + if (!accum_r) + env->ThrowError("RGBAdjust: Could not reserve memory."); for (int i = 0; i < lookup_size; i++) { accum_r[i] = 0; @@ -929,6 +936,7 @@ PVideoFrame __stdcall RGBAdjust::GetFrame(int n, IScriptEnvironment* env) Amax_r, Amax_g, Amax_b ); env->ApplyMessage(&frame, vi, text, vi.width / 4, 0xa0a0a0, 0, 0); + env2->Free(accum_r); } return frame; } @@ -1120,7 +1128,7 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con if (pixelsize == 4) dither_strength /= 256.0f; else - dither_strength = (1 << (bits_per_pixel - 8)) * dither_strength; // base: 8-bit lookup + dither_strength = /*(1 << (bits_per_pixel - 8)) * */ dither_strength; // base: 8-bit lookup // make dither_strength = 4.0 for 10 bits, 256.0 for 16 bits in order to have same dither range as for 8 bit // when 1.0 (default) is given as parameter @@ -1128,7 +1136,17 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con // lut scale settings scale_dither_luma = 256; // lower 256 is dither value divisor_dither_luma *= 256; - bias_dither_luma = -(256.0f * dither_strength - 1) / 2; // -127.5; + bias_dither_luma = -(256.0f * dither_strength - 1) / 2; + // original bias: -127.5 or -(256.0f * dither_strength - 1) / 2; + // dither strength =1 = (1 << (8-8)) + // dither min: int( (0*1-127.5)/256+0.5) = -0.498046875 + 0.5 = 0,001953125 + // dither max: int( (255*1-127.5)/256+0.5) = 0,998046875 + + // 16 bit: 32767,5 + // dither strength =256 = (1 << (16-8)) + // dither min: int( (0*256-32767,5)/256+0.5) = -127,498046875 + // dither max: int( (255*256-32767,5)/256+0.5) = 127,501953125 + scale_dither_chroma = 16; // lower 16 is dither value divisor_dither_chroma *= 16; diff --git a/avs_core/filters/levels.h b/avs_core/filters/levels.h index 747f2f712..ed40adbc8 100644 --- a/avs_core/filters/levels.h +++ b/avs_core/filters/levels.h @@ -97,6 +97,9 @@ class RGBAdjust : public GenericVideoFilter int pixelsize; int bits_per_pixel; // 8,10..16 bool use_lut; + + unsigned int *accum_r, *accum_g, *accum_b; + }; From cf65dbc222efc4e5713befcff2ad235de8465740 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 16 Sep 2016 19:27:34 +0200 Subject: [PATCH 077/120] Histogram: "Levels" with bits=10 gives 10-bit wide histogram for 10+ bit-depth Float is still todo. Planar RGB also. 
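The recomputed bias in the Tweak dither hunk above recentres the 0..255 ordered-dither pattern so that its net contribution is symmetric, about +/-0.5 of one output step at the default strength, and roughly +/-128 output steps when dither_strength=256 is passed for a 16-bit clip (reproducing the pre-patch 16-bit scaling). A worked sketch of the numbers (dither_offset is an illustrative helper):

#include <cstdio>

// Effective dither added before the LUT lookup, in output code values.
// d is the 0..255 dither index, s the dither_strength; bias = -(256*s - 1)/2.
static double dither_offset(int d, double s)
{
  const double bias = -(256.0 * s - 1.0) / 2.0;
  return (d * s + bias) / 256.0;
}

int main()
{
  std::printf("s=1:   %+.4f .. %+.4f\n", dither_offset(0, 1.0),   dither_offset(255, 1.0));
  std::printf("s=256: %+.2f .. %+.2f\n", dither_offset(0, 256.0), dither_offset(255, 256.0));
  return 0;
}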
--- avs_core/filters/histogram.cpp | 437 +++++++++++++++++++++++++-------- avs_core/filters/histogram.h | 5 +- 2 files changed, 332 insertions(+), 110 deletions(-) diff --git a/avs_core/filters/histogram.cpp b/avs_core/filters/histogram.cpp index 9d53aee38..4df25a39a 100644 --- a/avs_core/filters/histogram.cpp +++ b/avs_core/filters/histogram.cpp @@ -52,7 +52,7 @@ ********************************************************************/ extern const AVSFunction Histogram_filters[] = { - { "Histogram", BUILTIN_FUNC_PREFIX, "c[mode]s[].", Histogram::Create }, // src clip + { "Histogram", BUILTIN_FUNC_PREFIX, "c[mode]s[].[bits]i", Histogram::Create }, // src clip, avs+ new bits parameter { 0 } }; @@ -63,14 +63,23 @@ extern const AVSFunction Histogram_filters[] = { ******* Histogram Filter ****** **********************************/ -Histogram::Histogram(PClip _child, Mode _mode, AVSValue _option, IScriptEnvironment* env) - : GenericVideoFilter(_child), mode(_mode), option(_option) +Histogram::Histogram(PClip _child, Mode _mode, AVSValue _option, int _show_bits, IScriptEnvironment* env) + : GenericVideoFilter(_child), mode(_mode), option(_option), show_bits(_show_bits) { bool optionValid = false; + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); + + if(show_bits < 8 || show_bits>10) + env->ThrowError("Histogram: bits parameter can only be 8 or 10"); + + if(show_bits > bits_per_pixel) + show_bits = bits_per_pixel; // cannot show 10 bit levels for a 8 bit clip + if (mode == ModeClassic) { - if (!vi.IsYUV()) - env->ThrowError("Histogram: YUV data only"); + if (!vi.IsYUV() && !vi.IsYUVA()) + env->ThrowError("Histogram: YUV(A) data only"); vi.width += 256; } @@ -78,26 +87,29 @@ Histogram::Histogram(PClip _child, Mode _mode, AVSValue _option, IScriptEnvironm if (!vi.IsPlanar()) { env->ThrowError("Histogram: Levels mode only available in PLANAR."); } - if (vi.IsY8()) { - env->ThrowError("Histogram: Levels mode not available in Y8."); + if (vi.IsY()) { + env->ThrowError("Histogram: Levels mode not available in greyscale."); } optionValid = option.IsFloat(); const double factor = option.AsDblDef(100.0); // Population limit % factor if (factor < 0.0 || factor > 100.0) { env->ThrowError("Histogram: Levels population clamping must be between 0 and 100%"); } - vi.width += 256; + // put diagram on the right side + vi.width += (1 << show_bits); // 256 for 8 bit vi.height = max(256, vi.height); } + if (mode == ModeColor) { if (!vi.IsPlanar()) { env->ThrowError("Histogram: Color mode only available in PLANAR."); } - if (vi.IsY8()) { - env->ThrowError("Histogram: Color mode not available in Y8."); + if (vi.IsY()) { + env->ThrowError("Histogram: Color mode not available in greyscale."); } - vi.width += 256; + // put diagram on the right side + vi.width += (1 << show_bits); // 256 for 8 bit vi.height = max(256,vi.height); } @@ -105,21 +117,23 @@ Histogram::Histogram(PClip _child, Mode _mode, AVSValue _option, IScriptEnvironm if (!vi.IsPlanar()) { env->ThrowError("Histogram: Color2 mode only available in PLANAR."); } - if (vi.IsY8()) { - env->ThrowError("Histogram: Color2 mode not available in Y8."); + if (vi.IsY()) { + env->ThrowError("Histogram: Color2 mode not available in greyscale."); } - vi.width += 256; - vi.height = max(256,vi.height); - + // put circle on the right side + vi.width += (1 << show_bits); // 256 for 8 bit + vi.height = max((1 << show_bits),vi.height); // yes, height can change + int half = 1 << (show_bits - 1); // 127 + int R = half - 1; // 126 for (int y=0; y<24; y++) { 
// just inside the big circle - deg15c[y] = (int) ( 126.0*cos(y*PI/12.) + 0.5) + 127; - deg15s[y] = (int) (-126.0*sin(y*PI/12.) + 0.5) + 127; + deg15c[y] = (int) ( R*cos(y*PI/12.) + 0.5) + half; + deg15s[y] = (int) (-R*sin(y*PI/12.) + 0.5) + half; } } - if (mode == ModeLuma && !vi.IsYUV()) { - env->ThrowError("Histogram: Luma mode only available in YUV."); + if (mode == ModeLuma && !vi.IsYUV() && !vi.IsYUVA()) { + env->ThrowError("Histogram: Luma mode only available in YUV(A)."); } if ((mode == ModeStereoY8)||(mode == ModeStereo)||(mode == ModeOverlay)) { @@ -770,94 +784,250 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { PVideoFrame dst = env->NewVideoFrame(vi); BYTE* p = dst->GetWritePtr(); + int show_size = 1 << show_bits; + + // of source + int src_width = src->GetRowSize() / pixelsize; + int src_height = src->GetHeight(); + + bool RGB = vi.IsRGB(); + int plane_default_black[3] = { + RGB ? 0 : (16 << (bits_per_pixel - 8)), + RGB ? 0 : (128 << (bits_per_pixel - 8)), + RGB ? 0 : (128 << (bits_per_pixel - 8)) + }; + + const int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A}; + const int planesRGB[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A}; + const int *planes = vi.IsYUV() || vi.IsYUVA() ? planesYUV : planesRGB; + if (src->GetHeight() < dst->GetHeight()) { + const int fillSize = (dst->GetHeight()-src->GetHeight()) * dst->GetPitch(); const int fillStart = src->GetHeight() * dst->GetPitch(); - memset(p+fillStart, 16, fillSize); - const int fillSizeU = (dst->GetHeight(PLANAR_U)-src->GetHeight(PLANAR_U)) * dst->GetPitch(PLANAR_U); - const int fillStartU = src->GetHeight(PLANAR_U) * dst->GetPitch(PLANAR_U); - memset(dst->GetWritePtr(PLANAR_U)+fillStartU, 128, fillSizeU); - memset(dst->GetWritePtr(PLANAR_V)+fillStartU, 128, fillSizeU); + switch(pixelsize) { + case 1: memset(p + fillStart, plane_default_black[0], fillSize); break; + case 2: std::fill_n((uint16_t *)(p + fillStart), fillSize / sizeof(uint16_t), plane_default_black[0]); break; + case 4: std::fill_n((float *)(p + fillStart), fillSize / sizeof(float), (float)plane_default_black[0] / 255.0f); break; + } + + // first plane is already processed + // dont't touch Alpha + for (int p = 1; p < 3; p++) { + const int plane = planes[p]; + BYTE *dstp = dst->GetWritePtr(plane); + + const int fillSize = (dst->GetHeight(plane)-src->GetHeight(plane)) * dst->GetPitch(plane); + const int fillStart = src->GetHeight(plane) * dst->GetPitch(plane); + int chroma_fill = plane_default_black[p]; + switch(pixelsize) { + case 1: memset(dstp+fillStart, RGB ? 0 : chroma_fill, fillSize); break; + case 2: std::fill_n((uint16_t *)(dstp + fillStart), fillSize / sizeof(uint16_t), chroma_fill); break; + case 4: std::fill_n((float *)(dstp + fillStart), fillSize / sizeof(float), RGB ? 
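The fill values used above are the classic 8-bit studio levels scaled to the clip's bit depth (luma black 16, chroma neutral 128), with float chroma written as 0.5. A minimal sketch of the scaling (scale_8bit_level is an illustrative name):

#include <cstdint>

// 8-bit reference level scaled to the clip's bit depth.
// Luma black 16 -> 64 at 10 bits, 4096 at 16 bits; chroma neutral 128 likewise.
// For 32-bit float the patch divides by 255.0f instead, giving ~0.5f for chroma.
static int scale_8bit_level(int level8, int bits_per_pixel)
{
  return level8 << (bits_per_pixel - 8);
}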
0.0f : 0.5f); break; + } + } } + // counters + auto env2 = static_cast(env); + int bufsize = sizeof(uint32_t)*show_size; + uint32_t *histPlane1 = static_cast(env2->Allocate(bufsize * 3, 16, AVS_NORMAL_ALLOC)); + uint32_t *histPlanes[3] = { histPlane1, histPlane1 + show_size, histPlane1 + 2 * show_size }; + if (!histPlane1) + env->ThrowError("Histogram: Could not reserve memory."); + std::fill_n(histPlane1, show_size*3, 0); + + // copy planes + // luma or G env->BitBlt(p, dst->GetPitch(), src->GetReadPtr(), src->GetPitch(), src->GetRowSize(), src->GetHeight()); if (vi.IsPlanar()) { - env->BitBlt(dst->GetWritePtr(PLANAR_U), dst->GetPitch(PLANAR_U), src->GetReadPtr(PLANAR_U), src->GetPitch(PLANAR_U), src->GetRowSize(PLANAR_U), src->GetHeight(PLANAR_U)); - env->BitBlt(dst->GetWritePtr(PLANAR_V), dst->GetPitch(PLANAR_V), src->GetReadPtr(PLANAR_V), src->GetPitch(PLANAR_V), src->GetRowSize(PLANAR_V), src->GetHeight(PLANAR_V)); + // copy rest planes + for (int p = 1; p < vi.NumComponents(); p++) { + const int plane = planes[p]; + env->BitBlt(dst->GetWritePtr(plane), dst->GetPitch(plane), src->GetReadPtr(plane), src->GetPitch(plane), src->GetRowSize(plane), src->GetHeight(plane)); + } - int histY[256] = { 0 }; - int histU[256] = { 0 }; - int histV[256] = { 0 }; + // accumulate population + for (int p = 0; p < 3; p++) { + const int plane = planes[p]; + const BYTE* srcp = src->GetReadPtr(plane); - const BYTE* pY = src->GetReadPtr(PLANAR_Y); - const BYTE* pU = src->GetReadPtr(PLANAR_U); - const BYTE* pV = src->GetReadPtr(PLANAR_V); + const int w = src->GetRowSize(plane) / pixelsize; + const int h = src->GetHeight(plane); + const int pitch = src->GetPitch(plane) / pixelsize; - const int wy = src->GetRowSize(PLANAR_Y); - const int hy = src->GetHeight(PLANAR_Y); - const int wu = src->GetRowSize(PLANAR_U); - const int hu = src->GetHeight(PLANAR_U); - const int pitU = src->GetPitch(PLANAR_U); - const int pitY = src->GetPitch(PLANAR_Y); + // accumulator of current plane + // size: show_size (256 or 1024) + uint32_t *hist = histPlanes[p]; - // luma - for (int y = 0; y < hy; y++) { - for (int x = 0; x < wy; x++) { - histY[pY[y*pitY+x]]++; + if(pixelsize==1) { + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + hist[srcp[y*pitch + x]]++; + } + } } - } - - // chroma - for (int y = 0; y < hu; y++) { - for (int x = 0; x < wu; x++) { - histU[pU[y*pitU+x]]++; - histV[pV[y*pitU+x]]++; + else if (pixelsize == 2) { + const uint16_t *srcp16 = reinterpret_cast(srcp); + int shift = bits_per_pixel - show_bits; + int max_pixel_value = show_size - 1; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + hist[min(srcp16[x] >> shift, max_pixel_value)]++; + } + srcp16 += pitch; + } } - } + else { + // float + const float *srcp32 = reinterpret_cast(srcp); + const float multiplier = (float)(show_size - 1); + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + hist[(int)(clamp(srcp32[x], 0.0f, 1.0f)*multiplier)]++; + } + srcp32 += pitch; + } + } + } // accumulate end - unsigned char* pdstb = dst->GetWritePtr(PLANAR_Y); - pdstb += wy; + int width = src->GetRowSize() / pixelsize; + int pos_shift = (show_bits - 8); + int show_middle_pos = (128 << pos_shift); + // draw planes + for (int p = 0; p < 3; p++) { + const int plane = planes[p]; + const BYTE* srcp = src->GetReadPtr(plane); - const int dstPitch = dst->GetPitch(PLANAR_Y); + //const int w = src->GetRowSize(plane) / pixelsize; + //const int h = src->GetHeight(plane); + //const int pitch = src->GetPitch(plane) / pixelsize; - // Clear Y - for 
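The three accumulation loops above all reduce a pixel to one of 2^show_bits bins: 8-bit values index directly, deeper integer formats are right-shifted and clamped (non-standard 10-bit content can exceed 1023), and float is scaled from 0..1. A compact sketch of the same binning with illustrative function names; the case where show_bits exceeds the source depth is only added in the next patch:

#include <algorithm>
#include <cstdint>

// Bin index in [0, 2^show_bits) for an integer pixel (show_bits <= bits_per_pixel).
static int pixel_to_bin_uint16(uint16_t v, int bits_per_pixel, int show_bits)
{
  const int max_bin = (1 << show_bits) - 1;
  const int shift = bits_per_pixel - show_bits;   // 0 when depths match
  return std::min(int(v >> shift), max_bin);      // clamp non-standard content
}

// Bin index for a float pixel, assuming the 0..1 range used elsewhere in the patch.
static int pixel_to_bin_float(float v, int show_bits)
{
  const float scale = float((1 << show_bits) - 1);
  return int(std::clamp(v, 0.0f, 1.0f) * scale);
}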
(int y = 0; yGetHeight(); y++) { - memset(&pdstb[y*dstPitch], 16, 256); - } + int swidth = vi.GetPlaneWidthSubsampling(plane); + int sheight = vi.GetPlaneHeightSubsampling(plane); - // Draw Unsafe zone (Y-graph) - for (int y = 0; y<=64; y++) { - int x = 0; - for (; x<16; x++) { - pdstb[dstPitch*y+x] = 32; - } - x += 220; - for (; x<256; x++) { - pdstb[dstPitch*y+x] = 32; - } - } + // Draw Unsafe zone (UV-graph) - // Draw Unsafe zone (UV-graph) - // x=0-16, R=G=255, B=0; x=128, R=G=B=0; x=240-255, R=G=0, B=255 - // Draw upper gradient - for (int y = 64+16; y<=128+16; y++) { - int x = 0; - for (; x<15; x++) { - pdstb[dstPitch*y+x] = 210/2; - } - for (; x<=128; x++) { - pdstb[dstPitch*y + x] = (unsigned char)(((128 - x) * 15) >> 3); // *1.875 - } - for (; x<=240; x++) { - pdstb[dstPitch*y + x] = (unsigned char)(((x - 128) * 24001) >> 16); // *0.366 + unsigned char* pdstb = dst->GetWritePtr(plane); + pdstb += width*pixelsize; // next to the source image + + const int dstPitch = dst->GetPitch(plane); + + // Clear Y/U/V or B, R G + BYTE *ptr = pdstb; + int color = plane_default_black[p]; + for (int y = 0; y < dst->GetHeight(); y++) { + switch (pixelsize) { + case 1: memset(ptr, color, show_size >> swidth); break; + case 2: std::fill_n((uint16_t *)(ptr), show_size >> swidth, color); break; + case 4: std::fill_n((float *)(ptr), show_size >> swidth, (float)color / 255); break; + } + ptr += dstPitch; } - for (; x<256; x++) { - pdstb[dstPitch*y+x] = 41/2; + + // Draw Unsafe zone (Y-graph) + int color_unsafeZones[3] = { 32, 16, 160 }; + + int color_usz = color_unsafeZones[p]; + int color_i = color_usz << (bits_per_pixel - 8); + float color_f = color / 255.0f; + ptr = pdstb + 0 * dstPitch;; + for (int y = 0; y <= 64 >> sheight; y++) { + int x = 0; + for (; x < (16 << pos_shift) >> swidth; x++) { + if (pixelsize == 1) + ptr[x] = color_i; + else if (pixelsize == 2) + reinterpret_cast(ptr)[x] = color_i; + else + reinterpret_cast(ptr)[x] = color_f; + } + for (x = (236 << pos_shift) >> swidth; x < (show_size >> swidth); x++) { + if (pixelsize == 1) + ptr[x] = color_i; + else if (pixelsize == 2) + reinterpret_cast(ptr)[x] = color_i; + else + reinterpret_cast(ptr)[x] = color_f; + } + ptr += dstPitch; } - } + + for (int gradient_upper_lower = 0; gradient_upper_lower < 2; gradient_upper_lower++) + { + // Draw upper and lower gradient + // upper: x=0-16, R=G=255, B=0; x=128, R=G=B=0; x=240-255, R=G=0, B=255 + // lower: x=0-16, R=0, G=B=255; x=128, R=G=B=0; x=240-255, R=255, G=B=0 + int color1_upper_lower_gradient[2][3] = { { 210 / 2, 16 + 112 / 2, 128 },{ 170 / 2, 128, 16 + 112 / 2 } }; + int color = color1_upper_lower_gradient[gradient_upper_lower][p]; + int color_i = color << (bits_per_pixel - 8); + float color_f = color / 255.0f; + + int color2_upper_lower_gradient[2][3] = { { 41 / 2, 240 - 112 / 2, 128 },{ 81 / 2, 128, 240 - 112 / 2 } }; + int color2 = color2_upper_lower_gradient[gradient_upper_lower][p]; + int color2_i = color2 << (bits_per_pixel - 8); + float color2_f = color2 / 255.0f; + + // upper only for planar U and Y + if (plane == PLANAR_V && gradient_upper_lower == 0) + continue; + // lower only for planar V and Y + if (plane == PLANAR_U && gradient_upper_lower == 1) + continue; + int StartY = gradient_upper_lower == 0 ? 
64 + 16 : 128 + 32; + ptr = pdstb + ((StartY) >> sheight) * dstPitch; + for (int y = (StartY) >> sheight; y <= (StartY + 64) >> sheight; y++) { + int x = 0; + + for (; x < ((16 << pos_shift) >> swidth) - 1; x++) { // 0..15, 0..63 + if (pixelsize == 1) ptr[x] = color_i; + else if (pixelsize == 2) reinterpret_cast(ptr)[x] = color_i; + else reinterpret_cast(ptr)[x] = color_f; + } + + if (plane == PLANAR_Y) { + for (; x <= show_middle_pos; x++) { + int color3 = + (gradient_upper_lower == 0) ? + (((show_middle_pos - x) * 15) >> 3) >> pos_shift : // *1.875 + ((show_middle_pos - x) * 99515) >> 16 >> pos_shift; // *1.518 + int color3_i = color3 << (bits_per_pixel - 8); + float color3_f = color3 / 255.0f; + if (pixelsize == 1) ptr[x] = color3_i; + else if (pixelsize == 2) reinterpret_cast(ptr)[x] = color3_i; + else reinterpret_cast(ptr)[x] = color3_f; + } + } + + for (; x <= (240 << pos_shift) >> swidth; x++) { + int color4 = (plane == PLANAR_Y) ? + ( + (gradient_upper_lower == 0) ? + ((x - show_middle_pos) * 24001) >> 16 >> pos_shift : // *0.366 + ((x - show_middle_pos) * 47397) >> 16 >> pos_shift // *0.723 + ) + : + (x << swidth) >> pos_shift; + int color4_i = color4 << (bits_per_pixel - 8); + float color4_f = color4 / 255.0f; + if (pixelsize == 1) ptr[x] = color4_i; + else if (pixelsize == 2) reinterpret_cast(ptr)[x] = color4_i; + else reinterpret_cast(ptr)[x] = color4_f; + } + + for (; x<(show_size >> swidth); x++) { + if (pixelsize == 1) ptr[x] = color2_i; + else if (pixelsize == 2) reinterpret_cast(ptr)[x] = color2_i; + else reinterpret_cast(ptr)[x] = color2_f; + } + ptr += dstPitch; + } // for y gradient draw + } // gradient for upper lower + } // planes for + /* // x=0-16, R=0, G=B=255; x=128, R=G=B=0; x=240-255, R=255, G=B=0 // Draw lower gradient for (int y = 128+32; y<=128+64+32; y++) { @@ -875,35 +1045,80 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { pdstb[dstPitch*y+x] = 81/2; } } - + */ // Draw dotted centerline + int color = 128; + int color_i = color << (bits_per_pixel - 8); + float color_f = 0.5f; + + const int dstPitch = dst->GetPitch(PLANAR_Y); + + unsigned char* pdstb = dst->GetWritePtr(PLANAR_Y); + pdstb += width*pixelsize; // next to the original clip + BYTE *ptr = pdstb; + for (int y = 0; y<=256-32; y++) { - if ((y&3)>1) - pdstb[dstPitch*y+128] = 128; - } + if ((y&3)>1) { + if(pixelsize==1) ptr[show_middle_pos] = color_i; + else if(pixelsize==2) reinterpret_cast(ptr)[show_middle_pos] = color_i; + else reinterpret_cast(ptr)[show_middle_pos] = color_f; - // Draw Y histograms - const int clampval = (int)((hy*wy)*option.AsDblDef(100.0)/100.0); // Population limit % factor - int maxval = 0; - for (int i = 0; i<256; i++) { - if (histY[i] > clampval) histY[i] = clampval; - maxval = max(histY[i], maxval); + } + ptr += dstPitch; } - float scale = float(64.0 / maxval); + for (int n = 0; n < 3; n++) { + // Draw Y histograms + const uint32_t clampval = (int)((src_width*src_height)*option.AsDblDef(100.0) / 100.0); // Population limit % factor + uint32_t maxval = 0; + uint32_t *hist; - for (int x = 0; x<256; x++) { - float scaled_h = (float)histY[x] * scale; - int h = 64 - min((int)scaled_h, 64)+1; - int left = (int)(220.0f*(scaled_h-(float)((int)scaled_h))); + hist = histPlanes[n]; + for (int i = 0; i < show_size; i++) { + if (hist[i] > clampval) hist[i] = clampval; + maxval = max(hist[i], maxval); + } - for (int y = 64+1; y > h; y--) { - pdstb[x+y*dstPitch] = 235; + float scale = float(64.0 / maxval); + + int color = 235; + int color_i = color << 
(bits_per_pixel - 8); // igazából max_luma + float color_f = 0.5f; + + int Y_pos; + switch (n) { + case 0: Y_pos = 64; break; + case 1: Y_pos = 128 + 16; break; + case 2: Y_pos = 192 + 32; break; } - pdstb[x + h*dstPitch] = (unsigned char)(16 + left); - } - const int clampvalUV = (int)((hu*wu)*option.AsDblDef(100.0)/100.0); // Population limit % factor + for (int x = 0; x < show_size; x++) { + float scaled_h = (float)hist[x] * scale; + int h = Y_pos - min((int)scaled_h, 64) + 1; + int left = (int)(220.0f*(scaled_h - (float)((int)scaled_h))); // color, scaled later + + ptr = pdstb + (Y_pos + 1) * dstPitch; + for (int y = Y_pos + 1; y > h; y--) { + //pdstb[x + y*dstPitch] = 235; + if (pixelsize == 1) ptr[x] = color_i; + else if (pixelsize == 2) reinterpret_cast(ptr)[x] = color_i; + else reinterpret_cast(ptr)[x] = color_f; + ptr -= dstPitch; + } + int color_top = (16 + left); + int color_top_i = color_top << (bits_per_pixel - 8); // igazából max_luma + float color_top_f = color_top / 255.0f; + + ptr = pdstb + h*dstPitch; + if (pixelsize == 1) ptr[x] = color_top_i; + else if (pixelsize == 2) reinterpret_cast(ptr)[x] = color_top_i; + else reinterpret_cast(ptr)[x] = color_top_f; + + //pdstb[x + h*dstPitch] = color_i; + } + } +/* + const int clampvalUV = (int)((src_height*src_width)*option.AsDblDef(100.0)/100.0); // Population limit % factor // Draw U maxval = 0; @@ -943,12 +1158,13 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { } pdstb[x + h*dstPitch] = (unsigned char)(16 + left); } - +*/ + /* // Draw chroma unsigned char* pdstbU = dst->GetWritePtr(PLANAR_U); unsigned char* pdstbV = dst->GetWritePtr(PLANAR_V); - pdstbU += wu; - pdstbV += wu; + pdstbU += src_width*pixelsize; + pdstbV += src_width*pixelsize; // Clear chroma int dstPitchUV = dst->GetPitch(PLANAR_U); @@ -1002,8 +1218,11 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { pdstbV[dstPitchUV*y+x] = 240-112/2; } } + */ } + env2->Free(histPlane1); + return dst; } @@ -1144,5 +1363,5 @@ AVSValue __cdecl Histogram::Create(AVSValue args, void*, IScriptEnvironment* env if (!lstrcmpi(st_m, "audiolevels")) mode = ModeAudioLevels; - return new Histogram(args[0].AsClip(), mode, args[2], env); + return new Histogram(args[0].AsClip(), mode, args[2], args[3].AsInt(8), env); } diff --git a/avs_core/filters/histogram.h b/avs_core/filters/histogram.h index 24ac1b14f..e7d8c4453 100644 --- a/avs_core/filters/histogram.h +++ b/avs_core/filters/histogram.h @@ -59,7 +59,7 @@ class Histogram : public GenericVideoFilter ModeAudioLevels }; - Histogram(PClip _child, Mode _mode, AVSValue _option, IScriptEnvironment* env); + Histogram(PClip _child, Mode _mode, AVSValue _option, int _show_bits, IScriptEnvironment* env); PVideoFrame __stdcall GetFrame(int n, IScriptEnvironment* env); PVideoFrame DrawModeClassic (int n, IScriptEnvironment* env); PVideoFrame DrawModeLevels (int n, IScriptEnvironment* env); @@ -80,6 +80,9 @@ class Histogram : public GenericVideoFilter int deg15c[24], deg15s[24]; PClip aud_clip; AVSValue option; + int pixelsize; + int bits_per_pixel; + int show_bits; // e.g. levels for 10 bits }; From 3caef6cf9737fa074f619287f16ed0d0483d153e Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sat, 17 Sep 2016 19:07:05 +0200 Subject: [PATCH 078/120] Histogram "levels": parameter: bits=8,9,10,11,12 for finer histograms. 
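In the bar-drawing loop above, the population option first caps every bin at a percentage of the frame's pixel count, then the tallest remaining bin sets the scale for the 64-pixel-high strip, with the fractional remainder rendered as a dimmer top pixel. A sketch of the clamping and scaling (illustrative helper, ignoring the top-pixel shading):

#include <algorithm>
#include <cstdint>
#include <vector>

// Clamp over-populated bins and derive a 0..64 bar height per bin.
static std::vector<int> bars_from_histogram(std::vector<uint32_t> hist,
                                            uint64_t total_pixels,
                                            double factor_percent)
{
  const uint32_t clampval = uint32_t(total_pixels * factor_percent / 100.0);
  uint32_t maxval = 0;
  for (auto &h : hist) {
    h = std::min(h, clampval);
    maxval = std::max(maxval, h);
  }
  std::vector<int> bars(hist.size());
  const double scale = maxval ? 64.0 / maxval : 0.0;
  for (size_t i = 0; i < hist.size(); ++i)
    bars[i] = std::min(int(hist[i] * scale), 64);
  return bars;
}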
Float support --- avs_core/filters/histogram.cpp | 85 ++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/avs_core/filters/histogram.cpp b/avs_core/filters/histogram.cpp index 4df25a39a..d2bdc3c31 100644 --- a/avs_core/filters/histogram.cpp +++ b/avs_core/filters/histogram.cpp @@ -71,11 +71,8 @@ Histogram::Histogram(PClip _child, Mode _mode, AVSValue _option, int _show_bits, pixelsize = vi.ComponentSize(); bits_per_pixel = vi.BitsPerComponent(); - if(show_bits < 8 || show_bits>10) - env->ThrowError("Histogram: bits parameter can only be 8 or 10"); - - if(show_bits > bits_per_pixel) - show_bits = bits_per_pixel; // cannot show 10 bit levels for a 8 bit clip + if(show_bits < 8 || show_bits>12) + env->ThrowError("Histogram: bits parameter can only be 8, 9 .. 12"); if (mode == ModeClassic) { if (!vi.IsYUV() && !vi.IsYUVA()) @@ -782,7 +779,7 @@ PVideoFrame Histogram::DrawModeColor(int n, IScriptEnvironment* env) { PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { PVideoFrame src = child->GetFrame(n, env); PVideoFrame dst = env->NewVideoFrame(vi); - BYTE* p = dst->GetWritePtr(); + BYTE* dstp = dst->GetWritePtr(); int show_size = 1 << show_bits; @@ -791,10 +788,11 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { int src_height = src->GetHeight(); bool RGB = vi.IsRGB(); + int color_shift = (bits_per_pixel == 32) ? 0 : (bits_per_pixel - 8); int plane_default_black[3] = { - RGB ? 0 : (16 << (bits_per_pixel - 8)), - RGB ? 0 : (128 << (bits_per_pixel - 8)), - RGB ? 0 : (128 << (bits_per_pixel - 8)) + RGB ? 0 : (16 << color_shift), + RGB ? 0 : (128 << color_shift), + RGB ? 0 : (128 << color_shift) }; const int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A}; @@ -807,24 +805,24 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { const int fillStart = src->GetHeight() * dst->GetPitch(); switch(pixelsize) { - case 1: memset(p + fillStart, plane_default_black[0], fillSize); break; - case 2: std::fill_n((uint16_t *)(p + fillStart), fillSize / sizeof(uint16_t), plane_default_black[0]); break; - case 4: std::fill_n((float *)(p + fillStart), fillSize / sizeof(float), (float)plane_default_black[0] / 255.0f); break; + case 1: memset(dstp + fillStart, plane_default_black[0], fillSize); break; + case 2: std::fill_n((uint16_t *)(dstp + fillStart), fillSize / sizeof(uint16_t), plane_default_black[0]); break; + case 4: std::fill_n((float *)(dstp + fillStart), fillSize / sizeof(float), (float)plane_default_black[0] / 255.0f); break; } // first plane is already processed // dont't touch Alpha for (int p = 1; p < 3; p++) { const int plane = planes[p]; - BYTE *dstp = dst->GetWritePtr(plane); + BYTE *ptr = dst->GetWritePtr(plane); const int fillSize = (dst->GetHeight(plane)-src->GetHeight(plane)) * dst->GetPitch(plane); const int fillStart = src->GetHeight(plane) * dst->GetPitch(plane); int chroma_fill = plane_default_black[p]; switch(pixelsize) { - case 1: memset(dstp+fillStart, RGB ? 0 : chroma_fill, fillSize); break; - case 2: std::fill_n((uint16_t *)(dstp + fillStart), fillSize / sizeof(uint16_t), chroma_fill); break; - case 4: std::fill_n((float *)(dstp + fillStart), fillSize / sizeof(float), RGB ? 0.0f : 0.5f); break; + case 1: memset(ptr+fillStart, RGB ? 0 : chroma_fill, fillSize); break; + case 2: std::fill_n((uint16_t *)(ptr + fillStart), fillSize / sizeof(uint16_t), chroma_fill); break; + case 4: std::fill_n((float *)(ptr + fillStart), fillSize / sizeof(float), RGB ? 
0.0f : 0.5f); break; } } } @@ -840,7 +838,7 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { // copy planes // luma or G - env->BitBlt(p, dst->GetPitch(), src->GetReadPtr(), src->GetPitch(), src->GetRowSize(), src->GetHeight()); + env->BitBlt(dstp, dst->GetPitch(), src->GetReadPtr(), src->GetPitch(), src->GetRowSize(), src->GetHeight()); if (vi.IsPlanar()) { // copy rest planes for (int p = 1; p < vi.NumComponents(); p++) { @@ -862,21 +860,38 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { uint32_t *hist = histPlanes[p]; if(pixelsize==1) { + const uint8_t *srcp8 = reinterpret_cast(srcp); + int invshift = show_bits - bits_per_pixel; + // 8 bit clip into 8,9,... bit histogram for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - hist[srcp[y*pitch + x]]++; + hist[(int)srcp8[x] << invshift]++; + //hist[srcp[y*pitch + x]]++; } + srcp8 += pitch; } } else if (pixelsize == 2) { const uint16_t *srcp16 = reinterpret_cast(srcp); int shift = bits_per_pixel - show_bits; int max_pixel_value = show_size - 1; - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { - hist[min(srcp16[x] >> shift, max_pixel_value)]++; + if (shift < 0) { + // 10 bit clip into 11 bit histogram + int invshift = -shift; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + hist[srcp16[x] << invshift]++; + } + srcp16 += pitch; + } + } else { + // e.g.10 bit clip into 8-9-10 bit histogram + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + hist[min(srcp16[x] >> shift, max_pixel_value)]++; + } + srcp16 += pitch; } - srcp16 += pitch; } } else { @@ -911,14 +926,14 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { unsigned char* pdstb = dst->GetWritePtr(plane); - pdstb += width*pixelsize; // next to the source image + pdstb += (width*pixelsize) >> swidth; // next to the source image const int dstPitch = dst->GetPitch(plane); // Clear Y/U/V or B, R G BYTE *ptr = pdstb; int color = plane_default_black[p]; - for (int y = 0; y < dst->GetHeight(); y++) { + for (int y = 0; y < dst->GetHeight() >> sheight; y++) { switch (pixelsize) { case 1: memset(ptr, color, show_size >> swidth); break; case 2: std::fill_n((uint16_t *)(ptr), show_size >> swidth, color); break; @@ -931,7 +946,7 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { int color_unsafeZones[3] = { 32, 16, 160 }; int color_usz = color_unsafeZones[p]; - int color_i = color_usz << (bits_per_pixel - 8); + int color_i = color_usz << color_shift; float color_f = color / 255.0f; ptr = pdstb + 0 * dstPitch;; for (int y = 0; y <= 64 >> sheight; y++) { @@ -963,12 +978,12 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { // lower: x=0-16, R=0, G=B=255; x=128, R=G=B=0; x=240-255, R=255, G=B=0 int color1_upper_lower_gradient[2][3] = { { 210 / 2, 16 + 112 / 2, 128 },{ 170 / 2, 128, 16 + 112 / 2 } }; int color = color1_upper_lower_gradient[gradient_upper_lower][p]; - int color_i = color << (bits_per_pixel - 8); + int color_i = color << color_shift; float color_f = color / 255.0f; int color2_upper_lower_gradient[2][3] = { { 41 / 2, 240 - 112 / 2, 128 },{ 81 / 2, 128, 240 - 112 / 2 } }; int color2 = color2_upper_lower_gradient[gradient_upper_lower][p]; - int color2_i = color2 << (bits_per_pixel - 8); + int color2_i = color2 << color_shift; float color2_f = color2 / 255.0f; // upper only for planar U and Y @@ -994,11 +1009,11 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { (gradient_upper_lower == 0) ? 
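With the bits parameter now allowed to exceed the source depth, the binning above gains a left-shift branch: an 8-bit source shown on a 10-bit axis puts every value on bin value<<2, leaving the in-between bins empty. A small sketch combining both directions (pixel_to_bin is an illustrative name):

#include <algorithm>
#include <cstdint>

// Bin index when the histogram axis (show_bits) may be narrower or wider
// than the source depth (bits_per_pixel).
static int pixel_to_bin(uint32_t v, int bits_per_pixel, int show_bits)
{
  const int max_bin = (1 << show_bits) - 1;
  const int shift = bits_per_pixel - show_bits;
  if (shift >= 0)
    return std::min(int(v >> shift), max_bin);  // reduce: e.g. 16-bit source, 10-bit axis
  return int(v) << -shift;                      // expand: e.g. 8-bit source, 10-bit axis
}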
(((show_middle_pos - x) * 15) >> 3) >> pos_shift : // *1.875 ((show_middle_pos - x) * 99515) >> 16 >> pos_shift; // *1.518 - int color3_i = color3 << (bits_per_pixel - 8); + int color3_i = color3 << color_shift; float color3_f = color3 / 255.0f; if (pixelsize == 1) ptr[x] = color3_i; else if (pixelsize == 2) reinterpret_cast(ptr)[x] = color3_i; - else reinterpret_cast(ptr)[x] = color3_f; + else reinterpret_cast(ptr)[x] = color3_f; } } @@ -1011,7 +1026,7 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { ) : (x << swidth) >> pos_shift; - int color4_i = color4 << (bits_per_pixel - 8); + int color4_i = color4 << color_shift; float color4_f = color4 / 255.0f; if (pixelsize == 1) ptr[x] = color4_i; else if (pixelsize == 2) reinterpret_cast(ptr)[x] = color4_i; @@ -1048,13 +1063,13 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { */ // Draw dotted centerline int color = 128; - int color_i = color << (bits_per_pixel - 8); + int color_i = color << color_shift; float color_f = 0.5f; const int dstPitch = dst->GetPitch(PLANAR_Y); unsigned char* pdstb = dst->GetWritePtr(PLANAR_Y); - pdstb += width*pixelsize; // next to the original clip + pdstb += (width*pixelsize); // next to the original clip, Y plane: no ">> swidth" needed BYTE *ptr = pdstb; for (int y = 0; y<=256-32; y++) { @@ -1082,8 +1097,8 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { float scale = float(64.0 / maxval); int color = 235; - int color_i = color << (bits_per_pixel - 8); // igazából max_luma - float color_f = 0.5f; + int color_i = color << color_shift; // igazából max_luma + float color_f = color / 255.0f; int Y_pos; switch (n) { @@ -1106,7 +1121,7 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { ptr -= dstPitch; } int color_top = (16 + left); - int color_top_i = color_top << (bits_per_pixel - 8); // igazából max_luma + int color_top_i = color_top << color_shift; // igazából max_luma float color_top_f = color_top / 255.0f; ptr = pdstb + h*dstPitch; From 37599c90e566a9fa2028f47cce0da2423cc43fa4 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sat, 17 Sep 2016 19:08:09 +0200 Subject: [PATCH 079/120] Levels, Tweak, RGBAdjust: dither range fix for 10-16 bit + float --- avs_core/filters/levels.cpp | 109 +++++++++++++++++++----------------- avs_core/filters/levels.h | 28 +++++++++ 2 files changed, 85 insertions(+), 52 deletions(-) diff --git a/avs_core/filters/levels.cpp b/avs_core/filters/levels.cpp index 71a9bc109..24f155b1b 100644 --- a/avs_core/filters/levels.cpp +++ b/avs_core/filters/levels.cpp @@ -141,7 +141,9 @@ Levels::Levels(PClip _child, int in_min, double gamma, int in_max, int out_min, divisor = in_max - in_min; int scale = 1; - double bias = 0.0; + //double bias = 0.0; + + dither_strength = 1.0f; // later: from parameter as Tweak pixelsize = vi.ComponentSize(); bits_per_pixel = vi.BitsPerComponent(); // 8,10..16 @@ -151,29 +153,37 @@ Levels::Levels(PClip _child, int in_min, double gamma, int in_max, int out_min, // No lookup for float. todo: slow on-the-fly realtime calculation int lookup_size = 1 << bits_per_pixel; // 256, 1024, 4096, 16384, 65536 - int real_lookup_size = (pixelsize == 1) ? 256 : 65536; // avoids lut overflow in case of non-standard content of a 10 bit clip - int pixel_max = lookup_size - 1; + real_lookup_size = (pixelsize == 1) ? 
256 : 65536; // avoids lut overflow in case of non-standard content of a 10 bit clip + int max_pixel_value = (1 << bits_per_pixel) - 1; use_lut = bits_per_pixel != 32; // for float: realtime (todo) if (!use_lut) dither = false; - int tv_range_low = 16 << (bits_per_pixel - 8); // 16 - int tv_range_hi_luma = ((235+1) << (bits_per_pixel - 8)) - 1; // 16-235 - int range_luma = tv_range_hi_luma - tv_range_low; // 219 + tv_range_low = 16 << (bits_per_pixel - 8); // 16 + tv_range_hi_luma = ((235+1) << (bits_per_pixel - 8)) - 1; // 16-235 + range_luma = tv_range_hi_luma - tv_range_low; // 219 + + tv_range_hi_chroma = ((240+1) << (bits_per_pixel - 8)) - 1; // 16-240,64–963, 256–3855,... 4096-61695 + range_chroma = tv_range_hi_chroma - tv_range_low; // 224 - int tv_range_hi_chroma = ((240+1) << (bits_per_pixel - 8)) - 1; // 16-240,64–963, 256–3855,... 4096-61695 - int range_chroma = tv_range_hi_chroma - tv_range_low; // 224 + middle_chroma = 1 << (bits_per_pixel - 1); // 128 - int middle_chroma = 1 << (bits_per_pixel - 1); // 128 + if (pixelsize == 4) + dither_strength /= 65536.0f; // same dither range as for a 16 bit clip if (dither) { // lut scale settings + // same 256*dither for chroma and luma scale = 256; // lower 256 is dither value divisor *= 256; in_min *= 256; - bias = -((1 << bits_per_pixel) - 1) / 2; // -127.5 for 8 bit, scaling because of dithershift + bias_dither = -(256.0f * dither_strength - 1) / 2; // -127.5 for 8 bit, scaling because of dithershift + } + else { + scale = 1; + bias_dither = 0.0f; } // one buffer for map and mapchroma @@ -186,13 +196,13 @@ Levels::Levels(PClip _child, int in_min, double gamma, int in_max, int out_min, if (!map) env->ThrowError("Levels: Could not reserve memory."); env->AtExit(free_buffer, map); - if(bits_per_pixel>=10 && bits_per_pixel<=14) - std::fill_n(map, bufsize, 0); // 8 and 16 bit fully overwrites + if(bits_per_pixel>8 && bits_per_pixel<16) // make lut table safe for 10-14 bit garbage + std::fill_n(map, bufsize, 0); // 8 and 16 bit is safe } if (vi.IsYUV() || vi.IsYUVA()) { - mapchroma = map + pixelsize * real_lookup_size * scale; + mapchroma = map + pixelsize * real_lookup_size * scale; // pointer offset for (int i = 0; i(map)[i] = (uint16_t)p; + // no lookup for float } } } @@ -590,8 +597,9 @@ RGBAdjust::RGBAdjust(PClip _child, double r, double g, double b, double a, // No lookup for float. todo: slow on-the-fly realtime calculation int lookup_size = 1 << bits_per_pixel; // 256, 1024, 4096, 16384, 65536 - int real_lookup_size = (pixelsize == 1) ? 256 : 65536; // avoids lut overflow in case of non-standard content of a 10 bit clip - int pixel_max = lookup_size - 1; + real_lookup_size = (pixelsize == 1) ? 256 : 65536; // avoids lut overflow in case of non-standard content of a 10 bit clip + max_pixel_value = (1 << bits_per_pixel) - 1; + dither_strength = 1.0f; // fixed use_lut = bits_per_pixel != 32; // for float: realtime (todo) @@ -611,36 +619,32 @@ RGBAdjust::RGBAdjust(PClip _child, double r, double g, double b, double a, if (!mapR) env->ThrowError("RGBAdjust: Could not reserve memory."); env->AtExit(free_buffer, mapR); - if(bits_per_pixel>=10 && bits_per_pixel<=14) + if(bits_per_pixel>8 && bits_per_pixel<16) // make lut table safe for 10-14 bit garbage std::fill_n(mapR, one_bufsize * number_of_maps, 0); // 8 and 16 bit fully overwrites mapG = mapR + one_bufsize; mapB = mapG + one_bufsize; mapA = number_of_maps == 4 ? 
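Both Levels and RGBAdjust above size their 2-byte LUTs for the full 65536 code values and zero-fill them for 10-14 bit formats, so out-of-range ("garbage") pixels still land on a defined entry; RGBAdjust additionally carves one allocation into the per-channel maps. A sketch of that layout, using a std::vector as a stand-in for env2->Allocate:

#include <cstdint>
#include <vector>

// One contiguous block holding the R, G and B lookup tables back to back.
struct RgbLuts {
  std::vector<uint16_t> storage;  // stands in for env2->Allocate in this sketch
  uint16_t *r, *g, *b;
};

static void make_rgb_luts(RgbLuts &luts)
{
  const size_t one_size = 65536;          // real_lookup_size for pixelsize == 2
  luts.storage.assign(one_size * 3, 0);   // zero-fill: safe for 10-14 bit garbage
  luts.r = luts.storage.data();
  luts.g = luts.r + one_size;
  luts.b = luts.g + one_size;
}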
mapB + one_bufsize : nullptr; - void(*set_map)(BYTE*, int, int, const double, const double, const double); + void(*set_map)(BYTE*, int, int, float, const double, const double, const double); if (dither) { - set_map = [](BYTE* map, int lookup_size, int bits_per_pixel, const double c0, const double c1, const double c2) { - double bias = -((1 << bits_per_pixel) - 1) / 2; // -127.5 for 8 bit, scaling because of dithershift + set_map = [](BYTE* map, int lookup_size, int bits_per_pixel, float dither_strength, const double c0, const double c1, const double c2) { + double bias_dither = -(256.0f * dither_strength - 1) / 2; // -127.5 for 8 bit, scaling because of dithershift double pixel_max = (1 << bits_per_pixel) - 1; if(bits_per_pixel == 8) { for (int i = 0; i < lookup_size * 256; ++i) { - int i_base = i & ~0xFF; - int i_dithershift = (i & 0xFF) << (bits_per_pixel - 8); - int ii = ii = i_base + i_dithershift; // otherwise dither has no visible effect on 10..16 bit - map[i] = BYTE(pow(clamp((c0 * 256 + ii * c1 - bias) / (double(pixel_max) * 256), 0.0, 1.0), c2) * (double)pixel_max + 0.5); + int ii = (i & 0xFFFFFF00) + (int)((i & 0xFF)*dither_strength); + map[i] = BYTE(pow(clamp((c0 * 256 + ii * c1 - bias_dither) / (double(pixel_max) * 256), 0.0, 1.0), c2) * (double)pixel_max + 0.5); } } else { for (int i = 0; i < lookup_size * 256; ++i) { - int i_base = i & ~0xFF; - int i_dithershift = (i & 0xFF) << (bits_per_pixel - 8); - int ii = ii = i_base + i_dithershift; // otherwise dither has no visible effect on 10..16 bit - reinterpret_cast(map)[i] = uint16_t(pow(clamp((c0 * 256 + ii * c1 - bias) / (double(pixel_max) * 256), 0.0, 1.0), c2) * (double)pixel_max + 0.5); + int ii = (i & 0xFFFFFF00) + (int)((i & 0xFF)*dither_strength); + reinterpret_cast(map)[i] = uint16_t(pow(clamp((c0 * 256 + ii * c1 - bias_dither) / (double(pixel_max) * 256), 0.0, 1.0), c2) * (double)pixel_max + 0.5); } } }; } else { - set_map = [](BYTE* map, int lookup_size, int bits_per_pixel, const double c0, const double c1, const double c2) { + set_map = [](BYTE* map, int lookup_size, int bits_per_pixel, float dither_strength, const double c0, const double c1, const double c2) { double pixel_max = (1 << bits_per_pixel) - 1; if(bits_per_pixel==8) { for (int i = 0; i < lookup_size; ++i) { // fix of bug introduced in an earlier refactor was: i < 256 * 256 @@ -655,11 +659,11 @@ RGBAdjust::RGBAdjust(PClip _child, double r, double g, double b, double a, }; } - set_map(mapR, lookup_size, bits_per_pixel, rb, r, rg); - set_map(mapG, lookup_size, bits_per_pixel, gb, g, gg); - set_map(mapB, lookup_size, bits_per_pixel, bb, b, bg); + set_map(mapR, lookup_size, bits_per_pixel, dither_strength, rb, r, rg); + set_map(mapG, lookup_size, bits_per_pixel, dither_strength, gb, g, gg); + set_map(mapB, lookup_size, bits_per_pixel, dither_strength, bb, b, bg); if (number_of_maps == 4) - set_map(mapA, lookup_size, bits_per_pixel, ab, a, ag); + set_map(mapA, lookup_size, bits_per_pixel, dither_strength, ab, a, ag); } } @@ -1126,11 +1130,9 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con bias_dither_chroma = 0.0; if (pixelsize == 4) - dither_strength /= 256.0f; - else - dither_strength = /*(1 << (bits_per_pixel - 8)) * */ dither_strength; // base: 8-bit lookup - // make dither_strength = 4.0 for 10 bits, 256.0 for 16 bits in order to have same dither range as for 8 bit - // when 1.0 (default) is given as parameter + dither_strength /= 65536.0f; // same dither range as for a 16 bit clip + // Set dither_strength = 4.0 for 10 
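In the dithered set_map above, each LUT index packs the pixel value in the upper bits and an 8-bit dither position in the lowest byte; the channel's offset, gain and gamma are then applied to the dithered value. A sketch of the per-entry math for one channel (gamma_map_entry is an illustrative name; c0, c1 and c2 correspond to the channel offset, gain and gamma arguments passed to set_map):

#include <algorithm>
#include <cmath>
#include <cstdint>

// value: pixel code value (0..2^bits-1), d: dither index (0..255).
static uint16_t gamma_map_entry(int value, int d, double strength,
                                double c0, double c1, double c2, int bits_per_pixel)
{
  const double pixel_max = (1 << bits_per_pixel) - 1;
  const double bias = -(256.0 * strength - 1.0) / 2.0;
  const int ii = value * 256 + int(d * strength);          // dithered sub-position
  const double t =
      std::clamp((c0 * 256 + ii * c1 - bias) / (pixel_max * 256), 0.0, 1.0);
  return uint16_t(std::pow(t, c2) * pixel_max + 0.5);      // 8-bit path returns BYTE instead
}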
bits or 256.0 for 16 bits in order to have same dither range as for 8 bits + // Otherwise dithering is always +/- 0.5 at all bit-depth if (dither) { // lut scale settings @@ -1201,7 +1203,10 @@ Tweak::Tweak(PClip _child, double _hue, double _sat, double _bright, double _con realcalc_chroma = realcalc; if (vi.IsPlanar() && (bits_per_pixel > 10)) realcalc_chroma = true; + if (vi.IsPlanar() && (bits_per_pixel == 32)) + realcalc_luma = true; // 8/10bit: chroma lut OK. 12+ bits: force no lookup tables. + // 8-16bit: luma lut OK. float: force no lookup tables. auto env2 = static_cast(env); diff --git a/avs_core/filters/levels.h b/avs_core/filters/levels.h index ed40adbc8..d3b3b8fef 100644 --- a/avs_core/filters/levels.h +++ b/avs_core/filters/levels.h @@ -67,6 +67,23 @@ class Levels : public GenericVideoFilter int pixelsize; int bits_per_pixel; // 8,10..16 bool use_lut; + + int max_pixel_value; + int lut_size; + int real_lookup_size; + + int tv_range_low; + int tv_range_hi_luma; + int range_luma; + + int tv_range_hi_chroma; + int range_chroma; + + int middle_chroma; + + float bias_dither; + + float dither_strength; }; @@ -98,6 +115,17 @@ class RGBAdjust : public GenericVideoFilter int bits_per_pixel; // 8,10..16 bool use_lut; + int max_pixel_value; + int lut_size; + int real_lookup_size; + + int tv_range_low; + int tv_range_hi_luma; + int range_luma; + + float dither_strength; + float bias_dither; + unsigned int *accum_r, *accum_g, *accum_b; }; From a6c0d502aaf0afc387d5a0be52da69e4445a599d Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 19 Sep 2016 16:07:52 +0200 Subject: [PATCH 080/120] ConvertBits(), refactor, YUVA alpha always full scale to retain max. opacity Parameter list: c[bits]i[truerange]b[dither]i[scale]f --- avs_core/convert/convert.cpp | 873 ++++++++++++++---------------- avs_core/convert/convert_planar.h | 52 +- 2 files changed, 411 insertions(+), 514 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 152174fd7..0bbff2d1f 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -69,9 +69,10 @@ extern const AVSFunction Convert_filters[] = { // matrix can be "rec601", { "ConvertToYUV420", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s[ChromaOutPlacement]s", ConvertToPlanarGeneric::CreateYUV420}, { "ConvertToYUV422", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToPlanarGeneric::CreateYUV422}, { "ConvertToYUV444", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToPlanarGeneric::CreateYUV444}, - { "ConvertTo8bit", BUILTIN_FUNC_PREFIX, "c[truerange]b[dither]i[scale]f", ConvertTo8bit::Create}, - { "ConvertTo16bit", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f", ConvertTo16bit::Create}, - { "ConvertToFloat", BUILTIN_FUNC_PREFIX, "c[truerange]b[scale]f", ConvertToFloat::Create}, + { "ConvertTo8bit", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f", ConvertBits::Create, (void *)8 }, + { "ConvertTo16bit", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f", ConvertBits::Create, (void *)16 }, + { "ConvertToFloat", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f", ConvertBits::Create, (void *)32 }, + { "ConvertBits", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f", ConvertBits::Create, (void *)0 }, { 0 } }; @@ -603,8 +604,8 @@ AVSValue __cdecl ConvertToRGB::Create(AVSValue args, void* user_data, IScriptEnv clip = 
ConvertToPlanarGeneric::CreateYUV444(AVSValue(new_args, 5), NULL, env).AsClip(); if((target_rgbtype==24 || target_rgbtype==32) && vi.ComponentSize()!=1) env->ThrowError("ConvertToRGB%d: conversion is allowed only from 8 bit colorspace",target_rgbtype); - if((target_rgbtype==48 || target_rgbtype==64) && vi.ComponentSize()!=2) - env->ThrowError("ConvertToRGB%d: conversion is allowed only from 16 bit colorspace",target_rgbtype); + if((target_rgbtype==48 || target_rgbtype==64) && vi.BitsPerComponent() != 16) + env->ThrowError("ConvertToRGB%d: conversion is allowed only from exact 16 bit colorspace",target_rgbtype); if(target_rgbtype==0 && vi.ComponentSize()==4) env->ThrowError("ConvertToRGB: conversion is allowed only from 8 or 16 bit colorspaces"); int rgbtype_param; @@ -1108,536 +1109,472 @@ static void convert_uintN_to_float_c(const BYTE *srcp, BYTE *dstp, int src_rowsi } } -ConvertTo8bit::ConvertTo8bit(PClip _child, const float _float_range, const int _dither_mode, const int _bitdepth, const int _truerange, IScriptEnvironment* env) : - GenericVideoFilter(_child), float_range(_float_range), dither_mode(_dither_mode), source_bitdepth(_bitdepth), truerange(_truerange) -{ - bool sse2 = !!(env->GetCPUFlags() & CPUF_SSE2); // frames are always 16 bit aligned - - if (vi.ComponentSize() == 2) // 16(,14,12,10)->8 bit - { - // for RGB scaling is not shift by 8 as in YUV but 0..65535->0..255 - if (vi.IsRGB48() || vi.IsRGB64()) - conv_function = convert_rgb_uint16_to_8_c<16>; - else if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { - if(truerange) { - switch(source_bitdepth) - { - case 10: conv_function = convert_rgb_uint16_to_8_c<10>; break; - case 12: conv_function = convert_rgb_uint16_to_8_c<12>; break; - case 14: conv_function = convert_rgb_uint16_to_8_c<14>; break; - case 16: conv_function = convert_rgb_uint16_to_8_c<16>; break; - default: env->ThrowError("ConvertTo8bit: invalid source bitdepth"); - } - } else { - conv_function = convert_rgb_uint16_to_8_c<16>; - } - } else if (vi.IsYUV() || vi.IsYUVA()) - { - if(truerange) { - switch(source_bitdepth) - { - case 10: conv_function = sse2 ? convert_uint16_to_8_sse2<10> : convert_uint16_to_8_c<10>; break; - case 12: conv_function = sse2 ? convert_uint16_to_8_sse2<12> : convert_uint16_to_8_c<12>; break; - case 14: conv_function = sse2 ? convert_uint16_to_8_sse2<14> : convert_uint16_to_8_c<14>; break; - case 16: conv_function = sse2 ? convert_uint16_to_8_sse2<16> : convert_uint16_to_8_c<16>; break; - default: env->ThrowError("ConvertTo8bit: invalid source bitdepth"); - } - } else { - conv_function = sse2 ? convert_uint16_to_8_sse2<16> : convert_uint16_to_8_c<16>; // always convert from 16 bit scale - } - } else - env->ThrowError("ConvertTo8bit: unsupported color space"); - } else if (vi.ComponentSize() == 4) // 32->8 bit - { - conv_function = convert_32_to_uintN_c; - } else - env->ThrowError("ConvertTo8bit: unsupported bit depth"); - - if (vi.NumComponents() == 1) - vi.pixel_type = VideoInfo::CS_Y8; - else if (vi.Is420()) - vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA420 : VideoInfo::CS_YV12; - else if (vi.Is422()) - vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA422 : VideoInfo::CS_YV16; - else if (vi.Is444()) - vi.pixel_type = vi.IsYUVA() ? 
VideoInfo::CS_YUVA444 : VideoInfo::CS_YV24; - else if (vi.IsRGB48()) - vi.pixel_type = VideoInfo::CS_BGR24; - else if (vi.IsRGB64()) - vi.pixel_type = VideoInfo::CS_BGR32; - else if (vi.IsPlanarRGB()) - vi.pixel_type = VideoInfo::CS_RGBP; - else if (vi.IsPlanarRGBA()) - vi.pixel_type = VideoInfo::CS_RGBAP; - else - env->ThrowError("ConvertTo8bit: unsupported color space"); -} - - -AVSValue __cdecl ConvertTo8bit::Create(AVSValue args, void*, IScriptEnvironment* env) { - PClip clip = args[0].AsClip(); - - const VideoInfo &vi = clip->GetVideoInfo(); - // c[truerange]b[dither]i[scale]f, +ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dither_mode, const int _target_bitdepth, bool _truerange, IScriptEnvironment* env) : + GenericVideoFilter(_child), float_range(_float_range), dither_mode(_dither_mode), target_bitdepth(_target_bitdepth), truerange(_truerange) +{ - if (!vi.IsPlanar() && !vi.IsRGB()) - env->ThrowError("ConvertTo8bit: Can only convert from Planar YUV/RGB or packed RGB."); + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); + format_change_only = false; - if (vi.ComponentSize() == 1) - return clip; // 8 bit -> 8 bit: no conversion + bool sse2 = !!(env->GetCPUFlags() & CPUF_SSE2); // frames are always 16 bit aligned - if (vi.ComponentSize() != 4 && args[1].Defined()) - env->ThrowError("ConvertTo8bit: Float range parameter for non float source"); + BitDepthConvFuncPtr conv_function_full_scale; + BitDepthConvFuncPtr conv_function_shifted_scale; - // float range parameter - float float_range = (float)args[1].AsFloat(1.0f); + // ConvertToFloat + if (target_bitdepth == 32) { + // always full scale + if (pixelsize == 1) // 8->32 bit + { + conv_function = convert_uintN_to_float_c; + } + else if (pixelsize == 2) // 16->32 bit + { + if (vi.IsPlanar() && truerange) + { + switch (bits_per_pixel) + { + case 10: conv_function = convert_uintN_to_float_c; break; + case 12: conv_function = convert_uintN_to_float_c; break; + case 14: conv_function = convert_uintN_to_float_c; break; + case 16: conv_function = convert_uintN_to_float_c; break; + default: env->ThrowError("ConvertToFloat: unsupported bit depth"); + } + } + else { + conv_function = convert_uintN_to_float_c; + } + } + else + env->ThrowError("ConvertToFloat: internal error 32->32 is not valid here"); + + conv_function_a = conv_function; // alpha copy is the same full scale + + if (vi.NumComponents() == 1) + vi.pixel_type = VideoInfo::CS_Y32; + else if (vi.Is420()) + vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA420PS : VideoInfo::CS_YUV420PS; + else if (vi.Is422()) + vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA422PS : VideoInfo::CS_YUV422PS; + else if (vi.Is444()) + vi.pixel_type = vi.IsYUVA() ? 
VideoInfo::CS_YUVA444PS : VideoInfo::CS_YUV444PS; + else if (vi.IsPlanarRGB()) + vi.pixel_type = VideoInfo::CS_RGBPS; + else if (vi.IsPlanarRGBA()) + vi.pixel_type = VideoInfo::CS_RGBAPS; + else + env->ThrowError("ConvertToFloat: unsupported color space"); - if (vi.ComponentSize() == 4) { - if(float_range<=0.0) - env->ThrowError("ConvertTo8bit: Float range parameter cannot be <= 0"); - // other checkings + return; } + // ConvertToFloat end + + // ConvertTo16bit() (10, 12, 14, 16) + // Conversion to uint16_t targets + // planar YUV(A) and RGB(A): + // from 8 bit -> 10/12/14/16 with strict range expansion or expansion to 16 + // from 10/12/14 -> 16 bit with strict source range (expansion from 10/12/14 to 16 bit) or just casting pixel_type + // from 16 bit -> 10/12/14 bit with strict target range (reducing range from 16 bit to 10/12/14 bits) or just casting pixel_type + // from float -> 10/12/14/16 with strict range expansion or expansion to 16 + // packed RGB: + // RGB24->RGB48, RGB32->RGB64 + if (target_bitdepth > 8 && target_bitdepth <= 16) { + // 8,10-16,32 -> 16 bit + if (pixelsize == 1) // 8->10-12-14-16 bit + { + if (truerange) + { + switch (target_bitdepth) + { + case 10: + conv_function_full_scale = convert_rgb_8_to_uint16_c<10>; + conv_function_shifted_scale = sse2 ? convert_8_to_uint16_sse2<10> : convert_8_to_uint16_c<10>; + break; + case 12: + conv_function_full_scale = convert_rgb_8_to_uint16_c<12>; + conv_function_shifted_scale = sse2 ? convert_8_to_uint16_sse2<12> : convert_8_to_uint16_c<12>; + break; + case 14: + conv_function_full_scale = convert_rgb_8_to_uint16_c<14>; + conv_function_shifted_scale = sse2 ? convert_8_to_uint16_sse2<14> : convert_8_to_uint16_c<14>; + break; + case 16: + conv_function_full_scale = convert_rgb_8_to_uint16_c<16>; + conv_function_shifted_scale = sse2 ? convert_8_to_uint16_sse2<16> : convert_8_to_uint16_c<16>; + break; + default: env->ThrowError("ConvertTo16bit: unsupported bit depth"); + } + } + else { + conv_function_full_scale = convert_rgb_8_to_uint16_c<16>; + conv_function_shifted_scale = sse2 ? convert_8_to_uint16_sse2<16> : convert_8_to_uint16_c<16>; + } - // dither parameter rfu - int dither_type = args[2].AsInt(-1); - - if ((!vi.IsPlanar() || vi.ComponentSize() != 2) && args[3].Defined()) - env->ThrowError("ConvertTo8bit: truerange specified for non-16bit or non-planar source"); - - int source_bitdepth = 16; // n/a - if (vi.IsPlanar() && vi.ComponentSize() == 2) - source_bitdepth = vi.BitsPerComponent(); - - // when converting from 10-16 bit formats, truerange=false indicates bitdepth of 16 bits regardless of the 10-12-14 bit format - int assume_truerange = args[3].AsBool(true); // n/a for non planar formats - - return new ConvertTo8bit(clip, float_range, dither_type, source_bitdepth, assume_truerange, env); -} - -PVideoFrame __stdcall ConvertTo8bit::GetFrame(int n, IScriptEnvironment* env) { - PVideoFrame src = child->GetFrame(n, env); - PVideoFrame dst = env->NewVideoFrame(vi); - - if(vi.IsPlanar()) - { - int planes_y[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A }; - int planes_r[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A }; - int *planes = (vi.IsYUV() || vi.IsYUVA()) ? 
planes_y : planes_r; - for (int p = 0; p < vi.NumComponents(); ++p) { - const int plane = planes[p]; - conv_function(src->GetReadPtr(plane), dst->GetWritePtr(plane), - src->GetRowSize(plane), src->GetHeight(plane), - src->GetPitch(plane), dst->GetPitch(plane), float_range /*, dither_mode */); + // RGB scaling is not shift by 8 as in YUV but like 0..255->0..65535 + if (vi.IsRGB24() || vi.IsRGB32()) + conv_function = conv_function_full_scale; // convert_rgb_8_to_uint16_c<16>; + // conv_function_a: n/a no separate alpha plane + else if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { + conv_function = conv_function_full_scale; // RGB is full scale + conv_function_a = conv_function_full_scale; // alpha copy is the same full scale + } + else if (vi.IsYUV() || vi.IsYUVA()) { + conv_function = conv_function_shifted_scale; // + conv_function_a = conv_function_full_scale; // alpha copy is the same full scale + } + else + env->ThrowError("ConvertTo16bit: unsupported color space"); } - } - else { - // packed RGBs - conv_function(src->GetReadPtr(), dst->GetWritePtr(), - src->GetRowSize(), src->GetHeight(), - src->GetPitch(), dst->GetPitch(), float_range /*, dither_mode */); - } - - return dst; -} + else if (pixelsize == 2) + { + if (truerange) + { -// Conversion to uint16_t targets -// planar YUV(A) and RGB(A): -// from 8 bit -> 10/12/14/16 with strict range expansion or expansion to 16 -// from 10/12/14 -> 16 bit with strict source range (expansion from 10/12/14 to 16 bit) or just casting pixel_type -// from 16 bit -> 10/12/14 bit with strict target range (reducing range from 16 bit to 10/12/14 bits) or just casting pixel_type -// from float -> 10/12/14/16 with strict range expansion or expansion to 16 -// packed RGB: -// RGB24->RGB48, RGB32->RGB64 -ConvertTo16bit::ConvertTo16bit(PClip _child, const float _float_range, const int _dither_mode, const int _source_bitdepth, const int _target_bitdepth, bool _truerange, IScriptEnvironment* env) : - GenericVideoFilter(_child), float_range(_float_range), dither_mode(_dither_mode), source_bitdepth(_source_bitdepth), target_bitdepth(_target_bitdepth), truerange(_truerange) -{ - change_only_format = false; - - bool sse2 = !!(env->GetCPUFlags() & CPUF_SSE2); // frames are always 16 bit aligned + // full_scale is used for alpha plane always (keep max opacity 255, 1023, 4095, 16383, 65535) - if (vi.ComponentSize() == 1) // 8->10-12-14-16 bit - { - // RGB scaling is not shift by 8 as in YUV but like 0..255->0..65535 - if (vi.IsRGB24() || vi.IsRGB32()) - conv_function = convert_rgb_8_to_uint16_c<16>; - else if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { - if (truerange) + // fill conv_function_full_scale and conv_function_shifted_scale + // first get full_scale converter functions + if (bits_per_pixel > target_bitdepth) // reduce range + { + if (bits_per_pixel == 16) // 16->10/12/14 keep full range + switch (target_bitdepth) { - switch (target_bitdepth) - { - case 10: conv_function = convert_rgb_8_to_uint16_c<10>; break; - case 12: conv_function = convert_rgb_8_to_uint16_c<12>; break; - case 14: conv_function = convert_rgb_8_to_uint16_c<14>; break; - case 16: conv_function = convert_rgb_8_to_uint16_c<16>; break; - default: env->ThrowError("ConvertTo16bit: unsupported bit depth"); - } - } - else { - conv_function = convert_rgb_8_to_uint16_c<16>; + case 10: conv_function_full_scale = convert_rgb_uint16_to_uint16_c<16, 10>; + break; + case 12: conv_function_full_scale = convert_rgb_uint16_to_uint16_c<16, 12>; + break; + case 14: conv_function_full_scale = 
convert_rgb_uint16_to_uint16_c<16, 14>; + break; } - } - else if (vi.IsYUV() || vi.IsYUVA()) { - if (truerange) + else if (bits_per_pixel == 14) // 14->10/12 keep full range + switch (target_bitdepth) { - switch (target_bitdepth) - { - case 10: conv_function = sse2 ? convert_8_to_uint16_sse2<10> : convert_8_to_uint16_c<10>; break; - case 12: conv_function = sse2 ? convert_8_to_uint16_sse2<12> : convert_8_to_uint16_c<12>; break; - case 14: conv_function = sse2 ? convert_8_to_uint16_sse2<14> : convert_8_to_uint16_c<14>; break; - case 16: conv_function = sse2 ? convert_8_to_uint16_sse2<16> : convert_8_to_uint16_c<16>; break; - default: env->ThrowError("ConvertTo16bit: unsupported bit depth"); - } + case 10: conv_function_full_scale = convert_rgb_uint16_to_uint16_c<14, 10>; + break; + case 12: conv_function_full_scale = convert_rgb_uint16_to_uint16_c<14, 12>; + break; } - else { - conv_function = sse2 ? convert_8_to_uint16_sse2<16> : convert_8_to_uint16_c<16>; // always 16 bit scale + else if (bits_per_pixel == 12) // 12->10 keep full range + switch (target_bitdepth) + { + case 10: conv_function_full_scale = convert_rgb_uint16_to_uint16_c<12, 10>; + break; } } - else - env->ThrowError("ConvertTo16bit: unsupported color space"); - } - else if (vi.ComponentSize() == 2) - { - // 10/12/14 -> 16 bit or 16 bit -> 10/12/14 bit - // range reducing or expansion (truerange=true), or just overriding the pixel_type, keeping scale at 16 bits - if (truerange) { - // invalid combinations were already checked - if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { - if (source_bitdepth > target_bitdepth) // reduce range - { - if (source_bitdepth == 16) // 16->10/12/14 keep full range - switch (target_bitdepth) - { - case 10: conv_function = convert_rgb_uint16_to_uint16_c<16, 10>; break; - case 12: conv_function = convert_rgb_uint16_to_uint16_c<16, 12>; break; - case 14: conv_function = convert_rgb_uint16_to_uint16_c<16, 14>; break; - } - else if (source_bitdepth == 14) // 14->10/12 keep full range - switch (target_bitdepth) - { - case 10: conv_function = convert_rgb_uint16_to_uint16_c<14, 10>; break; - case 12: conv_function = convert_rgb_uint16_to_uint16_c<14, 12>; break; - } - else if (source_bitdepth == 12) // 14->10/12 keep full range - switch (target_bitdepth) - { - case 10: conv_function = convert_rgb_uint16_to_uint16_c<12, 10>; break; - } - } else {// expand - if (target_bitdepth == 16) // 10/12/14->16 keep full range - switch (source_bitdepth) - { - case 10: conv_function = convert_rgb_uint16_to_uint16_c<10, 16>; break; - case 12: conv_function = convert_rgb_uint16_to_uint16_c<12, 16>; break; - case 14: conv_function = convert_rgb_uint16_to_uint16_c<14, 16>; break; - } - else if (target_bitdepth == 14) // 10/12->14 keep full range - switch (source_bitdepth) - { - case 10: conv_function = convert_rgb_uint16_to_uint16_c<10, 14>; break; - case 12: conv_function = convert_rgb_uint16_to_uint16_c<12, 14>; break; - } - else if (target_bitdepth == 12) // 10->12 keep full range - switch (source_bitdepth) - { - case 10: conv_function = convert_rgb_uint16_to_uint16_c<10, 12>; break; - } - } + else {// expand + if (target_bitdepth == 16) // 10/12/14->16 keep full range + switch (bits_per_pixel) + { + case 10: conv_function_full_scale = convert_rgb_uint16_to_uint16_c<10, 16>; + break; + case 12: conv_function_full_scale = convert_rgb_uint16_to_uint16_c<12, 16>; + break; + case 14: conv_function_full_scale = convert_rgb_uint16_to_uint16_c<14, 16>; + break; + } + else if (target_bitdepth == 14) // 10/12->14 keep full range + 
switch (bits_per_pixel) + { + case 10: conv_function_full_scale = convert_rgb_uint16_to_uint16_c<10, 14>; + break; + case 12: conv_function_full_scale = convert_rgb_uint16_to_uint16_c<12, 14>; + break; } - else if (vi.IsYUV() || vi.IsYUVA()) { - if (source_bitdepth > target_bitdepth) // reduce range 16->14/12/10 14->12/10 12->10. template: bitshift - switch (source_bitdepth - target_bitdepth) - { - case 2: conv_function = convert_uint16_to_uint16_c; break; - case 4: conv_function = convert_uint16_to_uint16_c; break; - case 6: conv_function = convert_uint16_to_uint16_c; break; - } - else // expand range - switch (target_bitdepth - source_bitdepth) - { - case 2: conv_function = convert_uint16_to_uint16_c; break; - case 4: conv_function = convert_uint16_to_uint16_c; break; - case 6: conv_function = convert_uint16_to_uint16_c; break; - } + else if (target_bitdepth == 12) // 10->12 keep full range + switch (bits_per_pixel) + { + case 10: conv_function_full_scale = convert_rgb_uint16_to_uint16_c<10, 12>; + break; } } - else { // truerange==false - change_only_format = true; + // fill shift_range converter functions + if (bits_per_pixel > target_bitdepth) // reduce range 16->14/12/10 14->12/10 12->10. template: bitshift + switch (bits_per_pixel - target_bitdepth) + { + case 2: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; + case 4: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; + case 6: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; + } + else // expand range + switch (target_bitdepth - bits_per_pixel) + { + case 2: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; + case 4: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; + case 6: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; + } + } + else { + // no conversion for truerange == false + } + + // 10/12/14 -> 16 bit or 16 bit -> 10/12/14 bit + // range reducing or expansion (truerange=true), or just overriding the pixel_type, keeping scale at 16 bits + // 10-16 -> 10->16 truerange == false already handled + if (truerange) { + // invalid combinations were already checked + if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { + conv_function = conv_function_full_scale; + conv_function_a = conv_function_full_scale; + } + else if (vi.IsYUV() || vi.IsYUVA()) { + conv_function = conv_function_shifted_scale; + conv_function_a = conv_function_full_scale; // alpha: always full } + } + else { // truerange==false + // 10->12 .. 
16->12 etc + // only vi bit_depth format override + format_change_only = true; + } } - else if (vi.ComponentSize() == 4) // 32->16 bit + else if (pixelsize == 4) // 32->16 bit { - if (truerange) { - switch(target_bitdepth) - { - case 10: conv_function = convert_32_to_uintN_c; break; - case 12: conv_function = convert_32_to_uintN_c; break; - case 14: conv_function = convert_32_to_uintN_c; break; - case 16: conv_function = convert_32_to_uintN_c; break; - } - } else { - conv_function = convert_32_to_uintN_c; + if (truerange) { + switch (target_bitdepth) + { + case 10: conv_function = convert_32_to_uintN_c; break; + case 12: conv_function = convert_32_to_uintN_c; break; + case 14: conv_function = convert_32_to_uintN_c; break; + case 16: conv_function = convert_32_to_uintN_c; break; } - } else - env->ThrowError("ConvertTo16bit: unsupported bit depth"); - - if (vi.NumComponents() == 1) { - switch(target_bitdepth) - { - case 10: vi.pixel_type = VideoInfo::CS_Y10; break; - case 12: vi.pixel_type = VideoInfo::CS_Y12; break; - case 14: vi.pixel_type = VideoInfo::CS_Y14; break; - case 16: vi.pixel_type = VideoInfo::CS_Y16; break; - default: - env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); } - } else if (vi.Is420()) { - switch(target_bitdepth) - { - case 10: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA420P10 : VideoInfo::CS_YUV420P10; break; - case 12: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA420P12 : VideoInfo::CS_YUV420P12; break; - case 14: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA420P14 : VideoInfo::CS_YUV420P14; break; - case 16: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA420P16 : VideoInfo::CS_YUV420P16; break; - default: - env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); + else { + conv_function = convert_32_to_uintN_c; } - } else if (vi.Is422()) { - switch(target_bitdepth) - { - case 10: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA422P10 : VideoInfo::CS_YUV422P10; break; - case 12: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA422P12 : VideoInfo::CS_YUV422P12; break; - case 14: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA422P14 : VideoInfo::CS_YUV422P14; break; - case 16: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA422P16 : VideoInfo::CS_YUV422P16; break; - default: - env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); + conv_function_a = conv_function; + } + else { + env->ThrowError("ConvertTo16bit: unsupported bit depth"); + } + + // set output vi format + if (vi.IsRGB24()) { + if (target_bitdepth == 16) + vi.pixel_type = VideoInfo::CS_BGR48; + else + env->ThrowError("ConvertTo16bit: unsupported bit depth"); + } + else if (vi.IsRGB32()) { + if (target_bitdepth == 16) + vi.pixel_type = VideoInfo::CS_BGR64; + else + env->ThrowError("ConvertTo16bit: unsupported bit depth"); + } + else { + // Y or YUV(A) or PlanarRGB(A) + if (vi.IsYV12()) // YV12 can have an exotic compatibility constant + vi.pixel_type = VideoInfo::CS_YV12; + int new_bitdepth_bits; + switch (target_bitdepth) { + case 8: new_bitdepth_bits = VideoInfo::CS_Sample_Bits_8; break; + case 10: new_bitdepth_bits = VideoInfo::CS_Sample_Bits_10; break; + case 12: new_bitdepth_bits = VideoInfo::CS_Sample_Bits_12; break; + case 14: new_bitdepth_bits = VideoInfo::CS_Sample_Bits_14; break; + case 16: new_bitdepth_bits = VideoInfo::CS_Sample_Bits_16; break; + case 32: new_bitdepth_bits = VideoInfo::CS_Sample_Bits_32; break; } - } else if (vi.Is444()) { - switch(target_bitdepth) - { - case 10: vi.pixel_type = vi.IsYUVA() ? 
VideoInfo::CS_YUVA444P10 : VideoInfo::CS_YUV444P10; break; - case 12: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA444P12 : VideoInfo::CS_YUV444P12; break; - case 14: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA444P14 : VideoInfo::CS_YUV444P14; break; - case 16: vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA444P16 : VideoInfo::CS_YUV444P16; break; - default: - env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); + vi.pixel_type = (vi.pixel_type & ~VideoInfo::CS_Sample_Bits_Mask) | new_bitdepth_bits; + } + + return; + } + + // ConvertTo8bit() + if (target_bitdepth == 8) { + if (pixelsize == 2) // 16(,14,12,10)->8 bit + { + // fill conv_function_full_scale and conv_function_shifted_scale + if (truerange) { + switch (bits_per_pixel) + { + case 10: conv_function_full_scale = convert_rgb_uint16_to_8_c<10>; + conv_function_shifted_scale = sse2 ? convert_uint16_to_8_sse2<10> : convert_uint16_to_8_c<10>; + break; + case 12: conv_function_full_scale = convert_rgb_uint16_to_8_c<12>; + conv_function_shifted_scale = sse2 ? convert_uint16_to_8_sse2<12> : convert_uint16_to_8_c<12>; + break; + case 14: conv_function_full_scale = convert_rgb_uint16_to_8_c<14>; + conv_function_shifted_scale = sse2 ? convert_uint16_to_8_sse2<14> : convert_uint16_to_8_c<14>; + break; + case 16: conv_function_full_scale = convert_rgb_uint16_to_8_c<16>; + conv_function_shifted_scale = sse2 ? convert_uint16_to_8_sse2<16> : convert_uint16_to_8_c<16>; + break; + default: env->ThrowError("ConvertTo8bit: invalid source bitdepth"); + } } - } else if (vi.IsPlanarRGB()) { - switch(target_bitdepth) - { - case 10: vi.pixel_type = VideoInfo::CS_RGBP10; break; - case 12: vi.pixel_type = VideoInfo::CS_RGBP12; break; - case 14: vi.pixel_type = VideoInfo::CS_RGBP14; break; - case 16: vi.pixel_type = VideoInfo::CS_RGBP16; break; - default: - env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); + else { + conv_function_full_scale = convert_rgb_uint16_to_8_c<16>; + conv_function_shifted_scale = sse2 ? 
convert_uint16_to_8_sse2<16> : convert_uint16_to_8_c<16>; } - } else if (vi.IsPlanarRGBA()) { - switch(target_bitdepth) + + // for RGB scaling is not shift by 8 as in YUV but 0..65535->0..255 + if (vi.IsRGB48() || vi.IsRGB64()) { + conv_function = conv_function_full_scale; + // no separate alpha plane + } else if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { + conv_function = conv_function_full_scale; + conv_function_a = conv_function_full_scale; + } + else if (vi.IsYUV() || vi.IsYUVA()) { - case 10: vi.pixel_type = VideoInfo::CS_RGBAP10; break; - case 12: vi.pixel_type = VideoInfo::CS_RGBAP12; break; - case 14: vi.pixel_type = VideoInfo::CS_RGBAP14; break; - case 16: vi.pixel_type = VideoInfo::CS_RGBAP16; break; - default: - env->ThrowError("ConvertTo16bit: unsupported effective bit depth"); + conv_function = conv_function_shifted_scale; + conv_function_a = conv_function_full_scale; } - } else if(vi.IsRGB24()) { - if(target_bitdepth == 16) - vi.pixel_type = VideoInfo::CS_BGR48; - else - env->ThrowError("ConvertTo16bit: unsupported bit depth"); - } else if(vi.IsRGB32()) { - if(target_bitdepth == 16) - vi.pixel_type = VideoInfo::CS_BGR64; else - env->ThrowError("ConvertTo16bit: unsupported bit depth"); - } else - env->ThrowError("ConvertTo16bit: unsupported color space"); -} + env->ThrowError("ConvertTo8bit: unsupported color space"); + } + else if (vi.ComponentSize() == 4) // 32->8 bit + { + // full scale + conv_function = convert_32_to_uintN_c; + conv_function_a = conv_function; + } + else + env->ThrowError("ConvertTo8bit: unsupported bit depth"); + + if (vi.NumComponents() == 1) + vi.pixel_type = VideoInfo::CS_Y8; + else if (vi.Is420()) + vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA420 : VideoInfo::CS_YV12; + else if (vi.Is422()) + vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA422 : VideoInfo::CS_YV16; + else if (vi.Is444()) + vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA444 : VideoInfo::CS_YV24; + else if (vi.IsRGB48()) + vi.pixel_type = VideoInfo::CS_BGR24; + else if (vi.IsRGB64()) + vi.pixel_type = VideoInfo::CS_BGR32; + else if (vi.IsPlanarRGB()) + vi.pixel_type = VideoInfo::CS_RGBP; + else if (vi.IsPlanarRGBA()) + vi.pixel_type = VideoInfo::CS_RGBAP; + else + env->ThrowError("ConvertTo8bit: unsupported color space"); + + return; + } + env->ThrowError("ConvertBits: unsupported target bit-depth (%d)", target_bitdepth); -AVSValue __cdecl ConvertTo16bit::Create(AVSValue args, void*, IScriptEnvironment* env) { +} + +AVSValue __cdecl ConvertBits::Create(AVSValue args, void* user_data, IScriptEnvironment* env) { PClip clip = args[0].AsClip(); //0 1 2 3 4 //c[bits]i[truerange]b[dither]i[scale]f const VideoInfo &vi = clip->GetVideoInfo(); - if (!vi.IsPlanar() && !vi.IsRGB24() && !vi.IsRGB32()) - env->ThrowError("ConvertTo16bit: Can only convert from Planar YUV/RGB or packed RGB."); - if (vi.ComponentSize() != 4 && args[4].Defined()) - env->ThrowError("ConvertTo16bit: Float range parameter not allowed for non float source"); + intptr_t create_param = (int)reinterpret_cast(user_data); + // float range parameter float float_range = (float)args[4].AsFloat(1.0f); + // bits parameter is compulsory + if (!args[1].Defined() && create_param == 0) { + env->ThrowError("ConvertBits: missing bits parameter"); + } + // when converting from/true 10-16 bit formats, truerange=false indicates bitdepth of 16 bits regardless of the 10-12-14 bit format bool assume_truerange = args[2].AsBool(true); // n/a for non planar formats - int target_bitdepth = args[1].AsInt(16); // default: 16 bit. 
can override with 10/12/14 bits + // bits parameter + + int target_bitdepth = args[1].AsInt(create_param); // default comes by calling from old To8,To16,ToFloat functions int source_bitdepth = vi.BitsPerComponent(); + int pixelsize = vi.ComponentSize(); - if(target_bitdepth!=10 && target_bitdepth!=12 && target_bitdepth!=14 && target_bitdepth!=16) - env->ThrowError("ConvertTo16bit: invalid bit depth"); + if(target_bitdepth!=8 && target_bitdepth!=10 && target_bitdepth!=12 && target_bitdepth!=14 && target_bitdepth!=16 && target_bitdepth!=32) + env->ThrowError("ConvertBits: invalid bit depth: %d", target_bitdepth); - if (!vi.IsPlanar() && args[2].Defined()) - env->ThrowError("ConvertTo16bit: truerange specified for non-planar source"); - - if (vi.IsRGB24() || vi.IsRGB32()) { - if (target_bitdepth != 16) - env->ThrowError("ConvertTo16bit: only 16 bit allowed for packed RGB"); - } + if(create_param == 8 && target_bitdepth !=8) + env->ThrowError("ConvertTo8Bit: invalid bit depth: %d", target_bitdepth); + if(create_param == 32 && target_bitdepth !=32) + env->ThrowError("ConvertToFloat: invalid bit depth: %d", target_bitdepth); + if(create_param == 16 && (target_bitdepth == 8 || target_bitdepth ==32)) + env->ThrowError("ConvertTo16bit: invalid bit depth: %d", target_bitdepth); - // 10/12/14/16 -> 10/12/14/16 - if (vi.ComponentSize() == 2) - { - if((source_bitdepth == target_bitdepth) && assume_truerange) // 10->10 .. 16->16 - return clip; - // source_10_bit.ConvertTo16bit(truerange=true) : upscale range - // source_10_bit.ConvertTo16bit(truerange=false) : leaves data, only format conversion - // source_10_bit.ConvertTo16bit(bits=12,truerange=true) : upscale range from 10 to 12 - // source_10_bit.ConvertTo16bit(bits=12,truerange=false) : leaves data, only format conversion - // source_16_bit.ConvertTo16bit(bits=10, truerange=true) : downscale range - // source_16_bit.ConvertTo16bit(bits=10, truerange=false) : leaves data, only format conversion + if (args[2].Defined()) { + if (!vi.IsPlanar()) + env->ThrowError("ConvertBits: truerange specified for non-planar source"); } - if (vi.ComponentSize() == 4) { - if(float_range<=0.0) - env->ThrowError("ConvertTo16bit: Float range parameter cannot be <= 0"); - // other checkings + // no change -> return unmodified + if((source_bitdepth == target_bitdepth)) // 10->10 .. 
16->16 + return clip; + + // YUY2 conversion is limited + if (vi.IsYUY2()) { + env->ThrowError("ConvertBits: YUY2 source is 8-bit only"); } - // dither parameter, rfu - int dither_type = args[3].AsInt(-1); + // packed RGB conversion is limited + if (vi.IsRGB24() || vi.IsRGB32()) { + if (target_bitdepth != 16) + env->ThrowError("ConvertBits: invalid bit-depth specified for packed RGB"); + } - return new ConvertTo16bit(clip, float_range, dither_type, source_bitdepth, target_bitdepth, assume_truerange, env); -} + if (vi.IsRGB48() || vi.IsRGB64()) { + if (target_bitdepth != 8) + env->ThrowError("ConvertBits: invalid bit-depth specified for packed RGB"); + } -PVideoFrame __stdcall ConvertTo16bit::GetFrame(int n, IScriptEnvironment* env) { - PVideoFrame src = child->GetFrame(n, env); - PVideoFrame dst = env->NewVideoFrame(vi); + // remark + // source_10_bit.ConvertTo16bit(truerange=true) : upscale range + // source_10_bit.ConvertTo16bit(truerange=false) : leaves data, only format conversion + // source_10_bit.ConvertTo16bit(bits=12,truerange=true) : upscale range from 10 to 12 + // source_10_bit.ConvertTo16bit(bits=12,truerange=false) : leaves data, only format conversion + // source_16_bit.ConvertTo16bit(bits=10, truerange=true) : downscale range + // source_16_bit.ConvertTo16bit(bits=10, truerange=false) : leaves data, only format conversion - if (change_only_format) { - // only vi changed, all planes are copied unmodified - int planes_y[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A }; - int planes_r[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A }; - int *planes = (vi.IsYUV() || vi.IsYUVA()) ? planes_y : planes_r; - for (int p = 0; p < vi.NumComponents(); ++p) { - const int plane = planes[p]; - env->BitBlt(dst->GetWritePtr(plane), dst->GetPitch(plane), src->GetReadPtr(plane), src->GetPitch(plane), src->GetRowSize(plane), src->GetHeight(plane)); - } - return dst; - } - if(vi.IsPlanar()) - { - int planes_y[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A }; - int planes_r[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A }; - int *planes = (vi.IsYUV() || vi.IsYUVA()) ? 
planes_y : planes_r; - for (int p = 0; p < vi.NumComponents(); ++p) { - const int plane = planes[p]; - conv_function(src->GetReadPtr(plane), dst->GetWritePtr(plane), - src->GetRowSize(plane), src->GetHeight(plane), - src->GetPitch(plane), dst->GetPitch(plane), float_range /*, dither_mode */); - } - } - else { - // packed RGBs - conv_function(src->GetReadPtr(), dst->GetWritePtr(), - src->GetRowSize(), src->GetHeight(), - src->GetPitch(), dst->GetPitch(), float_range /*, dither_mode */); + if (args[4].Defined() && (target_bitdepth != 32 || source_bitdepth != 32)) { + env->ThrowError("ConvertBits: Float range parameter is not allowed here"); } - return dst; -} - + if(float_range<=0.0) + env->ThrowError("ConvertBits: Float range parameter cannot be <= 0"); -// float 32 bit -ConvertToFloat::ConvertToFloat(PClip _child, const float _float_range, const int _source_bitdepth, bool _truerange, IScriptEnvironment* env) : - GenericVideoFilter(_child), float_range(_float_range), source_bitdepth(_source_bitdepth), truerange(_truerange) -{ + // dither parameter, rfu + int dither_type = args[3].AsInt(-1); - if (vi.ComponentSize() == 1) // 8->32 bit - { - conv_function = convert_uintN_to_float_c; - } else if (vi.ComponentSize() == 2) // 16->32 bit - { - if (vi.IsPlanar() && truerange) - { - switch (source_bitdepth) - { - case 10: conv_function = convert_uintN_to_float_c; break; - case 12: conv_function = convert_uintN_to_float_c; break; - case 14: conv_function = convert_uintN_to_float_c; break; - case 16: conv_function = convert_uintN_to_float_c; break; - default: env->ThrowError("ConvertToFloat: unsupported bit depth"); - } - } else { - conv_function = convert_uintN_to_float_c; - } - } else - env->ThrowError("ConvertToFloat: unsupported bit depth"); - - if (vi.NumComponents() == 1) - vi.pixel_type = VideoInfo::CS_Y32; - else if (vi.Is420()) - vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA420PS : VideoInfo::CS_YUV420PS; - else if (vi.Is422()) - vi.pixel_type = vi.IsYUVA() ? VideoInfo::CS_YUVA422PS : VideoInfo::CS_YUV422PS; - else if (vi.Is444()) - vi.pixel_type = vi.IsYUVA() ? 
VideoInfo::CS_YUVA444PS : VideoInfo::CS_YUV444PS; - else if (vi.IsPlanarRGB()) - vi.pixel_type = VideoInfo::CS_RGBPS; - else if (vi.IsPlanarRGBA()) - vi.pixel_type = VideoInfo::CS_RGBAPS; - else - env->ThrowError("ConvertToFloat: unsupported color space"); + return new ConvertBits(clip, float_range, dither_type, target_bitdepth, assume_truerange, env); } -AVSValue __cdecl ConvertToFloat::Create(AVSValue args, void*, IScriptEnvironment* env) { - PClip clip = args[0].AsClip(); - - const VideoInfo &vi = clip->GetVideoInfo(); - //0 1 2 - //c[truerange]b[scale]f - - if (!vi.IsPlanar()) - env->ThrowError("ConvertToFloat: Can only convert from Planar YUV(A) or RGB(A)."); - - if (vi.ComponentSize() == 4) - return clip; // 32 bit -> 32 bit: no conversion - - // float range parameter - float float_range = (float)args[2].AsFloat(1.0f); - - if(float_range<=0.0) - env->ThrowError("ConvertToFloat: Float range parameter cannot be <= 0"); - - bool assume_truerange = args[1].AsBool(true); // n/a for non planar formats - int source_bitdepth = vi.BitsPerComponent(); - - if (vi.ComponentSize() != 2 && args[1].Defined()) - env->ThrowError("ConvertToFloat: truerange specified for 8 bit source"); +PVideoFrame __stdcall ConvertBits::GetFrame(int n, IScriptEnvironment* env) { + PVideoFrame src = child->GetFrame(n, env); - return new ConvertToFloat(clip, float_range, source_bitdepth, assume_truerange, env); -} + if (format_change_only) + { + // for 10-16 bit: simple format override in constructor + return src; + } -PVideoFrame __stdcall ConvertToFloat::GetFrame(int n, IScriptEnvironment* env) { - PVideoFrame src = child->GetFrame(n, env); PVideoFrame dst = env->NewVideoFrame(vi); - int planes_y[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A }; - int planes_r[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A }; - int *planes = (vi.IsYUV() || vi.IsYUVA()) ? planes_y : planes_r; - for (int p = 0; p < vi.NumComponents(); ++p) { - const int plane = planes[p]; - conv_function(src->GetReadPtr(plane), dst->GetWritePtr(plane), - src->GetRowSize(plane), src->GetHeight(plane), - src->GetPitch(plane), dst->GetPitch(plane), float_range /*, dither_mode */); + if(vi.IsPlanar()) + { + int planes_y[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A }; + int planes_r[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A }; + int *planes = (vi.IsYUV() || vi.IsYUVA()) ? planes_y : planes_r; + for (int p = 0; p < vi.NumComponents(); ++p) { + const int plane = planes[p]; + if(plane==PLANAR_A) + conv_function_a(src->GetReadPtr(plane), dst->GetWritePtr(plane), + src->GetRowSize(plane), src->GetHeight(plane), + src->GetPitch(plane), dst->GetPitch(plane), float_range /*, dither_mode */); + else + conv_function(src->GetReadPtr(plane), dst->GetWritePtr(plane), + src->GetRowSize(plane), src->GetHeight(plane), + src->GetPitch(plane), dst->GetPitch(plane), float_range /*, dither_mode */); + } + } + else { + // packed RGBs + conv_function(src->GetReadPtr(), dst->GetWritePtr(), + src->GetRowSize(), src->GetHeight(), + src->GetPitch(), dst->GetPitch(), float_range /*, dither_mode */); } - return dst; } - - - - - diff --git a/avs_core/convert/convert_planar.h b/avs_core/convert/convert_planar.h index 06d17fab3..487bc93c6 100644 --- a/avs_core/convert/convert_planar.h +++ b/avs_core/convert/convert_planar.h @@ -198,10 +198,10 @@ class ConvertToPlanarGeneric : public GenericVideoFilter // todo: separate file? 
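A minimal scalar sketch of the full-scale vs. shifted-scale split the ConvertBits constructor above chooses between for 8->16 bit promotion: RGB and alpha planes stretch 0..255 to 0..65535 (multiply by 257), while YUV planes only shift left by 8 so the limited-range layout is preserved. The names below are illustrative only, not the actual convert_rgb_8_to_uint16_c / convert_8_to_uint16_c kernels; for 10/12/14-bit targets the factor and shift presumably scale down accordingly.

#include <cstdint>
#include <cstddef>

// RGB/alpha planes: full-scale stretch, 0..255 -> 0..65535 (v * 257 == (v << 8) | v)
static void sketch_full_scale_8_to_16(const uint8_t* src, uint16_t* dst, size_t count)
{
    for (size_t i = 0; i < count; ++i)
        dst[i] = (uint16_t)(src[i] * 257);
}

// YUV planes: shifted scale, 0..255 -> 0..65280, keeps the video-range headroom in place
static void sketch_shifted_scale_8_to_16(const uint8_t* src, uint16_t* dst, size_t count)
{
    for (size_t i = 0; i < count; ++i)
        dst[i] = (uint16_t)(src[i] << 8);
}
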
typedef void (*BitDepthConvFuncPtr)(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); -class ConvertTo8bit : public GenericVideoFilter +class ConvertBits : public GenericVideoFilter { public: - ConvertTo8bit(PClip _child, const float _float_range, const int _dither_mode, const int _source_bitdepth, const int _truerange, IScriptEnvironment* env); + ConvertBits(PClip _child, const float _float_range, const int _dither_mode, const int _target_bitdepth, bool _truerange, IScriptEnvironment* env); PVideoFrame __stdcall GetFrame(int n,IScriptEnvironment* env); int __stdcall SetCacheHints(int cachehints, int frame_range) override { @@ -211,54 +211,14 @@ class ConvertTo8bit : public GenericVideoFilter static AVSValue __cdecl Create(AVSValue args, void*, IScriptEnvironment* env); private: BitDepthConvFuncPtr conv_function; + BitDepthConvFuncPtr conv_function_a; float float_range; int dither_mode; int pixelsize; - int source_bitdepth; - int truerange; -}; - -class ConvertTo16bit : public GenericVideoFilter -{ -public: - ConvertTo16bit(PClip _child, const float _float_range, const int _dither_mode, const int _source_bitdepth, const int _target_bitdepth, bool _truerange, IScriptEnvironment* env); - PVideoFrame __stdcall GetFrame(int n,IScriptEnvironment* env); - - int __stdcall SetCacheHints(int cachehints, int frame_range) override { - return cachehints == CACHE_GET_MTMODE ? MT_NICE_FILTER : 0; - } - - static AVSValue __cdecl Create(AVSValue args, void*, IScriptEnvironment* env); -private: - BitDepthConvFuncPtr conv_function; - float float_range; - int dither_mode; - int pixelsize; - int source_bitdepth; // effective 10/12/14/16 bits within the 2 byte container - int target_bitdepth; // effective 10/12/14/16 bits within the 2 byte container - bool truerange; // if 16->10 range reducing or e.g. 14->16 bit range expansion needed - bool change_only_format; // if 16->10 bit affects only pixel_type -}; - -class ConvertToFloat : public GenericVideoFilter -{ -public: - ConvertToFloat(PClip _child, const float _float_range, const int _source_bitdepth, bool _truerange, IScriptEnvironment* env); - PVideoFrame __stdcall GetFrame(int n,IScriptEnvironment* env); - - int __stdcall SetCacheHints(int cachehints, int frame_range) override { - return cachehints == CACHE_GET_MTMODE ? MT_NICE_FILTER : 0; - } - - static AVSValue __cdecl Create(AVSValue args, void*, IScriptEnvironment* env); -private: - BitDepthConvFuncPtr conv_function; - float float_range; - int source_bitdepth; // effective 10/12/14/16 bits within the 2 byte container + int bits_per_pixel; + int target_bitdepth; bool truerange; // if 16->10 range reducing or e.g. 14->16 bit range expansion needed - int pixelsize; + bool format_change_only; }; - - #endif From 598efff129be892c5b6f4562a2e5d9427718afa3 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Mon, 19 Sep 2016 17:07:21 +0200 Subject: [PATCH 081/120] rec2020 matrix for RGB<->444 and GreyScale. 
not for YUY2 --- avs_core/convert/convert.cpp | 9 +++++++-- avs_core/convert/convert.h | 7 ++++--- avs_core/convert/convert_planar.cpp | 15 +++++++++++++++ avs_core/convert/convert_yuy2.h | 1 + avs_core/filters/greyscale.cpp | 26 ++++++++++++++++++++++---- avs_core/filters/greyscale.h | 2 +- 6 files changed, 50 insertions(+), 10 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 0bbff2d1f..d0055584b 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -49,7 +49,7 @@ ***** Declare index of new filters for Avisynth's filter engine ***** ********************************************************************/ -extern const AVSFunction Convert_filters[] = { // matrix can be "rec601", rec709", "PC.601" or "PC.709" +extern const AVSFunction Convert_filters[] = { // matrix can be "rec601", "rec709", "PC.601" or "PC.709" or "rec2020" { "ConvertToRGB", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)0 }, { "ConvertToRGB24", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)24 }, { "ConvertToRGB32", BUILTIN_FUNC_PREFIX, "c[matrix]s[interlaced]b[ChromaInPlacement]s[chromaresample]s", ConvertToRGB::Create, (void *)32 }, @@ -76,6 +76,7 @@ extern const AVSFunction Convert_filters[] = { // matrix can be "rec601", { 0 } }; +// for YUY2 static const int crv_rec601 = int(1.596*65536+0.5); static const int cgv_rec601 = int(0.813*65536+0.5); static const int cgu_rec601 = int(0.391*65536+0.5); @@ -99,6 +100,7 @@ static const int cbu_pc709 = int(1.863*65536+0.5); static const int cy_rec = int((255.0/219.0)*65536+0.5); static const int cy_pc = 65536; +// still YUY2 only static const int crv_values[4] = { crv_rec601, crv_rec709, crv_pc601, crv_pc709 }; static const int cgv_values[4] = { cgv_rec601, cgv_rec709, cgv_pc601, cgv_pc709 }; static const int cgu_values[4] = { cgu_rec601, cgu_rec709, cgu_pc601, cgu_pc709 }; @@ -122,6 +124,8 @@ int getMatrix( const char* matrix, IScriptEnvironment* env) { return PC_709; if (!lstrcmpi(matrix, "AVERAGE")) return AVERAGE; + if (!lstrcmpi(matrix, "rec2020")) + return Rec2020; env->ThrowError("Convert: Unknown colormatrix"); } return Rec601; // Default colorspace conversion for AviSynth @@ -138,6 +142,7 @@ ConvertToRGB::ConvertToRGB( PClip _child, bool rgb24, const char* matrix, : GenericVideoFilter(_child) { theMatrix = Rec601; + // no rec2020 here if (matrix) { if (!lstrcmpi(matrix, "rec709")) theMatrix = Rec709; @@ -938,7 +943,7 @@ static void convert_32_to_uintN_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, } } -// rgb: full scale. Difference to YUV: *257 instead of << 8 (full 16 bit sample) +// rgb/alpha: full scale. 
Difference to YUV: *257 instead of << 8 (full 16 bit sample) template static void convert_rgb_8_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { diff --git a/avs_core/convert/convert.h b/avs_core/convert/convert.h index d3f5c3ab6..bc8870a66 100644 --- a/avs_core/convert/convert.h +++ b/avs_core/convert/convert.h @@ -37,7 +37,7 @@ #include "../core/internal.h" -enum {Rec601=0, Rec709=1, PC_601=2, PC_709=3, AVERAGE=4 }; +enum {Rec601=0, Rec709=1, PC_601=2, PC_709=3, AVERAGE=4, Rec2020=5 }; int getMatrix( const char* matrix, IScriptEnvironment* env); /***************************************************** @@ -66,7 +66,7 @@ inline int RGB2YUV(int rgb) ******* Colorspace GenericVideoFilter Classes ****** *******************************************************/ - +// YUY2 only class ConvertToRGB : public GenericVideoFilter /** * Class to handle conversion to RGB & RGBA @@ -84,9 +84,10 @@ class ConvertToRGB : public GenericVideoFilter private: int theMatrix; - enum {Rec601=0, Rec709=1, PC_601=2, PC_709=3 }; + enum {Rec601=0, Rec709=1, PC_601=2, PC_709=3}; }; +// YUY2 only class ConvertToYV12 : public GenericVideoFilter /** * Class for conversions to YV12 diff --git a/avs_core/convert/convert_planar.cpp b/avs_core/convert/convert_planar.cpp index 14c1f9d6e..8b39e973c 100644 --- a/avs_core/convert/convert_planar.cpp +++ b/avs_core/convert/convert_planar.cpp @@ -117,6 +117,15 @@ ConvertToY8::ConvertToY8(PClip src, int in_matrix, IScriptEnvironment* env) : Ge matrix.r_f = (float)((219.0/255.0)*0.2126); //R matrix.offset_y = 16; matrix.offset_y_f = 16.0f / 256.0f; + } else if (in_matrix == Rec2020) { + matrix.b = (int16_t)((219.0/255.0)*0.0593*32768.0+0.5); //B + matrix.g = (int16_t)((219.0/255.0)*0.6780*32768.0+0.5); //G + matrix.r = (int16_t)((219.0/255.0)*0.2627*32768.0+0.5); //R + matrix.b_f = (float)((219.0/255.0)*0.0593); //B + matrix.g_f = (float)((219.0/255.0)*0.6780); //G + matrix.r_f = (float)((219.0/255.0)*0.2627); //R + matrix.offset_y = 16; + matrix.offset_y_f = 16.0f / 256.0f; } else if (in_matrix == PC_709) { matrix.b = (int16_t)(0.0722*32768.0+0.5); //B matrix.g = (int16_t)(0.7152*32768.0+0.5); //G @@ -593,6 +602,9 @@ ConvertRGBToYV24::ConvertRGBToYV24(PClip src, int in_matrix, IScriptEnvironment* BuildMatrix(1.0/3, /* 1.0/3 */ 1.0/3, 255, 127, 0, shift); } + else if (in_matrix == Rec2020) { + BuildMatrix(0.2627, /* 0.6780 */ 0.0593, 219, 112, 16, shift); + } else { env->ThrowError("ConvertRGBToYV24: Unknown matrix."); } @@ -1094,6 +1106,9 @@ ConvertYUV444ToRGB::ConvertYUV444ToRGB(PClip src, int in_matrix, int _pixel_step BuildMatrix(0.2126, /* 0.7152 */ 0.0722, 255, 127, 0, shift); } + else if (in_matrix == Rec2020) { + BuildMatrix(0.2627, /* 0.6780 */ 0.0593, 219, 112, 16, shift); + } else if (in_matrix == AVERAGE) { BuildMatrix(1.0/3, /* 1.0/3 */ 1.0/3, 255, 127, 0, shift); diff --git a/avs_core/convert/convert_yuy2.h b/avs_core/convert/convert_yuy2.h index 8b6203bd3..dfa80ff04 100644 --- a/avs_core/convert/convert_yuy2.h +++ b/avs_core/convert/convert_yuy2.h @@ -61,6 +61,7 @@ class ConvertToYUY2 : public GenericVideoFilter protected: const int src_cs; // Source colorspace int theMatrix; + // no rec2020 for YUY2 enum {Rec601=0, Rec709=1, PC_601=2, PC_709=3 }; // Note! 
convert_yuy2.cpp assumes these values }; diff --git a/avs_core/filters/greyscale.cpp b/avs_core/filters/greyscale.cpp index aad0392f5..7c9853628 100644 --- a/avs_core/filters/greyscale.cpp +++ b/avs_core/filters/greyscale.cpp @@ -49,7 +49,7 @@ ************************************/ extern const AVSFunction Greyscale_filters[] = { - { "Greyscale", BUILTIN_FUNC_PREFIX, "c[matrix]s", Greyscale::Create }, // matrix can be "rec601", "rec709" or "Average" + { "Greyscale", BUILTIN_FUNC_PREFIX, "c[matrix]s", Greyscale::Create }, // matrix can be "rec601", "rec709" or "Average" or "rec2020" { "Grayscale", BUILTIN_FUNC_PREFIX, "c[matrix]s", Greyscale::Create }, { 0 } }; @@ -67,8 +67,10 @@ Greyscale::Greyscale(PClip _child, const char* matrix, IScriptEnvironment* env) matrix_ = Average; else if (!lstrcmpi(matrix, "rec601")) matrix_ = Rec601; + else if (!lstrcmpi(matrix, "rec2020")) + matrix_ = Rec2020; else - env->ThrowError("GreyScale: invalid \"matrix\" parameter (must be matrix=\"Rec601\", \"Rec709\" or \"Average\")"); + env->ThrowError("GreyScale: invalid \"matrix\" parameter (must be matrix=\"Rec601\", \"Rec709\", \"Rec2020\" or \"Average\")"); } BuildGreyMatrix(); pixelsize = vi.ComponentSize(); @@ -301,6 +303,8 @@ static void greyscale_planar_rgb_float_c(BYTE *srcp_r8, BYTE *srcp_g8, BYTE *src } void Greyscale::BuildGreyMatrix() { +#if 0 + // not used, kept for sample // 16 bit scaled const int cyavb_sc16 = 21845; // const int cyav = int(0.333333*65536+0.5); const int cyavg_sc16 = 21845; @@ -316,13 +320,14 @@ void Greyscale::BuildGreyMatrix() { const int cyr601_sc16 = 19595; // int(0.299*65536+0.5); // 19595 // sum: 65536 OK + const int cyb709_sc16 = 4732; // int(0.0722 * 65536 + 0.5); // 4732 const int cyg709_sc16 = 46871; // int(0.7152 * 65536 + 0.5); // 46871 const int cyr709_sc16 = 13933; // int(0.2126 * 65536 + 0.5); // 13933 // Sum: 65536 OK // This is the correct brigtness calculations (standardized in Rec. 
709) - - // 15 bit scaled +#endif + // 15 bit scaled // PF check: int32 overflow in 16 bits // 32769 * 65535 + 16384 = 8000BFFF int32 overflow // 32768 * 65535 + 16384 = 7FFFC000 OK @@ -355,9 +360,22 @@ void Greyscale::BuildGreyMatrix() { const float cyg709_f = 0.7152f; const float cyr709_f = 0.2126f; + // --- Rec2020 + const int cyb2020_sc15 = 1943; // int(0.0593 * 32768 + 0.5); // 1943 + const int cyg2020_sc15 = 22217; // int(0.6780 * 32768 + 0.5); // 22217 + const int cyr2020_sc15 = 8608; // int(0.2627 * 32768 + 0.5); // 8608 + // sum: 32768 OK + const float cyb2020_f = 0.0593f; + const float cyg2020_f = 0.6780f; + const float cyr2020_f = 0.2627f; + + if(matrix_ == Rec709) { greyMatrix.b = cyb709_sc15; greyMatrix.g = cyg709_sc15; greyMatrix.r = cyr709_sc15; greyMatrix.b_f = cyb709_f; greyMatrix.g_f = cyg709_f; greyMatrix.r_f = cyr709_f; + } else if(matrix_ == Rec2020) { + greyMatrix.b = cyb2020_sc15; greyMatrix.g = cyg2020_sc15; greyMatrix.r = cyr2020_sc15; + greyMatrix.b_f = cyb2020_f; greyMatrix.g_f = cyg2020_f; greyMatrix.r_f = cyr2020_f; } else if (matrix_ == Average) { greyMatrix.b = cybav_sc15; greyMatrix.g = cygav_sc15; greyMatrix.r = cyrav_sc15; greyMatrix.b_f = cybav_f; greyMatrix.g_f = cygav_f; greyMatrix.r_f = cyrav_f; diff --git a/avs_core/filters/greyscale.h b/avs_core/filters/greyscale.h index 2be9b7873..151382561 100644 --- a/avs_core/filters/greyscale.h +++ b/avs_core/filters/greyscale.h @@ -66,7 +66,7 @@ class Greyscale : public GenericVideoFilter void BuildGreyMatrix(); GreyConversionMatrix greyMatrix; int matrix_; - enum {Rec601 = 0, Rec709, Average }; + enum {Rec601 = 0, Rec709, Average, Rec2020 }; int pixelsize; int bits_per_pixel; From 47571adcf2f2f988722ae1e478123caf79e4f22a Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 22 Sep 2016 14:21:55 +0200 Subject: [PATCH 082/120] Merge: SSE2 for 10-14 bits (10-16 for SSE4.1 still work) --- avs_core/core/internal.h | 27 +++++++++ avs_core/filters/edit.cpp | 25 +++++--- avs_core/filters/edit.h | 1 + avs_core/filters/merge.cpp | 105 ++++++++++++++++++++++++++-------- avs_core/filters/merge.h | 9 +++ avs_core/filters/resample.cpp | 10 ---- 6 files changed, 136 insertions(+), 41 deletions(-) diff --git a/avs_core/core/internal.h b/avs_core/core/internal.h index 9c43d37ae..481c45403 100644 --- a/avs_core/core/internal.h +++ b/avs_core/core/internal.h @@ -57,6 +57,7 @@ enum MANAGE_CACHE_KEYS #include #include "parser/script.h" // TODO we only need ScriptFunction from here +#include class AVSFunction { @@ -194,6 +195,32 @@ static __inline bool IsClose(int a, int b, unsigned threshold) static __inline bool IsCloseFloat(float a, float b, float threshold) { return (a-b+threshold <= threshold*2); } +// useful SIMD helpers +// sse2 replacement of _mm_mullo_epi32 in SSE4.1 +// use it after speed test, may have too much overhead and C is faster +__forceinline __m128i _MM_MULLO_EPI32(const __m128i &a, const __m128i &b) +{ + // for SSE 4.1: return _mm_mullo_epi32(a, b); + __m128i tmp1 = _mm_mul_epu32(a,b); // mul 2,0 + __m128i tmp2 = _mm_mul_epu32( _mm_srli_si128(a,4), _mm_srli_si128(b,4)); // mul 3,1 + // shuffle results to [63..0] and pack. 
a2->a1, a0->a0 + return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE (0,0,2,0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE (0,0,2,0))); +} + +// fake _mm_packus_epi32 (orig is SSE4.1 only) +__forceinline __m128i _MM_PACKUS_EPI32( __m128i a, __m128i b ) +{ + a = _mm_slli_epi32 (a, 16); + a = _mm_srai_epi32 (a, 16); + b = _mm_slli_epi32 (b, 16); + b = _mm_srai_epi32 (b, 16); + a = _mm_packs_epi32 (a, b); + return a; +} + +// unsigned short div 255 +#define SSE2_DIV255_U16(x) _mm_srli_epi16(_mm_mulhi_epu16(x, _mm_set1_epi16((short)0x8081)), 7) +#define AVX2_DIV255_U16(x) _mm256_srli_epi16(_mm256_mulhi_epu16(x, _mm256_set1_epi16((short)0x8081)), 7) #endif // __Internal_H__ diff --git a/avs_core/filters/edit.cpp b/avs_core/filters/edit.cpp index f4a64304e..050a676c7 100644 --- a/avs_core/filters/edit.cpp +++ b/avs_core/filters/edit.cpp @@ -627,13 +627,14 @@ Dissolve::Dissolve(PClip _child1, PClip _child2, int _overlap, double fps, IScri if (!(vi.IsSameColorspace(vi2))) env->ThrowError("Dissolve: video formats don't match"); - pixelsize = vi.BytesFromPixels(1); // AVS16 + pixelsize = vi.ComponentSize(); // AVS16 + bits_per_pixel = vi.BitsPerComponent(); - video_fade_start = vi.num_frames - overlap; - video_fade_end = vi.num_frames - 1; + video_fade_start = vi.num_frames - overlap; + video_fade_end = vi.num_frames - 1; - audio_fade_start = vi.AudioSamplesFromFrames(video_fade_start); - audio_fade_end = vi.AudioSamplesFromFrames(video_fade_end+1)-1; + audio_fade_start = vi.AudioSamplesFromFrames(video_fade_start); + audio_fade_end = vi.AudioSamplesFromFrames(video_fade_end+1)-1; } else { video_fade_start = 0; @@ -683,8 +684,18 @@ PVideoFrame Dissolve::GetFrame(int n, IScriptEnvironment* env) // similar to merge.cpp if ((pixelsize == 2) && (env->GetCPUFlags() & CPUF_SSE4_1)) { - // uint16: sse 4.1 - weighted_merge_planar = &weighted_merge_planar_uint16_sse41; +#if 0 + in SSE4 lessthan16bit arithmetic is slower than the general path + if(bits_per_pixel < 16) + weighted_merge_planar = &weighted_merge_planar_uint16_sse41; + else +#endif + weighted_merge_planar = &weighted_merge_planar_uint16_sse41; + } + else if ((pixelsize == 2) && (bits_per_pixel<16) && (env->GetCPUFlags() & CPUF_SSE2)) { + // using lessthan16bit signed short multiply routines + weighted_merge_planar = &weighted_merge_planar_uint16_sse41; + // no SSE2 for 16 bit unsigned : slooow! 
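For reference, a scalar sketch of what the weighted_merge_planar_* routines selected by this dispatch compute on 16-bit planes, using the MergeFuncPtr signature from merge.h. The function name is made up for the illustration, BYTE is assumed to be the usual unsigned char alias, and weight/invweight are the 15-bit fixed-point blend factors supplied by the caller (hence the +0x4000 rounding and >>15 below, matching the SIMD code).

#include <cstdint>

typedef unsigned char BYTE; // assumption: the same alias the AviSynth headers provide

static void weighted_merge_planar_uint16_sketch(BYTE *p1, const BYTE *p2,
    int p1_pitch, int p2_pitch, int rowsize, int height, int weight, int invweight)
{
    for (int y = 0; y < height; ++y) {
        uint16_t *dst = reinterpret_cast<uint16_t *>(p1);             // merge happens in place into p1
        const uint16_t *src = reinterpret_cast<const uint16_t *>(p2);
        for (int x = 0; x < rowsize / 2; ++x) {
            // blend in a wide intermediate, round, then drop the 15-bit weight scale
            dst[x] = (uint16_t)(((int64_t)dst[x] * invweight + (int64_t)src[x] * weight + 0x4000) >> 15);
        }
        p1 += p1_pitch;
        p2 += p2_pitch;
    }
}
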
} else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2)) { // uint8: sse2 diff --git a/avs_core/filters/edit.h b/avs_core/filters/edit.h index ee5435781..a6206c045 100644 --- a/avs_core/filters/edit.h +++ b/avs_core/filters/edit.h @@ -171,6 +171,7 @@ class Dissolve : public GenericVideoFilter BYTE* audbuffer; size_t audbufsize; int pixelsize; + int bits_per_pixel; void EnsureBuffer(int minsize); }; diff --git a/avs_core/filters/merge.cpp b/avs_core/filters/merge.cpp index 584d3afcd..517bd5475 100644 --- a/avs_core/filters/merge.cpp +++ b/avs_core/filters/merge.cpp @@ -442,29 +442,69 @@ static void average_plane_c_float(BYTE *p1, const BYTE *p2, int p1_pitch, int p2 * weighted_merge_planar * ----------------------------------- */ +template void weighted_merge_planar_uint16_sse41(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height, int weight, int invweight) { __m128i round_mask = _mm_set1_epi32(0x4000); __m128i zero = _mm_setzero_si128(); __m128i weightmask = _mm_set1_epi32(weight); __m128i invweightmask = _mm_set1_epi32(invweight); + __m128i weightmask16 = _mm_set1_epi16(weight); + __m128i invweightmask16 = _mm_set1_epi16(invweight); int wMod16 = (rowsize / 16) * 16; for (int y = 0; y < height; y++) { for (int x = 0; x < wMod16; x += 16) { - __m128i px1 = _mm_stream_load_si128(reinterpret_cast<__m128i*>(p1 + x)); // y7 y6 y5 y4 y3 y2 y1 y0 - __m128i px2 = _mm_stream_load_si128(const_cast<__m128i*>(reinterpret_cast(p2 + x))); // Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 - - __m128i p1_0123 = _mm_unpacklo_epi16(px1, zero); // y3 y2 y1 y0 4*int - __m128i p1_4567 = _mm_unpackhi_epi16(px1, zero); // y7 y6 y5 y4 4*int - __m128i p2_0123 = _mm_unpacklo_epi16(px2, zero); - __m128i p2_4567 = _mm_unpackhi_epi16(px2, zero); - - // mullo: sse4 - p1_0123 = _mm_mullo_epi32(p1_0123, invweightmask); // 4x(32bit x 32bit = 32 bit) - p1_4567 = _mm_mullo_epi32(p1_4567, invweightmask); - p2_0123 = _mm_mullo_epi32(p2_0123, weightmask); - p2_4567 = _mm_mullo_epi32(p2_4567, weightmask); + __m128i px1, px2; + if(sse41) { + px1 = _mm_stream_load_si128(reinterpret_cast<__m128i*>(p1 + x)); // y7 y6 y5 y4 y3 y2 y1 y0 + px2 = _mm_stream_load_si128(const_cast<__m128i*>(reinterpret_cast(p2 + x))); // Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 + } + else { + px1 = _mm_load_si128(reinterpret_cast<__m128i*>(p1 + x)); // y7 y6 y5 y4 y3 y2 y1 y0 + px2 = _mm_load_si128(const_cast<__m128i*>(reinterpret_cast(p2 + x))); // Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 + } + __m128i p1_0123, p1_4567; + __m128i p2_0123, p2_4567; + + if (lessthan16bit) { + // signed int path + __m128i p1_07_lower32 = _mm_mullo_epi16(px1, invweightmask16); // 8x(16bit x 16bit = 32 bit) + __m128i p1_07_upper32 = _mm_mulhi_epu16(px1, invweightmask16); // 8x(16bit x 16bit = 32 bit) + p1_0123 = _mm_unpacklo_epi16(p1_07_lower32, p1_07_upper32); // 4 int32 + p1_4567 = _mm_unpackhi_epi16(p1_07_lower32, p1_07_upper32); // 4 int32 + __m128i p2_07_lower32 = _mm_mullo_epi16(px2, weightmask16); // 8x(16bit x 16bit = 32 bit) + __m128i p2_07_upper32 = _mm_mulhi_epu16(px2, weightmask16); // 8x(16bit x 16bit = 32 bit) + p2_0123 = _mm_unpacklo_epi16(p2_07_lower32, p2_07_upper32); // 4 int32 + p2_4567 = _mm_unpackhi_epi16(p2_07_lower32, p2_07_upper32); // 4 int32 + } else { + //------- part 1 + p1_0123 = _mm_unpacklo_epi16(px1, zero); // y3 y2 y1 y0 4*int + p2_0123 = _mm_unpacklo_epi16(px2, zero); + // mullo: sse4 + if(sse41) { + p1_0123 = _mm_mullo_epi32(p1_0123, invweightmask); // 4x(32bit x 32bit = 32 bit) + p2_0123 = _mm_mullo_epi32(p2_0123, weightmask); + } + else { + // simulation is 
sloooower than C! + p1_0123 = _MM_MULLO_EPI32(p1_0123, invweightmask); // 4x(32bit x 32bit = 32 bit) + p2_0123 = _MM_MULLO_EPI32(p2_0123, weightmask); + } + //------- part 2 + p1_4567 = _mm_unpackhi_epi16(px1, zero); // y7 y6 y5 y4 4*int + p2_4567 = _mm_unpackhi_epi16(px2, zero); + // mullo: sse4 + if(sse41) { + p1_4567 = _mm_mullo_epi32(p1_4567, invweightmask); + p2_4567 = _mm_mullo_epi32(p2_4567, weightmask); + } + else { + // simulation is sloooower than C! + p1_4567 = _MM_MULLO_EPI32(p1_4567, invweightmask); + p2_4567 = _MM_MULLO_EPI32(p2_4567, weightmask); + } + } // 16 bit unsigned int path p1_0123 = _mm_add_epi32(p1_0123, p2_0123); // 4x(32bit + 32bit = 32 bit) p1_4567 = _mm_add_epi32(p1_4567, p2_4567); @@ -475,7 +515,11 @@ void weighted_merge_planar_uint16_sse41(BYTE *p1, const BYTE *p2, int p1_pitch, p1_0123 = _mm_srli_epi32(p1_0123, 15); p1_4567 = _mm_srli_epi32(p1_4567, 15); - __m128i result = _mm_packus_epi32(p1_0123, p1_4567); // packus: SSE4.1 + __m128i result; + if(sse41) + result = _mm_packus_epi32(p1_0123, p1_4567); // packus: SSE4.1 + else + result = _MM_PACKUS_EPI32(p1_0123, p1_4567); // packus simulation for SSE2 _mm_stream_si128(reinterpret_cast<__m128i*>(p1 + x), result); } @@ -634,7 +678,7 @@ extern const AVSFunction Merge_filters[] = { { 0 } }; -static void merge_plane(BYTE* srcp, const BYTE* otherp, int src_pitch, int other_pitch, int src_rowsize, int src_height, float weight, int pixelsize, IScriptEnvironment *env) { +static void merge_plane(BYTE* srcp, const BYTE* otherp, int src_pitch, int other_pitch, int src_rowsize, int src_height, float weight, int pixelsize, int bits_per_pixel, IScriptEnvironment *env) { if ((weight > 0.4961f) && (weight < 0.5039f)) { //average of two planes @@ -677,10 +721,20 @@ static void merge_plane(BYTE* srcp, const BYTE* otherp, int src_pitch, int other if (pixelsize != 4) { MergeFuncPtr weighted_merge_planar; - + if ((pixelsize == 2) && (env->GetCPUFlags() & CPUF_SSE4_1)) { - // uint16: sse 4.1 - weighted_merge_planar = &weighted_merge_planar_uint16_sse41; + #if 0 + in SSE4 lessthan16bit arithmetic is slower + if(bits_per_pixel < 16) + weighted_merge_planar = &weighted_merge_planar_uint16_sse41; + else + #endif + weighted_merge_planar = &weighted_merge_planar_uint16_sse41; + } + else if ((pixelsize == 2) && (bits_per_pixel < 16) && (env->GetCPUFlags() & CPUF_SSE2)) { + // using lessthan16bit signed short multiply routines + weighted_merge_planar = &weighted_merge_planar_uint16_sse41; + // no SSE2 for 16 bit unsigned : slooow! 
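The "lessthan16bit" SSE2 path chosen here sidesteps the SSE4.1-only _mm_mullo_epi32 by rebuilding each 32-bit pixel*weight product from the two 16-bit halves that _mm_mullo_epi16 and _mm_mulhi_epu16 return, then interleaving them with _mm_unpacklo/hi_epi16. A scalar sketch of that identity for a single lane (hypothetical helper name, illustration only):

#include <cstdint>

static inline uint32_t rebuild_product_sketch(uint16_t pixel, uint16_t w)
{
    // what _mm_mullo_epi16 keeps for this lane: the low 16 bits of the product
    uint16_t lo = (uint16_t)((uint32_t)pixel * w);
    // what _mm_mulhi_epu16 keeps for this lane: the high 16 bits, unsigned
    uint16_t hi = (uint16_t)(((uint32_t)pixel * w) >> 16);
    // the unpack step stacks hi above lo again, giving back the exact 32-bit product
    return ((uint32_t)hi << 16) | lo;   // == (uint32_t)pixel * w
}

Packing the blended 32-bit lanes back to 16 bits then goes through the _MM_PACKUS_EPI32 helper added to internal.h in this commit when SSE4.1 is not available.
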
} else if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(otherp, 16)) { @@ -738,6 +792,7 @@ MergeChroma::MergeChroma(PClip _child, PClip _clip, float _weight, IScriptEnviro if (weight>1.0f) weight=1.0f; pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); } @@ -790,12 +845,12 @@ PVideoFrame __stdcall MergeChroma::GetFrame(int n, IScriptEnvironment* env) int src_rowsize_v = src->GetRowSize(PLANAR_V_ALIGNED); int src_height_uv = src->GetHeight(PLANAR_U); - merge_plane(srcpU, chromapU, src_pitch_uv, chroma_pitch_uv, src_rowsize_u, src_height_uv, weight, pixelsize, env); - merge_plane(srcpV, chromapV, src_pitch_uv, chroma_pitch_uv, src_rowsize_v, src_height_uv, weight, pixelsize, env); + merge_plane(srcpU, chromapU, src_pitch_uv, chroma_pitch_uv, src_rowsize_u, src_height_uv, weight, pixelsize, bits_per_pixel, env); + merge_plane(srcpV, chromapV, src_pitch_uv, chroma_pitch_uv, src_rowsize_v, src_height_uv, weight, pixelsize, bits_per_pixel, env); if(vi.IsYUVA()) merge_plane(src->GetWritePtr(PLANAR_A), chroma->GetReadPtr(PLANAR_A), src->GetPitch(PLANAR_A), chroma->GetPitch(PLANAR_A), - src->GetRowSize(PLANAR_A_ALIGNED), src->GetHeight(PLANAR_A), weight, pixelsize, env); + src->GetRowSize(PLANAR_A_ALIGNED), src->GetHeight(PLANAR_A), weight, pixelsize, bits_per_pixel, env); } } else { // weight == 1.0 if (vi.IsYUY2()) { @@ -868,6 +923,7 @@ MergeLuma::MergeLuma(PClip _child, PClip _clip, float _weight, IScriptEnvironmen env->ThrowError("MergeLuma: YUV data only (no RGB); use ConvertToYUY2, ConvertToYV12/16/24 or ConvertToYUVxxx"); pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); if (!vi.IsSameColorspace(vi2)) { // Since this is luma we allow all planar formats to be merged. 
if (!(vi.IsPlanar() && vi2.IsPlanar())) { @@ -980,7 +1036,7 @@ PVideoFrame __stdcall MergeLuma::GetFrame(int n, IScriptEnvironment* env) int src_rowsize = src->GetRowSize(PLANAR_Y); int src_height = src->GetHeight(PLANAR_Y); - merge_plane(srcpY, lumapY, src_pitch, luma_pitch, src_rowsize, src_height, weight, pixelsize, env); + merge_plane(srcpY, lumapY, src_pitch, luma_pitch, src_rowsize, src_height, weight, pixelsize, bits_per_pixel, env); } return src; @@ -1010,6 +1066,7 @@ MergeAll::MergeAll(PClip _child, PClip _clip, float _weight, IScriptEnvironment* env->ThrowError("Merge: Images must have same width and height!"); pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); if (weight<0.0f) weight=0.0f; if (weight>1.0f) weight=1.0f; @@ -1031,7 +1088,7 @@ PVideoFrame __stdcall MergeAll::GetFrame(int n, IScriptEnvironment* env) const int src_pitch = src->GetPitch(); const int src_rowsize = src->GetRowSize(); - merge_plane(srcp, srcp2, src_pitch, src2->GetPitch(), src_rowsize, src->GetHeight(), weight, pixelsize, env); + merge_plane(srcp, srcp2, src_pitch, src2->GetPitch(), src_rowsize, src->GetHeight(), weight, pixelsize, bits_per_pixel, env); if (vi.IsPlanar()) { const int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A}; @@ -1040,7 +1097,7 @@ PVideoFrame __stdcall MergeAll::GetFrame(int n, IScriptEnvironment* env) // first plane is already processed for (int p = 1; p < vi.NumComponents(); p++) { const int plane = planes[p]; - merge_plane(src->GetWritePtr(plane), src2->GetReadPtr(plane), src->GetPitch(plane), src2->GetPitch(plane), src->GetRowSize(plane), src->GetHeight(plane), weight, pixelsize, env); + merge_plane(src->GetWritePtr(plane), src2->GetReadPtr(plane), src->GetPitch(plane), src2->GetPitch(plane), src->GetRowSize(plane), src->GetHeight(plane), weight, pixelsize, bits_per_pixel, env); } } diff --git a/avs_core/filters/merge.h b/avs_core/filters/merge.h index 3b2192d08..af23907e2 100644 --- a/avs_core/filters/merge.h +++ b/avs_core/filters/merge.h @@ -66,6 +66,7 @@ class MergeChroma : public GenericVideoFilter PClip clip; float weight; int pixelsize; + int bits_per_pixel; }; @@ -88,6 +89,7 @@ class MergeLuma : public GenericVideoFilter PClip clip; float weight; int pixelsize; + int bits_per_pixel; }; @@ -110,11 +112,18 @@ class MergeAll : public GenericVideoFilter PClip clip; float weight; int pixelsize; + int bits_per_pixel; }; typedef void(*MergeFuncPtr) (BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height, int weight, int invweight); +template void weighted_merge_planar_uint16_sse41(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int width, int height, int weight, int invweight); +// instantiate to let them access from other modules +template void weighted_merge_planar_uint16_sse41(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height, int weight, int invweight); +template void weighted_merge_planar_uint16_sse41(BYTE *p1, const BYTE *p2, int p1_pitch, int p2_pitch, int rowsize, int height, int weight, int invweight); +// other two cases are slower + void weighted_merge_planar_sse2(BYTE *p1,const BYTE *p2, int p1_pitch, int p2_pitch,int rowsize, int height, int weight, int invweight); void weighted_merge_planar_mmx(BYTE *p1,const BYTE *p2, int p1_pitch, int p2_pitch,int rowsize, int height, int weight, int invweight); template diff --git a/avs_core/filters/resample.cpp b/avs_core/filters/resample.cpp index 4c5a8eab8..c6dff4032 100644 --- a/avs_core/filters/resample.cpp +++ 
b/avs_core/filters/resample.cpp @@ -87,16 +87,6 @@ __forceinline __m128 simd_loadps_unaligned(const float* adr) return _mm_loadu_ps(adr); } -// fake _mm_packus_epi32 (orig is SSE4.1 only) -static __forceinline __m128i _MM_PACKUS_EPI32( __m128i a, __m128i b ) -{ - a = _mm_slli_epi32 (a, 16); - a = _mm_srai_epi32 (a, 16); - b = _mm_slli_epi32 (b, 16); - b = _mm_srai_epi32 (b, 16); - a = _mm_packs_epi32 (a, b); - return a; -} /*************************************** ***** Vertical Resizer Assembly ******* From 0077f82e13d914ed60a20a8c2ee43c15e79806b0 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 22 Sep 2016 17:05:29 +0200 Subject: [PATCH 083/120] Ordered dither for 10-16->8 bit. SSE2 for 8->10-16 bit full scale (RGB). Use: clip10_16bit.ConvertBits(8, dither=0). dither=-1 or no param: no dither --- avs_core/convert/convert.cpp | 439 +++++++++++++++++++++++++++--- avs_core/convert/convert_planar.h | 1 + 2 files changed, 399 insertions(+), 41 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index d0055584b..f59f84d45 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -42,6 +42,7 @@ #include #include #include +#include @@ -69,10 +70,10 @@ extern const AVSFunction Convert_filters[] = { // matrix can be "rec601", { "ConvertToYUV420", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s[ChromaOutPlacement]s", ConvertToPlanarGeneric::CreateYUV420}, { "ConvertToYUV422", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToPlanarGeneric::CreateYUV422}, { "ConvertToYUV444", BUILTIN_FUNC_PREFIX, "c[interlaced]b[matrix]s[ChromaInPlacement]s[chromaresample]s", ConvertToPlanarGeneric::CreateYUV444}, - { "ConvertTo8bit", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f", ConvertBits::Create, (void *)8 }, - { "ConvertTo16bit", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f", ConvertBits::Create, (void *)16 }, - { "ConvertToFloat", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f", ConvertBits::Create, (void *)32 }, - { "ConvertBits", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f", ConvertBits::Create, (void *)0 }, + { "ConvertTo8bit", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f[dither_bits]i", ConvertBits::Create, (void *)8 }, + { "ConvertTo16bit", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f[dither_bits]i", ConvertBits::Create, (void *)16 }, + { "ConvertToFloat", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f[dither_bits]i", ConvertBits::Create, (void *)32 }, + { "ConvertBits", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f[dither_bits]i", ConvertBits::Create, (void *)0 }, { 0 } }; @@ -781,18 +782,99 @@ AVSValue __cdecl ConvertToYV12::Create(AVSValue args, void*, IScriptEnvironment* /********************************** ****** Bitdepth conversions ***** **********************************/ +// 10->8 +static const BYTE dither2x2[4] = { + 0, 2, + 3, 1 +}; -template +// 12->8 +static const BYTE dither4x4[16] = { + 0, 8, 2, 10, + 12, 4, 14, 6, + 3, 11, 1, 9, + 15, 7, 13, 5 +}; + +// 14->8 +static const BYTE dither8x8[8][8] = { + { 0, 32, 8, 40, 2, 34, 10, 42}, /* 8x8 Bayer ordered dithering */ + {48, 16, 56, 24, 50, 18, 58, 26}, /* pattern. Each input pixel */ + {12, 44, 4, 36, 14, 46, 6, 38}, /* is scaled to the 0..63 range */ + {60, 28, 52, 20, 62, 30, 54, 22}, /* before looking in this table */ + { 3, 35, 11, 43, 1, 33, 9, 41}, /* to determine the action. 
*/ + {51, 19, 59, 27, 49, 17, 57, 25}, + {15, 47, 7, 39, 13, 45, 5, 37}, + {63, 31, 55, 23, 61, 29, 53, 21} +}; + +// 16->8 +static const BYTE dither16x16[16][16] = { + { 0,192, 48,240, 12,204, 60,252, 3,195, 51,243, 15,207, 63,255 }, + { 128, 64,176,112,140, 76,188,124,131, 67,179,115,143, 79,191,127 }, + { 32,224, 16,208, 44,236, 28,220, 35,227, 19,211, 47,239, 31,223 }, + { 160, 96,144, 80,172,108,156, 92,163, 99,147, 83,175,111,159, 95 }, + { 8,200, 56,248, 4,196, 52,244, 11,203, 59,251, 7,199, 55,247 }, + { 136, 72,184,120,132, 68,180,116,139, 75,187,123,135, 71,183,119 }, + { 40,232, 24,216, 36,228, 20,212, 43,235, 27,219, 39,231, 23,215 }, + { 168,104,152, 88,164,100,148, 84,171,107,155, 91,167,103,151, 87 }, + { 2,194, 50,242, 14,206, 62,254, 1,193, 49,241, 13,205, 61,253 }, + { 130, 66,178,114,142, 78,190,126,129, 65,177,113,141, 77,189,125 }, + { 34,226, 18,210, 46,238, 30,222, 33,225, 17,209, 45,237, 29,221 }, + { 162, 98,146, 82,174,110,158, 94,161, 97,145, 81,173,109,157, 93 }, + { 10,202, 58,250, 6,198, 54,246, 9,201, 57,249, 5,197, 53,245 }, + { 138, 74,186,122,134, 70,182,118,137, 73,185,121,133, 69,181,117 }, + { 42,234, 26,218, 38,230, 22,214, 41,233, 25,217, 37,229, 21,213 }, + { 170,106,154, 90,166,102,150, 86,169,105,153, 89,165,101,149, 85 } +}; + + +template static void convert_rgb_uint16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { const uint16_t *srcp0 = reinterpret_cast(srcp); src_pitch = src_pitch / sizeof(uint16_t); int src_width = src_rowsize / sizeof(uint16_t); + + int _y = 0; // for ordered dither + + const int TARGET_BITDEPTH = 8; // here is constant (uint8_t target) + + // for test, make it 2,4,6,8. sourcebits-TARGET_DITHER_BITDEPTH cannot exceed 8 bit + // const int TARGET_DITHER_BITDEPTH = 2; + + const int max_pixel_value = (1 << TARGET_BITDEPTH) - 1; + const int max_pixel_value_dithered = (1 << TARGET_DITHER_BITDEPTH) - 1; + // precheck ensures: + // TARGET_BITDEPTH >= TARGET_DITHER_BITDEPTH + // sourcebits - TARGET_DITHER_BITDEPTH <= 8 + // sourcebits - TARGET_DITHER_BITDEPTH is even (later we can use PRESHIFT) + const int DITHER_BIT_DIFF = (sourcebits - TARGET_DITHER_BITDEPTH); // 2, 4, 6, 8 + const int PRESHIFT = DITHER_BIT_DIFF & 1; // 0 or 1: correction for odd bit differences (not used here but generality) + const int DITHER_ORDER = (DITHER_BIT_DIFF + PRESHIFT) / 2; + const int DITHER_SIZE = 1 << DITHER_ORDER; // 9,10=2 11,12=4 13,14=8 15,16=16 + const int MASK = DITHER_SIZE - 1; + // 10->8: 0x01 (2x2) + // 11->8: 0x03 (4x4) + // 12->8: 0x03 (4x4) + // 14->8: 0x07 (8x8) + // 16->8: 0x0F (16x16) + const BYTE *matrix; + switch (sourcebits-TARGET_DITHER_BITDEPTH) { + case 2: matrix = reinterpret_cast(dither2x2); break; + case 4: matrix = reinterpret_cast(dither4x4); break; + case 6: matrix = reinterpret_cast(dither8x8); break; + case 8: matrix = reinterpret_cast(dither16x16); break; + } + for(int y=0; y 0..65535 (*255 / 65535) // hint for simd code writers: @@ -858,7 +940,32 @@ static void convert_rgb_uint16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rows add eax, edx mov BYTE PTR [ecx+ebx], al */ - } + } + else { // dither_mode == 0 -> ordered dither + const int corr = matrix[_y | ((x / rgb_step) & MASK)]; + // vvv for the non-fullscale version: int new_pixel = ((srcp0[x] + corr) >> DITHER_BIT_DIFF); + int new_pixel; + if (DITHER_BIT_DIFF == 8) + new_pixel = (srcp0[x]+corr) / 257; // RGB: full range 0..255 <-> 0..65535 (*255 / 65535) + else if (DITHER_BIT_DIFF == 6) + 
new_pixel = (srcp0[x]+corr) * 255 / 16383; // RGB: full range 0..255 <-> 0..16384-1 + else if (DITHER_BIT_DIFF == 4) + new_pixel = (srcp0[x]+corr) * 255 / 4095; // RGB: full range 0..255 <-> 0..16384-1 + else if (DITHER_BIT_DIFF == 2) + new_pixel = (srcp0[x]+corr) * 255 / 1023; // RGB: full range 0..255 <-> 0..16384-1 + else + new_pixel = (srcp0[x]+corr); + new_pixel = min(new_pixel, max_pixel_value_dithered); // clamp upper + + // scale back to the required bit depth + // for generality. Now target == 8 bit, and dither_target is also 8 bit + // for test: source:10 bit, target=8 bit, dither_target=4 bit + const int BITDIFF_BETWEEN_DITHER_AND_TARGET = DITHER_BIT_DIFF - (sourcebits - TARGET_BITDEPTH); + if(BITDIFF_BETWEEN_DITHER_AND_TARGET != 0) // dither to 8, target to 8 + new_pixel = new_pixel << BITDIFF_BETWEEN_DITHER_AND_TARGET; // if implemented non-8bit dither target, this should be fullscale + dstp[x] = (BYTE)new_pixel; + } + } // x dstp += dst_pitch; srcp0 += src_pitch; } @@ -867,23 +974,67 @@ static void convert_rgb_uint16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rows // YUV conversions (bit shifts) // BitDepthConvFuncPtr // Conversion from 16-14-12-10 to 8 bits (bitshift: 8-6-4-2) -template +template static void convert_uint16_to_8_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { const uint16_t *srcp0 = reinterpret_cast(srcp); src_pitch = src_pitch / sizeof(uint16_t); int src_width = src_rowsize / sizeof(uint16_t); + + int _y = 0; // for ordered dither + + const int TARGET_BITDEPTH = 8; // here is constant (uint8_t target) + const int max_pixel_value = (1 << TARGET_BITDEPTH) - 1; + const int max_pixel_value_dithered = (1 << TARGET_DITHER_BITDEPTH) - 1; + // precheck ensures: + // TARGET_BITDEPTH >= TARGET_DITHER_BITDEPTH + // sourcebits - TARGET_DITHER_BITDEPTH <= 8 + // sourcebits - TARGET_DITHER_BITDEPTH is even (later we can use PRESHIFT) + const int DITHER_BIT_DIFF = (sourcebits - TARGET_DITHER_BITDEPTH); // 2, 4, 6, 8 + const int PRESHIFT = DITHER_BIT_DIFF & 1; // 0 or 1: correction for odd bit differences (not used here but generality) + const int DITHER_ORDER = (DITHER_BIT_DIFF + PRESHIFT) / 2; + const int DITHER_SIZE = 1 << DITHER_ORDER; // 9,10=2 11,12=4 13,14=8 15,16=16 + const int MASK = DITHER_SIZE - 1; + // 10->8: 0x01 (2x2) + // 11->8: 0x03 (4x4) + // 12->8: 0x03 (4x4) + // 14->8: 0x07 (8x8) + // 16->8: 0x0F (16x16) + const BYTE *matrix; + switch (sourcebits-TARGET_DITHER_BITDEPTH) { + case 2: matrix = reinterpret_cast(dither2x2); break; + case 4: matrix = reinterpret_cast(dither4x4); break; + case 6: matrix = reinterpret_cast(dither8x8); break; + case 8: matrix = reinterpret_cast(dither16x16); break; + } + for(int y=0; y> (sourcebits-8); // no dithering, no range conversion, simply shift + if(dither_mode < 0) // -1: no dither + dstp[x] = srcp0[x] >> (sourcebits-TARGET_BITDEPTH); // no dithering, no range conversion, simply shift + else { // dither_mode == 0 -> ordered dither + int corr = matrix[_y | (x & MASK)]; + //BYTE new_pixel = (((srcp0[x] << PRESHIFT) >> (sourcebits - 8)) + corr) >> PRESHIFT; // >> (sourcebits - 8); + int new_pixel = ((srcp0[x] + corr) >> DITHER_BIT_DIFF); + new_pixel = min(new_pixel, max_pixel_value_dithered); // clamp upper + // scale back to the required bit depth + // for generality. 
Now target == 8 bit, and dither_target is also 8 bit + // for test: source:10 bit, target=8 bit, dither_target=4 bit + const int BITDIFF_BETWEEN_DITHER_AND_TARGET = DITHER_BIT_DIFF - (sourcebits - TARGET_BITDEPTH); + if(BITDIFF_BETWEEN_DITHER_AND_TARGET != 0) // dither to 8, target to 8 + new_pixel = new_pixel << BITDIFF_BETWEEN_DITHER_AND_TARGET; // closest in palette: simple shift with + dstp[x] = (BYTE)new_pixel; + } } dstp += dst_pitch; srcp0 += src_pitch; } } +// todo: dither template static void convert_uint16_to_8_sse2(const BYTE *srcp8, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { @@ -943,7 +1094,7 @@ static void convert_32_to_uintN_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, } } -// rgb/alpha: full scale. Difference to YUV: *257 instead of << 8 (full 16 bit sample) +// rgb/alpha: full scale. No bit shift, scale full ranges template static void convert_rgb_8_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { @@ -961,19 +1112,190 @@ static void convert_rgb_8_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_rows { // test if(targetbits==16) - dstp0[x] = srcp0[x] * 257; // RGB: full range 0..255 <-> 0..65535 (257 = 65535 / 255) + dstp0[x] = srcp0[x] * 257; // full range 0..255 <-> 0..65535 (257 = 65535 / 255) else if (targetbits==14) - dstp0[x] = srcp0[x] * 16383 / 255; // RGB: full range 0..255 <-> 0..16384-1 + dstp0[x] = srcp0[x] * 16383 / 255; // full range 0..255 <-> 0..16384-1 else if (targetbits==12) - dstp0[x] = srcp0[x] * 4095 / 255; // RGB: full range 0..255 <-> 0..4096-1 + dstp0[x] = srcp0[x] * 4095 / 255; // full range 0..255 <-> 0..4096-1 else if (targetbits==10) - dstp0[x] = srcp0[x] * 1023 / 255; // RGB: full range 0..255 <-> 0..1024-1 + dstp0[x] = srcp0[x] * 1023 / 255; // full range 0..255 <-> 0..1024-1 } dstp0 += dst_pitch; srcp0 += src_pitch; } } +#if 0 +// leave it here, maybe we can use it later +// Tricky simd implementation of integer div 255 w/o division +static inline __m128i Div_4xint32_by_255(const __m128i &esi, const __m128i &magic255div) { + // simd implementation of + /* + Trick of integer/255 w/o division: + tmp = (int)((( (__int64)esi * (-2139062143)) >> 32) & 0xFFFFFFFF) + esi) >> 7 + result = tmp + (tmp >> 31) + + movzx eax, BYTE PTR [ecx+edi] // orig pixel + imul esi, eax, 16383 // * Scale_Multiplier + // div 255 follows + // result in esi is int32 + // Div_4xint32_by_255 implementation from here! + mov eax, -2139062143 ; 80808081H + imul esi // signed! + add edx, esi + sar edx, 7 + mov eax, edx + shr eax, 31 ; 0000001fH + add eax, edx + mov WORD PTR [ebx+ecx*2], ax + */ + // edx_eax_64 = mulres_lo(esi) * magic255div(eax) + // _mm_mul_epu32: r64_0 := a0 * b0, r64_1 := a2 * b2 (edx_eax edx_eax) + // signed mul! + __m128i mulwithmagic02 = _mm_mul_epi32(esi, magic255div); // signed! need epi not epu! only sse4.1 + __m128i mulwithmagic13 = _mm_mul_epi32(_mm_srli_si128(esi, 4), magic255div); + // shuffle hi32bit of results to [63..0] and pack. 
a3->a1, a1->a0 + __m128i upper32bits_edx = _mm_unpacklo_epi32(_mm_shuffle_epi32(mulwithmagic02, _MM_SHUFFLE (0,0,3,1)), _mm_shuffle_epi32(mulwithmagic13, _MM_SHUFFLE (0,0,3,1))); + + // vvv lower 32 bit of result is never used in the algorithm + // shuffle lo32bit results to [63..0] and pack + // __m128i lower32bits_eax = _mm_unpacklo_epi32(_mm_shuffle_epi32(mulwithmagic02, _MM_SHUFFLE (0,0,2,0)), _mm_shuffle_epi32(mulwithmagic13, _MM_SHUFFLE (0,0,2,0))); + + // add edx, mulres_lo(esi) + __m128i tmp_edx = _mm_add_epi32(upper32bits_edx, esi); + // sar edx, 7 + // shift arithmetic + tmp_edx = _mm_srai_epi32(tmp_edx, 7); + // mov eax, edx + // shr eax, 31 ; 0000001fH + // shift logical + __m128i tmp_eax = _mm_srli_epi32(tmp_edx, 31); + // add eax, edx + __m128i result = _mm_add_epi32(tmp_eax, tmp_edx); + return result; + // 4 results in the lower 16 bits of 4x32 bit register +} +#endif + +template +static void convert_rgb_8_to_uint16_sse2(const BYTE *srcp8, BYTE *dstp8, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +{ + const uint8_t *srcp = reinterpret_cast(srcp8); + uint16_t *dstp = reinterpret_cast(dstp8); + + src_pitch = src_pitch / sizeof(uint8_t); + dst_pitch = dst_pitch / sizeof(uint16_t); + + int src_width = src_rowsize / sizeof(uint8_t); + int wmod16 = (src_width / 16) * 16; + + const int MUL = (targetbits == 16) ? 257 : ((1 << targetbits) - 1); + const int DIV = (targetbits == 16) ? 1 : 255; + // 16 bit: one mul only, no need for /255 + // for others: // *16383 *4095 *1023 and /255 + + __m128i zero = _mm_setzero_si128(); + __m128i multiplier = _mm_set1_epi16(MUL); + __m128i magic255div = _mm_set1_epi32(-2139062143); // 80808081H + __m128 multiplier_float = _mm_set1_ps((float)MUL / DIV); + // This is ok, since the default SIMD rounding mode is round-to-nearest unlike c++ truncate + // in C: 1023 * multiplier = 1022.999 -> truncates. + + for(int y=0; y(srcp + x)); // 16* uint8 + __m128i src_lo = _mm_unpacklo_epi8(src, zero); // 8* uint16 + __m128i src_hi = _mm_unpackhi_epi8(src, zero); // 8* uint16 + // test + if(targetbits==16) { + // *257 mullo is faster than x*257 = (x<<8 + x) add/or solution (i7) + __m128i res_lo = _mm_mullo_epi16(src_lo, multiplier); // lower 16 bit of multiplication is enough + __m128i res_hi = _mm_mullo_epi16(src_hi, multiplier); + // dstp[x] = srcp[x] * 257; // RGB: full range 0..255 <-> 0..65535 (257 = 65535 / 255) + _mm_store_si128(reinterpret_cast<__m128i*>(dstp+x), res_lo); + _mm_store_si128(reinterpret_cast<__m128i*>(dstp+x+8), res_hi); + } + else { +#if 0 + if(false) { + // dead end + // simulate integer tricky div 255 arithmetic. + // Unfortunately it's sse41 only plus much slower than float, but still much faster than C. Too much overhead + + // process 8*uint16_t + //-------------- + // first src_lo + + // imul esi, eax, 16383 + __m128i res_lower16bit = _mm_mullo_epi16(src_lo, multiplier); // *16383 *4095 *1023 result: int32. get lower 16 + __m128i res_upper16bit = _mm_mulhi_epi16(src_lo, multiplier); // *16383 *4095 *1023 result: int32. 
get upper 16 + __m128i mulres_lo = _mm_unpacklo_epi16(res_lower16bit, res_upper16bit); // 4 int32 + __m128i mulres_hi = _mm_unpackhi_epi16(res_lower16bit, res_upper16bit); // 4 int32 + + // process first 4 of 8 uint32_t (mulres_lo) + __m128i tmp_eax_lo = Div_4xint32_by_255(mulres_lo, magic255div); + // process second 4 of 8 uint32_t (mulres_hi) + __m128i tmp_eax_hi = Div_4xint32_by_255(mulres_hi, magic255div); + __m128i dst = _mm_packus_epi32(tmp_eax_lo, tmp_eax_hi); + _mm_store_si128(reinterpret_cast<__m128i*>(dstp+x), dst); + + //-------------- + // second src_hi + { + // imul esi, eax, 16383|4095|1023 + __m128i res_lower16bit = _mm_mullo_epi16(src_hi, multiplier); // *16383 *4095 *1023 result: int32. get lower 16 + __m128i res_upper16bit = _mm_mulhi_epi16(src_hi, multiplier); // *16383 *4095 *1023 result: int32. get upper 16 + __m128i mulres_lo = _mm_unpacklo_epi16(res_lower16bit, res_upper16bit); // 4 int32 + __m128i mulres_hi = _mm_unpackhi_epi16(res_lower16bit, res_upper16bit); // 4 int32 + + // process first 4 of 8 uint32_t (mulres_lo) + __m128i tmp_eax_lo = Div_4xint32_by_255(mulres_lo, magic255div); + // process second 4 of 8 uint32_t (mulres_hi) + __m128i tmp_eax_hi = Div_4xint32_by_255(mulres_hi, magic255div); + __m128i dst = _mm_packus_epi32(tmp_eax_lo, tmp_eax_hi); + _mm_store_si128(reinterpret_cast<__m128i*>(dstp+x+8), dst); + } + } + else +#endif + { + // src_lo: 8*uint16 + // convert to int32 then float, multiply and convert back + __m128 res_lo = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(src_lo, zero)), multiplier_float); + __m128 res_hi = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(src_lo, zero)), multiplier_float); + // Converts the four single-precision, floating-point values of a to signed 32-bit integer values. + __m128i result_l = _mm_cvtps_epi32(res_lo); // The default rounding mode is round-to-nearest unlike c++ truncate + __m128i result_h = _mm_cvtps_epi32(res_hi); + // Pack and store no need for packus for <= 14 bit + __m128i result = _mm_packs_epi32(result_l, result_h); // 4*32+4*32 = 8*16 + _mm_store_si128(reinterpret_cast<__m128i*>(dstp+x), result); + + // src_hi: 8*uint16 + // convert to int32 then float, multiply and convert back + res_lo = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(src_hi, zero)), multiplier_float); + res_hi = _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(src_hi, zero)), multiplier_float); + // Converts the four single-precision, floating-point values of a to signed 32-bit integer values. 
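// (Scalar sketch of this float path, using the MUL/DIV constants defined
//  above in this function:
//    dstp[x] = (uint16_t)(((uint32_t)srcp[x] * MUL + DIV / 2) / DIV);
//  the "+ DIV / 2" approximates the round-to-nearest conversion done here,
//  while the plain C remainder loop at the end of the function truncates.)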
+ result_l = _mm_cvtps_epi32(res_lo); + result_h = _mm_cvtps_epi32(res_hi); + // Pack and store no need for packus for <= 14 bit + result = _mm_packs_epi32(result_l, result_h); // 4*32+4*32 = 8*16 + _mm_store_si128(reinterpret_cast<__m128i*>(dstp+x+8), result); + } + } // if 16 bit else + } // for x + // rest + for (int x = wmod16; x < src_width; x++) + { + dstp[x] = srcp[x] * MUL / DIV; // RGB: full range 0..255 <-> 0..16384-1 + } + dstp += dst_pitch; + srcp += src_pitch; + } // for y +} + + // YUV: bit shift 8 to 10-12-14-16 bits template static void convert_8_to_uint16_c(const BYTE *srcp, BYTE *dstp8, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) @@ -1061,7 +1383,8 @@ static void convert_rgb_uint16_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src } } -// YUV: bit shift 10-12-14-16 <=> 16 bits +// YUV: bit shift 10-12-14-16 <=> 10-12-14-16 bits +// shift right or left, depending on expandrange template param template static void convert_uint16_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) { @@ -1126,6 +1449,7 @@ ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dith bool sse2 = !!(env->GetCPUFlags() & CPUF_SSE2); // frames are always 16 bit aligned BitDepthConvFuncPtr conv_function_full_scale; + BitDepthConvFuncPtr conv_function_full_scale_no_dither; BitDepthConvFuncPtr conv_function_shifted_scale; // ConvertToFloat @@ -1194,26 +1518,26 @@ ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dith switch (target_bitdepth) { case 10: - conv_function_full_scale = convert_rgb_8_to_uint16_c<10>; + conv_function_full_scale = sse2 ? convert_rgb_8_to_uint16_sse2<10> : convert_rgb_8_to_uint16_c<10>; conv_function_shifted_scale = sse2 ? convert_8_to_uint16_sse2<10> : convert_8_to_uint16_c<10>; break; case 12: - conv_function_full_scale = convert_rgb_8_to_uint16_c<12>; + conv_function_full_scale = sse2 ? convert_rgb_8_to_uint16_sse2<12> : convert_rgb_8_to_uint16_c<12>; conv_function_shifted_scale = sse2 ? convert_8_to_uint16_sse2<12> : convert_8_to_uint16_c<12>; break; case 14: - conv_function_full_scale = convert_rgb_8_to_uint16_c<14>; + conv_function_full_scale = sse2 ? convert_rgb_8_to_uint16_sse2<14> : convert_rgb_8_to_uint16_c<14>; conv_function_shifted_scale = sse2 ? convert_8_to_uint16_sse2<14> : convert_8_to_uint16_c<14>; break; case 16: - conv_function_full_scale = convert_rgb_8_to_uint16_c<16>; + conv_function_full_scale = sse2 ? convert_rgb_8_to_uint16_sse2<16> : convert_rgb_8_to_uint16_c<16>; conv_function_shifted_scale = sse2 ? convert_8_to_uint16_sse2<16> : convert_8_to_uint16_c<16>; break; default: env->ThrowError("ConvertTo16bit: unsupported bit depth"); } } else { - conv_function_full_scale = convert_rgb_8_to_uint16_c<16>; + conv_function_full_scale = sse2 ? convert_rgb_8_to_uint16_sse2<16> : convert_rgb_8_to_uint16_c<16>; conv_function_shifted_scale = sse2 ? 
convert_8_to_uint16_sse2<16> : convert_8_to_uint16_c<16>; } @@ -1390,42 +1714,63 @@ ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dith if (target_bitdepth == 8) { if (pixelsize == 2) // 16(,14,12,10)->8 bit { + // todo: it gets complicated, so we better using tuples for function lookup + // parameters for full scale: source bitdepth, dither_type (-1:none, 0:ordered), target_dither_bitdepth(now always 8), rgb_step(3 for RGB48, 4 for RGB64, 1 for all planars) + // rgb_step can differ from 1 only when source bits_per_pixel==16 and packed RGB type + // target_dither_bitdepth==8 (RFU for dithering down from e.g. 10->2 bit) + // fill conv_function_full_scale and conv_function_shifted_scale if (truerange) { switch (bits_per_pixel) { - case 10: conv_function_full_scale = convert_rgb_uint16_to_8_c<10>; - conv_function_shifted_scale = sse2 ? convert_uint16_to_8_sse2<10> : convert_uint16_to_8_c<10>; + case 10: + // no convert_rgb_uint16_to_8_c yet, choosing logic is left here for sample + conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<10, -1, 8, 1> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<10, 0, 8, 1> : convert_rgb_uint16_to_8_c<10, -1, 8, 1>); + conv_function_full_scale_no_dither = sse2 ? convert_rgb_uint16_to_8_c<10, -1, 8, 1> : convert_rgb_uint16_to_8_c<10, -1, 8, 1>; + conv_function_shifted_scale = (sse2 && dither_mode<0) ? convert_uint16_to_8_sse2<10> : (dither_mode>=0 ? convert_uint16_to_8_c<10, 0, 8> : convert_uint16_to_8_c<10, -1, 8>); break; - case 12: conv_function_full_scale = convert_rgb_uint16_to_8_c<12>; - conv_function_shifted_scale = sse2 ? convert_uint16_to_8_sse2<12> : convert_uint16_to_8_c<12>; + case 12: + conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<12, -1, 8, 1> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<12, 0, 8, 1> : convert_rgb_uint16_to_8_c<12, -1, 8, 1>); + conv_function_full_scale_no_dither = sse2 ? convert_rgb_uint16_to_8_c<12, -1, 8, 1> : convert_rgb_uint16_to_8_c<12, -1, 8, 1>; + conv_function_shifted_scale = (sse2 && dither_mode<0) ? convert_uint16_to_8_sse2<12> : (dither_mode>=0 ? convert_uint16_to_8_c<12, 0, 8> : convert_uint16_to_8_c<12, -1, 8>); break; - case 14: conv_function_full_scale = convert_rgb_uint16_to_8_c<14>; - conv_function_shifted_scale = sse2 ? convert_uint16_to_8_sse2<14> : convert_uint16_to_8_c<14>; + case 14: + conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<14, -1, 8, 1> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<14, 0, 8, 1> : convert_rgb_uint16_to_8_c<14, -1, 8, 1>); + conv_function_full_scale_no_dither = sse2 ? convert_rgb_uint16_to_8_c<14, -1, 8, 1> : convert_rgb_uint16_to_8_c<14, -1, 8, 1>; + conv_function_shifted_scale = (sse2 && dither_mode<0) ? convert_uint16_to_8_sse2<14> : (dither_mode>=0 ? convert_uint16_to_8_c<14, 0, 8> : convert_uint16_to_8_c<14, -1, 8>); break; - case 16: conv_function_full_scale = convert_rgb_uint16_to_8_c<16>; - conv_function_shifted_scale = sse2 ? convert_uint16_to_8_sse2<16> : convert_uint16_to_8_c<16>; + case 16: + conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<16, -1, 8, 1> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<16, 0, 8, 1> : convert_rgb_uint16_to_8_c<16, -1, 8, 1>); + conv_function_full_scale_no_dither = sse2 ? convert_rgb_uint16_to_8_c<16, -1, 8, 1> : convert_rgb_uint16_to_8_c<16, -1, 8, 1>; + conv_function_shifted_scale = (sse2 && dither_mode<0) ? convert_uint16_to_8_sse2<16> : (dither_mode>=0 ? 
convert_uint16_to_8_c<16, 0, 8> : convert_uint16_to_8_c<16, -1, 8>); break; default: env->ThrowError("ConvertTo8bit: invalid source bitdepth"); } } else { - conv_function_full_scale = convert_rgb_uint16_to_8_c<16>; - conv_function_shifted_scale = sse2 ? convert_uint16_to_8_sse2<16> : convert_uint16_to_8_c<16>; + if(vi.IsRGB48()) { // packed RGB: specify rgb_step for dither table access + conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<16, -1, 8, 3> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<16, 0, 8, 3> : convert_rgb_uint16_to_8_c<16, -1, 8, 3>); + } else if(vi.IsRGB64()) { + conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<16, -1, 8, 4> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<16, 0, 8, 4> : convert_rgb_uint16_to_8_c<16, -1, 8, 4>); + } else { + conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<16, -1, 8, 1> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<16, 0, 8, 1> : convert_rgb_uint16_to_8_c<16, -1, 8, 1>); + } + conv_function_full_scale_no_dither = sse2 ? convert_rgb_uint16_to_8_c<16, -1, 8, 1> : convert_rgb_uint16_to_8_c<16, -1, 8, 1>; + conv_function_shifted_scale = (sse2 && dither_mode<0) ? convert_uint16_to_8_sse2<16> : (dither_mode>=0 ? convert_uint16_to_8_c<16, 0, 8> : convert_uint16_to_8_c<16, -1, 8>); } - // for RGB scaling is not shift by 8 as in YUV but 0..65535->0..255 + // packed RGB scaling is full_scale 0..65535->0..255 if (vi.IsRGB48() || vi.IsRGB64()) { conv_function = conv_function_full_scale; // no separate alpha plane } else if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) { conv_function = conv_function_full_scale; - conv_function_a = conv_function_full_scale; + conv_function_a = conv_function_full_scale_no_dither; // don't dither alpha plane } else if (vi.IsYUV() || vi.IsYUVA()) { conv_function = conv_function_shifted_scale; - conv_function_a = conv_function_full_scale; + conv_function_a = conv_function_full_scale_no_dither; // don't dither alpha plane } else env->ThrowError("ConvertTo8bit: unsupported color space"); @@ -1467,8 +1812,8 @@ ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dith AVSValue __cdecl ConvertBits::Create(AVSValue args, void* user_data, IScriptEnvironment* env) { PClip clip = args[0].AsClip(); - //0 1 2 3 4 - //c[bits]i[truerange]b[dither]i[scale]f + //0 1 2 3 4 5 + //c[bits]i[truerange]b[dither]i[scale]f[dither_bits]i const VideoInfo &vi = clip->GetVideoInfo(); @@ -1489,6 +1834,7 @@ AVSValue __cdecl ConvertBits::Create(AVSValue args, void* user_data, IScriptEnvi int target_bitdepth = args[1].AsInt(create_param); // default comes by calling from old To8,To16,ToFloat functions int source_bitdepth = vi.BitsPerComponent(); int pixelsize = vi.ComponentSize(); + int dither_bitdepth = args[5].AsInt(target_bitdepth); // RFU if(target_bitdepth!=8 && target_bitdepth!=10 && target_bitdepth!=12 && target_bitdepth!=14 && target_bitdepth!=16 && target_bitdepth!=32) env->ThrowError("ConvertBits: invalid bit depth: %d", target_bitdepth); @@ -1505,8 +1851,22 @@ AVSValue __cdecl ConvertBits::Create(AVSValue args, void* user_data, IScriptEnvi env->ThrowError("ConvertBits: truerange specified for non-planar source"); } - // no change -> return unmodified - if((source_bitdepth == target_bitdepth)) // 10->10 .. 16->16 + int dither_type = args[3].AsInt(-1); + bool dither_defined = args[3].Defined(); + if(dither_defined && dither_type != 0 && dither_type != -1) + env->ThrowError("ConvertBits: invalid dither type parameter. 
Only -1 (disabled) or 0 (ordered dither) is allowed"); + + if(source_bitdepth - dither_bitdepth > 8) + env->ThrowError("ConvertBits: ditherbits cannot differ with more than 8 bits from source"); + + if(source_bitdepth < target_bitdepth && dither_defined) + env->ThrowError("ConvertBits: dithering is allowed only for scale down"); + + if(target_bitdepth!=8 && dither_defined) + env->ThrowError("ConvertBits: dithering is allowed only for 8 bit targets"); + + // no change -> return unmodified if no dithering required + if(source_bitdepth == target_bitdepth && dither_type < 0) // 10->10 .. 16->16 return clip; // YUY2 conversion is limited @@ -1540,9 +1900,6 @@ AVSValue __cdecl ConvertBits::Create(AVSValue args, void* user_data, IScriptEnvi if(float_range<=0.0) env->ThrowError("ConvertBits: Float range parameter cannot be <= 0"); - // dither parameter, rfu - int dither_type = args[3].AsInt(-1); - return new ConvertBits(clip, float_range, dither_type, target_bitdepth, assume_truerange, env); } diff --git a/avs_core/convert/convert_planar.h b/avs_core/convert/convert_planar.h index 487bc93c6..ce18dff37 100644 --- a/avs_core/convert/convert_planar.h +++ b/avs_core/convert/convert_planar.h @@ -217,6 +217,7 @@ class ConvertBits : public GenericVideoFilter int pixelsize; int bits_per_pixel; int target_bitdepth; + int dither_bitdepth; bool truerange; // if 16->10 range reducing or e.g. 14->16 bit range expansion needed bool format_change_only; }; From 547f536d28039c79bfb76d777701b83d9188cb8a Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 22 Sep 2016 17:44:43 +0200 Subject: [PATCH 084/120] ConvertBits: dither parameter check --- avs_core/convert/convert.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index f59f84d45..e304506cb 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -1856,17 +1856,19 @@ AVSValue __cdecl ConvertBits::Create(AVSValue args, void* user_data, IScriptEnvi if(dither_defined && dither_type != 0 && dither_type != -1) env->ThrowError("ConvertBits: invalid dither type parameter. Only -1 (disabled) or 0 (ordered dither) is allowed"); - if(source_bitdepth - dither_bitdepth > 8) - env->ThrowError("ConvertBits: ditherbits cannot differ with more than 8 bits from source"); + if(dither_defined) { + if(source_bitdepth - dither_bitdepth > 8) + env->ThrowError("ConvertBits: ditherbits cannot differ with more than 8 bits from source"); - if(source_bitdepth < target_bitdepth && dither_defined) - env->ThrowError("ConvertBits: dithering is allowed only for scale down"); + if(source_bitdepth < target_bitdepth) + env->ThrowError("ConvertBits: dithering is allowed only for scale down"); - if(target_bitdepth!=8 && dither_defined) - env->ThrowError("ConvertBits: dithering is allowed only for 8 bit targets"); + if(target_bitdepth!=8) + env->ThrowError("ConvertBits: dithering is allowed only for 8 bit targets"); + } // no change -> return unmodified if no dithering required - if(source_bitdepth == target_bitdepth && dither_type < 0) // 10->10 .. 16->16 + if(source_bitdepth == target_bitdepth /*&& dither_type < 0*/) // 10->10 .. 
16->16 return clip; // YUY2 conversion is limited From ea0655a08c1136534d87e5649d76ea95842ed42c Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 29 Sep 2016 11:24:06 +0200 Subject: [PATCH 085/120] TemporalSoften: SSE2 for SAD 16 bit, bits_per_pixel SAD scaling --- avs_core/filters/focus.cpp | 107 ++++++++++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 18 deletions(-) diff --git a/avs_core/filters/focus.cpp b/avs_core/filters/focus.cpp index 902e33910..27cf06829 100644 --- a/avs_core/filters/focus.cpp +++ b/avs_core/filters/focus.cpp @@ -1455,9 +1455,9 @@ static void accumulate_line(BYTE* c_plane, const BYTE** planeP, int planes, size } -static int calculate_sad_sse2(const BYTE* cur_ptr, const BYTE* other_ptr, int cur_pitch, int other_pitch, size_t width, size_t height) +static int calculate_sad_sse2(const BYTE* cur_ptr, const BYTE* other_ptr, int cur_pitch, int other_pitch, size_t rowsize, size_t height) { - size_t mod16_width = width / 16 * 16; + size_t mod16_width = rowsize / 16 * 16; int result = 0; __m128i sum = _mm_setzero_si128(); for (size_t y = 0; y < height; ++y) { @@ -1467,8 +1467,8 @@ static int calculate_sad_sse2(const BYTE* cur_ptr, const BYTE* other_ptr, int cu __m128i sad = _mm_sad_epu8(cur, other); sum = _mm_add_epi32(sum, sad); } - if (mod16_width != width) { - for (size_t x = mod16_width; x < width; ++x) { + if (mod16_width != rowsize) { + for (size_t x = mod16_width; x < rowsize; ++x) { result += std::abs(cur_ptr[x] - other_ptr[x]); } } @@ -1481,10 +1481,71 @@ static int calculate_sad_sse2(const BYTE* cur_ptr, const BYTE* other_ptr, int cu return result; } +template +__int64 calculate_sad16_sse2(const BYTE* cur_ptr, const BYTE* other_ptr, int cur_pitch, int other_pitch, size_t rowsize, size_t height) +{ + size_t mod16_width = rowsize / 16 * 16; + + __m128i zero = _mm_setzero_si128(); + __int64 totalsum = 0; // fullframe SAD exceeds int32 at 8+ bit + + for ( size_t y = 0; y < height; y++ ) + { + __m128i sum = _mm_setzero_si128(); // for one row int is enough + for ( size_t x = 0; x < rowsize; x+=16 ) + { + __m128i src1, src2; + src1 = _mm_load_si128((__m128i *) (cur_ptr + x)); // 16 bytes or 8 words + src2 = _mm_load_si128((__m128i *) (other_ptr + x)); + if(sizeof(pixel_t) == 1) { + // this is uint_16 specific, but leave here for sample + sum = _mm_add_epi32(sum, _mm_sad_epu8(src1, src2)); // sum0_32, 0, sum1_32, 0 + } + else if (sizeof(pixel_t) == 2) { + __m128i greater_t = _mm_subs_epu16(src1, src2); // unsigned sub with saturation + __m128i smaller_t = _mm_subs_epu16(src2, src1); + __m128i absdiff = _mm_or_si128(greater_t, smaller_t); //abs(s1-s2) == (satsub(s1,s2) | satsub(s2,s1)) + // 8 x uint16 absolute differences + sum = _mm_add_epi32(sum, _mm_unpacklo_epi16(absdiff, zero)); + sum = _mm_add_epi32(sum, _mm_unpackhi_epi16(absdiff, zero)); + // sum0_32, sum1_32, sum2_32, sum3_32 + } + } + // summing up partial sums + if(sizeof(pixel_t) == 2) { + // at 16 bits: we have 4 integers for sum: a0 a1 a2 a3 + __m128i a0_a1 = _mm_unpacklo_epi32(sum, zero); // a0 0 a1 0 + __m128i a2_a3 = _mm_unpackhi_epi32(sum, zero); // a2 0 a3 0 + sum = _mm_add_epi32( a0_a1, a2_a3 ); // a0+a2, 0, a1+a3, 0 + /* SSSE3: told to be not too fast + sum = _mm_hadd_epi32(sum, zero); // A1+A2, B1+B2, 0+0, 0+0 + sum = _mm_hadd_epi32(sum, zero); // A1+A2+B1+B2, 0+0+0+0, 0+0+0+0, 0+0+0+0 + */ + } + // sum here: two 32 bit partial result: sum1 0 sum2 0 + __m128i sum_hi = _mm_unpackhi_epi64(sum, zero); + sum = _mm_add_epi32(sum, sum_hi); + int rowsum = _mm_cvtsi128_si32(sum); + + // rest 
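// (Note on the kernel above, sketch level: |a-b| for unsigned 16-bit lanes is
//  formed as _mm_subs_epu16(a,b) | _mm_subs_epu16(b,a), since one of the two
//  saturating subtractions is always zero; a per-row 32-bit accumulator is
//  safe because (rowsize/2)*65535 stays well below 2^31 for realistic frame
//  widths, and the per-frame total is collected in __int64. The scalar loop
//  below handles the non-mod-16 remainder of each row.)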
+ if (mod16_width != rowsize) { + for (size_t x = mod16_width / sizeof(pixel_t); x < rowsize / sizeof(pixel_t); ++x) { + rowsum += std::abs(reinterpret_cast(cur_ptr)[x] - reinterpret_cast(other_ptr)[x]); + } + } + + totalsum += rowsum; + + cur_ptr += cur_pitch; + other_ptr += other_pitch; + } + return totalsum; +} + #ifdef X86_32 -static int calculate_sad_isse(const BYTE* cur_ptr, const BYTE* other_ptr, int cur_pitch, int other_pitch, size_t width, size_t height) +static int calculate_sad_isse(const BYTE* cur_ptr, const BYTE* other_ptr, int cur_pitch, int other_pitch, size_t rowsize, size_t height) { - size_t mod8_width = width / 8 * 8; + size_t mod8_width = rowsize / 8 * 8; int result = 0; __m64 sum = _mm_setzero_si64(); for (size_t y = 0; y < height; ++y) { @@ -1494,8 +1555,8 @@ static int calculate_sad_isse(const BYTE* cur_ptr, const BYTE* other_ptr, int cu __m64 sad = _mm_sad_pu8(cur, other); sum = _mm_add_pi32(sum, sad); } - if (mod8_width != width) { - for (size_t x = mod8_width; x < width; ++x) { + if (mod8_width != rowsize) { + for (size_t x = mod8_width; x < rowsize; ++x) { result += std::abs(cur_ptr[x] - other_ptr[x]); } } @@ -1515,25 +1576,32 @@ static __int64 calculate_sad_c(const BYTE* cur_ptr, const BYTE* other_ptr, int c const pixel_t *ptr1 = reinterpret_cast(cur_ptr); const pixel_t *ptr2 = reinterpret_cast(other_ptr); size_t width = rowsize / sizeof(pixel_t); + cur_pitch /= sizeof(pixel_t); + other_pitch /= sizeof(pixel_t); - typedef typename std::conditional < std::is_floating_point::value, float, __int64>::type sum_t; + // for fullframe float may loose precision + typedef typename std::conditional < std::is_floating_point::value, double, __int64>::type sum_t; + // for one row int is enough and faster than int64 + typedef typename std::conditional < std::is_floating_point::value, float, int>::type sumrow_t; sum_t sum = 0; for (size_t y = 0; y < height; ++y) { + sumrow_t sumrow = 0; for (size_t x = 0; x < width; ++x) { - sum += std::abs(ptr1[x] - ptr2[x]); + sumrow += std::abs(ptr1[x] - ptr2[x]); } + sum += sumrow; ptr1 += cur_pitch / sizeof(pixel_t); ptr2 += other_pitch / sizeof(pixel_t); } if (std::is_floating_point::value) - return (__int64)(sum * 256); // float defaulting to 0..1 range + return (__int64)(sum * 256); // scale 0..1 based sum to 8 bit range else - return (__int64)sum; + return (__int64)sum; // for int, scaling to 8 bit range is done outside } // sum of byte-diffs. 
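// Sketch of the scaling convention the dispatcher below applies (hypothetical
// helper for illustration only, not referenced by the filter). It is only
// meaningful for the 10-16 bit integer branches and assumes the scenechange
// threshold keeps its 8-bit meaning; the float path instead scales its
// 0..1-based sum by 256 inside calculate_sad_c.
static __int64 scale_sad_to_8bit_range_sketch(__int64 sad, int bits_per_pixel)
{
  // e.g. a 16-bit SAD of 256*N compares like an 8-bit SAD of N
  return bits_per_pixel > 8 ? (sad >> (bits_per_pixel - 8)) : sad;
}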
-static __int64 calculate_sad(const BYTE* cur_ptr, const BYTE* other_ptr, int cur_pitch, int other_pitch, size_t rowsize, size_t height, int pixelsize, IScriptEnvironment* env) { +static __int64 calculate_sad(const BYTE* cur_ptr, const BYTE* other_ptr, int cur_pitch, int other_pitch, size_t rowsize, size_t height, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { // todo: sse for 16bit/float if ((pixelsize == 1) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(cur_ptr, 16) && IsPtrAligned(other_ptr, 16) && rowsize >= 16) { return (__int64)calculate_sad_sse2(cur_ptr, other_ptr, cur_pitch, other_pitch, rowsize, height); @@ -1543,14 +1611,17 @@ static __int64 calculate_sad(const BYTE* cur_ptr, const BYTE* other_ptr, int cur return (__int64)calculate_sad_isse(cur_ptr, other_ptr, cur_pitch, other_pitch, rowsize, height); } #endif + // sse2 uint16_t + if ((pixelsize == 2) && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(cur_ptr, 16) && IsPtrAligned(other_ptr, 16) && rowsize >= 16) { + return calculate_sad16_sse2(cur_ptr, other_ptr, cur_pitch, other_pitch, rowsize, height) >> (bits_per_pixel-8); + } + switch(pixelsize) { case 1: return calculate_sad_c(cur_ptr, other_ptr, cur_pitch, other_pitch, rowsize, height); - case 2: return calculate_sad_c(cur_ptr, other_ptr, cur_pitch, other_pitch, rowsize, height) / 256; + case 2: return calculate_sad_c(cur_ptr, other_ptr, cur_pitch, other_pitch, rowsize, height) >> (bits_per_pixel-8); // scale back to 8 bit range; default: // case 4 return calculate_sad_c(cur_ptr, other_ptr, cur_pitch, other_pitch, rowsize, height); } - - } PVideoFrame TemporalSoften::GetFrame(int n, IScriptEnvironment* env) @@ -1617,7 +1688,7 @@ PVideoFrame TemporalSoften::GetFrame(int n, IScriptEnvironment* env) bool skiprest = false; for (int i = radius-1; i>=0; i--) { // Check frames backwards if ((!skiprest) && (!planeDisabled[i])) { - int sad = (int)calculate_sad(c_plane, planeP[i], pitch, planePitch[i], frames[radius]->GetRowSize(planes[c]), h, pixelsize, env); + int sad = (int)calculate_sad(c_plane, planeP[i], pitch, planePitch[i], frames[radius]->GetRowSize(planes[c]), h, pixelsize, bits_per_pixel, env); if (sad < scenechange) { planePitch2[d2] = planePitch[i]; planeP2[d2++] = planeP[i]; @@ -1632,7 +1703,7 @@ PVideoFrame TemporalSoften::GetFrame(int n, IScriptEnvironment* env) skiprest = false; for (int i = radius; i < 2*radius; i++) { // Check forward frames if ((!skiprest) && (!planeDisabled[i])) { // Disable this frame on next plane (so that Y can affect UV) - int sad = (int)calculate_sad(c_plane, planeP[i], pitch, planePitch[i], frames[radius]->GetRowSize(planes[c]), h, pixelsize, env); + int sad = (int)calculate_sad(c_plane, planeP[i], pitch, planePitch[i], frames[radius]->GetRowSize(planes[c]), h, pixelsize, bits_per_pixel, env); if (sad < scenechange) { planePitch2[d2] = planePitch[i]; planeP2[d2++] = planeP[i]; From 36ef8a7f111ebed524638e29d32842e16f53acae Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 29 Sep 2016 11:28:55 +0200 Subject: [PATCH 086/120] No need "typename" in non-templated function (compiler compatibility) --- avs_core/filters/focus.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avs_core/filters/focus.cpp b/avs_core/filters/focus.cpp index 27cf06829..148d1e8a9 100644 --- a/avs_core/filters/focus.cpp +++ b/avs_core/filters/focus.cpp @@ -835,7 +835,7 @@ static __forceinline void af_horizontal_yv12_process_line_uint16_c(uint16_t left typedef uint16_t pixel_t; pixel_t* dstp = reinterpret_cast(dstp8); const int 
max_pixel_value = (1 << bits_per_pixel) - 1; // clamping on 10-12-14-16 bitdepth - typedef typename std::conditional < sizeof(pixel_t) == 1, int, __int64>::type weight_t; // for calling the right ScaledPixelClip() + typedef std::conditional < sizeof(pixel_t) == 1, int, __int64>::type weight_t; // for calling the right ScaledPixelClip() size_t width = row_size / sizeof(pixel_t); for (x = 0; x < width-1; ++x) { pixel_t temp = (pixel_t)ScaledPixelClipEx((weight_t)(dstp[x] * (weight_t)center_weight + (left + dstp[x+1]) * (weight_t)outer_weight), max_pixel_value); From 133b9f001ba2ca89cc7a6878543168eec79af20c Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 30 Sep 2016 09:30:44 +0200 Subject: [PATCH 087/120] VDubFilter: really allow and convert double/long-type params --- plugins/VDubFilter/VDubFilter.cpp | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/plugins/VDubFilter/VDubFilter.cpp b/plugins/VDubFilter/VDubFilter.cpp index 3e6fe5176..ff3e52b0b 100644 --- a/plugins/VDubFilter/VDubFilter.cpp +++ b/plugins/VDubFilter/VDubFilter.cpp @@ -135,6 +135,7 @@ class CScriptValue { void operator=(char **s) { type = T_STR; u.s = s; } void operator=(__int64 l) { type = T_LONG; u.l = l; } // not used, only integer exists in avs void operator=(double d) { type = T_DOUBLE; u.d = d; } + void operator=(float f) { type = T_DOUBLE; u.d = (double)f; } }; class CScriptValueStringHelper { @@ -378,7 +379,7 @@ class FilterDefinitionList { class FilterStateInfo { public: - long lCurrentFrame; // current output frame + long lCurrentFrame; // current sequence frame (previously called output frame) long lMicrosecsPerFrame; // microseconds per output frame long lCurrentSourceFrame; // current source frame long lMicrosecsPerSrcFrame; // microseconds per source frame @@ -744,11 +745,11 @@ class VirtualdubFilterProxy : public GenericVideoFilter { void InvokeSyliaConfigFunction(FilterDefinition* fd, AVSValue args, IScriptEnvironment* env) { if (fd->script_obj && fd->script_obj->func_list && args.ArraySize() > 1) { for (ScriptFunctionDef* i = fd->script_obj->func_list; i->arg_list; i++) { - const char* p = i->arg_list; + const char* p = i->arg_list; // p: original virtualdub param list e.g. 0ddddddddd int j; for (j=1; jnext = NULL; } + const int MAX_PARAMS = 64; + char converted_paramlist[MAX_PARAMS+1]; + fm->env->AddFunction(fm->avisynth_function_name, "c", VirtualdubFilterProxy::Create, fdl); if (fd->script_obj && fd->script_obj->func_list) { for (ScriptFunctionDef* i = fd->script_obj->func_list; i->arg_list; i++) { - const char* params = fm->env->Sprintf("c%s%s", i->arg_list+1, strchr(i->arg_list+1, '.') ? "*" : ""); + // avisynth does not know 'd'ouble or 'l'ong + // let's fake them to 'f'loat and 'i'nt for avisynth + char *p_src = i->arg_list + 1; + char *p_target = converted_paramlist; + char ch; + while(ch = *p_src++ && (p_target-converted_paramlist)env->Sprintf("c%s%s", converted_paramlist, strchr(i->arg_list+1, '.') ? "*" : ""); + // put * if . 
found + fm->env->AddFunction(fm->avisynth_function_name, params, VirtualdubFilterProxy::Create, fdl); } } From b450628463dcf3f842cad3f1b621e45e96f767c3 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sat, 1 Oct 2016 18:26:15 +0200 Subject: [PATCH 088/120] CPU feature constants for AVX2, FMA3, F16C (AES, MOVBE) + Info() --- avs_core/core/cpuid.cpp | 20 +++++++++++++++++--- avs_core/filters/text-overlay.cpp | 16 +++++++++++----- avs_core/include/avisynth_c.h | 16 ++++++++++++++-- avs_core/include/avs/cpuid.h | 14 ++++++++++++++ 4 files changed, 56 insertions(+), 10 deletions(-) diff --git a/avs_core/core/cpuid.cpp b/avs_core/core/cpuid.cpp index cf9e4554e..e683034e2 100644 --- a/avs_core/core/cpuid.cpp +++ b/avs_core/core/cpuid.cpp @@ -45,15 +45,29 @@ static int CPUCheckForExtensions() result |= CPUF_SSE4_1; if (IS_BIT_SET(cpuinfo[2], 20)) result |= CPUF_SSE4_2; - + if (IS_BIT_SET(cpuinfo[2], 12)) + result |= CPUF_FMA3; + if (IS_BIT_SET(cpuinfo[2], 22)) + result |= CPUF_MOVBE; + if (IS_BIT_SET(cpuinfo[2], 23)) + result |= CPUF_POPCNT; + if (IS_BIT_SET(cpuinfo[2], 25)) + result |= CPUF_AES; + if (IS_BIT_SET(cpuinfo[2], 29)) + result |= CPUF_F16C; // AVX #if (_MSC_FULL_VER >= 160040219) // We require VC++2010 SP1 at least bool xgetbv_supported = IS_BIT_SET(cpuinfo[2], 27); bool avx_supported = IS_BIT_SET(cpuinfo[2], 28); if (xgetbv_supported && avx_supported) { - if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6ull) == 0x6ull) - result |= CPUF_AVX; + if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6ull) == 0x6ull) { + result |= CPUF_AVX; + __cpuid(cpuinfo, 7); + if (IS_BIT_SET(cpuinfo[1], 5)) + result |= CPUF_AVX2; + } + } #endif diff --git a/avs_core/filters/text-overlay.cpp b/avs_core/filters/text-overlay.cpp index 1c40824d8..7eb5d4f70 100644 --- a/avs_core/filters/text-overlay.cpp +++ b/avs_core/filters/text-overlay.cpp @@ -1245,12 +1245,12 @@ std::string GetCpuMsg(IScriptEnvironment * env) int flags = env->GetCPUFlags(); std::stringstream ss; - if (flags & CPUF_FPU) - ss << "x87 "; + //if (flags & CPUF_FPU) + // ss << "x87 "; if (flags & CPUF_MMX) - ss << "MMX "; + ss << "MMX "; if (flags & CPUF_INTEGER_SSE) - ss << "ISSE "; + ss << "ISSE "; if (flags & CPUF_SSE4_2) ss << "SSE4.2 "; @@ -1261,13 +1261,19 @@ std::string GetCpuMsg(IScriptEnvironment * env) else if (flags & CPUF_SSE2) ss << "SSE2 "; else if (flags & CPUF_SSE) - ss << "SSE "; + ss << "SSE "; if (flags & CPUF_SSSE3) ss << "SSSE3 "; if (flags & CPUF_AVX) ss << "AVX "; + if (flags & CPUF_AVX2) + ss << "AVX2 "; + if (flags & CPUF_FMA3) + ss << "FMA3 "; + if (flags & CPUF_F16C) + ss << "F16C "; if (flags & CPUF_3DNOW_EXT) ss << "3DNOW_EXT"; diff --git a/avs_core/include/avisynth_c.h b/avs_core/include/avisynth_c.h index fca600350..c3de30d47 100644 --- a/avs_core/include/avisynth_c.h +++ b/avs_core/include/avisynth_c.h @@ -751,8 +751,20 @@ enum { AVS_CPUF_SSE4_1 = 0x400, AVS_CPUF_AVX = 0x800, // Sandy Bridge, Bulldozer AVS_CPUF_SSE4_2 = 0x1000, // Nehalem -//AVS_CPUF_AVX2 = 0x2000, // Haswell -//AVS_CPUF_AVX512 = 0x4000, // Knights Landing + // AVS+ + AVS_CPUF_AVX2 = 0x2000, // Haswell + AVS_CPUF_FMA3 = 0x4000, + AVS_CPUF_F16C = 0x8000, + AVS_CPUF_MOVBE = 0x10000, // Big Endian Move + AVS_CPUF_POPCNT = 0x20000, + AVS_CPUF_AES = 0x40000, + // AVS_CPUF_AVX512F = 0x80000, // AVX-512 Foundation. 
+ // AVS_CPUF_AVX512DQ = 0x100000, // AVX-512 DQ (Double/Quad granular) Instructions + // AVS_CPUF_AVX512PF = 0x200000, // AVX-512 Prefetch + // AVS_CPUF_AVX512ER = 0x400000, // AVX-512 Exponential and Reciprocal + // AVS_CPUF_AVX512CD = 0x800000, // AVX-512 Conflict Detection + // AVS_CPUF_AVX512BW = 0x1000000, // AVX-512 BW (Byte/Word granular) Instructions + // AVS_CPUF_AVX512VL = 0x2000000, // AVX-512 VL (128/256 Vector Length) Extensions }; diff --git a/avs_core/include/avs/cpuid.h b/avs_core/include/avs/cpuid.h index e2915ef8a..a1ff5a95a 100644 --- a/avs_core/include/avs/cpuid.h +++ b/avs_core/include/avs/cpuid.h @@ -51,6 +51,20 @@ enum { CPUF_SSE4_1 = 0x400, // Penryn, Wolfdale, Yorkfield CPUF_AVX = 0x800, // Sandy Bridge, Bulldozer CPUF_SSE4_2 = 0x1000, // Nehalem + // AVS+ + CPUF_AVX2 = 0x2000, // Haswell + CPUF_FMA3 = 0x4000, + CPUF_F16C = 0x8000, + CPUF_MOVBE = 0x10000, // Big Endian move + CPUF_POPCNT = 0x20000, + CPUF_AES = 0x40000, + // CPUF_AVX512F = 0x80000, // AVX-512 Foundation. + // CPUF_AVX512DQ = 0x100000, // AVX-512 DQ (Double/Quad granular) Instructions + // CPUF_AVX512PF = 0x200000, // AVX-512 Prefetch + // CPUF_AVX512ER = 0x400000, // AVX-512 Exponential and Reciprocal + // CPUF_AVX512CD = 0x800000, // AVX-512 Conflict Detection + // CPUF_AVX512BW = 0x1000000, // AVX-512 BW (Byte/Word granular) Instructions + // CPUF_AVX512VL = 0x2000000, // AVX-512 VL (128/256 Vector Length) Extensions }; #ifdef BUILDING_AVSCORE From 0023d711e55b266e80b797196dedd381220a3ae3 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sat, 1 Oct 2016 18:42:11 +0200 Subject: [PATCH 089/120] Remove MSVC specific version checking from CPUCheckForExtensions --- avs_core/core/cpuid.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/avs_core/core/cpuid.cpp b/avs_core/core/cpuid.cpp index e683034e2..0c4a0fcf8 100644 --- a/avs_core/core/cpuid.cpp +++ b/avs_core/core/cpuid.cpp @@ -56,7 +56,6 @@ static int CPUCheckForExtensions() if (IS_BIT_SET(cpuinfo[2], 29)) result |= CPUF_F16C; // AVX -#if (_MSC_FULL_VER >= 160040219) // We require VC++2010 SP1 at least bool xgetbv_supported = IS_BIT_SET(cpuinfo[2], 27); bool avx_supported = IS_BIT_SET(cpuinfo[2], 28); if (xgetbv_supported && avx_supported) @@ -69,7 +68,6 @@ static int CPUCheckForExtensions() } } -#endif // 3DNow!, 3DNow!, and ISSE __cpuid(cpuinfo, 0x80000000); From 5dd0d8b35bda539acf3e4ca5bcf3853f9ff95575 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Sat, 1 Oct 2016 20:49:32 +0200 Subject: [PATCH 090/120] AddAlphaPlane/RemoveAlphaPlane + misc refactor on ->8bit conversion --- avs_core/convert/convert.cpp | 308 ++++++++++++++++++++++++++---- avs_core/convert/convert_planar.h | 32 ++++ 2 files changed, 306 insertions(+), 34 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index e304506cb..35ae83347 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -43,6 +43,8 @@ #include #include #include +#include +#include @@ -74,6 +76,8 @@ extern const AVSFunction Convert_filters[] = { // matrix can be "rec601", { "ConvertTo16bit", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f[dither_bits]i", ConvertBits::Create, (void *)16 }, { "ConvertToFloat", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f[dither_bits]i", ConvertBits::Create, (void *)32 }, { "ConvertBits", BUILTIN_FUNC_PREFIX, "c[bits]i[truerange]b[dither]i[scale]f[dither_bits]i", ConvertBits::Create, (void *)0 }, + { "AddAlphaPlane", BUILTIN_FUNC_PREFIX, "c[mask]f", AddAlphaPlane::Create}, + { "RemoveAlphaPlane", 
BUILTIN_FUNC_PREFIX, "c", RemoveAlphaPlane::Create}, { 0 } }; @@ -1437,6 +1441,70 @@ static void convert_uintN_to_float_c(const BYTE *srcp, BYTE *dstp, int src_rowsi } } +BitDepthConvFuncPtr get_convert_to_8_function(bool full_scale, int source_bitdepth, int dither_mode, int dither_bitdepth, int rgb_step, int cpu) +{ + std::map, BitDepthConvFuncPtr> func_copy; + using std::make_tuple; + /* + conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<10, -1, 8, 1> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<10, 0, 8, 1> : convert_rgb_uint16_to_8_c<10, -1, 8, 1>); + conv_function_full_scale_no_dither = sse2 ? convert_rgb_uint16_to_8_c<10, -1, 8, 1> : convert_rgb_uint16_to_8_c<10, -1, 8, 1>; + conv_function_shifted_scale = (sse2 && dither_mode<0) ? convert_uint16_to_8_sse2<10> : (dither_mode>=0 ? convert_uint16_to_8_c<10, 0, 8> : convert_uint16_to_8_c<10, -1, 8>); + */ + const int DITHER_BITDEPTH = 8; // only 8 bit supported + + // full scale + + // no dither, C + func_copy[make_tuple(true, 10, -1, DITHER_BITDEPTH, 1, 0)] = convert_rgb_uint16_to_8_c<10, -1, DITHER_BITDEPTH, 1>; + func_copy[make_tuple(true, 12, -1, DITHER_BITDEPTH, 1, 0)] = convert_rgb_uint16_to_8_c<12, -1, DITHER_BITDEPTH, 1>; + func_copy[make_tuple(true, 14, -1, DITHER_BITDEPTH, 1, 0)] = convert_rgb_uint16_to_8_c<14, -1, DITHER_BITDEPTH, 1>; + func_copy[make_tuple(true, 16, -1, DITHER_BITDEPTH, 1, 0)] = convert_rgb_uint16_to_8_c<16, -1, DITHER_BITDEPTH, 1>; + // for RGB48 and RGB64 source + func_copy[make_tuple(true, 16, -1, DITHER_BITDEPTH, 3, 0)] = convert_rgb_uint16_to_8_c<16, -1, DITHER_BITDEPTH, 1>; // dither rgb_step param is n/a + func_copy[make_tuple(true, 16, -1, DITHER_BITDEPTH, 4, 0)] = convert_rgb_uint16_to_8_c<16, -1, DITHER_BITDEPTH, 1>; // dither rgb_step param is n/a + // full scale, no dither, SSE2 + /* no sse2 yet + func_copy[make_tuple(true, 10, -1, DITHER_BITDEPTH, 1, CPUF_SSE2)] = convert_rgb_uint16_to_8_sse2<10, -1, DITHER_BITDEPTH, 1>; + func_copy[make_tuple(true, 12, -1, DITHER_BITDEPTH, 1, CPUF_SSE2)] = convert_rgb_uint16_to_8_sse2<12, -1, DITHER_BITDEPTH, 1>; + func_copy[make_tuple(true, 14, -1, DITHER_BITDEPTH, 1, CPUF_SSE2)] = convert_rgb_uint16_to_8_sse2<14, -1, DITHER_BITDEPTH, 1>; + func_copy[make_tuple(true, 16, -1, DITHER_BITDEPTH, 1, CPUF_SSE2)] = convert_rgb_uint16_to_8_sse2<16, -1, DITHER_BITDEPTH, 1>; + */ + // for RGB48 and RGB64 source + func_copy[make_tuple(true, 16, -1, DITHER_BITDEPTH, 3, CPUF_SSE2)] = convert_rgb_uint16_to_8_c<16, -1, DITHER_BITDEPTH, 1>; // dither rgb_step param is n/a + func_copy[make_tuple(true, 16, -1, DITHER_BITDEPTH, 4, CPUF_SSE2)] = convert_rgb_uint16_to_8_c<16, -1, DITHER_BITDEPTH, 1>; // dither rgb_step param is n/a + // full scale, dither, C + func_copy[make_tuple(true, 10, 0, DITHER_BITDEPTH, 1, 0)] = convert_rgb_uint16_to_8_c<10, 0, DITHER_BITDEPTH, 1>; + func_copy[make_tuple(true, 12, 0, DITHER_BITDEPTH, 1, 0)] = convert_rgb_uint16_to_8_c<12, 0, DITHER_BITDEPTH, 1>; + func_copy[make_tuple(true, 14, 0, DITHER_BITDEPTH, 1, 0)] = convert_rgb_uint16_to_8_c<14, 0, DITHER_BITDEPTH, 1>; + func_copy[make_tuple(true, 16, 0, DITHER_BITDEPTH, 1, 0)] = convert_rgb_uint16_to_8_c<16, 0, DITHER_BITDEPTH, 1>; + // for RGB48 and RGB64 source + func_copy[make_tuple(true, 16, 0, DITHER_BITDEPTH, 3, 0)] = convert_rgb_uint16_to_8_c<16, 0, DITHER_BITDEPTH, 3>; // dither rgb_step param is filled + func_copy[make_tuple(true, 16, 0, DITHER_BITDEPTH, 4, 0)] = convert_rgb_uint16_to_8_c<16, 0, DITHER_BITDEPTH, 4>; // dither rgb_step param is filled + + // 
shifted scale + + // no dither, C + func_copy[make_tuple(false, 10, -1, DITHER_BITDEPTH, 1, 0)] = convert_uint16_to_8_c<10, -1, DITHER_BITDEPTH>; + func_copy[make_tuple(false, 12, -1, DITHER_BITDEPTH, 1, 0)] = convert_uint16_to_8_c<12, -1, DITHER_BITDEPTH>; + func_copy[make_tuple(false, 14, -1, DITHER_BITDEPTH, 1, 0)] = convert_uint16_to_8_c<14, -1, DITHER_BITDEPTH>; + func_copy[make_tuple(false, 16, -1, DITHER_BITDEPTH, 1, 0)] = convert_uint16_to_8_c<16, -1, DITHER_BITDEPTH>; + // no dither, SSE2 + func_copy[make_tuple(false, 10, -1, DITHER_BITDEPTH, 1, CPUF_SSE2)] = convert_uint16_to_8_sse2<10>; + func_copy[make_tuple(false, 12, -1, DITHER_BITDEPTH, 1, CPUF_SSE2)] = convert_uint16_to_8_sse2<12>; + func_copy[make_tuple(false, 14, -1, DITHER_BITDEPTH, 1, CPUF_SSE2)] = convert_uint16_to_8_sse2<14>; + func_copy[make_tuple(false, 16, -1, DITHER_BITDEPTH, 1, CPUF_SSE2)] = convert_uint16_to_8_sse2<16>; + // dither, C + func_copy[make_tuple(false, 10, 0, DITHER_BITDEPTH, 1, 0)] = convert_uint16_to_8_c<10, 0, DITHER_BITDEPTH>; + func_copy[make_tuple(false, 12, 0, DITHER_BITDEPTH, 1, 0)] = convert_uint16_to_8_c<12, 0, DITHER_BITDEPTH>; + func_copy[make_tuple(false, 14, 0, DITHER_BITDEPTH, 1, 0)] = convert_uint16_to_8_c<14, 0, DITHER_BITDEPTH>; + func_copy[make_tuple(false, 16, 0, DITHER_BITDEPTH, 1, 0)] = convert_uint16_to_8_c<16, 0, DITHER_BITDEPTH>; + + BitDepthConvFuncPtr result = func_copy[make_tuple(full_scale, source_bitdepth, dither_mode, dither_bitdepth, rgb_step, cpu)]; + if (result == nullptr) + result = func_copy[make_tuple(full_scale, source_bitdepth, dither_mode, dither_bitdepth, rgb_step, 0)]; // fallback to C + return result; +} + ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dither_mode, const int _target_bitdepth, bool _truerange, IScriptEnvironment* env) : GenericVideoFilter(_child), float_range(_float_range), dither_mode(_dither_mode), target_bitdepth(_target_bitdepth), truerange(_truerange) @@ -1721,42 +1789,21 @@ ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dith // fill conv_function_full_scale and conv_function_shifted_scale if (truerange) { - switch (bits_per_pixel) - { - case 10: - // no convert_rgb_uint16_to_8_c yet, choosing logic is left here for sample - conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<10, -1, 8, 1> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<10, 0, 8, 1> : convert_rgb_uint16_to_8_c<10, -1, 8, 1>); - conv_function_full_scale_no_dither = sse2 ? convert_rgb_uint16_to_8_c<10, -1, 8, 1> : convert_rgb_uint16_to_8_c<10, -1, 8, 1>; - conv_function_shifted_scale = (sse2 && dither_mode<0) ? convert_uint16_to_8_sse2<10> : (dither_mode>=0 ? convert_uint16_to_8_c<10, 0, 8> : convert_uint16_to_8_c<10, -1, 8>); - break; - case 12: - conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<12, -1, 8, 1> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<12, 0, 8, 1> : convert_rgb_uint16_to_8_c<12, -1, 8, 1>); - conv_function_full_scale_no_dither = sse2 ? convert_rgb_uint16_to_8_c<12, -1, 8, 1> : convert_rgb_uint16_to_8_c<12, -1, 8, 1>; - conv_function_shifted_scale = (sse2 && dither_mode<0) ? convert_uint16_to_8_sse2<12> : (dither_mode>=0 ? convert_uint16_to_8_c<12, 0, 8> : convert_uint16_to_8_c<12, -1, 8>); - break; - case 14: - conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<14, -1, 8, 1> : (dither_mode>=0 ? 
convert_rgb_uint16_to_8_c<14, 0, 8, 1> : convert_rgb_uint16_to_8_c<14, -1, 8, 1>); - conv_function_full_scale_no_dither = sse2 ? convert_rgb_uint16_to_8_c<14, -1, 8, 1> : convert_rgb_uint16_to_8_c<14, -1, 8, 1>; - conv_function_shifted_scale = (sse2 && dither_mode<0) ? convert_uint16_to_8_sse2<14> : (dither_mode>=0 ? convert_uint16_to_8_c<14, 0, 8> : convert_uint16_to_8_c<14, -1, 8>); - break; - case 16: - conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<16, -1, 8, 1> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<16, 0, 8, 1> : convert_rgb_uint16_to_8_c<16, -1, 8, 1>); - conv_function_full_scale_no_dither = sse2 ? convert_rgb_uint16_to_8_c<16, -1, 8, 1> : convert_rgb_uint16_to_8_c<16, -1, 8, 1>; - conv_function_shifted_scale = (sse2 && dither_mode<0) ? convert_uint16_to_8_sse2<16> : (dither_mode>=0 ? convert_uint16_to_8_c<16, 0, 8> : convert_uint16_to_8_c<16, -1, 8>); - break; - default: env->ThrowError("ConvertTo8bit: invalid source bitdepth"); - } + conv_function_full_scale = get_convert_to_8_function(true, bits_per_pixel, dither_mode, 8, 1, CPUF_SSE2); + conv_function_full_scale_no_dither = get_convert_to_8_function(true, bits_per_pixel, -1, 8, 1, CPUF_SSE2); // force dither_mode==-1 + conv_function_shifted_scale = get_convert_to_8_function(false, bits_per_pixel, dither_mode, 8, 1, CPUF_SSE2); } else { - if(vi.IsRGB48()) { // packed RGB: specify rgb_step for dither table access - conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<16, -1, 8, 3> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<16, 0, 8, 3> : convert_rgb_uint16_to_8_c<16, -1, 8, 3>); - } else if(vi.IsRGB64()) { - conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<16, -1, 8, 4> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<16, 0, 8, 4> : convert_rgb_uint16_to_8_c<16, -1, 8, 4>); - } else { - conv_function_full_scale = (sse2 && dither_mode<0) ? convert_rgb_uint16_to_8_c<16, -1, 8, 1> : (dither_mode>=0 ? convert_rgb_uint16_to_8_c<16, 0, 8, 1> : convert_rgb_uint16_to_8_c<16, -1, 8, 1>); - } - conv_function_full_scale_no_dither = sse2 ? convert_rgb_uint16_to_8_c<16, -1, 8, 1> : convert_rgb_uint16_to_8_c<16, -1, 8, 1>; - conv_function_shifted_scale = (sse2 && dither_mode<0) ? convert_uint16_to_8_sse2<16> : (dither_mode>=0 ? 
convert_uint16_to_8_c<16, 0, 8> : convert_uint16_to_8_c<16, -1, 8>); + conv_function_full_scale = get_convert_to_8_function(true, 16, dither_mode, 8, 1, CPUF_SSE2); + conv_function_full_scale_no_dither = get_convert_to_8_function(true, 16, -1, 8, 1, CPUF_SSE2); + conv_function_shifted_scale = get_convert_to_8_function(false, 16, dither_mode, 8, 1, CPUF_SSE2); + } + + // override for RGB48 and 64 (internal rgb_step may differ when dithering is used + if(vi.IsRGB48()) { // packed RGB: specify rgb_step for dither table access + conv_function_full_scale = get_convert_to_8_function(true, 16, dither_mode, 8, 3, CPUF_SSE2); + } else if(vi.IsRGB64()) { + conv_function_full_scale = get_convert_to_8_function(true, 16, dither_mode, 8, 4, CPUF_SSE2); } // packed RGB scaling is full_scale 0..65535->0..255 @@ -1942,3 +1989,196 @@ PVideoFrame __stdcall ConvertBits::GetFrame(int n, IScriptEnvironment* env) { } return dst; } + +AVSValue AddAlphaPlane::Create(AVSValue args, void*, IScriptEnvironment* env) +{ + bool isMaskDefined = args[1].Defined(); + // if mask is not defined and videoformat has Alpha then we return + const VideoInfo& vi = args[0].AsClip()->GetVideoInfo(); + if (!isMaskDefined && (vi.IsPlanarRGBA() || vi.IsYUVA() || vi.IsRGB32() || vi.IsRGB64())) + return args[0].AsClip(); + if (vi.IsRGB24()) { + AVSValue new_args[1] = { args[0].AsClip() }; + PClip child = env->Invoke("ConvertToRGB32", AVSValue(new_args, 1)).AsClip(); + return new AddAlphaPlane(child, (float)args[1].AsFloat(-1.0f), isMaskDefined, env); + } else if(vi.IsRGB48()) { + AVSValue new_args[1] = { args[0].AsClip() }; + PClip child = env->Invoke("ConvertToRGB64", AVSValue(new_args, 1)).AsClip(); + return new AddAlphaPlane(child, (float)args[1].AsFloat(-1.0f), isMaskDefined, env); + } + return new AddAlphaPlane(args[0].AsClip(), (float)args[1].AsFloat(-1.0f), isMaskDefined, env); +} + +AddAlphaPlane::AddAlphaPlane(PClip _child, float _mask_f, bool isMaskDefined, IScriptEnvironment* env) + : GenericVideoFilter(_child) +{ + if(vi.IsYUY2()) + env->ThrowError("AddAlphaPlane: YUY2 is not allowed"); + if(vi.IsY()) + env->ThrowError("AddAlphaPlane: greyscale source is not allowed"); + if(vi.IsYUV() && !vi.Is420() && !vi.Is422() && !vi.Is444()) // e.g. 410 + env->ThrowError("AddAlphaPlane: format not supported"); + if(!vi.IsYUV() && !vi.IsYUVA() && !vi.IsRGB()) + env->ThrowError("AddAlphaPlane: format not supported"); + + pixelsize = vi.ComponentSize(); + bits_per_pixel = vi.BitsPerComponent(); + + if (vi.IsYUV()) { + int pixel_type = vi.pixel_type; + if (vi.IsYV12()) + pixel_type = VideoInfo::CS_YV12; + int new_pixel_type = (pixel_type & ~VideoInfo::CS_YUV) | VideoInfo::CS_YUVA; + vi.pixel_type = new_pixel_type; + } else if(vi.IsPlanarRGB()) { + int pixel_type = vi.pixel_type; + int new_pixel_type = (pixel_type & ~VideoInfo::CS_RGB_TYPE) | VideoInfo::CS_RGBA_TYPE; + vi.pixel_type = new_pixel_type; + } + // RGB24 and RGB48 already converted to 32/64 + // RGB32, RGB64, YUVA and RGBA: no change + + // mask parameter. 
If none->max transparency + + int max_pixel_value = (1 << bits_per_pixel) - 1; + if(!isMaskDefined || _mask_f < 0) { + mask_f = 1.0f; + mask = max_pixel_value; + } + else { + mask_f = _mask_f; + if (mask_f < 0) mask_f = 0; + mask = (int)mask_f; + + mask = clamp(mask, 0, max_pixel_value); + mask_f = clamp(mask_f, 0.0f, 1.0f); + } +} + +PVideoFrame AddAlphaPlane::GetFrame(int n, IScriptEnvironment* env) +{ + PVideoFrame src = child->GetFrame(n, env); + PVideoFrame dst = env->NewVideoFrame(vi); + if(vi.IsPlanar()) + { + int planes_y[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A }; + int planes_r[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A }; + int *planes = (vi.IsYUV() || vi.IsYUVA()) ? planes_y : planes_r; + // copy existing 3 planes + for (int p = 0; p < 3; ++p) { + const int plane = planes[p]; + env->BitBlt(dst->GetWritePtr(plane), dst->GetPitch(plane), src->GetReadPtr(plane), + src->GetPitch(plane), src->GetRowSize(plane), src->GetHeight(plane)); + } + } else { + // Packed RGB, already converted to RGB32 or RGB64 + env->BitBlt(dst->GetWritePtr(), dst->GetPitch(), src->GetReadPtr(), + src->GetPitch(), src->GetRowSize(), src->GetHeight()); + } + + if (vi.IsPlanarRGBA() || vi.IsYUVA()) { + const int dst_pitchA = dst->GetPitch(PLANAR_A); + BYTE* dstp_a = dst->GetWritePtr(PLANAR_A); + const int heightA = dst->GetHeight(PLANAR_A); + + switch (vi.ComponentSize()) + { + case 1: + fill_plane(dstp_a, heightA, dst_pitchA, mask); + break; + case 2: + fill_plane(dstp_a, heightA, dst_pitchA, mask); + break; + case 4: + fill_plane(dstp_a, heightA, dst_pitchA, mask_f); + break; + } + return dst; + } + // RGB32 and RGB64 + + BYTE* pf = dst->GetWritePtr(); + int pitch = dst->GetPitch(); + int rowsize = dst->GetRowSize(); + int height = dst->GetHeight(); + int width = vi.width; + + if(vi.IsRGB32()) { + for (int y = 0; y(pf)[x] = mask; + } + pf += pitch; + } + } + + return dst; +} + +AVSValue RemoveAlphaPlane::Create(AVSValue args, void*, IScriptEnvironment* env) +{ + // if videoformat has no Alpha then we return + const VideoInfo& vi = args[0].AsClip()->GetVideoInfo(); + if(vi.IsPlanar() && (vi.IsYUV() || vi.IsPlanarRGB())) // planar and no alpha + return args[0].AsClip(); + if(vi.IsRGB24() || vi.IsRGB48()) // packed RGB and no alpha + return args[0].AsClip(); + if (vi.IsRGB32()) { + AVSValue new_args[1] = { args[0].AsClip() }; + return env->Invoke("ConvertToRGB24", AVSValue(new_args, 1)).AsClip(); + } + if (vi.IsRGB64()) { + AVSValue new_args[1] = { args[0].AsClip() }; + return env->Invoke("ConvertToRGB48", AVSValue(new_args, 1)).AsClip(); + } + return new RemoveAlphaPlane(args[0].AsClip(), env); +} + +RemoveAlphaPlane::RemoveAlphaPlane(PClip _child, IScriptEnvironment* env) + : GenericVideoFilter(_child) +{ + if(vi.IsYUY2()) + env->ThrowError("RemoveAlphaPlane: YUY2 is not allowed"); + if(vi.IsY()) + env->ThrowError("RemoveAlphaPlane: greyscale source is not allowed"); + + if (vi.IsYUVA()) { + int pixel_type = vi.pixel_type; + int new_pixel_type = (pixel_type & ~VideoInfo::CS_YUVA) | VideoInfo::CS_YUV; + vi.pixel_type = new_pixel_type; + } else if(vi.IsPlanarRGBA()) { + int pixel_type = vi.pixel_type; + int new_pixel_type = (pixel_type & ~VideoInfo::CS_RGBA_TYPE) | VideoInfo::CS_RGB_TYPE; + vi.pixel_type = new_pixel_type; + } +} + +PVideoFrame RemoveAlphaPlane::GetFrame(int n, IScriptEnvironment* env) +{ + PVideoFrame src = child->GetFrame(n, env); + PVideoFrame dst = env->NewVideoFrame(vi); + if(vi.IsPlanar()) + { + int planes_y[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A }; + int planes_r[4] = { 
PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A }; + int *planes = (vi.IsYUV() || vi.IsYUVA()) ? planes_y : planes_r; + // copy 3 planes w/o alpha + for (int p = 0; p < 3; ++p) { + const int plane = planes[p]; + env->BitBlt(dst->GetWritePtr(plane), dst->GetPitch(plane), src->GetReadPtr(plane), + src->GetPitch(plane), src->GetRowSize(plane), src->GetHeight(plane)); + } + } + return dst; + // Packed RGB: already handled in ::Create through Invoke 32->24 or 64->48 conversion +} + diff --git a/avs_core/convert/convert_planar.h b/avs_core/convert/convert_planar.h index ce18dff37..04bac66c4 100644 --- a/avs_core/convert/convert_planar.h +++ b/avs_core/convert/convert_planar.h @@ -222,4 +222,36 @@ class ConvertBits : public GenericVideoFilter bool format_change_only; }; +class AddAlphaPlane : public GenericVideoFilter +{ +public: + AddAlphaPlane(PClip _child, float _mask_f, bool isMaskDefined, IScriptEnvironment* env); + PVideoFrame __stdcall GetFrame(int n,IScriptEnvironment* env); + + int __stdcall SetCacheHints(int cachehints, int frame_range) override { + return cachehints == CACHE_GET_MTMODE ? MT_NICE_FILTER : 0; + } + + static AVSValue __cdecl Create(AVSValue args, void*, IScriptEnvironment* env); +private: + int mask; + float mask_f; + int pixelsize; + int bits_per_pixel; +}; + +class RemoveAlphaPlane : public GenericVideoFilter +{ +public: + RemoveAlphaPlane(PClip _child, IScriptEnvironment* env); + PVideoFrame __stdcall GetFrame(int n,IScriptEnvironment* env); + + int __stdcall SetCacheHints(int cachehints, int frame_range) override { + return cachehints == CACHE_GET_MTMODE ? MT_NICE_FILTER : 0; + } + + static AVSValue __cdecl Create(AVSValue args, void*, IScriptEnvironment* env); +private: +}; + #endif From 2dba1640ac5a16bec92d1e6d0c0804af514a1032 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 5 Oct 2016 15:38:33 +0200 Subject: [PATCH 091/120] Avisynth.h: auto fallback to avs2.6 when avs+ VideoInfo:: member function does not exist --- avs_core/include/avisynth.h | 147 +++++++++++++++++++++--------------- 1 file changed, 88 insertions(+), 59 deletions(-) diff --git a/avs_core/include/avisynth.h b/avs_core/include/avisynth.h index 13e613be1..c0df07697 100644 --- a/avs_core/include/avisynth.h +++ b/avs_core/include/avisynth.h @@ -1,6 +1,9 @@ // Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. // Avisynth v2.6. Copyright 2006 Klaus Post. // Avisynth v2.6. Copyright 2009 Ian Brabham. +// Avisynth+ project +// Last modification date: 20161005 + // http://www.avisynth.org // This program is free software; you can redistribute it and/or modify @@ -320,6 +323,9 @@ struct AVS_Linkage { # define AVS_BakedCode(arg) ; # define AVS_LinkCall(arg) # define AVS_LinkCallV(arg) +# define AVS_LinkCallOpt(arg, argOpt) AVSLinkCall(arg) +# define AVS_LinkCallOptDefault(arg, argDefaultValue) AVSLinkCall(arg()) +# define CALL_MEMBER_FN(object,ptrToMember) #else /* Macro resolution for code inside user plugin */ @@ -336,6 +342,14 @@ extern const AVS_Linkage* AVS_linkage; # define AVS_BakedCode(arg) { arg ; } # define AVS_LinkCall(arg) !AVS_linkage || offsetof(AVS_Linkage, arg) >= AVS_linkage->Size ? 0 : (this->*(AVS_linkage->arg)) # define AVS_LinkCallV(arg) !AVS_linkage || offsetof(AVS_Linkage, arg) >= AVS_linkage->Size ? *this : (this->*(AVS_linkage->arg)) +// Helper macros for fallback option when a function does not exists +#define CALL_MEMBER_FN(object,ptrToMember) ((object)->*(ptrToMember)) +#define AVS_LinkCallOpt(arg, argOpt) !AVS_linkage ? 0 : \ + ( offsetof(AVS_Linkage, arg) >= AVS_linkage->Size ? 
\ + (offsetof(AVS_Linkage, argOpt) >= AVS_linkage->Size ? 0 : CALL_MEMBER_FN(this, AVS_linkage->argOpt)() ) : \ + CALL_MEMBER_FN(this, AVS_linkage->arg)() ) +// AVS_LinkCallOptDefault puts automatically () only after arg +# define AVS_LinkCallOptDefault(arg, argDefaultValue) !AVS_linkage || offsetof(AVS_Linkage, arg) >= AVS_linkage->Size ? (argDefaultValue) : ((this->*(AVS_linkage->arg))()) #endif @@ -601,99 +615,111 @@ enum { }; // useful functions of the above - bool HasVideo() const AVS_BakedCode( return AVS_LinkCall(HasVideo)() ) - bool HasAudio() const AVS_BakedCode( return AVS_LinkCall(HasAudio)() ) - bool IsRGB() const AVS_BakedCode( return AVS_LinkCall(IsRGB)() ) - bool IsRGB24() const AVS_BakedCode( return AVS_LinkCall(IsRGB24)() ) - bool IsRGB32() const AVS_BakedCode( return AVS_LinkCall(IsRGB32)() ) - bool IsYUV() const AVS_BakedCode( return AVS_LinkCall(IsYUV)() ) - bool IsYUY2() const AVS_BakedCode( return AVS_LinkCall(IsYUY2)() ) - - bool IsYV24() const AVS_BakedCode( return AVS_LinkCall(IsYV24)() ) - bool IsYV16() const AVS_BakedCode( return AVS_LinkCall(IsYV16)() ) - bool IsYV12() const AVS_BakedCode( return AVS_LinkCall(IsYV12)() ) - bool IsYV411() const AVS_BakedCode( return AVS_LinkCall(IsYV411)() ) -//bool IsYUV9() const; - bool IsY8() const AVS_BakedCode( return AVS_LinkCall(IsY8)() ) - - bool IsColorSpace(int c_space) const AVS_BakedCode( return AVS_LinkCall(IsColorSpace)(c_space) ) - - bool Is(int property) const AVS_BakedCode( return AVS_LinkCall(Is)(property) ) - bool IsPlanar() const AVS_BakedCode( return AVS_LinkCall(IsPlanar)() ) - bool IsFieldBased() const AVS_BakedCode( return AVS_LinkCall(IsFieldBased)() ) - bool IsParityKnown() const AVS_BakedCode( return AVS_LinkCall(IsParityKnown)() ) - bool IsBFF() const AVS_BakedCode( return AVS_LinkCall(IsBFF)() ) - bool IsTFF() const AVS_BakedCode( return AVS_LinkCall(IsTFF)() ) - - bool IsVPlaneFirst() const AVS_BakedCode( return AVS_LinkCall(IsVPlaneFirst)() ) // Don't use this + bool HasVideo() const AVS_BakedCode(return AVS_LinkCall(HasVideo)()) + bool HasAudio() const AVS_BakedCode(return AVS_LinkCall(HasAudio)()) + bool IsRGB() const AVS_BakedCode(return AVS_LinkCall(IsRGB)()) + bool IsRGB24() const AVS_BakedCode(return AVS_LinkCall(IsRGB24)()) + bool IsRGB32() const AVS_BakedCode(return AVS_LinkCall(IsRGB32)()) + bool IsYUV() const AVS_BakedCode(return AVS_LinkCall(IsYUV)()) + bool IsYUY2() const AVS_BakedCode(return AVS_LinkCall(IsYUY2)()) + + bool IsYV24() const AVS_BakedCode(return AVS_LinkCall(IsYV24)()) + bool IsYV16() const AVS_BakedCode(return AVS_LinkCall(IsYV16)()) + bool IsYV12() const AVS_BakedCode(return AVS_LinkCall(IsYV12)()) + bool IsYV411() const AVS_BakedCode(return AVS_LinkCall(IsYV411)()) + //bool IsYUV9() const; + bool IsY8() const AVS_BakedCode(return AVS_LinkCall(IsY8)()) + + bool IsColorSpace(int c_space) const AVS_BakedCode(return AVS_LinkCall(IsColorSpace)(c_space)) + + bool Is(int property) const AVS_BakedCode(return AVS_LinkCall(Is)(property)) + bool IsPlanar() const AVS_BakedCode(return AVS_LinkCall(IsPlanar)()) + bool IsFieldBased() const AVS_BakedCode(return AVS_LinkCall(IsFieldBased)()) + bool IsParityKnown() const AVS_BakedCode(return AVS_LinkCall(IsParityKnown)()) + bool IsBFF() const AVS_BakedCode(return AVS_LinkCall(IsBFF)()) + bool IsTFF() const AVS_BakedCode(return AVS_LinkCall(IsTFF)()) + + bool IsVPlaneFirst() const AVS_BakedCode(return AVS_LinkCall(IsVPlaneFirst)()) // Don't use this // Will not work on planar images, but will return only luma planes - int 
BytesFromPixels(int pixels) const AVS_BakedCode( return AVS_LinkCall(BytesFromPixels)(pixels) ) - int RowSize(int plane=0) const AVS_BakedCode( return AVS_LinkCall(RowSize)(plane) ) - int BMPSize() const AVS_BakedCode( return AVS_LinkCall(BMPSize)() ) - - __int64 AudioSamplesFromFrames(int frames) const AVS_BakedCode( return AVS_LinkCall(AudioSamplesFromFrames)(frames) ) - int FramesFromAudioSamples(__int64 samples) const AVS_BakedCode( return AVS_LinkCall(FramesFromAudioSamples)(samples) ) - __int64 AudioSamplesFromBytes(__int64 bytes) const AVS_BakedCode( return AVS_LinkCall(AudioSamplesFromBytes)(bytes) ) - __int64 BytesFromAudioSamples(__int64 samples) const AVS_BakedCode( return AVS_LinkCall(BytesFromAudioSamples)(samples) ) - int AudioChannels() const AVS_BakedCode( return AVS_LinkCall(AudioChannels)() ) - int SampleType() const AVS_BakedCode( return AVS_LinkCall(SampleType)() ) - bool IsSampleType(int testtype) const AVS_BakedCode( return AVS_LinkCall(IsSampleType)(testtype) ) - int SamplesPerSecond() const AVS_BakedCode( return AVS_LinkCall(SamplesPerSecond)() ) - int BytesPerAudioSample() const AVS_BakedCode( return AVS_LinkCall(BytesPerAudioSample)() ) - void SetFieldBased(bool isfieldbased) AVS_BakedCode( AVS_LinkCall(SetFieldBased)(isfieldbased) ) - void Set(int property) AVS_BakedCode( AVS_LinkCall(Set)(property) ) - void Clear(int property) AVS_BakedCode( AVS_LinkCall(Clear)(property) ) + int BytesFromPixels(int pixels) const AVS_BakedCode(return AVS_LinkCall(BytesFromPixels)(pixels)) + int RowSize(int plane = 0) const AVS_BakedCode(return AVS_LinkCall(RowSize)(plane)) + int BMPSize() const AVS_BakedCode(return AVS_LinkCall(BMPSize)()) + + __int64 AudioSamplesFromFrames(int frames) const AVS_BakedCode(return AVS_LinkCall(AudioSamplesFromFrames)(frames)) + int FramesFromAudioSamples(__int64 samples) const AVS_BakedCode(return AVS_LinkCall(FramesFromAudioSamples)(samples)) + __int64 AudioSamplesFromBytes(__int64 bytes) const AVS_BakedCode(return AVS_LinkCall(AudioSamplesFromBytes)(bytes)) + __int64 BytesFromAudioSamples(__int64 samples) const AVS_BakedCode(return AVS_LinkCall(BytesFromAudioSamples)(samples)) + int AudioChannels() const AVS_BakedCode(return AVS_LinkCall(AudioChannels)()) + int SampleType() const AVS_BakedCode(return AVS_LinkCall(SampleType)()) + bool IsSampleType(int testtype) const AVS_BakedCode(return AVS_LinkCall(IsSampleType)(testtype)) + int SamplesPerSecond() const AVS_BakedCode(return AVS_LinkCall(SamplesPerSecond)()) + int BytesPerAudioSample() const AVS_BakedCode(return AVS_LinkCall(BytesPerAudioSample)()) + void SetFieldBased(bool isfieldbased) AVS_BakedCode(AVS_LinkCall(SetFieldBased)(isfieldbased)) + void Set(int property) AVS_BakedCode(AVS_LinkCall(Set)(property)) + void Clear(int property) AVS_BakedCode(AVS_LinkCall(Clear)(property)) // Subsampling in bitshifts! 
- int GetPlaneWidthSubsampling(int plane) const AVS_BakedCode( return AVS_LinkCall(GetPlaneWidthSubsampling)(plane) ) - int GetPlaneHeightSubsampling(int plane) const AVS_BakedCode( return AVS_LinkCall(GetPlaneHeightSubsampling)(plane) ) - int BitsPerPixel() const AVS_BakedCode( return AVS_LinkCall(BitsPerPixel)() ) + int GetPlaneWidthSubsampling(int plane) const AVS_BakedCode(return AVS_LinkCall(GetPlaneWidthSubsampling)(plane)) + int GetPlaneHeightSubsampling(int plane) const AVS_BakedCode(return AVS_LinkCall(GetPlaneHeightSubsampling)(plane)) + int BitsPerPixel() const AVS_BakedCode(return AVS_LinkCall(BitsPerPixel)()) - int BytesPerChannelSample() const AVS_BakedCode( return AVS_LinkCall(BytesPerChannelSample)() ) + int BytesPerChannelSample() const AVS_BakedCode(return AVS_LinkCall(BytesPerChannelSample)()) // useful mutator - void SetFPS(unsigned numerator, unsigned denominator) AVS_BakedCode( AVS_LinkCall(SetFPS)(numerator, denominator) ) + void SetFPS(unsigned numerator, unsigned denominator) AVS_BakedCode(AVS_LinkCall(SetFPS)(numerator, denominator)) // Range protected multiply-divide of FPS - void MulDivFPS(unsigned multiplier, unsigned divisor) AVS_BakedCode( AVS_LinkCall(MulDivFPS)(multiplier, divisor) ) + void MulDivFPS(unsigned multiplier, unsigned divisor) AVS_BakedCode(AVS_LinkCall(MulDivFPS)(multiplier, divisor)) // Test for same colorspace bool IsSameColorspace(const VideoInfo& vi) const AVS_BakedCode(return AVS_LinkCall(IsSameColorspace)(vi)) + // AVS+ extensions + // 20161005: + // Mapping of AVS+ extensions to classic 2.6 functions. + // In order to use these extended AVS+ functions for plugins that should work + // either with AVS+ or with Classic (8 bit) Avs 2.6 ans earlier AVS+ versions, there is an + // automatic fallback mechanism. + // From AVS+'s point of view these are not "baked" codes, the primary functions should exist. + // Examples: + // Is444() is mapped to IsYV24() for classic AVS2.6 + // ComponentSize() returns constant 1 (1 bytes per pixel component) + // BitsPerComponent() returns constant 8 (Classic AVS2.6 is 8 bit only) + // Returns the number of color channels or planes in a frame - int NumComponents() const AVS_BakedCode(return AVS_LinkCall(NumComponents)()) + int NumComponents() const AVS_BakedCode(return AVS_LinkCallOptDefault(NumComponents, (((AVS_LinkCall(IsYUV)()) && !(AVS_LinkCall(IsY8)())) ? 
3 : AVS_LinkCall(BytesFromPixels)(1)) ) ) // Returns the size in bytes of a single component of a pixel - int ComponentSize() const AVS_BakedCode(return AVS_LinkCall(ComponentSize)()) + int ComponentSize() const AVS_BakedCode(return AVS_LinkCallOptDefault(ComponentSize, 1)) // Returns the bit depth of a single component of a pixel - int BitsPerComponent() const AVS_BakedCode(return AVS_LinkCall(BitsPerComponent)()) + int BitsPerComponent() const AVS_BakedCode(return AVS_LinkCallOptDefault(BitsPerComponent, 8)) // like IsYV24, but bit-depth independent also for YUVA - bool Is444() const AVS_BakedCode( return AVS_LinkCall(Is444)() ) + bool Is444() const AVS_BakedCode(return AVS_LinkCallOpt(Is444, IsYV24) ) // like IsYV16, but bit-depth independent also for YUVA - bool Is422() const AVS_BakedCode( return AVS_LinkCall(Is422)() ) + bool Is422() const AVS_BakedCode(return AVS_LinkCallOpt(Is422, IsYV16) ) // like IsYV12, but bit-depth independent also for YUVA - bool Is420() const AVS_BakedCode( return AVS_LinkCall(Is420)() ) + bool Is420() const AVS_BakedCode( return AVS_LinkCallOpt(Is420, IsYV12) ) // like IsY8, but bit-depth independent - bool IsY() const AVS_BakedCode( return AVS_LinkCall(IsY)() ) + bool IsY() const AVS_BakedCode( return AVS_LinkCallOpt(IsY, IsY8) ) // like IsRGB24 for 16 bit samples - bool IsRGB48() const AVS_BakedCode( return AVS_LinkCall(IsRGB48)() ) + bool IsRGB48() const AVS_BakedCode( return AVS_LinkCallOptDefault(IsRGB48, false) ) // like IsRGB32 for 16 bit samples - bool IsRGB64() const AVS_BakedCode( return AVS_LinkCall(IsRGB64)() ) + bool IsRGB64() const AVS_BakedCode( return AVS_LinkCallOptDefault(IsRGB64, false) ) // YUVA? - bool IsYUVA() const AVS_BakedCode( return AVS_LinkCall(IsYUVA)() ) + bool IsYUVA() const AVS_BakedCode( return AVS_LinkCallOptDefault(IsYUVA, false) ) // Planar RGB? - bool IsPlanarRGB() const AVS_BakedCode( return AVS_LinkCall(IsPlanarRGB)() ) + bool IsPlanarRGB() const AVS_BakedCode( return AVS_LinkCallOptDefault(IsPlanarRGB, false) ) // Planar RGBA? 
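  // A minimal illustration (hypothetical helpers, not part of this patch), assuming
  // only the fallback mappings documented above: on a classic 8-bit Avisynth 2.6 host
  // BitsPerComponent() resolves to the constant 8, Is420() resolves to IsYV12(), and
  // the RGB48/RGB64/YUVA/planar-RGB queries resolve to false, so the same plugin
  // source builds against this header and still loads on either host.
  //   static bool IsHighBitDepth(const VideoInfo& vi) { return vi.BitsPerComponent() > 8; }  // always false under classic 2.6
  //   static bool HasAlphaChannel(const VideoInfo& vi) { return vi.IsYUVA() || vi.IsPlanarRGBA() || vi.IsRGB32() || vi.IsRGB64(); }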
- bool IsPlanarRGBA() const AVS_BakedCode( return AVS_LinkCall(IsPlanarRGBA)() ) + bool IsPlanarRGBA() const AVS_BakedCode( return AVS_LinkCallOptDefault(IsPlanarRGBA, false) ) }; // end struct VideoInfo @@ -1063,6 +1089,9 @@ class AVSValue { #endif }; // end class AVSValue +#undef CALL_MEMBER_FN +#undef AVS_LinkCallOptDefault +#undef AVS_LinkCallOpt #undef AVS_LinkCallV #undef AVS_LinkCall #undef AVS_BakedCode From c586ae0c4ed88c52b683c7c85b6511e4773a15d6 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 5 Oct 2016 15:40:29 +0200 Subject: [PATCH 092/120] YUV 10-16->10-16 bitdepth conversions to SSE2 --- avs_core/convert/convert.cpp | 100 ++++++++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 8 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 35ae83347..7d2fb2f48 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -1414,6 +1414,51 @@ static void convert_uint16_to_uint16_c(const BYTE *srcp, BYTE *dstp, int src_row } } +template +static void convert_uint16_to_uint16_sse2(const BYTE *srcp8, BYTE *dstp8, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +{ + // remark: Compiler with SSE2 option generates the same effective code like this in C + // Drawback of SSE2: a future avx2 target gives more efficient code than inline SSE2 (256 bit registers) + const uint16_t *srcp = reinterpret_cast(srcp8); + src_pitch = src_pitch / sizeof(uint16_t); + uint16_t *dstp = reinterpret_cast(dstp8); + dst_pitch = dst_pitch / sizeof(uint16_t); + int src_width = src_rowsize / sizeof(uint16_t); + int wmod = (src_width / 16) * 16; + + __m128i shift = _mm_set_epi32(0,0,0,shiftbits); + + // no dithering, no range conversion, simply shift + for(int y=0; y(srcp + x)); // 8* uint16 + __m128i src_hi = _mm_load_si128(reinterpret_cast(srcp + x + 8)); // 8* uint16 + if(expandrange) { + src_lo = _mm_sll_epi16(src_lo, shift); + src_hi = _mm_sll_epi16(src_hi, shift); + } else { + src_lo = _mm_srl_epi16(src_lo, shift); + src_hi = _mm_srl_epi16(src_hi, shift); + } + _mm_store_si128(reinterpret_cast<__m128i*>(dstp+x), src_lo); + _mm_store_si128(reinterpret_cast<__m128i*>(dstp+x+8), src_hi); + } + // rest + for (int x = wmod; x < src_width; x++) + { + if(expandrange) + dstp[x] = srcp[x] << shiftbits; // expand range. No clamp before, source is assumed to have valid range + else + dstp[x] = srcp[x] >> shiftbits; // reduce range + } + dstp += dst_pitch; + srcp += src_pitch; + } +} + + // 8 bit to float, 16/14/12/10 bits to float template static void convert_uintN_to_float_c(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) @@ -1426,7 +1471,8 @@ static void convert_uintN_to_float_c(const BYTE *srcp, BYTE *dstp, int src_rowsi int src_width = src_rowsize / sizeof(pixel_t); - float max_src_pixelvalue = (float)((1< 0..float_range @@ -1434,11 +1480,49 @@ static void convert_uintN_to_float_c(const BYTE *srcp, BYTE *dstp, int src_rowsi { for (int x = 0; x < src_width; x++) { - dstp0[x] = srcp0[x] / max_src_pixelvalue * float_range; // or lookup + dstp0[x] = srcp0[x] * factor; } dstp0 += dst_pitch; srcp0 += src_pitch; } + // seems we better stuck with C in the future on such a simple loops + // if we could put it in a separate file + // VS2015 AVX2 code for this: + // takes (8 uint16_t -> 8*float(256 bit) at a time) * unroll_by_2 + // then makes singles with unrolled_by_4 until it can, then do the rest. 
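  // A scalar sketch of what this hunk adds (illustrative only, not part of the patch):
  // the SSE2 path merely vectorizes a per-pixel shift, and the float path replaces a
  // per-pixel divide with one precomputed multiplier, e.g. for a 10-bit source:
  //   uint16_t expand_10_to_16(uint16_t v) { return (uint16_t)(v << 6); }  // 1023 -> 65472, no clamp
  //   uint16_t reduce_16_to_10(uint16_t v) { return (uint16_t)(v >> 6); }
  //   float    uint10_to_float(uint16_t v, float float_range) { return v * (float_range / 1023.0f); }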
+ /* + AVX2 by VS2015: (8*uint16->8*float)xUnrollBy2 + $LL7@convert_ui: + vpmovzxwd ymm0, XMMWORD PTR [esi+ecx*2] + vcvtdq2ps ymm0, ymm0 + vmulps ymm0, ymm0, ymm2 + vmovups YMMWORD PTR [edi+ecx*4], ymm0 + vpmovzxwd ymm0, XMMWORD PTR [esi+ecx*2+16] + vcvtdq2ps ymm0, ymm0 + vmulps ymm0, ymm0, ymm2 + vmovups YMMWORD PTR [edi+ecx*4+32], ymm0 + add ecx, 16 ; 00000010H + cmp ecx, ebx + jl SHORT $LL7@convert_ui + + SSE2 by VS2015 (4*uint16->4*float)xUnrollBy2 + $LL7@convert_ui: + movq xmm1, QWORD PTR [ebp+ecx*2] + xorps xmm0, xmm0 + punpcklwd xmm1, xmm0 + cvtdq2ps xmm0, xmm1 + mulps xmm0, xmm3 + movups XMMWORD PTR [ebx+ecx*4], xmm0 + movq xmm1, QWORD PTR [ebp+ecx*2+8] + xorps xmm0, xmm0 + punpcklwd xmm1, xmm0 + cvtdq2ps xmm0, xmm1 + mulps xmm0, xmm3 + movups XMMWORD PTR [ebx+ecx*4+16], xmm0 + add ecx, 8 + cmp ecx, esi + jl SHORT $LL7@convert_ui + */ } BitDepthConvFuncPtr get_convert_to_8_function(bool full_scale, int source_bitdepth, int dither_mode, int dither_bitdepth, int rgb_step, int cpu) @@ -1690,16 +1774,16 @@ ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dith if (bits_per_pixel > target_bitdepth) // reduce range 16->14/12/10 14->12/10 12->10. template: bitshift switch (bits_per_pixel - target_bitdepth) { - case 2: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; - case 4: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; - case 6: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; + case 2: conv_function_shifted_scale = sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; + case 4: conv_function_shifted_scale = sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; + case 6: conv_function_shifted_scale = sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; } else // expand range switch (target_bitdepth - bits_per_pixel) { - case 2: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; - case 4: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; - case 6: conv_function_shifted_scale = convert_uint16_to_uint16_c; break; + case 2: conv_function_shifted_scale = sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; + case 4: conv_function_shifted_scale = sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; + case 6: conv_function_shifted_scale = sse2 ? 
convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; } } else { From 8ab11f41f77ed96ba93ff0a7826d933b2b1ae00f Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 12 Oct 2016 16:19:02 +0200 Subject: [PATCH 093/120] Missing parenthesis when x64 build --- avs_core/filters/overlay/444convert.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/avs_core/filters/overlay/444convert.cpp b/avs_core/filters/overlay/444convert.cpp index 064e7d42f..2e8e7677b 100644 --- a/avs_core/filters/overlay/444convert.cpp +++ b/avs_core/filters/overlay/444convert.cpp @@ -580,7 +580,7 @@ void Convert444ToYV12(PVideoFrame &src, PVideoFrame &dst, int pixelsize, int bit convert_yv24_chroma_to_yv12_isse(dstU, srcU, dstUVpitch, srcUVpitch, w, h); convert_yv24_chroma_to_yv12_isse(dstV, srcV, dstUVpitch, srcUVpitch, w, h); } - else { + else #endif { if(pixelsize==1) { @@ -591,7 +591,7 @@ void Convert444ToYV12(PVideoFrame &src, PVideoFrame &dst, int pixelsize, int bit convert_yv24_chroma_to_yv12_c(dstU, srcU, dstUVpitch, srcUVpitch, w, h); convert_yv24_chroma_to_yv12_c(dstV, srcV, dstUVpitch, srcUVpitch, w, h); } - else // if (pixelsize == 4) + else { // if (pixelsize == 4) convert_yv24_chroma_to_yv12_c(dstU, srcU, dstUVpitch, srcUVpitch, w, h); convert_yv24_chroma_to_yv12_c(dstV, srcV, dstUVpitch, srcUVpitch, w, h); } From 409069c4d2cfa80060c2a223fe8f1a667bbac0f1 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 12 Oct 2016 16:19:29 +0200 Subject: [PATCH 094/120] Fix some warnings --- avs_core/filters/conditional/conditional_functions.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/avs_core/filters/conditional/conditional_functions.cpp b/avs_core/filters/conditional/conditional_functions.cpp index bfdb1008d..d24a3d175 100644 --- a/avs_core/filters/conditional/conditional_functions.cpp +++ b/avs_core/filters/conditional/conditional_functions.cpp @@ -491,7 +491,7 @@ AVSValue ComparePlane::CmpPlane(AVSValue clip, AVSValue clip2, void* user_data, double sad = 0.0; if (vi.IsRGB32() || vi.IsRGB64()) { if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { - sad = get_sad_rgb_sse2(srcp, srcp2, height, width, pitch, pitch2); + sad = (double)get_sad_rgb_sse2(srcp, srcp2, height, width, pitch, pitch2); } else #ifdef X86_32 if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { @@ -506,7 +506,7 @@ AVSValue ComparePlane::CmpPlane(AVSValue clip, AVSValue clip2, void* user_data, } } else { if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { - sad = get_sad_sse2(srcp, srcp2, height, width, pitch, pitch2); + sad = (double)get_sad_sse2(srcp, srcp2, height, width, pitch, pitch2); } else #ifdef X86_32 if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { @@ -584,7 +584,7 @@ AVSValue ComparePlane::CmpPlaneSame(AVSValue clip, void* user_data, int offset, double sad = 0; if (vi.IsRGB32() || vi.IsRGB64()) { if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { - sad = get_sad_rgb_sse2(srcp, srcp2, height, width, pitch, pitch2); + sad = (double)get_sad_rgb_sse2(srcp, srcp2, height, width, pitch, pitch2); } else #ifdef X86_32 if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { @@ -599,7 +599,7 @@ 
AVSValue ComparePlane::CmpPlaneSame(AVSValue clip, void* user_data, int offset, } } else { if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_SSE2) && IsPtrAligned(srcp, 16) && IsPtrAligned(srcp2, 16) && width >= 16) { - sad = get_sad_sse2(srcp, srcp2, height, width, pitch, pitch2); + sad = (double)get_sad_sse2(srcp, srcp2, height, width, pitch, pitch2); } else #ifdef X86_32 if ((pixelsize==1) && sum_in_32bits && (env->GetCPUFlags() & CPUF_INTEGER_SSE) && width >= 8) { From cdd15f64cdf4313acaef96f5740ff8459bf9d407 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 12 Oct 2016 16:20:07 +0200 Subject: [PATCH 095/120] Fix warning --- avs_core/filters/focus.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avs_core/filters/focus.cpp b/avs_core/filters/focus.cpp index 148d1e8a9..e29af70bd 100644 --- a/avs_core/filters/focus.cpp +++ b/avs_core/filters/focus.cpp @@ -1250,7 +1250,7 @@ static void accumulate_line_c(BYTE* _c_plane, const BYTE** planeP, int planes, i if (std::is_floating_point::value) threshold = threshold / 256; // float else if (sizeof(pixel_t) == 2) - threshold = threshold * (1 << (bits_per_pixel - 8)); // uint16_t, 10 bit: *4 16bit: *256 + threshold = threshold * (uint16_t)(1 << (bits_per_pixel - 8)); // uint16_t, 10 bit: *4 16bit: *256 for (size_t x = offset; x < width; ++x) { pixel_t current = c_plane[x]; From 595744e24f0f645ca32313d07e8e188c18115992 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 12 Oct 2016 16:28:24 +0200 Subject: [PATCH 096/120] AVX code path for 16/32 bit resampler --- avs_core/filters/resample_avx.cpp | 516 ++++++++++++++++++++++++++++++ avs_core/filters/resample_avx.h | 48 +++ 2 files changed, 564 insertions(+) create mode 100644 avs_core/filters/resample_avx.cpp create mode 100644 avs_core/filters/resample_avx.h diff --git a/avs_core/filters/resample_avx.cpp b/avs_core/filters/resample_avx.cpp new file mode 100644 index 000000000..29778f565 --- /dev/null +++ b/avs_core/filters/resample_avx.cpp @@ -0,0 +1,516 @@ +// Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. +// http://www.avisynth.org + +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit +// http://www.gnu.org/copyleft/gpl.html . +// +// Linking Avisynth statically or dynamically with other modules is making a +// combined work based on Avisynth. Thus, the terms and conditions of the GNU +// General Public License cover the whole combination. 
+// +// As a special exception, the copyright holders of Avisynth give you +// permission to link Avisynth with independent modules that communicate with +// Avisynth solely through the interfaces defined in avisynth.h, regardless of the license +// terms of these independent modules, and to copy and distribute the +// resulting combined work under terms of your choice, provided that +// every copy of the combined work is accompanied by a complete copy of +// the source code of Avisynth (the version of Avisynth used to produce the +// combined work), being distributed under the terms of the GNU General +// Public License plus this exception. An independent module is a module +// which is not derived from or based on Avisynth, such as 3rd-party filters, +// import and export plugins, or graphical user interfaces. + +#include "resample.h" +#include +#include "../core/internal.h" + +#include "transform.h" +#include "turn.h" +#include +#include +#include "../convert/convert_planar.h" +#include "../convert/convert_yuy2.h" + +#include +// Intrinsics for SSE4.1, SSSE3, SSE3, SSE2, ISSE and MMX +#include +#include +#include +#include "resample_avx.h" + +template +void resizer_h_ssse3_as_avx_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel) { + int filter_size = AlignNumber(program->filter_size, 8) / 8; + __m128i zero = _mm_setzero_si128(); + + const pixel_t *src = reinterpret_cast(src8); + pixel_t *dst = reinterpret_cast(dst8); + dst_pitch /= sizeof(pixel_t); + src_pitch /= sizeof(pixel_t); + + __m128 clamp_limit; + if (sizeof(pixel_t) == 2) + clamp_limit = _mm_set1_ps((float)(((int)1 << bits_per_pixel) - 1)); // clamp limit + + for (int y = 0; y < height; y++) { + float* current_coeff = program->pixel_coefficient_float; + for (int x = 0; x < width; x+=4) { + __m128 result1 = _mm_set1_ps(0.0f); + __m128 result2 = result1; + __m128 result3 = result1; + __m128 result4 = result1; + + int begin1 = program->pixel_offset[x+0]; + int begin2 = program->pixel_offset[x+1]; + int begin3 = program->pixel_offset[x+2]; + int begin4 = program->pixel_offset[x+3]; + + // begin1, result1 + for (int i = 0; i < filter_size; i++) { + __m128 data_l_single, data_h_single; + if(sizeof(pixel_t)==2) // word + { + // unaligned + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin1+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero); // spread higher 4*uint16_t pixel value -> 4*32 bit + data_l_single = _mm_cvtepi32_ps (src_l); // Converts the four signed 32-bit integer values of a to single-precision, floating-point values. + data_h_single = _mm_cvtepi32_ps (src_h); + } + else { // float + // unaligned + data_l_single = _mm_loadu_ps(reinterpret_cast(src+begin1+i*8)); // float 4*32=128 4 pixels at a time + data_h_single = _mm_loadu_ps(reinterpret_cast(src+begin1+i*8+4)); // float 4*32=128 4 pixels at a time + } + __m128 coeff_l = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m128 coeff_h = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff+4)); // always aligned + __m128 dst_l = _mm_mul_ps(data_l_single, coeff_l); // Multiply by coefficient + __m128 dst_h = _mm_mul_ps(data_h_single, coeff_h); // 4*(32bit*32bit=32bit) + result1 = _mm_add_ps(result1, dst_l); // accumulate result. 
+ result1 = _mm_add_ps(result1, dst_h); + + current_coeff += 8; + } + + // begin2, result2 + for (int i = 0; i < filter_size; i++) { + __m128 data_l_single, data_h_single; + if(sizeof(pixel_t)==2) // word + { + // unaligned + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin2+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero); // spread higher 4*uint16_t pixel value -> 4*32 bit + data_l_single = _mm_cvtepi32_ps (src_l); // Converts the four signed 32-bit integer values of a to single-precision, floating-point values. + data_h_single = _mm_cvtepi32_ps (src_h); + } + else { // float + // unaligned + data_l_single = _mm_loadu_ps(reinterpret_cast(src+begin2+i*8)); // float 4*32=128 4 pixels at a time + data_h_single = _mm_loadu_ps(reinterpret_cast(src+begin2+i*8+4)); // float 4*32=128 4 pixels at a time + } + __m128 coeff_l = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m128 coeff_h = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff+4)); // always aligned + __m128 dst_l = _mm_mul_ps(data_l_single, coeff_l); // Multiply by coefficient + __m128 dst_h = _mm_mul_ps(data_h_single, coeff_h); // 4*(32bit*32bit=32bit) + result2 = _mm_add_ps(result2, dst_l); // accumulate result. + result2 = _mm_add_ps(result2, dst_h); + + current_coeff += 8; + } + + // begin3, result3 + for (int i = 0; i < filter_size; i++) { + __m128 data_l_single, data_h_single; + if(sizeof(pixel_t)==2) // word + { + // unaligned + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin3+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero); // spread higher 4*uint16_t pixel value -> 4*32 bit + data_l_single = _mm_cvtepi32_ps (src_l); // Converts the four signed 32-bit integer values of a to single-precision, floating-point values. + data_h_single = _mm_cvtepi32_ps (src_h); + } + else { // float + // unaligned + data_l_single = _mm_loadu_ps(reinterpret_cast(src+begin3+i*8)); // float 4*32=128 4 pixels at a time + data_h_single = _mm_loadu_ps(reinterpret_cast(src+begin3+i*8+4)); // float 4*32=128 4 pixels at a time + } + __m128 coeff_l = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m128 coeff_h = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff+4)); // always aligned + __m128 dst_l = _mm_mul_ps(data_l_single, coeff_l); // Multiply by coefficient + __m128 dst_h = _mm_mul_ps(data_h_single, coeff_h); // 4*(32bit*32bit=32bit) + result3 = _mm_add_ps(result3, dst_l); // accumulate result. + result3 = _mm_add_ps(result3, dst_h); + + current_coeff += 8; + } + + // begin4, result4 + for (int i = 0; i < filter_size; i++) { + __m128 data_l_single, data_h_single; + if(sizeof(pixel_t)==2) // word + { + // unaligned + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin4+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero); // spread higher 4*uint16_t pixel value -> 4*32 bit + data_l_single = _mm_cvtepi32_ps (src_l); // Converts the four signed 32-bit integer values of a to single-precision, floating-point values. 
+ data_h_single = _mm_cvtepi32_ps (src_h); + } + else { // float + // unaligned + data_l_single = _mm_loadu_ps(reinterpret_cast(src+begin4+i*8)); // float 4*32=128 4 pixels at a time + data_h_single = _mm_loadu_ps(reinterpret_cast(src+begin4+i*8+4)); // float 4*32=128 4 pixels at a time + } + __m128 coeff_l = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m128 coeff_h = /*loadps*/_mm_load_ps(reinterpret_cast(current_coeff+4)); // always aligned + __m128 dst_l = _mm_mul_ps(data_l_single, coeff_l); // Multiply by coefficient + __m128 dst_h = _mm_mul_ps(data_h_single, coeff_h); // 4*(32bit*32bit=32bit) + result4 = _mm_add_ps(result4, dst_l); // accumulate result. + result4 = _mm_add_ps(result4, dst_h); + + current_coeff += 8; + } + + __m128 result; + + // this part needs ssse3 + __m128 result12 = _mm_hadd_ps(result1, result2); + __m128 result34 = _mm_hadd_ps(result3, result4); + result = _mm_hadd_ps(result12, result34); + + if (sizeof(pixel_t) == 2) + { + result = _mm_min_ps(result, clamp_limit); // mainly for 10-14 bit + // result = _mm_max_ps(result, zero); low limit through pack_us + } + + if(sizeof(pixel_t)==2) // word + { + // Converts the four single-precision, floating-point values of a to signed 32-bit integer values. + __m128i result_4x_int32 = _mm_cvtps_epi32(result); // 4 * 32 bit integers + // SIMD Extensions 4 (SSE4) packus or simulation + __m128i result_4x_uint16 = hasSSE41 ? _mm_packus_epi32(result_4x_int32, zero) : (_MM_PACKUS_EPI32(result_4x_int32, zero)) ; // 4*32+zeros = lower 4*16 OK + _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + x), result_4x_uint16); + } else { // float + // aligned + _mm_store_ps(reinterpret_cast(dst+x), result); // 4 results at a time + } + + } + + dst += dst_pitch; + src += src_pitch; + } + _mm256_zeroupper(); +} + +template +void resizer_h_avx_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel) { + + int filter_size = AlignNumber(program->filter_size, 8) / 8; + const __m128 zero = _mm_setzero_ps(); + const __m128i zero128 = _mm_setzero_si128(); + + const pixel_t *src = reinterpret_cast(src8); + pixel_t *dst = reinterpret_cast(dst8); + dst_pitch /= sizeof(pixel_t); + src_pitch /= sizeof(pixel_t); + + __m128 clamp_limit; + if (sizeof(pixel_t) == 2) + clamp_limit = _mm_set1_ps((float)(((int)1 << bits_per_pixel) - 1)); // clamp limit + + for (int y = 0; y < height; y++) { + float* current_coeff = program->pixel_coefficient_float; + +#if 0 + for (int x = 0; x < width; x+=4) { + __m256 result1 = _mm256_setzero_ps(); + __m256 result2 = result1; + __m256 result3 = result1; + __m256 result4 = result1; + __m256 coeff; + + int *begin = &program->pixel_offset[x]; // x+0..x+3 + + for(int a = 0; a<4; a++) + { + result4 = _mm256_setzero_ps(); + // begin1, result1 + for (int i = 0; i < filter_size; i++) { + __m256 data_single; + if(sizeof(pixel_t)==2) // word + { + // AVX2 _mm256_cvtepu16_epi32 + //__m256i src256 = _mm256_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast(src + begin1 + i * 8))); // 8*16->8*32 bits + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src + *begin + i * 8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero128); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero128); // spread higher 4*uint16_t pixel value -> 4*32 bit + __m256i src256 = _mm256_set_m128i(src_h, src_l); + data_single = _mm256_cvtepi32_ps (src256); // 
Converts the 8x signed 32-bit integer values of a to single-precision, floating-point values. + coeff = /*loadps*/_mm256_loadu_ps(reinterpret_cast(current_coeff)); // always aligned + } + else { // float + // unaligned + /* + data_single = _mm256_castsi256_ps( + _mm256_lddqu_si256(reinterpret_cast( + reinterpret_cast(src + *begin + i * 8))) + ); + */ + // float 8*32=256 8 pixels at a time +#ifndef __AVX2__ + data_single = _mm256_loadu_ps(reinterpret_cast(src + *begin + i * 8)); + // for AVX2 + coeff = _mm256_load_ps(reinterpret_cast(current_coeff)); // always aligned +#else + data_single = _mm256_loadu2_m128(reinterpret_cast(src+*begin+i*8+4), reinterpret_cast(src+*begin+i*8 )); // float 4*32=128 4 pixels at a time + //__m128i data_l_single = _mm_loadu_ps(reinterpret_cast(src+begin+i*8 )); // float 4*32=128 4 pixels at a time + //__m128i data_h_single = _mm_loadu_ps(reinterpret_cast(src+begin+i*8+4)); // float 4*32=128 4 pixels at a time + coeff = _mm256_load_ps(reinterpret_cast(current_coeff)); // always aligned + /* + __m128 coeff_l = _mm_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m128 coeff_h = _mm_load_ps(reinterpret_cast(current_coeff+4)); // always aligned + coeff = _mm256_set_m128(coeff_h, coeff_l); + */ +#endif + } + + //__m128 dst_l = _mm_mul_ps(data_l_single, coeff_l); // Multiply by coefficient + //__m128 dst_h = _mm_mul_ps(data_h_single, coeff_h); // 4*(32bit*32bit=32bit) + __m256 dst = _mm256_mul_ps(data_single, coeff); // Multiply by coefficient + //result1 = _mm_add_ps(result1, dst_l); // accumulate result. + //result1 = _mm_add_ps(result1, dst_h); + result4 = _mm256_add_ps(result4, dst); + + current_coeff += 8; + } + switch (a) { + case 0: result1 = result4; break; + case 1: result2 = result4; break; + case 2: result3 = result4; break; + // result 4 ok + } + begin++; + } + + + __m128 result; + + // result1: A0 A1 A2 A3 A4 A5 A6 A7 + // result2: B0 B1 B2 B3 B4 B5 B6 B7 + /* + ymm2 = _mm256_permute2f128_ps(ymm , ymm , 1); + ymm = _mm256_add_ps(ymm, ymm2); + ymm = _mm256_hadd_ps(ymm, ymm); + ymm = _mm256_hadd_ps(ymm, ymm); + */ + // hiQuad = ( x7, x6, x5, x4 ) + //const __m128 hiQuad = _mm256_extractf128_ps(result1, 1); + // loQuad = ( x3, x2, x1, x0 ) + //const __m128 loQuad = _mm256_castps256_ps128(result); + // sumQuad = ( x3 + x7, x2 + x6, x1 + x5, x0 + x4 ) + const __m128 sumQuad1 = _mm_add_ps(_mm256_castps256_ps128(result1), _mm256_extractf128_ps(result1, 1)); + const __m128 sumQuad2 = _mm_add_ps(_mm256_castps256_ps128(result2), _mm256_extractf128_ps(result2, 1)); + __m128 result12 = _mm_hadd_ps(sumQuad1, sumQuad2); + const __m128 sumQuad3 = _mm_add_ps(_mm256_castps256_ps128(result3), _mm256_extractf128_ps(result3, 1)); + const __m128 sumQuad4 = _mm_add_ps(_mm256_castps256_ps128(result4), _mm256_extractf128_ps(result4, 1)); + __m128 result34 = _mm_hadd_ps(sumQuad3, sumQuad4); + result = _mm_hadd_ps(result12, result34); + + if (sizeof(pixel_t) == 2) + { + result = _mm_min_ps(result, clamp_limit); // mainly for 10-14 bit + // result = _mm_max_ps(result, zero); low limit through pack_us + } + /* + __m256 result12 = _mm256_hadd_ps(result1, result2); + // result12: A0+A1, A2+A3, B0+B1, B2+B3, A4+A5, A6+A7, B4+B5, B6+B7 + __m256 result34 = _mm256_hadd_ps(result3, result4); + result = _mm256_hadd_ps(result12, result34); + */ + if(sizeof(pixel_t)==2) // word + { + // Converts the four single-precision, floating-point values of a to signed 32-bit integer values. 
+ __m128i result_4x_int32 = _mm_cvtps_epi32(result); // 4 * 32 bit integers + __m128i result_4x_uint16 = _mm_packus_epi32(result_4x_int32, zero128); // 4*32+zeros = lower 4*16 OK + _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + x), result_4x_uint16); + } + else { // float + // aligned + //_mm_store_ps(reinterpret_cast(dst+x), result); // 4 results at a time + _mm_stream_ps(reinterpret_cast(dst+x), result); // 4 results at a time + } + + } +#else + for (int x = 0; x < width; x+=4) { + __m256 result1 = _mm256_setzero_ps(); + __m256 result2 = result1; + __m256 result3 = result1; + __m256 result4 = result1; + + int begin1 = program->pixel_offset[x+0]; + int begin2 = program->pixel_offset[x+1]; + int begin3 = program->pixel_offset[x+2]; + int begin4 = program->pixel_offset[x+3]; + + // begin1, result1 + for (int i = 0; i < filter_size; i++) { + __m256 data_single; + if(sizeof(pixel_t)==2) // word + { + // AVX2 _mm256_cvtepu16_epi32 + //__m256i src256 = _mm256_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast(src + begin1 + i * 8))); // 8*16->8*32 bits + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin1+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero128); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero128); // spread higher 4*uint16_t pixel value -> 4*32 bit + __m256i src256 = _mm256_set_m128i(src_h, src_l); + data_single = _mm256_cvtepi32_ps (src256); // Converts the 8x signed 32-bit integer values of a to single-precision, floating-point values. + } + else { // float + // unaligned + data_single = _mm256_loadu_ps(reinterpret_cast(src+begin1+i*8)); // float 8*32=256 8 pixels at a time + //data_l_single = _mm_loadu_ps(reinterpret_cast(src+begin1+i*8 )); // float 4*32=128 4 pixels at a time + //data_h_single = _mm_loadu_ps(reinterpret_cast(src+begin1+i*8+4)); // float 4*32=128 4 pixels at a time + } + //__m128 coeff_l = _mm_load_ps(reinterpret_cast(current_coeff)); // always aligned + //__m128 coeff_h = _mm_load_ps(reinterpret_cast(current_coeff+4)); // always aligned + __m256 coeff = _mm256_load_ps(reinterpret_cast(current_coeff)); // always aligned + //__m128 dst_l = _mm_mul_ps(data_l_single, coeff_l); // Multiply by coefficient + //__m128 dst_h = _mm_mul_ps(data_h_single, coeff_h); // 4*(32bit*32bit=32bit) + __m256 dst = _mm256_mul_ps(data_single, coeff); // Multiply by coefficient + //result1 = _mm_add_ps(result1, dst_l); // accumulate result. + //result1 = _mm_add_ps(result1, dst_h); + result1 = _mm256_add_ps(result1, dst); + + current_coeff += 8; + } + + // begin2, result2 + for (int i = 0; i < filter_size; i++) { + __m256 data_single; + if(sizeof(pixel_t)==2) // word + { + // AVX2 _mm256_cvtepu16_epi32 + //__m256i src256 = _mm256_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast(src + begin2 + i * 8))); // 8*16->8*32 bits + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin2+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero128); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero128); // spread higher 4*uint16_t pixel value -> 4*32 bit + __m256i src256 = _mm256_set_m128i(src_h, src_l); + data_single = _mm256_cvtepi32_ps (src256); // Converts the 8x signed 32-bit integer values of a to single-precision, floating-point values. 
+ } + else { // float + data_single = _mm256_loadu_ps(reinterpret_cast(src+begin2+i*8)); // float 8*32=256 8 pixels at a time + } + __m256 coeff = _mm256_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m256 dst = _mm256_mul_ps(data_single, coeff); // Multiply by coefficient + result2 = _mm256_add_ps(result2, dst); + + current_coeff += 8; + } + + // begin3, result3 + for (int i = 0; i < filter_size; i++) { + __m256 data_single; + if(sizeof(pixel_t)==2) // word + { + // AVX2 _mm256_cvtepu16_epi32 + //__m256i src256 = _mm256_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast(src + begin3 + i * 8))); // 8*16->8*32 bits + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin3+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero128); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero128); // spread higher 4*uint16_t pixel value -> 4*32 bit + __m256i src256 = _mm256_set_m128i(src_h, src_l); + data_single = _mm256_cvtepi32_ps (src256); // Converts the 8x signed 32-bit integer values of a to single-precision, floating-point values. + } + else { // float + data_single = _mm256_loadu_ps(reinterpret_cast(src+begin3+i*8)); // float 8*32=256 8 pixels at a time + } + __m256 coeff = _mm256_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m256 dst = _mm256_mul_ps(data_single, coeff); // Multiply by coefficient + result3 = _mm256_add_ps(result3, dst); + + current_coeff += 8; + } + + // begin4, result4 + for (int i = 0; i < filter_size; i++) { + __m256 data_single; + if(sizeof(pixel_t)==2) // word + { + // AVX2 _mm256_cvtepu16_epi32 + //__m256i src256 = _mm256_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast(src + begin4 + i * 8))); // 8*16->8*32 bits + __m128i src_p = _mm_loadu_si128(reinterpret_cast(src+begin4+i*8)); // uint16_t 8*16=128 8 pixels at a time + __m128i src_l = _mm_unpacklo_epi16(src_p, zero128); // spread lower 4*uint16_t pixel value -> 4*32 bit + __m128i src_h = _mm_unpackhi_epi16(src_p, zero128); // spread higher 4*uint16_t pixel value -> 4*32 bit + __m256i src256 = _mm256_set_m128i(src_h, src_l); + data_single = _mm256_cvtepi32_ps (src256); // Converts the 8x signed 32-bit integer values of a to single-precision, floating-point values. 
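Note (sketch, not part of the patch): stripped of the 4-pixel unrolling, each output pixel of the float path is a dot product over a filter whose length has been padded up to a multiple of 8 (presumably with zeros) by resize_h_prepare_coeff_8. The helper below is a made-up name; it uses unaligned coefficient loads to stay conservative, whereas the patch can use _mm256_load_ps because its coefficient table is aligned:

#include <immintrin.h>

static inline float resample_one_pixel_float(const float* src, const float* coeff,
                                             int begin, int filter_size_blocks)
{
  __m256 acc = _mm256_setzero_ps();
  for (int i = 0; i < filter_size_blocks; i++) {       // blocks of 8 taps
    __m256 d = _mm256_loadu_ps(src + begin + i * 8);   // 8 source samples
    __m256 c = _mm256_loadu_ps(coeff + i * 8);         // 8 coefficients (tail padded)
    acc = _mm256_add_ps(acc, _mm256_mul_ps(d, c));
  }
  // reduce the 8 partial sums to a scalar
  __m128 s = _mm_add_ps(_mm256_castps256_ps128(acc), _mm256_extractf128_ps(acc, 1));
  s = _mm_hadd_ps(s, s);
  s = _mm_hadd_ps(s, s);
  return _mm_cvtss_f32(s);
}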
+ } + else { // float + data_single = _mm256_loadu_ps(reinterpret_cast(src+begin4+i*8)); // float 8*32=256 8 pixels at a time + } + __m256 coeff = _mm256_load_ps(reinterpret_cast(current_coeff)); // always aligned + __m256 dst = _mm256_mul_ps(data_single, coeff); // Multiply by coefficient + result4 = _mm256_add_ps(result4, dst); + + current_coeff += 8; + } + + __m128 result; + + const __m128 sumQuad1 = _mm_add_ps(_mm256_castps256_ps128(result1), _mm256_extractf128_ps(result1, 1)); + const __m128 sumQuad2 = _mm_add_ps(_mm256_castps256_ps128(result2), _mm256_extractf128_ps(result2, 1)); + __m128 result12 = _mm_hadd_ps(sumQuad1, sumQuad2); + const __m128 sumQuad3 = _mm_add_ps(_mm256_castps256_ps128(result3), _mm256_extractf128_ps(result3, 1)); + const __m128 sumQuad4 = _mm_add_ps(_mm256_castps256_ps128(result4), _mm256_extractf128_ps(result4, 1)); + __m128 result34 = _mm_hadd_ps(sumQuad3, sumQuad4); + result = _mm_hadd_ps(result12, result34); + + if (sizeof(pixel_t) == 2) + { + result = _mm_min_ps(result, clamp_limit); // mainly for 10-14 bit + // result = _mm_max_ps(result, zero); low limit through pack_us + } + if(sizeof(pixel_t)==2) // word + { + // Converts the four single-precision, floating-point values of a to signed 32-bit integer values. + __m128i result_4x_int32 = _mm_cvtps_epi32(result); // 4 * 32 bit integers + __m128i result_4x_uint16 = _mm_packus_epi32(result_4x_int32, zero128); // 4*32+zeros = lower 4*16 OK + _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + x), result_4x_uint16); + } + else { // float + // aligned + _mm_store_ps(reinterpret_cast(dst+x), result); // 4 results at a time + } + + } +#endif + + dst += dst_pitch; + src += src_pitch; + } + _mm256_zeroupper(); +} + +// instantiate here +template void resizer_h_avx_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel); +template void resizer_h_avx_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel); + +template void resizer_h_ssse3_as_avx_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel); +template void resizer_h_ssse3_as_avx_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel); +template void resizer_h_ssse3_as_avx_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel); +template void resizer_h_ssse3_as_avx_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel); diff --git a/avs_core/filters/resample_avx.h b/avs_core/filters/resample_avx.h new file mode 100644 index 000000000..3f825bd93 --- /dev/null +++ b/avs_core/filters/resample_avx.h @@ -0,0 +1,48 @@ +// Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. +// http://www.avisynth.org + +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. 
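Note (sketch, not part of the patch): after the upper clamp against (1 << bits_per_pixel) - 1, the word path stores four results with unsigned saturation, which also covers the low end (negative overshoot becomes 0, matching the "low limit through pack_us" comment). Standalone, with an illustrative name:

#include <smmintrin.h>   // _mm_packus_epi32 is SSE4.1; any AVX CPU has it
#include <cstdint>

static inline void store4_words(uint16_t* dst, __m128 clamped_result)
{
  __m128i i32 = _mm_cvtps_epi32(clamped_result);               // 4 x float -> 4 x int32 (rounded)
  __m128i u16 = _mm_packus_epi32(i32, _mm_setzero_si128());    // saturate to uint16, keep low 4 lanes
  _mm_storel_epi64(reinterpret_cast<__m128i*>(dst), u16);      // write exactly 64 bits (4 pixels)
}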
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit +// http://www.gnu.org/copyleft/gpl.html . +// +// Linking Avisynth statically or dynamically with other modules is making a +// combined work based on Avisynth. Thus, the terms and conditions of the GNU +// General Public License cover the whole combination. +// +// As a special exception, the copyright holders of Avisynth give you +// permission to link Avisynth with independent modules that communicate with +// Avisynth solely through the interfaces defined in avisynth.h, regardless of the license +// terms of these independent modules, and to copy and distribute the +// resulting combined work under terms of your choice, provided that +// every copy of the combined work is accompanied by a complete copy of +// the source code of Avisynth (the version of Avisynth used to produce the +// combined work), being distributed under the terms of the GNU General +// Public License plus this exception. An independent module is a module +// which is not derived from or based on Avisynth, such as 3rd-party filters, +// import and export plugins, or graphical user interfaces. + +#ifndef __Resample_AVX_H__ +#define __Resample_AVX_H__ + +#include +#include "resample_functions.h" + +template +void resizer_h_avx_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel); + +template +void resizer_h_ssse3_as_avx_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel); + + +#endif // __Resample_AVX_H__ From fdb04487dcc5c545af31be620eceed9cc2e1d746 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 12 Oct 2016 16:29:25 +0200 Subject: [PATCH 097/120] Resizers: proper clamping for 10,12,14 bits --- avs_core/filters/resample.cpp | 135 ++++++++++++++++++++-------------- avs_core/filters/resample.h | 10 ++- 2 files changed, 87 insertions(+), 58 deletions(-) diff --git a/avs_core/filters/resample.cpp b/avs_core/filters/resample.cpp index c6dff4032..832b10412 100644 --- a/avs_core/filters/resample.cpp +++ b/avs_core/filters/resample.cpp @@ -33,6 +33,7 @@ // import and export plugins, or graphical user interfaces. 
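Note on the commit above (illustrative, not part of the patch): the point of threading bits_per_pixel through every resampler signature is that 10/12/14-bit clips are stored in uint16_t but their valid ceiling is (1 << bits_per_pixel) - 1, not 65535. The scalar form of the new clamp, as a sketch with a made-up helper name:

#include <algorithm>
#include <cstdint>

static inline uint16_t clamp_to_bitdepth(float result, int bits_per_pixel)
{
  const float limit = (float)((1 << bits_per_pixel) - 1);     // 1023, 4095, 16383 or 65535
  return (uint16_t)std::min(std::max(result, 0.0f), limit);   // negative filter lobes can undershoot
}

The SIMD paths do the same thing with _mm_min_ps against a broadcast limit and leave the lower bound to the unsigned pack.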
#include "resample.h" +#include "resample_avx.h" #include #include "../core/internal.h" @@ -93,7 +94,7 @@ __forceinline __m128 simd_loadps_unaligned(const float* adr) ***************************************/ template -static void resize_v_planar_pointresize(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, const int* pitch_table, const void* storage) +static void resize_v_planar_pointresize(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, int bits_per_pixel, const int* pitch_table, const void* storage) { int filter_size = program->filter_size; @@ -112,7 +113,7 @@ static void resize_v_planar_pointresize(BYTE* dst, const BYTE* src, int dst_pitc } template -static void resize_v_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, const int* pitch_table, const void* storage) +static void resize_v_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, int bits_per_pixel, const int* pitch_table, const void* storage) { int filter_size = program->filter_size; @@ -131,7 +132,7 @@ static void resize_v_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src pixel_t limit = 0; if (!std::is_floating_point::value) { // floats are unscaled and uncapped if (sizeof(pixel_t) == 1) limit = 255; - else if (sizeof(pixel_t) == 2) limit = pixel_t(65535); + else if (sizeof(pixel_t) == 2) limit = pixel_t((1 << bits_per_pixel) - 1); } for (int y = 0; y < target_height; y++) { @@ -158,7 +159,7 @@ static void resize_v_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src } #ifdef X86_32 -static void resize_v_mmx_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, const int* pitch_table, const void* storage) +static void resize_v_mmx_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, int bits_per_pixel, const int* pitch_table, const void* storage) { int filter_size = program->filter_size; short* current_coeff = program->pixel_coefficient; @@ -263,7 +264,7 @@ static void resize_v_mmx_planar(BYTE* dst, const BYTE* src, int dst_pitch, int s #endif template -static void resize_v_sse2_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, const int* pitch_table, const void* storage) +static void resize_v_sse2_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, int bits_per_pixel, const int* pitch_table, const void* storage) { int filter_size = program->filter_size; short* current_coeff = program->pixel_coefficient; @@ -366,7 +367,7 @@ static void resize_v_sse2_planar(BYTE* dst, const BYTE* src, int dst_pitch, int // for uint16_t and float. 
Both uses float arithmetic and coefficients template -static void resize_v_sseX_planar_16or32(BYTE* dst0, const BYTE* src0, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, const int* pitch_table, const void* storage) +static void resize_v_sseX_planar_16or32(BYTE* dst0, const BYTE* src0, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, int bits_per_pixel, const int* pitch_table, const void* storage) { int filter_size = program->filter_size; //short* current_coeff = program->pixel_coefficient; @@ -380,6 +381,13 @@ static void resize_v_sseX_planar_16or32(BYTE* dst0, const BYTE* src0, int dst_pi pixel_t* dst = (pixel_t *)dst0; dst_pitch = dst_pitch / sizeof(pixel_t); src_pitch = src_pitch / sizeof(pixel_t); + + __m128 clamp_limit; + float limit; + if (sizeof(pixel_t) == 2) { + limit = (float)(((int)1 << bits_per_pixel) - 1); + clamp_limit = _mm_set1_ps(limit); // clamp limit + } for (int y = 0; y < target_height; y++) { int offset = program->pixel_offset[y]; @@ -419,6 +427,10 @@ static void resize_v_sseX_planar_16or32(BYTE* dst0, const BYTE* src0, int dst_pi if(sizeof(pixel_t)==2) // word { + // clamp! + result_l_single = _mm_min_ps(result_l_single, clamp_limit); // mainly for 10-14 bit + result_h_single = _mm_min_ps(result_h_single, clamp_limit); // mainly for 10-14 bit + // result = _mm_max_ps(result, zero); low limit through pack_us // Converts the four single-precision, floating-point values of a to signed 32-bit integer values. __m128i result_l = _mm_cvtps_epi32(result_l_single); __m128i result_h = _mm_cvtps_epi32(result_h_single); @@ -440,7 +452,7 @@ static void resize_v_sseX_planar_16or32(BYTE* dst0, const BYTE* src0, int dst_pi result += (src_ptr+pitch_table[i]/sizeof(pixel_t))[x] * current_coeff_float[i]; } if (!std::is_floating_point::value) { // floats are unscaled and uncapped - result = clamp(result, 0.0f, 65535.0f); + result = clamp(result, 0.0f, limit); } dst[x] = (pixel_t) result; } @@ -452,7 +464,7 @@ static void resize_v_sseX_planar_16or32(BYTE* dst0, const BYTE* src0, int dst_pi template -static void resize_v_ssse3_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, const int* pitch_table, const void* storage) +static void resize_v_ssse3_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, int bits_per_pixel, const int* pitch_table, const void* storage) { int filter_size = program->filter_size; short* current_coeff = program->pixel_coefficient; @@ -532,7 +544,7 @@ __forceinline static void resize_v_create_pitch_table(int* table, int pitch, int ********* Horizontal Resizer** ******** ***************************************/ -static void resize_h_pointresize(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height) { +static void resize_h_pointresize(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel) { int wMod4 = width/4 * 4; for (int y = 0; y < height; y++) { @@ -589,7 +601,7 @@ static void resize_h_prepare_coeff_8(ResamplingProgram* p, IScriptEnvironment2* } template -static void resize_h_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height) { +static void resize_h_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, 
int height, int bits_per_pixel) { int filter_size = program->filter_size; typedef typename std::conditional < std::is_floating_point::value, float, short>::type coeff_t; @@ -598,7 +610,7 @@ static void resize_h_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src pixel_t limit = 0; if (!std::is_floating_point::value) { // floats are unscaled and uncapped if (sizeof(pixel_t) == 1) limit = 255; - else if (sizeof(pixel_t) == 2) limit = pixel_t(65535); + else if (sizeof(pixel_t) == 2) limit = pixel_t((1 << bits_per_pixel) - 1); } src_pitch = src_pitch / sizeof(pixel_t); @@ -631,8 +643,9 @@ static void resize_h_c_planar(BYTE* dst, const BYTE* src, int dst_pitch, int src } } +#if 1 template -static void resizer_h_ssse3_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height) { +static void resizer_h_ssse3_generic_int16_float(BYTE* dst8, const BYTE* src8, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel) { int filter_size = AlignNumber(program->filter_size, 8) / 8; __m128i zero = _mm_setzero_si128(); @@ -641,6 +654,10 @@ static void resizer_h_ssse3_generic_int16_float(BYTE* dst8, const BYTE* src8, in dst_pitch /= sizeof(pixel_t); src_pitch /= sizeof(pixel_t); + __m128 clamp_limit; + if (sizeof(pixel_t) == 2) + clamp_limit = _mm_set1_ps((float)(((int)1 << bits_per_pixel) - 1)); // clamp limit + for (int y = 0; y < height; y++) { float* current_coeff = program->pixel_coefficient_float; for (int x = 0; x < width; x+=4) { @@ -769,19 +786,20 @@ static void resizer_h_ssse3_generic_int16_float(BYTE* dst8, const BYTE* src8, in __m128 result34 = _mm_hadd_ps(result3, result4); result = _mm_hadd_ps(result12, result34); + if (sizeof(pixel_t) == 2) + { + result = _mm_min_ps(result, clamp_limit); // mainly for 10-14 bit + // result = _mm_max_ps(result, zero); low limit through pack_us + } + if(sizeof(pixel_t)==2) // word { // Converts the four single-precision, floating-point values of a to signed 32-bit integer values. __m128i result_4x_int32 = _mm_cvtps_epi32(result); // 4 * 32 bit integers // SIMD Extensions 4 (SSE4) packus or simulation __m128i result_4x_uint16 = hasSSE41 ? 
_mm_packus_epi32(result_4x_int32, zero) : (_MM_PACKUS_EPI32(result_4x_int32, zero)) ; // 4*32+zeros = lower 4*16 OK -#ifdef X86_32 - *((uint64_t *)(dst + x)) = _mm_cvtsi128_si32(result_4x_uint16) + (((__int64)_mm_cvtsi128_si32(_mm_srli_si128(result_4x_uint16, 4))) << 32); -#else - *((uint64_t *)(dst + x)) = _mm_cvtsi128_si64(result_4x_uint16); // 64 bit only -#endif - } - else { // float + _mm_storel_epi64(reinterpret_cast<__m128i *>(dst + x), result_4x_uint16); + } else { // float // aligned _mm_store_ps(reinterpret_cast(dst+x), result); // 4 results at a time } @@ -792,8 +810,9 @@ static void resizer_h_ssse3_generic_int16_float(BYTE* dst8, const BYTE* src8, in src += src_pitch; } } +#endif -static void resizer_h_ssse3_generic(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height) { +static void resizer_h_ssse3_generic(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel) { int filter_size = AlignNumber(program->filter_size, 8) / 8; __m128i zero = _mm_setzero_si128(); @@ -871,7 +890,7 @@ static void resizer_h_ssse3_generic(BYTE* dst, const BYTE* src, int dst_pitch, i } } -static void resizer_h_ssse3_8(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height) { +static void resizer_h_ssse3_8(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int height, int bits_per_pixel) { int filter_size = AlignNumber(program->filter_size, 8) / 8; __m128i zero = _mm_setzero_si128(); @@ -986,6 +1005,7 @@ FilteredResizeH::FilteredResizeH( PClip _child, double subrange_left, double sub dst_height = vi.height; pixelsize = vi.ComponentSize(); // AVS16 + bits_per_pixel = vi.BitsPerComponent(); grey = vi.IsY(); bool isRGBPfamily = vi.IsPlanarRGB() || vi.IsPlanarRGBA(); @@ -1038,9 +1058,9 @@ FilteredResizeH::FilteredResizeH( PClip _child, double subrange_left, double sub // Create resampling program and pitch table src_pitch_table_luma = new int[vi.width]; - resampler_luma = FilteredResizeV::GetResampler(env->GetCPUFlags(), true, pixelsize, filter_storage_luma, resampling_program_luma); + resampler_luma = FilteredResizeV::GetResampler(env->GetCPUFlags(), true, pixelsize, bits_per_pixel, filter_storage_luma, resampling_program_luma); if (vi.IsPlanar() && !grey && !isRGBPfamily) { - resampler_chroma = FilteredResizeV::GetResampler(env->GetCPUFlags(), true, pixelsize, filter_storage_chroma, resampling_program_chroma); + resampler_chroma = FilteredResizeV::GetResampler(env->GetCPUFlags(), true, pixelsize, bits_per_pixel, filter_storage_chroma, resampling_program_chroma); } // Temporary buffer size @@ -1105,10 +1125,10 @@ FilteredResizeH::FilteredResizeH( PClip _child, double subrange_left, double sub } } } else { // Plannar + SSSE3 = use new horizontal resizer routines - resampler_h_luma = GetResampler(env->GetCPUFlags(), true, pixelsize, resampling_program_luma, env2); + resampler_h_luma = GetResampler(env->GetCPUFlags(), true, pixelsize, bits_per_pixel, resampling_program_luma, env2); if (!grey && !isRGBPfamily) { - resampler_h_chroma = GetResampler(env->GetCPUFlags(), true, pixelsize, resampling_program_chroma, env2); + resampler_h_chroma = GetResampler(env->GetCPUFlags(), true, pixelsize, bits_per_pixel, resampling_program_chroma, env2); } } @@ -1139,17 +1159,17 @@ PVideoFrame __stdcall FilteredResizeH::GetFrame(int n, IScriptEnvironment* env) if (!vi.IsRGB() || isRGBPfamily) { // Y/G 
Plane turn_right(src->GetReadPtr(), temp_1, src_width * pixelsize, src_height, src->GetPitch(), temp_1_pitch); // * pixelsize: turn_right needs GetPlaneWidth full size - resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, src_pitch_table_luma, filter_storage_luma); + resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, bits_per_pixel, src_pitch_table_luma, filter_storage_luma); turn_left(temp_2, dst->GetWritePtr(), dst_height * pixelsize, dst_width, temp_2_pitch, dst->GetPitch()); if (isRGBPfamily) { turn_right(src->GetReadPtr(PLANAR_B), temp_1, src_width * pixelsize, src_height, src->GetPitch(PLANAR_B), temp_1_pitch); // * pixelsize: turn_right needs GetPlaneWidth full size - resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, src_pitch_table_luma, filter_storage_luma); + resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, bits_per_pixel, src_pitch_table_luma, filter_storage_luma); turn_left(temp_2, dst->GetWritePtr(PLANAR_B), dst_height * pixelsize, dst_width, temp_2_pitch, dst->GetPitch(PLANAR_B)); turn_right(src->GetReadPtr(PLANAR_R), temp_1, src_width * pixelsize, src_height, src->GetPitch(PLANAR_R), temp_1_pitch); // * pixelsize: turn_right needs GetPlaneWidth full size - resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, src_pitch_table_luma, filter_storage_luma); + resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, bits_per_pixel, src_pitch_table_luma, filter_storage_luma); turn_left(temp_2, dst->GetWritePtr(PLANAR_R), dst_height * pixelsize, dst_width, temp_2_pitch, dst->GetPitch(PLANAR_R)); } else if(!grey) { @@ -1164,18 +1184,18 @@ PVideoFrame __stdcall FilteredResizeH::GetFrame(int n, IScriptEnvironment* env) // turn_xxx: width * pixelsize: needs GetPlaneWidth-like full size // U Plane turn_right(src->GetReadPtr(PLANAR_U), temp_1, src_chroma_width * pixelsize, src_chroma_height, src->GetPitch(PLANAR_U), temp_1_pitch); - resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_chroma, src_chroma_height, dst_chroma_width, src_pitch_table_luma, filter_storage_chroma); + resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_chroma, src_chroma_height, dst_chroma_width, bits_per_pixel, src_pitch_table_luma, filter_storage_chroma); turn_left(temp_2, dst->GetWritePtr(PLANAR_U), dst_chroma_height * pixelsize, dst_chroma_width, temp_2_pitch, dst->GetPitch(PLANAR_U)); // V Plane turn_right(src->GetReadPtr(PLANAR_V), temp_1, src_chroma_width * pixelsize, src_chroma_height, src->GetPitch(PLANAR_V), temp_1_pitch); - resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_chroma, src_chroma_height, dst_chroma_width, src_pitch_table_luma, filter_storage_chroma); + resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_chroma, src_chroma_height, dst_chroma_width, bits_per_pixel, src_pitch_table_luma, filter_storage_chroma); turn_left(temp_2, dst->GetWritePtr(PLANAR_V), dst_chroma_height * pixelsize, dst_chroma_width, temp_2_pitch, dst->GetPitch(PLANAR_V)); } if (vi.IsYUVA() || vi.IsPlanarRGBA()) { turn_right(src->GetReadPtr(PLANAR_A), temp_1, src_width * pixelsize, src_height, src->GetPitch(PLANAR_A), temp_1_pitch); // * pixelsize: turn_right needs GetPlaneWidth full size - 
resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, src_pitch_table_luma, filter_storage_luma); + resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, src_height, dst_width, bits_per_pixel, src_pitch_table_luma, filter_storage_luma); turn_left(temp_2, dst->GetWritePtr(PLANAR_A), dst_height * pixelsize, dst_width, temp_2_pitch, dst->GetPitch(PLANAR_A)); } @@ -1183,7 +1203,7 @@ PVideoFrame __stdcall FilteredResizeH::GetFrame(int n, IScriptEnvironment* env) // packed RGB // First left, then right. Reason: packed RGB bottom to top. Right+left shifts RGB24/RGB32 image to the opposite horizontal direction turn_left(src->GetReadPtr(), temp_1, vi.BytesFromPixels(src_width), src_height, src->GetPitch(), temp_1_pitch); - resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, vi.BytesFromPixels(src_height) / pixelsize, dst_width, src_pitch_table_luma, filter_storage_luma); + resampler_luma(temp_2, temp_1, temp_2_pitch, temp_1_pitch, resampling_program_luma, vi.BytesFromPixels(src_height) / pixelsize, dst_width, bits_per_pixel, src_pitch_table_luma, filter_storage_luma); turn_right(temp_2, dst->GetWritePtr(), vi.BytesFromPixels(dst_height), dst_width, temp_2_pitch, dst->GetPitch()); } @@ -1192,25 +1212,25 @@ PVideoFrame __stdcall FilteredResizeH::GetFrame(int n, IScriptEnvironment* env) } else { // Y Plane - resampler_h_luma(dst->GetWritePtr(), src->GetReadPtr(), dst->GetPitch(), src->GetPitch(), resampling_program_luma, dst_width, dst_height); + resampler_h_luma(dst->GetWritePtr(), src->GetReadPtr(), dst->GetPitch(), src->GetPitch(), resampling_program_luma, dst_width, dst_height, bits_per_pixel); if (isRGBPfamily) { - resampler_h_luma(dst->GetWritePtr(PLANAR_B), src->GetReadPtr(PLANAR_B), dst->GetPitch(PLANAR_B), src->GetPitch(PLANAR_B), resampling_program_luma, dst_width, dst_height); - resampler_h_luma(dst->GetWritePtr(PLANAR_R), src->GetReadPtr(PLANAR_R), dst->GetPitch(PLANAR_R), src->GetPitch(PLANAR_R), resampling_program_luma, dst_width, dst_height); + resampler_h_luma(dst->GetWritePtr(PLANAR_B), src->GetReadPtr(PLANAR_B), dst->GetPitch(PLANAR_B), src->GetPitch(PLANAR_B), resampling_program_luma, dst_width, dst_height, bits_per_pixel); + resampler_h_luma(dst->GetWritePtr(PLANAR_R), src->GetReadPtr(PLANAR_R), dst->GetPitch(PLANAR_R), src->GetPitch(PLANAR_R), resampling_program_luma, dst_width, dst_height, bits_per_pixel); } else if (!grey) { const int dst_chroma_width = dst_width >> vi.GetPlaneWidthSubsampling(PLANAR_U); const int dst_chroma_height = dst_height >> vi.GetPlaneHeightSubsampling(PLANAR_U); // U Plane - resampler_h_chroma(dst->GetWritePtr(PLANAR_U), src->GetReadPtr(PLANAR_U), dst->GetPitch(PLANAR_U), src->GetPitch(PLANAR_U), resampling_program_chroma, dst_chroma_width, dst_chroma_height); + resampler_h_chroma(dst->GetWritePtr(PLANAR_U), src->GetReadPtr(PLANAR_U), dst->GetPitch(PLANAR_U), src->GetPitch(PLANAR_U), resampling_program_chroma, dst_chroma_width, dst_chroma_height, bits_per_pixel); // V Plane - resampler_h_chroma(dst->GetWritePtr(PLANAR_V), src->GetReadPtr(PLANAR_V), dst->GetPitch(PLANAR_V), src->GetPitch(PLANAR_V), resampling_program_chroma, dst_chroma_width, dst_chroma_height); + resampler_h_chroma(dst->GetWritePtr(PLANAR_V), src->GetReadPtr(PLANAR_V), dst->GetPitch(PLANAR_V), src->GetPitch(PLANAR_V), resampling_program_chroma, dst_chroma_width, dst_chroma_height, bits_per_pixel); } if (vi.IsYUVA() || vi.IsPlanarRGBA()) { - 
resampler_h_luma(dst->GetWritePtr(PLANAR_A), src->GetReadPtr(PLANAR_A), dst->GetPitch(PLANAR_A), src->GetPitch(PLANAR_A), resampling_program_luma, dst_width, dst_height); + resampler_h_luma(dst->GetWritePtr(PLANAR_A), src->GetReadPtr(PLANAR_A), dst->GetPitch(PLANAR_A), src->GetPitch(PLANAR_A), resampling_program_luma, dst_width, dst_height, bits_per_pixel); } } @@ -1218,7 +1238,7 @@ PVideoFrame __stdcall FilteredResizeH::GetFrame(int n, IScriptEnvironment* env) return dst; } -ResamplerH FilteredResizeH::GetResampler(int CPU, bool aligned, int pixelsize, ResamplingProgram* program, IScriptEnvironment2* env) +ResamplerH FilteredResizeH::GetResampler(int CPU, bool aligned, int pixelsize, int bits_per_pixel, ResamplingProgram* program, IScriptEnvironment2* env) { if (pixelsize == 1) { @@ -1237,6 +1257,9 @@ ResamplerH FilteredResizeH::GetResampler(int CPU, bool aligned, int pixelsize, R else if (pixelsize == 2) { if (CPU & CPUF_SSSE3) { resize_h_prepare_coeff_8(program, env); + if (CPU & CPUF_AVX) { + return resizer_h_avx_generic_int16_float; + } if (CPU & CPUF_SSE4_1) return resizer_h_ssse3_generic_int16_float; else @@ -1247,6 +1270,9 @@ ResamplerH FilteredResizeH::GetResampler(int CPU, bool aligned, int pixelsize, R if (CPU & CPUF_SSSE3) { resize_h_prepare_coeff_8(program, env); //if (program->filter_size > 8) + if (CPU & CPUF_AVX) { + return resizer_h_avx_generic_int16_float; + } if (CPU & CPUF_SSE4_1) return resizer_h_ssse3_generic_int16_float; else @@ -1278,6 +1304,7 @@ FilteredResizeV::FilteredResizeV( PClip _child, double subrange_top, double subr env->ThrowError("Resize: Height must be greater than 0."); pixelsize = vi.ComponentSize(); // AVS16 + bits_per_pixel = vi.BitsPerComponent(); grey = vi.IsY(); bool isRGBPfamily = vi.IsPlanarRGB() || vi.IsPlanarRGBA(); @@ -1296,8 +1323,8 @@ FilteredResizeV::FilteredResizeV( PClip _child, double subrange_top, double subr // Create resampling program and pitch table resampling_program_luma = func->GetResamplingProgram(vi.height, subrange_top, subrange_height, target_height, env2); - resampler_luma_aligned = GetResampler(env->GetCPUFlags(), true , pixelsize, filter_storage_luma_aligned, resampling_program_luma); - resampler_luma_unaligned = GetResampler(env->GetCPUFlags(), false, pixelsize, filter_storage_luma_unaligned, resampling_program_luma); + resampler_luma_aligned = GetResampler(env->GetCPUFlags(), true , pixelsize, bits_per_pixel, filter_storage_luma_aligned, resampling_program_luma); + resampler_luma_unaligned = GetResampler(env->GetCPUFlags(), false, pixelsize, bits_per_pixel, filter_storage_luma_unaligned, resampling_program_luma); if (vi.IsPlanar() && !grey && !isRGBPfamily) { const int shift = vi.GetPlaneHeightSubsampling(PLANAR_U); @@ -1310,8 +1337,8 @@ FilteredResizeV::FilteredResizeV( PClip _child, double subrange_top, double subr target_height >> shift, env2); - resampler_chroma_aligned = GetResampler(env->GetCPUFlags(), true , pixelsize, filter_storage_chroma_aligned, resampling_program_chroma); - resampler_chroma_unaligned = GetResampler(env->GetCPUFlags(), false, pixelsize, filter_storage_chroma_unaligned, resampling_program_chroma); + resampler_chroma_aligned = GetResampler(env->GetCPUFlags(), true , pixelsize, bits_per_pixel, filter_storage_chroma_aligned, resampling_program_chroma); + resampler_chroma_unaligned = GetResampler(env->GetCPUFlags(), false, pixelsize, bits_per_pixel, filter_storage_chroma_unaligned, resampling_program_chroma); } // Change target video info size @@ -1357,9 +1384,9 @@ PVideoFrame __stdcall 
FilteredResizeV::GetFrame(int n, IScriptEnvironment* env) // Do resizing int work_width = vi.IsPlanar() ? vi.width : vi.BytesFromPixels(vi.width) / pixelsize; // packed RGB: or vi.width * vi.NumComponent() if (IsPtrAligned(srcp, 16) && (src_pitch & 15) == 0) - resampler_luma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_aligned); + resampler_luma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, bits_per_pixel, src_pitch_table_luma, filter_storage_luma_aligned); else - resampler_luma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_unaligned); + resampler_luma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, bits_per_pixel, src_pitch_table_luma, filter_storage_luma_unaligned); if(isRGBPfamily) { @@ -1368,18 +1395,18 @@ PVideoFrame __stdcall FilteredResizeV::GetFrame(int n, IScriptEnvironment* env) srcp = src->GetReadPtr(PLANAR_B); dstp = dst->GetWritePtr(PLANAR_B); if (IsPtrAligned(srcp, 16) && (src_pitch & 15) == 0) - resampler_luma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_aligned); + resampler_luma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, bits_per_pixel, src_pitch_table_luma, filter_storage_luma_aligned); else - resampler_luma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_unaligned); + resampler_luma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, bits_per_pixel, src_pitch_table_luma, filter_storage_luma_unaligned); src_pitch = src->GetPitch(PLANAR_R); dst_pitch = dst->GetPitch(PLANAR_R); srcp = src->GetReadPtr(PLANAR_R); dstp = dst->GetWritePtr(PLANAR_R); if (IsPtrAligned(srcp, 16) && (src_pitch & 15) == 0) - resampler_luma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_aligned); + resampler_luma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, bits_per_pixel, src_pitch_table_luma, filter_storage_luma_aligned); else - resampler_luma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, src_pitch_table_luma, filter_storage_luma_unaligned); + resampler_luma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_luma, work_width, vi.height, bits_per_pixel, src_pitch_table_luma, filter_storage_luma_unaligned); } else if (!grey && vi.IsPlanar()) { int width = vi.width >> vi.GetPlaneWidthSubsampling(PLANAR_U); @@ -1392,9 +1419,9 @@ PVideoFrame __stdcall FilteredResizeV::GetFrame(int n, IScriptEnvironment* env) dstp = dst->GetWritePtr(PLANAR_U); if (IsPtrAligned(srcp, 16) && (src_pitch & 15) == 0) - resampler_chroma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_chroma, width, height, src_pitch_table_chromaU, filter_storage_chroma_unaligned); + resampler_chroma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_chroma, width, height, bits_per_pixel, src_pitch_table_chromaU, filter_storage_chroma_unaligned); else - resampler_chroma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_chroma, width, height, src_pitch_table_chromaU, filter_storage_chroma_unaligned); + 
resampler_chroma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_chroma, width, height, bits_per_pixel, src_pitch_table_chromaU, filter_storage_chroma_unaligned); // Plane V resizing src_pitch = src->GetPitch(PLANAR_V); @@ -1403,9 +1430,9 @@ PVideoFrame __stdcall FilteredResizeV::GetFrame(int n, IScriptEnvironment* env) dstp = dst->GetWritePtr(PLANAR_V); if (IsPtrAligned(srcp, 16) && (src_pitch & 15) == 0) - resampler_chroma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_chroma, width, height, src_pitch_table_chromaV, filter_storage_chroma_unaligned); + resampler_chroma_aligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_chroma, width, height, bits_per_pixel, src_pitch_table_chromaV, filter_storage_chroma_unaligned); else - resampler_chroma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_chroma, width, height, src_pitch_table_chromaV, filter_storage_chroma_unaligned); + resampler_chroma_unaligned(dstp, srcp, dst_pitch, src_pitch, resampling_program_chroma, width, height, bits_per_pixel, src_pitch_table_chromaV, filter_storage_chroma_unaligned); } // Free pitch table @@ -1416,7 +1443,7 @@ PVideoFrame __stdcall FilteredResizeV::GetFrame(int n, IScriptEnvironment* env) return dst; } -ResamplerV FilteredResizeV::GetResampler(int CPU, bool aligned, int pixelsize, void*& storage, ResamplingProgram* program) +ResamplerV FilteredResizeV::GetResampler(int CPU, bool aligned, int pixelsize, int bits_per_pixel, void*& storage, ResamplingProgram* program) { if (program->filter_size == 1) { // Fast pointresize diff --git a/avs_core/filters/resample.h b/avs_core/filters/resample.h index 2a1501819..665431818 100644 --- a/avs_core/filters/resample.h +++ b/avs_core/filters/resample.h @@ -39,8 +39,8 @@ #include "resample_functions.h" // Resizer function pointer -typedef void (*ResamplerV)(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, const int* pitch_table, const void* storage); -typedef void (*ResamplerH)(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height); +typedef void (*ResamplerV)(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, int bits_per_pixel, const int* pitch_table, const void* storage); +typedef void (*ResamplerH)(BYTE* dst, const BYTE* src, int dst_pitch, int src_pitch, ResamplingProgram* program, int width, int target_height, int bits_per_pixel); // Turn function pointer -- copied from turn.h typedef void (*TurnFuncPtr) (const BYTE *srcp, BYTE *dstp, int width, int height, int src_pitch, int dst_pitch); @@ -61,7 +61,7 @@ class FilteredResizeH : public GenericVideoFilter return cachehints == CACHE_GET_MTMODE ? MT_NICE_FILTER : 0; } - static ResamplerH GetResampler(int CPU, bool aligned, int pixelsize, ResamplingProgram* program, IScriptEnvironment2* env); + static ResamplerH GetResampler(int CPU, bool aligned, int pixelsize, int bits_per_pixel, ResamplingProgram* program, IScriptEnvironment2* env); private: // Resampling @@ -80,6 +80,7 @@ class FilteredResizeH : public GenericVideoFilter int src_width, src_height, dst_width, dst_height; bool grey; int pixelsize; // AVS16 + int bits_per_pixel; ResamplerH resampler_h_luma; ResamplerH resampler_h_chroma; @@ -107,11 +108,12 @@ class FilteredResizeV : public GenericVideoFilter return cachehints == CACHE_GET_MTMODE ? 
MT_NICE_FILTER : 0; } - static ResamplerV GetResampler(int CPU, bool aligned, int pixelsize, void*& storage, ResamplingProgram* program); + static ResamplerV GetResampler(int CPU, bool aligned, int pixelsize, int bits_per_pixel, void*& storage, ResamplingProgram* program); private: bool grey; int pixelsize; // AVS16 + int bits_per_pixel; ResamplingProgram *resampling_program_luma; ResamplingProgram *resampling_program_chroma; From 6e942ec761e8e7efddf72f523f756132bc238f65 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 12 Oct 2016 16:29:53 +0200 Subject: [PATCH 098/120] Fix avisynth.h for x64 build --- avs_core/include/avisynth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avs_core/include/avisynth.h b/avs_core/include/avisynth.h index c0df07697..2beaa2618 100644 --- a/avs_core/include/avisynth.h +++ b/avs_core/include/avisynth.h @@ -1056,7 +1056,7 @@ class AVSValue { #ifdef X86_64 // if ever, only x64 will support. It breaks struct size on 32 bit __int64 longlong; // 8 bytes - double double_pt // 8 bytes + double double_pt; // 8 bytes #endif }; From 595d38fddc7d77f761ab99ab99cd7dace1bc387d Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 12 Oct 2016 16:31:16 +0200 Subject: [PATCH 099/120] AVX and AVX2 paths for 32->(8..16) and 10..16<->10..16 bitdepth converters --- avs_core/convert/convert_avx.cpp | 121 ++++++++++++++++++++++++++++++ avs_core/convert/convert_avx.h | 46 ++++++++++++ avs_core/convert/convert_avx2.cpp | 120 +++++++++++++++++++++++++++++ avs_core/convert/convert_avx2.h | 46 ++++++++++++ 4 files changed, 333 insertions(+) create mode 100644 avs_core/convert/convert_avx.cpp create mode 100644 avs_core/convert/convert_avx.h create mode 100644 avs_core/convert/convert_avx2.cpp create mode 100644 avs_core/convert/convert_avx2.h diff --git a/avs_core/convert/convert_avx.cpp b/avs_core/convert/convert_avx.cpp new file mode 100644 index 000000000..4ceededf8 --- /dev/null +++ b/avs_core/convert/convert_avx.cpp @@ -0,0 +1,121 @@ +// Avisynth v2.5. Copyright 2002-2009 Ben Rudiak-Gould et al. +// http://www.avisynth.org + +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit +// http://www.gnu.org/copyleft/gpl.html . +// +// Linking Avisynth statically or dynamically with other modules is making a +// combined work based on Avisynth. Thus, the terms and conditions of the GNU +// General Public License cover the whole combination. 
+// +// As a special exception, the copyright holders of Avisynth give you +// permission to link Avisynth with independent modules that communicate with +// Avisynth solely through the interfaces defined in avisynth.h, regardless of the license +// terms of these independent modules, and to copy and distribute the +// resulting combined work under terms of your choice, provided that +// every copy of the combined work is accompanied by a complete copy of +// the source code of Avisynth (the version of Avisynth used to produce the +// combined work), being distributed under the terms of the GNU General +// Public License plus this exception. An independent module is a module +// which is not derived from or based on Avisynth, such as 3rd-party filters, +// import and export plugins, or graphical user interfaces. + + +#include "convert.h" +#include "convert_planar.h" +#include "convert_rgb.h" +#include "convert_yv12.h" +#include "convert_yuy2.h" +#include +#include +#include +#include +#include +#include +#include + +#include "convert_avx.h" + +template +void convert_32_to_uintN_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +{ + const float *srcp0 = reinterpret_cast(srcp); + pixel_t *dstp0 = reinterpret_cast(dstp); + + src_pitch = src_pitch / sizeof(float); + dst_pitch = dst_pitch / sizeof(pixel_t); + + int src_width = src_rowsize / sizeof(float); + + float max_dst_pixelvalue = (float)((1<(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_32_to_uintN_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_32_to_uintN_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_32_to_uintN_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_32_to_uintN_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); + + +// YUV: bit shift 10-12-14-16 <=> 10-12-14-16 bits +// shift right or left, depending on expandrange template param +template +void convert_uint16_to_uint16_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +{ + const uint16_t *srcp0 = reinterpret_cast(srcp); + uint16_t *dstp0 = reinterpret_cast(dstp); + + src_pitch = src_pitch / sizeof(uint16_t); + dst_pitch = dst_pitch / sizeof(uint16_t); + + const int src_width = src_rowsize / sizeof(uint16_t); + + for(int y=0; y> shiftbits; // reduce range + } + dstp0 += dst_pitch; + srcp0 += src_pitch; + } + _mm256_zeroupper(); +} + +// instantiate them +template void convert_uint16_to_uint16_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_uint16_to_uint16_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_uint16_to_uint16_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_uint16_to_uint16_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void 
convert_uint16_to_uint16_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_uint16_to_uint16_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); + diff --git a/avs_core/convert/convert_avx.h b/avs_core/convert/convert_avx.h new file mode 100644 index 000000000..981a46765 --- /dev/null +++ b/avs_core/convert/convert_avx.h @@ -0,0 +1,46 @@ +// Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. +// http://www.avisynth.org + +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit +// http://www.gnu.org/copyleft/gpl.html . +// +// Linking Avisynth statically or dynamically with other modules is making a +// combined work based on Avisynth. Thus, the terms and conditions of the GNU +// General Public License cover the whole combination. +// +// As a special exception, the copyright holders of Avisynth give you +// permission to link Avisynth with independent modules that communicate with +// Avisynth solely through the interfaces defined in avisynth.h, regardless of the license +// terms of these independent modules, and to copy and distribute the +// resulting combined work under terms of your choice, provided that +// every copy of the combined work is accompanied by a complete copy of +// the source code of Avisynth (the version of Avisynth used to produce the +// combined work), being distributed under the terms of the GNU General +// Public License plus this exception. An independent module is a module +// which is not derived from or based on Avisynth, such as 3rd-party filters, +// import and export plugins, or graphical user interfaces. + +#ifndef __Convert_AVX_H__ +#define __Convert_AVX_H__ + +#include "../core/internal.h" + +template +void convert_uint16_to_uint16_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); + +template +void convert_32_to_uintN_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); + +#endif // __Convert_AVX_H__ diff --git a/avs_core/convert/convert_avx2.cpp b/avs_core/convert/convert_avx2.cpp new file mode 100644 index 000000000..3b733fe83 --- /dev/null +++ b/avs_core/convert/convert_avx2.cpp @@ -0,0 +1,120 @@ +// Avisynth v2.5. Copyright 2002-2009 Ben Rudiak-Gould et al. +// http://www.avisynth.org + +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. 
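Note (assumption flagged): the inner loop of convert_32_to_uintN_c_avx is not fully legible here because the template argument lists and cast targets were stripped during extraction, but from the max_dst_pixelvalue setup and the function name the per-pixel step is presumably a scale to the target range followed by a clamp and a narrowing cast. A sketch of that assumption:

#include <algorithm>
#include <cstdint>

// assumed per-pixel behaviour of the float -> 8/10/12/14/16 bit converter
template <typename pixel_t, int target_bits>
static inline pixel_t float_to_uintN(float v)
{
  const float max_dst_pixelvalue = (float)((1 << target_bits) - 1);  // 255 ... 65535
  float scaled = v * max_dst_pixelvalue + 0.5f;                      // scale and round
  scaled = std::min(std::max(scaled, 0.0f), max_dst_pixelvalue);     // clamp to the valid range
  return (pixel_t)scaled;
}

The "_c_avx" suffix plus the explicit _mm256_zeroupper() suggest the body is deliberately plain C that the compiler is free to auto-vectorize once the file is built with /arch:AVX (see the CMakeLists change later in this series).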
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit +// http://www.gnu.org/copyleft/gpl.html . +// +// Linking Avisynth statically or dynamically with other modules is making a +// combined work based on Avisynth. Thus, the terms and conditions of the GNU +// General Public License cover the whole combination. +// +// As a special exception, the copyright holders of Avisynth give you +// permission to link Avisynth with independent modules that communicate with +// Avisynth solely through the interfaces defined in avisynth.h, regardless of the license +// terms of these independent modules, and to copy and distribute the +// resulting combined work under terms of your choice, provided that +// every copy of the combined work is accompanied by a complete copy of +// the source code of Avisynth (the version of Avisynth used to produce the +// combined work), being distributed under the terms of the GNU General +// Public License plus this exception. An independent module is a module +// which is not derived from or based on Avisynth, such as 3rd-party filters, +// import and export plugins, or graphical user interfaces. + + +#include "convert.h" +#include "convert_planar.h" +#include "convert_rgb.h" +#include "convert_yv12.h" +#include "convert_yuy2.h" +#include +#include +#include +#include +#include +#include +#include + +#include "convert_avx2.h" + +template +void convert_32_to_uintN_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +{ + const float *srcp0 = reinterpret_cast(srcp); + pixel_t *dstp0 = reinterpret_cast(dstp); + + src_pitch = src_pitch / sizeof(float); + dst_pitch = dst_pitch / sizeof(pixel_t); + + int src_width = src_rowsize / sizeof(float); + + float max_dst_pixelvalue = (float)((1<(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_32_to_uintN_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_32_to_uintN_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_32_to_uintN_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_32_to_uintN_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); + +// YUV: bit shift 10-12-14-16 <=> 10-12-14-16 bits +// shift right or left, depending on expandrange template param +template +void convert_uint16_to_uint16_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range) +{ + const uint16_t *srcp0 = reinterpret_cast(srcp); + uint16_t *dstp0 = reinterpret_cast(dstp); + + src_pitch = src_pitch / sizeof(uint16_t); + dst_pitch = dst_pitch / sizeof(uint16_t); + + const int src_width = src_rowsize / sizeof(uint16_t); + + for(int y=0; y> shiftbits; // reduce range + } + dstp0 += dst_pitch; + srcp0 += 
src_pitch; + } + // Anti-sse2-avx penalty vzeroupper (_mm256_zeroupper()) is automatically placed here if ymm registers are used +} + +// instantiate them +template void convert_uint16_to_uint16_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_uint16_to_uint16_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_uint16_to_uint16_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_uint16_to_uint16_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_uint16_to_uint16_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); +template void convert_uint16_to_uint16_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); + + diff --git a/avs_core/convert/convert_avx2.h b/avs_core/convert/convert_avx2.h new file mode 100644 index 000000000..644e4fdba --- /dev/null +++ b/avs_core/convert/convert_avx2.h @@ -0,0 +1,46 @@ +// Avisynth v2.5. Copyright 2002 Ben Rudiak-Gould et al. +// http://www.avisynth.org + +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit +// http://www.gnu.org/copyleft/gpl.html . +// +// Linking Avisynth statically or dynamically with other modules is making a +// combined work based on Avisynth. Thus, the terms and conditions of the GNU +// General Public License cover the whole combination. +// +// As a special exception, the copyright holders of Avisynth give you +// permission to link Avisynth with independent modules that communicate with +// Avisynth solely through the interfaces defined in avisynth.h, regardless of the license +// terms of these independent modules, and to copy and distribute the +// resulting combined work under terms of your choice, provided that +// every copy of the combined work is accompanied by a complete copy of +// the source code of Avisynth (the version of Avisynth used to produce the +// combined work), being distributed under the terms of the GNU General +// Public License plus this exception. An independent module is a module +// which is not derived from or based on Avisynth, such as 3rd-party filters, +// import and export plugins, or graphical user interfaces. 
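Note (assumption flagged): only the range-reducing branch of the uint16 converter survived extraction (srcp0[x] >> shiftbits); given the expandrange and shiftbits template parameters, the other branch is presumably a left shift. The whole kernel then reduces to:

#include <cstdint>

// assumed shape of the 10/12/14/16 <-> 10/12/14/16 bit shift converter
template <bool expandrange, int shiftbits>
static void convert_uint16_bitdepth_row(const uint16_t* src, uint16_t* dst, int width)
{
  for (int x = 0; x < width; x++)
    dst[x] = expandrange ? (uint16_t)(src[x] << shiftbits)   // e.g. 10 -> 16 bits
                         : (uint16_t)(src[x] >> shiftbits);  // e.g. 16 -> 10 bits
}

As with the AVX file, the AVX2 variant keeps a scalar loop and relies on the /arch:AVX2 build flag for vectorization; its closing comment notes that the compiler inserts vzeroupper on its own there, so no explicit _mm256_zeroupper() is needed.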
+ +#ifndef __Convert_AVX2_H__ +#define __Convert_AVX2_H__ + +#include "../core/internal.h" + +template +void convert_32_to_uintN_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); + +template +void convert_uint16_to_uint16_c_avx2(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range); + +#endif // __Convert_AVX2_H__ From e0640b5b8e7e81e1c33029dfe1a8c86b47ae08cc Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 12 Oct 2016 16:32:07 +0200 Subject: [PATCH 100/120] Use AVX/AVX2 path for two bitdepth conversion function --- avs_core/convert/convert.cpp | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 7d2fb2f48..606fe7b56 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -46,7 +46,8 @@ #include #include - +#include "convert_avx.h" +#include "convert_avx2.h" /******************************************************************** ***** Declare index of new filters for Avisynth's filter engine ***** @@ -1599,6 +1600,8 @@ ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dith format_change_only = false; bool sse2 = !!(env->GetCPUFlags() & CPUF_SSE2); // frames are always 16 bit aligned + bool avx = !!(env->GetCPUFlags() & CPUF_AVX); + bool avx2 = !!(env->GetCPUFlags() & CPUF_AVX2); BitDepthConvFuncPtr conv_function_full_scale; BitDepthConvFuncPtr conv_function_full_scale_no_dither; @@ -1774,16 +1777,16 @@ ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dith if (bits_per_pixel > target_bitdepth) // reduce range 16->14/12/10 14->12/10 12->10. template: bitshift switch (bits_per_pixel - target_bitdepth) { - case 2: conv_function_shifted_scale = sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; - case 4: conv_function_shifted_scale = sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; - case 6: conv_function_shifted_scale = sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; + case 2: conv_function_shifted_scale = avx2 ? convert_uint16_to_uint16_c_avx2 : avx ? convert_uint16_to_uint16_c_avx : (sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c); break; + case 4: conv_function_shifted_scale = avx2 ? convert_uint16_to_uint16_c_avx2 : avx ? convert_uint16_to_uint16_c_avx : (sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c); break; + case 6: conv_function_shifted_scale = avx2 ? convert_uint16_to_uint16_c_avx2 : avx ? convert_uint16_to_uint16_c_avx : (sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c); break; } else // expand range switch (target_bitdepth - bits_per_pixel) { - case 2: conv_function_shifted_scale = sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; - case 4: conv_function_shifted_scale = sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; - case 6: conv_function_shifted_scale = sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c; break; + case 2: conv_function_shifted_scale = avx2 ? convert_uint16_to_uint16_c_avx2 : avx ? convert_uint16_to_uint16_c_avx : (sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c); break; + case 4: conv_function_shifted_scale = avx2 ? convert_uint16_to_uint16_c_avx2 : avx ? convert_uint16_to_uint16_c_avx : (sse2 ? 
convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c); break; + case 6: conv_function_shifted_scale = avx2 ? convert_uint16_to_uint16_c_avx2 : avx ? convert_uint16_to_uint16_c_avx : (sse2 ? convert_uint16_to_uint16_sse2 : convert_uint16_to_uint16_c); break; } } else { @@ -1815,14 +1818,14 @@ ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dith if (truerange) { switch (target_bitdepth) { - case 10: conv_function = convert_32_to_uintN_c; break; - case 12: conv_function = convert_32_to_uintN_c; break; - case 14: conv_function = convert_32_to_uintN_c; break; - case 16: conv_function = convert_32_to_uintN_c; break; + case 10: conv_function = avx2 ? convert_32_to_uintN_c_avx2 : avx ? convert_32_to_uintN_c_avx : convert_32_to_uintN_c; break; + case 12: conv_function = avx2 ? convert_32_to_uintN_c_avx2 : avx ? convert_32_to_uintN_c_avx : convert_32_to_uintN_c; break; + case 14: conv_function = avx2 ? convert_32_to_uintN_c_avx2 : avx ? convert_32_to_uintN_c_avx : convert_32_to_uintN_c; break; + case 16: conv_function = avx2 ? convert_32_to_uintN_c_avx2 : avx ? convert_32_to_uintN_c_avx : convert_32_to_uintN_c; break; } } else { - conv_function = convert_32_to_uintN_c; + conv_function = avx2 ? convert_32_to_uintN_c_avx2 : avx ? convert_32_to_uintN_c_avx : convert_32_to_uintN_c; } conv_function_a = conv_function; } @@ -1909,7 +1912,7 @@ ConvertBits::ConvertBits(PClip _child, const float _float_range, const int _dith else if (vi.ComponentSize() == 4) // 32->8 bit { // full scale - conv_function = convert_32_to_uintN_c; + conv_function = avx ? convert_32_to_uintN_c_avx : convert_32_to_uintN_c; conv_function_a = conv_function; } else From 8af39fa6c0de24621ef665815716d1f3d778bbcb Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 12 Oct 2016 16:33:52 +0200 Subject: [PATCH 101/120] cmakelist.txt: set special avx/avx2 options for files *_avx.cpp and *_avx2.cpp --- avs_core/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/avs_core/CMakeLists.txt b/avs_core/CMakeLists.txt index a6541157b..74167e421 100644 --- a/avs_core/CMakeLists.txt +++ b/avs_core/CMakeLists.txt @@ -25,6 +25,14 @@ foreach(FILE ${AvsCore_Sources}) source_group("${GROUP}" FILES "${FILE}") endforeach() +# special AVX option for source files with *_avx.cpp pattern +file(GLOB_RECURSE SRCS_AVX "*_avx.cpp") +set_source_files_properties(${SRCS_AVX} PROPERTIES COMPILE_FLAGS " /arch:AVX ") + +# special AVX2 option for source files with *_avx2.cpp pattern +file(GLOB_RECURSE SRCS_AVX2 "*_avx2.cpp") +set_source_files_properties(${SRCS_AVX2} PROPERTIES COMPILE_FLAGS " /arch:AVX2 ") + # Specify include directories target_include_directories("AvsCore" PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) # Specify preprocessor definitions From ffe50717199374196002864aecfe4997ccd08235 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 12 Oct 2016 16:35:58 +0200 Subject: [PATCH 102/120] Test for fixing "Only a single prefetcher is allowed per script." --- avs_core/core/avisynth.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/avs_core/core/avisynth.cpp b/avs_core/core/avisynth.cpp index 267b111f7..ec39065a3 100644 --- a/avs_core/core/avisynth.cpp +++ b/avs_core/core/avisynth.cpp @@ -2622,7 +2622,10 @@ success:; // Determine MT-mode, as if this instance had not called Invoke() // in its constructor. Note that this is not necessary the final // MT-mode. - if (fret.IsClip()) + // PF 161012 hack(?) don't call if prefetch. 
If effective mt mode is MT_MULTI, then + // Prefetch create gets called again + // Prefetch is activated above in: fret = funcCtor->InstantiateFilter(); + if (fret.IsClip() && strcmp(f->name, "Prefetch")) { const PClip &clip = fret.AsClip(); From 0b440c5d844c1b53fc7be5aef95809fc35bb02e9 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 12 Oct 2016 16:37:54 +0200 Subject: [PATCH 103/120] Comments in main cmakelist.txt --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 01c80ce4a..8a1446639 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,7 @@ IF( MSVC_IDE ) # Check for Visual Studio # Enable C++ with SEH exceptions set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /EHa") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHa") + # or add_compile_options( /EHa ) for CMake>=3? # Prevent VC++ from complaining about not using MS-specific functions add_definitions("/D _CRT_SECURE_NO_WARNINGS /D _SECURE_SCL=0") @@ -48,8 +49,9 @@ IF( MSVC_IDE ) # Check for Visual Studio if(CMAKE_SIZEOF_VOID_P EQUAL 4) # VC++ enables the SSE2 instruction set by default even on 32-bits. Step back a bit. add_definitions("/arch:SSE") + #add_definitions("/arch:SSE2") # Better use this one, it's 2016 now endif() - + # Set additional optimization flags set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Oy /Ot /GS-") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Oy /Ot /GS-") From 87ce6b47089590f216bb7662e59c50a722cb6422 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 13 Oct 2016 12:38:53 +0200 Subject: [PATCH 104/120] Fix some size_t warnings for x64 --- avs_core/convert/convert.cpp | 2 +- avs_core/filters/focus.cpp | 4 ++-- avs_core/filters/levels.cpp | 2 +- avs_core/filters/text-overlay.cpp | 5 ++++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/avs_core/convert/convert.cpp b/avs_core/convert/convert.cpp index 606fe7b56..cacec9f54 100644 --- a/avs_core/convert/convert.cpp +++ b/avs_core/convert/convert.cpp @@ -1951,7 +1951,7 @@ AVSValue __cdecl ConvertBits::Create(AVSValue args, void* user_data, IScriptEnvi const VideoInfo &vi = clip->GetVideoInfo(); - intptr_t create_param = (int)reinterpret_cast(user_data); + int create_param = (int)reinterpret_cast(user_data); // float range parameter float float_range = (float)args[4].AsFloat(1.0f); diff --git a/avs_core/filters/focus.cpp b/avs_core/filters/focus.cpp index e29af70bd..5e2411dc4 100644 --- a/avs_core/filters/focus.cpp +++ b/avs_core/filters/focus.cpp @@ -410,9 +410,9 @@ static void af_horizontal_rgb32_64_c(BYTE* dstp8, size_t height, size_t pitch8, weight_t outer_weight = 32768-half_amount; // (1-1/2^_amount)/2 32768 = 0.5 pixel_t* dstp = reinterpret_cast(dstp8); - int pitch = pitch8 / sizeof(pixel_t); + size_t pitch = pitch8 / sizeof(pixel_t); - for (int y = height; y>0; --y) + for (size_t y = height; y>0; --y) { pixel_t b_left = dstp[0]; pixel_t g_left = dstp[1]; diff --git a/avs_core/filters/levels.cpp b/avs_core/filters/levels.cpp index 24f155b1b..fa0d9ef34 100644 --- a/avs_core/filters/levels.cpp +++ b/avs_core/filters/levels.cpp @@ -190,7 +190,7 @@ Levels::Levels(PClip _child, int in_min, double gamma, int in_max, int out_min, map = nullptr; if(use_lut) { auto env2 = static_cast(env); - size_t number_of_maps = vi.IsYUV() || vi.IsYUVA() ? 2 : 1; + int number_of_maps = vi.IsYUV() || vi.IsYUVA() ? 
2 : 1; int bufsize = pixelsize * real_lookup_size * scale * number_of_maps; map = static_cast(env2->Allocate(bufsize , 16, AVS_NORMAL_ALLOC)); if (!map) diff --git a/avs_core/filters/text-overlay.cpp b/avs_core/filters/text-overlay.cpp index 7eb5d4f70..923be80a0 100644 --- a/avs_core/filters/text-overlay.cpp +++ b/avs_core/filters/text-overlay.cpp @@ -1394,6 +1394,7 @@ PVideoFrame FilterInfo::GetFrame(int n, IScriptEnvironment* env) // More flexible way: get text extent RECT r; +#if 0 if(false && !font_override) { // To prevent slowish full MxN rendering, we calculate a dummy @@ -1422,7 +1423,9 @@ PVideoFrame FilterInfo::GetFrame(int n, IScriptEnvironment* env) DrawText(hdcAntialias, s_horiz.c_str(), -1, &r0_h, DT_CALCRECT); // and use the width and height dimensions from the two results r = { 32, 16, min(32+(int)r0_h.right,vi.width * 8-1), min(16+int(r0_v.bottom), vi.height*8-1) }; // do not crop if larger font is used - } else { + } else +#endif + { // font was overridden, may not be fixed type RECT r0 = { 0, 0, 100, 100 }; // do not crop if larger font is used DrawText(hdcAntialias, text, -1, &r0, DT_CALCRECT); From a289325293233b6917e1c3111202794ceb32c9ab Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 13 Oct 2016 14:34:58 +0200 Subject: [PATCH 105/120] Overlay: Subtract 10-16 bit. Unify with "Add" --- avs_core/filters/overlay/OF_add.cpp | 127 +++++++++++++++----- avs_core/filters/overlay/OF_subtract.cpp | 3 + avs_core/filters/overlay/overlay.cpp | 56 ++++++--- avs_core/filters/overlay/overlay.h | 4 +- avs_core/filters/overlay/overlayfunctions.h | 23 +++- 5 files changed, 164 insertions(+), 49 deletions(-) diff --git a/avs_core/filters/overlay/OF_add.cpp b/avs_core/filters/overlay/OF_add.cpp index 61e9fbe90..8595881f0 100644 --- a/avs_core/filters/overlay/OF_add.cpp +++ b/avs_core/filters/overlay/OF_add.cpp @@ -40,21 +40,43 @@ #include void OL_AddImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { - if (bits_per_pixel == 8) - BlendImageMask(base, overlay, mask); - else if(bits_per_pixel <= 16) - BlendImageMask(base, overlay, mask); - //else if(bits_per_pixel == 32) - // BlendImageMask(base, overlay, mask); + if(of_mode == OF_Add) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + } + else { + // OF_Subtract + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + } } void OL_AddImage::DoBlendImage(Image444* base, Image444* overlay) { - if (bits_per_pixel == 8) - BlendImageMask(base, overlay, nullptr); - else if(bits_per_pixel <= 16) - BlendImageMask(base, overlay, nullptr); - //else if(bits_per_pixel == 32) - // BlendImage(base, overlay); + if(of_mode == OF_Add) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, nullptr); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, nullptr); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + } + else { + // OF_Subtract + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, nullptr); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, nullptr); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + } } /* @@ -137,7 +159,7 @@ void OL_AddImage::BlendImageMask(Image444* base, Image444* overlay, Image444* ma } */ -template +template void 
OL_AddImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { pixel_t* baseY = reinterpret_cast(base->GetPtr(PLANAR_Y)); @@ -171,14 +193,29 @@ void OL_AddImage::BlendImageMask(Image444* base, Image444* overlay, Image444* ma if (opacity == 256) { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int Y = baseY[x] + (maskMode ? (((result_t)ovY[x] * maskY[x]) >> MASK_CORR_SHIFT) : ovY[x]); - int U = baseU[x] + (int)(maskMode ? ((((result_t)half_pixel_value*(pixel_range - maskU[x])) + ((result_t)maskU[x] * ovU[x])) >> MASK_CORR_SHIFT) : ovU[x]) - half_pixel_value; - int V = baseV[x] + (int)(maskMode ? ((((result_t)half_pixel_value*(pixel_range - maskV[x])) + ((result_t)maskV[x] * ovV[x])) >> MASK_CORR_SHIFT) : ovV[x]) - half_pixel_value; - if (Y>max_pixel_value) { // Apply overbrightness to UV - int multiplier = max(0,pixel_range + over32 -Y); // 0 to 32 - U = ((U*multiplier) + (half_pixel_value*(over32-multiplier)))>>SHIFT; - V = ((V*multiplier) + (half_pixel_value*(over32-multiplier)))>>SHIFT; - Y = max_pixel_value; + int Y, U, V; + if (of_add) { + Y = baseY[x] + (maskMode ? (((result_t)ovY[x] * maskY[x]) >> MASK_CORR_SHIFT) : ovY[x]); + U = baseU[x] + (int)(maskMode ? ((((result_t)half_pixel_value*(pixel_range - maskU[x])) + ((result_t)maskU[x] * ovU[x])) >> MASK_CORR_SHIFT) : ovU[x]) - half_pixel_value; + V = baseV[x] + (int)(maskMode ? ((((result_t)half_pixel_value*(pixel_range - maskV[x])) + ((result_t)maskV[x] * ovV[x])) >> MASK_CORR_SHIFT) : ovV[x]) - half_pixel_value; + if (Y>max_pixel_value) { // Apply overbrightness to UV + int multiplier = max(0,pixel_range + over32 -Y); // 0 to 32 + U = ((U*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + V = ((V*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + Y = max_pixel_value; + } + } + else { + // of_subtract + Y = baseY[x] - (maskMode ? (((result_t)ovY[x] * maskY[x]) >> MASK_CORR_SHIFT) : ovY[x]); + U = baseU[x] - (int)(maskMode ? ((((result_t)half_pixel_value*(pixel_range - maskU[x])) + ((result_t)maskU[x] * ovU[x])) >> MASK_CORR_SHIFT) : ovU[x]) + half_pixel_value; + V = baseV[x] - (int)(maskMode ? ((((result_t)half_pixel_value*(pixel_range - maskV[x])) + ((result_t)maskV[x] * ovV[x])) >> MASK_CORR_SHIFT) : ovV[x]) + half_pixel_value; + if (Y<0) { // Apply overbrightness to UV + int multiplier = min(-Y,over32); // 0 to 32 + U = ((U*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; + V = ((V*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; + Y = 0; + } } baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); @@ -201,23 +238,49 @@ void OL_AddImage::BlendImageMask(Image444* base, Image444* overlay, Image444* ma } else { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int Y = baseY[x] + (maskMode ? (((result_t)maskY[x] * opacity*ovY[x]) >> (OPACITY_SHIFT + MASK_CORR_SHIFT)) : ((opacity*ovY[x]) >> OPACITY_SHIFT)); - int U, V; + int Y, U, V; + if(of_add) + Y = baseY[x] + (maskMode ? (((result_t)maskY[x] * opacity*ovY[x]) >> (OPACITY_SHIFT + MASK_CORR_SHIFT)) : ((opacity*ovY[x]) >> OPACITY_SHIFT)); + else + Y = baseY[x] - (maskMode ? 
(((result_t)maskY[x] * opacity*ovY[x]) >> (OPACITY_SHIFT + MASK_CORR_SHIFT)) : ((opacity*ovY[x]) >> OPACITY_SHIFT)); if (maskMode) { result_t mU = (maskU[x] * opacity) >> OPACITY_SHIFT; result_t mV = (maskV[x] * opacity) >> OPACITY_SHIFT; - U = baseU[x] + (int)(((half_pixel_value*(pixel_range - mU)) + (mU*ovU[x])) >> MASK_CORR_SHIFT) - half_pixel_value; - V = baseV[x] + (int)(((half_pixel_value*(pixel_range - mV)) + (mV*ovV[x])) >> MASK_CORR_SHIFT) - half_pixel_value; + if(of_add) { + U = baseU[x] + (int)(((half_pixel_value*(pixel_range - mU)) + (mU*ovU[x])) >> MASK_CORR_SHIFT) - half_pixel_value; + V = baseV[x] + (int)(((half_pixel_value*(pixel_range - mV)) + (mV*ovV[x])) >> MASK_CORR_SHIFT) - half_pixel_value; + } + else { + U = baseU[x] - (int)(((half_pixel_value*(pixel_range - mU)) + (mU*ovU[x])) >> MASK_CORR_SHIFT) + half_pixel_value; + V = baseV[x] - (int)(((half_pixel_value*(pixel_range - mV)) + (mV*ovV[x])) >> MASK_CORR_SHIFT) + half_pixel_value; + } } else { - U = baseU[x] + (((half_pixel_value*inv_opacity)+(opacity*(ovU[x])))>>OPACITY_SHIFT) - half_pixel_value; - V = baseV[x] + (((half_pixel_value*inv_opacity)+(opacity*(ovV[x])))>>OPACITY_SHIFT) - half_pixel_value; + if(of_add) { + U = baseU[x] + (((half_pixel_value*inv_opacity)+(opacity*(ovU[x])))>>OPACITY_SHIFT) - half_pixel_value; + V = baseV[x] + (((half_pixel_value*inv_opacity)+(opacity*(ovV[x])))>>OPACITY_SHIFT) - half_pixel_value; + } + else { + U = baseU[x] - (((half_pixel_value*inv_opacity)+(opacity*(ovU[x])))>>OPACITY_SHIFT) + half_pixel_value; + V = baseV[x] - (((half_pixel_value*inv_opacity)+(opacity*(ovV[x])))>>OPACITY_SHIFT) + half_pixel_value; + } } - if (Y>max_pixel_value) { // Apply overbrightness to UV - int multiplier = max(0,(max_pixel_value + 1) + over32 - Y); // 288-Y : 0 to 32 - U = ((U*multiplier) + (half_pixel_value*(over32 - multiplier))) >> SHIFT; - V = ((V*multiplier) + (half_pixel_value*(over32 - multiplier))) >> SHIFT; - Y = max_pixel_value; + if(of_add) { + if (Y>max_pixel_value) { // Apply overbrightness to UV + int multiplier = max(0,(max_pixel_value + 1) + over32 - Y); // 288-Y : 0 to 32 + U = ((U*multiplier) + (half_pixel_value*(over32 - multiplier))) >> SHIFT; + V = ((V*multiplier) + (half_pixel_value*(over32 - multiplier))) >> SHIFT; + Y = max_pixel_value; + } + } + else { + // of_subtract + if (Y<0) { // Apply overbrightness to UV + int multiplier = min(-Y,over32); // 0 to 32 + U = ((U*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; + V = ((V*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; + Y = 0; + } } baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); diff --git a/avs_core/filters/overlay/OF_subtract.cpp b/avs_core/filters/overlay/OF_subtract.cpp index 033c3b75c..7e5fdaa65 100644 --- a/avs_core/filters/overlay/OF_subtract.cpp +++ b/avs_core/filters/overlay/OF_subtract.cpp @@ -39,6 +39,8 @@ #include +#if 0 +// Common with OF_Add void OL_SubtractImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { if (bits_per_pixel == 8) BlendImageMask(base, overlay, mask); @@ -202,4 +204,5 @@ void OL_SubtractImage::BlendImage(Image444* base, Image444* overlay) { } } } +#endif diff --git a/avs_core/filters/overlay/overlay.cpp b/avs_core/filters/overlay/overlay.cpp index 5a7787918..2db748722 100644 --- a/avs_core/filters/overlay/overlay.cpp +++ b/avs_core/filters/overlay/overlay.cpp @@ -482,9 +482,10 @@ PVideoFrame __stdcall Overlay::GetFrame(int n, IScriptEnvironment *env) { ClipFrames(img, 
maskImg, offset_x + con_x_offset, offset_y + con_y_offset); } - OverlayFunction* func = SelectFunction(name, env); + OverlayFunction* func = SelectFunction(name, of_mode, env); // Process the image + func->setMode(of_mode); func->setBitsPerPixel(bits_per_pixel); func->setOpacity(opacity + op_offset); func->setEnv(env); @@ -560,43 +561,68 @@ PVideoFrame __stdcall Overlay::GetFrame(int n, IScriptEnvironment *env) { *************************/ -OverlayFunction* Overlay::SelectFunction(const char* name, IScriptEnvironment* env) { +OverlayFunction* Overlay::SelectFunction(const char* name, int &of_mode, IScriptEnvironment* env) { - if (!lstrcmpi(name, "Blend")) + if (!lstrcmpi(name, "Blend")) { + of_mode = OF_Blend; return new OL_BlendImage(); + } - if (!lstrcmpi(name, "Add")) + if (!lstrcmpi(name, "Add")) { + of_mode = OF_Add; return new OL_AddImage(); + } - if (!lstrcmpi(name, "Subtract")) - return new OL_SubtractImage(); + if (!lstrcmpi(name, "Subtract")) { + of_mode = OF_Subtract; + //return new OL_SubtractImage(); + return new OL_AddImage(); // common with Add + } - if (!lstrcmpi(name, "Multiply")) + if (!lstrcmpi(name, "Multiply")) { + of_mode = OF_Multiply; return new OL_MultiplyImage(); + } - if (!lstrcmpi(name, "Chroma")) + if (!lstrcmpi(name, "Chroma")) { + of_mode = OF_Chroma; return new OL_BlendChromaImage(); + } - if (!lstrcmpi(name, "Luma")) + if (!lstrcmpi(name, "Luma")) { + of_mode = OF_Luma; return new OL_BlendLumaImage(); + } - if (!lstrcmpi(name, "Lighten")) + if (!lstrcmpi(name, "Lighten")) { + of_mode = OF_Lighten; return new OL_LightenImage(); + } - if (!lstrcmpi(name, "Darken")) + if (!lstrcmpi(name, "Darken")) { + of_mode = OF_Darken; return new OL_DarkenImage(); + } - if (!lstrcmpi(name, "SoftLight")) + if (!lstrcmpi(name, "SoftLight")) { + of_mode = OF_SoftLight; return new OL_SoftLightImage(); + } - if (!lstrcmpi(name, "HardLight")) + if (!lstrcmpi(name, "HardLight")) { + of_mode = OF_HardLight; return new OL_HardLightImage(); + } - if (!lstrcmpi(name, "Difference")) + if (!lstrcmpi(name, "Difference")) { + of_mode = OF_Difference; return new OL_DifferenceImage(); + } - if (!lstrcmpi(name, "Exclusion")) + if (!lstrcmpi(name, "Exclusion")) { + of_mode = OF_Exclusion; return new OL_ExclusionImage(); + } env->ThrowError("Overlay: Invalid 'Mode' specified."); return 0; diff --git a/avs_core/filters/overlay/overlay.h b/avs_core/filters/overlay/overlay.h index 9389dcbcb..fb038e74c 100644 --- a/avs_core/filters/overlay/overlay.h +++ b/avs_core/filters/overlay/overlay.h @@ -60,7 +60,7 @@ class Overlay : public GenericVideoFilter } private: - static OverlayFunction* SelectFunction(const char* name, IScriptEnvironment* env); + static OverlayFunction* SelectFunction(const char* name, int &of_mode, IScriptEnvironment* env); #if 0 ConvertFrom444* SelectOutputCS(const char* name, IScriptEnvironment* env); static ConvertTo444* SelectInputCS(VideoInfo* VidI, IScriptEnvironment* env, bool full_range); @@ -97,6 +97,8 @@ class Overlay : public GenericVideoFilter int pixelsize; int bits_per_pixel; + int of_mode; + }; diff --git a/avs_core/filters/overlay/overlayfunctions.h b/avs_core/filters/overlay/overlayfunctions.h index ed8eacd22..c566ac8c9 100644 --- a/avs_core/filters/overlay/overlayfunctions.h +++ b/avs_core/filters/overlay/overlayfunctions.h @@ -42,6 +42,21 @@ #include "imghelpers.h" #include "blend_common.h" +enum { + OF_Blend = 0, + OF_Add, + OF_Subtract, + OF_Multiply, + OF_Chroma, + OF_Luma, + OF_Lighten, + OF_Darken, + OF_SoftLight, + OF_HardLight, + OF_Difference, + 
OF_Exclusion +}; + class OverlayFunction { public: OverlayFunction() { @@ -49,12 +64,14 @@ class OverlayFunction { void setOpacity(int _opacity) { opacity = clamp(_opacity,0,256); inv_opacity = 256-opacity; } void setEnv(IScriptEnvironment *_env) { env = _env;} void setBitsPerPixel(int _bits_per_pixel) { bits_per_pixel = _bits_per_pixel; } + void setMode(int _of_mode) { of_mode = _of_mode; } virtual void DoBlendImage(Image444* base, Image444* overlay) = 0; virtual void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) = 0; protected: int opacity; int inv_opacity; int bits_per_pixel; + int of_mode; // add/subtract, etc IScriptEnvironment *env; }; @@ -68,15 +85,18 @@ class OL_BlendImage : public OverlayFunction { private: }; +// common add/subtract class OL_AddImage : public OverlayFunction { void DoBlendImage(Image444* base, Image444* overlay); void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); //template //void BlendImage(Image444* base, Image444* overlay); - template + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; +#if 0 +// common with Add class OL_SubtractImage : public OverlayFunction { void DoBlendImage(Image444* base, Image444* overlay); void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); @@ -85,6 +105,7 @@ class OL_SubtractImage : public OverlayFunction { template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; +#endif class OL_MultiplyImage : public OverlayFunction { void DoBlendImage(Image444* base, Image444* overlay); From 384f10d70bb42c42209769aa18fbad4ffa0eb91d Mon Sep 17 00:00:00 2001 From: Pinterf Date: Thu, 13 Oct 2016 16:57:30 +0200 Subject: [PATCH 106/120] Overlay: Darken/Lighten to 10-16 bit --- avs_core/filters/overlay/OF_darken.cpp | 206 ++++++++++++++------ avs_core/filters/overlay/OF_lighten.cpp | 5 +- avs_core/filters/overlay/blend_common.cpp | 65 ++++-- avs_core/filters/overlay/blend_common.h | 2 + avs_core/filters/overlay/overlay.cpp | 3 +- avs_core/filters/overlay/overlayfunctions.h | 9 +- 6 files changed, 210 insertions(+), 80 deletions(-) diff --git a/avs_core/filters/overlay/OF_darken.cpp b/avs_core/filters/overlay/OF_darken.cpp index 089f4945c..26c7f53cc 100644 --- a/avs_core/filters/overlay/OF_darken.cpp +++ b/avs_core/filters/overlay/OF_darken.cpp @@ -37,91 +37,186 @@ #include "overlayfunctions.h" #include +#include void OL_DarkenImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { - if (bits_per_pixel == 8) - BlendImageMask(base, overlay, mask); - //else if(bits_per_pixel == 32) - // BlendImageMask(base, overlay, mask); - else if(bits_per_pixel == 16) - BlendImageMask(base, overlay, mask); + if(of_mode == OF_Darken) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + } + else { + // OF_Lighten + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + } } void OL_DarkenImage::DoBlendImage(Image444* base, Image444* overlay) { - if (bits_per_pixel == 8) - BlendImage(base, overlay); - //else if(bits_per_pixel == 32) - // BlendImage(base, overlay); - else if(bits_per_pixel == 16) - BlendImage(base, overlay); + if(of_mode == OF_Darken) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, 
nullptr); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, nullptr); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + } + else { + // OF_Lighten + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, nullptr); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, nullptr); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + } } -template +template void OL_DarkenImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { - BYTE* baseY = base->GetPtr(PLANAR_Y); - BYTE* baseU = base->GetPtr(PLANAR_U); - BYTE* baseV = base->GetPtr(PLANAR_V); - BYTE* ovY = overlay->GetPtr(PLANAR_Y); - BYTE* ovU = overlay->GetPtr(PLANAR_U); - BYTE* ovV = overlay->GetPtr(PLANAR_V); + pixel_t* baseY = reinterpret_cast(base->GetPtr(PLANAR_Y)); + pixel_t* baseU = reinterpret_cast(base->GetPtr(PLANAR_U)); + pixel_t* baseV = reinterpret_cast(base->GetPtr(PLANAR_V)); + + pixel_t* ovY = reinterpret_cast(overlay->GetPtr(PLANAR_Y)); + pixel_t* ovU = reinterpret_cast(overlay->GetPtr(PLANAR_U)); + pixel_t* ovV = reinterpret_cast(overlay->GetPtr(PLANAR_V)); + + pixel_t* maskY = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_Y)) : nullptr; + pixel_t* maskU = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_U)) : nullptr; + pixel_t* maskV = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_V)) : nullptr; + + const int half_pixel_value = (sizeof(pixel_t) == 1) ? 128 : (1 << (bits_per_pixel - 1)); + const int max_pixel_value = (sizeof(pixel_t) == 1) ? 255 : (1 << bits_per_pixel) - 1; + const int pixel_range = max_pixel_value + 1; + const int MASK_CORR_SHIFT = (sizeof(pixel_t) == 1) ? 8 : bits_per_pixel; + const int OPACITY_SHIFT = 8; // opacity always max 0..256 + const int basepitch = (base->pitch) / sizeof(pixel_t); + const int overlaypitch = (overlay->pitch) / sizeof(pixel_t); + const int maskpitch = maskMode ? 
(mask->pitch) / sizeof(pixel_t) : 0; + + // avoid "uint16*uint16 can't get into int32" overflows + typedef std::conditional < sizeof(pixel_t) == 1, int, typename std::conditional < sizeof(pixel_t) == 2, __int64, float>::type >::type result_t; - BYTE* maskY = mask->GetPtr(PLANAR_Y); - BYTE* maskU = mask->GetPtr(PLANAR_U); - BYTE* maskV = mask->GetPtr(PLANAR_V); int w = base->w(); int h = base->h(); if (opacity == 256) { - for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) { - if (ovY[x] < baseY[x] ) { - baseY[x] = (BYTE)((((256-maskY[x])*baseY[x]) + (maskY[x]*ovY[x]+128))>>8); - baseU[x] = (BYTE)((((256-maskU[x])*baseU[x]) + (maskU[x]*ovU[x]+128))>>8); - baseV[x] = (BYTE)((((256-maskV[x])*baseV[x]) + (maskV[x]*ovV[x]+128))>>8); + if(maskMode) { + // opacity == 256 && maskMode + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + bool cmp; + if (of_darken) + cmp = ovY[x] < baseY[x]; + else + cmp = ovY[x] > baseY[x]; + if (cmp) { + result_t maskYx = maskY[x]; + result_t maskUx = maskU[x]; + result_t maskVx = maskV[x]; + baseY[x] = (pixel_t)((((pixel_range - maskYx)*baseY[x]) + (maskYx * ovY[x] + half_pixel_value)) >> MASK_CORR_SHIFT); + baseU[x] = (pixel_t)((((pixel_range - maskUx)*baseU[x]) + (maskUx * ovU[x] + half_pixel_value)) >> MASK_CORR_SHIFT); + baseV[x] = (pixel_t)((((pixel_range - maskVx)*baseV[x]) + (maskVx * ovV[x] + half_pixel_value)) >> MASK_CORR_SHIFT); + } } - } - maskY += mask->pitch; - maskU += mask->pitch; - maskV += mask->pitch; + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; + } + } else { + // opacity == 256 && !maskMode + if(of_darken) { + if (sizeof(pixel_t)==1 && (env->GetCPUFlags() & CPUF_SSE4_1)) { + overlay_darken_sse41((BYTE *)baseY, (BYTE *)baseU, (BYTE *)baseV, (BYTE *)ovY, (BYTE *)ovU, (BYTE *)ovV, basepitch, overlaypitch, w, h); + } else if (sizeof(pixel_t)==1 && (env->GetCPUFlags() & CPUF_SSE2)) { + overlay_darken_sse2((BYTE *)baseY, (BYTE *)baseU, (BYTE *)baseV, (BYTE *)ovY, (BYTE *)ovU, (BYTE *)ovV, basepitch, overlaypitch, w, h); + } else + #ifdef X86_32 + if (sizeof(pixel_t)==1 && (env->GetCPUFlags() & CPUF_MMX)) { + overlay_darken_mmx((BYTE *)baseY, (BYTE *)baseU, (BYTE *)baseV, (BYTE *)ovY, (BYTE *)ovU, (BYTE *)ovV, basepitch, overlaypitch, w, h); + } else + #endif + { + overlay_darken_c((BYTE *)baseY, (BYTE *)baseU, (BYTE *)baseV, (BYTE *)ovY, (BYTE *)ovU, (BYTE *)ovV, basepitch, basepitch, w, h); + } + } else { + // OF_Lighten + if (sizeof(pixel_t)==1 && (env->GetCPUFlags() & CPUF_SSE4_1)) { + overlay_lighten_sse41((BYTE *)baseY, (BYTE *)baseU, (BYTE *)baseV, (BYTE *)ovY, (BYTE *)ovU, (BYTE *)ovV, basepitch, basepitch, w, h); + } else if (sizeof(pixel_t)==1 && (env->GetCPUFlags() & CPUF_SSE2)) { + overlay_lighten_sse2((BYTE *)baseY, (BYTE *)baseU, (BYTE *)baseV, (BYTE *)ovY, (BYTE *)ovU, (BYTE *)ovV, basepitch, basepitch, w, h); + } else +#ifdef X86_32 + if (sizeof(pixel_t)==1 && (env->GetCPUFlags() & CPUF_MMX)) { + overlay_lighten_mmx((BYTE *)baseY, (BYTE *)baseU, (BYTE *)baseV, (BYTE *)ovY, (BYTE *)ovU, (BYTE *)ovV, basepitch, basepitch, w, h); + } else +#endif + { + overlay_lighten_c((BYTE *)baseY, (BYTE *)baseU, (BYTE *)baseV, (BYTE *)ovY, (BYTE *)ovU, (BYTE *)ovV, basepitch, basepitch, w, h); + } + } } } else { + // opacity != 256 
&& maskMode for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - if (ovY[x] < baseY[x] ) { - int mY = (maskY[x] * opacity)>>8; - int mU = (maskU[x] * opacity)>>8; - int mV = (maskV[x] * opacity)>>8; - baseY[x] = (BYTE)((((256-mY)*baseY[x]) + (mY*ovY[x]+128))>>8); - baseU[x] = (BYTE)((((256-mU)*baseU[x]) + (mU*ovU[x]+128))>>8); - baseV[x] = (BYTE)((((256-mV)*baseV[x]) + (mV*ovV[x]+128))>>8); + bool cmp; + if (of_darken) + cmp = ovY[x] < baseY[x]; + else + cmp = ovY[x] > baseY[x]; + if (cmp) { + if (maskMode) { + result_t mY = (maskY[x] * opacity) >> OPACITY_SHIFT; + result_t mU = (maskU[x] * opacity) >> OPACITY_SHIFT; + result_t mV = (maskV[x] * opacity) >> OPACITY_SHIFT; + baseY[x] = (pixel_t)((((pixel_range - mY)*baseY[x]) + (mY*ovY[x] + half_pixel_value)) >> MASK_CORR_SHIFT); + baseU[x] = (pixel_t)((((pixel_range - mU)*baseU[x]) + (mU*ovU[x] + half_pixel_value)) >> MASK_CORR_SHIFT); + baseV[x] = (pixel_t)((((pixel_range - mV)*baseV[x]) + (mV*ovV[x] + half_pixel_value)) >> MASK_CORR_SHIFT); + } + else { + baseY[x] = (pixel_t)(((inv_opacity*baseY[x]) + (opacity*ovY[x] + 128)) >> OPACITY_SHIFT); // 128: half 256 opacity mul rounding + baseU[x] = (pixel_t)(((inv_opacity*baseU[x]) + (opacity*ovU[x] + 128)) >> OPACITY_SHIFT); + baseV[x] = (pixel_t)(((inv_opacity*baseV[x]) + (opacity*ovV[x] + 128)) >> OPACITY_SHIFT); + } } } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; - maskY += mask->pitch; - maskU += mask->pitch; - maskV += mask->pitch; + if(maskMode) { + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; + } } } - } +#if 0 template void OL_DarkenImage::BlendImage(Image444* base, Image444* overlay) { @@ -169,4 +264,5 @@ void OL_DarkenImage::BlendImage(Image444* base, Image444* overlay) { } } } +#endif diff --git a/avs_core/filters/overlay/OF_lighten.cpp b/avs_core/filters/overlay/OF_lighten.cpp index 1aa105fff..42dc57b9f 100644 --- a/avs_core/filters/overlay/OF_lighten.cpp +++ b/avs_core/filters/overlay/OF_lighten.cpp @@ -38,6 +38,7 @@ #include +#if 0 void OL_LightenImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { if (bits_per_pixel == 8) BlendImageMask(base, overlay, mask); @@ -147,7 +148,7 @@ void OL_LightenImage::BlendImage(Image444* base, Image444* overlay) { } else #endif { - overlay_lighten_c(baseY, baseU, baseV, ovY, ovU, ovV, base->pitch, overlay->pitch, w, h); + overlay_lighten_c(baseY, baseU, baseV, ovY, ovU, ovV, base->pitch, overlay->pitch, w, h); } } else { for (int y = 0; y < h; y++) { @@ -168,4 +169,4 @@ void OL_LightenImage::BlendImage(Image444* base, Image444* overlay) { } } } - +#endif diff --git a/avs_core/filters/overlay/blend_common.cpp b/avs_core/filters/overlay/blend_common.cpp index 52598f834..92f490937 100644 --- a/avs_core/filters/overlay/blend_common.cpp +++ b/avs_core/filters/overlay/blend_common.cpp @@ -41,6 +41,7 @@ // Intrinsics for SSE4.1, SSSE3, SSE3, SSE2, ISSE and MMX #include +#include /******************************* ********* Masked Blend ******** @@ -88,7 +89,8 @@ __forceinline static __m128i overlay_merge_mask_sse2(const __m128i& p1, const __ ********* Blend Opaque ********* ** Use for Lighten and Darken ** ********************************/ -__forceinline BYTE overlay_blend_opaque_c_core(const BYTE p1, const BYTE p2, const BYTE mask) { +template +__forceinline pixel_t 
overlay_blend_opaque_c_core(const pixel_t p1, const pixel_t p2, const pixel_t mask) { return (mask) ? p2 : p1; } @@ -481,16 +483,29 @@ typedef __m128i (OverlaySseCompare)(const __m128i&, const __m128i&, const __m128 #ifdef X86_32 typedef __m64 (OverlayMmxCompare)(const __m64&, const __m64&, const __m64&); #endif -typedef int (OverlayCCompare)(BYTE, BYTE); -template -__forceinline void overlay_darklighten_c(BYTE *p1Y, BYTE *p1U, BYTE *p1V, const BYTE *p2Y, const BYTE *p2U, const BYTE *p2V, int p1_pitch, int p2_pitch, int width, int height) { +typedef int (OverlayCCompare)(BYTE, BYTE); + +template compare*/> +__forceinline void overlay_darklighten_c(BYTE *p1Y_8, BYTE *p1U_8, BYTE *p1V_8, const BYTE *p2Y_8, const BYTE *p2U_8, const BYTE *p2V_8, int p1_pitch, int p2_pitch, int width, int height) { + pixel_t* p1Y = reinterpret_cast(p1Y_8); + pixel_t* p1U = reinterpret_cast(p1U_8); + pixel_t* p1V = reinterpret_cast(p1V_8); + + const pixel_t* p2Y = reinterpret_cast(p2Y_8); + const pixel_t* p2U = reinterpret_cast(p2U_8); + const pixel_t* p2V = reinterpret_cast(p2V_8); + + // pitches are already scaled + //p1_pitch /= sizeof(pixel_t); + //p2_pitch /= sizeof(pixel_t); + for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - int mask = compare(p1Y[x], p2Y[x]); - p1Y[x] = overlay_blend_opaque_c_core(p1Y[x], p2Y[x], mask); - p1U[x] = overlay_blend_opaque_c_core(p1U[x], p2U[x], mask); - p1V[x] = overlay_blend_opaque_c_core(p1V[x], p2V[x], mask); + int mask = darken ? (p2Y[x] <= p1Y[x]) : (p2Y[x] >= p1Y[x]); // compare(p1Y[x], p2Y[x]); + p1Y[x] = overlay_blend_opaque_c_core(p1Y[x], p2Y[x], mask); + p1U[x] = overlay_blend_opaque_c_core(p1U[x], p2U[x], mask); + p1V[x] = overlay_blend_opaque_c_core(p1V[x], p2V[x], mask); } p1Y += p1_pitch; @@ -541,9 +556,9 @@ __forceinline void overlay_darklighten_mmx(BYTE *p1Y, BYTE *p1U, BYTE *p1V, cons // Leftover value for (int x = wMod8; x < width; x++) { int mask = compare_c(p1Y[x], p2Y[x]); - p1Y[x] = overlay_blend_opaque_c_core(p1Y[x], p2Y[x], mask); - p1U[x] = overlay_blend_opaque_c_core(p1U[x], p2U[x], mask); - p1V[x] = overlay_blend_opaque_c_core(p1V[x], p2V[x], mask); + p1Y[x] = overlay_blend_opaque_c_core(p1Y[x], p2Y[x], mask); + p1U[x] = overlay_blend_opaque_c_core(p1U[x], p2U[x], mask); + p1V[x] = overlay_blend_opaque_c_core(p1V[x], p2V[x], mask); } p1Y += p1_pitch; @@ -596,9 +611,9 @@ __forceinline void overlay_darklighten_sse(BYTE *p1Y, BYTE *p1U, BYTE *p1V, cons // Leftover value for (int x = wMod16; x < width; x++) { int mask = compare_c(p1Y[x], p2Y[x]); - p1Y[x] = overlay_blend_opaque_c_core(p1Y[x], p2Y[x], mask); - p1U[x] = overlay_blend_opaque_c_core(p1U[x], p2U[x], mask); - p1V[x] = overlay_blend_opaque_c_core(p1V[x], p2V[x], mask); + p1Y[x] = overlay_blend_opaque_c_core(p1Y[x], p2Y[x], mask); + p1U[x] = overlay_blend_opaque_c_core(p1U[x], p2U[x], mask); + p1V[x] = overlay_blend_opaque_c_core(p1V[x], p2V[x], mask); } p1Y += p1_pitch; @@ -628,7 +643,8 @@ __forceinline __m128i overlay_darken_sse_cmp(const __m128i& p1, const __m128i& p return _mm_cmpeq_epi8(diff, zero); } -__forceinline int overlay_lighten_c_cmp(BYTE p1, BYTE p2) { +template +__forceinline int overlay_lighten_c_cmp(pixel_t p1, pixel_t p2) { return p2 >= p1; } @@ -645,13 +661,24 @@ __forceinline __m128i overlay_lighten_sse_cmp(const __m128i& p1, const __m128i& } // Exported function -void overlay_darken_c(BYTE *p1Y, BYTE *p1U, BYTE *p1V, const BYTE *p2Y, const BYTE *p2U, const BYTE *p2V, int p1_pitch, int p2_pitch, int width, int height) { - overlay_darklighten_c(p1Y, p1U, 
p1V, p2Y, p2U, p2V, p1_pitch, p2_pitch, width, height); +template +void overlay_darken_c(BYTE *p1Y_8, BYTE *p1U_8, BYTE *p1V_8, const BYTE *p2Y_8, const BYTE *p2U_8, const BYTE *p2V_8, int p1_pitch, int p2_pitch, int width, int height) { + overlay_darklighten_c(p1Y_8, p1U_8, p1V_8, p2Y_8, p2U_8, p2V_8, p1_pitch, p2_pitch, width, height); } -void overlay_lighten_c(BYTE *p1Y, BYTE *p1U, BYTE *p1V, const BYTE *p2Y, const BYTE *p2U, const BYTE *p2V, int p1_pitch, int p2_pitch, int width, int height) { - overlay_darklighten_c(p1Y, p1U, p1V, p2Y, p2U, p2V, p1_pitch, p2_pitch, width, height); +// instantiate +template void overlay_darken_c(BYTE *p1Y_8, BYTE *p1U_8, BYTE *p1V_8, const BYTE *p2Y_8, const BYTE *p2U_8, const BYTE *p2V_8, int p1_pitch, int p2_pitch, int width, int height); +template void overlay_darken_c(BYTE *p1Y_8, BYTE *p1U_8, BYTE *p1V_8, const BYTE *p2Y_8, const BYTE *p2U_8, const BYTE *p2V_8, int p1_pitch, int p2_pitch, int width, int height); + +template +void overlay_lighten_c(BYTE *p1Y_8, BYTE *p1U_8, BYTE *p1V_8, const BYTE *p2Y_8, const BYTE *p2U_8, const BYTE *p2V_8, int p1_pitch, int p2_pitch, int width, int height) { + overlay_darklighten_c(p1Y_8, p1U_8, p1V_8, p2Y_8, p2U_8, p2V_8, p1_pitch, p2_pitch, width, height); } +// instantiate +template void overlay_lighten_c(BYTE *p1Y_8, BYTE *p1U_8, BYTE *p1V_8, const BYTE *p2Y_8, const BYTE *p2U_8, const BYTE *p2V_8, int p1_pitch, int p2_pitch, int width, int height); +template void overlay_lighten_c(BYTE *p1Y_8, BYTE *p1U_8, BYTE *p1V_8, const BYTE *p2Y_8, const BYTE *p2U_8, const BYTE *p2V_8, int p1_pitch, int p2_pitch, int width, int height); + + #ifdef X86_32 void overlay_darken_mmx(BYTE *p1Y, BYTE *p1U, BYTE *p1V, const BYTE *p2Y, const BYTE *p2U, const BYTE *p2V, int p1_pitch, int p2_pitch, int width, int height) { overlay_darklighten_mmx(p1Y, p1U, p1V, p2Y, p2U, p2V, p1_pitch, p2_pitch, width, height); diff --git a/avs_core/filters/overlay/blend_common.h b/avs_core/filters/overlay/blend_common.h index b10d44ada..c1a5bd4f1 100644 --- a/avs_core/filters/overlay/blend_common.h +++ b/avs_core/filters/overlay/blend_common.h @@ -77,7 +77,9 @@ void overlay_blend_sse2_plane_masked_opacity(BYTE *p1, const BYTE *p2, const BYT const int width, const int height, const int opacity); // Mode: Darken/lighten +template void overlay_darken_c(BYTE *p1Y, BYTE *p1U, BYTE *p1V, const BYTE *p2Y, const BYTE *p2U, const BYTE *p2V, int p1_pitch, int p2_pitch, int width, int height); +template void overlay_lighten_c(BYTE *p1Y, BYTE *p1U, BYTE *p1V, const BYTE *p2Y, const BYTE *p2U, const BYTE *p2V, int p1_pitch, int p2_pitch, int width, int height); #ifdef X86_32 diff --git a/avs_core/filters/overlay/overlay.cpp b/avs_core/filters/overlay/overlay.cpp index 2db748722..5e115020c 100644 --- a/avs_core/filters/overlay/overlay.cpp +++ b/avs_core/filters/overlay/overlay.cpp @@ -596,7 +596,8 @@ OverlayFunction* Overlay::SelectFunction(const char* name, int &of_mode, IScript if (!lstrcmpi(name, "Lighten")) { of_mode = OF_Lighten; - return new OL_LightenImage(); + //return new OL_LightenImage(); + return new OL_DarkenImage(); // common with Darken } if (!lstrcmpi(name, "Darken")) { diff --git a/avs_core/filters/overlay/overlayfunctions.h b/avs_core/filters/overlay/overlayfunctions.h index c566ac8c9..71e51d8fd 100644 --- a/avs_core/filters/overlay/overlayfunctions.h +++ b/avs_core/filters/overlay/overlayfunctions.h @@ -136,6 +136,7 @@ class OL_BlendChromaImage : public OverlayFunction { private: }; +#if 0 class OL_LightenImage : public OverlayFunction { 
void DoBlendImage(Image444* base, Image444* overlay); void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); @@ -144,13 +145,15 @@ class OL_LightenImage : public OverlayFunction { template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; +#endif +// common darken/lighten class OL_DarkenImage : public OverlayFunction { void DoBlendImage(Image444* base, Image444* overlay); void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); - template - void BlendImage(Image444* base, Image444* overlay); - template + //template + //void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; From 3c993d7064c1d665e6457a16edf42525968ceadb Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 14 Oct 2016 11:17:41 +0200 Subject: [PATCH 107/120] Overlay: Blend,Chroma,Luma 10-16 bit --- avs_core/filters/overlay/OF_blend.cpp | 120 +++++++++++--- avs_core/filters/overlay/OF_lumachroma.cpp | 180 +++++++++++++++++---- avs_core/filters/overlay/blend_common.cpp | 140 +++++++++++++--- avs_core/filters/overlay/blend_common.h | 3 + 4 files changed, 371 insertions(+), 72 deletions(-) diff --git a/avs_core/filters/overlay/OF_blend.cpp b/avs_core/filters/overlay/OF_blend.cpp index b8b6d5bad..4d078e7f8 100644 --- a/avs_core/filters/overlay/OF_blend.cpp +++ b/avs_core/filters/overlay/OF_blend.cpp @@ -42,19 +42,19 @@ void OL_BlendImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { if (bits_per_pixel == 8) BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); //else if(bits_per_pixel == 32) // BlendImageMask(base, overlay, mask); - else if(bits_per_pixel == 16) - BlendImageMask(base, overlay, mask); } void OL_BlendImage::DoBlendImage(Image444* base, Image444* overlay) { if (bits_per_pixel == 8) BlendImage(base, overlay); + else if(bits_per_pixel <= 16) + BlendImage(base, overlay); //else if(bits_per_pixel == 32) // BlendImage(base, overlay); - else if(bits_per_pixel == 16) - BlendImage(base, overlay); } @@ -75,14 +75,16 @@ void OL_BlendImage::BlendImageMask(Image444* base, Image444* overlay, Image444* int w = base->w(); int h = base->h(); + const int pixelsize = sizeof(pixel_t); + if (opacity == 256) { - if (env->GetCPUFlags() & CPUF_SSE2) { + if (pixelsize==1 && (env->GetCPUFlags() & CPUF_SSE2)) { overlay_blend_sse2_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); overlay_blend_sse2_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h); overlay_blend_sse2_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h); } else #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) { + if (pixelsize==1 && (env->GetCPUFlags() & CPUF_MMX)) { overlay_blend_mmx_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); overlay_blend_mmx_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h); overlay_blend_mmx_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h); @@ -90,18 +92,42 @@ void OL_BlendImage::BlendImageMask(Image444* base, Image444* overlay, Image444* } else #endif { - overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); - overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h); - overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h); + switch 
(bits_per_pixel) { + case 8: + overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); + overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h); + overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h); + break; + case 10: + overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); + overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h); + overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h); + break; + case 12: + overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); + overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h); + overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h); + break; + case 14: + overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); + overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h); + overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h); + break; + case 16: + overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); + overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h); + overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h); + break; + } } } else { - if (env->GetCPUFlags() & CPUF_SSE2) { + if (pixelsize==1 && (env->GetCPUFlags() & CPUF_SSE2)) { overlay_blend_sse2_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); overlay_blend_sse2_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); overlay_blend_sse2_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); } else #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) { + if (pixelsize==1 && (env->GetCPUFlags() & CPUF_MMX)) { overlay_blend_mmx_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); overlay_blend_mmx_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); overlay_blend_mmx_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); @@ -109,9 +135,33 @@ void OL_BlendImage::BlendImageMask(Image444* base, Image444* overlay, Image444* } else #endif { - overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); - overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); - overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + switch (bits_per_pixel) { + case 8: + overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + case 10: + overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + 
overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + case 12: + overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + case 14: + overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + case 16: + overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + } } } } @@ -129,18 +179,20 @@ void OL_BlendImage::BlendImage(Image444* base, Image444* overlay) { int w = base->w(); int h = base->h(); + const int pixelsize = sizeof(pixel_t); + if (opacity == 256) { - env->BitBlt(baseY, base->pitch, ovY, overlay->pitch, w, h); - env->BitBlt(baseU, base->pitch, ovU, overlay->pitch, w, h); - env->BitBlt(baseV, base->pitch, ovV, overlay->pitch, w, h); + env->BitBlt(baseY, base->pitch, ovY, overlay->pitch, w*pixelsize, h); + env->BitBlt(baseU, base->pitch, ovU, overlay->pitch, w*pixelsize, h); + env->BitBlt(baseV, base->pitch, ovV, overlay->pitch, w*pixelsize, h); } else { - if (env->GetCPUFlags() & CPUF_SSE2) { + if (pixelsize==1 && (env->GetCPUFlags() & CPUF_SSE2)) { overlay_blend_sse2_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); overlay_blend_sse2_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); overlay_blend_sse2_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); } else #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) { + if (pixelsize==1 && (env->GetCPUFlags() & CPUF_MMX)) { overlay_blend_mmx_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); overlay_blend_mmx_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); overlay_blend_mmx_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); @@ -148,9 +200,33 @@ void OL_BlendImage::BlendImage(Image444* base, Image444* overlay) { } else #endif { - overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); - overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); - overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + switch (bits_per_pixel) { + case 8: + overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + break; + case 10: + overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, 
overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + break; + case 12: + overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + break; + case 14: + overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + break; + case 16: + overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + break; + } } } } diff --git a/avs_core/filters/overlay/OF_lumachroma.cpp b/avs_core/filters/overlay/OF_lumachroma.cpp index 5d873d596..9539e6198 100644 --- a/avs_core/filters/overlay/OF_lumachroma.cpp +++ b/avs_core/filters/overlay/OF_lumachroma.cpp @@ -42,37 +42,37 @@ void OL_BlendLumaImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { if (bits_per_pixel == 8) BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); //else if(bits_per_pixel == 32) // BlendImageMask(base, overlay, mask); - else if(bits_per_pixel == 16) - BlendImageMask(base, overlay, mask); } void OL_BlendLumaImage::DoBlendImage(Image444* base, Image444* overlay) { if (bits_per_pixel == 8) BlendImage(base, overlay); + else if(bits_per_pixel <= 16) + BlendImage(base, overlay); //else if(bits_per_pixel == 32) // BlendImage(base, overlay); - else if(bits_per_pixel == 16) - BlendImage(base, overlay); } void OL_BlendChromaImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { if (bits_per_pixel == 8) BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); //else if(bits_per_pixel == 32) // BlendImageMask(base, overlay, mask); - else if(bits_per_pixel == 16) - BlendImageMask(base, overlay, mask); } void OL_BlendChromaImage::DoBlendImage(Image444* base, Image444* overlay) { if (bits_per_pixel == 8) BlendImage(base, overlay); + else if(bits_per_pixel <= 16) + BlendImage(base, overlay); //else if(bits_per_pixel == 32) // BlendImage(base, overlay); - else if(bits_per_pixel == 16) - BlendImage(base, overlay); } template @@ -86,31 +86,65 @@ void OL_BlendLumaImage::BlendImageMask(Image444* base, Image444* overlay, Image4 int w = base->w(); int h = base->h(); + const int pixelsize = sizeof(pixel_t); + if (opacity == 256) { - if (env->GetCPUFlags() & CPUF_SSE2) { + if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_SSE2)) { overlay_blend_sse2_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); } else #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) { + if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_MMX)) { overlay_blend_mmx_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); _mm_empty(); } else #endif { - overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); + switch (bits_per_pixel) { + case 8: + overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); + break; + case 10: + 
overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); + break; + case 12: + overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); + break; + case 14: + overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); + break; + case 16: + overlay_blend_c_plane_masked(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h); + break; + } } } else { - if (env->GetCPUFlags() & CPUF_SSE2) { + if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_SSE2)) { overlay_blend_sse2_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); } else #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) { + if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_MMX)) { overlay_blend_mmx_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); _mm_empty(); } else #endif { - overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + switch (bits_per_pixel) { + case 8: + overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + case 10: + overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + case 12: + overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + case 14: + overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + case 16: + overlay_blend_c_plane_masked_opacity(baseY, ovY, maskY, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + } } } } @@ -125,20 +159,38 @@ void OL_BlendLumaImage::BlendImage(Image444* base, Image444* overlay) { int w = base->w(); int h = base->h(); + const int pixelsize = sizeof(pixel_t); + if (opacity == 256) { - env->BitBlt(baseY, base->pitch, ovY, overlay->pitch, w, h); + env->BitBlt(baseY, base->pitch, ovY, overlay->pitch, w*pixelsize, h); } else { - if (env->GetCPUFlags() & CPUF_SSE2) { + if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_SSE2)) { overlay_blend_sse2_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); } else #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) { + if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_MMX)) { overlay_blend_mmx_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); _mm_empty(); } else #endif { - overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); + switch (bits_per_pixel) { + case 8: + overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); + break; + case 10: + overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); + break; + case 12: + overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); + break; + case 14: + overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); + break; + case 16: + overlay_blend_c_plane_opacity(baseY, ovY, base->pitch, overlay->pitch, w, h, opacity); + break; + } } } } @@ -159,37 +211,79 @@ void OL_BlendChromaImage::BlendImageMask(Image444* base, Image444* overlay, Imag int w = base->w(); int h = base->h(); + const int pixelsize = sizeof(pixel_t); + if (opacity == 256) { - if (env->GetCPUFlags() & CPUF_SSE2) { + if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_SSE2)) { 
 overlay_blend_sse2_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h);
 overlay_blend_sse2_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h);
 } else
#ifdef X86_32
- if (env->GetCPUFlags() & CPUF_MMX) {
+ if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_MMX)) {
 overlay_blend_mmx_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h);
 overlay_blend_mmx_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h);
 _mm_empty();
 } else
#endif
 {
- overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h);
- overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h);
+ switch (bits_per_pixel) {
+ case 8:
+ overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h);
+ overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h);
+ break;
+ case 10:
+ overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h);
+ overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h);
+ break;
+ case 12:
+ overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h);
+ overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h);
+ break;
+ case 14:
+ overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h);
+ overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h);
+ break;
+ case 16:
+ overlay_blend_c_plane_masked(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h);
+ overlay_blend_c_plane_masked(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h);
+ break;
+ }
 }
 }
 else {
- if (env->GetCPUFlags() & CPUF_SSE2) {
+ if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_SSE2)) {
 overlay_blend_sse2_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity);
 overlay_blend_sse2_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity);
 } else
#ifdef X86_32
- if (env->GetCPUFlags() & CPUF_MMX) {
+ if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_MMX)) {
 overlay_blend_mmx_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity);
 overlay_blend_mmx_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity);
 _mm_empty();
 } else
#endif
 {
- overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity);
- overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity);
+ switch (bits_per_pixel) {
+ case 8:
+ overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity);
+ overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity);
+ break;
+ case 10:
+ overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity);
+ overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity);
+ break;
+ case 12:
+ overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity);
+ overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch,
overlay->pitch, mask->pitch, w, h, opacity); + break; + case 14: + overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + case 16: + overlay_blend_c_plane_masked_opacity(baseU, ovU, maskU, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + overlay_blend_c_plane_masked_opacity(baseV, ovV, maskV, base->pitch, overlay->pitch, mask->pitch, w, h, opacity); + break; + } } } } @@ -205,24 +299,46 @@ void OL_BlendChromaImage::BlendImage(Image444* base, Image444* overlay) { int w = base->w(); int h = base->h(); + const int pixelsize = sizeof(pixel_t); + if (opacity == 256) { - env->BitBlt(baseU, base->pitch, ovU, overlay->pitch, w, h); - env->BitBlt(baseV, base->pitch, ovV, overlay->pitch, w, h); + env->BitBlt(baseU, base->pitch, ovU, overlay->pitch, w*pixelsize, h); + env->BitBlt(baseV, base->pitch, ovV, overlay->pitch, w*pixelsize, h); } else { - if (env->GetCPUFlags() & CPUF_SSE2) { + if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_SSE2)) { overlay_blend_sse2_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); overlay_blend_sse2_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); } else #ifdef X86_32 - if (env->GetCPUFlags() & CPUF_MMX) { + if (pixelsize == 1 && (env->GetCPUFlags() & CPUF_MMX)) { overlay_blend_mmx_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); overlay_blend_mmx_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); _mm_empty(); } else #endif { - overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); - overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + switch (bits_per_pixel) { + case 8: + overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + break; + case 10: + overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + break; + case 12: + overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + break; + case 14: + overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + break; + case 16: + overlay_blend_c_plane_opacity(baseU, ovU, base->pitch, overlay->pitch, w, h, opacity); + overlay_blend_c_plane_opacity(baseV, ovV, base->pitch, overlay->pitch, w, h, opacity); + break; + } } } } diff --git a/avs_core/filters/overlay/blend_common.cpp b/avs_core/filters/overlay/blend_common.cpp index 92f490937..d53c1b152 100644 --- a/avs_core/filters/overlay/blend_common.cpp +++ b/avs_core/filters/overlay/blend_common.cpp @@ -42,12 +42,19 @@ // Intrinsics for SSE4.1, SSSE3, SSE3, SSE2, ISSE and MMX #include #include +#include + /******************************* ********* Masked Blend ******** *******************************/ -__forceinline static BYTE overlay_blend_c_core(const BYTE p1, const BYTE p2, const int mask) { - return (BYTE)((((p1<<8) | 128) + (p2-p1)*mask) >> 8); +template +__forceinline static pixel_t overlay_blend_c_core(const pixel_t p1, const pixel_t p2, const pixel_t 
mask) { + return (pixel_t)(((((intermediate_result_t)p1 << bits_per_pixel) | (1 << (bits_per_pixel-1))) + (p2-p1)*(intermediate_result_t)mask) >> bits_per_pixel); +} + +__forceinline static BYTE overlay_blend_c_core_8(const BYTE p1, const BYTE p2, const int mask) { + return overlay_blend_c_core(p1, p2, mask); } #ifdef X86_32 @@ -67,7 +74,12 @@ __forceinline static __m128i overlay_blend_sse2_core(const __m128i& p1, const __ /******************************************* ********* Merge Two Masks Function ******** *******************************************/ -__forceinline static BYTE overley_merge_mask_c(const BYTE p1, const int p2) { +template +__forceinline static pixel_t overlay_merge_mask_c(const pixel_t p1, const pixel_t p2) { + return ((intermediate_result_t)p1*p2) >> bits_per_pixel; +} + +__forceinline static BYTE overlay_merge_mask_c_8(const BYTE p1, const int p2) { return (p1*p2) >> 8; } @@ -116,13 +128,25 @@ __forceinline __m128i overlay_blend_opaque_sse41_core(const __m128i& p1, const _ ********* Mode: Blend ******** ******************************/ +template void overlay_blend_c_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, const int p1_pitch, const int p2_pitch, const int mask_pitch, const int width, const int height) { + const int MASK_CORR_SHIFT = (sizeof(pixel_t) == 1) ? 8 : bits_per_pixel; + const int half_pixel_value_rounding = (1 << (MASK_CORR_SHIFT - 1)); + + // avoid "uint16*uint16 can't get into int32" overflows + typedef std::conditional < sizeof(pixel_t) == 1, int, typename std::conditional < sizeof(pixel_t) == 2, __int64, float>::type >::type result_t; + for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - BYTE result = overlay_blend_c_core(p1[x], p2[x], static_cast(mask[x])); - p1[x] = result; + pixel_t new_mask = reinterpret_cast(mask)[x]; + result_t p1x = reinterpret_cast(p1)[x]; + pixel_t p2x = reinterpret_cast(p2)[x]; + pixel_t result = (pixel_t)((((p1x << MASK_CORR_SHIFT) | half_pixel_value_rounding) + (p2x-p1x)*new_mask) >> MASK_CORR_SHIFT); + + //pixel_t result = overlay_blend_c_core(reinterpret_cast(p1)[x], reinterpret_cast(p2)[x], static_cast(reinterpret_cast(mask)[x])); + reinterpret_cast(p1)[x] = result; } p1 += p1_pitch; @@ -131,6 +155,24 @@ void overlay_blend_c_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, } } +// instantiate +template void overlay_blend_c_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, + const int p1_pitch, const int p2_pitch, const int mask_pitch, + const int width, const int height); +template void overlay_blend_c_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, + const int p1_pitch, const int p2_pitch, const int mask_pitch, + const int width, const int height); +template void overlay_blend_c_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, + const int p1_pitch, const int p2_pitch, const int mask_pitch, + const int width, const int height); +template void overlay_blend_c_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, + const int p1_pitch, const int p2_pitch, const int mask_pitch, + const int width, const int height); +template void overlay_blend_c_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, + const int p1_pitch, const int p2_pitch, const int mask_pitch, + const int width, const int height); + + #ifdef X86_32 // The following disable EMMS warning, since it's caller-called. 
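
// Illustrative sketch (not part of the patch): the per-pixel core of the
// bit-depth-generic masked blend instantiated above. For uint16_t pixels the
// (p2 - p1) * mask product can exceed 32 bits, so the arithmetic widens to
// 64 bits, mirroring the std::conditional intermediate type chosen in
// overlay_blend_c_plane_masked. Names below are illustrative only.
#include <cstdint>
#include <type_traits>

template <typename pixel_t, int bits_per_pixel>
static pixel_t blend_masked_pixel_sketch(pixel_t p1, pixel_t p2, pixel_t mask)
{
  typedef typename std::conditional<sizeof(pixel_t) == 1, int, std::int64_t>::type calc_t;
  const int shift = (sizeof(pixel_t) == 1) ? 8 : bits_per_pixel;
  const calc_t rounding = calc_t(1) << (shift - 1);
  const calc_t diff = (calc_t)p2 - (calc_t)p1;
  return (pixel_t)(((((calc_t)p1 << shift) | rounding) + diff * (calc_t)mask) >> shift);
}
// e.g. blend_masked_pixel_sketch<uint16_t, 10>(100, 900, 512) yields 500,
// roughly halfway between the two inputs at half mask weight.
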
#pragma warning (push) @@ -172,7 +214,7 @@ void overlay_blend_mmx_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, // Leftover value for (int x = wMod8; x < width; x++) { - BYTE result = overlay_blend_c_core(p1[x], p2[x], static_cast(mask[x])); + BYTE result = overlay_blend_c_core_8(p1[x], p2[x], static_cast(mask[x])); p1[x] = result; } @@ -226,7 +268,7 @@ void overlay_blend_sse2_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, // Leftover value for (int x = wMod16; x < width; x++) { - BYTE result = overlay_blend_c_core(p1[x], p2[x], static_cast(mask[x])); + BYTE result = overlay_blend_c_core_8(p1[x], p2[x], static_cast(mask[x])); p1[x] = result; } @@ -236,13 +278,26 @@ void overlay_blend_sse2_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, } } +template void overlay_blend_c_plane_opacity(BYTE *p1, const BYTE *p2, const int p1_pitch, const int p2_pitch, const int width, const int height, const int opacity) { + + const int OPACITY_SHIFT = 8; // opacity always max 0..256 + const int MASK_CORR_SHIFT = OPACITY_SHIFT; // no mask, mask = opacity, 8 bits always + const int half_pixel_value_rounding = (1 << (MASK_CORR_SHIFT - 1)); + + // avoid "uint16*uint16 can't get into int32" overflows + // no need here, opacity as mask is always 8 bit + // typedef std::conditional < sizeof(pixel_t) == 1, int, typename std::conditional < sizeof(pixel_t) == 2, __int64, float>::type >::type result_t; + for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - BYTE result = overlay_blend_c_core(p1[x], p2[x], opacity); - p1[x] = result; + pixel_t p1x = reinterpret_cast(p1)[x]; + pixel_t p2x = reinterpret_cast(p2)[x]; + pixel_t result = (pixel_t)((((p1x << MASK_CORR_SHIFT) | half_pixel_value_rounding) + (p2x-p1x)*opacity) >> MASK_CORR_SHIFT); + //BYTE result = overlay_blend_c_core_8(p1[x], p2[x], opacity); + reinterpret_cast(p1)[x] = result; } p1 += p1_pitch; @@ -250,6 +305,24 @@ void overlay_blend_c_plane_opacity(BYTE *p1, const BYTE *p2, } } +// instantiate +template void overlay_blend_c_plane_opacity(BYTE *p1, const BYTE *p2, + const int p1_pitch, const int p2_pitch, + const int width, const int height, const int opacity); +template void overlay_blend_c_plane_opacity(BYTE *p1, const BYTE *p2, + const int p1_pitch, const int p2_pitch, + const int width, const int height, const int opacity); +template void overlay_blend_c_plane_opacity(BYTE *p1, const BYTE *p2, + const int p1_pitch, const int p2_pitch, + const int width, const int height, const int opacity); +template void overlay_blend_c_plane_opacity(BYTE *p1, const BYTE *p2, + const int p1_pitch, const int p2_pitch, + const int width, const int height, const int opacity); +template void overlay_blend_c_plane_opacity(BYTE *p1, const BYTE *p2, + const int p1_pitch, const int p2_pitch, + const int width, const int height, const int opacity); + + #ifdef X86_32 // The following disable EMMS warning, since it's caller-called. 
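
// Illustrative sketch (not part of the patch): the constant-opacity path.
// opacity always lives on the 8-bit 0..256 scale, so the shift stays 8 for
// every bit depth and (p2 - p1) * opacity fits a 32-bit int even for
// uint16_t pixels, which is why no wider intermediate type is needed here.
#include <cstdint>

template <typename pixel_t>
static void blend_opacity_row_sketch(pixel_t* p1, const pixel_t* p2, int width, int opacity)
{
  const int shift = 8;                    // opacity scale, not bits_per_pixel
  const int rounding = 1 << (shift - 1);
  for (int x = 0; x < width; ++x) {
    const int p1x = p1[x];
    const int p2x = p2[x];
    p1[x] = (pixel_t)((((p1x << shift) | rounding) + (p2x - p1x) * opacity) >> shift);
  }
}
// At opacity == 256 the callers skip this entirely and BitBlt the plane,
// copying w * pixelsize bytes per row for the 10-16 bit formats.
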
#pragma warning (push) @@ -287,7 +360,7 @@ void overlay_blend_mmx_plane_opacity(BYTE *p1, const BYTE *p2, // Leftover value for (int x = wMod8; x < width; x++) { - BYTE result = overlay_blend_c_core(p1[x], p2[x], opacity); + BYTE result = overlay_blend_c_core_8(p1[x], p2[x], opacity); p1[x] = result; } @@ -334,7 +407,7 @@ void overlay_blend_sse2_plane_opacity(BYTE *p1, const BYTE *p2, // Leftover value for (int x = wMod16; x < width; x++) { - BYTE result = overlay_blend_c_core(p1[x], p2[x], opacity); + BYTE result = overlay_blend_c_core_8(p1[x], p2[x], opacity); p1[x] = result; } @@ -343,14 +416,27 @@ void overlay_blend_sse2_plane_opacity(BYTE *p1, const BYTE *p2, } } +template void overlay_blend_c_plane_masked_opacity(BYTE *p1, const BYTE *p2, const BYTE *mask, const int p1_pitch, const int p2_pitch, const int mask_pitch, const int width, const int height, const int opacity) { + const int MASK_CORR_SHIFT = (sizeof(pixel_t) == 1) ? 8 : bits_per_pixel; + const int OPACITY_SHIFT = 8; // opacity always max 0..256 + const int half_pixel_value_rounding = (1 << (MASK_CORR_SHIFT - 1)); + + // avoid "uint16*uint16 can't get into int32" overflows + typedef std::conditional < sizeof(pixel_t) == 1, int, typename std::conditional < sizeof(pixel_t) == 2, __int64, float>::type >::type result_t; + for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - int new_mask = overley_merge_mask_c(mask[x], opacity); - BYTE result = overlay_blend_c_core(p1[x], p2[x], static_cast(new_mask)); - p1[x] = result; + int new_mask = (reinterpret_cast(mask)[x] * opacity) >> OPACITY_SHIFT; // int is enough, opacity is 8 bits + result_t p1x = reinterpret_cast(p1)[x]; + pixel_t p2x = reinterpret_cast(p2)[x]; + + pixel_t result = (pixel_t)((((p1x << MASK_CORR_SHIFT) | half_pixel_value_rounding) + (p2x-p1x)*new_mask) >> MASK_CORR_SHIFT); + //int new_mask = overlay_merge_mask_c(mask[x], opacity); + //BYTE result = overlay_blend_c_core_8(p1[x], p2[x], static_cast(new_mask)); + reinterpret_cast(p1)[x] = result; } p1 += p1_pitch; @@ -359,6 +445,24 @@ void overlay_blend_c_plane_masked_opacity(BYTE *p1, const BYTE *p2, const BYTE * } } +// instantiate +template void overlay_blend_c_plane_masked_opacity(BYTE *p1, const BYTE *p2, const BYTE *mask, + const int p1_pitch, const int p2_pitch, const int mask_pitch, + const int width, const int height, const int opacity); +template void overlay_blend_c_plane_masked_opacity(BYTE *p1, const BYTE *p2, const BYTE *mask, + const int p1_pitch, const int p2_pitch, const int mask_pitch, + const int width, const int height, const int opacity); +template void overlay_blend_c_plane_masked_opacity(BYTE *p1, const BYTE *p2, const BYTE *mask, + const int p1_pitch, const int p2_pitch, const int mask_pitch, + const int width, const int height, const int opacity); +template void overlay_blend_c_plane_masked_opacity(BYTE *p1, const BYTE *p2, const BYTE *mask, + const int p1_pitch, const int p2_pitch, const int mask_pitch, + const int width, const int height, const int opacity); +template void overlay_blend_c_plane_masked_opacity(BYTE *p1, const BYTE *p2, const BYTE *mask, + const int p1_pitch, const int p2_pitch, const int mask_pitch, + const int width, const int height, const int opacity); + + #ifdef X86_32 // The following disable EMMS warning, since it's caller-called. 
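
// Illustrative sketch (not part of the patch): the masked + opacity path.
// The per-pixel mask is first scaled by the 0..256 opacity (a plain int is
// wide enough for that product), then applied as the blend weight at the
// clip's bit depth, as in overlay_blend_c_plane_masked_opacity above.
#include <cstdint>
#include <type_traits>

template <typename pixel_t, int bits_per_pixel>
static pixel_t blend_masked_opacity_pixel_sketch(pixel_t p1, pixel_t p2, pixel_t mask, int opacity)
{
  typedef typename std::conditional<sizeof(pixel_t) == 1, int, std::int64_t>::type calc_t;
  const int shift = (sizeof(pixel_t) == 1) ? 8 : bits_per_pixel;
  const calc_t rounding = calc_t(1) << (shift - 1);
  const int new_mask = ((int)mask * opacity) >> 8;  // opacity is 8 bits, int is enough
  const calc_t diff = (calc_t)p2 - (calc_t)p1;
  return (pixel_t)(((((calc_t)p1 << shift) | rounding) + diff * new_mask) >> shift);
}
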
#pragma warning (push) @@ -404,8 +508,8 @@ void overlay_blend_mmx_plane_masked_opacity(BYTE *p1, const BYTE *p2, const BYTE // Leftover value for (int x = wMod8; x < width; x++) { - int new_mask = overley_merge_mask_c(mask[x], opacity); - BYTE result = overlay_blend_c_core(p1[x], p2[x], static_cast(new_mask)); + int new_mask = overlay_merge_mask_c_8(mask[x], opacity); + BYTE result = overlay_blend_c_core_8(p1[x], p2[x], static_cast(new_mask)); p1[x] = result; } @@ -463,8 +567,8 @@ void overlay_blend_sse2_plane_masked_opacity(BYTE *p1, const BYTE *p2, const BYT // Leftover value for (int x = wMod16; x < width; x++) { - int new_mask = overley_merge_mask_c(mask[x], opacity); - BYTE result = overlay_blend_c_core(p1[x], p2[x], static_cast(new_mask)); + int new_mask = overlay_merge_mask_c_8(mask[x], opacity); + BYTE result = overlay_blend_c_core_8(p1[x], p2[x], static_cast(new_mask)); p1[x] = result; } diff --git a/avs_core/filters/overlay/blend_common.h b/avs_core/filters/overlay/blend_common.h index c1a5bd4f1..1d58e0cd8 100644 --- a/avs_core/filters/overlay/blend_common.h +++ b/avs_core/filters/overlay/blend_common.h @@ -40,6 +40,7 @@ #include // Mode: Overlay +template void overlay_blend_c_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, const int p1_pitch, const int p2_pitch, const int mask_pitch, const int width, const int height); @@ -52,6 +53,7 @@ void overlay_blend_sse2_plane_masked(BYTE *p1, const BYTE *p2, const BYTE *mask, const int p1_pitch, const int p2_pitch, const int mask_pitch, const int width, const int height); +template void overlay_blend_c_plane_opacity(BYTE *p1, const BYTE *p2, const int p1_pitch, const int p2_pitch, const int width, const int height, const int opacity); @@ -64,6 +66,7 @@ void overlay_blend_sse2_plane_opacity(BYTE *p1, const BYTE *p2, const int p1_pitch, const int p2_pitch, const int width, const int height, const int opacity); +template void overlay_blend_c_plane_masked_opacity(BYTE *p1, const BYTE *p2, const BYTE *mask, const int p1_pitch, const int p2_pitch, const int mask_pitch, const int width, const int height, const int opacity); From 8f17960c57ba113c4dea5ac32c29d0dde6ab6107 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 14 Oct 2016 12:35:26 +0200 Subject: [PATCH 108/120] Overlay: Multiply 10-16 bits --- avs_core/filters/overlay/OF_multiply.cpp | 201 +++++++++++++---------- 1 file changed, 116 insertions(+), 85 deletions(-) diff --git a/avs_core/filters/overlay/OF_multiply.cpp b/avs_core/filters/overlay/OF_multiply.cpp index 3980713e9..97ff9a347 100644 --- a/avs_core/filters/overlay/OF_multiply.cpp +++ b/avs_core/filters/overlay/OF_multiply.cpp @@ -37,6 +37,7 @@ #include "overlayfunctions.h" #include +#include void OL_MultiplyImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { if (bits_per_pixel == 8) @@ -59,81 +60,96 @@ void OL_MultiplyImage::DoBlendImage(Image444* base, Image444* overlay) { template void OL_MultiplyImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { - BYTE* baseY = base->GetPtr(PLANAR_Y); - BYTE* baseU = base->GetPtr(PLANAR_U); - BYTE* baseV = base->GetPtr(PLANAR_V); + pixel_t* baseY = reinterpret_cast(base->GetPtr(PLANAR_Y)); + pixel_t* baseU = reinterpret_cast(base->GetPtr(PLANAR_U)); + pixel_t* baseV = reinterpret_cast(base->GetPtr(PLANAR_V)); + + pixel_t* ovY = reinterpret_cast(overlay->GetPtr(PLANAR_Y)); + pixel_t* ovU = reinterpret_cast(overlay->GetPtr(PLANAR_U)); + pixel_t* ovV = reinterpret_cast(overlay->GetPtr(PLANAR_V)); + + pixel_t* maskY = 
reinterpret_cast(mask->GetPtr(PLANAR_Y)); + pixel_t* maskU = reinterpret_cast(mask->GetPtr(PLANAR_U)); + pixel_t* maskV = reinterpret_cast(mask->GetPtr(PLANAR_V)); + + const int half_pixel_value_rounding = (sizeof(pixel_t) == 1) ? 128 : (1 << (bits_per_pixel - 1)); + const int max_pixel_value = (sizeof(pixel_t) == 1) ? 255 : (1 << bits_per_pixel) - 1; + const int pixel_range = max_pixel_value + 1; + const int MASK_CORR_SHIFT = (sizeof(pixel_t) == 1) ? 8 : bits_per_pixel; + const int OPACITY_SHIFT = 8; // opacity always max 0..256 + const int basepitch = (base->pitch) / sizeof(pixel_t); + const int overlaypitch = (overlay->pitch) / sizeof(pixel_t); + const int maskpitch = (mask->pitch) / sizeof(pixel_t); + + // avoid "uint16*uint16 can't get into int32" overflows + typedef std::conditional < sizeof(pixel_t) == 1, int, typename std::conditional < sizeof(pixel_t) == 2, __int64, float>::type >::type result_t; - BYTE* ovY = overlay->GetPtr(PLANAR_Y); - BYTE* ovU = overlay->GetPtr(PLANAR_U); - BYTE* ovV = overlay->GetPtr(PLANAR_V); - - BYTE* maskY = mask->GetPtr(PLANAR_Y); - BYTE* maskU = mask->GetPtr(PLANAR_U); - BYTE* maskV = mask->GetPtr(PLANAR_V); int w = base->w(); int h = base->h(); if (opacity == 256) { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { int op = maskY[x]; - int invop = 256 - op; - int Y = (baseY[x] * (256*invop + (ovY[x] * op))) >> 16; + result_t invop = pixel_range - op; + result_t ovYx = ovY[x]; + int Y = (int)((baseY[x] * (pixel_range*invop + (ovYx * op))) >> (MASK_CORR_SHIFT*2)); op = maskU[x]; - invop = 256 - op; - int U = ((baseU[x] * invop * 256) + (op * (baseU[x] * ovY[x] + 128 * (256-ovY[x])))) >> 16; + invop = pixel_range - op; + int U = (int)(((baseU[x] * invop * pixel_range) + (op * (baseU[x] * ovYx + half_pixel_value_rounding * (pixel_range-ovYx)))) >> (MASK_CORR_SHIFT*2)); op = maskV[x]; - invop = 256-op; - int V = ((baseV[x] * invop * 256) + (op * (baseV[x] * ovY[x] + 128 * (256-ovY[x])))) >> 16; + invop = pixel_range-op; + int V = (int)(((baseV[x] * invop * pixel_range) + (op * (baseV[x] * ovYx + half_pixel_value_rounding * (pixel_range-ovYx)))) >> (MASK_CORR_SHIFT*2)); - baseU[x] = (BYTE)U; - baseV[x] = (BYTE)V; - baseY[x] = (BYTE)Y; + baseU[x] = (pixel_t)U; + baseV[x] = (pixel_t)V; + baseY[x] = (pixel_t)Y; } - maskY += mask->pitch; - maskU += mask->pitch; - maskV += mask->pitch; + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; } } else { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int op = (maskY[x]*opacity)>>8; - int invop = 256 - op; - int Y = (baseY[x] * (256*invop + (ovY[x] * op))) >> 16; - - op = (maskU[x]*opacity)>>8; - invop = 256 - op; - int U = ((baseU[x] * invop * 256) + (op * (baseU[x] * ovY[x] + 128 * (256-ovY[x])))) >> 16; - - op = (maskV[x]*opacity)>>8; - invop = 256-op; - int V = ((baseV[x] * invop * 256) + (op * (baseV[x] * ovY[x] + 128 * (256-ovY[x])))) >> 16; - - baseU[x] = (BYTE)U; - baseV[x] = (BYTE)V; - baseY[x] = (BYTE)Y; + int op = (maskY[x]*opacity)>>OPACITY_SHIFT; + result_t invop = pixel_range - op; + result_t ovYx = ovY[x]; + int Y = (int)((baseY[x] * (pixel_range*invop + (ovYx * op))) >> (MASK_CORR_SHIFT*2)); + + op = (maskU[x]*opacity)>>OPACITY_SHIFT; + invop = pixel_range - op; + int U = (int)(((baseU[x] 
* invop * pixel_range) + (op * (baseU[x] * ovYx + half_pixel_value_rounding * (pixel_range-ovYx)))) >> (MASK_CORR_SHIFT*2)); + + op = (maskV[x]*opacity)>>OPACITY_SHIFT; + invop = pixel_range-op; + int V = (int)(((baseV[x] * invop * pixel_range) + (op * (baseV[x] * ovYx + half_pixel_value_rounding * (pixel_range-ovYx)))) >> (MASK_CORR_SHIFT*2)); + + baseU[x] = (pixel_t)U; + baseV[x] = (pixel_t)V; + baseY[x] = (pixel_t)Y; } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; - maskY += mask->pitch; - maskU += mask->pitch; - maskV += mask->pitch; + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; } } @@ -142,54 +158,69 @@ void OL_MultiplyImage::BlendImageMask(Image444* base, Image444* overlay, Image44 template void OL_MultiplyImage::BlendImage(Image444* base, Image444* overlay) { - BYTE* baseY = base->GetPtr(PLANAR_Y); - BYTE* baseU = base->GetPtr(PLANAR_U); - BYTE* baseV = base->GetPtr(PLANAR_V); + pixel_t* baseY = reinterpret_cast(base->GetPtr(PLANAR_Y)); + pixel_t* baseU = reinterpret_cast(base->GetPtr(PLANAR_U)); + pixel_t* baseV = reinterpret_cast(base->GetPtr(PLANAR_V)); + + pixel_t* ovY = reinterpret_cast(overlay->GetPtr(PLANAR_Y)); + pixel_t* ovU = reinterpret_cast(overlay->GetPtr(PLANAR_U)); + pixel_t* ovV = reinterpret_cast(overlay->GetPtr(PLANAR_V)); + + const int half_pixel_value_rounding = (sizeof(pixel_t) == 1) ? 128 : (1 << (bits_per_pixel - 1)); + const int max_pixel_value = (sizeof(pixel_t) == 1) ? 255 : (1 << bits_per_pixel) - 1; + const int pixel_range = max_pixel_value + 1; + const int MASK_CORR_SHIFT = (sizeof(pixel_t) == 1) ? 
8 : bits_per_pixel; + const int OPACITY_SHIFT = 8; // opacity always max 0..256 + const int basepitch = (base->pitch) / sizeof(pixel_t); + const int overlaypitch = (overlay->pitch) / sizeof(pixel_t); + + // avoid "uint16*uint16 can't get into int32" overflows + typedef std::conditional < sizeof(pixel_t) == 1, int, typename std::conditional < sizeof(pixel_t) == 2, __int64, float>::type >::type result_t; - BYTE* ovY = overlay->GetPtr(PLANAR_Y); - BYTE* ovU = overlay->GetPtr(PLANAR_U); - BYTE* ovV = overlay->GetPtr(PLANAR_V); - int w = base->w(); int h = base->h(); if (opacity == 256) { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int Y = (baseY[x] * ovY[x])>>8; - int U = (baseU[x] * ovY[x] + 128 * (256-ovY[x]) ) >> 8; - int V = (baseV[x] * ovY[x] + 128 * (256-ovY[x]) ) >> 8; - baseY[x] = (BYTE)Y; - baseU[x] = (BYTE)U; - baseV[x] = (BYTE)V; + result_t ovYx = ovY[x]; + int Y = (int)((baseY[x] * ovYx) >> MASK_CORR_SHIFT); + int U = (int)((baseU[x] * ovYx + half_pixel_value_rounding * (pixel_range - ovYx)) >> MASK_CORR_SHIFT); + int V = (int)((baseV[x] * ovYx + half_pixel_value_rounding * (pixel_range - ovYx)) >> MASK_CORR_SHIFT); + baseY[x] = (pixel_t)Y; + baseU[x] = (pixel_t)U; + baseV[x] = (pixel_t)V; } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; } } else { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - - int Y = (baseY[x] * (256*inv_opacity + (ovY[x] * opacity))) >> 16; - int U = ((baseU[x] * inv_opacity * 256) + (opacity * (baseU[x] * ovY[x] + 128 * (256-ovY[x])))) >> 16; - int V = ((baseV[x] * inv_opacity * 256) + (opacity * (baseV[x] * ovY[x] + 128 * (256-ovY[x])))) >> 16; - - baseU[x] = (BYTE)U; - baseV[x] = (BYTE)V; - baseY[x] = (BYTE)Y; + result_t ovYx = ovY[x]; + result_t baseYx = baseY[x]; + int Y = (int)((baseYx * (pixel_range*inv_opacity + (ovYx * opacity))) >> (MASK_CORR_SHIFT+OPACITY_SHIFT)); + result_t baseUx = baseU[x]; + int U = (int)(((baseUx * inv_opacity * pixel_range) + (opacity * (baseUx * ovYx + half_pixel_value_rounding * (pixel_range-ovYx)))) >> (MASK_CORR_SHIFT+OPACITY_SHIFT)); + result_t baseVx = baseV[x]; + int V = (int)(((baseVx * inv_opacity * pixel_range) + (opacity * (baseVx * ovYx + half_pixel_value_rounding * (pixel_range-ovYx)))) >> (MASK_CORR_SHIFT+OPACITY_SHIFT)); + + baseU[x] = (pixel_t)U; + baseV[x] = (pixel_t)V; + baseY[x] = (pixel_t)Y; } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; } } } From f0cc244077ac3009b331b22da608385fa19e6e82 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 14 Oct 2016 13:14:39 +0200 Subject: [PATCH 109/120] Overlay: Difference 10-16 bit --- avs_core/filters/overlay/OF_difference.cpp | 179 ++++++++++++-------- avs_core/filters/overlay/overlayfunctions.h | 6 +- 2 files changed, 107 insertions(+), 78 deletions(-) diff --git a/avs_core/filters/overlay/OF_difference.cpp b/avs_core/filters/overlay/OF_difference.cpp index 9983b1ce9..e82e888d4 100644 --- a/avs_core/filters/overlay/OF_difference.cpp +++ b/avs_core/filters/overlay/OF_difference.cpp @@ -42,36 +42,50 @@ void OL_DifferenceImage::DoBlendImageMask(Image444* base, Image444* 
overlay, Image444* mask) { if (bits_per_pixel == 8) - BlendImageMask(base, overlay, mask); + BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); //else if(bits_per_pixel == 32) // BlendImageMask(base, overlay, mask); - else if(bits_per_pixel == 16) - BlendImageMask(base, overlay, mask); } void OL_DifferenceImage::DoBlendImage(Image444* base, Image444* overlay) { if (bits_per_pixel == 8) - BlendImage(base, overlay); + BlendImageMask(base, overlay, nullptr); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, nullptr); //else if(bits_per_pixel == 32) // BlendImage(base, overlay); - else if(bits_per_pixel == 16) - BlendImage(base, overlay); } -template +template void OL_DifferenceImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { - BYTE* baseY = base->GetPtr(PLANAR_Y); - BYTE* baseU = base->GetPtr(PLANAR_U); - BYTE* baseV = base->GetPtr(PLANAR_V); + pixel_t* baseY = reinterpret_cast(base->GetPtr(PLANAR_Y)); + pixel_t* baseU = reinterpret_cast(base->GetPtr(PLANAR_U)); + pixel_t* baseV = reinterpret_cast(base->GetPtr(PLANAR_V)); - BYTE* ovY = overlay->GetPtr(PLANAR_Y); - BYTE* ovU = overlay->GetPtr(PLANAR_U); - BYTE* ovV = overlay->GetPtr(PLANAR_V); - - BYTE* maskY = mask->GetPtr(PLANAR_Y); - BYTE* maskU = mask->GetPtr(PLANAR_U); - BYTE* maskV = mask->GetPtr(PLANAR_V); + pixel_t* ovY = reinterpret_cast(overlay->GetPtr(PLANAR_Y)); + pixel_t* ovU = reinterpret_cast(overlay->GetPtr(PLANAR_U)); + pixel_t* ovV = reinterpret_cast(overlay->GetPtr(PLANAR_V)); + + pixel_t* maskY = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_Y)) : nullptr; + pixel_t* maskU = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_U)) : nullptr; + pixel_t* maskV = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_V)) : nullptr; + + const int half_pixel_value = (sizeof(pixel_t) == 1) ? 128 : (1 << (bits_per_pixel - 1)); + const int max_pixel_value = (sizeof(pixel_t) == 1) ? 255 : (1 << bits_per_pixel) - 1; + const int pixel_range = max_pixel_value + 1; + const int SHIFT = (sizeof(pixel_t) == 1) ? 5 : 5 + (bits_per_pixel - 8); + const int MASK_CORR_SHIFT = (sizeof(pixel_t) == 1) ? 8 : bits_per_pixel; + const int OPACITY_SHIFT = 8; // opacity always max 0..256 + const int over32 = (1 << SHIFT); // 32 + const int basepitch = (base->pitch) / sizeof(pixel_t); + const int overlaypitch = (overlay->pitch) / sizeof(pixel_t); + const int maskpitch = maskMode ? 
(mask->pitch) / sizeof(pixel_t) : 0; + + // avoid "uint16*uint16 can't get into int32" overflows + typedef std::conditional < sizeof(pixel_t) == 1, int, typename std::conditional < sizeof(pixel_t) == 2, __int64, float>::type >::type result_t; int w = base->w(); int h = base->h(); @@ -79,84 +93,98 @@ void OL_DifferenceImage::BlendImageMask(Image444* base, Image444* overlay, Image if (opacity == 256) { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int Y = abs((int)baseY[x] - (int)ovY[x])+128; - int U = abs((int)baseU[x] - (int)ovU[x])+128; - int V = abs((int)baseV[x] - (int)ovV[x])+128; - int mY = maskY[x]; - int mU = maskU[x]; - int mV = maskV[x]; - Y = ((Y*mY) + ((256-mY)*baseY[x]))>>8; - U = ((U*mU) + ((256-mU)*baseU[x]))>>8; - V = ((V*mV) + ((256-mV)*baseV[x]))>>8; - if (Y>255) { // Apply overbrightness to UV - int multiplier = max(0,288-Y); // 0 to 32 - U = ((U*multiplier) + (128*(32-multiplier)))>>5; - V = ((V*multiplier) + (128*(32-multiplier)))>>5; - Y = 255; + int Y = abs((int)baseY[x] - (int)ovY[x]) + half_pixel_value; + int U = abs((int)baseU[x] - (int)ovU[x]) + half_pixel_value; + int V = abs((int)baseV[x] - (int)ovV[x]) + half_pixel_value; + if(maskMode) { + result_t mY = maskY[x]; + result_t mU = maskU[x]; + result_t mV = maskV[x]; + Y = (int)(((Y*mY) + ((pixel_range - mY)*baseY[x])) >> MASK_CORR_SHIFT); + U = (int)(((U*mU) + ((pixel_range - mU)*baseU[x])) >> MASK_CORR_SHIFT); + V = (int)(((V*mV) + ((pixel_range - mV)*baseV[x])) >> MASK_CORR_SHIFT); + } + if (Y>max_pixel_value) { // Apply overbrightness to UV + int multiplier = max(0,pixel_range + over32 -Y); // 0 to 32 + U = ((U*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + V = ((V*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + Y = max_pixel_value; } else if (Y<0) { // Apply superdark to UV - int multiplier = min(-Y,32); // 0 to 32 - U = ((U*(32-multiplier)) + (128*(multiplier)))>>5; - V = ((V*(32-multiplier)) + (128*(multiplier)))>>5; + int multiplier = min(-Y,over32); // 0 to 32 + U = ((U*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; + V = ((V*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; Y = 0; } - baseY[x] = (BYTE)Y; - baseU[x] = (BYTE)clamp(U, 0, 255); - baseV[x] = (BYTE)clamp(V, 0, 255); + baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); + baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); + baseY[x] = (pixel_t)Y; } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; - maskY += mask->pitch; - maskU += mask->pitch; - maskV += mask->pitch; + if(maskMode) { + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; + } } // for y } else { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int Y = abs((int)baseY[x] - (int)ovY[x])+128; - int U = abs((int)baseU[x] - (int)ovU[x])+128; - int V = abs((int)baseV[x] - (int)ovV[x])+128; - int mY = (maskY[x]*opacity)>>8; - int mU = (maskU[x]*opacity)>>8; - int mV = (maskV[x]*opacity)>>8; - Y = ((Y*mY) + ((256-mY)*baseY[x]))>>8; - U = ((U*mU) + ((256-mU)*baseU[x]))>>8; - V = ((V*mV) + ((256-mV)*baseV[x]))>>8; - if (Y>255) { // Apply overbrightness to UV - int multiplier = max(0,288-Y); // 0 to 32 - U = ((U*multiplier) + (128*(32-multiplier)))>>5; - V = ((V*multiplier) + (128*(32-multiplier)))>>5; - Y = 255; + int Y = abs((int)baseY[x] - 
(int)ovY[x]) + half_pixel_value; + int U = abs((int)baseU[x] - (int)ovU[x]) + half_pixel_value; + int V = abs((int)baseV[x] - (int)ovV[x]) + half_pixel_value; + if(maskMode) { + result_t mY = (maskY[x] * opacity) >> OPACITY_SHIFT; + result_t mU = (maskU[x] * opacity) >> OPACITY_SHIFT; + result_t mV = (maskV[x] * opacity) >> OPACITY_SHIFT; + Y = (int)(((Y*mY) + ((pixel_range - mY)*baseY[x])) >> MASK_CORR_SHIFT); + U = (int)(((U*mU) + ((pixel_range - mU)*baseU[x])) >> MASK_CORR_SHIFT); + V = (int)(((V*mV) + ((pixel_range - mV)*baseV[x])) >> MASK_CORR_SHIFT); + } + else { + Y = ((Y*opacity) + (inv_opacity*baseY[x])) >> OPACITY_SHIFT; + U = ((U*opacity) + (inv_opacity*baseU[x])) >> OPACITY_SHIFT; + V = ((V*opacity) + (inv_opacity*baseV[x])) >> OPACITY_SHIFT; + } + if (Y>max_pixel_value) { // Apply overbrightness to UV + int multiplier = max(0,pixel_range + over32 -Y); // 0 to 32 + U = ((U*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + V = ((V*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + Y = max_pixel_value; } else if (Y<0) { // Apply superdark to UV - int multiplier = min(-Y,32); // 0 to 32 - U = ((U*(32-multiplier)) + (128*(multiplier)))>>5; - V = ((V*(32-multiplier)) + (128*(multiplier)))>>5; + int multiplier = min(-Y,over32); // 0 to 32 + U = ((U*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; + V = ((V*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; Y = 0; } - baseY[x] = (BYTE)Y; - baseU[x] = (BYTE)clamp(U, 0, 255); - baseV[x] = (BYTE)clamp(V, 0, 255); + baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); + baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); + baseY[x] = (pixel_t)Y; } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; - maskY += mask->pitch; - maskU += mask->pitch; - maskV += mask->pitch; + if(maskMode) { + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; + } } // for x } // for y } +#if 0 template void OL_DifferenceImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); @@ -233,3 +261,4 @@ void OL_DifferenceImage::BlendImage(Image444* base, Image444* overlay) { } // for y }// if !mmx } +#endif diff --git a/avs_core/filters/overlay/overlayfunctions.h b/avs_core/filters/overlay/overlayfunctions.h index 71e51d8fd..5b49bed48 100644 --- a/avs_core/filters/overlay/overlayfunctions.h +++ b/avs_core/filters/overlay/overlayfunctions.h @@ -178,9 +178,9 @@ class OL_HardLightImage : public OverlayFunction { class OL_DifferenceImage : public OverlayFunction { void DoBlendImage(Image444* base, Image444* overlay); void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); - template - void BlendImage(Image444* base, Image444* overlay); - template + //template + //void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; From e32f825edf09f7487b02c17e1f600e359945f0da Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 14 Oct 2016 13:15:06 +0200 Subject: [PATCH 110/120] misc. 
comment in overlay add --- avs_core/filters/overlay/OF_add.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/avs_core/filters/overlay/OF_add.cpp b/avs_core/filters/overlay/OF_add.cpp index 8595881f0..b359782b1 100644 --- a/avs_core/filters/overlay/OF_add.cpp +++ b/avs_core/filters/overlay/OF_add.cpp @@ -161,7 +161,7 @@ void OL_AddImage::BlendImageMask(Image444* base, Image444* overlay, Image444* ma */ template void OL_AddImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { - + pixel_t* baseY = reinterpret_cast(base->GetPtr(PLANAR_Y)); pixel_t* baseU = reinterpret_cast(base->GetPtr(PLANAR_U)); pixel_t* baseV = reinterpret_cast(base->GetPtr(PLANAR_V)); @@ -210,7 +210,7 @@ void OL_AddImage::BlendImageMask(Image444* base, Image444* overlay, Image444* ma Y = baseY[x] - (maskMode ? (((result_t)ovY[x] * maskY[x]) >> MASK_CORR_SHIFT) : ovY[x]); U = baseU[x] - (int)(maskMode ? ((((result_t)half_pixel_value*(pixel_range - maskU[x])) + ((result_t)maskU[x] * ovU[x])) >> MASK_CORR_SHIFT) : ovU[x]) + half_pixel_value; V = baseV[x] - (int)(maskMode ? ((((result_t)half_pixel_value*(pixel_range - maskV[x])) + ((result_t)maskV[x] * ovV[x])) >> MASK_CORR_SHIFT) : ovV[x]) + half_pixel_value; - if (Y<0) { // Apply overbrightness to UV + if (Y<0) { // Apply superdark to UV int multiplier = min(-Y,over32); // 0 to 32 U = ((U*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; V = ((V*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; From 1f4cf1b78e446b1022b1de9792c55f21869fe5d4 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 14 Oct 2016 13:43:25 +0200 Subject: [PATCH 111/120] Overlay: Exclusion 10-16 bit --- avs_core/filters/overlay/OF_exclusion.cpp | 183 ++++++++++++-------- avs_core/filters/overlay/overlayfunctions.h | 6 +- 2 files changed, 111 insertions(+), 78 deletions(-) diff --git a/avs_core/filters/overlay/OF_exclusion.cpp b/avs_core/filters/overlay/OF_exclusion.cpp index 39baf4eec..4d6be41b2 100644 --- a/avs_core/filters/overlay/OF_exclusion.cpp +++ b/avs_core/filters/overlay/OF_exclusion.cpp @@ -38,39 +38,55 @@ #include #include +#include void OL_ExclusionImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { if (bits_per_pixel == 8) - BlendImageMask(base, overlay, mask); + BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); //else if(bits_per_pixel == 32) // BlendImageMask(base, overlay, mask); - else if(bits_per_pixel == 16) - BlendImageMask(base, overlay, mask); } void OL_ExclusionImage::DoBlendImage(Image444* base, Image444* overlay) { if (bits_per_pixel == 8) - BlendImage(base, overlay); + BlendImageMask(base, overlay, nullptr); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, nullptr); //else if(bits_per_pixel == 32) // BlendImage(base, overlay); - else if(bits_per_pixel == 16) - BlendImage(base, overlay); } -template +template void OL_ExclusionImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { - BYTE* baseY = base->GetPtr(PLANAR_Y); - BYTE* baseU = base->GetPtr(PLANAR_U); - BYTE* baseV = base->GetPtr(PLANAR_V); + pixel_t* baseY = reinterpret_cast(base->GetPtr(PLANAR_Y)); + pixel_t* baseU = reinterpret_cast(base->GetPtr(PLANAR_U)); + pixel_t* baseV = reinterpret_cast(base->GetPtr(PLANAR_V)); - BYTE* ovY = overlay->GetPtr(PLANAR_Y); - BYTE* ovU = overlay->GetPtr(PLANAR_U); - BYTE* ovV = overlay->GetPtr(PLANAR_V); - - BYTE* maskY = mask->GetPtr(PLANAR_Y); - BYTE* maskU = mask->GetPtr(PLANAR_U); - BYTE* 
maskV = mask->GetPtr(PLANAR_V); + pixel_t* ovY = reinterpret_cast(overlay->GetPtr(PLANAR_Y)); + pixel_t* ovU = reinterpret_cast(overlay->GetPtr(PLANAR_U)); + pixel_t* ovV = reinterpret_cast(overlay->GetPtr(PLANAR_V)); + + pixel_t* maskY = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_Y)) : nullptr; + pixel_t* maskU = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_U)) : nullptr; + pixel_t* maskV = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_V)) : nullptr; + + const int half_pixel_value = (sizeof(pixel_t) == 1) ? 128 : (1 << (bits_per_pixel - 1)); + const int max_pixel_value = (sizeof(pixel_t) == 1) ? 255 : (1 << bits_per_pixel) - 1; + const int xor_mask = max_pixel_value; + const int pixel_range = max_pixel_value + 1; + const int SHIFT = (sizeof(pixel_t) == 1) ? 5 : 5 + (bits_per_pixel - 8); + const int MASK_CORR_SHIFT = (sizeof(pixel_t) == 1) ? 8 : bits_per_pixel; + const int OPACITY_SHIFT = 8; // opacity always max 0..256 + const int over32 = (1 << SHIFT); // 32 + const int basepitch = (base->pitch) / sizeof(pixel_t); + const int overlaypitch = (overlay->pitch) / sizeof(pixel_t); + const int maskpitch = maskMode ? (mask->pitch) / sizeof(pixel_t) : 0; + + // avoid "uint16*uint16 can't get into int32" overflows + typedef std::conditional < sizeof(pixel_t) == 1, int, typename std::conditional < sizeof(pixel_t) == 2, __int64, float>::type >::type result_t; int w = base->w(); int h = base->h(); @@ -78,84 +94,100 @@ void OL_ExclusionImage::BlendImageMask(Image444* base, Image444* overlay, Image4 if (opacity == 256) { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int Y = ((int)(baseY[x]^0xff)*ovY[x] + (int)(ovY[x]^0xff)*baseY[x])>>8; - int U = ((int)(baseU[x]^0xff)*ovY[x] + (int)(ovY[x]^0xff)*baseU[x])>>8; - int V = ((int)(baseV[x]^0xff)*ovY[x] + (int)(ovY[x]^0xff)*baseV[x])>>8; - int mY = maskY[x]; - int mU = maskU[x]; - int mV = maskV[x]; - Y = ((Y*mY) + ((256-mY)*baseY[x]))>>8; - U = ((U*mU) + ((256-mU)*baseU[x]))>>8; - V = ((V*mV) + ((256-mV)*baseV[x]))>>8; - if (Y>255) { // Apply overbrightness to UV - int multiplier = max(0,288-Y); // 0 to 32 - U = ((U*multiplier) + (128*(32-multiplier)))>>5; - V = ((V*multiplier) + (128*(32-multiplier)))>>5; - Y = 255; + result_t ovYx = ovY[x]; + int Y = (int)(((baseY[x] ^ xor_mask)*ovYx + (ovYx ^ xor_mask)*baseY[x]) >> MASK_CORR_SHIFT); + int U = (int)(((baseU[x] ^ xor_mask)*ovYx + (ovYx ^ xor_mask)*baseU[x]) >> MASK_CORR_SHIFT); + int V = (int)(((baseV[x] ^ xor_mask)*ovYx + (ovYx ^ xor_mask)*baseV[x]) >> MASK_CORR_SHIFT); + if(maskMode) { + result_t mY = maskY[x]; + result_t mU = maskU[x]; + result_t mV = maskV[x]; + Y = (int)(((Y*mY) + ((pixel_range - mY)*baseY[x])) >> MASK_CORR_SHIFT); + U = (int)(((U*mU) + ((pixel_range - mU)*baseU[x])) >> MASK_CORR_SHIFT); + V = (int)(((V*mV) + ((pixel_range - mV)*baseV[x])) >> MASK_CORR_SHIFT); + } + if (Y>max_pixel_value) { // Apply overbrightness to UV + int multiplier = max(0,pixel_range + over32 -Y); // 0 to 32 + U = ((U*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + V = ((V*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + Y = max_pixel_value; } else if (Y<0) { // Apply superdark to UV - int multiplier = min(-Y,32); // 0 to 32 - U = ((U*(32-multiplier)) + (128*(multiplier)))>>5; - V = ((V*(32-multiplier)) + (128*(multiplier)))>>5; + int multiplier = min(-Y,over32); // 0 to 32 + U = ((U*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; + V = ((V*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; Y = 0; } - baseY[x] = 
(BYTE)Y; - baseU[x] = (BYTE)clamp(U, 0, 255); - baseV[x] = (BYTE)clamp(V, 0, 255); + baseY[x] = (pixel_t)Y; + baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); + baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; - maskY += mask->pitch; - maskU += mask->pitch; - maskV += mask->pitch; + if(maskMode) { + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; + } } // for y } else { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int Y = ((int)(baseY[x]^0xff)*ovY[x] + (int)(ovY[x]^0xff)*baseY[x])>>8; - int U = ((int)(baseU[x]^0xff)*ovY[x] + (int)(ovY[x]^0xff)*baseU[x])>>8; - int V = ((int)(baseV[x]^0xff)*ovY[x] + (int)(ovY[x]^0xff)*baseV[x])>>8; - int mY = (maskY[x]*opacity)>>8; - int mU = (maskU[x]*opacity)>>8; - int mV = (maskV[x]*opacity)>>8; - Y = ((Y*mY) + ((256-mY)*baseY[x]))>>8; - U = ((U*mU) + ((256-mU)*baseU[x]))>>8; - V = ((V*mV) + ((256-mV)*baseV[x]))>>8; - if (Y>255) { // Apply overbrightness to UV - int multiplier = max(0,288-Y); // 0 to 32 - U = ((U*multiplier) + (128*(32-multiplier)))>>5; - V = ((V*multiplier) + (128*(32-multiplier)))>>5; - Y = 255; + result_t ovYx = ovY[x]; + int Y = (int)(((baseY[x] ^ xor_mask)*ovYx + (ovYx^xor_mask)*baseY[x]) >> MASK_CORR_SHIFT); + int U = (int)(((baseU[x] ^ xor_mask)*ovYx + (ovYx^xor_mask)*baseU[x]) >> MASK_CORR_SHIFT); + int V = (int)(((baseV[x] ^ xor_mask)*ovYx + (ovYx^xor_mask)*baseV[x]) >> MASK_CORR_SHIFT); + if(maskMode) { + result_t mY = (maskY[x] * opacity) >> OPACITY_SHIFT; + result_t mU = (maskU[x] * opacity) >> OPACITY_SHIFT; + result_t mV = (maskV[x] * opacity) >> OPACITY_SHIFT; + Y = (int)(((Y*mY) + ((pixel_range - mY)*baseY[x])) >> MASK_CORR_SHIFT); + U = (int)(((U*mU) + ((pixel_range - mU)*baseU[x])) >> MASK_CORR_SHIFT); + V = (int)(((V*mV) + ((pixel_range - mV)*baseV[x])) >> MASK_CORR_SHIFT); + } + else { + Y = ((Y*opacity) + (inv_opacity*baseY[x])) >> OPACITY_SHIFT; + U = ((U*opacity) + (inv_opacity*baseU[x])) >> OPACITY_SHIFT; + V = ((V*opacity) + (inv_opacity*baseV[x])) >> OPACITY_SHIFT; + } + if (Y>max_pixel_value) { // Apply overbrightness to UV + int multiplier = max(0,pixel_range + over32 -Y); // 0 to 32 + U = ((U*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + V = ((V*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + Y = max_pixel_value; } else if (Y<0) { // Apply superdark to UV - int multiplier = min(-Y,32); // 0 to 32 - U = ((U*(32-multiplier)) + (128*(multiplier)))>>5; - V = ((V*(32-multiplier)) + (128*(multiplier)))>>5; + int multiplier = min(-Y,over32); // 0 to 32 + U = ((U*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; + V = ((V*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; Y = 0; } - baseY[x] = (BYTE)Y; - baseU[x] = (BYTE)clamp(U, 0, 255); - baseV[x] = (BYTE)clamp(V, 0, 255); + baseY[x] = (pixel_t)Y; + baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); + baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; - maskY += mask->pitch; - maskU += mask->pitch; - 
maskV += mask->pitch; + if(maskMode) { + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; + } } // for x } // for y } +#if 0 template void OL_ExclusionImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); @@ -232,3 +264,4 @@ void OL_ExclusionImage::BlendImage(Image444* base, Image444* overlay) { } // for y }// if !mmx } +#endif diff --git a/avs_core/filters/overlay/overlayfunctions.h b/avs_core/filters/overlay/overlayfunctions.h index 5b49bed48..89a5b4fa6 100644 --- a/avs_core/filters/overlay/overlayfunctions.h +++ b/avs_core/filters/overlay/overlayfunctions.h @@ -187,9 +187,9 @@ class OL_DifferenceImage : public OverlayFunction { class OL_ExclusionImage : public OverlayFunction { void DoBlendImage(Image444* base, Image444* overlay); void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); - template - void BlendImage(Image444* base, Image444* overlay); - template + // template + // void BlendImage(Image444* base, Image444* overlay); + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; From 101e3f19606574d572acf51791bb3dc12e466a4b Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 14 Oct 2016 13:43:44 +0200 Subject: [PATCH 112/120] Misc. line ordering --- avs_core/filters/overlay/OF_difference.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/avs_core/filters/overlay/OF_difference.cpp b/avs_core/filters/overlay/OF_difference.cpp index e82e888d4..d2856c64d 100644 --- a/avs_core/filters/overlay/OF_difference.cpp +++ b/avs_core/filters/overlay/OF_difference.cpp @@ -115,9 +115,9 @@ void OL_DifferenceImage::BlendImageMask(Image444* base, Image444* overlay, Image V = ((V*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; Y = 0; } + baseY[x] = (pixel_t)Y; baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); - baseY[x] = (pixel_t)Y; } baseY += basepitch; baseU += basepitch; @@ -163,9 +163,9 @@ void OL_DifferenceImage::BlendImageMask(Image444* base, Image444* overlay, Image V = ((V*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; Y = 0; } + baseY[x] = (pixel_t)Y; baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); - baseY[x] = (pixel_t)Y; } baseY += basepitch; baseU += basepitch; From 87426c7078a8b65b083c5712a5be3934093fc72f Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 14 Oct 2016 14:53:03 +0200 Subject: [PATCH 113/120] Overlay: Softlight, Hardlight 10-16 bit. No more left. 
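
The two modes now share one implementation: a compile-time hardLight flag doubles the overlay contribution to luma, and a maskMode flag folds the unmasked path into BlendImageMask (the mask pointers simply become nullptr). A condensed, illustrative sketch of the luma term only, before the mask/opacity weighting and the over/under-range correction; the helper name is not part of the patch:

#include <cstdint>

// Luma term of the shared soft/hard light kernel; chroma uses
// base + overlay - half for both modes. half is 128 at 8 bits and
// 1 << (bits_per_pixel - 1) for the deeper formats.
template <typename pixel_t, int bits_per_pixel, bool hardLight>
static int softhard_light_luma_sketch(pixel_t baseY, pixel_t ovY)
{
  const int half = (sizeof(pixel_t) == 1) ? 128 : (1 << (bits_per_pixel - 1));
  return hardLight ? (int)baseY + 2 * (int)ovY - 2 * half
                   : (int)baseY + (int)ovY - half;
}

Overlay's mode dispatcher accordingly maps the "HardLight" name to OL_SoftLightImage as well, as the overlay.cpp hunk below shows.
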
--- avs_core/filters/overlay/OF_softhardlight.cpp | 255 +++++++++++------- avs_core/filters/overlay/overlay.cpp | 3 +- avs_core/filters/overlay/overlayfunctions.h | 7 +- 3 files changed, 164 insertions(+), 101 deletions(-) diff --git a/avs_core/filters/overlay/OF_softhardlight.cpp b/avs_core/filters/overlay/OF_softhardlight.cpp index 77eb774c2..0fa4d98c0 100644 --- a/avs_core/filters/overlay/OF_softhardlight.cpp +++ b/avs_core/filters/overlay/OF_softhardlight.cpp @@ -37,56 +37,94 @@ #include "overlayfunctions.h" #include +#include void OL_SoftLightImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { - if (bits_per_pixel == 8) - BlendImageMask(base, overlay, mask); - //else if(bits_per_pixel == 32) - // BlendImageMask(base, overlay, mask); - else if(bits_per_pixel == 16) - BlendImageMask(base, overlay, mask); + if(of_mode == OF_SoftLight) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + } else { + // OF_HardLight + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + } } void OL_SoftLightImage::DoBlendImage(Image444* base, Image444* overlay) { - if (bits_per_pixel == 8) - BlendImage(base, overlay); - //else if(bits_per_pixel == 32) - // BlendImage(base, overlay); - else if(bits_per_pixel == 16) - BlendImage(base, overlay); + if(of_mode == OF_SoftLight) { + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, nullptr); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, nullptr); + //else if(bits_per_pixel == 32) + // BlendImage(base, overlay); + } + else { + // OF_HardLight + if (bits_per_pixel == 8) + BlendImageMask(base, overlay, nullptr); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, nullptr); + //else if(bits_per_pixel == 32) + // BlendImageMask(base, overlay, mask); + } } +#if 0 void OL_HardLightImage::DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask) { if (bits_per_pixel == 8) - BlendImageMask(base, overlay, mask); + BlendImageMask(base, overlay, mask); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, mask); //else if(bits_per_pixel == 32) // BlendImageMask(base, overlay, mask); - else if(bits_per_pixel == 16) - BlendImageMask(base, overlay, mask); } void OL_HardLightImage::DoBlendImage(Image444* base, Image444* overlay) { if (bits_per_pixel == 8) - BlendImage(base, overlay); + BlendImageMask(base, overlay, nullptr); + else if(bits_per_pixel <= 16) + BlendImageMask(base, overlay, nullptr); //else if(bits_per_pixel == 32) // BlendImage(base, overlay); - else if(bits_per_pixel == 16) - BlendImage(base, overlay); } +#endif -template +template void OL_SoftLightImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { - BYTE* baseY = base->GetPtr(PLANAR_Y); - BYTE* baseU = base->GetPtr(PLANAR_U); - BYTE* baseV = base->GetPtr(PLANAR_V); - - BYTE* ovY = overlay->GetPtr(PLANAR_Y); - BYTE* ovU = overlay->GetPtr(PLANAR_U); - BYTE* ovV = overlay->GetPtr(PLANAR_V); - - BYTE* maskY = mask->GetPtr(PLANAR_Y); - BYTE* maskU = mask->GetPtr(PLANAR_U); - BYTE* maskV = mask->GetPtr(PLANAR_V); + pixel_t* baseY = reinterpret_cast(base->GetPtr(PLANAR_Y)); + pixel_t* baseU = reinterpret_cast(base->GetPtr(PLANAR_U)); + pixel_t* baseV = reinterpret_cast(base->GetPtr(PLANAR_V)); + + 
pixel_t* ovY = reinterpret_cast(overlay->GetPtr(PLANAR_Y)); + pixel_t* ovU = reinterpret_cast(overlay->GetPtr(PLANAR_U)); + pixel_t* ovV = reinterpret_cast(overlay->GetPtr(PLANAR_V)); + + pixel_t* maskY = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_Y)) : nullptr; + pixel_t* maskU = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_U)) : nullptr; + pixel_t* maskV = maskMode ? reinterpret_cast(mask->GetPtr(PLANAR_V)) : nullptr; + + const int half_pixel_value = (sizeof(pixel_t) == 1) ? 128 : (1 << (bits_per_pixel - 1)); + const int max_pixel_value = (sizeof(pixel_t) == 1) ? 255 : (1 << bits_per_pixel) - 1; + const int pixel_range = max_pixel_value + 1; + const int SHIFT = (sizeof(pixel_t) == 1) ? 5 : 5 + (bits_per_pixel - 8); + const int MASK_CORR_SHIFT = (sizeof(pixel_t) == 1) ? 8 : bits_per_pixel; + const int OPACITY_SHIFT = 8; // opacity always max 0..256 + const int over32 = (1 << SHIFT); // 32 + const int basepitch = (base->pitch) / sizeof(pixel_t); + const int overlaypitch = (overlay->pitch) / sizeof(pixel_t); + const int maskpitch = maskMode ? (mask->pitch) / sizeof(pixel_t) : 0; + + // avoid "uint16*uint16 can't get into int32" overflows + typedef std::conditional < sizeof(pixel_t) == 1, int, typename std::conditional < sizeof(pixel_t) == 2, __int64, float>::type >::type result_t; int w = base->w(); int h = base->h(); @@ -94,85 +132,106 @@ void OL_SoftLightImage::BlendImageMask(Image444* base, Image444* overlay, Image4 if (opacity == 256) { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int Y = (int)baseY[x] + (int)ovY[x] - 128; - int U = baseU[x] + ovU[x] - 128; - int V = baseV[x] + ovV[x] - 128; - int mY = maskY[x]; - int mU = maskU[x]; - int mV = maskV[x]; - Y = ((Y*mY) + ((256-mY)*baseY[x]))>>8; - U = ((U*mU) + ((256-mU)*baseU[x]))>>8; - V = ((V*mV) + ((256-mV)*baseV[x]))>>8; - if (Y>255) { // Apply overbrightness to UV - int multiplier = max(0,288-Y); // 0 to 32 - U = ((U*multiplier) + (128*(32-multiplier)))>>5; - V = ((V*multiplier) + (128*(32-multiplier)))>>5; - Y = 255; + int Y; + if(hardLight) + Y = (int)baseY[x] + ((int)ovY[x])*2 - half_pixel_value*2; + else + Y = (int)baseY[x] + (int)ovY[x] - half_pixel_value; + int U = baseU[x] + ovU[x] - half_pixel_value; + int V = baseV[x] + ovV[x] - half_pixel_value; + if(maskMode) { + result_t mY = maskY[x]; + result_t mU = maskU[x]; + result_t mV = maskV[x]; + Y = (int)(((Y*mY) + ((pixel_range - mY)*baseY[x])) >> MASK_CORR_SHIFT); + U = (int)(((U*mU) + ((pixel_range - mU)*baseU[x])) >> MASK_CORR_SHIFT); + V = (int)(((V*mV) + ((pixel_range - mV)*baseV[x])) >> MASK_CORR_SHIFT); + } + if (Y>max_pixel_value) { // Apply overbrightness to UV + int multiplier = max(0,pixel_range + over32 -Y); // 0 to 32 + U = ((U*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + V = ((V*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + Y = max_pixel_value; } else if (Y<0) { // Apply superdark to UV - int multiplier = min(-Y,32); // 0 to 32 - U = ((U*(32-multiplier)) + (128*(multiplier)))>>5; - V = ((V*(32-multiplier)) + (128*(multiplier)))>>5; + int multiplier = min(-Y,over32); // 0 to 32 + U = ((U*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; + V = ((V*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; Y = 0; - } - - baseY[x] = (BYTE)Y; - baseU[x] = (BYTE)clamp(U, 0, 255); - baseV[x] = (BYTE)clamp(V, 0, 255); + } + baseY[x] = (pixel_t)Y; + baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); + baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); + } + baseY += basepitch; + 
baseU += basepitch; + baseV += basepitch; + + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; + + if(maskMode) { + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; - - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; - - maskY += mask->pitch; - maskU += mask->pitch; - maskV += mask->pitch; } // for y } else { for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - int Y = (int)baseY[x] + (int)ovY[x] - 128; - int U = baseU[x] + ovU[x] - 128; - int V = baseV[x] + ovV[x] - 128; - int mY = (maskY[x]*opacity)>>8; - int mU = (maskU[x]*opacity)>>8; - int mV = (maskV[x]*opacity)>>8; - Y = ((Y*mY) + ((256-mY)*baseY[x]))>>8; - U = ((U*mU) + ((256-mU)*baseU[x]))>>8; - V = ((V*mV) + ((256-mV)*baseV[x]))>>8; - if (Y>255) { // Apply overbrightness to UV - int multiplier = max(0,288-Y); // 0 to 32 - U = ((U*multiplier) + (128*(32-multiplier)))>>5; - V = ((V*multiplier) + (128*(32-multiplier)))>>5; - Y = 255; + int Y; + if (hardLight) + Y = (int)baseY[x] + ((int)ovY[x])*2 - half_pixel_value*2; + else + Y = (int)baseY[x] + (int)ovY[x] - half_pixel_value; + int U = baseU[x] + ovU[x] - half_pixel_value; + int V = baseV[x] + ovV[x] - half_pixel_value; + if(maskMode) { + result_t mY = (maskY[x] * opacity) >> OPACITY_SHIFT; + result_t mU = (maskU[x] * opacity) >> OPACITY_SHIFT; + result_t mV = (maskV[x] * opacity) >> OPACITY_SHIFT; + Y = (int)(((Y*mY) + ((pixel_range - mY)*baseY[x])) >> MASK_CORR_SHIFT); + U = (int)(((U*mU) + ((pixel_range - mU)*baseU[x])) >> MASK_CORR_SHIFT); + V = (int)(((V*mV) + ((pixel_range - mV)*baseV[x])) >> MASK_CORR_SHIFT); + } + else { + Y = ((Y*opacity) + (inv_opacity*baseY[x])) >> OPACITY_SHIFT; + U = ((U*opacity) + (inv_opacity*baseU[x])) >> OPACITY_SHIFT; + V = ((V*opacity) + (inv_opacity*baseV[x])) >> OPACITY_SHIFT; + } + if (Y>max_pixel_value) { // Apply overbrightness to UV + int multiplier = max(0,pixel_range + over32 -Y); // 0 to 32 + U = ((U*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + V = ((V*( multiplier)) + (half_pixel_value*(over32-multiplier)))>>SHIFT; + Y = max_pixel_value; } else if (Y<0) { // Apply superdark to UV - int multiplier = min(-Y,32); // 0 to 32 - U = ((U*(32-multiplier)) + (128*(multiplier)))>>5; - V = ((V*(32-multiplier)) + (128*(multiplier)))>>5; + int multiplier = min(-Y,over32); // 0 to 32 + U = ((U*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; + V = ((V*(over32 - multiplier)) + (half_pixel_value*( multiplier)))>>SHIFT; Y = 0; } - baseY[x] = (BYTE)Y; - baseU[x] = (BYTE)clamp(U, 0, 255); - baseV[x] = (BYTE)clamp(V, 0, 255); + baseY[x] = (pixel_t)Y; + baseU[x] = (pixel_t)clamp(U, 0, max_pixel_value); + baseV[x] = (pixel_t)clamp(V, 0, max_pixel_value); + } + baseY += basepitch; + baseU += basepitch; + baseV += basepitch; + + ovY += overlaypitch; + ovU += overlaypitch; + ovV += overlaypitch; + + if(maskMode) { + maskY += maskpitch; + maskU += maskpitch; + maskV += maskpitch; } - baseY += base->pitch; - baseU += base->pitch; - baseV += base->pitch; - - ovY += overlay->pitch; - ovU += overlay->pitch; - ovV += overlay->pitch; - - maskY += mask->pitch; - maskU += mask->pitch; - maskV += mask->pitch; } // for x } // for y } +#if 0 template void OL_SoftLightImage::BlendImage(Image444* base, Image444* overlay) { BYTE* baseY = base->GetPtr(PLANAR_Y); @@ -249,11 +308,11 @@ void OL_SoftLightImage::BlendImage(Image444* base, Image444* overlay) { } // for y }// if !mmx } - +#endif 
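Editor's note: the hunks above route both OF_SoftLight and OF_HardLight through one templated BlendImageMask, parameterized (judging from the body) by pixel type, a mask-mode flag and a hard-light flag, so a single routine covers 8..16 bit depths with or without a mask. Below is a minimal standalone sketch of the per-pixel luma arithmetic only; blend_luma and its parameters are illustrative names, not the filter's API, and the overbright/superdark spill into chroma is reduced to a comment.

#include <cstdint>

// One luma sample of the generalized soft/hard light blend (sketch only).
// bits = bit depth (8..16), opacity = 0..256 as in the Overlay filter.
template <typename pixel_t, bool maskMode, bool hardLight>
static int blend_luma(pixel_t base, pixel_t ovr, pixel_t mask, int opacity, int bits)
{
  const int half  = 1 << (bits - 1);        // 128, 512, ... mid grey
  const int maxv  = (1 << bits) - 1;        // 255, 1023, ...
  const int range = maxv + 1;               // mask weights run 0..range
  // soft light adds the overlay once, hard light twice, both centred on mid grey
  int y = hardLight ? (int)base + 2 * (int)ovr - 2 * half
                    : (int)base + (int)ovr - half;
  if (maskMode) {
    // per-pixel mask pre-scaled by opacity; the 64-bit products avoid the
    // uint16*uint16 overflow the patch guards against with result_t
    int64_t m = ((int64_t)mask * opacity) >> 8;
    y = (int)(((int64_t)y * m + (int64_t)(range - m) * base) >> bits);
  } else {
    y = (y * opacity + (256 - opacity) * (int)base) >> 8;
  }
  // the real code pushes Y overflow/underflow into U/V ("overbrightness" /
  // "superdark") before writing; this sketch simply clamps
  return y < 0 ? 0 : (y > maxv ? maxv : y);
}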
/************* Hard Light ***************/ - -template +#if 0 +template void OL_HardLightImage::BlendImageMask(Image444* base, Image444* overlay, Image444* mask) { BYTE* baseY = base->GetPtr(PLANAR_Y); BYTE* baseU = base->GetPtr(PLANAR_U); @@ -428,4 +487,4 @@ void OL_HardLightImage::BlendImage(Image444* base, Image444* overlay) { } // for y }// if !mmx } - +#endif diff --git a/avs_core/filters/overlay/overlay.cpp b/avs_core/filters/overlay/overlay.cpp index 5e115020c..d803d91c5 100644 --- a/avs_core/filters/overlay/overlay.cpp +++ b/avs_core/filters/overlay/overlay.cpp @@ -612,7 +612,8 @@ OverlayFunction* Overlay::SelectFunction(const char* name, int &of_mode, IScript if (!lstrcmpi(name, "HardLight")) { of_mode = OF_HardLight; - return new OL_HardLightImage(); + //return new OL_HardLightImage(); + return new OL_SoftLightImage(); // Common with SoftLight } if (!lstrcmpi(name, "Difference")) { diff --git a/avs_core/filters/overlay/overlayfunctions.h b/avs_core/filters/overlay/overlayfunctions.h index 89a5b4fa6..f8eaa55fc 100644 --- a/avs_core/filters/overlay/overlayfunctions.h +++ b/avs_core/filters/overlay/overlayfunctions.h @@ -162,18 +162,21 @@ class OL_SoftLightImage : public OverlayFunction { void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); template void BlendImage(Image444* base, Image444* overlay); - template + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; +#if 0 +// common with OL_HardLightImage class OL_HardLightImage : public OverlayFunction { void DoBlendImage(Image444* base, Image444* overlay); void DoBlendImageMask(Image444* base, Image444* overlay, Image444* mask); template void BlendImage(Image444* base, Image444* overlay); - template + template void BlendImageMask(Image444* base, Image444* overlay, Image444* mask); }; +#endif class OL_DifferenceImage : public OverlayFunction { void DoBlendImage(Image444* base, Image444* overlay); From 066bc4bd2986705a25b7e301f6145dfe9982df08 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 14 Oct 2016 17:17:44 +0200 Subject: [PATCH 114/120] Histogram: Classic 10-16 bit, and 32 bit float --- avs_core/filters/histogram.cpp | 204 +++++++++++++++++++++++++++------ 1 file changed, 166 insertions(+), 38 deletions(-) diff --git a/avs_core/filters/histogram.cpp b/avs_core/filters/histogram.cpp index d2bdc3c31..1a34b4de2 100644 --- a/avs_core/filters/histogram.cpp +++ b/avs_core/filters/histogram.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #define PI 3.141592653589793 @@ -74,10 +75,18 @@ Histogram::Histogram(PClip _child, Mode _mode, AVSValue _option, int _show_bits, if(show_bits < 8 || show_bits>12) env->ThrowError("Histogram: bits parameter can only be 8, 9 .. 
12"); + // until all histogram is ported + bool non8bit = show_bits != 8 || bits_per_pixel != 8; + + if (non8bit && mode != ModeClassic && mode != ModeLevels) + { + env->ThrowError("Histogram: histogram type is available only for 8 bit formats and parameters"); + } + if (mode == ModeClassic) { if (!vi.IsYUV() && !vi.IsYUVA()) env->ThrowError("Histogram: YUV(A) data only"); - vi.width += 256; + vi.width += (1 << show_bits); } if (mode == ModeLevels) { @@ -97,7 +106,6 @@ Histogram::Histogram(PClip _child, Mode _mode, AVSValue _option, int _show_bits, vi.height = max(256, vi.height); } - if (mode == ModeColor) { if (!vi.IsPlanar()) { env->ThrowError("Histogram: Color mode only available in PLANAR."); @@ -107,7 +115,7 @@ Histogram::Histogram(PClip _child, Mode _mode, AVSValue _option, int _show_bits, } // put diagram on the right side vi.width += (1 << show_bits); // 256 for 8 bit - vi.height = max(256,vi.height); + vi.height = max(1 << show_bits,vi.height); } if (mode == ModeColor2) { @@ -1244,24 +1252,56 @@ PVideoFrame Histogram::DrawModeLevels(int n, IScriptEnvironment* env) { PVideoFrame Histogram::DrawModeClassic(int n, IScriptEnvironment* env) { - static BYTE exptab[256]; + static uint16_t exptab[1<<12]; // max bits=12 static bool init = false; static int E167; + + int show_size = 1 << show_bits; + + int lookup_size = 1 << show_bits; // 256, 1024, 4096, 16384, 65536 + + int hist_max_pixel_value = (1 << show_bits) - 1; + int hist_tv_range_low = 16 << (show_bits - 8); // 16 + int hist_tv_range_hi_luma = ((235+1) << (show_bits - 8)) - 1; // 16-235 + int hist_range_luma = hist_tv_range_hi_luma - hist_tv_range_low; // 219 + int hist_mid_range_luma = (hist_range_luma + 1) / 2; // 124 + int hist_tv_range_hi_chroma = ((240+1) << (show_bits - 8)) - 1; // 16-240,64–963, 256–3855,... 4096-61695 + int hist_range_chroma = hist_tv_range_hi_chroma - hist_tv_range_low; // 224 + + int internal_bits_per_pixel = (pixelsize == 4) ? 16 : bits_per_pixel; // hack for float + + int max_pixel_value = (1 << internal_bits_per_pixel) - 1; + int tv_range_low = 16 << (internal_bits_per_pixel - 8); // 16 + int tv_range_hi_luma = ((235+1) << (internal_bits_per_pixel - 8)) - 1; // 16-235 + int range_luma = tv_range_hi_luma - tv_range_low; // 219 + int mid_range_luma = (internal_bits_per_pixel + 1) / 2; // 124 + int tv_range_hi_chroma = ((240+1) << (internal_bits_per_pixel - 8)) - 1; // 16-240,64–963, 256–3855,... 
4096-61695 + int range_chroma = tv_range_hi_chroma - tv_range_low; // 224 + int middle_chroma = 1 << (internal_bits_per_pixel - 1); // 128 + if (!init) { init = true; - const double K = log(0.5/219)/255; // approx -1/42 - - exptab[0] = 16; - for (int i = 1; i<255; i++) { - exptab[i] = BYTE(16.5 + 219 * (1-exp(i*K))); - if (exptab[i] <= 235-68) E167 = i; + const double K = log(0.5/hist_range_luma)/hist_max_pixel_value; // approx -1/42 + const int limit68 = 68 << (internal_bits_per_pixel - 8); + // exptab: pixel values for final drawing + exptab[0] = tv_range_low; + for (int i = 1; i>= (show_bits - internal_bits_per_pixel); // scale intensity down + else + exptab[i] <<= (internal_bits_per_pixel - show_bits); // scale intensity up + */ } - exptab[255] = 235; + exptab[hist_max_pixel_value] = tv_range_hi_luma; } - const int w = vi.width-256; + const int w = vi.width-show_size; // -256 PVideoFrame src = child->GetFrame(n, env); PVideoFrame dst = env->NewVideoFrame(vi); @@ -1273,16 +1313,69 @@ PVideoFrame Histogram::DrawModeClassic(int n, IScriptEnvironment* env) // luma for (int y = 0; yGetHeight(PLANAR_Y); ++y) { - int hist[256] = { 0 }; - for (int x = 0; x235) { - q[x] = exptab[min(E167, hist[x])] + 68; + else if (pixelsize == 2) { + const uint16_t *srcp16 = reinterpret_cast(p); + int shift = bits_per_pixel - show_bits; + int max_pixel_value = show_size - 1; + if (shift < 0) { + // 10 bit clip into 11 bit histogram + int invshift = -shift; + for (int x = 0; x < w; x++) { + hist[srcp16[x] << invshift]++; + } } else { - q[x] = exptab[min(255, hist[x])]; + // e.g.10 bit clip into 8-9-10 bit histogram + for (int x = 0; x < w; x++) { + hist[min(srcp16[x] >> shift, max_pixel_value)]++; + } + } + } + else // pixelsize == 4 + { + // float + const float *srcp32 = reinterpret_cast(p); + const float multiplier = (float)(show_size - 1); + for (int x = 0; x < w; x++) { + hist[(int)(clamp(srcp32[x], 0.0f, 1.0f)*multiplier)]++; + } + } + // accumulate end + BYTE* const q = p + w * pixelsize; // write to frame + if(pixelsize==1) { + for (int x = 0; xhist_tv_range_hi_luma) { + q[x] = (BYTE)exptab[min(E167, hist[x])] + 68; + } else { + q[x] = (BYTE)exptab[min(255, hist[x])]; + } + } + } else if (pixelsize == 2) { + uint16_t *dstp16 = reinterpret_cast(q); + for (int x = 0; xhist_tv_range_hi_luma) { + dstp16[x] = exptab[min(E167, hist[x])] + (68 << (bits_per_pixel - 8)); + } else { + dstp16[x] = exptab[min(hist_max_pixel_value, hist[x])]; + } + } + } else { // pixelsize == 4 + float *dstp32 = reinterpret_cast(q); + for (int x = 0; xhist_tv_range_hi_luma) { + dstp32[x] = (exptab[min(E167, hist[x])] + (68 << (internal_bits_per_pixel - 8))) / 65536.0f; + } else { + dstp32[x] = exptab[min(hist_max_pixel_value, hist[x])] / 65536.0f; + } } } p += dst->GetPitch(); @@ -1293,22 +1386,57 @@ PVideoFrame Histogram::DrawModeClassic(int n, IScriptEnvironment* env) const int subs = vi.GetPlaneWidthSubsampling(PLANAR_U); const int fact = 1<GetWritePtr(PLANAR_U) + (w >> subs); - BYTE* p3 = dst->GetWritePtr(PLANAR_V) + (w >> subs); + BYTE* p2 = dst->GetWritePtr(PLANAR_U) + ((w*pixelsize) >> subs); + BYTE* p3 = dst->GetWritePtr(PLANAR_V) + ((w*pixelsize) >> subs); + + const uint16_t chroma160 = 160 << (internal_bits_per_pixel - 8); + const float tv_range_low_f = 16 / 256.0f; + const float chroma160_f = 160 / 256.0f; + const float middle_chroma_f = 0.5f; for (int y2 = 0; y2GetHeight(PLANAR_U); ++y2) { - for (int x = 0; x<256; x += fact) { - if (x<16 || x>235) { - p2[x >> subs] = 16; - p3[x >> subs] = 160; - } else if (x==124) { - 
p2[x >> subs] = 160; - p3[x >> subs] = 16; - } else { - p2[x >> subs] = 128; - p3[x >> subs] = 128; + if(pixelsize==1) { + for (int x = 0; xhist_tv_range_hi_luma) { + p2[x >> subs] = 16; + p3[x >> subs] = 160; + } else if (x==hist_mid_range_luma) { + p2[x >> subs] = 160; + p3[x >> subs] = 16; + } else { + p2[x >> subs] = 128; + p3[x >> subs] = 128; + } } } + else if (pixelsize == 2) { + for (int x = 0; xhist_tv_range_hi_luma) { + reinterpret_cast(p2)[x >> subs] = tv_range_low; + reinterpret_cast(p3)[x >> subs] = chroma160; + } else if (x==hist_mid_range_luma) { + reinterpret_cast(p2)[x >> subs] = chroma160; + reinterpret_cast(p3)[x >> subs] = tv_range_low; + } else { + reinterpret_cast(p2)[x >> subs] = middle_chroma; + reinterpret_cast(p3)[x >> subs] = middle_chroma; + } + } + } else { // pixelsize==4 + for (int x = 0; xhist_tv_range_hi_luma) { + reinterpret_cast(p2)[x >> subs] = tv_range_low_f; + reinterpret_cast(p3)[x >> subs] = chroma160_f; + } else if (x==hist_mid_range_luma) { + reinterpret_cast(p2)[x >> subs] = chroma160_f; + reinterpret_cast(p3)[x >> subs] = tv_range_low_f; + } else { + reinterpret_cast(p2)[x >> subs] = middle_chroma_f; + reinterpret_cast(p3)[x >> subs] = middle_chroma_f; + } + } + + } p2 += dst->GetPitch(PLANAR_U); p3 += dst->GetPitch(PLANAR_V); } @@ -1322,19 +1450,19 @@ PVideoFrame Histogram::DrawModeClassic(int n, IScriptEnvironment* env) BYTE* const q = p + w*2; for (int x = 0; x<256; x += 2) { if (x<16 || x>235) { - q[x*2+0] = exptab[min(E167, hist[x])] + 68; + q[x*2+0] = (BYTE)exptab[min(E167, hist[x])] + 68; q[x*2+1] = 16; - q[x*2+2] = exptab[min(E167, hist[x+1])] + 68; + q[x*2+2] = (BYTE)exptab[min(E167, hist[x+1])] + 68; q[x*2+3] = 160; } else if (x==124) { - q[x*2+0] = exptab[min(E167, hist[x])] + 68; + q[x*2+0] = (BYTE)exptab[min(E167, hist[x])] + 68; q[x*2+1] = 160; - q[x*2+2] = exptab[min(255, hist[x+1])]; + q[x*2+2] = (BYTE)exptab[min(255, hist[x+1])]; q[x*2+3] = 16; } else { - q[x*2+0] = exptab[min(255, hist[x])]; + q[x*2+0] = (BYTE)exptab[min(255, hist[x])]; q[x*2+1] = 128; - q[x*2+2] = exptab[min(255, hist[x+1])]; + q[x*2+2] = (BYTE)exptab[min(255, hist[x+1])]; q[x*2+3] = 128; } } From 42331b4814502cac8e0b2f08e8360fcb679f0953 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 14 Oct 2016 19:55:53 +0200 Subject: [PATCH 115/120] ShowRed/G/B/A: new ShowY/U/V. 
New:allow PlanarRGB(A)/YUV(A) src and targets --- avs_core/filters/layer.cpp | 353 ++++++++++++++++++++++++++++++++++--- avs_core/filters/layer.h | 7 +- 2 files changed, 335 insertions(+), 25 deletions(-) diff --git a/avs_core/filters/layer.cpp b/avs_core/filters/layer.cpp index 3a870e702..74218feb0 100644 --- a/avs_core/filters/layer.cpp +++ b/avs_core/filters/layer.cpp @@ -56,10 +56,13 @@ extern const AVSFunction Layer_filters[] = { { "ColorKeyMask", BUILTIN_FUNC_PREFIX, "ci[]i[]i[]i", ColorKeyMask::Create }, // clip, color, tolerance[B, toleranceG, toleranceR] { "ResetMask", BUILTIN_FUNC_PREFIX, "c[mask]f", ResetMask::Create }, { "Invert", BUILTIN_FUNC_PREFIX, "c[channels]s", Invert::Create }, - { "ShowAlpha", BUILTIN_FUNC_PREFIX, "c[pixel_type]s", ShowChannel::Create, (void*)3 }, + { "ShowAlpha", BUILTIN_FUNC_PREFIX, "c[pixel_type]s", ShowChannel::Create, (void*)3 }, // AVS+ also for YUVA, PRGBA { "ShowRed", BUILTIN_FUNC_PREFIX, "c[pixel_type]s", ShowChannel::Create, (void*)2 }, { "ShowGreen", BUILTIN_FUNC_PREFIX, "c[pixel_type]s", ShowChannel::Create, (void*)1 }, { "ShowBlue", BUILTIN_FUNC_PREFIX, "c[pixel_type]s", ShowChannel::Create, (void*)0 }, + { "ShowY", BUILTIN_FUNC_PREFIX, "c[pixel_type]s", ShowChannel::Create, (void*)4 }, // AVS+ + { "ShowU", BUILTIN_FUNC_PREFIX, "c[pixel_type]s", ShowChannel::Create, (void*)5 }, // AVS+ + { "ShowV", BUILTIN_FUNC_PREFIX, "c[pixel_type]s", ShowChannel::Create, (void*)6 }, // AVS+ { "MergeRGB", BUILTIN_FUNC_PREFIX, "ccc[pixel_type]s", MergeRGB::Create, (void*)0 }, { "MergeARGB", BUILTIN_FUNC_PREFIX, "cccc", MergeRGB::Create, (void*)1 }, { "Layer", BUILTIN_FUNC_PREFIX, "cc[op]s[level]i[x]i[y]i[threshold]i[use_chroma]b", Layer::Create }, @@ -1020,55 +1023,92 @@ AVSValue Invert::Create(AVSValue args, void*, IScriptEnvironment* env) ShowChannel::ShowChannel(PClip _child, const char * pixel_type, int _channel, IScriptEnvironment* env) - : GenericVideoFilter(_child), channel(_channel), input_type(_child->GetVideoInfo().pixel_type), pixelsize(_child->GetVideoInfo().ComponentSize()) + : GenericVideoFilter(_child), channel(_channel), input_type(_child->GetVideoInfo().pixel_type), + pixelsize(_child->GetVideoInfo().ComponentSize()), bits_per_pixel(_child->GetVideoInfo().BitsPerComponent()) { - static const char * const ShowText[4] = {"Blue", "Green", "Red", "Alpha"}; + static const char * const ShowText[7] = {"Blue", "Green", "Red", "Alpha", "Y", "U", "V"}; - if ((channel == 3) && !vi.IsRGB32() && !vi.IsRGB64()) - env->ThrowError("ShowAlpha: RGB32, RGB64 data only"); + input_type_is_planar_rgb = vi.IsPlanarRGB(); + input_type_is_planar_rgba = vi.IsPlanarRGBA(); + input_type_is_yuva = vi.IsYUVA(); + input_type_is_yuv = vi.IsYUV() && vi.IsPlanar(); - if ((channel < 3) && !vi.IsRGB()) - env->ThrowError("Show%s: RGB data only", ShowText[channel]); + if(vi.IsYUY2()) + env->ThrowError("Show%s: YUY2 not supported", ShowText[channel]); - if(vi.IsPlanarRGB() || vi.IsPlanarRGBA()) + int orig_channel = channel; + + // A channel + if ((channel == 3) && !vi.IsRGB32() && !vi.IsRGB64() && !vi.IsPlanarRGBA() && !vi.IsYUVA()) + env->ThrowError("ShowAlpha: RGB32, RGB64, Planar RGBA or YUVA data only"); + + // R, G, B channel + if ((channel >=0) && (channel <= 2) && !vi.IsRGB()) + env->ThrowError("Show%s: plane is valid only with RGB or planar RGB(A) source", ShowText[channel]); + + // Y, U, V channel (4,5,6) + if ((channel >=4) && (channel <= 6)) { + if (!vi.IsYUV() && !vi.IsYUVA()) + env->ThrowError("Show%s: plane is valid only with YUV(A) source", ShowText[channel]); + 
if(channel != 4 && vi.IsY()) + env->ThrowError("Show%s: invalid plane for greyscale source", ShowText[channel]); + channel -= 4; // map to 0,1,2 + } + + /*if(vi.IsPlanarRGB() || vi.IsPlanarRGBA()) env->ThrowError("Show%s: Planar RGB source is not supported", ShowText[channel]); + */ int target_pixelsize; + int target_bits_per_pixel; + + if(input_type_is_yuv || input_type_is_yuva) + { + if(channel == 1 || channel == 2) // U or V: target can be smaller than Y + { + vi.width >>= vi.GetPlaneWidthSubsampling(PLANAR_U); + vi.height >>= vi.GetPlaneHeightSubsampling(PLANAR_U); + } + } - if (!lstrcmpi(pixel_type, "rgb")) { + if (!lstrcmpi(pixel_type, "rgb")) { // target is packed RGB switch(pixelsize) { case 1: vi.pixel_type = VideoInfo::CS_BGR32; break; // bit-depth adaptive case 2: vi.pixel_type = VideoInfo::CS_BGR64; break; - default: env->ThrowError("Show%s: source must be 8 or 16 bit", ShowText[channel]); + default: env->ThrowError("Show%s: source must be 8 or 16 bit", ShowText[orig_channel]); } target_pixelsize = pixelsize; + target_bits_per_pixel = bits_per_pixel; } else { int new_pixel_type = GetPixelTypeFromName(pixel_type); if(new_pixel_type == VideoInfo::CS_UNKNOWN) - env->ThrowError("Show%s: invalid pixel_type!", ShowText[channel]); + env->ThrowError("Show%s: invalid pixel_type!", ShowText[orig_channel]); + // new output format vi.pixel_type = new_pixel_type; - if(vi.IsPlanarRGB() || vi.IsPlanarRGBA() || (vi.BitsPerComponent() !=8 && vi.BitsPerComponent() != 16) || vi.IsYUVA()) - env->ThrowError("Show%s supports the following output pixel types: RGB, Y8, Y16, YUY2, or 8/16 bit YUV formats", ShowText[channel]); + //if(vi.IsPlanarRGB() || vi.IsPlanarRGBA() || vi.IsYUVA()) + // env->ThrowError("Show%s supports the following output pixel types: RGB, Y8..Y16, YUY2, or YUV formats", ShowText[channel]); + if (new_pixel_type == VideoInfo::CS_YUY2) { if (vi.width & 1) { - env->ThrowError("Show%s: width must be mod 2 for yuy2", ShowText[channel]); + env->ThrowError("Show%s: width must be mod 2 for yuy2", ShowText[orig_channel]); } } if (vi.Is420()) { if (vi.width & 1) { - env->ThrowError("Show%s: width must be mod 2 for 4:2:0 source", ShowText[channel]); + env->ThrowError("Show%s: width must be mod 2 for 4:2:0 target", ShowText[orig_channel]); } if (vi.height & 1) { - env->ThrowError("Show%s: height must be mod 2 for 4:2:0 source", ShowText[channel]); + env->ThrowError("Show%s: height must be mod 2 for 4:2:0 target", ShowText[orig_channel]); } } if(vi.Is422()) { if (vi.width & 1) { - env->ThrowError("Show%s: width must be mod 2 for 4:2:2 source", ShowText[channel]); + env->ThrowError("Show%s: width must be mod 2 for 4:2:2 target", ShowText[orig_channel]); } } target_pixelsize = vi.ComponentSize(); + target_bits_per_pixel = vi.BitsPerComponent(); } #if 0 @@ -1149,8 +1189,8 @@ ShowChannel::ShowChannel(PClip _child, const char * pixel_type, int _channel, IS env->ThrowError("Show%s supports the following output pixel types: RGB, Y8, Y16, YUY2, or 8/16 bit YUV formats", ShowText[channel]); } #endif - if(target_pixelsize != pixelsize) - env->ThrowError("Show%s: source must be %d bit for %s", ShowText[channel], target_pixelsize*8, pixel_type); + if(target_bits_per_pixel != bits_per_pixel) + env->ThrowError("Show%s: source bit depth must be %d for %s", ShowText[channel], target_bits_per_pixel, pixel_type); } @@ -1210,7 +1250,7 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) for (int j=0; j(dstp); dstp16[j + 0] = dstp16[j + 1] = dstp16[j + 2] = reinterpret_cast(pf)[j + channel]; - 
dstp16[j + 3] = pf[j + 3]; + dstp16[j + 3] = reinterpret_cast(pf)[j + 3]; } pf += pitch; dstp += dstpitch; @@ -1276,7 +1316,7 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) int dstpitch = dst->GetPitch(); int dstwidth = dst->GetRowSize() / pixelsize; - // RGB is upside-down + // packed RGB is upside-down pf += (height-1) * pitch; // copy to luma @@ -1312,9 +1352,48 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) } return dst; } + else if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) + { // RGB32/64 -> Planar RGB 8/16 bit + PVideoFrame dst = env->NewVideoFrame(vi); + BYTE * dstp_g = dst->GetWritePtr(PLANAR_G); + BYTE * dstp_b = dst->GetWritePtr(PLANAR_B); + BYTE * dstp_r = dst->GetWritePtr(PLANAR_R); + int dstpitch = dst->GetPitch(); + int dstwidth = dst->GetRowSize() / pixelsize; + + // packed RGB is upside-down + pf += (height-1) * pitch; + + // copy to luma + if(pixelsize==1) { + for (int i=0; i(dstp_g)[j] = + reinterpret_cast(dstp_b)[j] = + reinterpret_cast(dstp_r)[j] = reinterpret_cast(pf)[j*4 + channel]; + } + pf -= pitch; + dstp_g += dstpitch; + dstp_b += dstpitch; + dstp_r += dstpitch; + } + } + } } - } - else if (input_type == VideoInfo::CS_BGR24 || input_type == VideoInfo::CS_BGR48) { + } // end of RGB32/64 source + else if (input_type == VideoInfo::CS_BGR24 || input_type == VideoInfo::CS_BGR48) + { if (vi.pixel_type == VideoInfo::CS_BGR24 || vi.pixel_type == VideoInfo::CS_BGR48) // RGB24->RGB24, RGB48->RGB48 { if (f->IsWritable()) { @@ -1462,8 +1541,234 @@ PVideoFrame ShowChannel::GetFrame(int n, IScriptEnvironment* env) } return dst; } + else if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) + { // RGB24/48 -> Planar RGB 8/16 bit + PVideoFrame dst = env->NewVideoFrame(vi); + BYTE * dstp_g = dst->GetWritePtr(PLANAR_G); + BYTE * dstp_b = dst->GetWritePtr(PLANAR_B); + BYTE * dstp_r = dst->GetWritePtr(PLANAR_R); + int dstpitch = dst->GetPitch(); + int dstwidth = dst->GetRowSize() / pixelsize; + // packed RGB is upside-down + pf += (height-1) * pitch; + + // copy to luma + if(pixelsize==1) { + for (int i=0; i(dstp_g)[j] = + reinterpret_cast(dstp_b)[j] = + reinterpret_cast(dstp_r)[j] = reinterpret_cast(pf)[j*3 + channel]; + } + pf -= pitch; + dstp_g += dstpitch; + dstp_b += dstpitch; + dstp_r += dstpitch; + } + } + return dst; + } } - } + } // end of RGB24/48 source + else if (input_type_is_planar_rgb || input_type_is_planar_rgba || input_type_is_yuv || input_type_is_yuva) { + // planar source + const int planesYUV[4] = { PLANAR_Y, PLANAR_U, PLANAR_V, PLANAR_A}; + const int planesRGB[4] = { PLANAR_G, PLANAR_B, PLANAR_R, PLANAR_A}; + const int *planes = (input_type_is_planar_rgb || input_type_is_planar_rgba) ? planesRGB : planesYUV; + const int plane = planes[channel]; + + bool hasAlpha = input_type_is_planar_rgba || input_type_is_yuva; + const BYTE* srcp = f->GetReadPtr(plane); // source plane + const BYTE* srcp_a = hasAlpha ? 
f->GetReadPtr(PLANAR_A) : nullptr; + + const int width = f->GetRowSize(plane) / pixelsize; + const int height = f->GetHeight(plane); + const int pitch = f->GetPitch(plane); + + if (vi.pixel_type == VideoInfo::CS_BGR32 || vi.pixel_type == VideoInfo::CS_BGR64) // PRGB/YUVA->RGB32/RGB64 + { + { // Planar RGBA/YUVA ->RGB32/64 + PVideoFrame dst = env->NewVideoFrame(vi); + BYTE * dstp = dst->GetWritePtr(); + const int dstpitch = dst->GetPitch(); + // RGB is upside-down + dstp += (height-1) * dstpitch; + + if(pixelsize==1) { + for (int i=0; i(dstp); + dstp16[j*4 + 0] = dstp16[j*4 + 1] = dstp16[j*4 + 2] = reinterpret_cast(srcp)[j]; + dstp16[j*4 + 3] = reinterpret_cast(srcp_a)[j]; + } + srcp += pitch; + srcp_a += pitch; + dstp -= dstpitch; + } + } + return dst; + } + } + else if (vi.pixel_type == VideoInfo::CS_BGR24 || vi.pixel_type == VideoInfo::CS_BGR48) // PRGB(A)/YUVA->RGB24, PRGB(A)16/YUVA16->RGB48 + { + PVideoFrame dst = env->NewVideoFrame(vi); + BYTE * dstp = dst->GetWritePtr(); + const int dstpitch = dst->GetPitch(); + // RGB is upside-down + dstp += (height-1) * dstpitch; + + if(pixelsize==1) { + for (int i=0; i(dstp); + dstp16[j*3 + 0] = dstp16[j*3 + 1] = dstp16[j*3 + 2] = reinterpret_cast(srcp)[j]; + } + srcp += pitch; + dstp -= dstpitch; + } + + } + return dst; + } + else if (vi.pixel_type == VideoInfo::CS_YUY2) // // PRGB(A)/YUVA->YUY2 + { + PVideoFrame dst = env->NewVideoFrame(vi); + BYTE * dstp = dst->GetWritePtr(); + const int dstpitch = dst->GetPitch(); + const int dstrowsize = dst->GetRowSize(); + + for (int i=0; iYV12/16/24/Y8 + 16bit + // 444, 422 support + 16 bits + if (vi.Is444() || vi.Is422() || vi.Is420() || vi.IsY()) // Y8, YV12, Y16, YUV420P16, etc. + { + PVideoFrame dst = env->NewVideoFrame(vi); + BYTE * dstp = dst->GetWritePtr(); + int dstpitch = dst->GetPitch(); + int dstwidth = dst->GetRowSize() / pixelsize; + + // copy to luma + if(pixelsize==1) { + for (int i=0; i(dstp)[j] = reinterpret_cast(srcp)[j]; + } + srcp += pitch; + dstp += dstpitch; + } + } + else { // pixelsize == 4 + for (int i=0; i(dstp)[j] = reinterpret_cast(srcp)[j]; + } + srcp += pitch; + dstp += dstpitch; + } + } + if (!vi.IsY()) + { + dstpitch = dst->GetPitch(PLANAR_U); + int dstheight = dst->GetHeight(PLANAR_U); + BYTE * dstp_u = dst->GetWritePtr(PLANAR_U); + BYTE * dstp_v = dst->GetWritePtr(PLANAR_V); + switch (pixelsize) { + case 1: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, (BYTE)0x80); break; + case 2: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 1 << (vi.BitsPerComponent() - 1)); break; + case 4: fill_chroma(dstp_u, dstp_v, dstheight, dstpitch, 0.5f); break; + } + } + return dst; + } + else if (vi.IsPlanarRGB() || vi.IsPlanarRGBA()) + { // PRGB(A)/YUVA -> Planar RGB + PVideoFrame dst = env->NewVideoFrame(vi); + BYTE * dstp_g = dst->GetWritePtr(PLANAR_G); + BYTE * dstp_b = dst->GetWritePtr(PLANAR_B); + BYTE * dstp_r = dst->GetWritePtr(PLANAR_R); + int dstpitch = dst->GetPitch(); + int dstwidth = dst->GetRowSize() / pixelsize; + + // copy to luma + if(pixelsize==1) { + for (int i=0; i(dstp_g)[j] = + reinterpret_cast(dstp_b)[j] = + reinterpret_cast(dstp_r)[j] = reinterpret_cast(srcp)[j]; + } + srcp += pitch; + dstp_g += dstpitch; + dstp_b += dstpitch; + dstp_r += dstpitch; + } + } + return dst; + } + } + } // planar RGB(A) or YUVA source + env->ThrowError("ShowChannel: unexpected end of function"); return f; } diff --git a/avs_core/filters/layer.h b/avs_core/filters/layer.h index d16211027..b164f28d2 100644 --- a/avs_core/filters/layer.h +++ b/avs_core/filters/layer.h @@ -172,9 +172,14 
@@ class ShowChannel : public GenericVideoFilter static AVSValue __cdecl Create(AVSValue args, void* channel, IScriptEnvironment* env); private: - const int channel; + int channel; const int input_type; const int pixelsize; + const int bits_per_pixel; + bool input_type_is_planar_rgb; + bool input_type_is_planar_rgba; + bool input_type_is_yuv; + bool input_type_is_yuva; }; From 9d156c6c18aa52686af8e8715c10da1b86dc37ab Mon Sep 17 00:00:00 2001 From: Pinterf Date: Fri, 14 Oct 2016 20:42:48 +0200 Subject: [PATCH 116/120] New script function: ColorSpaceNameToPixelType. Returns a VideoInfo::pixel_type integer --- avs_core/core/parser/script.cpp | 6 ++++++ avs_core/core/parser/script.h | 1 + 2 files changed, 7 insertions(+) diff --git a/avs_core/core/parser/script.cpp b/avs_core/core/parser/script.cpp index e2e86a486..8a180b457 100644 --- a/avs_core/core/parser/script.cpp +++ b/avs_core/core/parser/script.cpp @@ -248,6 +248,7 @@ extern const AVSFunction Script_functions[] = { { "IsYUVA", BUILTIN_FUNC_PREFIX, "c", IsYUVA }, { "IsPlanarRGB", BUILTIN_FUNC_PREFIX, "c", IsPlanarRGB }, { "IsPlanarRGBA", BUILTIN_FUNC_PREFIX, "c", IsPlanarRGBA }, + { "ColorSpaceNameToPixelType", BUILTIN_FUNC_PREFIX, "s", ColorSpaceNameToPixelType }, { 0 } }; @@ -943,6 +944,11 @@ AVSValue PixelType (AVSValue args, void*, IScriptEnvironment* env) { return GetPixelTypeName(VI(args[0]).pixel_type); } +// AVS+ +AVSValue ColorSpaceNameToPixelType (AVSValue args, void*, IScriptEnvironment* env) { + return GetPixelTypeFromName(args[0].AsString()); +} + AVSValue Width(AVSValue args, void*, IScriptEnvironment* env) { return VI(args[0]).width; } AVSValue Height(AVSValue args, void*, IScriptEnvironment* env) { return VI(args[0]).height; } AVSValue FrameCount(AVSValue args, void*, IScriptEnvironment* env) { return VI(args[0]).num_frames; } diff --git a/avs_core/core/parser/script.h b/avs_core/core/parser/script.h index 44a13e438..d80117356 100644 --- a/avs_core/core/parser/script.h +++ b/avs_core/core/parser/script.h @@ -252,5 +252,6 @@ AVSValue BitsPerComponent(AVSValue args, void*, IScriptEnvironment* env); AVSValue IsYUVA(AVSValue args, void*, IScriptEnvironment* env); AVSValue IsPlanarRGB(AVSValue args, void*, IScriptEnvironment* env); AVSValue IsPlanarRGBA(AVSValue args, void*, IScriptEnvironment* env); +AVSValue ColorSpaceNameToPixelType(AVSValue args, void*, IScriptEnvironment* env); #endif // __Script_H__ From e10dcb8f640b2a1562dff75af3c6ee125243c82c Mon Sep 17 00:00:00 2001 From: Balazs OROSZI Date: Sun, 23 Oct 2016 20:05:56 +0200 Subject: [PATCH 117/120] AviSource/DirectShowSource: 16-bit RGB input support (BGR[48], BRA[64]) --- avs_core/filters/AviSource/avi_source.cpp | 44 ++++++++++++++++--- .../DirectShowSource/directshow_source.cpp | 22 +++++++++- plugins/DirectShowSource/directshow_source.h | 6 ++- 3 files changed, 64 insertions(+), 8 deletions(-) diff --git a/avs_core/filters/AviSource/avi_source.cpp b/avs_core/filters/AviSource/avi_source.cpp index ce83e1a24..1136db75c 100644 --- a/avs_core/filters/AviSource/avi_source.cpp +++ b/avs_core/filters/AviSource/avi_source.cpp @@ -296,6 +296,10 @@ void AVISource::LocateVideoCodec(const char fourCC[], IScriptEnvironment* env) { vi.pixel_type = VideoInfo::CS_BGR32; } else if (pbiSrc->biCompression == BI_RGB && pbiSrc->biBitCount == 24) { vi.pixel_type = VideoInfo::CS_BGR24; + } else if (pbiSrc->biCompression == '\100ARB') { // BRA@ ie. BRA[64] + vi.pixel_type = VideoInfo::CS_BGR64; + } else if (pbiSrc->biCompression == '\060RGB') { // BGR0 ie. 
BGR[48] + vi.pixel_type = VideoInfo::CS_BGR48; } else if (pbiSrc->biCompression == 'YERG') { vi.pixel_type = VideoInfo::CS_Y8; } else if (pbiSrc->biCompression == '008Y') { @@ -409,20 +413,22 @@ AVISource::AVISource(const char filename[], bool fAudio, const char pixel_type[] bool fYUY2 = pixel_type[0] == 0 || lstrcmpi(pixel_type, "YUY2" ) == 0; bool fRGB32 = pixel_type[0] == 0 || lstrcmpi(pixel_type, "RGB32") == 0; bool fRGB24 = pixel_type[0] == 0 || lstrcmpi(pixel_type, "RGB24") == 0; + bool fRGB48 = pixel_type[0] == 0 || lstrcmpi(pixel_type, "RGB48") == 0; + bool fRGB64 = pixel_type[0] == 0 || lstrcmpi(pixel_type, "RGB64") == 0; if (lstrcmpi(pixel_type, "AUTO") == 0) { - fY8 = fYV12 = fYUY2 = fRGB32 = fRGB24 = true; + fY8 = fYV12 = fYUY2 = fRGB32 = fRGB24 = fRGB48 = fRGB64 = true; forcedType = false; } else if (lstrcmpi(pixel_type, "FULL") == 0) { - fY8 = fYV12 = fYV16 = fYV24 = fYV411 = fYUY2 = fRGB32 = fRGB24 = true; + fY8 = fYV12 = fYV16 = fYV24 = fYV411 = fYUY2 = fRGB32 = fRGB24 = fRGB48 = fRGB64 = true; forcedType = false; } - if (!(fY8 || fYV12 || fYV16 || fYV24 || fYV411 || fYUY2 || fRGB32 || fRGB24)) - env->ThrowError("AVISource: requested format must be one of YV24, YV16, YV12, YV411, YUY2, Y8, RGB32, RGB24, AUTO or FULL"); + if (!(fY8 || fYV12 || fYV16 || fYV24 || fYV411 || fYUY2 || fRGB32 || fRGB24 || fRGB48 || fRGB64)) + env->ThrowError("AVISource: requested format must be one of YV24, YV16, YV12, YV411, YUY2, Y8, RGB32, RGB24, RGB48, RGB64, AUTO or FULL"); - // try to decompress to YV12, YV411, YV16, YV24, YUY2, Y8, RGB32, and RGB24 in turn + // try to decompress to YV12, YV411, YV16, YV24, YUY2, Y8, RGB32, and RGB24, RGB48, RGB64 in turn memset(&biDst, 0, sizeof(BITMAPINFOHEADER)); biDst.biSize = sizeof(BITMAPINFOHEADER); biDst.biWidth = vi.width; @@ -530,6 +536,34 @@ AVISource::AVISource(const char filename[], bool fAudio, const char pixel_type[] } } + // RGB48 + if (fRGB48 && bOpen) { + vi.pixel_type = VideoInfo::CS_BGR48; + biDst.biSizeImage = vi.BMPSize(); + biDst.biCompression = '\060RGB'; // BGR0 ie. BGR[48] + biDst.biBitCount = 48; + if (ICERR_OK == ICDecompressQuery(hic, pbiSrc, &biDst)) { + _RPT0(0,"AVISource: Opening as BGR0 (BGR[48]).\n"); + bOpen = false; // Skip further attempts + } else if (forcedType) { + env->ThrowError("AVISource: the video decompressor couldn't produce RGB48 output"); + } + } + + // RGB64 + if (fRGB64 && bOpen) { + vi.pixel_type = VideoInfo::CS_BGR64; + biDst.biSizeImage = vi.BMPSize(); + biDst.biCompression = '\100ARB'; // BRA@ ie. 
BRA[64] + biDst.biBitCount = 64; + if (ICERR_OK == ICDecompressQuery(hic, pbiSrc, &biDst)) { + _RPT0(0,"AVISource: Opening as BRA@ (BRA[64]).\n"); + bOpen = false; // Skip further attempts + } else if (forcedType) { + env->ThrowError("AVISource: the video decompressor couldn't produce RGB64 output"); + } + } + // Y8 if (fY8 && bOpen) { vi.pixel_type = VideoInfo::CS_Y8; diff --git a/plugins/DirectShowSource/directshow_source.cpp b/plugins/DirectShowSource/directshow_source.cpp index 608e34184..292ca7bd4 100644 --- a/plugins/DirectShowSource/directshow_source.cpp +++ b/plugins/DirectShowSource/directshow_source.cpp @@ -141,6 +141,8 @@ const GUID MEDIASUBTYPE_I420 = {FourCC('I420'), 0x0000, 0x0010, {0x80, 0x00, 0x0 // Already defined by platform headers: const GUID MEDIASUBTYPE_NV12 = {FourCC('NV12'), 0x0000, 0x0010, {0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}}; const GUID MEDIASUBTYPE_YV16 = {FourCC('YV16'), 0x0000, 0x0010, {0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}}; const GUID MEDIASUBTYPE_YV24 = {FourCC('YV24'), 0x0000, 0x0010, {0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71}}; +const GUID MEDIASUBTYPE_BRA64 = { FourCC('BRA\100'), 0x0000, 0x0010,{ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71 } }; // BRA@ ie. BRA[64] +const GUID MEDIASUBTYPE_BGR48 = { FourCC('BGR\060'), 0x0000, 0x0010,{ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71 } }; // BGR0 ie. BGR[48] // Format a GUID for printing @@ -289,6 +291,8 @@ GetSample::GetSample(bool _load_audio, bool _load_video, unsigned _media, LOG* _ if (media & mediaARGB) InitMediaType(my_media_types[i++], MEDIATYPE_Video, MEDIASUBTYPE_ARGB32); if (media & mediaRGB32) InitMediaType(my_media_types[i++], MEDIATYPE_Video, MEDIASUBTYPE_RGB32); if (media & mediaRGB24) InitMediaType(my_media_types[i++], MEDIATYPE_Video, MEDIASUBTYPE_RGB24); + if (media & mediaRGB64) InitMediaType(my_media_types[i++], MEDIATYPE_Video, MEDIASUBTYPE_BRA64); + if (media & mediaRGB48) InitMediaType(my_media_types[i++], MEDIATYPE_Video, MEDIASUBTYPE_BGR48); no_my_media_types = i; if (media == mediaNONE) media = mediaAUTO; } @@ -1323,6 +1327,20 @@ SubFormat: KSDATAFORMAT_SUBTYPE_PCM {00000001-0000-0010-8000-00AA00389B71} } pixel_type = VideoInfo::CS_BGR32; + } else if (pmt->subtype == MEDIASUBTYPE_BGR48) { + if (!(media & mediaRGB48)) { + dssRPT0(dssNEG, "*** Video: Subtype denied - BGR[48]\n"); + return S_FALSE; + } + pixel_type = VideoInfo::CS_BGR48; + + } else if (pmt->subtype == MEDIASUBTYPE_BRA64) { + if (!(media & mediaRGB64)) { + dssRPT0(dssNEG, "*** Video: Subtype denied - BRA[64]\n"); + return S_FALSE; + } + pixel_type = VideoInfo::CS_BGR64; + } else { dssRPT2(dssNEG, "*** Video: Subtype rejected - '%s' %s\n", PrintFourCC(pmt->subtype.Data1), PrintGUID(&pmt->subtype)); dssRPT1(dssNEG, "*** Video: Format type - %s\n", PrintGUID(&pmt->formattype)); @@ -2584,6 +2602,8 @@ AVSValue __cdecl Create_DirectShowSource(AVSValue args, void*, IScriptEnvironmen else if (!lstrcmpi(pixel_type, "AYUV")) { _media = GetSample::mediaAYUV; } else if (!lstrcmpi(pixel_type, "RGB24")) { _media = GetSample::mediaRGB24; } else if (!lstrcmpi(pixel_type, "RGB32")) { _media = GetSample::mediaRGB32 | GetSample::mediaARGB; } + else if (!lstrcmpi(pixel_type, "RGB48")) { _media = GetSample::mediaRGB48; } + else if (!lstrcmpi(pixel_type, "RGB64")) { _media = GetSample::mediaRGB64; } else if (!lstrcmpi(pixel_type, "ARGB")) { _media = GetSample::mediaARGB; } else if (!lstrcmpi(pixel_type, "RGB")) { _media = GetSample::mediaRGB; } else if (!lstrcmpi(pixel_type, "YUV")) { _media = 
GetSample::mediaYUV; } @@ -2594,7 +2614,7 @@ AVSValue __cdecl Create_DirectShowSource(AVSValue args, void*, IScriptEnvironmen else if (!lstrcmpi(pixel_type, "YUVEX")) { _media = GetSample::mediaYUVex; } else if (!lstrcmpi(pixel_type, "FULL")) { _media = GetSample::mediaFULL; } else { - env->ThrowError("DirectShowSource: pixel_type must be \"RGB24\", \"RGB32\", \"ARGB\", " + env->ThrowError("DirectShowSource: pixel_type must be \"RGB24\", \"RGB32\", \"ARGB\", \"RGB48\", \"RGB64\", " "\"YUY2\", \"YV12\", \"I420\", \"YV16\", \"YV24\", \"AYUV\", \"Y41P\", " "\"Y411\", \"NV12\", \"RGB\", \"YUV\" , \"YUVex\", \"AUTO\" or \"FULL\""); } diff --git a/plugins/DirectShowSource/directshow_source.h b/plugins/DirectShowSource/directshow_source.h index 407809856..11bd331b6 100644 --- a/plugins/DirectShowSource/directshow_source.h +++ b/plugins/DirectShowSource/directshow_source.h @@ -266,7 +266,7 @@ class GetSample : public IBaseFilter, public IPin, public IMemInputPin { AM_MEDIA_TYPE *am_media_type; unsigned media, no_my_media_types; - AM_MEDIA_TYPE *my_media_types[16]; // 2.6 + AM_MEDIA_TYPE *my_media_types[18]; // 2.6 PVideoFrame pvf; @@ -286,7 +286,9 @@ class GetSample : public IBaseFilter, public IPin, public IMemInputPin { mediaYV24 = 1<<10,// 2.6 mediaI420 = 1<<11,// 2.6 mediaNV12 = 1<<12,// 2.6 - mediaRGB = mediaARGB | mediaRGB32 | mediaRGB24, + mediaRGB64 = 1<<13, + mediaRGB48 = 1<<14, + mediaRGB = mediaARGB | mediaRGB32 | mediaRGB24 | mediaRGB64 | mediaRGB48, mediaYUV = mediaYUV9 | mediaYV12 | mediaYUY2 | mediaAYUV | mediaY411 | mediaY41P, mediaYUVex = mediaYUV | mediaYV16 | mediaYV24 | mediaI420 | mediaNV12, mediaAUTO = mediaRGB | mediaYUV, From 0476aa9fd8af803506570f31433783e57e7c0f4f Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 26 Oct 2016 19:04:20 +0200 Subject: [PATCH 118/120] DirectShowSource compilation help updated --- plugins/DirectShowSource/directshow_source.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plugins/DirectShowSource/directshow_source.h b/plugins/DirectShowSource/directshow_source.h index 11bd331b6..ac770cb25 100644 --- a/plugins/DirectShowSource/directshow_source.h +++ b/plugins/DirectShowSource/directshow_source.h @@ -61,14 +61,15 @@ - Edit Project Properties|VC++ Directories|Include Paths Add to the beginning c:\Program Files\Microsoft SDKs\Windows\v7.0\Samples\multimedia\directshow\baseclasses\; - c:\Program Files\Microsoft SDKs\Windows\v7.0\Include\; + c:\Program Files\Microsoft SDKs\Windows\v7.0\Include\; or put behind $(VC_IncludePath) if windows.h not found - Edit Project Properties|VC++ Directories|Library Directories For x86 target add c:\Program Files\Microsoft SDKs\Windows\v7.0\Samples\multimedia\directshow\baseclasses\Release_MBCS\; For x64 target add c:\Program Files\Microsoft SDKs\Windows\v7.0\Samples\multimedia\directshow\baseclasses\x64\Release_MBCS\; For XP target add (to find winmm.lib) - c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\Lib\; + c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\Lib\; (for 32 bit build) + c:\Program Files (x86)\Microsoft SDKs\Windows\v7.1A\Lib\x64\; (for 64 bit build) - Edit Project Properties|Linker|Input|Additional Dependencies Add strmbase.lib to the list - For XP compatibility From b54a6be5940f48b492d7d690f82edc7d9046cb12 Mon Sep 17 00:00:00 2001 From: Pinterf Date: Wed, 26 Oct 2016 19:04:43 +0200 Subject: [PATCH 119/120] Fix Overlay for 8 bit YV12 --- avs_core/filters/overlay/444convert.cpp | 2 -- avs_core/filters/overlay/overlay.cpp | 26 +++++++++++++------------ 2 files changed, 
14 insertions(+), 14 deletions(-) diff --git a/avs_core/filters/overlay/444convert.cpp b/avs_core/filters/overlay/444convert.cpp index 2e8e7677b..a67b4a5d0 100644 --- a/avs_core/filters/overlay/444convert.cpp +++ b/avs_core/filters/overlay/444convert.cpp @@ -550,8 +550,6 @@ static void convert_yv24_chroma_to_yv12_c(BYTE *dstp8, const BYTE *srcp8, int ds void Convert444ToYV12(PVideoFrame &src, PVideoFrame &dst, int pixelsize, int bits_per_pixel, IScriptEnvironment* env) { -// env->MakeWritable(&dst); already writeable - env->BitBlt(dst->GetWritePtr(PLANAR_Y), dst->GetPitch(PLANAR_Y), src->GetReadPtr(PLANAR_Y), src->GetPitch(), dst->GetRowSize(PLANAR_Y), dst->GetHeight()); diff --git a/avs_core/filters/overlay/overlay.cpp b/avs_core/filters/overlay/overlay.cpp index d803d91c5..2f9e0f662 100644 --- a/avs_core/filters/overlay/overlay.cpp +++ b/avs_core/filters/overlay/overlay.cpp @@ -521,18 +521,6 @@ PVideoFrame __stdcall Overlay::GetFrame(int n, IScriptEnvironment *env) { #endif #endif } - - // here img->frame is 444 - // apply fast conversion - if((pixelsize==1) && outputVi->Is420()) - { - PVideoFrame outputFrame = env->NewVideoFrame(*outputVi); - Convert444ToYV12(frame, outputFrame, pixelsize, bits_per_pixel, env); - } else if(outputVi->IsYUY2()) { - PVideoFrame outputFrame = env->NewVideoFrame(*outputVi); - Convert444ToYUY2(frame, outputFrame, pixelsize, bits_per_pixel, env); - } - // all other cases return 4:4:4 // Cleanup if (mask) { @@ -548,6 +536,20 @@ PVideoFrame __stdcall Overlay::GetFrame(int n, IScriptEnvironment *env) { img->free_all(); delete img; } + + // here img->frame is 444 + // apply fast conversion + if((pixelsize==1) && outputVi->Is420()) + { + PVideoFrame outputFrame = env->NewVideoFrame(*outputVi); + Convert444ToYV12(frame, outputFrame, pixelsize, bits_per_pixel, env); + return outputFrame; + } else if(outputVi->IsYUY2()) { + PVideoFrame outputFrame = env->NewVideoFrame(*outputVi); + Convert444ToYUY2(frame, outputFrame, pixelsize, bits_per_pixel, env); + return outputFrame; + } + // all other cases return 4:4:4 #ifndef USE_ORIG_FRAME return frameOutput; #else From dbcfae24fe9faf878bd834d8272cf2651951aa69 Mon Sep 17 00:00:00 2001 From: Balazs OROSZI Date: Sun, 30 Oct 2016 03:03:20 +0100 Subject: [PATCH 120/120] DirectShowSource: Sensible defaults for CMake baseclasses lib, removed dx include path --- plugins/DirectShowSource/CMakeLists.txt | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/plugins/DirectShowSource/CMakeLists.txt b/plugins/DirectShowSource/CMakeLists.txt index f9c80fff4..899d9a5bf 100644 --- a/plugins/DirectShowSource/CMakeLists.txt +++ b/plugins/DirectShowSource/CMakeLists.txt @@ -5,10 +5,17 @@ CMAKE_MINIMUM_REQUIRED( VERSION 2.8.11 ) set(PluginName "DirectShowSource") set(ProjectName "Plugin${PluginName}") +# Sensible defaults that should just work if WINSDK is installed and baseclasses built +set(DEFAULT_BASECLASSES_PATH "C:/Program Files/Microsoft SDKs/Windows/v7.1/Samples/multimedia/directshow/baseclasses") +if(CMAKE_SIZEOF_VOID_P EQUAL 4) # 32-bit + set(DEFAULT_BASECLASSES_LIB "${DEFAULT_BASECLASSES_PATH}/Release/strmbase.lib") +else() # 64-bit + set(DEFAULT_BASECLASSES_LIB "${DEFAULT_BASECLASSES_PATH}/x64/Release/strmbase.lib") +endif() + # We need these variables set by the user to compile successfully -set(DSHOWSRC_BASECLASSES_PATH "C:/Program Files/Microsoft SDKs/Windows/v7.1/Samples/multimedia/directshow/baseclasses" CACHE STRING "Folder path to the DirectShow example baseclasses.") 
-set(DSHOWSRC_BASECLASSES_LIB CACHE FILEPATH "File path to the DirectShow example baseclasses precompiled static library ('strmbase.lib').") -set(DSHOWSRC_DX_INCLUDE_PATH "C:/Program Files/Microsoft DirectX SDK (August 2009)/Include" CACHE STRING "Include folder path to the DirectX headers.") +set(DSHOWSRC_BASECLASSES_PATH "${DEFAULT_BASECLASSES_PATH}" CACHE STRING "Folder path to the DirectShow example baseclasses.") +set(DSHOWSRC_BASECLASSES_LIB "${DEFAULT_BASECLASSES_LIB}" CACHE FILEPATH "File path to the DirectShow example baseclasses precompiled static library ('strmbase.lib').") # Create library project(${ProjectName}) @@ -23,7 +30,7 @@ set_target_properties(${ProjectName} PROPERTIES "OUTPUT_NAME" ${PluginName}) target_link_libraries(${ProjectName} "Winmm.lib" "Quartz.lib" "Ole32.lib" "User32.lib" "Oleaut32.lib" "Advapi32.lib" ${DSHOWSRC_BASECLASSES_LIB}) # Include directories -target_include_directories(${ProjectName} PRIVATE ${AvsCore_SOURCE_DIR} ${DSHOWSRC_BASECLASSES_PATH} ${DSHOWSRC_DX_INCLUDE_PATH}) +target_include_directories(${ProjectName} PRIVATE ${AvsCore_SOURCE_DIR} ${DSHOWSRC_BASECLASSES_PATH}) if (MSVC_IDE) # Copy output to a common folder for easy deployment @@ -32,4 +39,4 @@ if (MSVC_IDE) POST_BUILD COMMAND xcopy /Y \"$(TargetPath)\" \"${CMAKE_BINARY_DIR}/Output/plugins\" ) -endif() \ No newline at end of file +endif()
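Editor's note on the AviSource/DirectShowSource 16-bit RGB patch above: the octal escapes '\100ARB' and '\060RGB' are MSVC multi-character literals for the FourCCs "BRA@" (BRA[64], mapped to CS_BGR64) and "BGR0" (BGR[48], mapped to CS_BGR48). The following is a small standalone sketch, not the FourCC macro used in directshow_source.cpp, showing the same DWORD values built byte-by-byte in the layout biCompression stores them (first character in the lowest byte).

#include <cstdint>

// FourCC as it sits in BITMAPINFOHEADER::biCompression: first character in
// the lowest byte of the little-endian DWORD.
constexpr uint32_t fourcc(char a, char b, char c, char d)
{
    return  (uint32_t)(uint8_t)a         | ((uint32_t)(uint8_t)b << 8) |
           ((uint32_t)(uint8_t)c << 16)  | ((uint32_t)(uint8_t)d << 24);
}

// "BRA@" = BRA[64], 16 bit per channel BGRA -> VideoInfo::CS_BGR64
static_assert(fourcc('B', 'R', 'A', '\100') == 0x40415242, "BRA@ / BRA[64]");
// "BGR0" = BGR[48], 16 bit per channel BGR  -> VideoInfo::CS_BGR48
static_assert(fourcc('B', 'G', 'R', '\060') == 0x30524742, "BGR0 / BGR[48]");
// MSVC packs a multi-character literal with its first character in the most
// significant byte, so '\100ARB' and '\060RGB' in the patch evaluate to the
// same two constants that LocateVideoCodec() compares against biCompression.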