|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
#include "traster.h"
|
|
Toshihiro Shimizu |
890ddd |
#include "trop.h"
|
|
Toshihiro Shimizu |
890ddd |
#include "tpixelgr.h"
|
|
Shinya Kitaoka |
9f5a1b |
#ifdef _WIN32
|
|
Toshihiro Shimizu |
890ddd |
#include <emmintrin.h></emmintrin.h>
|
|
Toshihiro Shimizu |
890ddd |
#include <malloc.h></malloc.h>
|
|
Toshihiro Shimizu |
890ddd |
#endif
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
namespace {
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
9f5a1b |
// Four-channel accumulator pixel whose channels all have type T.
// The member order depends on the platform: b, g, r, m when _WIN32 is
// defined and r, g, b, m otherwise.
// NOTE(review): presumably the order mirrors the in-memory channel layout of
// the 32-bit raster pixel on each platform, since the SSE2 code stores whole
// pixels with a single instruction - confirm against the pixel headers.
#ifdef _WIN32
template <class T>
struct BlurPixel {
  T b;
  T g;
  T r;
  T m;
};

#else
template <class T>
struct BlurPixel {
  T r;
  T g;
  T b;
  T m;
};

#endif
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//===================================================================
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
// LOAD_COL_CODE: statement sequence (NOT a single statement, no trailing
// semicolon needed) that copies one image column into a padded line buffer.
//
// Names that must exist in the expanding scope:
//   buffer               pointer to the source pixels (modified)
//   col                  pointer into the destination line (modified)
//   pix                  scratch pointer of the same type as col
//   left_val, right_val  scratch values of the pixel type
//   i                    int loop counter
//   lx, ly, x, brad, by1, by2   ints
//
// What it does:
//   1. reads ly pixels buffer[x], buffer[x + lx], buffer[x + 2 * lx], ... and
//      writes them to col[by1] .. col[by1 + ly - 1];
//   2. takes left_val = col[0] and right_val = col[by1 + ly + by2 - 1];
//   3. writes left_val to the brad slots col[-1] .. col[-brad] and right_val
//      to the brad slots starting at col[by1 + ly + by2].
// Slots col[0 .. by1 - 1] and col[by1 + ly .. by1 + ly + by2 - 1] are not
// written here, although col[0] and the last of them are read in step 2.
// On exit buffer, col and pix have all been advanced/decremented.
#define LOAD_COL_CODE \
  buffer += x; \
  pix = col + by1; \
  \
  for (i = by1; i < ly + by1; i++) { \
    *pix++ = *buffer; \
    buffer += lx; \
  } \
  \
  pix += by2; \
  left_val  = col[0]; \
  right_val = *(pix - 1); \
  col--; \
  \
  for (i = 0; i < brad; i++) { \
    *col-- = left_val; \
    *pix++ = right_val; \
  }
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
// BLUR_CODE(round_fac, channel_type): statement sequence that filters one
// line of r/g/b/m pixels.
//
// Names that must exist in the expanding scope:
//   row1                      pointer to the first source pixel of the line
//   row2                      pointer to the first output pixel (advanced)
//   pix1, pix2, pix3, pix4    scratch pointers of row1's type
//   sigma1, sigma2, sigma3, desigma   accumulators with r, g, b, m members
//   rsum, gsum, bsum, msum    running per-channel results
//   coeff, coeffq, diff       filter coefficients
//   brad, length, i           ints
//
// The first output is, per channel,
//   (sigma1 + sigma2) * coeff - sigma3 * coeffq + round_fac
// where sigma1 = sum of row1[0 .. brad-1], sigma2 = sum of row1[-1 .. -(brad-1)]
// and sigma3 = sum over k in [1, brad) of k * (row1[k] + row1[-k]); that is a
// weighted sum with weight (coeff - |k| * coeffq) on row1[k].
// Every following output is produced by updating the running sums from the
// previous position instead of recomputing them.
// One output is written unconditionally, then length - 1 more; each stored
// channel is cast to channel_type.
// Source pixels from row1[-brad] up to row1[brad + length - 2] are read, so
// the line needs that much valid padding on both sides.
#define BLUR_CODE(round_fac, channel_type) \
  pix1 = row1; \
  pix2 = row1 - 1; \
  \
  sigma1.r = pix1->r; \
  sigma1.g = pix1->g; \
  sigma1.b = pix1->b; \
  sigma1.m = pix1->m; \
  pix1++; \
  \
  sigma2.r = sigma2.g = sigma2.b = sigma2.m = 0.0; \
  sigma3.r = sigma3.g = sigma3.b = sigma3.m = 0.0; \
  \
  for (i = 1; i < brad; i++) { \
    sigma1.r += pix1->r; \
    sigma1.g += pix1->g; \
    sigma1.b += pix1->b; \
    sigma1.m += pix1->m; \
    \
    sigma2.r += pix2->r; \
    sigma2.g += pix2->g; \
    sigma2.b += pix2->b; \
    sigma2.m += pix2->m; \
    \
    sigma3.r += i * (pix1->r + pix2->r); \
    sigma3.g += i * (pix1->g + pix2->g); \
    sigma3.b += i * (pix1->b + pix2->b); \
    sigma3.m += i * (pix1->m + pix2->m); \
    \
    pix1++; \
    pix2--; \
  } \
  \
  rsum = (sigma1.r + sigma2.r) * coeff - sigma3.r * coeffq + (round_fac); \
  gsum = (sigma1.g + sigma2.g) * coeff - sigma3.g * coeffq + (round_fac); \
  bsum = (sigma1.b + sigma2.b) * coeff - sigma3.b * coeffq + (round_fac); \
  msum = (sigma1.m + sigma2.m) * coeff - sigma3.m * coeffq + (round_fac); \
  \
  row2->r = (channel_type)(rsum); \
  row2->g = (channel_type)(gsum); \
  row2->b = (channel_type)(bsum); \
  row2->m = (channel_type)(msum); \
  row2++; \
  \
  sigma2.r += row1[-brad].r; \
  sigma2.g += row1[-brad].g; \
  sigma2.b += row1[-brad].b; \
  sigma2.m += row1[-brad].m; \
  \
  pix1 = row1 + brad; \
  pix2 = row1; \
  pix3 = row1 - brad; \
  pix4 = row1 - brad + 1; \
  \
  desigma.r = sigma1.r - sigma2.r; \
  desigma.g = sigma1.g - sigma2.g; \
  desigma.b = sigma1.b - sigma2.b; \
  desigma.m = sigma1.m - sigma2.m; \
  \
  for (i = 1; i < length; i++) { \
    desigma.r += pix1->r - 2 * pix2->r + pix3->r; \
    desigma.g += pix1->g - 2 * pix2->g + pix3->g; \
    desigma.b += pix1->b - 2 * pix2->b + pix3->b; \
    desigma.m += pix1->m - 2 * pix2->m + pix3->m; \
    \
    rsum += (desigma.r + diff * (pix1->r - pix4->r)) * coeffq; \
    gsum += (desigma.g + diff * (pix1->g - pix4->g)) * coeffq; \
    bsum += (desigma.b + diff * (pix1->b - pix4->b)) * coeffq; \
    msum += (desigma.m + diff * (pix1->m - pix4->m)) * coeffq; \
    \
    row2->r = (channel_type)(rsum); \
    row2->g = (channel_type)(gsum); \
    row2->b = (channel_type)(bsum); \
    row2->m = (channel_type)(msum); \
    row2++; \
    pix1++, pix2++, pix3++, pix4++; \
  }
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
template <typename pixel_dst,="" pixel_src,="" t="" typename=""></typename>
|
|
Shinya Kitaoka |
120a6e |
inline void blur_code(PIXEL_SRC *row1, PIXEL_DST *row2, int length, float coeff,
|
|
Shinya Kitaoka |
120a6e |
float coeffq, int brad, float diff, float round_fac) {
|
|
Shinya Kitaoka |
120a6e |
int i;
|
|
Shinya Kitaoka |
120a6e |
T rsum, gsum, bsum, msum;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
BlurPixel<t> sigma1, sigma2, sigma3, desigma;</t>
|
|
Shinya Kitaoka |
120a6e |
PIXEL_SRC *pix1, *pix2, *pix3, *pix4;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix1 = row1;
|
|
Shinya Kitaoka |
120a6e |
pix2 = row1 - 1;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
sigma1.r = pix1->r;
|
|
Shinya Kitaoka |
120a6e |
sigma1.g = pix1->g;
|
|
Shinya Kitaoka |
120a6e |
sigma1.b = pix1->b;
|
|
Shinya Kitaoka |
120a6e |
sigma1.m = pix1->m;
|
|
Shinya Kitaoka |
120a6e |
pix1++;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
sigma2.r = sigma2.g = sigma2.b = sigma2.m = 0.0;
|
|
Shinya Kitaoka |
120a6e |
sigma3.r = sigma3.g = sigma3.b = sigma3.m = 0.0;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = 1; i < brad; i++) {
|
|
Shinya Kitaoka |
120a6e |
sigma1.r += pix1->r;
|
|
Shinya Kitaoka |
120a6e |
sigma1.g += pix1->g;
|
|
Shinya Kitaoka |
120a6e |
sigma1.b += pix1->b;
|
|
Shinya Kitaoka |
120a6e |
sigma1.m += pix1->m;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
sigma2.r += pix2->r;
|
|
Shinya Kitaoka |
120a6e |
sigma2.g += pix2->g;
|
|
Shinya Kitaoka |
120a6e |
sigma2.b += pix2->b;
|
|
Shinya Kitaoka |
120a6e |
sigma2.m += pix2->m;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
sigma3.r += i * (pix1->r + pix2->r);
|
|
Shinya Kitaoka |
120a6e |
sigma3.g += i * (pix1->g + pix2->g);
|
|
Shinya Kitaoka |
120a6e |
sigma3.b += i * (pix1->b + pix2->b);
|
|
Shinya Kitaoka |
120a6e |
sigma3.m += i * (pix1->m + pix2->m);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix1++;
|
|
Shinya Kitaoka |
120a6e |
pix2--;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
rsum = (sigma1.r + sigma2.r) * coeff - sigma3.r * coeffq + (round_fac);
|
|
Shinya Kitaoka |
120a6e |
gsum = (sigma1.g + sigma2.g) * coeff - sigma3.g * coeffq + (round_fac);
|
|
Shinya Kitaoka |
120a6e |
bsum = (sigma1.b + sigma2.b) * coeff - sigma3.b * coeffq + (round_fac);
|
|
Shinya Kitaoka |
120a6e |
msum = (sigma1.m + sigma2.m) * coeff - sigma3.m * coeffq + (round_fac);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
row2->r = rsum;
|
|
Shinya Kitaoka |
120a6e |
row2->g = gsum;
|
|
Shinya Kitaoka |
120a6e |
row2->b = bsum;
|
|
Shinya Kitaoka |
120a6e |
row2->m = msum;
|
|
Shinya Kitaoka |
120a6e |
row2++;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
sigma2.r += row1[-brad].r;
|
|
Shinya Kitaoka |
120a6e |
sigma2.g += row1[-brad].g;
|
|
Shinya Kitaoka |
120a6e |
sigma2.b += row1[-brad].b;
|
|
Shinya Kitaoka |
120a6e |
sigma2.m += row1[-brad].m;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix1 = row1 + brad;
|
|
Shinya Kitaoka |
120a6e |
pix2 = row1;
|
|
Shinya Kitaoka |
120a6e |
pix3 = row1 - brad;
|
|
Shinya Kitaoka |
120a6e |
pix4 = row1 - brad + 1;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
desigma.r = sigma1.r - sigma2.r;
|
|
Shinya Kitaoka |
120a6e |
desigma.g = sigma1.g - sigma2.g;
|
|
Shinya Kitaoka |
120a6e |
desigma.b = sigma1.b - sigma2.b;
|
|
Shinya Kitaoka |
120a6e |
desigma.m = sigma1.m - sigma2.m;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = 1; i < length; i++) {
|
|
Shinya Kitaoka |
120a6e |
desigma.r += pix1->r - 2 * pix2->r + pix3->r;
|
|
Shinya Kitaoka |
120a6e |
desigma.g += pix1->g - 2 * pix2->g + pix3->g;
|
|
Shinya Kitaoka |
120a6e |
desigma.b += pix1->b - 2 * pix2->b + pix3->b;
|
|
Shinya Kitaoka |
120a6e |
desigma.m += pix1->m - 2 * pix2->m + pix3->m;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
rsum += (desigma.r + diff * (pix1->r - pix4->r)) * coeffq;
|
|
Shinya Kitaoka |
120a6e |
gsum += (desigma.g + diff * (pix1->g - pix4->g)) * coeffq;
|
|
Shinya Kitaoka |
120a6e |
bsum += (desigma.b + diff * (pix1->b - pix4->b)) * coeffq;
|
|
Shinya Kitaoka |
120a6e |
msum += (desigma.m + diff * (pix1->m - pix4->m)) * coeffq;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
row2->r = rsum;
|
|
Shinya Kitaoka |
120a6e |
row2->g = gsum;
|
|
Shinya Kitaoka |
120a6e |
row2->b = bsum;
|
|
Shinya Kitaoka |
120a6e |
row2->m = msum;
|
|
Shinya Kitaoka |
120a6e |
row2++;
|
|
Shinya Kitaoka |
120a6e |
pix1++, pix2++, pix3++, pix4++;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
9f5a1b |
#ifdef _WIN32
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
template <class class="" p="" t,=""></class>
|
|
Shinya Kitaoka |
120a6e |
inline void blur_code_SSE2(T *row1, BlurPixel *row2, int length, float coeff,
|
|
Shinya Kitaoka |
120a6e |
float coeffq, int brad, float diff,
|
|
Shinya Kitaoka |
120a6e |
float round_fac) {
|
|
Shinya Kitaoka |
120a6e |
static float two = 2;
|
|
Shinya Kitaoka |
120a6e |
static __m128i zeros = _mm_setzero_si128();
|
|
Shinya Kitaoka |
120a6e |
static __m128 twos = _mm_load_ps1(&two);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
int i;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
__m128 sigma1, sigma2, sigma3, desigma;
|
|
Shinya Kitaoka |
120a6e |
T *pix1, *pix2, *pix3, *pix4;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix1 = row1;
|
|
Shinya Kitaoka |
120a6e |
pix2 = row1 - 1;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
//
|
|
Shinya Kitaoka |
120a6e |
__m128i piPix1 = _mm_cvtsi32_si128(*(DWORD *)pix1);
|
|
Shinya Kitaoka |
120a6e |
__m128i piPix2 = _mm_cvtsi32_si128(*(DWORD *)pix2);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
piPix1 = _mm_unpacklo_epi8(piPix1, zeros);
|
|
Shinya Kitaoka |
120a6e |
piPix2 = _mm_unpacklo_epi8(piPix2, zeros);
|
|
Shinya Kitaoka |
120a6e |
piPix1 = _mm_unpacklo_epi16(piPix1, zeros);
|
|
Shinya Kitaoka |
120a6e |
piPix2 = _mm_unpacklo_epi16(piPix2, zeros);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
sigma1 = _mm_cvtepi32_ps(piPix1);
|
|
Shinya Kitaoka |
120a6e |
//
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix1++;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
float zero = 0;
|
|
Shinya Kitaoka |
120a6e |
sigma2 = _mm_load1_ps(&zero);
|
|
Shinya Kitaoka |
120a6e |
sigma3 = _mm_load1_ps(&zero);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = 1; i < brad; i++) {
|
|
Shinya Kitaoka |
120a6e |
piPix1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(DWORD *)pix1), zeros);
|
|
Shinya Kitaoka |
120a6e |
piPix2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(DWORD *)pix2), zeros);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(piPix1, zeros));
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix2 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(piPix2, zeros));
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
sigma1 = _mm_add_ps(sigma1, pPix1);
|
|
Shinya Kitaoka |
120a6e |
sigma2 = _mm_add_ps(sigma2, pPix2);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
__m128i pii = _mm_unpacklo_epi8(_mm_cvtsi32_si128(i), zeros);
|
|
Shinya Kitaoka |
120a6e |
__m128 pi = _mm_cvtepi32_ps(_mm_unpacklo_epi16(pii, zeros));
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pPix1 = _mm_add_ps(pPix1, pPix2);
|
|
Shinya Kitaoka |
120a6e |
pPix1 = _mm_mul_ps(pi, pPix1); // i*(pix1 + pix2)
|
|
Shinya Kitaoka |
120a6e |
sigma3 = _mm_add_ps(sigma3, pPix1); // sigma3 += i*(pix1 + pix2)
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix1++;
|
|
Shinya Kitaoka |
120a6e |
pix2--;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
__m128 pCoeff = _mm_load1_ps(&coeff);
|
|
Shinya Kitaoka |
120a6e |
__m128 pCoeffq = _mm_load1_ps(&coeffq);
|
|
Shinya Kitaoka |
120a6e |
__m128 pRoundFac = _mm_load1_ps(&round_fac);
|
|
Shinya Kitaoka |
120a6e |
__m128 pDiff = _mm_load1_ps(&diff);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
// sum = (sigma1 + sigma2)*coeff - sigma3*coeffq + round_fac
|
|
Shinya Kitaoka |
120a6e |
__m128 sum = _mm_add_ps(sigma1, sigma2);
|
|
Shinya Kitaoka |
120a6e |
sum = _mm_mul_ps(sum, pCoeff);
|
|
Shinya Kitaoka |
120a6e |
__m128 sum2 = _mm_mul_ps(sigma3, pCoeffq);
|
|
Shinya Kitaoka |
120a6e |
sum2 = _mm_add_ps(sum2, pRoundFac);
|
|
Shinya Kitaoka |
120a6e |
sum = _mm_sub_ps(sum, sum2);
|
|
Shinya Kitaoka |
120a6e |
/*
|
|
Shinya Kitaoka |
120a6e |
__m128i isum = _mm_cvtps_epi32(sum);
|
|
Shinya Kitaoka |
120a6e |
isum = _mm_packs_epi32(isum, zeros);
|
|
Shinya Kitaoka |
120a6e |
isum = _mm_packs_epi16(isum, zeros);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
*(DWORD*)row2 = _mm_cvtsi128_si32(isum);
|
|
Shinya Kitaoka |
120a6e |
*/
|
|
Shinya Kitaoka |
120a6e |
_mm_store_ps((float *)row2, sum);
|
|
Shinya Kitaoka |
120a6e |
row2++;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
__m128i piPixMin =
|
|
Shinya Kitaoka |
120a6e |
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*(DWORD *)(row1 - brad)), zeros);
|
|
Shinya Kitaoka |
120a6e |
__m128 pPixMin = _mm_cvtepi32_ps(_mm_unpacklo_epi16(piPixMin, zeros));
|
|
Shinya Kitaoka |
120a6e |
sigma2 = _mm_add_ps(sigma2, pPixMin);
|
|
Shinya Kitaoka |
120a6e |
/*
|
|
Shinya Kitaoka |
120a6e |
sigma2.r += row1[-brad].r;
|
|
Shinya Kitaoka |
120a6e |
sigma2.g += row1[-brad].g;
|
|
Shinya Kitaoka |
120a6e |
sigma2.b += row1[-brad].b;
|
|
Shinya Kitaoka |
120a6e |
sigma2.m += row1[-brad].m;
|
|
Toshihiro Shimizu |
890ddd |
*/
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
pix1 = row1 + brad;
|
|
Shinya Kitaoka |
120a6e |
pix2 = row1;
|
|
Shinya Kitaoka |
120a6e |
pix3 = row1 - brad;
|
|
Shinya Kitaoka |
120a6e |
pix4 = row1 - brad + 1;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
desigma = _mm_sub_ps(sigma1, sigma2);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = 1; i < length; i++) {
|
|
Shinya Kitaoka |
120a6e |
piPix1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(DWORD *)pix1), zeros);
|
|
Shinya Kitaoka |
120a6e |
piPix2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(DWORD *)pix2), zeros);
|
|
Shinya Kitaoka |
120a6e |
__m128i piPix3 =
|
|
Shinya Kitaoka |
120a6e |
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*(DWORD *)pix3), zeros);
|
|
Shinya Kitaoka |
120a6e |
__m128i piPix4 =
|
|
Shinya Kitaoka |
120a6e |
_mm_unpacklo_epi8(_mm_cvtsi32_si128(*(DWORD *)pix4), zeros);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix1 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(piPix1, zeros));
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix2 =
|
|
Shinya Kitaoka |
120a6e |
_mm_cvtepi32_ps(_mm_slli_epi32(_mm_unpacklo_epi16(piPix2, zeros), 1));
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix3 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(piPix3, zeros));
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix4 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(piPix4, zeros));
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
// desigma += pix1 - 2*pix2 + pix3
|
|
Shinya Kitaoka |
120a6e |
__m128 tmp = _mm_sub_ps(pPix3, pPix2);
|
|
Shinya Kitaoka |
120a6e |
tmp = _mm_add_ps(tmp, pPix1);
|
|
Shinya Kitaoka |
120a6e |
desigma = _mm_add_ps(desigma, tmp);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
// sum += (desigma + diff*(pix1 - pix4))*coeffq
|
|
Shinya Kitaoka |
120a6e |
tmp = _mm_sub_ps(pPix1, pPix4);
|
|
Shinya Kitaoka |
120a6e |
tmp = _mm_mul_ps(tmp, pDiff);
|
|
Shinya Kitaoka |
120a6e |
tmp = _mm_add_ps(desigma, tmp);
|
|
Shinya Kitaoka |
120a6e |
tmp = _mm_mul_ps(tmp, pCoeffq);
|
|
Shinya Kitaoka |
120a6e |
sum = _mm_add_ps(sum, tmp);
|
|
Shinya Kitaoka |
120a6e |
/*
|
|
Shinya Kitaoka |
120a6e |
isum = _mm_cvtps_epi32(sum);
|
|
Shinya Kitaoka |
120a6e |
isum = _mm_packs_epi32(isum, zeros);
|
|
Shinya Kitaoka |
120a6e |
isum = _mm_packs_epi16(isum, zeros);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
*(DWORD*)row2 = _mm_cvtsi128_si32(isum);
|
|
Toshihiro Shimizu |
890ddd |
*/
|
|
Shinya Kitaoka |
120a6e |
_mm_store_ps((float *)row2, sum);
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
row2++;
|
|
Shinya Kitaoka |
120a6e |
pix1++, pix2++, pix3++, pix4++;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
template <class class="" p="" t,=""></class>
|
|
Shinya Kitaoka |
120a6e |
inline void blur_code_SSE2(BlurPixel *row1, T *row2, int length, float coeff,
|
|
Shinya Kitaoka |
120a6e |
float coeffq, int brad, float diff,
|
|
Shinya Kitaoka |
120a6e |
float round_fac) {
|
|
Shinya Kitaoka |
120a6e |
int i;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
float two = 2;
|
|
Shinya Kitaoka |
120a6e |
__m128i zeros = _mm_setzero_si128();
|
|
Shinya Kitaoka |
120a6e |
__m128 twos = _mm_load_ps1(&two);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
__m128 sigma1, sigma2, sigma3, desigma;
|
|
Shinya Kitaoka |
120a6e |
BlurPixel *pix1, *pix2, *pix3, *pix4;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix1 = row1;
|
|
Shinya Kitaoka |
120a6e |
pix2 = row1 - 1;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix1 = _mm_load_ps((float *)pix1);
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix2 = _mm_load_ps((float *)pix2);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
// sigma1 = *pix1
|
|
Shinya Kitaoka |
120a6e |
sigma1 = pPix1;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix1++;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
float zero = 0;
|
|
Shinya Kitaoka |
120a6e |
sigma2 = _mm_load1_ps(&zero);
|
|
Shinya Kitaoka |
120a6e |
sigma3 = _mm_load1_ps(&zero);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = 1; i < brad; i++) {
|
|
Shinya Kitaoka |
120a6e |
pPix1 = _mm_load_ps((float *)pix1);
|
|
Shinya Kitaoka |
120a6e |
pPix2 = _mm_load_ps((float *)pix2);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
sigma1 = _mm_add_ps(sigma1, pPix1);
|
|
Shinya Kitaoka |
120a6e |
sigma2 = _mm_add_ps(sigma2, pPix2);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
__m128i pii = _mm_unpacklo_epi8(_mm_cvtsi32_si128(i), zeros);
|
|
Shinya Kitaoka |
120a6e |
__m128 pi = _mm_cvtepi32_ps(_mm_unpacklo_epi16(pii, zeros));
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pPix1 = _mm_add_ps(pPix1, pPix2);
|
|
Shinya Kitaoka |
120a6e |
pPix1 = _mm_mul_ps(pi, pPix1); // i*(pix1 + pix2)
|
|
Shinya Kitaoka |
120a6e |
sigma3 = _mm_add_ps(sigma3, pPix1); // sigma3 += i*(pix1 + pix2)
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix1++;
|
|
Shinya Kitaoka |
120a6e |
pix2--;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
__m128 pCoeff = _mm_load1_ps(&coeff);
|
|
Shinya Kitaoka |
120a6e |
__m128 pCoeffq = _mm_load1_ps(&coeffq);
|
|
Shinya Kitaoka |
120a6e |
// __m128 pRoundFac = _mm_load1_ps(&round_fac);
|
|
Shinya Kitaoka |
120a6e |
__m128 pDiff = _mm_load1_ps(&diff);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
// sum = (sigma1 + sigma2)*coeff - sigma3*coeffq + round_fac
|
|
Shinya Kitaoka |
120a6e |
__m128 sum = _mm_add_ps(sigma1, sigma2);
|
|
Shinya Kitaoka |
120a6e |
sum = _mm_mul_ps(sum, pCoeff);
|
|
Shinya Kitaoka |
120a6e |
__m128 sum2 = _mm_mul_ps(sigma3, pCoeffq);
|
|
Shinya Kitaoka |
120a6e |
// sum2 = _mm_add_ps(sum2, pRoundFac);
|
|
Shinya Kitaoka |
120a6e |
sum = _mm_sub_ps(sum, sum2);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
// converte i canali da float a char
|
|
Shinya Kitaoka |
120a6e |
__m128i isum = _mm_cvtps_epi32(sum);
|
|
Shinya Kitaoka |
120a6e |
isum = _mm_packs_epi32(isum, zeros);
|
|
Shinya Kitaoka |
120a6e |
// isum = _mm_packs_epi16(isum, zeros);
|
|
Shinya Kitaoka |
120a6e |
isum = _mm_packus_epi16(isum, zeros);
|
|
Shinya Kitaoka |
120a6e |
*(DWORD *)row2 = _mm_cvtsi128_si32(isum);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
row2++;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
// sigma2 += row1[-brad]
|
|
Shinya Kitaoka |
120a6e |
__m128 pPixMin = _mm_load_ps((float *)(row1 - brad));
|
|
Shinya Kitaoka |
120a6e |
sigma2 = _mm_add_ps(sigma2, pPixMin);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix1 = row1 + brad;
|
|
Shinya Kitaoka |
120a6e |
pix2 = row1;
|
|
Shinya Kitaoka |
120a6e |
pix3 = row1 - brad;
|
|
Shinya Kitaoka |
120a6e |
pix4 = row1 - brad + 1;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
desigma = _mm_sub_ps(sigma1, sigma2);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = 1; i < length; i++) {
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix1 = _mm_load_ps((float *)pix1);
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix2 = _mm_load_ps((float *)pix2);
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix3 = _mm_load_ps((float *)pix3);
|
|
Shinya Kitaoka |
120a6e |
__m128 pPix4 = _mm_load_ps((float *)pix4);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pPix2 = _mm_mul_ps(pPix2, twos);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
// desigma += pix1 - 2*pix2 + pix3
|
|
Shinya Kitaoka |
120a6e |
__m128 tmp = _mm_sub_ps(pPix3, pPix2);
|
|
Shinya Kitaoka |
120a6e |
tmp = _mm_add_ps(tmp, pPix1);
|
|
Shinya Kitaoka |
120a6e |
desigma = _mm_add_ps(desigma, tmp);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
// sum += (desigma + diff*(pix1 - pix4))*coeffq
|
|
Shinya Kitaoka |
120a6e |
tmp = _mm_sub_ps(pPix1, pPix4);
|
|
Shinya Kitaoka |
120a6e |
tmp = _mm_mul_ps(tmp, pDiff);
|
|
Shinya Kitaoka |
120a6e |
tmp = _mm_add_ps(desigma, tmp);
|
|
Shinya Kitaoka |
120a6e |
tmp = _mm_mul_ps(tmp, pCoeffq);
|
|
Shinya Kitaoka |
120a6e |
sum = _mm_add_ps(sum, tmp);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
// converte i canali da float a char
|
|
Shinya Kitaoka |
120a6e |
__m128i isum = _mm_cvtps_epi32(sum);
|
|
Shinya Kitaoka |
120a6e |
isum = _mm_packs_epi32(isum, zeros);
|
|
Shinya Kitaoka |
120a6e |
// isum = _mm_packs_epi16(isum, zeros); // QUESTA RIGA E' SBAGLIATA
|
|
Shinya Kitaoka |
120a6e |
// assert(false);
|
|
Shinya Kitaoka |
120a6e |
isum = _mm_packus_epi16(isum, zeros);
|
|
Shinya Kitaoka |
120a6e |
*(DWORD *)row2 = _mm_cvtsi128_si32(isum);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
row2++;
|
|
Shinya Kitaoka |
120a6e |
pix1++, pix2++, pix3++, pix4++;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
#endif // _WIN32
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
// STORE_COL_CODE(crop_val): expands to one brace-enclosed block that writes a
// filtered column back into a raster. Because it is a complete block it can
// be used directly as the body of an if/else without a semicolon.
//
// Names that must exist in the expanding scope:
//   buffer   destination pointer (advanced by x, then by wrap per row)
//   col      source column with r, g, b, m members
//   wrap     destination stride in pixels
//   x, dy, ly, r_ly   ints
//   backlit  tested for non-zero
//   blur     used to derive the amplification factor
// It also calls troundp() and std::min().
//
// Rows i run from (dy >= 0 ? 0 : -dy) up to, but excluding,
// min(ly, r_ly - dy).
// When backlit is set, every channel is multiplied by (1 + blur / 15),
// rounded with troundp and limited from above to crop_val; otherwise col[i]
// is copied unchanged.
#define STORE_COL_CODE(crop_val) \
  { \
    int i, val; \
    double ampl; \
    buffer += x; \
    \
    ampl = 1.0 + blur / 15.0; \
    \
    if (backlit) \
      for (i = ((dy >= 0) ? 0 : -dy); i < std::min(ly, r_ly - dy); i++) { \
        val       = troundp(col[i].r * ampl); \
        buffer->r = (val > crop_val) ? crop_val : val; \
        val       = troundp(col[i].g * ampl); \
        buffer->g = (val > crop_val) ? crop_val : val; \
        val       = troundp(col[i].b * ampl); \
        buffer->b = (val > crop_val) ? crop_val : val; \
        val       = troundp(col[i].m * ampl); \
        buffer->m = (val > crop_val) ? crop_val : val; \
        buffer += wrap; \
      } \
    else \
      for (i = ((dy >= 0) ? 0 : -dy); i < std::min(ly, r_ly - dy); i++) { \
        *buffer = col[i]; \
        buffer += wrap; \
      } \
  }
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
template <class t=""></class>
|
|
Shinya Kitaoka |
120a6e |
void store_colRgb(T *buffer, int wrap, int r_ly, T *col, int ly, int x, int dy,
|
|
Shinya Kitaoka |
120a6e |
int backlit, double blur) {
|
|
Shinya Kitaoka |
120a6e |
int val = T::maxChannelValue;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
if (val == 255)
|
|
Shinya Kitaoka |
120a6e |
STORE_COL_CODE(204)
|
|
Shinya Kitaoka |
120a6e |
else if (val == 65535)
|
|
Shinya Kitaoka |
120a6e |
STORE_COL_CODE(204 * 257)
|
|
Shinya Kitaoka |
120a6e |
else
|
|
Shinya Kitaoka |
120a6e |
assert(false);
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
// Copies one filtered column into the raster, one value per row.
//
// Parameters:
//   buffer   destination raster pointer; the column starts at buffer + x
//   wrap     destination stride in pixels
//   r_ly     upper row bound used together with dy
//   col      source column
//   ly       number of rows available in col
//   x        destination column offset
//   dy       vertical offset; negative values skip the first -dy rows
//   backlit  unused here, kept so the signature matches store_colRgb
//   blur     unused here, kept so the signature matches store_colRgb
//
// Rows i run from (dy >= 0 ? 0 : -dy) up to, but excluding,
// min(ly, r_ly - dy); col[i] goes to successive destination rows.
template <class T>
void store_colGray(T *buffer, int wrap, int r_ly, T *col, int ly, int x, int dy,
                   int backlit, double blur) {
  (void)backlit;
  (void)blur;

  // Both bounds are loop invariant, so compute them once.
  const int firstRow = (dy >= 0) ? 0 : -dy;
  const int endRow   = std::min(ly, r_ly - dy);

  T *dst = buffer + x;
  for (int i = firstRow; i < endRow; i++) {
    *dst = col[i];
    dst += wrap;
  }
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
template <class p=""></class>
|
|
Shinya Kitaoka |
120a6e |
void load_colRgb(BlurPixel *buffer, BlurPixel *col, int lx, int ly, int x,
|
|
Shinya Kitaoka |
120a6e |
int brad, int by1, int by2) {
|
|
Shinya Kitaoka |
120a6e |
int i;
|
|
Shinya Kitaoka |
120a6e |
BlurPixel *pix, left_val, right_val;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
LOAD_COL_CODE
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
void load_channel_col32(float *buffer, float *col, int lx, int ly, int x,
|
|
Shinya Kitaoka |
120a6e |
int brad, int by1, int by2) {
|
|
Shinya Kitaoka |
120a6e |
int i;
|
|
Shinya Kitaoka |
120a6e |
float *pix, left_val, right_val;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
LOAD_COL_CODE
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
template <class class="" p="" q,="" t,=""></class>
|
|
Shinya Kitaoka |
120a6e |
void do_filtering_chan(BlurPixel *row1, T *row2, int length, float coeff,
|
|
Shinya Kitaoka |
120a6e |
float coeffq, int brad, float diff, bool useSSE) {
|
|
Shinya Kitaoka |
9f5a1b |
#ifdef _WIN32
|
|
Shinya Kitaoka |
120a6e |
if (useSSE && T::maxChannelValue == 255)
|
|
Shinya Kitaoka |
120a6e |
blur_code_SSE2<t, p="">(row1, row2, length, coeff, coeffq, brad, diff, 0.5);</t,>
|
|
Shinya Kitaoka |
120a6e |
else
|
|
Toshihiro Shimizu |
890ddd |
#endif
|
|
Shinya Kitaoka |
120a6e |
{
|
|
Shinya Kitaoka |
120a6e |
int i;
|
|
Shinya Kitaoka |
120a6e |
P rsum, gsum, bsum, msum;
|
|
Shinya Kitaoka |
120a6e |
BlurPixel sigma1, sigma2, sigma3, desigma;
|
|
Shinya Kitaoka |
120a6e |
BlurPixel *pix1, *pix2, *pix3, *pix4;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
BLUR_CODE((P)0.5, Q)
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
// Filters one line of single-channel pixels (`row1`, read through their
// `value` member) into a line of floats (`row2`).
//
// The first output sample is computed directly from the `brad` neighbours on
// each side; every following sample is obtained by updating the running sum
// incrementally, so the cost per sample does not depend on `brad`.
//
// row1   : source line. Elements row1[-brad] .. row1[brad + length - 2] are
//          read, so the caller must supply `brad` elements of padding on each
//          side.
// row2   : destination line, `length` floats
// length : number of samples to produce; nothing is written when <= 0
// coeff  : weight applied to the plain sum of the window
// coeffq : weight applied to the distance-weighted sum and to every
//          incremental update
// brad   : blur radius in pixels
// diff   : scale of the fractional correction (callers in this file pass
//          blur - brad)
template <class T>
void do_filtering_channel_float(T *row1, float *row2, int length, float coeff,
                                float coeffq, int brad, float diff) {
  // Guard: without it the first sample would be stored even for an empty line.
  if (length <= 0) return;

  // sigma1: centre sample plus the samples to its right
  // sigma2: samples to its left
  // sigma3: both sides weighted by their distance from the centre
  float sigma1 = row1[0].value;
  float sigma2 = 0.0;
  float sigma3 = 0.0;

  for (int i = 1; i < brad; i++) {
    sigma1 += row1[i].value;
    sigma2 += row1[-i].value;
    sigma3 += i * (row1[i].value + row1[-i].value);
  }

  float sum = (sigma1 + sigma2) * coeff - sigma3 * coeffq;
  row2[0]   = sum;

  sigma2 += row1[-brad].value;
  float desigma = sigma1 - sigma2;

  // Slide the window one sample at a time.
  const T *ahead  = row1 + brad;  // sample `brad` ahead of the centre
  const T *mid    = row1;         // current centre
  const T *behind = row1 - brad;  // sample `brad` behind the centre
  for (int i = 1; i < length; ++i, ++ahead, ++mid, ++behind) {
    desigma += ahead->value - 2 * mid->value + behind->value;
    sum += (desigma + diff * (ahead->value - behind[1].value)) * coeffq;
    row2[i] = sum;
  }
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
// Filters one line of floats (`row1`) into single-channel pixels (`row2`),
// storing each result through T::setValue(int).
//
// Same sliding-window scheme as do_filtering_channel_float, except that 0.5 is
// added once to the running sum so that the truncating (int) cast rounds to
// nearest for non-negative values.
//
// row1   : source line. Elements row1[-brad] .. row1[brad + length - 2] are
//          read, so the caller must supply `brad` floats of padding on each
//          side.
// row2   : destination line, `length` pixels
// length : number of samples to produce; nothing is written when <= 0
// coeff  : weight applied to the plain sum of the window
// coeffq : weight applied to the distance-weighted sum and to every
//          incremental update
// brad   : blur radius in pixels
// diff   : scale of the fractional correction (callers in this file pass
//          blur - brad)
template <class T>
void do_filtering_channel_gray(float *row1, T *row2, int length, float coeff,
                               float coeffq, int brad, float diff) {
  // Guard: without it the first sample would be stored even for an empty line.
  if (length <= 0) return;

  // sigma1: centre sample plus the samples to its right
  // sigma2: samples to its left
  // sigma3: both sides weighted by their distance from the centre
  float sigma1 = row1[0];
  float sigma2 = 0.0;
  float sigma3 = 0.0;

  for (int i = 1; i < brad; i++) {
    sigma1 += row1[i];
    sigma2 += row1[-i];
    sigma3 += i * (row1[i] + row1[-i]);
  }

  // The 0.5 rounding offset is carried along in `sum` for all later samples.
  float sum = (sigma1 + sigma2) * coeff - sigma3 * coeffq + 0.5F;
  row2[0].setValue((int)sum);

  sigma2 += row1[-brad];
  float desigma = sigma1 - sigma2;

  // Slide the window one sample at a time.
  const float *ahead  = row1 + brad;  // sample `brad` ahead of the centre
  const float *mid    = row1;         // current centre
  const float *behind = row1 - brad;  // sample `brad` behind the centre
  for (int i = 1; i < length; ++i, ++ahead, ++mid, ++behind) {
    desigma += *ahead - 2 * (*mid) + (*behind);
    sum += (desigma + diff * (*ahead - behind[1])) * coeffq;
    row2[i].setValue((int)sum);
  }
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
template <class t=""></class>
|
|
Shinya Kitaoka |
120a6e |
void load_rowRgb(TRasterPT<t> &rin, T *row, int lx, int y, int brad, int bx1,</t>
|
|
Shinya Kitaoka |
120a6e |
int bx2) {
|
|
Shinya Kitaoka |
120a6e |
int i;
|
|
Shinya Kitaoka |
120a6e |
T *buf32, *pix;
|
|
Shinya Kitaoka |
120a6e |
T left_val, right_val;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix = row + bx1;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
{
|
|
Shinya Kitaoka |
120a6e |
rin->lock();
|
|
Shinya Kitaoka |
120a6e |
buf32 = rin->pixels(y);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = 0; i < lx; i++) *pix++ = *buf32++;
|
|
Shinya Kitaoka |
120a6e |
rin->unlock();
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix += bx2;
|
|
Shinya Kitaoka |
120a6e |
left_val = *row;
|
|
Shinya Kitaoka |
120a6e |
right_val = *(pix - 1);
|
|
Shinya Kitaoka |
120a6e |
row--;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = 0; i < brad;
|
|
Shinya Kitaoka |
120a6e |
i++) /* pixels equal to the ones of border of image are added */
|
|
Shinya Kitaoka |
120a6e |
{ /* to avoid a black blur to get into the picture. */
|
|
Shinya Kitaoka |
120a6e |
*row-- = left_val;
|
|
Shinya Kitaoka |
120a6e |
*pix++ = right_val;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
template <class t=""></class>
|
|
Shinya Kitaoka |
120a6e |
void load_rowGray(TRasterPT<t> &rin, T *row, int lx, int y, int brad, int bx1,</t>
|
|
Shinya Kitaoka |
120a6e |
int bx2) {
|
|
Shinya Kitaoka |
120a6e |
int i;
|
|
Shinya Kitaoka |
120a6e |
T *buf8, *pix;
|
|
Shinya Kitaoka |
120a6e |
T left_val, right_val;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix = row + bx1;
|
|
Shinya Kitaoka |
120a6e |
buf8 = (T *)(rin->pixels(y));
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = 0; i < lx; i++) *pix++ = *buf8++;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
pix += bx2;
|
|
Shinya Kitaoka |
120a6e |
left_val = *row;
|
|
Shinya Kitaoka |
120a6e |
right_val = *(pix - 1);
|
|
Shinya Kitaoka |
120a6e |
row--;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = 0; i < brad;
|
|
Shinya Kitaoka |
120a6e |
i++) /* pixels equal to the ones of border of image are added */
|
|
Shinya Kitaoka |
120a6e |
{ /* to avoid a black blur to get into the picture. */
|
|
Shinya Kitaoka |
120a6e |
*row-- = left_val;
|
|
Shinya Kitaoka |
120a6e |
*pix++ = right_val;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
template <class class="" p="" t,=""></class>
|
|
Shinya Kitaoka |
120a6e |
void do_filtering_floatRgb(T *row1, BlurPixel *row2, int length, float coeff,
|
|
Shinya Kitaoka |
120a6e |
float coeffq, int brad, float diff, bool useSSE) {
|
|
Toshihiro Shimizu |
890ddd |
/*
|
|
Toshihiro Shimizu |
890ddd |
int i;
|
|
Toshihiro Shimizu |
890ddd |
float rsum, gsum, bsum, msum;
|
|
Toshihiro Shimizu |
890ddd |
CASM_FPIXEL sigma1, sigma2, sigma3, desigma;
|
|
Toshihiro Shimizu |
890ddd |
TPixel32 *pix1, *pix2, *pix3, *pix4;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
BLUR_CODE(0, unsigned char)
|
|
Toshihiro Shimizu |
890ddd |
*/
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
9f5a1b |
#ifdef _WIN32
|
|
Shinya Kitaoka |
120a6e |
if (useSSE)
|
|
Shinya Kitaoka |
120a6e |
blur_code_SSE2<t, p="">(row1, row2, length, coeff, coeffq, brad, diff, 0);</t,>
|
|
Shinya Kitaoka |
120a6e |
else
|
|
Toshihiro Shimizu |
890ddd |
#endif
|
|
Shinya Kitaoka |
120a6e |
blur_code<t, blurpixel<p="">, P>(row1, row2, length, coeff, coeffq, brad, diff,</t,>
|
|
Shinya Kitaoka |
120a6e |
0);
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
template <class class="" p="" q,="" t,=""></class>
|
|
Shinya Kitaoka |
120a6e |
void doBlurRgb(TRasterPT<t> &dstRas, TRasterPT<t> &srcRas, double blur, int dx,</t></t>
|
|
Shinya Kitaoka |
120a6e |
int dy, bool useSSE) {
|
|
Shinya Kitaoka |
120a6e |
int i, lx, ly, llx, lly, brad;
|
|
Shinya Kitaoka |
120a6e |
float coeff, coeffq, diff;
|
|
Shinya Kitaoka |
120a6e |
int bx1 = 0, by1 = 0, bx2 = 0, by2 = 0;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
brad = (int)ceil(blur); /* number of pixels involved in the filtering */
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
// int border = brad*2; // per sicurezza
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
coeff = (float)(blur /
|
|
Shinya Kitaoka |
120a6e |
(brad - brad * brad +
|
|
Shinya Kitaoka |
120a6e |
blur * (2 * brad -
|
|
Shinya Kitaoka |
120a6e |
1))); /*sum of the weights of triangolar filter. */
|
|
Shinya Kitaoka |
120a6e |
coeffq = (float)(coeff / blur);
|
|
Shinya Kitaoka |
120a6e |
diff = (float)(blur - brad);
|
|
Shinya Kitaoka |
120a6e |
lx = srcRas->getLx();
|
|
Shinya Kitaoka |
120a6e |
ly = srcRas->getLy();
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
if ((lx == 0) || (ly == 0)) return;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
llx = lx + bx1 + bx2;
|
|
Shinya Kitaoka |
120a6e |
lly = ly + by1 + by2;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
T *row1, *col2, *buffer;
|
|
Shinya Kitaoka |
120a6e |
BlurPixel *row2, *col1, *fbuffer;
|
|
Shinya Kitaoka |
120a6e |
TRasterGR8P r1;
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
9f5a1b |
#ifdef _WIN32
|
|
Shinya Kitaoka |
120a6e |
if (useSSE) {
|
|
Shinya Kitaoka |
120a6e |
fbuffer =
|
|
Shinya Kitaoka |
120a6e |
(BlurPixel *)_aligned_malloc(llx * ly * sizeof(BlurPixel ), 16);
|
|
Shinya Kitaoka |
120a6e |
row1 = (T *)_aligned_malloc((llx + 2 * brad) * sizeof(T), 16);
|
|
Shinya Kitaoka |
120a6e |
col1 = (BlurPixel *)_aligned_malloc(
|
|
Shinya Kitaoka |
120a6e |
(lly + 2 * brad) * sizeof(BlurPixel), 16);
|
|
Shinya Kitaoka |
120a6e |
col2 = (T *)_aligned_malloc(lly * sizeof(T), 16);
|
|
Shinya Kitaoka |
120a6e |
} else
|
|
Toshihiro Shimizu |
890ddd |
#endif
|
|
Shinya Kitaoka |
120a6e |
{
|
|
Shinya Kitaoka |
120a6e |
TRasterGR8P raux(llx * sizeof(BlurPixel), ly);
|
|
Shinya Kitaoka |
120a6e |
r1 = raux;
|
|
Shinya Kitaoka |
120a6e |
r1->lock();
|
|
Shinya Kitaoka |
120a6e |
fbuffer = (BlurPixel *)r1->getRawData(); // new CASM_FPIXEL [llx *ly];
|
|
Shinya Kitaoka |
120a6e |
row1 = new T[llx + 2 * brad];
|
|
Shinya Kitaoka |
6fa9ac |
col1 = new BlurPixel[ lly + 2 * brad ];
|
|
Shinya Kitaoka |
120a6e |
col2 = new T[lly];
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
if ((!fbuffer) || (!row1) || (!col1) || (!col2)) {
|
|
Shinya Kitaoka |
120a6e |
if (!useSSE) r1->unlock();
|
|
Michał Janiszewski |
50e38f |
#ifdef _WIN32
|
|
Shinya Kitaoka |
120a6e |
if (useSSE) {
|
|
Shinya Kitaoka |
120a6e |
_aligned_free(col2);
|
|
Shinya Kitaoka |
120a6e |
_aligned_free(col1);
|
|
Shinya Kitaoka |
120a6e |
_aligned_free(row1);
|
|
Shinya Kitaoka |
120a6e |
_aligned_free(fbuffer);
|
|
Shinya Kitaoka |
120a6e |
} else
|
|
Michał Janiszewski |
50e38f |
#endif
|
|
Shinya Kitaoka |
120a6e |
{
|
|
Shinya Kitaoka |
120a6e |
delete[] col2;
|
|
Shinya Kitaoka |
120a6e |
delete[] col1;
|
|
Shinya Kitaoka |
120a6e |
delete[] row1;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
return;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
row2 = fbuffer;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
try {
|
|
Shinya Kitaoka |
120a6e |
for (i = 0; i < ly; i++) {
|
|
Shinya Kitaoka |
120a6e |
load_rowRgb<t>(srcRas, row1 + brad, lx, i, brad, bx1, bx2);</t>
|
|
Shinya Kitaoka |
120a6e |
do_filtering_floatRgb<t>(row1 + brad, row2, llx, coeff, coeffq, brad,</t>
|
|
Shinya Kitaoka |
120a6e |
diff, useSSE);
|
|
Shinya Kitaoka |
120a6e |
row2 += llx;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
dstRas->lock();
|
|
Shinya Kitaoka |
120a6e |
buffer = (T *)dstRas->getRawData();
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
if (dy >= 0) buffer += (dstRas->getWrap()) * dy;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = (dx >= 0) ? 0 : -dx; i < std::min(llx, dstRas->getLx() - dx);
|
|
Shinya Kitaoka |
120a6e |
i++) {
|
|
Shinya Kitaoka |
120a6e |
load_colRgb(fbuffer, col1 + brad, llx, ly, i, brad, by1, by2);
|
|
Shinya Kitaoka |
120a6e |
do_filtering_chan<t, p="" q,="">(col1 + brad, col2, lly, coeff, coeffq, brad,</t,>
|
|
Shinya Kitaoka |
120a6e |
diff, useSSE);
|
|
Shinya Kitaoka |
120a6e |
store_colRgb<t>(buffer, dstRas->getWrap(), dstRas->getLy(), col2, lly,</t>
|
|
Shinya Kitaoka |
120a6e |
i + dx, dy, 0, blur);
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
dstRas->unlock();
|
|
Shinya Kitaoka |
120a6e |
} catch (...) {
|
|
Shinya Kitaoka |
120a6e |
dstRas->clear();
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
9f5a1b |
#ifdef _WIN32
|
|
Shinya Kitaoka |
120a6e |
if (useSSE) {
|
|
Shinya Kitaoka |
120a6e |
_aligned_free(col2);
|
|
Shinya Kitaoka |
120a6e |
_aligned_free(col1);
|
|
Shinya Kitaoka |
120a6e |
_aligned_free(row1);
|
|
Shinya Kitaoka |
120a6e |
_aligned_free(fbuffer);
|
|
Shinya Kitaoka |
120a6e |
} else
|
|
Toshihiro Shimizu |
890ddd |
#endif
|
|
Shinya Kitaoka |
120a6e |
{
|
|
Shinya Kitaoka |
120a6e |
delete[] col2;
|
|
Shinya Kitaoka |
120a6e |
delete[] col1;
|
|
Shinya Kitaoka |
120a6e |
delete[] row1;
|
|
Shinya Kitaoka |
120a6e |
r1->unlock();
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//-------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
template <class t=""></class>
|
|
Shinya Kitaoka |
120a6e |
void doBlurGray(TRasterPT<t> &dstRas, TRasterPT<t> &srcRas, double blur, int dx,</t></t>
|
|
Shinya Kitaoka |
120a6e |
int dy) {
|
|
Shinya Kitaoka |
120a6e |
int i, lx, ly, llx, lly, brad;
|
|
Shinya Kitaoka |
120a6e |
float coeff, coeffq, diff;
|
|
Shinya Kitaoka |
120a6e |
int bx1 = 0, by1 = 0, bx2 = 0, by2 = 0;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
brad = (int)ceil(blur); /* number of pixels involved in the filtering */
|
|
Shinya Kitaoka |
120a6e |
coeff = (float)(blur /
|
|
Shinya Kitaoka |
120a6e |
(brad - brad * brad +
|
|
Shinya Kitaoka |
120a6e |
blur * (2 * brad -
|
|
Shinya Kitaoka |
120a6e |
1))); /*sum of the weights of triangolar filter. */
|
|
Shinya Kitaoka |
120a6e |
coeffq = (float)(coeff / blur);
|
|
Shinya Kitaoka |
120a6e |
diff = (float)(blur - brad);
|
|
Shinya Kitaoka |
120a6e |
lx = srcRas->getLx();
|
|
Shinya Kitaoka |
120a6e |
ly = srcRas->getLy();
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
if ((lx == 0) || (ly == 0)) return;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
llx = lx + bx1 + bx2;
|
|
Shinya Kitaoka |
120a6e |
lly = ly + by1 + by2;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
T *row1, *col2, *buffer;
|
|
Shinya Kitaoka |
120a6e |
float *row2, *col1, *fbuffer;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
TRasterGR8P r1(llx * sizeof(float), ly);
|
|
Shinya Kitaoka |
120a6e |
r1->lock();
|
|
Shinya Kitaoka |
120a6e |
fbuffer = (float *)r1->getRawData(); // new float[llx *ly];
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
row1 = new T[llx + 2 * brad];
|
|
Shinya Kitaoka |
120a6e |
col1 = new float[lly + 2 * brad];
|
|
Shinya Kitaoka |
120a6e |
col2 = new T[lly];
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
if ((!fbuffer) || (!row1) || (!col1) || (!col2)) {
|
|
Shinya Kitaoka |
120a6e |
delete[] row1;
|
|
Shinya Kitaoka |
120a6e |
delete[] col1;
|
|
Shinya Kitaoka |
120a6e |
delete[] col2;
|
|
Shinya Kitaoka |
120a6e |
return;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
row2 = fbuffer;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = 0; i < ly; i++) {
|
|
Shinya Kitaoka |
120a6e |
load_rowGray<t>(srcRas, row1 + brad, lx, i, brad, bx1, bx2);</t>
|
|
Shinya Kitaoka |
120a6e |
do_filtering_channel_float<t>(row1 + brad, row2, llx, coeff, coeffq, brad,</t>
|
|
Shinya Kitaoka |
120a6e |
diff);
|
|
Shinya Kitaoka |
120a6e |
row2 += llx;
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
dstRas->lock();
|
|
Shinya Kitaoka |
120a6e |
buffer = (T *)dstRas->getRawData();
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
if (dy >= 0) buffer += (dstRas->getWrap()) * dy;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
for (i = (dx >= 0) ? 0 : -dx; i < std::min(llx, dstRas->getLx() - dx); i++) {
|
|
Shinya Kitaoka |
120a6e |
load_channel_col32(fbuffer, col1 + brad, llx, ly, i, brad, by1, by2);
|
|
Shinya Kitaoka |
120a6e |
do_filtering_channel_gray<t>(col1 + brad, col2, lly, coeff, coeffq, brad,</t>
|
|
Shinya Kitaoka |
120a6e |
diff);
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
int backlit = 0;
|
|
Shinya Kitaoka |
120a6e |
store_colGray<t>(buffer, dstRas->getWrap(), dstRas->getLy(), col2, lly,</t>
|
|
Shinya Kitaoka |
120a6e |
i + dx, dy, backlit, blur);
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
dstRas->unlock();
|
|
Shinya Kitaoka |
120a6e |
delete[] col2;
|
|
Shinya Kitaoka |
120a6e |
delete[] col1;
|
|
Shinya Kitaoka |
120a6e |
delete[] row1;
|
|
Shinya Kitaoka |
120a6e |
r1->unlock(); // delete[]fbuffer;
|
|
Toshihiro Shimizu |
890ddd |
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
}; // namespace
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//====================================================================
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
// Returns the border, in pixels, associated with a blur of the given amount:
// twice the blur radius, where the radius is `blur` rounded up.
int TRop::getBlurBorder(double blur) {
  const int brad = (int)ceil(blur); /* number of pixels involved in the filtering */

  // Twice the radius, as a safety margin.
  return brad * 2;
}
|
|
Toshihiro Shimizu |
890ddd |
|
|
Toshihiro Shimizu |
890ddd |
//--------------------------------------------------------------------
|
|
Toshihiro Shimizu |
890ddd |
|
|
Shinya Kitaoka |
120a6e |
void TRop::blur(const TRasterP &dstRas, const TRasterP &srcRas, double blur,
|
|
Shinya Kitaoka |
120a6e |
int dx, int dy, bool useSSE) {
|
|
Shinya Kitaoka |
120a6e |
TRaster32P dstRas32 = dstRas;
|
|
Shinya Kitaoka |
120a6e |
TRaster32P srcRas32 = srcRas;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
if (dstRas32 && srcRas32)
|
|
Shinya Kitaoka |
120a6e |
doBlurRgb<tpixel32, float="" uchar,="">(dstRas32, srcRas32, blur, dx, dy, useSSE);</tpixel32,>
|
|
Shinya Kitaoka |
120a6e |
else {
|
|
Shinya Kitaoka |
120a6e |
TRaster64P dstRas64 = dstRas;
|
|
Shinya Kitaoka |
120a6e |
TRaster64P srcRas64 = srcRas;
|
|
Shinya Kitaoka |
120a6e |
if (dstRas64 && srcRas64)
|
|
Shinya Kitaoka |
120a6e |
doBlurRgb<tpixel64, double="" ushort,="">(dstRas64, srcRas64, blur, dx, dy,</tpixel64,>
|
|
Shinya Kitaoka |
120a6e |
useSSE);
|
|
Shinya Kitaoka |
120a6e |
else {
|
|
Shinya Kitaoka |
120a6e |
TRasterGR8P dstRasGR8 = dstRas;
|
|
Shinya Kitaoka |
120a6e |
TRasterGR8P srcRasGR8 = srcRas;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
if (dstRasGR8 && srcRasGR8)
|
|
Shinya Kitaoka |
120a6e |
doBlurGray<tpixelgr8>(dstRasGR8, srcRasGR8, blur, dx, dy);</tpixelgr8>
|
|
Shinya Kitaoka |
120a6e |
else {
|
|
Shinya Kitaoka |
120a6e |
TRasterGR16P dstRasGR16 = dstRas;
|
|
Shinya Kitaoka |
120a6e |
TRasterGR16P srcRasGR16 = srcRas;
|
|
Shinya Kitaoka |
120a6e |
|
|
Shinya Kitaoka |
120a6e |
if (dstRasGR16 && srcRasGR16)
|
|
Shinya Kitaoka |
120a6e |
doBlurGray<tpixelgr16>(dstRasGR16, srcRasGR16, blur, dx, dy);</tpixelgr16>
|
|
Shinya Kitaoka |
120a6e |
else
|
|
Shinya Kitaoka |
120a6e |
throw TException("TRop::blur unsupported pixel type");
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Shinya Kitaoka |
120a6e |
}
|
|
Toshihiro Shimizu |
890ddd |
}
|