Although the title says SURF, the algorithm here actually follows the document Notes on the OpenSURF Library.
Download: http://opensurf1./files/OpenSURFcpp.zip
First define #define PROCEDURE 1, then enter the main() function:
int main(void)
{
  if (PROCEDURE == 1) return mainImage();
  if (PROCEDURE == 2) return mainVideo();
  if (PROCEDURE == 3) return mainMatch();
  if (PROCEDURE == 4) return mainMotionPoints();
  if (PROCEDURE == 5) return mainStaticMatch();
  if (PROCEDURE == 6) return mainKmeans();

  return 0;  // added so main() returns a value when no case matches
}
We'll take static-image interest-point detection (case 1) as the example to understand how SURF extracts feature points.
int mainImage(void)
{
  IpVec ipts;
  IplImage *img = cvLoadImage("imgs/sf.jpg");

  clock_t start = clock();
  surfDetDes(img, ipts, false, 5, 4, 2, 0.0004f);
  clock_t end = clock();

  std::cout << "OpenSURF found: " << ipts.size() << " interest points" << std::endl;
  std::cout << "OpenSURF took: " << float(end - start) / CLOCKS_PER_SEC << " seconds" << std::endl;

  drawIpoints(img, ipts);
  showImage(img);

  return 0;
}
EXP:
IpVec is defined in ipoint.h as typedef std::vector<Ipoint> IpVec; — that is, IpVec is a vector whose elements are Ipoint objects. Ipoint is a class (also in ipoint.h) that the author uses like a struct: every member is public (see the sketch after these notes).
The clock() calls time the run: one records the start, the other the end, and the printed difference is the time surfDetDes() needs for feature extraction.
drawIpoints() draws the interest-point markers on top of img — plainly put, it is the function that renders the feature points.
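For reference, the public members of Ipoint look roughly like this (a sketch from memory of ipoint.h; the exact field order and the motion/cluster members may differ in your copy):

// Sketch of the Ipoint class from ipoint.h (used as a plain struct)
class Ipoint {
public:
  float x, y;            // interpolated position in the original image
  float scale;           // detected scale
  float orientation;     // dominant orientation, in radians
  int laplacian;         // sign of the Laplacian (trace of the Hessian)
  float descriptor[64];  // 64-dimensional SURF descriptor
  float dx, dy;          // motion offsets, used by the match/motion demos
  int clusterIndex;      // used by the k-means demo
};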
Now let's step inside surfDetDes() to see what it actually does. surfDetDes is defined in surflib.h:
inline void surfDetDes(IplImage *img,
                       std::vector<Ipoint> &ipts,
                       bool upright = false,
                       int octaves = OCTAVES,
                       int intervals = INTERVALS,
                       int init_sample = INIT_SAMPLE,
                       float thres = THRES)
{
  IplImage *int_img = Integral(img);

  FastHessian fh(int_img, ipts, octaves, intervals, init_sample, thres);
  fh.getIpoints();

  Surf des(int_img, ipts);
  des.getDescriptors(upright);

  cvReleaseImage(&int_img);
}

In short, surfDetDes() lays out the whole pipeline:
1. Compute the integral image int_img;
2. Construct the Hessian response object fh;
3. Use fh to extract the interest points;
4. Build the SURF descriptors and associate them with the interest points;
5. Release the integral image.
① Computing the integral image
First find Integral() in integral.cpp:
IplImage *Integral(IplImage *source)
{
  // convert the input to single-channel 32-bit float grayscale
  IplImage *img = getGray(source);
  IplImage *int_img = cvCreateImage(cvGetSize(img), IPL_DEPTH_32F, 1);

  int height = img->height;
  int width  = img->width;
  int step   = img->widthStep / sizeof(float);
  float *data   = (float *) img->imageData;
  float *i_data = (float *) int_img->imageData;

  // first row: a simple 1-D running sum
  float rs = 0.0f;
  for (int j = 0; j < width; j++)
  {
    rs += data[j];
    i_data[j] = rs;
  }

  // remaining rows: row-wise running sum plus the completed value above
  for (int i = 1; i < height; ++i)
  {
    rs = 0.0f;
    for (int j = 0; j < width; ++j)
    {
      rs += data[i*step + j];
      i_data[i*step + j] = rs + i_data[(i-1)*step + j];
    }
  }

  cvReleaseImage(&img);
  return int_img;
}
1. First convert the input to a grayscale image, and create an image buffer of the same size as the gray image: the integral image int_img.
2. Fetch the image information: its size (height and width) and the data pointers of the gray image and of int_img, data and i_data. (Note that the pixel type here is float.)
3. Compute the first row of integral values first; this is just a 1-D running sum. Every other entry is then obtained iteratively from i_data[i*step+j] = rs + i_data[(i-1)*step+j]: at the current point (i0, j0), rs is the sum of the pixels of the current row i = i0 with j <= j0, and i_data[(i-1)*step+j] is the already-completed integral value directly above. (The original post had a figure here: the black dot is the current point i_data[i*step+j], the green dot is i_data[(i-1)*step+j], and rs is the sum over the horizontal strip of the current row up to and including the black dot.) A tiny worked example follows this list.
4. Release the grayscale image and return the integral image.
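As a quick sanity check of the recurrence, here is a tiny standalone example (my own illustration, not part of OpenSURF) on a 2×2 image:

#include <cstdio>

// Integral image of a 2x2 image by the same recurrence:
//   input: 1 2    integral: 1  3
//          3 4              4 10
int main()
{
  const int W = 2, H = 2;
  float data[H][W] = {{1, 2}, {3, 4}};
  float i_data[H][W];

  float rs = 0.0f;
  for (int j = 0; j < W; ++j) { rs += data[0][j]; i_data[0][j] = rs; }
  for (int i = 1; i < H; ++i) {
    rs = 0.0f;
    for (int j = 0; j < W; ++j) {
      rs += data[i][j];
      i_data[i][j] = rs + i_data[i-1][j];
    }
  }
  printf("%g %g\n%g %g\n", i_data[0][0], i_data[0][1], i_data[1][0], i_data[1][1]);
  return 0;  // prints: 1 3 / 4 10
}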
A related function is BoxIntegral() in integral.h:

inline float BoxIntegral(IplImage *img, int row, int col, int rows, int cols)

Its parameters are the source (integral) image; row and col, the coordinates of the box's corner A; and rows and cols, the box height and width. It uses the integral image to return the sum of all pixel values inside a box whose corners are laid out as

A  B
C  D

namely S = int_img(D) + int_img(A) - int_img(B) - int_img(C).
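The implementation in integral.h is essentially that four-corner lookup plus bounds clamping; roughly (reconstructed from memory, so treat the exact clamping as approximate; needs <algorithm> for std::min/std::max):

inline float BoxIntegral(IplImage *img, int row, int col, int rows, int cols)
{
  float *data = (float *) img->imageData;
  int step = img->widthStep / sizeof(float);

  // subtracting one converts the inclusive box bounds to array indices
  int r1 = std::min(row,        img->height) - 1;
  int c1 = std::min(col,        img->width)  - 1;
  int r2 = std::min(row + rows, img->height) - 1;
  int c2 = std::min(col + cols, img->width)  - 1;

  float A = 0.f, B = 0.f, C = 0.f, D = 0.f;
  if (r1 >= 0 && c1 >= 0) A = data[r1 * step + c1];
  if (r1 >= 0 && c2 >= 0) B = data[r1 * step + c2];
  if (r2 >= 0 && c1 >= 0) C = data[r2 * step + c1];
  if (r2 >= 0 && c2 >= 0) D = data[r2 * step + c2];

  return std::max(0.f, A - B - C + D);
}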
② Computing the Hessian responses
FastHessian, the class that computes the approximate Hessian responses, is declared in fasthessian.h and implemented in fasthessian.cpp.
class FastHessian {
public:
  FastHessian(std::vector<Ipoint> &ipts,
              const int octaves = OCTAVES,
              const int intervals = INTERVALS,
              const int init_sample = INIT_SAMPLE,
              const float thres = THRES);

  FastHessian(IplImage *img,
              std::vector<Ipoint> &ipts,
              const int octaves = OCTAVES,
              const int intervals = INTERVALS,
              const int init_sample = INIT_SAMPLE,
              const float thres = THRES);

  ~FastHessian();

  void saveParameters(const int octaves,
                      const int intervals,
                      const int init_sample,
                      const float thres);
  void setIntImage(IplImage *img);
  void getIpoints();

private:
  void buildResponseMap();
  void buildResponseLayer(ResponseLayer *r);
  int isExtremum(int r, int c, ResponseLayer *t, ResponseLayer *m, ResponseLayer *b);
  void interpolateExtremum(int r, int c, ResponseLayer *t, ResponseLayer *m, ResponseLayer *b);
  void interpolateStep(int r, int c, ResponseLayer *t, ResponseLayer *m, ResponseLayer *b,
                       double *xi, double *xr, double *xc);
  CvMat* deriv3D(int r, int c, ResponseLayer *t, ResponseLayer *m, ResponseLayer *b);
  CvMat* hessian3D(int r, int c, ResponseLayer *t, ResponseLayer *m, ResponseLayer *b);

  IplImage *img;
  int i_width, i_height;
  std::vector<Ipoint> &ipts;
  std::vector<ResponseLayer *> responseMap;
  int octaves;
  int intervals;
  int init_sample;
  float thresh;
};
The public section declares two constructors — with and without the source-image parameter — followed by the destructor ~FastHessian and the remaining member functions. Their implementations in fasthessian.cpp are explained one by one below.
Both constructors call saveParameters(octaves, intervals, init_sample, thresh) to set the pyramid-construction parameters; the constructor that takes an image additionally calls setIntImage(img) to set the current (integral) image.
void FastHessian::saveParameters(const int octaves, const int intervals,
                                 const int init_sample, const float thresh)
{
  this->octaves     = (octaves > 0 && octaves <= 4 ? octaves : OCTAVES);
  this->intervals   = (intervals > 0 && intervals <= 4 ? intervals : INTERVALS);
  this->init_sample = (init_sample > 0 && init_sample <= 6 ? init_sample : INIT_SAMPLE);
  this->thresh      = (thresh >= 0 ? thresh : THRES);
}

void FastHessian::setIntImage(IplImage *img)
{
  this->img = img;
  i_height = img->height;
  i_width  = img->width;
}
Since the header already defines

static const int OCTAVES = 5;
static const int INTERVALS = 4;
static const float THRES = 0.0004f;
static const int INIT_SAMPLE = 2;
saveParameters() therefore just validates the parameters, falling back to these defaults when a value is out of range.
FastHessian::getIpoints() extracts the interest points:
void FastHessian::getIpoints()
{
  static const int filter_map[OCTAVES][INTERVALS] =
    {{0,1,2,3}, {1,3,4,5}, {3,5,6,7}, {5,7,8,9}, {7,9,10,11}};

  ipts.clear();
  buildResponseMap();
  ...
}
First filter_map is initialized (its entries are indices into responseMap, so that neighbouring octaves can share layers) and ipts, the structure that records the feature points, is cleared.
buildResponseMap() then creates the Gaussian-response layers. Their filter sizes match the paper exactly:
// Oct1: 9, 15, 21, 27
// Oct2: 15, 27, 39, 51
// Oct3: 27, 51, 75, 99
// Oct4: 51, 99, 147, 195
// Oct5: 99, 195, 291, 387
These are the template sizes of each octave; the spacing between sizes doubles per octave: 6, 12, 24, 48, 96. Each entry of the responseMap vector is built with the four-parameter constructor ResponseLayer(int width, int height, int step, int filter), defined in responselayer.h, where width and height are the actual image size divided by step (step starts at 2, i.e. init_sample), and filter is the side length of the box filter.
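For reference, buildResponseMap() in fasthessian.cpp allocates those layers roughly as follows (a sketch; the higher octaves continue the same pattern with halved resolution and the filter sizes listed above):

void FastHessian::buildResponseMap()
{
  responseMap.clear();

  // image dimensions and sampling step for the first octave
  int w = i_width / init_sample;
  int h = i_height / init_sample;
  int s = init_sample;

  // Oct1: 9, 15, 21, 27 -- full resolution
  if (octaves >= 1)
  {
    responseMap.push_back(new ResponseLayer(w, h, s, 9));
    responseMap.push_back(new ResponseLayer(w, h, s, 15));
    responseMap.push_back(new ResponseLayer(w, h, s, 21));
    responseMap.push_back(new ResponseLayer(w, h, s, 27));
  }

  // Oct2: 15, 27, 39, 51 -- the 15 and 27 layers already exist,
  // so only 39 and 51 are added, at half resolution
  if (octaves >= 2)
  {
    responseMap.push_back(new ResponseLayer(w/2, h/2, s*2, 39));
    responseMap.push_back(new ResponseLayer(w/2, h/2, s*2, 51));
  }

  // Oct3: 27, 51, 75, 99 -- only 75 and 99 are new
  if (octaves >= 3)
  {
    responseMap.push_back(new ResponseLayer(w/4, h/4, s*4, 75));
    responseMap.push_back(new ResponseLayer(w/4, h/4, s*4, 99));
  }

  // ... Oct4 (147, 195) and Oct5 (291, 387) follow the same pattern,
  // which is exactly why filter_map indexes shared layers.

  // finally fill every layer with its responses
  for (unsigned int i = 0; i < responseMap.size(); ++i)
    buildResponseLayer(responseMap[i]);
}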
Each layer is filled by buildResponseLayer(responseMap[i]), which filters the image and stores the results in the layer's responses and laplacian arrays:
void FastHessian::buildResponseLayer(ResponseLayer *rl)
{
  float *responses = rl->responses;          // Hessian determinant responses
  unsigned char *laplacian = rl->laplacian;  // sign of the Laplacian

  int step = rl->step;           // sampling step for this layer
  int b = (rl->filter - 1) / 2;  // filter border
  int l = rl->filter / 3;        // filter lobe (one third of the filter size)
  int w = rl->filter;            // filter size
  float inverse_area = 1.f / (w * w);  // normalisation factor
  float Dxx, Dyy, Dxy;

  for (int r, c, ar = 0, index = 0; ar < rl->height; ++ar)
  {
    for (int ac = 0; ac < rl->width; ++ac, index++)
    {
      // image coordinates of the current sample
      r = ar * step;
      c = ac * step;

      // box-filter responses via the integral image
      Dxx = BoxIntegral(img, r - l + 1, c - b, 2*l - 1, w)
          - BoxIntegral(img, r - l + 1, c - l / 2, 2*l - 1, l) * 3;
      Dyy = BoxIntegral(img, r - b, c - l + 1, w, 2*l - 1)
          - BoxIntegral(img, r - l / 2, c - l + 1, l, 2*l - 1) * 3;
      Dxy = + BoxIntegral(img, r - l, c + 1, l, l)
            + BoxIntegral(img, r + 1, c - l, l, l)
            - BoxIntegral(img, r - l, c - l, l, l)
            - BoxIntegral(img, r + 1, c + 1, l, l);

      // normalise the responses with respect to the filter size
      Dxx *= inverse_area;
      Dyy *= inverse_area;
      Dxy *= inverse_area;

      // approximate determinant of the Hessian, and the Laplacian sign
      responses[index] = (Dxx * Dyy - 0.81f * Dxy * Dxy);
      laplacian[index] = (Dxx + Dyy >= 0 ? 1 : 0);

#ifdef RL_DEBUG
      rl->coords.push_back(std::make_pair<int,int>(r, c));
#endif
    }
  }
}
A figure illustrated the box layouts for Dxy and Dyy; Dxx is just the transpose of Dyy, so the reader can work it out analogously. [For the 9×9 filter, w = 9 and l = 9/3 = 3; 2*l - 1 is the width of the full strip used for Dyy.]

[Figure missing: box-filter layouts for Dxy and Dyy around the current pixel; red regions weigh +1, green regions -1.]

The dot marks the current point. For Dxy, the integral over the two positive (red) l×l squares minus the integral over the two negative (green) squares gives Dxy·w² (the inverse_area factor then removes the w²). For Dyy, the green block minus 2× the red block — equivalently, one whole (2l-1)×w strip minus 3× the red middle block, which is exactly what the code computes — gives Dyy·w².
The results are stored in the layer's one-dimensional responses and laplacian arrays, whose size is ResponseLayer->width * ResponseLayer->height.
We now have det(H_approx) = Dxx·Dyy - (0.9·Dxy)² for every sampled point of every layer (0.81f in the code is 0.9²); next, we decide which points are extrema.
③ Finding the extrema of the det(H_approx) values computed above for every point of every layer.
Back in getIpoints() in fasthessian.cpp, the layers are scanned in overlapping triples of adjacent layers per octave (there are octaves octaves in total; each has 4 layers, so each contributes 2 such triples):
ResponseLayer *b, *m, *t;
for (int o = 0; o < octaves; ++o)
  for (int i = 0; i <= 1; ++i)
  {
    b = responseMap.at(filter_map[o][i]);    // bottom layer
    m = responseMap.at(filter_map[o][i+1]);  // middle layer
    t = responseMap.at(filter_map[o][i+2]);  // top layer

    // scan the middle layer at the top layer's sampling density
    for (int r = 0; r < t->height; ++r)
    {
      for (int c = 0; c < t->width; ++c)
      {
        if (isExtremum(r, c, t, m, b))
        {
          interpolateExtremum(r, c, t, m, b);
        }
      }
    }
  }
The test relies on the helpers isExtremum() and interpolateExtremum(); find them in the same .cpp and let's analyse them.
int FastHessian::isExtremum(int r, int c, ResponseLayer *t, ResponseLayer *m, ResponseLayer *b)
{
  // bounds check: skip points too close to the layer border
  int layerBorder = (t->filter + 1) / (2 * t->step);
  if (r <= layerBorder || r >= t->height - layerBorder ||
      c <= layerBorder || c >= t->width - layerBorder)
    return 0;

  // the candidate in the middle layer must exceed the threshold
  float candidate = m->getResponse(r, c, t);
  if (candidate < thresh)
    return 0;

  // compare against all 26 neighbours in the 3x3x3 cube
  for (int rr = -1; rr <= 1; ++rr)
  {
    for (int cc = -1; cc <= 1; ++cc)
    {
      if (t->getResponse(r+rr, c+cc) >= candidate ||
          ((rr != 0 || cc != 0) && m->getResponse(r+rr, c+cc, t) >= candidate) ||
          b->getResponse(r+rr, c+cc, t) >= candidate)
        return 0;
    }
  }
  return 1;
}
Note carefully: because the layers have different sizes, responses must be compared at a common sampling density. getResponse() takes care of this by first computing the scale ratio between the two layers: if the ratio is 2 and the current point of the top layer is (20, 25), the point fetched from the layer below is (40, 50), not that layer's (20, 25).
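Concretely, the overload of getResponse() in responselayer.h that takes a source layer looks roughly like this (reconstructed from memory, so treat the details as approximate):

// Return this layer's response at (row, column), where the coordinates are
// expressed on the sampling grid of the coarser src layer.
inline float getResponse(unsigned int row, unsigned int column, ResponseLayer *src)
{
  int scale = this->width / src->width;  // ratio of the two sampling grids
  return responses[(scale * row) * width + (scale * column)];
}

If the current point does turn out to be an extremum, interpolateExtremum() is called: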
void FastHessian::interpolateExtremum(int r, int c, ResponseLayer *t, ResponseLayer *m, ResponseLayer *b)
{
  // the layers must be equally spaced in filter size
  int filterStep = (m->filter - b->filter);
  assert(filterStep > 0 && t->filter - m->filter == m->filter - b->filter);

  // offsets to the true location of the extremum
  double xi = 0, xr = 0, xc = 0;
  interpolateStep(r, c, t, m, b, &xi, &xr, &xc);

  // keep the point only if it lies close enough to the sampled position
  if (fabs(xi) < 0.5f && fabs(xr) < 0.5f && fabs(xc) < 0.5f)
  {
    Ipoint ipt;
    ipt.x = static_cast<float>((c + xc) * t->step);
    ipt.y = static_cast<float>((r + xr) * t->step);
    ipt.scale = static_cast<float>((0.1333f) * (m->filter + xi * filterStep));
    ipt.laplacian = static_cast<int>(m->getLaplacian(r, c, t));
    ipts.push_back(ipt);
  }
}
This function interpolates the precise position of the extremum in the original image and stores it in ipt. (The constant 0.1333 is approximately 1.2/9: in SURF the smallest 9×9 filter corresponds to scale 1.2, so this converts filter size to scale.) Opening interpolateStep: deriv3D computes the first derivatives of the response in the three directions (column, row, scale) at the current point, hessian3D computes the corresponding 3×3 second-derivative matrix, and the offsets xi, xr, xc are obtained as the Newton step x = -H⁻¹·dD of the 3-D quadratic fit.
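deriv3D, for instance, is plain central differences across the three layers (a sketch from memory; hessian3D is the analogous second-difference stencil):

CvMat* FastHessian::deriv3D(int r, int c, ResponseLayer *t, ResponseLayer *m, ResponseLayer *b)
{
  // central differences in column (dx), row (dy) and scale (ds)
  double dx = (m->getResponse(r, c + 1, t) - m->getResponse(r, c - 1, t)) / 2.0;
  double dy = (m->getResponse(r + 1, c, t) - m->getResponse(r - 1, c, t)) / 2.0;
  double ds = (t->getResponse(r, c)        - b->getResponse(r, c, t))     / 2.0;

  CvMat* dI = cvCreateMat(3, 1, CV_64FC1);
  cvmSet(dI, 0, 0, dx);
  cvmSet(dI, 1, 0, dy);
  cvmSet(dI, 2, 0, ds);
  return dI;
}

interpolateStep() then assembles the step itself: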
void FastHessian::interpolateStep(int r, int c, ResponseLayer *t, ResponseLayer *m, ResponseLayer *b,
                                  double* xi, double* xr, double* xc)
{
  CvMat *dD, *H, *H_inv, X;
  double x[3] = { 0 };

  dD = deriv3D(r, c, t, m, b);    // gradient
  H  = hessian3D(r, c, t, m, b);  // 3x3 Hessian

  // X = -H^-1 * dD, the offset that zeroes the gradient of the quadratic fit
  H_inv = cvCreateMat(3, 3, CV_64FC1);
  cvInvert(H, H_inv, CV_SVD);
  cvInitMatHeader(&X, 3, 1, CV_64FC1, x, CV_AUTOSTEP);
  cvGEMM(H_inv, dD, -1, NULL, 0, &X, 0);

  cvReleaseMat(&dD);
  cvReleaseMat(&H);
  cvReleaseMat(&H_inv);

  *xi = x[2];  // offset in scale (interval)
  *xr = x[1];  // offset in row
  *xc = x[0];  // offset in column
}
Next, descriptors are extracted around the feature points.
④ Extracting the feature descriptors
Move to surf.cpp and find getDescriptors(). Since upright was initialised to false, each feature point is first assigned a dominant orientation and the descriptor is extracted from the correspondingly rotated neighbourhood; if upright were true, the descriptor would be extracted from the axis-aligned neighbourhood directly.
void Surf::getDescriptors(bool upright)
{
  if (!ipts.size()) return;

  int ipts_size = (int)ipts.size();

  if (upright)
  {
    // U-SURF: skip the orientation assignment
    for (int i = 0; i < ipts_size; ++i)
    {
      index = i;
      getDescriptor(true);
    }
  }
  else
  {
    // assign an orientation, then extract a rotation-invariant descriptor
    for (int i = 0; i < ipts_size; ++i)
    {
      index = i;
      getOrientation();
      getDescriptor(false);
    }
  }
}
The only difference between the two branches, then, is whether getOrientation() is called before extracting the descriptor. The upright variant ignores rotation and suits image pairs that differ only in scale; the other also provides rotation invariance and is more general.
A few points:
1. As proposed in the paper, samples are taken from a circular region of radius 6 (in units of the scale), which contains 109 sample points in total; readers can verify the count themselves, it is not elaborated here.
2. The direction at each sample point is computed from haarX and haarY, which play the role of dx and dy in the usual gradient formula; getAngle() then yields θ distributed over the full 0 to 2π (rather than the bare arctangent tan⁻¹(haarY/haarX), whose principal range covers only half the circle).
inline float Surf::haarX(int row, int column, int s)
{
  return BoxIntegral(img, row - s/2, column, s, s/2)
    - 1 * BoxIntegral(img, row - s/2, column - s/2, s, s/2);
}

inline float Surf::haarY(int row, int column, int s)
{
  return BoxIntegral(img, row, column - s/2, s/2, s)
    - 1 * BoxIntegral(img, row - s/2, column - s/2, s/2, s);
}

float Surf::getAngle(float X, float Y)
{
  if (X > 0 && Y >= 0)
    return atan(Y/X);
  if (X < 0 && Y >= 0)
    return pi - atan(-Y/X);
  if (X < 0 && Y < 0)
    return pi + atan(Y/X);
  if (X > 0 && Y < 0)
    return 2*pi - atan(-Y/X);
  return 0;
}
The (missing) figure illustrated the wavelets: black regions carry weight -1 and white regions +1, i.e. each response is the white-region integral minus the black-region integral.
Inside getOrientation(), resX and resY are the haarX and haarY responses above, each multiplied by the matching coefficient of the Gaussian weight table gauss25.
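That sampling stage, which the excerpt below elides, looks roughly like this (reconstructed from memory; gauss25 is the Gaussian lookup table in surf.cpp, and id[] folds the signed offsets into its indices):

// Sketch of the sampling stage of Surf::getOrientation()
Ipoint *ipt = &ipts[index];
float gauss = 0.f, scale = ipt->scale;
const int s = fRound(scale), r = fRound(ipt->y), c = fRound(ipt->x);
std::vector<float> resX(109), resY(109), Ang(109);
const int id[] = {6,5,4,3,2,1,0,1,2,3,4,5,6};

int idx = 0;
// Haar responses for the 109 offsets with i*i + j*j < 36 (radius-6 disc)
for (int i = -6; i <= 6; ++i)
{
  for (int j = -6; j <= 6; ++j)
  {
    if (i*i + j*j < 36)
    {
      gauss = static_cast<float>(gauss25[id[i+6]][id[j+6]]);
      resX[idx] = gauss * haarX(r + j*s, c + i*s, 4*s);
      resY[idx] = gauss * haarY(r + j*s, c + i*s, 4*s);
      Ang[idx]  = getAngle(resX[idx], resY[idx]);
      ++idx;
    }
  }
}

The dominant orientation is then found by scanning a sliding window around the circle: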
void Surf::getOrientation()
{
  ......

  // scan a sliding window of width pi/3 around the circle in 0.15-rad steps
  for (ang1 = 0; ang1 < 2*pi; ang1 += 0.15f) {
    ang2 = (ang1 + pi/3.0f > 2*pi ? ang1 - 5.0f*pi/3.0f : ang1 + pi/3.0f);
    sumX = sumY = 0.f;

    for (unsigned int k = 0; k < Ang.size(); ++k)
    {
      const float &ang = Ang[k];

      // is the sample's angle inside the window [ang1, ang2]?
      if (ang1 < ang2 && ang1 < ang && ang < ang2)
      {
        sumX += resX[k];
        sumY += resY[k];
      }
      else if (ang2 < ang1 &&
               ((ang > 0 && ang < ang2) || (ang > ang1 && ang < 2*pi)))
      {
        // the window wraps past 2*pi
        sumX += resX[k];
        sumY += resY[k];
      }
    }

    // the window with the longest summed vector wins
    if (sumX*sumX + sumY*sumY > max)
    {
      max = sumX*sumX + sumY*sumY;
      orientation = getAngle(sumX, sumY);
    }
  }

  ipt->orientation = orientation;
}
Good — at last, the descriptor extraction itself:
void Surf::getDescriptor(bool bUpright)
{
  int y, x, sample_x, sample_y, count = 0;
  int i = 0, ix = 0, j = 0, jx = 0, xs = 0, ys = 0;
  float scale, *desc, dx, dy, mdx, mdy, co, si;
  float gauss_s1 = 0.f, gauss_s2 = 0.f;
  float rx = 0.f, ry = 0.f, rrx = 0.f, rry = 0.f, len = 0.f;
  float cx = -0.5f, cy = 0.f;  // subregion centres for the 4x4 Gaussian weighting

  Ipoint *ipt = &ipts[index];
  scale = ipt->scale;
  x = fRound(ipt->x);
  y = fRound(ipt->y);
  desc = ipt->descriptor;

  if (bUpright)
  {
    co = 1;
    si = 0;
  }
  else
  {
    co = cos(ipt->orientation);
    si = sin(ipt->orientation);
  }

  i = -8;
  // iterate over the 4x4 subregions
  while (i < 12)
  {
    j = -8;
    i = i - 4;
    cx += 1.f;
    cy = -0.5f;

    while (j < 12)
    {
      dx = dy = mdx = mdy = 0.f;
      cy += 1.f;
      j = j - 4;

      ix = i + 5;
      jx = j + 5;

      // rotated centre of this subregion in the original image
      xs = fRound(x + (-jx*scale*si + ix*scale*co));
      ys = fRound(y + ( jx*scale*co + ix*scale*si));

      for (int k = i; k < i + 9; ++k)
      {
        for (int l = j; l < j + 9; ++l)
        {
          // rotated sample point
          sample_x = fRound(x + (-l*scale*si + k*scale*co));
          sample_y = fRound(y + ( l*scale*co + k*scale*si));

          // Gaussian weight centred on the subregion
          gauss_s1 = gaussian(xs - sample_x, ys - sample_y, 2.5f*scale);
          rx = haarX(sample_y, sample_x, 2*fRound(scale));
          ry = haarY(sample_y, sample_x, 2*fRound(scale));

          // rotate the responses back into the keypoint's frame
          rrx = gauss_s1*(-rx*si + ry*co);
          rry = gauss_s1*( rx*co + ry*si);

          dx += rrx;
          dy += rry;
          mdx += fabs(rrx);
          mdy += fabs(rry);
        }
      }

      // second Gaussian weighting over the 4x4 subregion grid
      gauss_s2 = gaussian(cx - 2.0f, cy - 2.0f, 1.5f);

      desc[count++] = dx*gauss_s2;
      desc[count++] = dy*gauss_s2;
      desc[count++] = mdx*gauss_s2;
      desc[count++] = mdy*gauss_s2;

      len += (dx*dx + dy*dy + mdx*mdx + mdy*mdy) * gauss_s2*gauss_s2;

      j += 9;
    }
    i += 9;
  }

  // normalise to unit length for contrast invariance
  len = sqrt(len);
  for (int i = 0; i < 64; ++i)
    desc[i] /= len;
}
Here i and j each take the values -8, -3, 2, 7, so together they span a 7 - (-8) + 1 = 16, i.e. 16×16, sample neighbourhood. The rotated position in the original image corresponding to a subregion is

xs = fRound(x + ( -jx*scale*si + ix*scale*co));
ys = fRound(y + ( jx*scale*co + ix*scale*si));

with

co = cos(ipt->orientation);
si = sin(ipt->orientation);

[ipt->orientation is the feature point's dominant orientation angle.]

There are 4×4 subregions, each sampled as a 9×9 = 81-point block (so adjacent blocks overlap), and each subregion accumulates the sums of dx, dy, |dx|, |dy| over its samples (with the Gaussian weighting factored in), so the final descriptor is a 4×4×4 = 64-dimensional vector.
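With every Ipoint now carrying a 64-D descriptor, two images can be matched by nearest-neighbour search with a ratio test. A minimal sketch (my own illustration; OpenSURF ships an equivalent getMatches() in ipoint.cpp, and the 0.65 ratio mirrors the value used there, to the best of my recollection):

#include <cmath>
#include <limits>
#include <utility>
#include <vector>
#include "ipoint.h"   // Ipoint, IpVec

// Euclidean distance between two 64-D SURF descriptors.
static float descDist(const Ipoint &a, const Ipoint &b)
{
  float sum = 0.f;
  for (int k = 0; k < 64; ++k) {
    float d = a.descriptor[k] - b.descriptor[k];
    sum += d * d;
  }
  return sqrtf(sum);
}

// Keep a pair only when the best distance is well below the second best.
void matchIpoints(const IpVec &ipts1, const IpVec &ipts2,
                  std::vector<std::pair<Ipoint, Ipoint> > &matches)
{
  for (unsigned int i = 0; i < ipts1.size(); ++i)
  {
    float d1 = std::numeric_limits<float>::max(), d2 = d1;
    int best = -1;
    for (unsigned int j = 0; j < ipts2.size(); ++j)
    {
      // the Laplacian sign is a free early-out: blobs of opposite
      // contrast can never be the same feature
      if (ipts1[i].laplacian != ipts2[j].laplacian) continue;

      float d = descDist(ipts1[i], ipts2[j]);
      if (d < d1)      { d2 = d1; d1 = d; best = (int)j; }
      else if (d < d2) { d2 = d; }
    }
    if (best >= 0 && d1 < 0.65f * d2)
      matches.push_back(std::make_pair(ipts1[i], ipts2[best]));
  }
}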
After all this, drawIpoints(img, ipts) back in mainImage() in main.cpp should need no further explanation.
A whole day's work, finally done. Here is a screenshot:
[Screenshot: the detected SURF interest points drawn on the test image.]
Thanks, everyone.
Please credit blue_lg when reposting: http://www.cnblogs.com/blue-lg/archive/2012/07/20/2600436.html