當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

TLD（Tracking-Learning-Detection）学习与源码理解之（四）

發布時間：2025/3/21 编程问答 17 豆豆

生活随笔收集整理的這篇文章主要介紹了 TLD（Tracking-Learning-Detection）学习与源码理解之（四），小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

TLD（Tracking-Learning-Detection）學習與源碼理解之（四）

zouxy09@qq.com

http://blog.csdn.net/zouxy09

??????

下面是自己在看論文和這些大牛的分析過程中，對代碼進行的一些理解。但是由于自己接觸圖像處理和機器視覺沒多久，另外由于自己編程能力比較弱，所以分析過程可能會有不少的錯誤，希望各位不吝指正。而且，因為編程很多地方不懂，所以注釋得非常亂，還請海涵。

run_tld.cpp

#include <opencv2/opencv.hpp>
#include <tld_utils.h>
#include <iostream>
#include <sstream>   // std::istringstream: used to split the bounding-box line on ','
#include <fstream>   // std::ifstream
#include <cstdlib>   // atoi
#include <cstring>   // strcmp
#include <TLD.h>
#include <stdio.h>
using namespace cv;
using namespace std;

// Global variables
Rect box;                  // bounding box being drawn / initial target box
bool drawing_box = false;  // true while the left mouse button is held down
bool gotBB = false;        // true once an initial bounding box is available
bool tl = true;            // track-and-learn flag handed to TLD::processFrame
bool rep = false;          // replay the video a second time (set by -r)
bool fromfile = false;     // true when the input is a video file (set by -s)
string video;              // path of the input video (set by -s)

// Reads the initial bounding box from a text file and stores it in the
// global `box`.
//
// The first line of the file holds four comma-separated integers
// "x1,y1,x2,y2" (top-left and bottom-right corners), for example
// datasets/06_car/init.txt contains:
//   142,125,232,164
// The box is stored as (x1, y1, x2-x1, y2-y1).
//
// If the file cannot be opened an error is printed and `box` is left empty
// (0,0,0,0), which is the value the parsing below would also produce for an
// unreadable file; main() then rejects it as too small and falls back to
// mouse selection.
void readBB(char* file){
  ifstream bb_file(file);
  if (!bb_file.is_open()){
    cerr << "could not open bounding box file: " << file << endl;
    box = Rect();
    return;
  }
  string line;
  getline(bb_file, line);            // only the first line is used
  istringstream linestream(line);
  string x1, y1, x2, y2;
  getline(linestream, x1, ',');
  getline(linestream, y1, ',');
  getline(linestream, x2, ',');
  getline(linestream, y2, ',');
  int x = atoi(x1.c_str());
  int y = atoi(y1.c_str());
  int w = atoi(x2.c_str()) - x;
  int h = atoi(y2.c_str()) - y;
  box = Rect(x, y, w, h);
}

// Mouse callback used to draw the initial bounding box in the "TLD" window.
// Press: start a new box; move: resize it; release: normalise negative
// width/height and mark the box as acquired.
void mouseHandler(int event, int x, int y, int flags, void *param){
  switch (event){
  case CV_EVENT_MOUSEMOVE:
    if (drawing_box){
      box.width = x - box.x;
      box.height = y - box.y;
    }
    break;
  case CV_EVENT_LBUTTONDOWN:
    drawing_box = true;
    box = Rect(x, y, 0, 0);
    break;
  case CV_EVENT_LBUTTONUP:
    drawing_box = false;
    // The user may have dragged up/left: flip so width/height are positive.
    if (box.width < 0){
      box.x += box.width;
      box.width *= -1;
    }
    if (box.height < 0){
      box.y += box.height;
      box.height *= -1;
    }
    gotBB = true;  // a bounding box is now available
    break;
  }
}

// Prints the command-line usage.
void print_help(char** argv){
  printf("use:\n     %s -p /path/parameters.yml\n", argv[0]);
  // The flag parsed by read_options() is "-no_tl"; the help text used to
  // advertise a non-existent "-tl" option.
  printf("-s source video\n-b bounding box file\n-no_tl train only in the first frame (no tracking, no learning)\n-r repeat\n");
}

// Parses the command-line arguments.
//   -b <file>   read the initial bounding box from <file>
//   -s <file>   read frames from the video <file> instead of the camera
//   -p <file>   open the parameters file (parameters.yml) into `fs`
//   -no_tl      train only in the first frame (no tracking, no learning)
//   -r          repeat the video: first pass learns, second pass detects
// Options that need a value print the usage when the value is missing.
void read_options(int argc, char** argv, VideoCapture& capture, FileStorage &fs){
  for (int i = 0; i < argc; i++){
    // argv[argc] is a null pointer, so a value only exists when i+1 < argc.
    // (The original test `argc > i` was always true inside this loop.)
    const bool has_value = (i + 1 < argc);
    if (strcmp(argv[i], "-b") == 0){
      if (has_value){
        readBB(argv[i+1]);
        gotBB = true;
      }
      else
        print_help(argv);
    }
    if (strcmp(argv[i], "-s") == 0){
      if (has_value){
        video = string(argv[i+1]);
        capture.open(video);
        fromfile = true;
      }
      else
        print_help(argv);
    }
    // parameters.yml is a YAML file read through cv::FileStorage.
    if (strcmp(argv[i], "-p") == 0){
      if (has_value){
        fs.open(argv[i+1], FileStorage::READ);
      }
      else
        print_help(argv);
    }
    if (strcmp(argv[i], "-no_tl") == 0){
      tl = false;
    }
    if (strcmp(argv[i], "-r") == 0){
      rep = true;
    }
  }
}

/*
Running the program:
%To run from camera
./run_tld -p ../parameters.yml
%To run from file
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg
%To init bounding box from file
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg -b ../datasets/06_car/init.txt
%To train only in the first frame (no tracking, no learning)
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg -b ../datasets/06_car/init.txt -no_tl
%To test the final detector (Repeat the video, first time learns, second time detects)
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg -b ../datasets/06_car/init.txt -r
*/
// Initialises TLD on the first frame and then processes the input frame by
// frame, drawing the tracked box and printing the detection rate.
// Returns 0 on normal exit (including 'q'), 1 on a start-up failure.
int main(int argc, char * argv[]){
  VideoCapture capture;
  FileStorage fs;  // holds the parameters file given with -p

  // Read options
  read_options(argc, argv, capture, fs);

  // Init camera: only needed when no video file was requested.
  if (!fromfile)
    capture.open(0);
  if (!capture.isOpened()){
    cout << "capture device failed to open!" << endl;
    return 1;
  }
  // The parameters file is mandatory (see print_help); without it TLD would
  // be configured from an empty node.
  if (!fs.isOpened()){
    cout << "parameters file failed to open!" << endl;
    print_help(argv);
    return 1;
  }

  // Register mouse callback to draw the bounding box
  cvNamedWindow("TLD", CV_WINDOW_AUTOSIZE);
  cvSetMouseCallback("TLD", mouseHandler, NULL);

  // TLD framework
  TLD tld;
  // Read parameters file
  tld.read(fs.getFirstTopLevelNode());

  Mat frame;
  Mat last_gray;
  Mat first;
  if (fromfile){
    capture >> frame;
    if (frame.empty()){
      cout << "could not read the first frame of " << video << endl;
      return 1;
    }
    cvtColor(frame, last_gray, CV_RGB2GRAY);
    frame.copyTo(first);  // keep the first frame for the selection loop
  }
  else{
    // NOTE(review): the width is 340 although the accompanying comment in
    // the original spoke of 320x240; value kept as in the original - confirm.
    capture.set(CV_CAP_PROP_FRAME_WIDTH, 340);
    capture.set(CV_CAP_PROP_FRAME_HEIGHT, 240);
  }

  // Initialization: loop until a bounding box of acceptable size is
  // available. Patches are sampled inside the box, so it must not be smaller
  // than min_win from the parameters file.
  const int min_win = (int)fs.getFirstTopLevelNode()["min_win"];
  for (;;){
    while (!gotBB){
      if (!fromfile){
        capture >> frame;
      }
      else
        first.copyTo(frame);
      cvtColor(frame, last_gray, CV_RGB2GRAY);
      drawBox(frame, box);  // show the box while it is being drawn
      imshow("TLD", frame);
      if (cvWaitKey(33) == 'q')
        return 0;
    }
    if (min(box.width, box.height) >= min_win)
      break;
    cout << "Bounding box too small, try again." << endl;
    gotBB = false;
  }

  // With camera input and a box supplied by -b the selection loop never
  // runs, so no frame has been grabbed yet: grab one for the initialisation.
  if (last_gray.empty()){
    capture >> frame;
    if (frame.empty()){
      cout << "capture device failed to deliver a frame!" << endl;
      return 1;
    }
    cvtColor(frame, last_gray, CV_RGB2GRAY);
  }

  // Remove callback: the initial box is fixed from here on.
  cvSetMouseCallback("TLD", NULL, NULL);
  // Labels now match the argument order (width, height).
  printf("Initial Bounding Box = x:%d y:%d w:%d h:%d\n", box.x, box.y, box.width, box.height);

  // Output file
  FILE *bb_file = fopen("bounding_boxes.txt", "w");
  if (bb_file == NULL){
    cout << "could not open bounding_boxes.txt for writing" << endl;
    return 1;
  }

  // TLD initialization
  tld.init(last_gray, box, bb_file);

  // Run-time
  Mat current_gray;
  BoundingBox pbox;
  vector<Point2f> pts1;
  vector<Point2f> pts2;
  bool status = true;   // whether the last box was found
  int frames = 1;       // number of frames processed so far
  int detections = 1;   // number of frames in which the target was found

  for (;;){
    while (capture.read(frame)){
      // get frame
      cvtColor(frame, current_gray, CV_RGB2GRAY);
      // Process Frame
      tld.processFrame(last_gray, current_gray, pts1, pts2, pbox, status, tl, bb_file);
      // Draw Points
      if (status){
        drawPoints(frame, pts1);
        drawPoints(frame, pts2, Scalar(0, 255, 0));  // green in BGR order
        drawBox(frame, pbox);
        detections++;
      }
      // Display
      imshow("TLD", frame);
      // swap points and images
      swap(last_gray, current_gray);
      pts1.clear();
      pts2.clear();
      frames++;
      printf("Detection rate: %d/%d\n", detections, frames);
      if (cvWaitKey(33) == 'q')
        break;
    }
    if (!rep)
      break;
    // Second pass (-r): replay the video with learning switched off and
    // write the results to a separate file.
    rep = false;
    tl = false;
    fclose(bb_file);
    bb_file = fopen("final_detector.txt", "w");
    if (bb_file == NULL){
      cout << "could not open final_detector.txt for writing" << endl;
      return 1;
    }
    //capture.set(CV_CAP_PROP_POS_AVI_RATIO,0);
    capture.release();
    capture.open(video);
  }
  fclose(bb_file);
  return 0;
}

tld_utils.cpp

#include <tld_utils.h>
#include <algorithm>  // nth_element
#include <vector>
using namespace cv;
using namespace std;

/* vector is part of the C++ standard library: a dynamic array that can hold
   objects of any one type and grow or shrink at run time. It lives in
   namespace std and is declared in <vector>. */

// Draws the rectangle `box` on `image` with the given colour and line
// thickness (CV_FILLED fills the rectangle, see createMask).
void drawBox(Mat& image, CvRect box, Scalar color, int thick){
  rectangle(image, cvPoint(box.x, box.y),
            cvPoint(box.x + box.width, box.y + box.height), color, thick);
}

// Draws every point of `points` on `image` as a circle of radius 2 and
// thickness 1 in the given colour.
// The original also built a rounded `center` point that was never used;
// circle() is still passed *i exactly as before.
void drawPoints(Mat& image, vector<Point2f> points, Scalar color){
  for (vector<Point2f>::const_iterator i = points.begin(), ie = points.end(); i != ie; ++i){
    circle(image, *i, 2, color, 1);
  }
}

// Returns an 8-bit single-channel mask with the size of `image` that is 255
// inside `box` and 0 elsewhere.
Mat createMask(const Mat& image, CvRect box){
  Mat mask = Mat::zeros(image.rows, image.cols, CV_8U);
  drawBox(mask, box, Scalar::all(255), CV_FILLED);
  return mask;
}

// Returns the median of `v`, i.e. the element that would be at index
// size()/2 if `v` were sorted (the upper middle element for an even count).
//
// nth_element places that element at position n with nothing larger before
// it and nothing smaller after it, without fully sorting the vector. The
// argument is taken by value because nth_element reorders it.
//
// An empty vector used to index v[0] (undefined behaviour); it now yields
// 0.0f.
float median(vector<float> v){
  if (v.empty())
    return 0.0f;
  const size_t n = v.size() / 2;  // integer division; no floor() needed
  nth_element(v.begin(), v.begin() + n, v.end());
  return v[n];
}

// <algorithm> is the header that declares random_shuffle.
// Shuffling, illustrated with a deck of cards: assume an array poker[52]
// holds the card values 1-52. Walk over the array and, for each position,
// draw a random number RandNum in [0, 52) and swap the current element with
// the element at index RandNum. After the loop every card has been swapped
// once and the deck is shuffled:
/*
for (int i = 0; i < 52; ++i)
{
    int RandNum = rand() % 52;
    int tmp = poker[i];
    poker[i] = poker[RandNum];
    poker[RandNum] = tmp;
}
*/
// The traditional way to get random numbers in a given range is to call the
// C random function and then scale the result into the range. That has at
// least two drawbacks: the scaled result is often skewed, and only integers
// are supported.
// C++ offers a better solution: the STL random_shuffle() algorithm. The best
// way to produce a random set of values from a given range is to create a
// sequence (a vector or built-in array) holding every value of the range in
// ascending order - e.g. a vector filled with 0..99 - and then shuffle it
// with random_shuffle().
// NOTE: std::random_shuffle was deprecated in C++14 and removed in C++17;
// switch to std::shuffle if the project moves to a newer language standard.

// Returns the integers begin, begin+1, ..., end-1 in random order.
//
// begin  first value of the range (inclusive)
// end    end of the range (exclusive)
//
// An empty or reversed range (end <= begin) yields an empty vector.
// The original wrote indexes[i] = i, which runs past the end of the vector
// whenever begin != 0; the slot for value i is i - begin.
std::vector<int> index_shuffle(int begin,int end){
  if (end <= begin)
    return std::vector<int>();
  std::vector<int> indexes(end - begin);
  for (int i = begin; i < end; i++){
    indexes[i - begin] = i;
  }
  std::random_shuffle(indexes.begin(), indexes.end());
  return indexes;
}

總結(jié)

以上是生活随笔為你收集整理的TLD（Tracking-Learning-Detection）学习与源码理解之（四）的全部內(nèi)容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網(wǎng)站內(nèi)容還不錯，歡迎將生活随笔推薦給好友。

上一篇： TLD（Tracking-Learnin
下一篇： TLD（Tracking-Learnin