基于开源TiRG的文本检测与提取实现
生活随笔
收集整理的這篇文章主要介紹了
基于开源TiRG的文本检测与提取实现
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
在 http://funkybee.narod.ru/ 中,作者給出了文本檢測和提取的實現,僅有一個 .hpp 文件。為了在 Windows 上編譯通過,這里簡單進行了改動,改動后的 code 如下:
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <vector>
#include <set>

// Text-like region detector.
//
// Entry point: trg::get_textlike_regions(). All intermediate state lives in
// namespace-scope variables, so the functions are neither re-entrant nor
// thread-safe. The definitions below are non-inline; include this file from a
// single translation unit only.
namespace trg {
//---------------------------------------------------------------------------------------------
//---------------------------------------------------------------------------------------------
typedef unsigned char UC;
//---------------------------------------------------------------------------------------------
// Offsets of the 8 neighbours of a pixel, stored as {row offset, column offset}.
// Consecutive entries are adjacent directions; entries 7 and 0 are adjacent too.
const char d8[8][2] = {{1,1},{1,0},{1,-1},{0,-1},{-1,-1},{-1,0},{-1,1},{0,1}};
//---------------------------------------------------------------------------------------------
struct Rgb {UC r, g, b;};
struct Rect {int x1, y1, x2, y2;};   // inclusive corners of a bounding box
struct Dot {int x, y;};
int w;   // image width in pixels, set by get_textlike_regions()
int h;   // image height in pixels, set by get_textlike_regions()
//---------------------------------------------------------------------------------------------
std::vector<std::vector<UC> > lum;    // per-pixel luma, filled by calc_lums()
std::vector<std::vector<UC> > cyx;    // per-pixel contrast threshold, filled by calc_lums()
std::vector<std::vector<char> > nei;  // per-pixel neighbour code, filled by count_neis()
std::vector<std::vector<short> > b;   // (h + 1) x (w + 3) mask, filled by build_b();
                                      // column w + 2 is a per-row "row is active" flag
std::vector<Rect> res;                // output of get_res()
//---------------------------------------------------------------------------------------------
const int dw = 24;          // cell width used by stroke_calc() / build_b()
const int dh = 24;          // cell height used by stroke_calc() / build_b()
const int h_min = 6;        // get_res(): box height must be  > h_min
const int h_max = 66;       // get_res(): box height must be  < h_max
const int ns_min = 2;       // get_res(): box width must be   > height * ns_min
const double pl_min = 0.5;  // get_res(): fill ratio must be  > pl_min
//---------------------------------------------------------------------------------------------
//---------------------------------------------------------------------------------------------

// Debug helper: dumps the mask `b` (the first h rows, first w columns) to the
// text file `path`. Zero cells are written as ' ', other cells as the character
// with code (value + 48).
//
// The file is written through its own FILE handle, so the process' stdout is
// left untouched. If the file cannot be opened the function does nothing.
void print_b(const char* path) {
    FILE* out = fopen(path, "wt");
    if (out == NULL)
        return;
    for (int y = 0; y < h; ++y) {
        for (int x = 0; x < w; ++x)
            fputc(b[y][x] ? (char)(b[y][x] + 48) : ' ', out);
        fputc('\n', out);
    }
    fclose(out);
}

//---------------------------------------------------------------------------------------------

// Fills `lum` and `cyx` (both h x w) from the RGB image `raw`.
//
// The image is split into zones (one zone vertically, up to three or four
// horizontally depending on w % 3). Inside each zone:
//   * lum[y][x] is the ITU-R 601-2 luma of the pixel;
//   * cyx[y][x] is the zone's mean absolute deviation of luma from the zone
//     mean, clamped from below to 35.
//
// Expects w and h to describe `raw` (raw has h rows of w pixels each).
void calc_lums(const std::vector<std::vector<Rgb> > &raw) {
    lum.assign(h, std::vector<UC>(w));
    cyx.assign(h, std::vector<UC>(w));

    // Zone sizes. Clamp to at least 1: with w < 3 the plain `w / 3` is 0, the
    // zone would never advance and the loop below would spin forever.
    const int zh = std::max(1, h / 1);
    const int zw = std::max(1, w / 3);

    for (int y1 = 0; y1 < h; y1 += zh) {
        const int yEnd = std::min(y1 + zh, h);
        for (int x1 = 0; x1 < w; x1 += zw) {
            const int xEnd = std::min(x1 + zw, w);

            // Pass 1: luma of every pixel in the zone and their sum.
            int sm = 0;
            for (int y = y1; y < yEnd; ++y) {
                for (int x = x1; x < xEnd; ++x) {
                    const Rgb px = raw[y][x];
                    // ITU-R 601-2 luma transform:
                    lum[y][x] = (UC)(px.r * 0.299 + px.g * 0.587 + px.b * 0.114);
                    sm += lum[y][x];
                }
            }
            const double sr = sm * 1.0 / (yEnd - y1) / (xEnd - x1);

            // Pass 2: mean absolute deviation from the zone mean.
            double cc = 0;
            for (int y = y1; y < yEnd; ++y)
                for (int x = x1; x < xEnd; ++x)
                    cc += fabs(sr - lum[y][x]);
            cc = cc / (yEnd - y1) / (xEnd - x1);

            // Pass 3: the same threshold for every pixel of the zone.
            const UC threshold = (UC)std::max(35.0, cc);
            for (int y = y1; y < yEnd; ++y)
                for (int x = x1; x < xEnd; ++x)
                    cyx[y][x] = threshold;
        }
    }
}

//---------------------------------------------------------------------------------------------

// Fills `nei` (h x w). Border pixels stay 0. For every interior pixel the
// function counts the neighbours (out of 8) whose luma differs from the pixel's
// luma by more than cyx[i][j]:
//   * count != 3 -> nei = count;
//   * count == 3 -> nei =  3 if the three directions are adjacent in d8 order
//                          (including the wrap-around sets {6,7,0} and {7,0,1}),
//                   nei = -3 otherwise.
void count_neis() {
    nei.assign(h, std::vector<char>(w, 0));

    for (int i = 1; i < h - 1; ++i) {
        for (int j = 1; j < w - 1; ++j) {
            int dirs[8];     // differing directions, ascending because k ascends
            int count = 0;
            for (int k = 0; k < 8; ++k) {
                const int diff = (int)lum[i][j] - lum[i + d8[k][0]][j + d8[k][1]];
                if (abs(diff) > cyx[i][j])
                    dirs[count++] = k;
            }

            if (count == 3) {
                const bool consecutive = (dirs[0] + 2 == dirs[2]);
                // {0,6,7} and {0,1,7} are runs that wrap around index 7 -> 0.
                const bool wraps = (dirs[0] == 0 && dirs[2] == 7 &&
                                    (dirs[1] == 6 || dirs[1] == 1));
                nei[i][j] = (char)((consecutive || wraps) ? count : -count);
            }
            else {
                nei[i][j] = (char)count;
            }
        }
    }
}

//---------------------------------------------------------------------------------------------

// Scores the cell whose top-left corner is (row p1, column q1) and whose size
// is at most dh x dw (clipped to h - 1 / w - 1).
//
// Builds a histogram x[0..8] of the non-negative `nei` values in the cell and
// counts the rows consisting only of zeros (`u`). Returns 0 when any of the
// bins 0..6 is empty, when bins 3 and 6 together cover less than 1/20 of the
// cell, or when the final score falls outside [600, 3000]; otherwise returns
// the score.
int stroke_calc(int p1, int q1) {
    const int p2 = std::min(h - 1, p1 + dh);
    const int q2 = std::min(w - 1, q1 + dw);
    const int nm = (p2 - p1) * (q2 - q1);

    double u = 0.0;
    std::vector<int> x(10);
    for (int i = p1; i < p2; ++i) {
        int fl = 0;
        for (int j = q1; j < q2; ++j) {
            if (nei[i][j] >= 0) ++x[nei[i][j]];
            if (nei[i][j] == 0) ++fl;
        }
        if (fl == q2 - q1) u += 1.0;
    }

    // An empty cell leaves every bin at 0 and exits here, so the divisions
    // below never see nm == 0 or p2 == p1.
    for (int k = 0; k < 7; ++k)
        if (x[k] == 0) return 0;
    if (nm > (x[3] + x[6]) * 20)
        return 0;

    const int cnt1 = x[3] * 16 + x[6] * 16;
    const int cnt2 = x[0];
    x[9] = int(cnt1 * cnt2 * 1.0 / nm);
    x[9] = int(x[9] * (1 + u / (p2 - p1)) * (1 + u / (p2 - p1)));
    if (x[9] < 600 || x[9] > 3000) x[9] = 0;
    return x[9];
}

//---------------------------------------------------------------------------------------------

// Builds the mask `b` ((h + 1) x (w + 3)) from `nei`.
//
// Step 1 (run twice, with a vertical offset of 0 and dh / 2): score every
//   dh x dw cell with stroke_calc(); a row of cells is kept when it contains
//   three horizontally adjacent non-zero cells whose scores sum to more than
//   3 * 800. In kept rows, every pixel with nei != 0 inside a non-zero cell is
//   set to 1 and the pixel row is flagged in column w + 2.
// Step 2: in every flagged pixel row, look for horizontal runs (gaps shorter
//   than 30 are tolerated); runs with more than 20 set pixels and a density
//   above 0.4 get 2 added to each of their cells. Rows whose accepted runs
//   total less than 40 pixels are cleared.
// Step 3: vertical groups of fewer than 8 consecutive flagged rows are cleared.
void build_b() {
    const int ww = w / dw;
    const int hh = h / dh;
    b.assign(h + 1, std::vector<short>(w + 3));

    // ---- Step 1 -----------------------------------------------------------------------------
    for (int dy = 0; dy <= dh / 2; dy += dh / 2) {
        // Column ww + 2 of `m` is the "cell row is kept" flag.
        std::vector<std::vector<int> > m(hh + 1, std::vector<int>(ww + 3));

        for (int i = 1 + dy; i < h; i += dh)
            for (int j = 1; j < w; j += dw)
                m[(i - 1 - dy) / dh][(j - 1) / dw] = stroke_calc(i, j);

        for (int i = 0; i < hh + 1; ++i) {
            for (int j = 0; j < ww; ++j) {
                if (m[i][j] != 0 && m[i][j + 1] != 0 && m[i][j + 2] != 0 &&
                    m[i][j] + m[i][j + 1] + m[i][j + 2] > 3 * 800) {
                    m[i][ww + 2] = 1;
                    break;
                }
            }
        }

        for (int i = 0; i < hh + 1; ++i) {
            if (m[i][ww + 2] == 0)
                continue;
            for (int j = 0; j < ww + 1; ++j) {
                if (m[i][j] == 0)
                    continue;
                const int h1 = i * dh + 1 + dy;
                const int h2 = std::min(h - 1, h1 + dh);
                const int w1 = j * dw + 1;
                const int w2 = std::min(w - 1, w1 + dw);
                for (int p = h1; p < h2; ++p) {
                    b[p][w + 2] = 1;
                    for (int q = w1; q < w2; ++q)
                        if (nei[p][q] != 0) b[p][q] = 1;
                }
            }
        }
    }
    //print_b("C:/b1.txt");

    // ---- Step 2 -----------------------------------------------------------------------------
    for (int i = 0; i < h; ++i) {
        if (b[i][w + 2] == 0) continue;
        int j = 0;
        int ss = 0;   // total length of the accepted runs in this row
        while (j < w) {
            if (b[i][j] && b[i][j + 1] && b[i][j + 2]) {
                const int js = j;   // run start
                int sm = 0;         // set pixels inside the run
                int gap = 0;        // length of the current trailing gap
                while (j < w && gap < 30) {
                    if (b[i][j]) {
                        gap = 0;
                        ++sm;
                    }
                    else
                        ++gap;
                    ++j;
                }
                if (sm > 20 && sm > (j - js - gap) * 0.4) {
                    ss += j - js - gap;
                    for (int k = js; k < j - gap; ++k)
                        b[i][k] += 2;
                }
                ++j;
            }
            else
                ++j;
        }
        if (ss < 40) b[i].assign(w + 3, 0);
        else b[i][w + 2] = 1;
    }
    //print_b("C:/b2.txt");

    // ---- Step 3 -----------------------------------------------------------------------------
    // Row h is never flagged, so it terminates a group that reaches the bottom.
    int cnt = 0;
    for (int i = 0; i < h + 1; ++i) {
        if (b[i][w + 2] == 0) {
            if (cnt > 0 && cnt < 8) {
                for (int ii = i - cnt; ii < i; ++ii)
                    b[ii].assign(w + 3, 0);
            }
            cnt = 0;
        }
        else
            ++cnt;
    }
    //print_b("C:/b3.txt");
}

//---------------------------------------------------------------------------------------------

// Collects the bounding boxes of the 8-connected components of cells with
// b > 1 into `res`. Visited cells are marked by negating their value, so `b`
// is modified.
//
// A component is reported when its fill ratio (pixel count / box area) exceeds
// pl_min, its box height lies strictly between h_min and h_max, and its box
// width exceeds height * ns_min.
void get_res() {
    res.clear();
    for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
            if (b[i][j] <= 1)
                continue;

            // The bounding box starts at the seed pixel itself; starting from
            // an "empty" box would leave the seed outside the reported box
            // whenever no other pixel shares its row or column extreme.
            int cnt = 1;
            int x1 = j;
            int x2 = j;
            int y1 = i;
            int y2 = i;
            b[i][j] *= -1;

            Dot dt;
            dt.x = j;
            dt.y = i;
            std::vector<Dot> frontier, next;
            frontier.push_back(dt);

            // Breadth-first flood fill, one frontier layer per iteration.
            while (!frontier.empty()) {
                next.clear();
                for (size_t z = 0; z < frontier.size(); ++z) {
                    const Dot cur = frontier[z];
                    for (size_t k = 0; k < 8; ++k) {
                        const int yy = cur.y + d8[k][0];
                        const int xx = cur.x + d8[k][1];
                        if (xx < 0 || xx >= w || yy < 0 || yy >= h)
                            continue;
                        if (b[yy][xx] > 1) {
                            b[yy][xx] *= -1;
                            dt.x = xx;
                            dt.y = yy;
                            next.push_back(dt);
                            x1 = std::min(x1, xx);
                            x2 = std::max(x2, xx);
                            y1 = std::min(y1, yy);
                            y2 = std::max(y2, yy);
                        }
                    }
                }
                frontier.swap(next);
                cnt += (int)frontier.size();
            }

            const int dx = x2 - x1 + 1;
            const int dy = y2 - y1 + 1;
            const double pl = cnt * 1.0 / dx / dy;
            if (pl > pl_min && dy > h_min && dy < h_max && dx > dy * ns_min) {
                Rect r = {x1, y1, x2, y2};
                res.push_back(r);
            }
        }
    }
}

//---------------------------------------------------------------------------------------------

// Runs the whole pipeline (calc_lums -> count_neis -> build_b -> get_res) on
// `raw`, an image given as rows of RGB pixels, and returns the bounding boxes
// found by get_res().
//
// Returns an empty vector when `raw` has no rows, when its rows are empty, or
// when the rows do not all have the same length (the pipeline indexes every
// row up to the length of the first one).
std::vector<Rect> get_textlike_regions(const std::vector<std::vector<Rgb> > &raw) {
    res.clear();
    if (raw.empty()) return res;

    const size_t width = raw[0].size();
    if (width == 0) return res;
    for (size_t y = 1; y < raw.size(); ++y)
        if (raw[y].size() != width) return res;

    w = static_cast<int>(width);
    h = static_cast<int>(raw.size());

    calc_lums(raw);
    count_neis();
    build_b();
    get_res();
    return res;
}

//---------------------------------------------------------------------------------------------
} // End of namespace trg;
測試代碼如下:
#include "funset.hpp"
#include <string>
#include <opencv2/opencv.hpp>
#include "trg.hpp"

// Loads a test image, runs trg::get_textlike_regions() on it, draws every
// returned rectangle in green and writes the annotated image next to the input.
//
// Returns 0 on success and -1 when the image cannot be read, does not have
// three channels, or the result cannot be written.
int test_TiRG()
{
	const std::string name{ "E:/GitCode/OCR_Test/test_data/tirg.jpg" };
	const std::string result_name{ "E:/GitCode/OCR_Test/test_data/result_tirg.jpg" };

	cv::Mat mat = cv::imread(name, 1);
	if (!mat.data || mat.channels() != 3) {
		fprintf(stderr, "read image fail: %s\n", name.c_str());
		return -1;
	}

	// Copy the image into the row-of-pixels layout expected by trg. The Vec3b
	// components are read in reverse order (index 2 -> r, 1 -> g, 0 -> b).
	const int width{ mat.cols }, height{ mat.rows };
	std::vector<std::vector<trg::Rgb>> data(height, std::vector<trg::Rgb>(width));
	for (int y = 0; y < height; ++y) {
		for (int x = 0; x < width; ++x) {
			const cv::Vec3b pixel = mat.at<cv::Vec3b>(cv::Point(x, y));
			const trg::Rgb rgb = { pixel.val[2], pixel.val[1], pixel.val[0] };
			data[y][x] = rgb;
		}
	}

	const std::vector<trg::Rect> rect = trg::get_textlike_regions(data);
	for (size_t i = 0; i < rect.size(); ++i) {
		cv::rectangle(mat, cv::Point(rect[i].x1, rect[i].y1),
			cv::Point(rect[i].x2, rect[i].y2), cv::Scalar(0, 255, 0));
	}

	// imwrite reports failure through its return value; do not ignore it.
	if (!cv::imwrite(result_name, mat)) {
		fprintf(stderr, "write image fail: %s\n", result_name.c_str());
		return -1;
	}
	return 0;
}
原圖及執行結果如下:
GitHub: https://github.com/fengbingchun/OCR_Test
總結
以上是生活随笔為你收集整理的基于开源TiRG的文本检测与提取实现的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: OpenCV支持中文字符输出实现
- 下一篇: 吴恩达老师深度学习视频课笔记:优化算法