My favorites | Sign in
Project Logo
                
Changes to /trunk/ccmain/baseapi.cpp
r8 vs. r9   Edit
  Compare: vs.   Format:
Revision r9
Go to: 
Project members, sign in to write a code review
/trunk/ccmain/baseapi.cpp   r8 /trunk/ccmain/baseapi.cpp   r9
1 /********************************************************************** 1 /**********************************************************************
2 * File: baseapi.cpp 2 * File: baseapi.cpp
3 * Description: Simple API for calling tesseract. 3 * Description: Simple API for calling tesseract.
4 * Author: Ray Smith 4 * Author: Ray Smith
5 * Created: Fri Oct 06 15:35:01 PDT 2006 5 * Created: Fri Oct 06 15:35:01 PDT 2006
6 * 6 *
7 * (C) Copyright 2006, Google Inc. 7 * (C) Copyright 2006, Google Inc.
8 ** Licensed under the Apache License, Version 2.0 (the "License"); 8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License. 9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at 10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0 11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software 12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and 15 ** See the License for the specific language governing permissions and
16 ** limitations under the License. 16 ** limitations under the License.
17 * 17 *
18 **********************************************************************/ 18 **********************************************************************/
19 19
20 #include "baseapi.h" 20 #include "baseapi.h"
21 #include <iostream> 21 #include <iostream>
22 #include <math.h> 22 #include <math.h>
23 23
24 24
25 using namespace std; 25 using namespace std;
26 26
27 #define min(a, b) (((a) < (b)) ? (a) : (b)) 27 #define min(a, b) (((a) < (b)) ? (a) : (b))
28 #define max(a, b) (((a) > (b)) ? (a) : (b)) 28 #define max(a, b) (((a) > (b)) ? (a) : (b))
29 29
30 // Include automatically generated configuration file if running autoconf. 30 // Include automatically generated configuration file if running autoconf.
31 #ifdef HAVE_CONFIG_H 31 #ifdef HAVE_CONFIG_H
32 #include "config_auto.h" 32 #include "config_auto.h"
33 #endif 33 #endif
34 34
35 #ifdef HAVE_LIBLEPT 35 #ifdef HAVE_LIBLEPT
36 // Include leptonica library only if autoconf (or makefile etc) tell us to. 36 // Include leptonica library only if autoconf (or makefile etc) tell us to.
37 #include "allheaders.h" 37 #include "allheaders.h"
38 #endif 38 #endif
39 39
40 #include "tessedit.h" 40 #include "tessedit.h"
41 #include "ocrclass.h" 41 #include "ocrclass.h"
42 #include "pageres.h" 42 #include "pageres.h"
43 #include "tessvars.h" 43 #include "tessvars.h"
44 #include "control.h" 44 #include "control.h"
45 #include "applybox.h" 45 #include "applybox.h"
46 #include "pgedit.h" 46 #include "pgedit.h"
47 #include "varabled.h" 47 #include "varabled.h"
48 #include "variables.h" 48 #include "variables.h"
49 #include "output.h" 49 #include "output.h"
50 #include "globals.h" 50 #include "globals.h"
51 #include "adaptmatch.h" 51 #include "adaptmatch.h"
52 #include "edgblob.h" 52 #include "edgblob.h"
53 #include "tessbox.h" 53 #include "tessbox.h"
54 #include "tordvars.h" 54 #include "tordvars.h"
55 #include "imgs.h" 55 #include "imgs.h"
56 #include "makerow.h" 56 #include "makerow.h"
57 #include "tstruct.h" 57 #include "tstruct.h"
58 #include "tessout.h" 58 #include "tessout.h"
59 #include "tface.h" 59 #include "tface.h"
60 #include "permute.h" 60 #include "permute.h"
61 61
62 BOOL_VAR(tessedit_resegment_from_boxes, FALSE, 62 BOOL_VAR(tessedit_resegment_from_boxes, FALSE,
63 "Take segmentation and labeling from box file"); 63 "Take segmentation and labeling from box file");
64 BOOL_VAR(tessedit_train_from_boxes, FALSE, 64 BOOL_VAR(tessedit_train_from_boxes, FALSE,
65 "Generate training data from boxed chars"); 65 "Generate training data from boxed chars");
66 66
67 // Minimum sensible image size to be worth running tesseract. 67 // Minimum sensible image size to be worth running tesseract.
68 const int kMinRectSize = 10; 68 const int kMinRectSize = 10;
69 69
70 extern bool connected_script; 70 extern bool connected_script;
71 71
72 static STRING input_file = "noname.tif"; 72 static STRING input_file = "noname.tif";
73 73
74 // Set the value of an internal "variable" (of either old or new types). 74 // Set the value of an internal "variable" (of either old or new types).
75 // Supply the name of the variable and the value as a string, just as 75 // Supply the name of the variable and the value as a string, just as
76 // you would in a config file. 76 // you would in a config file.
77 // Returns false if the name lookup failed. 77 // Returns false if the name lookup failed.
78 bool TessBaseAPI::SetVariable(const char* variable, const char* value) { 78 bool TessBaseAPI::SetVariable(const char* variable, const char* value) {
79 if (set_new_style_variable(variable, value)) 79 if (set_new_style_variable(variable, value))
80 return true; 80 return true;
81 return set_old_style_variable(variable, value); 81 return set_old_style_variable(variable, value);
82 } 82 }
83 83
84 void TessBaseAPI::SimpleInit(const char* datapath, 84 void TessBaseAPI::SimpleInit(const char* datapath,
85 const char* language, 85 const char* language,
86 bool numeric_mode) { 86 bool numeric_mode) {
87 InitWithLanguage(datapath, NULL, language, NULL, numeric_mode, 0, NULL); 87 InitWithLanguage(datapath, NULL, language, NULL, numeric_mode, 0, NULL);
88 } 88 }
89 89
90 // Start tesseract. 90 // Start tesseract.
91 // The datapath must be the name of the data directory or some other file 91 // The datapath must be the name of the data directory or some other file
92 // in which the data directory resides (for instance argv[0].) 92 // in which the data directory resides (for instance argv[0].)
93 // The configfile is the name of a file in the tessconfigs directory 93 // The configfile is the name of a file in the tessconfigs directory
94 // (eg batch) or NULL to run on defaults. 94 // (eg batch) or NULL to run on defaults.
95 // Outputbase may also be NULL, and is the basename of various output files. 95 // Outputbase may also be NULL, and is the basename of various output files.
96 // If the output of any of these files is enabled, then a name nmust be given. 96 // If the output of any of these files is enabled, then a name nmust be given.
97 // If numeric_mode is true, only possible digits and roman numbers are 97 // If numeric_mode is true, only possible digits and roman numbers are
98 // returned. Returns 0 if successful. Crashes if not. 98 // returned. Returns 0 if successful. Crashes if not.
99 // The argc and argv may be 0 and NULL respectively. They are used for 99 // The argc and argv may be 0 and NULL respectively. They are used for
100 // providing config files for debug/display purposes. 100 // providing config files for debug/display purposes.
101 // TODO(rays) get the facts straight. Is it OK to call 101 // TODO(rays) get the facts straight. Is it OK to call
102 // it more than once? Make it properly check for errors and return them. 102 // it more than once? Make it properly check for errors and return them.
103 int TessBaseAPI::Init(const char* datapath, const char* outputbase, 103 int TessBaseAPI::Init(const char* datapath, const char* outputbase,
104 const char* configfile, bool numeric_mode, 104 const char* configfile, bool numeric_mode,
105 int argc, char* argv[]) { 105 int argc, char* argv[]) {
106 return InitWithLanguage(datapath, outputbase, NULL, configfile, 106 return InitWithLanguage(datapath, outputbase, NULL, configfile,
107 numeric_mode, argc, argv); 107 numeric_mode, argc, argv);
108 } 108 }
109 109
110 // Start tesseract. 110 // Start tesseract.
111 // Similar to Init() except that it is possible to specify the language. 111 // Similar to Init() except that it is possible to specify the language.
112 // Language is the code of the language for which the data will be loaded. 112 // Language is the code of the language for which the data will be loaded.
113 // (Codes follow ISO 639-3.) If it is NULL, english (eng) will be loaded. 113 // (Codes follow ISO 639-3.) If it is NULL, english (eng) will be loaded.
114 int TessBaseAPI::InitWithLanguage(const char* datapath, const char* outputbase, 114 int TessBaseAPI::InitWithLanguage(const char* datapath, const char* outputbase,
115 const char* language, const char* configfile, 115 const char* language, const char* configfile,
116 bool numeric_mode, int argc, char* argv[]) { 116 bool numeric_mode, int argc, char* argv[]) {
117 int result = init_tesseract(datapath, outputbase, language, 117 int result = init_tesseract(datapath, outputbase, language,
118 configfile, argc, argv); 118 configfile, argc, argv);
119 119
120 bln_numericmode.set_value(numeric_mode); 120 bln_numericmode.set_value(numeric_mode);
121 return result; 121 return result;
122 } 122 }
123 123
124 // Init the lang model component of Tesseract 124 // Init the lang model component of Tesseract
125 int TessBaseAPI::InitLangMod(const char* datapath, const char* outputbase, 125 int TessBaseAPI::InitLangMod(const char* datapath, const char* outputbase,
126 const char* language, const char* configfile, 126 const char* language, const char* configfile,
127 bool numeric_mode, int argc, char* argv[]) { 127 bool numeric_mode, int argc, char* argv[]) {
128 return init_tesseract_lm(datapath, outputbase, language, 128 return init_tesseract_lm(datapath, outputbase, language,
129 configfile, argc, argv); 129 configfile, argc, argv);
130 } 130 }
131 131
132 // Set the name of the input file. Needed only for training and 132 // Set the name of the input file. Needed only for training and
133 // loading a UNLV zone file. 133 // loading a UNLV zone file.
134 void TessBaseAPI::SetInputName(const char* name) { 134 void TessBaseAPI::SetInputName(const char* name) {
135 input_file = name; 135 input_file = name;
136 } 136 }
137 137
138 // Recognize a rectangle from an image and return the result as a string. 138 // Recognize a rectangle from an image and return the result as a string.
139 // May be called many times for a single Init. 139 // May be called many times for a single Init.
140 // Currently has no error checking. 140 // Currently has no error checking.
141 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given. 141 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
142 // Palette color images will not work properly and must be converted to 142 // Palette color images will not work properly and must be converted to
143 // 24 bit. 143 // 24 bit.
144 // Binary images of 1 bit per pixel may also be given but they must be 144 // Binary images of 1 bit per pixel may also be given but they must be
145 // byte packed with the MSB of the first byte being the first pixel, and a 145 // byte packed with the MSB of the first byte being the first pixel, and a
146 // one pixel is WHITE. For binary images set bytes_per_pixel=0. 146 // one pixel is WHITE. For binary images set bytes_per_pixel=0.
147 // The recognized text is returned as a char* which (in future will be coded 147 // The recognized text is returned as a char* which (in future will be coded
148 // as UTF8 and) must be freed with the delete [] operator. 148 // as UTF8 and) must be freed with the delete [] operator.
149 char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, 149 char* TessBaseAPI::TesseractRect(const unsigned char* imagedata,
150 int bytes_per_pixel, 150 int bytes_per_pixel,
151 int bytes_per_line, 151 int bytes_per_line,
152 int left, int top, 152 int left, int top,
153 int width, int height) { 153 int width, int height) {
154 if (width < kMinRectSize || height < kMinRectSize) 154 if (width < kMinRectSize || height < kMinRectSize)
155 return NULL; // Nothing worth doing. 155 return NULL; // Nothing worth doing.
156 156
157 // Copy/Threshold the image to the tesseract global page_image. 157 // Copy/Threshold the image to the tesseract global page_image.
158 CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line, 158 CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line,
159 left, top, width, height); 159 left, top, width, height);
160 160
161 return RecognizeToString(); 161 return RecognizeToString();
162 } 162 }
163 163
164 // As TesseractRect but produces a box file as output. 164 // As TesseractRect but produces a box file as output.
165 char* TessBaseAPI::TesseractRectBoxes(const unsigned char* imagedata, 165 char* TessBaseAPI::TesseractRectBoxes(const unsigned char* imagedata,
166 int bytes_per_pixel, 166 int bytes_per_pixel,
167 int bytes_per_line, 167 int bytes_per_line,
168 int left, int top, 168 int left, int top,
169 int width, int height, 169 int width, int height,
170 int imageheight) { 170 int imageheight) {
171 if (width < kMinRectSize || height < kMinRectSize) 171 if (width < kMinRectSize || height < kMinRectSize)
172 return NULL; // Nothing worth doing. 172 return NULL; // Nothing worth doing.
173 173
174 // Copy/Threshold the image to the tesseract global page_image. 174 // Copy/Threshold the image to the tesseract global page_image.
175 CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line, 175 CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line,
176 left, top, width, height); 176 left, top, width, height);
177 177
178 BLOCK_LIST block_list; 178 BLOCK_LIST block_list;
179 179
180 FindLines(&block_list); 180 FindLines(&block_list);
181 181
182 // Now run the main recognition. 182 // Now run the main recognition.
183 PAGE_RES* page_res = Recognize(&block_list, NULL); 183 PAGE_RES* page_res = Recognize(&block_list, NULL);
184 184
185 return TesseractToBoxText(page_res, left, imageheight - (top + height)); 185 return TesseractToBoxText(page_res, left, imageheight - (top + height));
186 } 186 }
187 187
188 char* TessBaseAPI::TesseractRectUNLV(const unsigned char* imagedata, 188 char* TessBaseAPI::TesseractRectUNLV(const unsigned char* imagedata,
189 int bytes_per_pixel, 189 int bytes_per_pixel,
190 int bytes_per_line, 190 int bytes_per_line,
191 int left, int top, 191 int left, int top,
192 int width, int height) { 192 int width, int height) {
193 if (width < kMinRectSize || height < kMinRectSize) 193 if (width < kMinRectSize || height < kMinRectSize)
194 return NULL; // Nothing worth doing. 194 return NULL; // Nothing worth doing.
195 195
196 // Copy/Threshold the image to the tesseract global page_image. 196 // Copy/Threshold the image to the tesseract global page_image.
197 CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line, 197 CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line,
198 left, top, width, height); 198 left, top, width, height);
199 199
200 BLOCK_LIST block_list; 200 BLOCK_LIST block_list;
201 201
202 FindLines(&block_list); 202 FindLines(&block_list);
203 203
204 // Now run the main recognition. 204 // Now run the main recognition.
205 PAGE_RES* page_res = Recognize(&block_list, NULL); 205 PAGE_RES* page_res = Recognize(&block_list, NULL);
206 206
207 return TesseractToUNLV(page_res); 207 return TesseractToUNLV(page_res);
208 } 208 }
209 209
210 // Call between pages or documents etc to free up memory and forget 210 // Call between pages or documents etc to free up memory and forget
211 // adaptive data. 211 // adaptive data.
212 void TessBaseAPI::ClearAdaptiveClassifier() { 212 void TessBaseAPI::ClearAdaptiveClassifier() {
213 ResetAdaptiveClassifier(); 213 ResetAdaptiveClassifier();
214 } 214 }
215 215
216 // Close down tesseract and free up memory. 216 // Close down tesseract and free up memory.
217 void TessBaseAPI::End() { 217 void TessBaseAPI::End() {
218 ResetAdaptiveClassifier(); 218 ResetAdaptiveClassifier();
219 end_tesseract(); 219 end_tesseract();
220 } 220 }
221 221
222 // Dump the internal binary image to a PGM file. 222 // Dump the internal binary image to a PGM file.
223 void TessBaseAPI::DumpPGM(const char* filename) { 223 void TessBaseAPI::DumpPGM(const char* filename) {
224 IMAGELINE line; 224 IMAGELINE line;
225 line.init(page_image.get_xsize()); 225 line.init(page_image.get_xsize());
226 FILE *fp = fopen(filename, "w"); 226 FILE *fp = fopen(filename, "w");
227 fprintf(fp, "P5 " INT32FORMAT " " INT32FORMAT " 255\n", page_image.get_xsize(), 227 fprintf(fp, "P5 " INT32FORMAT " " INT32FORMAT " 255\n", page_image.get_xsize(),
228 page_image.get_ysize()); 228 page_image.get_ysize());
229 for (int j = page_image.get_ysize()-1; j >= 0 ; --j) { 229 for (int j = page_image.get_ysize()-1; j >= 0 ; --j) {
230 page_image.get_line(0, j, page_image.get_xsize(), &line, 0); 230 page_image.get_line(0, j, page_image.get_xsize(), &line, 0);
231 for (int i = 0; i < page_image.get_xsize(); ++i) { 231 for (int i = 0; i < page_image.get_xsize(); ++i) {
232 uinT8 b = line.pixels[i] ? 255 : 0; 232 uinT8 b = line.pixels[i] ? 255 : 0;
233 fwrite(&b, 1, 1, fp); 233 fwrite(&b, 1, 1, fp);
234 } 234 }
235 } 235 }
236 fclose(fp); 236 fclose(fp);
237 } 237 }
238 238
#ifdef HAVE_LIBLEPT
// ONLY available if you have Leptonica installed (HAVE_LIBLEPT defined).
// Returns the Pix produced by page_image.ToPix(), i.e. a copy of the
// thresholded global image from Tesseract.
Pix* TessBaseAPI::GetTesseractImage() {
  Pix* image_copy = page_image.ToPix();
  return image_copy;
}
#endif  // HAVE_LIBLEPT
246 246
247 // Copy the given image rectangle to Tesseract, with adaptive thresholding 247 // Copy the given image rectangle to Tesseract, with adaptive thresholding
248 // if the image is not already binary. 248 // if the image is not already binary.
249 void TessBaseAPI::CopyImageToTesseract(const unsigned char* imagedata, 249 void TessBaseAPI::CopyImageToTesseract(const unsigned char* imagedata,
250 int bytes_per_pixel, 250 int bytes_per_pixel,
251 int bytes_per_line, 251 int bytes_per_line,
252 int left, int top, 252 int left, int top,
253 int width, int height) { 253 int width, int height) {
254 if (bytes_per_pixel > 0) { 254 if (bytes_per_pixel > 0) {
255 // Threshold grey or color. 255 // Threshold grey or color.
256 int* thresholds = new int[bytes_per_pixel]; 256 int* thresholds = new int[bytes_per_pixel];
257 int* hi_values = new int[bytes_per_pixel]; 257 int* hi_values = new int[bytes_per_pixel];
258 258
259 // Compute the thresholds. 259 // Compute the thresholds.
260 OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line, 260 OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
261 left, top, left + width, top + height, 261 left, top, left + width, top + height,
262 thresholds, hi_values); 262 thresholds, hi_values);
263 263
264 // Threshold the image to the tesseract global page_image. 264 // Threshold the image to the tesseract global page_image.
265 ThresholdRect(imagedata, bytes_per_pixel, bytes_per_line, 265 ThresholdRect(imagedata, bytes_per_pixel, bytes_per_line,
266 left, top, width, height, 266 left, top, width, height,
267 thresholds, hi_values); 267 thresholds, hi_values);
268 268
269 delete [] thresholds; 269 delete [] thresholds;
270 delete [] hi_values; 270 delete [] hi_values;
271 } else { 271 } else {
272 CopyBinaryRect(imagedata, bytes_per_line, left, top, width, height); 272 CopyBinaryRect(imagedata, bytes_per_line, left, top, width, height);
273 } 273 }
274 } 274 }
275 275
276 // Compute the Otsu threshold(s) for the given image rectangle, making one 276 // Compute the Otsu threshold(s) for the given image rectangle, making one
277 // for each channel. Each channel is always one byte per pixel. 277 // for each channel. Each channel is always one byte per pixel.
278 // Returns an array of threshold values and an array of hi_values, such 278 // Returns an array of threshold values and an array of hi_values, such
279 // that a pixel value >threshold[channel] is considered foreground if 279 // that a pixel value >threshold[channel] is considered foreground if
280 // hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates 280 // hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates
281 // that there is no apparent foreground. At least one hi_value will not be -1. 281 // that there is no apparent foreground. At least one hi_value will not be -1.
282 // thresholds and hi_values are assumed to be of bytes_per_pixel size. 282 // thresholds and hi_values are assumed to be of bytes_per_pixel size.
283 void TessBaseAPI::OtsuThreshold(const unsigned char* imagedata, 283 void TessBaseAPI::OtsuThreshold(const unsigned char* imagedata,
284 int bytes_per_pixel, 284 int bytes_per_pixel,
285 int bytes_per_line, 285 int bytes_per_line,
286 int left, int top, int right, int bottom, 286 int left, int top, int right, int bottom,
287 int* thresholds, 287 int* thresholds,
288 int* hi_values) { 288 int* hi_values) {
289 // Of all channels with no good hi_value, keep the best so we can always 289 // Of all channels with no good hi_value, keep the best so we can always
290 // produce at least one answer. 290 // produce at least one answer.
291 int best_hi_value = 0; 291 int best_hi_value = 0;
292 int best_hi_index = 0; 292 int best_hi_index = 0;
293 bool any_good_hivalue = false; 293 bool any_good_hivalue = false;
294 double best_hi_dist = 0.0; 294 double best_hi_dist = 0.0;
295 295
296 for (int ch = 0; ch < bytes_per_pixel; ++ch) { 296 for (int ch = 0; ch < bytes_per_pixel; ++ch) {
297 thresholds[ch] = 0; 297 thresholds[ch] = 0;
298 hi_values[ch] = -1; 298 hi_values[ch] = -1;
299 // Compute the histogram of the image rectangle. 299 // Compute the histogram of the image rectangle.
300 int histogram[256]; 300 int histogram[256];
301 HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line, 301 HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line,
302 left, top, right, bottom, histogram); 302 left, top, right, bottom, histogram);
303 int H; 303 int H;
304 int best_omega_0; 304 int best_omega_0;
305 int best_t = OtsuStats(histogram, &H, &best_omega_0); 305 int best_t = OtsuStats(histogram, &H, &best_omega_0);
306 if (best_omega_0 == 0 || best_omega_0 == H) { 306 if (best_omega_0 == 0 || best_omega_0 == H) {
307 // This channel is empty. 307 // This channel is empty.
308 continue; 308 continue;
309 } 309 }
310 // To be a convincing foreground we must have a small fraction of H 310 // To be a convincing foreground we must have a small fraction of H
311 // or to be a convincing background we must have a large fraction of H. 311 // or to be a convincing background we must have a large fraction of H.
312 // In between we assume this channel contains no thresholding information. 312 // In between we assume this channel contains no thresholding information.
313 int hi_value = best_omega_0 < H * 0.5; 313 int hi_value = best_omega_0 < H * 0.5;
314 thresholds[ch] = best_t; 314 thresholds[ch] = best_t;
315 if (best_omega_0 > H * 0.75) { 315 if (best_omega_0 > H * 0.75) {
316 any_good_hivalue = true; 316 any_good_hivalue = true;
317 hi_values[ch] = 0; 317 hi_values[ch] = 0;
318 } 318 }
319 else if (best_omega_0 < H * 0.25) { 319 else if (best_omega_0 < H * 0.25) {
320 any_good_hivalue = true; 320 any_good_hivalue = true;
321 hi_values[ch] = 1; 321 hi_values[ch] = 1;
322 } 322 }
323 else { 323 else {
324 // In case all channels are like this, keep the best of the bad lot. 324 // In case all channels are like this, keep the best of the bad lot.
325 double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; 325 double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
326 if (hi_dist > best_hi_dist) { 326 if (hi_dist > best_hi_dist) {
327 best_hi_dist = hi_dist; 327 best_hi_dist = hi_dist;
328 best_hi_value = hi_value; 328 best_hi_value = hi_value;
329 best_hi_index = ch; 329 best_hi_index = ch;
330 } 330 }
331 } 331 }
332 } 332 }
333 if (!any_good_hivalue) { 333 if (!any_good_hivalue) {
334 // Use the best of the ones that were not good enough. 334 // Use the best of the ones that were not good enough.
335 hi_values[best_hi_index] = best_hi_value; 335 hi_values[best_hi_index] = best_hi_value;
336 } 336 }
337 } 337 }
338 338
339 // Compute the histogram for the given image rectangle, and the given 339 // Compute the histogram for the given image rectangle, and the given
340 // channel. (Channel pointed to by imagedata.) Each channel is always 340 // channel. (Channel pointed to by imagedata.) Each channel is always
341 // one byte per pixel. 341 // one byte per pixel.
342 // Bytes per pixel is used to skip channels not being 342 // Bytes per pixel is used to skip channels not being
343 // counted with this call in a multi-channel (pixel-major) image. 343 // counted with this call in a multi-channel (pixel-major) image.
344 // Histogram is always a 256 element array to count occurrences of 344 // Histogram is always a 256 element array to count occurrences of
345 // each pixel value. 345 // each pixel value.
346 void TessBaseAPI::HistogramRect(const unsigned char* imagedata, 346 void TessBaseAPI::HistogramRect(const unsigned char* imagedata,
347 int bytes_per_pixel, 347 int bytes_per_pixel,
348 int bytes_per_line, 348 int bytes_per_line,
349 int left, int top, int right, int bottom, 349 int left, int top, int right, int bottom,
350 int* histogram) { 350 int* histogram) {
351 int width = right - left; 351 int width = right - left;
352 memset(histogram, 0, sizeof(*histogram) * 256); 352 memset(histogram, 0, sizeof(*histogram) * 256);
353 const unsigned char* pixels = imagedata + 353 const unsigned char* pixels = imagedata +
354 top*bytes_per_line + 354 top*bytes_per_line +
355 left*bytes_per_pixel; 355 left*bytes_per_pixel;
356 for (int y = top; y < bottom; ++y) { 356 for (int y = top; y < bottom; ++y) {
357 for (int x = 0; x < width; ++x) { 357 for (int x = 0; x < width; ++x) {
358 ++histogram[pixels[x * bytes_per_pixel]]; 358 ++histogram[pixels[x * bytes_per_pixel]];
359 } 359 }
360 pixels += bytes_per_line; 360 pixels += bytes_per_line;
361 } 361 }
362 362
363 } 363 }
364 364
365 // Compute the Otsu threshold(s) for the given histogram. 365 // Compute the Otsu threshold(s) for the given histogram.
366 // Also returns H = total count in histogram, and 366 // Also returns H = total count in histogram, and
367 // omega0 = count of histogram below threshold. 367 // omega0 = count of histogram below threshold.
368 int TessBaseAPI::OtsuStats(const int* histogram, 368 int TessBaseAPI::OtsuStats(const int* histogram,
369 int* H_out, 369 int* H_out,
370 int* omega0_out) { 370 int* omega0_out) {
371 int H = 0; 371 int H = 0;
372 double mu_T = 0.0; 372 double mu_T = 0.0;
373 for (int i = 0; i < 256; ++i) { 373 for (int i = 0; i < 256; ++i) {
374 H += histogram[i]; 374 H += histogram[i];
375 mu_T += i * histogram[i]; 375 mu_T += i * histogram[i];
376 } 376 }
377 377
378 // Now maximize sig_sq_B over t. 378 // Now maximize sig_sq_B over t.
379 // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf 379 // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
380 int best_t = -1; 380 int best_t = -1;
381 int omega_0, omega_1; 381 int omega_0, omega_1;
382 int best_omega_0 = 0; 382 int best_omega_0 = 0;
383 double best_sig_sq_B = 0.0; 383 double best_sig_sq_B = 0.0;
384 double mu_0, mu_1, mu_t; 384 double mu_0, mu_1, mu_t;
385 omega_0 = 0; 385 omega_0 = 0;
386 mu_t = 0.0; 386 mu_t = 0.0;
387 for (int t = 0; t < 255; ++t) { 387 for (int t = 0; t < 255; ++t) {
388 omega_0 += histogram[t]; 388 omega_0 += histogram[t];
389 mu_t += t * static_cast<double>(histogram[t]); 389 mu_t += t * static_cast<double>(histogram[t]);
390 if (omega_0 == 0) 390 if (omega_0 == 0)
391 continue; 391 continue;
392 omega_1 = H - omega_0; 392 omega_1 = H - omega_0;
393 mu_0 = mu_t / omega_0; 393 mu_0 = mu_t / omega_0;
394 mu_1 = (mu_T - mu_t) / omega_1; 394 mu_1 = (mu_T - mu_t) / omega_1;
395 double sig_sq_B = mu_1 - mu_0; 395 double sig_sq_B = mu_1 - mu_0;
396 sig_sq_B *= sig_sq_B * omega_0 * omega_1; 396 sig_sq_B *= sig_sq_B * omega_0 * omega_1;
397 if (best_t < 0 || sig_sq_B > best_sig_sq_B) { 397 if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
398 best_sig_sq_B = sig_sq_B; 398 best_sig_sq_B = sig_sq_B;
399 best_t = t; 399 best_t = t;
400 best_omega_0 = omega_0; 400 best_omega_0 = omega_0;
401 } 401 }
402 } 402 }
403 if (H_out != NULL) *H_out = H; 403 if (H_out != NULL) *H_out = H;
404 if (omega0_out != NULL) *omega0_out = best_omega_0; 404 if (omega0_out != NULL) *omega0_out = best_omega_0;
405 return best_t; 405 return best_t;
406 } 406 }
407 407
408 408
409 ////////////DEBAYAN//Deskew begins////////////////////// 409 ////////////DEBAYAN//Deskew begins//////////////////////
410 void deskew(float angle,int srcheight, int srcwidth) 410 void deskew(float angle,int srcheight, int srcwidth)
411 { 411 {
412 //angle=4; //45° for example 412 //angle=4; //45° for example
413 IMAGE tempimage; 413 IMAGE tempimage;
414 414
415 415
416 IMAGELINE line; 416 IMAGELINE line;
417 //Convert degrees to radians 417 //Convert degrees to radians
418 float radians=(2*3.1416*angle)/360; 418 float radians=(2*3.1416*angle)/360;
419 419
420 float cosine=(float)cos(radians); 420 float cosine=(float)cos(radians);
421 float sine=(float)sin(radians); 421 float sine=(float)sin(radians);
422 422
423 float Point1x=(srcheight*sine); 423 float Point1x=(srcheight*sine);
424 float Point1y=(srcheight*cosine); 424 float Point1y=(srcheight*cosine);
425 float Point2x=(srcwidth*cosine-srcheight*sine); 425 float Point2x=(srcwidth*cosine-srcheight*sine);
426 float Point2y=(srcheight*cosine+srcwidth*sine); 426 float Point2y=(srcheight*cosine+srcwidth*sine);
427 float Point3x=(srcwidth*cosine); 427 float Point3x=(srcwidth*cosine);
428 float Point3y=(srcwidth*sine); 428 float Point3y=(srcwidth*sine);
429 429
430 float minx=min(0,min(Point1x,min(Point2x,Point3x))); 430 float minx=min(0,min(Point1x,min(Point2x,Point3x)));
431 float miny=min(0,min(Point1y,min(Point2y,Point3y))); 431 float miny=min(0,min(Point1y,min(Point2y,Point3y)));
432 float maxx=max(Point1x,max(Point2x,Point3x)); 432 float maxx=max(Point1x,max(Point2x,Point3x));
433 float maxy=max(Point1y,max(Point2y,Point3y)); 433 float maxy=max(Point1y,max(Point2y,Point3y));
434 434
435 int DestWidth=(int)ceil(fabs(maxx)-minx); 435 int DestWidth=(int)ceil(fabs(maxx)-minx);
436 int DestHeight=(int)ceil(fabs(maxy)-miny); 436 int DestHeight=(int)ceil(fabs(maxy)-miny);
437 437
438 tempimage.create(DestWidth,DestHeight,1); 438 tempimage.create(DestWidth,DestHeight,1);
439 line.init(DestWidth); 439 line.init(DestWidth);
440 440
441 for(int i=0;i<DestWidth;i++){ //A white line of length=DestWidth 441 for(int i=0;i<DestWidth;i++){ //A white line of length=DestWidth
442 line.pixels[i]=1; 442 line.pixels[i]=1;
443 } 443 }
444 444
445 for(int y=0;y<DestHeight;y++){ //Fill the Destination image with white, else clipmatra wont work 445 for(int y=0;y<DestHeight;y++){ //Fill the Destination image with white, else clipmatra wont work
446 tempimage.put_line(0,y,DestWidth,&line,0); 446 tempimage.put_line(0,y,DestWidth,&line,0);
447 } 447 }
448 line.init(DestWidth); 448 line.init(DestWidth);
449 449
450 450
451 451
452 for(int y=0;y<DestHeight;y++) //Start filling the destination image pixels with corresponding source image pixels 452 for(int y=0;y<DestHeight;y++) //Start filling the destination image pixels with corresponding source image pixels
453 { 453 {
454 for(int x=0;x<DestWidth;x++) 454 for(int x=0;x<DestWidth;x++)
455 { 455 {
456 int Srcx=(int)((x+minx)*cosine+(y+miny)*sine); 456 int Srcx=(int)((x+minx)*cosine+(y+miny)*sine);
457 int Srcy=(int)((y+miny)*cosine-(x+minx)*sine); 457 int Srcy=(int)((y+miny)*cosine-(x+minx)*sine);
458 if(Srcx>=0&&Srcx<srcwidth&&Srcy>=0&& 458 if(Srcx>=0&&Srcx<srcwidth&&Srcy>=0&&
459 Srcy<srcheight) 459 Srcy<srcheight)
460 { 460 {
461 line.pixels[x]= 461 line.pixels[x]=
462 page_image.pixel(Srcx,Srcy); 462 page_image.pixel(Srcx,Srcy);
463 } 463 }
464 } 464 }
465 tempimage.put_line(0,y,DestWidth,&line,0); 465 tempimage.put_line(0,y,DestWidth,&line,0);
466 } 466 }
467 467
468 //tempimage.write("tempimage.tif"); 468 //tempimage.write("tempimage.tif");
469 page_image=tempimage;//Copy deskewed image to global page image, so it can be worked on further 469 page_image=tempimage;//Copy deskewed image to global page image, so it can be worked on further
470 tempimage.destroy(); 470 tempimage.destroy();
471 //page_image.write("page_image.tif"); 471 //page_image.write("page_image.tif");
472 472
473 } 473 }
474 /////////////DEBAYAN//Deskew ends///////////////////// 474 /////////////DEBAYAN//Deskew ends/////////////////////
475 475
476 ////////////DEBAYAN//Find skew begins///////////////// 476 ////////////DEBAYAN//Find skew begins/////////////////
477 float findskew(int height, int width) 477 float findskew(int height, int width)
478 { 478 {
479 int topx=0,topy=0,sign,count=0,offset=1,ifcounter=0; 479 int topx=0,topy=0,sign,count=0,offset=1,ifcounter=0;
480 float slope=-999,avg=0; 480 float slope=-999,avg=0;
481 IMAGELINE line; 481 IMAGELINE line;
482 line.init(1); 482 line.init(1);
483 line.pixels[0]=0; 483 line.pixels[0]=0;
484 ///////Find the top most point of the page: begins/////////// 484 ///////Find the top most point of the page: begins///////////
485 for(int y=height-1;y>0;y--){ 485 for(int y=height-1;y>0;y--){
486 for(int x=width-1;x>0;x--){ 486 for(int x=width-1;x>0;x--){
487 if(page_image.pixel(x,y)==0){ 487 if(page_image.pixel(x,y)==0){
488 topx=x;topy=y; 488 topx=x;topy=y;
489 break; 489 break;
490 } 490 }
491 491
492 } 492 }
493 493
494 if(topx>0){break;}; 494 if(topx>0){break;};
495 } 495 }
496 ///////Find the top most point of the page: ends/////////// 496 ///////Find the top most point of the page: ends///////////
497 497
498 498
499 ///////To find pages with no skew: begins////////////// 499 ///////To find pages with no skew: begins//////////////
500 int c1,c2=0; 500 int c1,c2=0;
501 for(int x=1;x<.25*width;x++){ 501 for(int x=1;x<.25*width;x++){
502 while(page_image.pixel((width/2)+x,c1++)==1){ } 502 while(page_image.pixel((width/2)+x,c1++)==1){ }
503 while(page_image.pixel((width/2)-x,c2++)==1){ } 503 while(page_image.pixel((width/2)-x,c2++)==1){ }
504 if(c1==c2){cout<<"0 ANGLE\n";return (0);} 504 if(c1==c2){cout<<"0 ANGLE\n";return (0);}
505 c1=c2=0; 505 c1=c2=0;
506 } 506 }
507 ///////To find pages with no skew: ends////////////// 507 ///////To find pages with no skew: ends//////////////
508 508
509 cout<<"width="<<width; 509 cout<<"width="<<width;
510 if(topx>0 && topx<.5*width){ 510 if(topx>0 && topx<.5*width){
511 sign=1; 511 sign=1;
512 } 512 }
513 if(topx>0 && topx>.5*width){ 513 if(topx>0 && topx>.5*width){
514 sign=-1; 514 sign=-1;
515 } 515 }
516 516
517 517
518 if(sign==-1){ 518 if(sign==-1){
519 while((topx-offset)>width/2){ 519 while((topx-offset)>width/2){
520 while(page_image.pixel(topx-offset,topy-count)==1){ 520 while(page_image.pixel(topx-offset,topy-count)==1){
521 //page_image.put_line(topx-offset,topy-count,1,&line,0); 521 //page_image.put_line(topx-offset,topy-count,1,&line,0);
522 count++; 522 count++;
523 } 523 }
524 524
525 if((180/3.142)*atan((float)count/offset)<10){ 525 if((180/3.142)*atan((float)count/offset)<10){
526 slope=(float)count/offset; 526 slope=(float)count/offset;
527 ifcounter++; 527 ifcounter++;
528 avg=(avg+slope); 528 avg=(avg+slope);
529 } 529 }
530 count=0; 530 count=0;
531 offset++; 531 offset++;
532 } 532 }
533 avg=(float)avg/ifcounter; 533 avg=(float)avg/ifcounter;
534 //cout<<"avg="<<avg<<"\n"; 534 //cout<<"avg="<<avg<<"\n";
535 page_image.write("findskew.tif"); 535 page_image.write("findskew.tif");
536 //cout<<"(180/3.142)*atan((float)(count/offset)="<<(180/3.142)*atan(avg)<<"\n"; 536 //cout<<"(180/3.142)*atan((float)(count/offset)="<<(180/3.142)*atan(avg)<<"\n";
537 return (sign*(180/3.142)*atan(avg)); 537 return (sign*(180/3.142)*atan(avg));
538 538
539 } 539 }
540 if(sign==1){ 540 if(sign==1){
541 while((topx+offset)<width/2){ 541 while((topx+offset)<width/2){
542 while(page_image.pixel(topx+offset,topy-count)==1){ 542 while(page_image.pixel(topx+offset,topy-count)==1){
543 //page_image.put_line(topx+offset,topy-count,1,&line,0); 543 //page_image.put_line(topx+offset,topy-count,1,&line,0);
544 count++; 544 count++;
545 } 545 }
546 546
547 if((180/3.142)*atan((float)count/offset)<10){ 547 if((180/3.142)*atan((float)count/offset)<10){
548 slope=(float)count/offset; 548 slope=(float)count/offset;
549 ifcounter++; 549 ifcounter++;
550 avg=(avg+slope); 550 avg=(avg+slope);
551 } 551 }
552 count=0; 552 count=0;
553 offset++; 553 offset++;
554 } 554 }
555 avg=(float)avg/ifcounter; 555 avg=(float)avg/ifcounter;
556 //cout<<"avg="<<avg<<"\n"; 556 //cout<<"avg="<<avg<<"\n";
557 page_image.write("findskew.tif"); 557 page_image.write("findskew.tif");
558 //cout<<"(180/3.142)*atan((float)(count/offset)="<<(180/3.142)*atan(avg)<<"\n"; 558 //cout<<"(180/3.142)*atan((float)(count/offset)="<<(180/3.142)*atan(avg)<<"\n";
559 return (sign*(180/3.142)*atan(avg)); 559 return (sign*(180/3.142)*atan(avg));
560 560
561 } 561 }
562 562
563 if(sign==0) 563 if(sign==0)
564 {return 0;} 564 {return 0;}
565 cout<<"SHIT"; 565 cout<<"SHIT";
566 return (0); 566 return (0);
567 } 567 }
568 ////////////DEBAYAN//Find skew ends/////////////////// 568 ////////////DEBAYAN//Find skew ends///////////////////
569 569
570 //Works on the global image page containing devnagri script. 570 //Works on the global image page containing devnagri script.
571 //Clips the maatraas and then makes the global image ready for the Tesseract engine. 571 //Clips the maatraas and then makes the global image ready for the Tesseract engine.
572 //Will be executed for all images during training, but during normal operation, will be 572 //Will be executed for all images during training, but during normal operation, will be
573 //used only if the language belongs to devnagri, eg, ben, hin etc. 573 //used only if the language belongs to devnagri, eg, ben, hin etc.
574 void TessBaseAPI::ClipMaatraa(int height, int width) 574 void TessBaseAPI::ClipMaatraa(int height, int width)
575 { 575 {
576 IMAGELINE line; 576 IMAGELINE line;
577 line.init(width); 577 line.init(width);
578 int count,count1=0,blackpixels[height-1][2],arr_row=0,maxbp=0,maxy=0,matras[100][3],char_height; 578 int count,count1=0,blackpixels[height-1][2],arr_row=0,maxbp=0,maxy=0,matras[100][3],char_height;
579 //cout<<"Connected Script="<<connected_script<<"\n"; 579 //cout<<"Connected Script="<<connected_script<<"\n";
580 580
581 for(int y=0; y<height-1;y++){ 581 for(int y=0; y<height-1;y++){
582 count=0; 582 count=0;
583 for(int x=0;x<width-1;x++){ 583 for(int x=0;x<width-1;x++){
584 if(page_image.pixel(x,y)==0) 584 if(page_image.pixel(x,y)==0)
585 {count++;} 585 {count++;}
586 } 586 }
587 587
588 if(count>=.05*width){ 588 if(count>=.05*width){
589 blackpixels[arr_row][0]=y; 589 blackpixels[arr_row][0]=y;
590 blackpixels[arr_row][1]=count; 590 blackpixels[arr_row][1]=count;
591 arr_row++; 591 arr_row++;
592 } 592 }
593 } 593 }
594 blackpixels[arr_row][0]=blackpixels[arr_row][1]='\0'; 594 blackpixels[arr_row][0]=blackpixels[arr_row][1]='\0';
595 595
596 for(int x=0;x<width-1;x++){ //Black Line 596 for(int x=0;x<width-1;x++){ //Black Line
597 line.pixels[x]=0; 597 line.pixels[x]=0;
598 } 598 }
599 599
600 ////////////line_through_matra() begins////////////////////// 600 ////////////line_through_matra() begins//////////////////////
601 count=1; 601 count=1;
602 //cout<<"\nHeight="<<height<<" arr_row="<<arr_row<<"\n"; 602 //cout<<"\nHeight="<<height<<" arr_row="<<arr_row<<"\n";
603 char_height=blackpixels[0][0]; //max character height per sentence 603 char_height=blackpixels[0][0]; //max character height per sentence
604 //cout<<"Char Height Init="<<char_height; 604 //cout<<"Char Height Init="<<char_height;
605 while(count<=arr_row){ 605 while(count<=arr_row){
606 //if(count==0){max=blackpixels[count][0];} 606 //if(count==0){max=blackpixels[count][0];}
607 if((blackpixels[count][0]-blackpixels[count-1][0]==1) && (blackpixels[count][1]>=maxbp)){ 607 if((blackpixels[count][0]-blackpixels[count-1][0]==1) && (blackpixels[count][1]>=maxbp)){
608 maxbp=blackpixels[count][1]; 608 maxbp=blackpixels[count][1];
609 maxy=blackpixels[count][0]; 609 maxy=blackpixels[count][0];
610 //cout<<"\nMax="<<maxy<<" bpc="<<maxbp; 610 //cout<<"\nMax="<<maxy<<" bpc="<<maxbp;
611 } 611 }
612 612
613 if((blackpixels[count][0]-blackpixels[count-1][0])!=1){ 613 if((blackpixels[count][0]-blackpixels[count-1][0])!=1){
614 /////////////drawline(max)////////////////////// 614 /////////////drawline(max)//////////////////////
615 615
616 // cout<<"\nmax="<<maxy<<" bpc="<<maxbp; 616 // cout<<"\nmax="<<maxy<<" bpc="<<maxbp;
617 // page_image.put_line(0,maxy,width,&line,0); 617 // page_image.put_line(0,maxy,width,&line,0);
618 char_height=blackpixels[count-1][0]-char_height; 618 char_height=blackpixels[count-1][0]-char_height;
619 matras[count1][0]=maxy; matras[count1][1]=maxbp; matras[count1][2]=char_height; count1++; 619 matras[count1][0]=maxy; matras[count1][1]=maxbp; matras[count1][2]=char_height; count1++;
620 char_height=blackpixels[count][0]; 620 char_height=blackpixels[count][0];
621 621
622 //////////// drawline(max)///////////////////// 622 //////////// drawline(max)/////////////////////
623 maxbp=blackpixels[count][1]; 623 maxbp=blackpixels[count][1];
624 } 624 }
625 count++; 625 count++;
626 } 626 }
627 matras[count1][0]=matras[count1][1]=matras[count1][2]='\0'; 627 matras[count1][0]=matras[count1][1]=matras[count1][2]='\0';
628 628
629 //delete blackpixels; 629 //delete blackpixels;
630 ////////////line_through_matra() ends////////////////////// 630 ////////////line_through_matra() ends//////////////////////
631 631
632 ////////////clip_matras() begins/////////////////////////// 632 ////////////clip_matras() begins///////////////////////////
633 for(int i=0;i<100;i++){ //where 100=max number of sentences per page 633 for(int i=0;i<100;i++){ //where 100=max number of sentences per page
634 if(matras[i][0]=='\0'){break;} 634 if(matras[i][0]=='\0'){break;}
635 //cout<<"\nY="<<matras[i][0]<<" bpc="<<matras[i][1]<<" chheight="<<matras[i][2]; 635 //cout<<"\nY="<<matras[i][0]<<" bpc="<<matras[i][1]<<" chheight="<<matras[i][2];
636 count=i; 636 count=i;
637 } 637 }
638 638
639 for(int i=0;i<=count;i++){ 639 for(int i=0;i<=count;i++){
640 640
641 for(int x=0;x<width-1;x++){ 641 for(int x=0;x<width-1;x++){
642 if(page_image.pixel(x,matras[i][0])==0){ 642 if(page_image.pixel(x,matras[i][0])==0){
643 count1=0; 643 count1=0;
644 for(int y=0;y<matras[i][2] && count1==0;y++){ 644 for(int y=0;y<matras[i][2] && count1==0;y++){
645 if(page_image.pixel(x,matras[i][0]-y)==1){count1++; 645 if(page_image.pixel(x,matras[i][0]-y)==1){count1++;
646 for(int z=y+1;z<matras[i][2];z++){ 646 for(int z=y+1;z<matras[i][2];z++){
647 if(page_image.pixel(x,matras[i][0]-z)==1){count1++;}//black pixel encountered... stop counting. 647 if(page_image.pixel(x,matras[i][0]-z)==1){count1++;}//black pixel encountered... stop counting.
648 else 648 else
649 {break;} 649 {break;}
650 } 650 }
651 } 651 }
652 } 652 }
653 int upcount=0;
654 //find upcount
655 while(page_image.pixel(x,matras[i][0]+upcount)==0){
656 upcount++;
657 }
658 //find upcount ends
653 //cout<<"\nWPR @ "<<x<<","<<matras[i][0]<<"="<<count1; 659 //cout<<"\nWPR @ "<<x<<","<<matras[i][0]<<"="<<count1;
654 if(count1>.8*matras[i][2]){ 660 if(count1>.8*matras[i][2]){
655 line.init(matras[i][2]+5); 661 line.init(upcount+count1);
656 for(int j=0;j<matras[i][2]+5;j++){line.pixels[j]=1;} 662 for(int j=0;j<matras[i][2]+5;j++){line.pixels[j]=1;}
657 page_image.put_column(x,matras[i][0]-matras[i][2],matras[i][2]+5,&line,0); 663 page_image.put_column(x,matras[i][0]+upcount,upcount+count1,&line,0);
658 } 664 }
659 } 665 }
660 } 666 }
661 667
662 } 668 }
663 669
664 page_image.write("bentest.tif"); 670 page_image.write("bentest.tif");
665 671
666 ////////////clip_matras() ends///////////////////////////// 672 ////////////clip_matras() ends/////////////////////////////
667 673
668 /////////DEBAYAN///////////////// 674 /////////DEBAYAN/////////////////
669 675
670 676
671 } 677 }
672 678
673 679
674 // Threshold the given grey or color image into the tesseract global 680 // Threshold the given grey or color image into the tesseract global
675 // image ready for recognition. Requires thresholds and hi_value 681 // image ready for recognition. Requires thresholds and hi_value
676 // produced by OtsuThreshold above. 682 // produced by OtsuThreshold above.
677 void TessBaseAPI::ThresholdRect(const unsigned char* imagedata, 683 void TessBaseAPI::ThresholdRect(const unsigned char* imagedata,
678 int bytes_per_pixel, 684 int bytes_per_pixel,
679 int bytes_per_line, 685 int bytes_per_line,
680 int left, int top, 686 int left, int top,
681 int width, int height, 687 int width, int height,
682 const int* thresholds, 688 const int* thresholds,
683 const int* hi_values) { 689 const int* hi_values) {
684 690
685 IMAGELINE line; 691 IMAGELINE line;
686 page_image.create(width, height, 1); 692 page_image.create(width, height, 1);
687 line.init(width); 693 line.init(width);
688 694
689 // For each line in the image, fill the IMAGELINE class and put it into the 695 // For each line in the image, fill the IMAGELINE class and put it into the
690 // Tesseract global page_image. Note that Tesseract stores images with the 696 // Tesseract global page_image. Note that Tesseract stores images with the
691 // bottom at y=0 and 0 is black, so we need 2 kinds of inversion. 697 // bottom at y=0 and 0 is black, so we need 2 kinds of inversion.
692 const unsigned char* data = imagedata + top*bytes_per_line + 698 const unsigned char* data = imagedata + top*bytes_per_line +
693 left*bytes_per_pixel; 699 left*bytes_per_pixel;
694 for (int y = height - 1 ; y >= 0; --y) { 700 for (int y = height - 1 ; y >= 0; --y) {
695 const unsigned char* pix = data; 701 const unsigned char* pix = data;
696 for (int x = 0; x < width; ++x, pix += bytes_per_pixel) { 702 for (int x = 0; x < width; ++x, pix += bytes_per_pixel) {
697 line.pixels[x] = 1; 703 line.pixels[x] = 1;
698 for (int ch = 0; ch < bytes_per_pixel; ++ch) { 704 for (int ch = 0; ch < bytes_per_pixel; ++ch) {
699 if (hi_values[ch] >= 0 && 705 if (hi_values[ch] >= 0 &&
700 (pix[ch] > thresholds[ch]) == (hi_values[ch] == 0)) { 706 (pix[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
701 line.pixels[x] = 0; 707 line.pixels[x] = 0;
702 break; 708 break;
703 } 709 }
704 } 710 }
705 } 711 }
706 page_image.put_line(0, y, width, &line, 0); 712 page_image.put_line(0, y, width, &line, 0);
707 data += bytes_per_line; 713 data += bytes_per_line;
708 } 714 }
709 715
710 if(connected_script==true){ 716 if(connected_script==true){
711 //page_image.write("benth.tif"); 717 //page_image.write("benth.tif");
712 //float angle=findskew(height,width); 718 //float angle=findskew(height,width);
713 //cout<<"SKEW ANGLE="<<angle<<"\n"; 719 //cout<<"SKEW ANGLE="<<angle<<"\n";
714 /*if(angle!=0){ 720 /*if(angle!=0){
715 deskew(angle,height,width); 721 deskew(angle,height,width);
716 }*/ 722 }*/
717 ClipMaatraa(height,width); 723 ClipMaatraa(height,width);
718 } 724 }
719 725
720 } 726 }
721 727
722 // Cut out the requested rectangle of the binary image to the 728 // Cut out the requested rectangle of the binary image to the
723 // tesseract global image ready for recognition. 729 // tesseract global image ready for recognition.
724 void TessBaseAPI::CopyBinaryRect(const unsigned char* imagedata, 730 void TessBaseAPI::CopyBinaryRect(const unsigned char* imagedata,
725 int bytes_per_line, 731 int bytes_per_line,
726 int left, int top, 732 int left, int top,
727 int width, int height) { 733 int width, int height) {
728 // Copy binary image, cutting out the required rectangle. 734 // Copy binary image, cutting out the required rectangle.
729 IMAGE image; 735 IMAGE image;
730 image.capture(const_cast<unsigned char*>(imagedata), 736 image.capture(const_cast<unsigned char*>(imagedata),
731 bytes_per_line*8, top + height, 1); 737 bytes_per_line*8, top + height, 1);
732 page_image.create(width, height, 1); 738 page_image.create(width, height, 1);
733 739
734 copy_sub_image(&image, left, 0, width, height, &page_image, 0, 0, false); 740 copy_sub_image(&image, left, 0, width, height, &page_image, 0, 0, false);
735 image.write("bentest.tif"); 741 image.write("bentest.tif");
736 } 742 }
737 743
738 // Low-level function to recognize the current global image to a string. 744 // Low-level function to recognize the current global image to a string.
739 char* TessBaseAPI::RecognizeToString() { 745 char* TessBaseAPI::RecognizeToString() {
740 BLOCK_LIST block_list; 746 BLOCK_LIST block_list;
741 747
742 FindLines(&block_list); 748 FindLines(&block_list);
743 749
744 // Now run the main recognition. 750 // Now run the main recognition.
745 PAGE_RES* page_res = Recognize(&block_list, NULL); 751 PAGE_RES* page_res = Recognize(&block_list, NULL);
746 752
747 return TesseractToText(page_res); 753 return TesseractToText(page_res);
748 754
749 } 755 }
750 756
751 // Find lines from the image making the BLOCK_LIST. 757 // Find lines from the image making the BLOCK_LIST.
752 void TessBaseAPI::FindLines(BLOCK_LIST* block_list) { 758 void TessBaseAPI::FindLines(BLOCK_LIST* block_list) {
753 // The following call creates a full-page block and then runs connected 759 // The following call creates a full-page block and then runs connected
754 // component analysis and text line creation. 760 // component analysis and text line creation.
755 pgeditor_read_file(input_file, block_list); 761 pgeditor_read_file(input_file, block_list);
756 } 762 }
757 763
758 // Recognize the tesseract global image and return the result as Tesseract 764 // Recognize the tesseract global image and return the result as Tesseract
759 // internal structures. 765 // internal structures.
760 PAGE_RES* TessBaseAPI::Recognize(BLOCK_LIST* block_list, ETEXT_DESC* monitor) { 766 PAGE_RES* TessBaseAPI::Recognize(BLOCK_LIST* block_list, ETEXT_DESC* monitor) {
761 if (tessedit_resegment_from_boxes) 767 if (tessedit_resegment_from_boxes)
762 apply_boxes(block_list); 768 apply_boxes(block_list);
763 769
764 PAGE_RES* page_res = new PAGE_RES(block_list); 770 PAGE_RES* page_res = new PAGE_RES(block_list);
765 if (interactive_mode) { 771 if (interactive_mode) {
766 pgeditor_main(block_list); //pgeditor user I/F 772 pgeditor_main(block_list); //pgeditor user I/F
767 } else if (tessedit_train_from_boxes) { 773 } else if (tessedit_train_from_boxes) {
768 apply_box_training(block_list); 774 apply_box_training(block_list);
769 } else { 775 } else {
770 // Now run the main recognition. 776 // Now run the main recognition.
771 recog_all_words(page_res, monitor); 777 recog_all_words(page_res, monitor);
772 } 778 }
773 return page_res; 779 return page_res;
774 } 780 }
775 781
776 // Return the maximum length that the output text string might occupy. 782 // Return the maximum length that the output text string might occupy.
777 int TessBaseAPI::TextLength(PAGE_RES* page_res) { 783 int TessBaseAPI::TextLength(PAGE_RES* page_res) {
778 PAGE_RES_IT page_res_it(page_res); 784 PAGE_RES_IT page_res_it(page_res);
779 int total_length = 2; 785 int total_length = 2;
780 // Iterate over the data structures to extract the recognition result. 786 // Iterate over the data structures to extract the recognition result.
781 for (page_res_it.restart_page(); page_res_it.word () != NULL; 787 for (page_res_it.restart_page(); page_res_it.word () != NULL;
782 page_res_it.forward()) { 788 page_res_it.forward()) {
783 WERD_RES *word = page_res_it.word(); 789 WERD_RES *word = page_res_it.word();
784 WERD_CHOICE* choice = word->best_choice; 790 WERD_CHOICE* choice = word->best_choice;
785 if (choice != NULL) { 791 if (choice != NULL) {
786 total_length += choice->string().length() + 1; 792 total_length += choice->string().length() + 1;
787 for (int i = 0; i < word->reject_map.length(); ++i) { 793 for (int i = 0; i < word->reject_map.length(); ++i) {
788 if (word->reject_map[i].rejected()) 794 if (word->reject_map[i].rejected())
789 ++total_length; 795 ++total_length;
790 } 796 }
791 } 797 }
792 } 798 }
793 return total_length; 799 return total_length;
794 } 800 }
795 801
796 // Returns an array of all word confidences, terminated by -1. 802 // Returns an array of all word confidences, terminated by -1.
797 int* TessBaseAPI::AllTextConfidences(PAGE_RES* page_res) { 803 int* TessBaseAPI::AllTextConfidences(PAGE_RES* page_res) {
798 if (!page_res) return NULL; 804 if (!page_res) return NULL;
799 int n_word = 0; 805 int n_word = 0;
800 PAGE_RES_IT res_it(page_res); 806 PAGE_RES_IT res_it(page_res);
801 for (res_it.restart_page(); res_it.word () != NULL; res_it.forward()) 807 for (res_it.restart_page(); res_it.word () != NULL; res_it.forward())
802 n_word++; 808 n_word++;
803 809
804 int* conf = new int[n_word+1]; 810 int* conf = new int[n_word+1];
805 n_word = 0; 811 n_word = 0;
806 for (res_it.restart_page(); res_it.word () != NULL; res_it.forward()) { 812 for (res_it.restart_page(); res_it.word () != NULL; res_it.forward()) {
807 WERD_RES *word = res_it.word(); 813 WERD_RES *word = res_it.word();
808 WERD_CHOICE* choice = word->best_choice; 814 WERD_CHOICE* choice = word->best_choice;
809 int w_conf = static_cast<int>(100 + 5 * choice->certainty()); 815 int w_conf = static_cast<int>(100 + 5 * choice->certainty());
810 // This is the eq for converting Tesseract confidence to 1..100 816 // This is the eq for converting Tesseract confidence to 1..100
811 if (w_conf < 0) w_conf = 0; 817 if (w_conf < 0) w_conf = 0;
812 if (w_conf > 100) w_conf = 100; 818 if (w_conf > 100) w_conf = 100;
813 conf[n_word++] = w_conf; 819 conf[n_word++] = w_conf;
814 } 820 }
815 conf[n_word] = -1; 821 conf[n_word] = -1;
816 return conf; 822 return conf;
817 } 823 }
818 824
819 // Returns the average word confidence for Tesseract page result. 825 // Returns the average word confidence for Tesseract page result.
820 int TessBaseAPI::TextConf(PAGE_RES* page_res) { 826 int TessBaseAPI::TextConf(PAGE_RES* page_res) {
821 int* conf = AllTextConfidences(page_res); 827 int* conf = AllTextConfidences(page_res);
822 if (!conf) return 0; 828 if (!conf) return 0;
823 int sum = 0; 829 int sum = 0;
824 int *pt = conf; 830 int *pt = conf;
825 while (*pt >= 0) sum += *pt++; 831 while (*pt >= 0) sum += *pt++;
826 if (pt != conf) sum /= pt - conf; 832 if (pt != conf) sum /= pt - conf;
827 delete [] conf; 833 delete [] conf;
828 return sum; 834 return sum;
829 } 835 }
830 836
831 // Make a text string from the internal data structures. 837 // Make a text string from the internal data structures.
832 // The input page_res is deleted. 838 // The input page_res is deleted.
833 char* TessBaseAPI::TesseractToText(PAGE_RES* page_res) { 839 char* TessBaseAPI::TesseractToText(PAGE_RES* page_res) {
834 if (page_res != NULL) { 840 if (page_res != NULL) {
835 int total_length = TextLength(page_res); 841 int total_length = TextLength(page_res);
836 PAGE_RES_IT page_res_it(page_res); 842 PAGE_RES_IT page_res_it(page_res);
837 char* result = new char[total_length]; 843 char* result = new char[total_length];
838 char* ptr = result; 844 char* ptr = result;
839 for (page_res_it.restart_page(); page_res_it.word () != NULL; 845 for (page_res_it.restart_page(); page_res_it.word () != NULL;
840 page_res_it.forward()) { 846 page_res_it.forward()) {
841 WERD_RES *word = page_res_it.word(); 847 WERD_RES *word = page_res_it.word();
842 WERD_CHOICE* choice = word->best_choice; 848 WERD_CHOICE* choice = word->best_choice;
843 if (choice != NULL) { 849 if (choice != NULL) {
844 strcpy(ptr, choice->string().string()); 850 strcpy(ptr, choice->string().string());
845 ptr += strlen(ptr); 851 ptr += strlen(ptr);
846 if (word->word->flag(W_EOL)) 852 if (word->word->flag(W_EOL))
847 *ptr++ = '\n'; 853 *ptr++ = '\n';
848 else 854 else
849 *ptr++ = ' '; 855 *ptr++ = ' ';
850 } 856 }
851 } 857 }
852 *ptr++ = '\n'; 858 *ptr++ = '\n';
853 *ptr = '\0'; 859 *ptr = '\0';
854 delete page_res; 860 delete page_res;
855 return result; 861 return result;
856 } 862 }
857 return NULL; 863 return NULL;
858 } 864 }
859 865
860 static int ConvertWordToBoxText(WERD_RES *word, 866 static int ConvertWordToBoxText(WERD_RES *word,
861 ROW_RES* row, 867 ROW_RES* row,
862 int left, 868 int left,
863 int bottom, 869 int bottom,
864 char* word_str) { 870 char* word_str) {
865 // Copy the output word and denormalize it back to image coords. 871 // Copy the output word and denormalize it back to image coords.
866 WERD copy_outword; 872 WERD copy_outword;
867 copy_outword = *(word->outword); 873 copy_outword = *(word->outword);
868 copy_outword.baseline_denormalise(&word->denorm); 874 copy_outword.baseline_denormalise(&word->denorm);
869 PBLOB_IT blob_it; 875 PBLOB_IT blob_it;
870 blob_it.set_to_list(copy_outword.blob_list()); 876 blob_it.set_to_list(copy_outword.blob_list());
871 int length = copy_outword.blob_list()->length(); 877 int length = copy_outword.blob_list()->length();
872 int output_size = 0; 878 int output_size = 0;
873 879
874 if (length > 0) { 880 if (length > 0) {
875 for (int index = 0, offset = 0; index < length; 881 for (int index = 0, offset = 0; index < length;
876 offset += word->best_choice->lengths()[index++], blob_it.forward()) { 882 offset += word->best_choice->lengths()[index++], blob_it.forward()) {
877 PBLOB* blob = blob_it.data(); 883 PBLOB* blob = blob_it.data();
878 TBOX blob_box = blob->bounding_box(); 884 TBOX blob_box = blob->bounding_box();
879 if (word->tess_failed || 885 if (word->tess_failed ||
880 blob_box.left() < 0 || 886 blob_box.left() < 0 ||
881 blob_box.right() > page_image.get_xsize() || 887 blob_box.right() > page_image.get_xsize() ||
882 blob_box.bottom() < 0 || 888 blob_box.bottom() < 0 ||
883 blob_box.top() > page_image.get_ysize()) { 889 blob_box.top() > page_image.get_ysize()) {
884 // Bounding boxes can be illegal when tess fails on a word. 890 // Bounding boxes can be illegal when tess fails on a word.
885 blob_box = word->word->bounding_box(); // Use original word as backup. 891 blob_box = word->word->bounding_box(); // Use original word as backup.
886 tprintf("Using substitute bounding box at (%d,%d)->(%d,%d)\n", 892 tprintf("Using substitute bounding box at (%d,%d)->(%d,%d)\n",
887 blob_box.left(), blob_box.bottom(), 893 blob_box.left(), blob_box.bottom(),
888 blob_box.right(), blob_box.top()); 894 blob_box.right(), blob_box.top());
889 } 895 }
890 896
891 // A single classification unit can be composed of several UTF-8 897 // A single classification unit can be composed of several UTF-8
892 // characters. Append each of them to the result. 898 // characters. Append each of them to the result.
893 for (int sub = 0; sub < word->best_choice->lengths()[index]; ++sub) { 899 for (int sub = 0; sub < word->best_choice->lengths()[index]; ++sub) {
894 char ch = word->best_choice->string()[offset + sub]; 900 char ch = word->best_choice->string()[offset + sub];
895 // Tesseract uses space for recognition failure. Fix to a reject 901 // Tesseract uses space for recognition failure. Fix to a reject
896 // character, '~' so we don't create illegal box files. 902 // character, '~' so we don't create illegal box files.
897 if (ch == ' ') 903 if (ch == ' ')
898 ch = '~'; 904 ch = '~';
899 word_str[output_size++] = ch; 905 word_str[output_size++] = ch;
900 } 906 }
901 sprintf(word_str + output_size, " %d %d %d %d\n", 907 sprintf(word_str + output_size, " %d %d %d %d\n",
902 blob_box.left() + left, blob_box.bottom() + bottom, 908 blob_box.left() + left, blob_box.bottom() + bottom,
903 blob_box.right() + left, blob_box.top() + bottom); 909 blob_box.right() + left, blob_box.top() + bottom);
904 output_size += strlen(word_str + output_size); 910 output_size += strlen(word_str + output_size);
905 } 911 }
906 } 912 }
907 return output_size; 913 return output_size;
908 } 914 }
909 915
// Multiplier applied to TextLength for box-file output: every character may
// expand to 4 numbers of up to 5 digits, each with a space, plus the newline
// and the original character itself.
const int kMaxCharsPerChar = 4 * (5 + 1) + 2;
913 919
914 // Make a text string from the internal data structures. 920 // Make a text string from the internal data structures.
915 // The input page_res is deleted. 921 // The input page_res is deleted.
916 // The text string takes the form of a box file as needed for training. 922 // The text string takes the form of a box file as needed for training.
917 char* TessBaseAPI::TesseractToBoxText(PAGE_RES* page_res, 923 char* TessBaseAPI::TesseractToBoxText(PAGE_RES* page_res,
918 int left, int bottom) { 924 int left, int bottom) {
919 if (page_res != NULL) { 925 if (page_res != NULL) {
920 int total_length = TextLength(page_res) * kMaxCharsPerChar; 926 int total_length = TextLength(page_res) * kMaxCharsPerChar;
921 PAGE_RES_IT page_res_it(page_res); 927 PAGE_RES_IT page_res_it(page_res);
922 char* result = new char[total_length]; 928 char* result = new char[total_length];
923 char* ptr = result; 929 char* ptr = result;
924 for (page_res_it.restart_page(); page_res_it.word () != NULL; 930 for (page_res_it.restart_page(); page_res_it.word () != NULL;
925 page_res_it.forward()) { 931 page_res_it.forward()) {
926 WERD_RES *word = page_res_it.word(); 932 WERD_RES *word = page_res_it.word();
927 ptr += ConvertWordToBoxText(word,page_res_it.row(),left, bottom, ptr); 933 ptr += ConvertWordToBoxText(word,page_res_it.row(),left, bottom, ptr);
928 } 934 }
929 *ptr = '\0'; 935 *ptr = '\0';
930 delete page_res; 936 delete page_res;
931 return result; 937 return result;
932 } 938 }
933 return NULL; 939 return NULL;
934 } 940 }
935 941
936 // Make a text string from the internal data structures. 942 // Make a text string from the internal data structures.
937 // The input page_res is deleted. The text string is converted 943 // The input page_res is deleted. The text string is converted
938 // to UNLV-format: Latin-1 with specific reject and suspect codes. 944 // to UNLV-format: Latin-1 with specific reject and suspect codes.
// Character written in place of text that could not be recognized.
const char kUnrecognized = '~';

// Conversion table for non-latin characters: kUniChs[i] is mapped to
// kLatinChs[i]. Both tables are terminated by 0.
// TODO(rays) incorporate this translation into unicharset.
const int kUniChs[] = {
  0x20ac,  // euro sign
  0x201c,  // left double quotation mark
  0x201d,  // right double quotation mark
  0x2018,  // left single quotation mark
  0x2019,  // right single quotation mark
  0x2022,  // bullet
  0x2014,  // em dash
  0
};
// Latin chars corresponding, entry by entry, to the unicode chars above.
const int kLatinChs[] = {
  0x00a2,  // cent sign
  0x0022,  // quotation mark
  0x0022,  // quotation mark
  0x0027,  // apostrophe
  0x0027,  // apostrophe
  0x00b7,  // middle dot
  0x002d,  // hyphen-minus
  0
};
950 956
951 char* TessBaseAPI::TesseractToUNLV(PAGE_RES* page_res) { 957 char* TessBaseAPI::TesseractToUNLV(PAGE_RES* page_res) {
952 bool tilde_crunch_written = false; 958 bool tilde_crunch_written = false;
953 bool last_char_was_newline = true; 959 bool last_char_was_newline = true;
954 bool last_char_was_tilde = false; 960 bool last_char_was_tilde = false;
955 961
956 if (page_res != NULL) { 962 if (page_res != NULL) {
957 int total_length = TextLength(page_res); 963 int total_length = TextLength(page_res);
958 PAGE_RES_IT page_res_it(page_res); 964 PAGE_RES_IT page_res_it(page_res);
959 char* result = new char[total_length]; 965 char* result = new char[total_length];
960 char* ptr = result; 966 char* ptr = result;
961 for (page_res_it.restart_page(); page_res_it.word () != NULL; 967 for (page_res_it.restart_page(); page_res_it.word () != NULL;
962 page_res_it.forward()) { 968 page_res_it.forward()) {
963 WERD_RES *word = page_res_it.word(); 969 WERD_RES *word = page_res_it.word();
964 // Process the current word. 970 // Process the current word.
965 if (word->unlv_crunch_mode != CR_NONE) { 971 if (word->unlv_crunch_mode != CR_NONE) {
966 if (word->unlv_crunch_mode != CR_DELETE && 972 if (word->unlv_crunch_mode != CR_DELETE &&
967 (!tilde_crunch_written || 973 (!tilde_crunch_written ||
968 (word->unlv_crunch_mode == CR_KEEP_SPACE && 974 (word->unlv_crunch_mode == CR_KEEP_SPACE &&
969 word->word->space () > 0 && 975 word->word->space () > 0 &&
970 !word->word->flag (W_FUZZY_NON) && 976 !word->word->flag (W_FUZZY_NON) &&
971 !word->word->flag (W_FUZZY_SP)))) { 977 !word->word->flag (W_FUZZY_SP)))) {
972 if (!word->word->flag (W_BOL) && 978 if (!word->word->flag (W_BOL) &&
973 word->word->space () > 0 && 979 word->word->space () > 0 &&
974 !word->word->flag (W_FUZZY_NON) && 980 !word->word->flag (W_FUZZY_NON) &&
975 !word->word->flag (W_FUZZY_SP)) { 981 !word->word->flag (W_FUZZY_SP)) {
976 /* Write a space to separate from preceeding good text */ 982 /* Write a space to separate from preceeding good text */
977 *ptr++ = ' '; 983 *ptr++ = ' ';
978 last_char_was_tilde = false; 984 last_char_was_tilde = false;
979 } 985 }
980 if (!last_char_was_tilde) { 986 if (!last_char_was_tilde) {
981 // Write a reject char. 987 // Write a reject char.
982 last_char_was_tilde = true; 988 last_char_was_tilde = true;
983 *ptr++ = kUnrecognized; 989 *ptr++ = kUnrecognized;
984 tilde_crunch_written = true; 990 tilde_crunch_written = true;
985 last_char_was_newline = false; 991 last_char_was_newline = false;
986 } 992 }
987 } 993 }
988 } else { 994 } else {
989 // NORMAL PROCESSING of non tilde crunched words. 995 // NORMAL PROCESSING of non tilde crunched words.
990 tilde_crunch_written = false; 996 tilde_crunch_written = false;
991 997
992 if (last_char_was_tilde && 998 if (last_char_was_tilde &&
993 word->word->space () == 0 && 999 word->word->space () == 0 &&
994 (word->best_choice->string ()[0] == ' ')) { 1000 (word->best_choice->string ()[0] == ' ')) {
995 /* Prevent adjacent tilde across words - we know that adjacent tildes within 1001 /* Prevent adjacent tilde across words - we know that adjacent tildes within
996 words have been removed */ 1002 words have been removed */
997 char* p = (char *) word->best_choice->string().string (); 1003 char* p = (char *) word->best_choice->string().string ();
998 strcpy (p, p + 1); //shuffle up 1004 strcpy (p, p + 1); //shuffle up
999 p = (char *) word->best_choice->lengths().string (); 1005 p = (char *) word->best_choice->lengths().string ();
1000 strcpy (p, p + 1); //shuffle up 1006 strcpy (p, p + 1); //shuffle up
1001 word->reject_map.remove_pos (0); 1007 word->reject_map.remove_pos (0);
1002 PBLOB_IT blob_it = word->outword->blob_list (); 1008 PBLOB_IT blob_it = word->outword->blob_list ();
1003 delete blob_it.extract (); //get rid of reject blob 1009 delete blob_it.extract (); //get rid of reject blob
1004 } 1010 }
1005 1011
1006 if (word->word->flag(W_REP_CHAR) && tessedit_consistent_reps) 1012 if (word->word->flag(W_REP_CHAR) && tessedit_consistent_reps)
1007 ensure_rep_chars_are_consistent(word); 1013 ensure_rep_chars_are_consistent(word);
1008 1014
1009 set_unlv_suspects(word); 1015 set_unlv_suspects(word);
1010 const char* wordstr = word->best_choice->string().string(); 1016 const char* wordstr = word->best_choice->string().string();
1011 if (wordstr[0] != 0) { 1017 if (wordstr[0] != 0) {
1012 if (!last_char_was_newline) 1018 if (!last_char_was_newline)
1013 *ptr++ = ' '; 1019 *ptr++ = ' ';
1014 else 1020 else
1015 last_char_was_newline = false; 1021 last_char_was_newline = false;
1016 int offset = 0; 1022 int offset = 0;
1017 const STRING& lengths = word->best_choice->lengths(); 1023 const STRING& lengths = word->best_choice->lengths();
1018 int length = lengths.length(); 1024 int length = lengths.length();
1019 for (int i = 0; i < length; offset += lengths[i++]) { 1025 for (int i = 0; i < length; offset += lengths[i++]) {
1020 if (wordstr[offset] == ' ' || 1026 if (wordstr[offset] == ' ' ||
1021 wordstr[offset] == '~' || 1027 wordstr[offset] == '~' ||
1022 wordstr[offset] == '|') { 1028 wordstr[offset] == '|') {
1023 *ptr++ = kUnrecognized; 1029 *ptr++ = kUnrecognized;
1024 last_char_was_tilde = true; 1030 last_char_was_tilde = true;
1025 } else { 1031 } else {
1026 if (word->reject_map[i].rejected()) 1032 if (word->reject_map[i].rejected())
1027 *ptr++ = '^'; 1033 *ptr++ = '^';
1028 UNICHAR ch(wordstr + offset, lengths[i]); 1034 UNICHAR ch(wordstr + offset, lengths[i]);
1029 int uni_ch = ch.first_uni(); 1035 int uni_ch = ch.first_uni();
1030 for (int j = 0; kUniChs[j] != 0; ++j) { 1036 for (int j = 0; kUniChs[j] != 0; ++j) {
1031 if (kUniChs[j] == uni_ch) { 1037 if (kUniChs[j] == uni_ch) {
1032 uni_ch = kLatinChs[j]; 1038 uni_ch = kLatinChs[j];
1033 break; 1039 break;
1034 } 1040 }
1035 } 1041 }
1036 if (uni_ch <= 0xff) { 1042 if (uni_ch <= 0xff) {
1037 *ptr++ = static_cast<char>(uni_ch); 1043 *ptr++ = static_cast<char>(uni_ch);
1038 last_char_was_tilde = false; 1044 last_char_was_tilde = false;
1039 } else { 1045 } else {
1040 *ptr++ = kUnrecognized; 1046 *ptr++ = kUnrecognized;
1041 last_char_was_tilde = true; 1047 last_char_was_tilde = true;
1042 } 1048 }
1043 } 1049 }
1044 } 1050 }
1045 } 1051 }
1046 } 1052 }
1047 if (word->word->flag(W_EOL) && !last_char_was_newline) { 1053 if (word->word->flag(W_EOL) && !last_char_was_newline) {
1048 /* Add a new line output */ 1054 /* Add a new line output */
1049 *ptr++ = '\n'; 1055 *ptr++ = '\n';
1050 tilde_crunch_written = false; 1056 tilde_crunch_written = false;
1051 last_char_was_newline = true; 1057 last_char_was_newline = true;
1052 last_char_was_tilde = false; 1058 last_char_was_tilde = false;
1053 } 1059 }
1054 } 1060 }
1055 *ptr++ = '\n'; 1061 *ptr++ = '\n';
1056 *ptr = '\0'; 1062 *ptr = '\0';
1057 delete page_res; 1063 delete page_res;
1058 return result; 1064 return result;
1059 } 1065 }
1060 return NULL; 1066 return NULL;
1061 } 1067 }
1062 // ____________________________________________________________________________ 1068 // ____________________________________________________________________________
1063 // Ocropus add-ons. 1069 // Ocropus add-ons.
1064 1070
1065 // Find lines from the image making the BLOCK_LIST. 1071 // Find lines from the image making the BLOCK_LIST.
1066 BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { 1072 BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() {
1067 BLOCK_LIST *block_list = new BLOCK_LIST(); 1073 BLOCK_LIST *block_list = new BLOCK_LIST();
1068 FindLines(block_list); 1074 FindLines(block_list);
1069 return block_list; 1075 return block_list;
1070 } 1076 }
1071 1077
1072 // Delete a block list. 1078 // Delete a block list.
1073 // This is to keep BLOCK_LIST pointer opaque 1079 // This is to keep BLOCK_LIST pointer opaque
1074 // and let go of including the other headers. 1080 // and let go of including the other headers.
1075 void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { 1081 void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) {
1076 delete block_list; 1082 delete block_list;
1077 } 1083 }
1078 1084
1079 1085
1080 static ROW *make_tess_ocrrow(float baseline, 1086 static ROW *make_tess_ocrrow(float baseline,
1081 float xheight, 1087 float xheight,
1082 float descender, 1088 float descender,
1083 float ascender) { 1089 float ascender) {
1084 inT32 xstarts[] = {-32000}; 1090 inT32 xstarts[] = {-32000};
1085 double quad_coeffs[] = {0,0,baseline}; 1091 double quad_coeffs[] = {0,0,baseline};
1086 return new ROW(1, 1092 return new ROW(1,
1087 xstarts, 1093 xstarts,
1088 quad_coeffs, 1094 quad_coeffs,
1089 xheight, 1095 xheight,
1090 ascender - (baseline + xheight), 1096 ascender - (baseline + xheight),
1091 descender - baseline, 1097 descender - baseline,
1092 0, 1098 0,
1093 0); 1099 0);
1094 } 1100 }
1095 1101
1096 // Almost a copy of make_tess_row() from ccmain/tstruct.cpp. 1102 // Almost a copy of make_tess_row() from ccmain/tstruct.cpp.
1097 static void fill_dummy_row(float baseline, float xheight, 1103 static void fill_dummy_row(float baseline, float xheight,
1098 float descender, float ascender, 1104 float descender, float ascender,
1099 TEXTROW* tessrow) { 1105 TEXTROW* tessrow) {
1100 tessrow->baseline.segments = 1; 1106 tessrow->baseline.segments = 1;
1101 tessrow->baseline.xstarts[0] = -32767; 1107 tessrow->baseline.xstarts[0] = -32767;
1102 tessrow->baseline.xstarts[1] = 32767; 1108 tessrow->baseline.xstarts[1] = 32767;
1103 tessrow->baseline.quads[0].a = 0; 1109 tessrow->baseline.quads[0].a = 0;
1104 tessrow->baseline.quads[0].b = 0; 1110 tessrow->baseline.quads[0].b = 0;
1105 tessrow->baseline.quads[0].c = bln_baseline_offset; 1111 tessrow->baseline.quads[0].c = bln_baseline_offset;
1106 tessrow->xheight.segments = 1; 1112 tessrow->xheight.segments = 1;
1107 tessrow->xheight.xstarts[0] = -32767; 1113 tessrow->xheight.xstarts[0] = -32767;
1108 tessrow->xheight.xstarts[1] = 32767; 1114 tessrow->xheight.xstarts[1] = 32767;
1109 tessrow->xheight.quads[0].a = 0; 1115 tessrow->xheight.quads[0].a = 0;
1110 tessrow->xheight.quads[0].b = 0; 1116 tessrow->xheight.quads[0].b = 0;
1111 tessrow->xheight.quads[0].c = bln_baseline_offset + bln_x_height; 1117 tessrow->xheight.quads[0].c = bln_baseline_offset + bln_x_height;
1112 tessrow->lineheight = bln_x_height; 1118 tessrow->lineheight = bln_x_height;
1113 tessrow->ascrise = bln_x_height * (ascender - (xheight + baseline)) / xheight; 1119 tessrow->ascrise = bln_x_height * (ascender - (xheight + baseline)) / xheight;
1114 tessrow->descdrop = bln_x_height * (descender - baseline) / xheight; 1120 tessrow->descdrop = bln_x_height * (descender - baseline) / xheight;
1115 } 1121 }
1116 1122
1117 1123
1118 /// Return a TBLOB * from the whole page_image. 1124 /// Return a TBLOB * from the whole page_image.
1119 /// To be freed later with free_blob(). 1125 /// To be freed later with free_blob().
1120 TBLOB *make_tesseract_blob(float baseline, float xheight, float descender, float ascender) { 1126 TBLOB *make_tesseract_blob(float baseline, float xheight, float descender, float ascender) {
1121 BLOCK *block = new BLOCK ("a character", 1127 BLOCK *block = new BLOCK ("a character",
1122 TRUE, 1128 TRUE,
1123 0, 0, 1129 0, 0,
1124 0, 0, 1130 0, 0,
1125 page_image.get_xsize(), 1131 page_image.get_xsize(),
1126 page_image.get_ysize()); 1132 page_image.get_ysize());
1127 1133
1128 // Create C_BLOBs from the page 1134 // Create C_BLOBs from the page
1129 extract_edges(NULL, &page_image, &page_image, 1135 extract_edges(NULL, &page_image, &page_image,
1130 ICOORD(page_image.get_xsize(), page_image.get_ysize()), 1136 ICOORD(page_image.get_xsize(), page_image.get_ysize()),
1131 block); 1137 block);
1132 1138
1133 // Create one PBLOB from all C_BLOBs 1139 // Create one PBLOB from all C_BLOBs
1134 C_BLOB_LIST *list = block->blob_list(); 1140 C_BLOB_LIST *list = block->blob_list();
1135 C_BLOB_IT c_blob_it(list); 1141 C_BLOB_IT c_blob_it(list);
1136 PBLOB *pblob = new PBLOB; // will be (hopefully) deleted by the pblob_list 1142 PBLOB *pblob = new PBLOB; // will be (hopefully) deleted by the pblob_list
1137 for (c_blob_it.mark_cycle_pt(); 1143 for (c_blob_it.mark_cycle_pt();
1138 !c_blob_it.cycled_list(); 1144 !c_blob_it.cycled_list();
1139 c_blob_it.forward()) { 1145 c_blob_it.forward()) {
1140 C_BLOB *c_blob = c_blob_it.data(); 1146 C_BLOB *c_blob = c_blob_it.data();
1141 PBLOB c_as_p(c_blob, baseline + xheight); 1147 PBLOB c_as_p(c_blob, baseline + xheight);
1142 merge_blobs(pblob, &c_as_p); 1148 merge_blobs(pblob, &c_as_p);
1143 } 1149 }
1144 PBLOB_LIST *pblob_list = new PBLOB_LIST; // will be deleted by the word 1150 PBLOB_LIST *pblob_list = new PBLOB_LIST; // will be deleted by the word
1145 PBLOB_IT pblob_it(pblob_list); 1151 PBLOB_IT pblob_it(pblob_list);
1146 pblob_it.add_after_then_move(pblob); 1152 pblob_it.add_after_then_move(pblob);
1147 1153
1148 // Normalize PBLOB 1154 // Normalize PBLOB
1149 WERD word(pblob_list, 0, " "); 1155 WERD word(pblob_list, 0, " ");
1150 ROW *row = make_tess_ocrrow(baseline, xheight, descender, ascender); 1156 ROW *row = make_tess_ocrrow(baseline, xheight, descender, ascender);
1151 word.baseline_normalise(row); 1157 word.baseline_normalise(row);
1152 delete row; 1158 delete row;
1153 1159
1154 // Create a TBLOB from PBLOB 1160 // Create a TBLOB from PBLOB
1155 return make_tess_blob(pblob, /* flatten: */ TRUE); 1161 return make_tess_blob(pblob, /* flatten: */ TRUE);
1156 } 1162 }
1157 1163
1158 1164
1159 // Adapt to recognize the current image as the given character. 1165 // Adapt to recognize the current image as the given character.
1160 // The image must be preloaded and be just an image of a single character. 1166 // The image must be preloaded and be just an image of a single character.
1161 void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, 1167 void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
1162 int length, 1168 int length,
1163 float baseline, 1169 float baseline,
1164 float xheight, 1170 float xheight,
1165 float descender, 1171 float descender,
1166 float ascender) { 1172 float ascender) {
1167 UNICHAR_ID id = unicharset.unichar_to_id(unichar_repr, length); 1173 UNICHAR_ID id = unicharset.unichar_to_id(unichar_repr, length);
1168 LINE_STATS LineStats; 1174 LINE_STATS LineStats;
1169 TEXTROW row; 1175 TEXTROW row;
1170 fill_dummy_row(baseline, xheight, descender, ascender, &row); 1176 fill_dummy_row(baseline, xheight, descender, ascender, &row);
1171 GetLineStatsFromRow(&row, &LineStats); 1177 GetLineStatsFromRow(&row, &LineStats);
1172 1178
1173 TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender); 1179 TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender);
1174 float threshold; 1180 float threshold;
1175 int best_class = 0; 1181 int best_class = 0;
1176 float best_rating = -100; 1182 float best_rating = -100;
1177 1183
1178 1184
1179 // Classify to get a raw choice. 1185 // Classify to get a raw choice.
1180 LIST result = AdaptiveClassifier(blob, NULL, &row); 1186 LIST result = AdaptiveClassifier(blob, NULL, &row);
1181 LIST p; 1187 LIST p;
1182 for (p = result; p != NULL; p = p->next) { 1188 for (p = result; p != NULL; p = p->next) {
1183 A_CHOICE *tesschoice = (A_CHOICE *) p->node; 1189 A_CHOICE *tesschoice = (A_CHOICE *) p->node;
1184 if (tesschoice->rating > best_rating) { 1190 if (tesschoice->rating > best_rating) {
1185 best_rating = tesschoice->rating; 1191 best_rating = tesschoice->rating;
1186 best_class = tesschoice->string[0]; 1192 best_class = tesschoice->string[0];
1187 } 1193 }
1188 } 1194 }
1189 1195
1190 FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId); 1196 FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId);
1191 1197
1192 // We have to use char-level adaptation because otherwise 1198 // We have to use char-level adaptation because otherwise
1193 // someone should do forced alignment somewhere. 1199 // someone should do forced alignment somewhere.
1194 void AdaptToChar(TBLOB *Blob, 1200 void AdaptToChar(TBLOB *Blob,
1195 LINE_STATS *LineStats, 1201 LINE_STATS *LineStats,
1196 CLASS_ID ClassId, 1202 CLASS_ID ClassId,
1197 FLOAT32 Threshold); 1203 FLOAT32 Threshold);
1198 1204
1199 1205
1200 if (id == best_class) 1206 if (id == best_class)
1201 threshold = GoodAdaptiveMatch; 1207 threshold = GoodAdaptiveMatch;
1202 else { 1208 else {
1203 /* the blob was incorrectly classified - find the rating threshold 1209 /* the blob was incorrectly classified - find the rating threshold
1204 needed to create a template which will correct the error with 1210 needed to create a template which will correct the error with
1205 some margin. However, don't waste time trying to make 1211 some margin. However, don't waste time trying to make
1206 templates which are too tight. */ 1212 templates which are too tight. */
1207 threshold = GetBestRatingFor(blob, &LineStats, id); 1213 threshold = GetBestRatingFor(blob, &LineStats, id);
1208 threshold *= .9; 1214 threshold *= .9;
1209 const float max_threshold = .125; 1215 const float max_threshold = .125;
1210 const float min_threshold = .02; 1216 const float min_threshold = .02;
1211 1217
1212 if (threshold > max_threshold) 1218 if (threshold > max_threshold)
1213 threshold = max_threshold; 1219 threshold = max_threshold;
1214 1220
1215 // I have cuddled the following line to set it out of the strike 1221 // I have cuddled the following line to set it out of the strike
1216 // of the coverage testing tool. I have no idea how to trigger 1222 // of the coverage testing tool. I have no idea how to trigger
1217 // this situation nor I have any necessity to do it. --mezhirov 1223 // this situation nor I have any necessity to do it. --mezhirov
1218 if (threshold < min_threshold) threshold = min_threshold; 1224 if (threshold < min_threshold) threshold = min_threshold;
1219 } 1225 }
1220 1226
1221 if (blob->outlines) 1227 if (blob->outlines)
1222 AdaptToChar(blob, &LineStats, id, threshold); 1228 AdaptToChar(blob, &LineStats, id, threshold);
1223 free_blob(blob); 1229 free_blob(blob);
1224 } 1230 }
1225 1231
1226 1232
1227 PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { 1233 PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
1228 PAGE_RES *page_res = new PAGE_RES(block_list); 1234 PAGE_RES *page_res = new PAGE_RES(block_list);
1229 recog_all_words(page_res, NULL, NULL, 1); 1235 recog_all_words(page_res, NULL, NULL, 1);
1230 return page_res; 1236 return page_res;
1231 } 1237 }
1232 1238
1233 PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, 1239 PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
1234 PAGE_RES* pass1_result) { 1240 PAGE_RES* pass1_result) {
1235 if (!pass1_result) 1241 if (!pass1_result)
1236 pass1_result = new PAGE_RES(block_list); 1242 pass1_result = new PAGE_RES(block_list);
1237 recog_all_words(pass1_result, NULL, NULL, 2); 1243 recog_all_words(pass1_result, NULL, NULL, 2);
1238 return pass1_result; 1244 return pass1_result;
1239 } 1245 }
1240 1246
1241 // brief Get a bounding box of a PBLOB. 1247 // brief Get a bounding box of a PBLOB.
1242 // TODO(mezhirov) delete this function and replace with blob->bounding_box() 1248 // TODO(mezhirov) delete this function and replace with blob->bounding_box()
1243 static TBOX pblob_get_bbox(PBLOB *blob) { 1249 static TBOX pblob_get_bbox(PBLOB *blob) {
1244 OUTLINE_LIST *outlines = blob->out_list(); 1250 OUTLINE_LIST *outlines = blob->out_list();
1245 OUTLINE_IT it(outlines); 1251 OUTLINE_IT it(outlines);
1246 TBOX result; 1252 TBOX result;
1247 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 1253 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1248 OUTLINE *outline = it.data(); 1254 OUTLINE *outline = it.data();
1249 outline->compute_bb(); 1255 outline->compute_bb();
1250 result.bounding_union(outline->bounding_box()); 1256 result.bounding_union(outline->bounding_box());
1251 } 1257 }
1252 return result; 1258 return result;
1253 } 1259 }
1254 1260
1255 // TODO(mezhirov) delete this function and replace with word->bounding_box() 1261 // TODO(mezhirov) delete this function and replace with word->bounding_box()
1256 static TBOX c_blob_list_get_bbox(C_BLOB_LIST *cblobs) { 1262 static TBOX c_blob_list_get_bbox(C_BLOB_LIST *cblobs) {
1257 TBOX result; 1263 TBOX result;
1258 C_BLOB_IT c_it(cblobs); 1264 C_BLOB_IT c_it(cblobs);
1259 for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { 1265 for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
1260 C_BLOB *blob = c_it.data(); 1266 C_BLOB *blob = c_it.data();
1261 //bboxes.push(tessy_rectangle(blob->bounding_box())); 1267 //bboxes.push(tessy_rectangle(blob->bounding_box()));
1262 result.bounding_union(blob->bounding_box()); 1268 result.bounding_union(blob->bounding_box());
1263 } 1269 }
1264 return result; 1270 return result;
1265 } 1271 }
1266 1272
1267 struct TESS_CHAR : ELIST_LINK { 1273 struct TESS_CHAR : ELIST_LINK {
1268 char *unicode_repr; 1274 char *unicode_repr;
1269 int length; // of unicode_repr 1275 int length; // of unicode_repr
1270 float cost; 1276 float cost;
1271 TBOX box; 1277 TBOX box;
1272 1278
1273 TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { 1279 TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) {
1274 length = (len == -1 ? strlen(repr) : len); 1280 length = (len == -1 ? strlen(repr) : len);
1275 unicode_repr = new char[length + 1]; 1281 unicode_repr = new char[length + 1];
1276 strncpy(unicode_repr, repr, length); 1282 strncpy(unicode_repr, repr, length);
1277 } 1283 }
1278 1284
1279 ~TESS_CHAR() { 1285 ~TESS_CHAR() {
1280 delete unicode_repr; 1286 delete unicode_repr;
1281 } 1287 }
1282 }; 1288 };
1283 1289
1284 1290
1285 static void add_space(ELIST_ITERATOR *it) { 1291 static void add_space(ELIST_ITERATOR *it) {
1286 TESS_CHAR *t = new TESS_CHAR(0, " "); 1292 TESS_CHAR *t = new TESS_CHAR(0, " ");
1287 it->add_after_then_move(t); 1293 it->add_after_then_move(t);
1288 } 1294 }
1289 1295
1290 1296
// Convert a rating into a cost by adding 100 and clamping the result at 0.
// The author reports never having seen ratings worse than -100; the clamp
// is only a safety net.
static float rating_to_cost(float rating) {
  const float shifted = 100 + rating;
  return shifted < 0 ? 0.0f : shifted;
}
1299 1305
1300 1306
1301 // Extract the OCR results, costs (penalty points for uncertainty), 1307 // Extract the OCR results, costs (penalty points for uncertainty),
1302 // and the bounding boxes of the characters. 1308 // and the bounding boxes of the characters.
1303 static void extract_result(ELIST_ITERATOR *out, 1309 static void extract_result(ELIST_ITERATOR *out,
1304 PAGE_RES* page_res) { 1310 PAGE_RES* page_res) {
1305 PAGE_RES_IT page_res_it(page_res); 1311 PAGE_RES_IT page_res_it(page_res);
1306 int word_count = 0; 1312 int word_count = 0;
1307 while (page_res_it.word() != NULL) { 1313 while (page_res_it.word() != NULL) {
1308 WERD_RES *word = page_res_it.word(); 1314 WERD_RES *word = page_res_it.word();
1309 const char *str = word->best_choice->string().string(); 1315 const char *str = word->best_choice->string().string();
1310 const char *len = word->best_choice->lengths().string(); 1316 const char *len = word->best_choice->lengths().string();
1311 1317
1312 if (word_count) 1318 if (word_count)
1313 add_space(out); 1319 add_space(out);
1314 TBOX bln_rect; 1320 TBOX bln_rect;
1315 PBLOB_LIST *blobs = word->outword->blob_list(); 1321 PBLOB_LIST *blobs = word->outword->blob_list();
1316 PBLOB_IT it(blobs); 1322 PBLOB_IT it(blobs);
1317 int n = strlen(len); 1323 int n = strlen(len);
1318 TBOX** boxes_to_fix = new TBOX*[n]; 1324 TBOX** boxes_to_fix = new TBOX*[n];
1319 for (int i = 0; i < n; i++) { 1325 for (int i = 0; i < n; i++) {
1320 PBLOB *blob = it.data(); 1326 PBLOB *blob = it.data();
1321 TBOX current = pblob_get_bbox(blob); 1327 TBOX current = pblob_get_bbox(blob);
1322 bln_rect.bounding_union(current); 1328 bln_rect.bounding_union(current);
1323 1329
1324 TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), 1330 TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
1325 str, *len); 1331 str, *len);
1326 tc->box = current; 1332 tc->box = current;
1327 boxes_to_fix[i] = &tc->box; 1333 boxes_to_fix[i] = &tc->box;
1328 1334
1329 out->add_after_then_move(tc); 1335 out->add_after_then_move(tc);
1330 it.forward(); 1336 it.forward();
1331 str += *len; 1337 str += *len;
1332 len++; 1338 len++;
1333 } 1339 }
1334 1340
1335 // Find the word bbox before normalization. 1341 // Find the word bbox before normalization.
1336 // Here we can't use the C_BLOB bboxes directly, 1342 // Here we can't use the C_BLOB bboxes directly,
1337 // since connected letters are not yet cut. 1343 // since connected letters are not yet cut.
1338 TBOX real_rect = c_blob_list_get_bbox(word->word->cblob_list()); 1344 TBOX real_rect = c_blob_list_get_bbox(word->word->cblob_list());
1339 1345
1340 // Denormalize boxes by transforming the bbox of the whole bln word 1346 // Denormalize boxes by transforming the bbox of the whole bln word
1341 // into the denorm bbox (`real_rect') of the whole word. 1347 // into the denorm bbox (`real_rect') of the whole word.
1342 double x_stretch = double(real_rect.width()) / bln_rect.width(); 1348 double x_stretch = double(real_rect.width()) / bln_rect.width();
1343 double y_stretch = double(real_rect.height()) / bln_rect.height(); 1349 double y_stretch = double(real_rect.height()) / bln_rect.height();
1344 for (int j = 0; j < n; j++) { 1350 for (int j = 0; j < n; j++) {
1345 TBOX *box = boxes_to_fix[j]; 1351 TBOX *box = boxes_to_fix[j];
1346 int x0 = int(real_rect.left() + 1352 int x0 = int(real_rect.left() +
1347 x_stretch * (box->left() - bln_rect.left()) + 0.5); 1353 x_stretch * (box->left() - bln_rect.left()) + 0.5);
1348 int x1 = int(real_rect.left() + 1354 int x1 = int(real_rect.left() +
1349 x_stretch * (box->right() - bln_rect.left()) + 0.5); 1355 x_stretch * (box->right() - bln_rect.left()) + 0.5);
1350 int y0 = int(real_rect.bottom() + 1356 int y0 = int(real_rect.bottom() +
1351 y_stretch * (box->bottom() - bln_rect.bottom()) + 0.5); 1357 y_stretch * (box->bottom() - bln_rect.bottom()) + 0.5);
1352 int y1 = int(real_rect.bottom() + 1358 int y1 = int(real_rect.bottom() +
1353 y_stretch * (box->top() - bln_rect.bottom()) + 0.5); 1359 y_stretch * (box->top() - bln_rect.bottom()) + 0.5);
1354 *box = TBOX(ICOORD(x0, y0), ICOORD(x1, y1)); 1360 *box = TBOX(ICOORD(x0, y0), ICOORD(x1, y1));
1355 } 1361 }
1356 delete [] boxes_to_fix; 1362 delete [] boxes_to_fix;
1357 1363
1358 page_res_it.forward(); 1364 page_res_it.forward();
1359 word_count++; 1365 word_count++;
1360 } 1366 }
1361 } 1367 }
1362 1368
1363 1369
1364 // Extract the OCR results, costs (penalty points for uncertainty), 1370 // Extract the OCR results, costs (penalty points for uncertainty),
1365 // and the bounding boxes of the characters. 1371 // and the bounding boxes of the characters.
1366 int TessBaseAPI::TesseractExtractResult(char** string, 1372 int TessBaseAPI::TesseractExtractResult(char** string,
1367 int** lengths, 1373 int** lengths,
1368 float** costs, 1374 float** costs,
1369 int** x0, 1375 int** x0,
1370 int** y0, 1376 int** y0,
1371 int** x1, 1377 int** x1,
1372 int** y1, 1378 int** y1,
1373 PAGE_RES* page_res) { 1379 PAGE_RES* page_res) {
1374 ELIST tess_chars; 1380 ELIST tess_chars;
1375 ELIST_ITERATOR tess_chars_it(&tess_chars); 1381 ELIST_ITERATOR tess_chars_it(&tess_chars);
1376 extract_result(&tess_chars_it, page_res); 1382 extract_result(&tess_chars_it, page_res);
1377 tess_chars_it.move_to_first(); 1383 tess_chars_it.move_to_first();
1378 int n = tess_chars.length(); 1384 int n = tess_chars.length();
1379 int string_len = 0; 1385 int string_len = 0;
1380 *lengths = new int[n]; 1386 *lengths = new int[n];
1381 *costs = new float[n]; 1387 *costs = new float[n];
1382 *x0 = new int[n]; 1388 *x0 = new int[n];
1383 *y0 = new int[n]; 1389 *y0 = new int[n];
1384 *x1 = new int[n]; 1390 *x1 = new int[n];
1385 *y1 = new int[n]; 1391 *y1 = new int[n];
1386 int i = 0; 1392 int i = 0;
1387 for (tess_chars_it.mark_cycle_pt(); 1393 for (tess_chars_it.mark_cycle_pt();
1388 !tess_chars_it.cycled_list(); 1394 !tess_chars_it.cycled_list();
1389 tess_chars_it.forward(), i++) { 1395 tess_chars_it.forward(), i++) {
1390 TESS_CHAR *tc = (TESS_CHAR *) tess_chars_it.data(); 1396 TESS_CHAR *tc = (TESS_CHAR *) tess_chars_it.data();
1391 string_len += (*lengths)[i] = tc->length; 1397 string_len += (*lengths)[i] = tc->length;
1392 (*costs)[i] = tc->cost; 1398 (*costs)[i] = tc->cost;
1393 (*x0)[i] = tc->box.left(); 1399 (*x0)[i] = tc->box.left();
1394 (*y0)[i] = tc->box.bottom(); 1400 (*y0)[i] = tc->box.bottom();
1395 (*x1)[i] = tc->box.right(); 1401 (*x1)[i] = tc->box.right();
1396 (*y1)[i] = tc->box.top(); 1402 (*y1)[i] = tc->box.top();
1397 } 1403 }
1398 char *p = *string = new char[string_len]; 1404 char *p = *string = new char[string_len];
1399 1405
1400 tess_chars_it.move_to_first(); 1406 tess_chars_it.move_to_first();
1401 for (tess_chars_it.mark_cycle_pt(); 1407 for (tess_chars_it.mark_cycle_pt();
1402 !tess_chars_it.cycled_list(); 1408 !tess_chars_it.cycled_list();
1403 tess_chars_it.forward()) { 1409 tess_chars_it.forward()) {
1404 TESS_CHAR *tc = (TESS_CHAR *) tess_chars_it.data(); 1410 TESS_CHAR *tc = (TESS_CHAR *) tess_chars_it.data();
1405 strncpy(p, tc->unicode_repr, tc->length); 1411 strncpy(p, tc->unicode_repr, tc->length);
1406 p += tc->length; 1412 p += tc->length;
1407 } 1413 }
1408 return n; 1414 return n;
1409 } 1415 }
1410 1416
1411 // Check whether a word is valid according to Tesseract's language model 1417 // Check whether a word is valid according to Tesseract's language model
1412 // returns 0 if the string is invalid, non-zero if valid 1418 // returns 0 if the string is invalid, non-zero if valid
1413 int TessBaseAPI::IsValidWord(const char *string) { 1419 int TessBaseAPI::IsValidWord(const char *string) {
1414 return valid_word(string); 1420 return valid_word(string);
1415 } 1421 }
1416 1422
Hosted by Google Code