| /trunk/ccmain/baseapi.cpp r8 | /trunk/ccmain/baseapi.cpp r9 | ||
| 1 | /********************************************************************** | 1 | /********************************************************************** |
|---|---|---|---|
| 2 | * File: baseapi.cpp | 2 | * File: baseapi.cpp |
| 3 | * Description: Simple API for calling tesseract. | 3 | * Description: Simple API for calling tesseract. |
| 4 | * Author: Ray Smith | 4 | * Author: Ray Smith |
| 5 | * Created: Fri Oct 06 15:35:01 PDT 2006 | 5 | * Created: Fri Oct 06 15:35:01 PDT 2006 |
| 6 | * | 6 | * |
| 7 | * (C) Copyright 2006, Google Inc. | 7 | * (C) Copyright 2006, Google Inc. |
| 8 | ** Licensed under the Apache License, Version 2.0 (the "License"); | 8 | ** Licensed under the Apache License, Version 2.0 (the "License"); |
| 9 | ** you may not use this file except in compliance with the License. | 9 | ** you may not use this file except in compliance with the License. |
| 10 | ** You may obtain a copy of the License at | 10 | ** You may obtain a copy of the License at |
| 11 | ** http://www.apache.org/licenses/LICENSE-2.0 | 11 | ** http://www.apache.org/licenses/LICENSE-2.0 |
| 12 | ** Unless required by applicable law or agreed to in writing, software | 12 | ** Unless required by applicable law or agreed to in writing, software |
| 13 | ** distributed under the License is distributed on an "AS IS" BASIS, | 13 | ** distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 14 | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | ** See the License for the specific language governing permissions and | 15 | ** See the License for the specific language governing permissions and |
| 16 | ** limitations under the License. | 16 | ** limitations under the License. |
| 17 | * | 17 | * |
| 18 | **********************************************************************/ | 18 | **********************************************************************/ |
| 19 | 19 | ||
| 20 | #include "baseapi.h" | 20 | #include "baseapi.h" |
| 21 | #include <iostream> | 21 | #include <iostream> |
| 22 | #include <math.h> | 22 | #include <math.h> |
| 23 | 23 | ||
| 24 | 24 | ||
| 25 | using namespace std; | 25 | using namespace std; |
| 26 | 26 | ||
| 27 | #define min(a, b) (((a) < (b)) ? (a) : (b)) | 27 | #define min(a, b) (((a) < (b)) ? (a) : (b)) |
| 28 | #define max(a, b) (((a) > (b)) ? (a) : (b)) | 28 | #define max(a, b) (((a) > (b)) ? (a) : (b)) |
| 29 | 29 | ||
| 30 | // Include automatically generated configuration file if running autoconf. | 30 | // Include automatically generated configuration file if running autoconf. |
| 31 | #ifdef HAVE_CONFIG_H | 31 | #ifdef HAVE_CONFIG_H |
| 32 | #include "config_auto.h" | 32 | #include "config_auto.h" |
| 33 | #endif | 33 | #endif |
| 34 | 34 | ||
| 35 | #ifdef HAVE_LIBLEPT | 35 | #ifdef HAVE_LIBLEPT |
| 36 | // Include leptonica library only if autoconf (or makefile etc) tell us to. | 36 | // Include leptonica library only if autoconf (or makefile etc) tell us to. |
| 37 | #include "allheaders.h" | 37 | #include "allheaders.h" |
| 38 | #endif | 38 | #endif |
| 39 | 39 | ||
| 40 | #include "tessedit.h" | 40 | #include "tessedit.h" |
| 41 | #include "ocrclass.h" | 41 | #include "ocrclass.h" |
| 42 | #include "pageres.h" | 42 | #include "pageres.h" |
| 43 | #include "tessvars.h" | 43 | #include "tessvars.h" |
| 44 | #include "control.h" | 44 | #include "control.h" |
| 45 | #include "applybox.h" | 45 | #include "applybox.h" |
| 46 | #include "pgedit.h" | 46 | #include "pgedit.h" |
| 47 | #include "varabled.h" | 47 | #include "varabled.h" |
| 48 | #include "variables.h" | 48 | #include "variables.h" |
| 49 | #include "output.h" | 49 | #include "output.h" |
| 50 | #include "globals.h" | 50 | #include "globals.h" |
| 51 | #include "adaptmatch.h" | 51 | #include "adaptmatch.h" |
| 52 | #include "edgblob.h" | 52 | #include "edgblob.h" |
| 53 | #include "tessbox.h" | 53 | #include "tessbox.h" |
| 54 | #include "tordvars.h" | 54 | #include "tordvars.h" |
| 55 | #include "imgs.h" | 55 | #include "imgs.h" |
| 56 | #include "makerow.h" | 56 | #include "makerow.h" |
| 57 | #include "tstruct.h" | 57 | #include "tstruct.h" |
| 58 | #include "tessout.h" | 58 | #include "tessout.h" |
| 59 | #include "tface.h" | 59 | #include "tface.h" |
| 60 | #include "permute.h" | 60 | #include "permute.h" |
| 61 | 61 | ||
// Boolean settings declared through the project's BOOL_VAR macro. Both
// default to FALSE; the string is the description attached to the setting.
BOOL_VAR(tessedit_resegment_from_boxes, FALSE,
         "Take segmentation and labeling from box file");
BOOL_VAR(tessedit_train_from_boxes, FALSE,
         "Generate training data from boxed chars");

// Minimum sensible image size to be worth running tesseract.
// The TesseractRect* entry points return NULL when the requested width or
// height is below this value.
const int kMinRectSize = 10;

// Defined in another translation unit; not referenced in the visible part
// of this file.
extern bool connected_script;

// Name of the current input file. Overwritten by SetInputName(); the
// default is used when the caller never supplies a name.
static STRING input_file = "noname.tif";
| 73 | 73 | ||
| 74 | // Set the value of an internal "variable" (of either old or new types). | 74 | // Set the value of an internal "variable" (of either old or new types). |
| 75 | // Supply the name of the variable and the value as a string, just as | 75 | // Supply the name of the variable and the value as a string, just as |
| 76 | // you would in a config file. | 76 | // you would in a config file. |
| 77 | // Returns false if the name lookup failed. | 77 | // Returns false if the name lookup failed. |
| 78 | bool TessBaseAPI::SetVariable(const char* variable, const char* value) { | 78 | bool TessBaseAPI::SetVariable(const char* variable, const char* value) { |
| 79 | if (set_new_style_variable(variable, value)) | 79 | if (set_new_style_variable(variable, value)) |
| 80 | return true; | 80 | return true; |
| 81 | return set_old_style_variable(variable, value); | 81 | return set_old_style_variable(variable, value); |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | void TessBaseAPI::SimpleInit(const char* datapath, | 84 | void TessBaseAPI::SimpleInit(const char* datapath, |
| 85 | const char* language, | 85 | const char* language, |
| 86 | bool numeric_mode) { | 86 | bool numeric_mode) { |
| 87 | InitWithLanguage(datapath, NULL, language, NULL, numeric_mode, 0, NULL); | 87 | InitWithLanguage(datapath, NULL, language, NULL, numeric_mode, 0, NULL); |
| 88 | } | 88 | } |
| 89 | 89 | ||
| 90 | // Start tesseract. | 90 | // Start tesseract. |
| 91 | // The datapath must be the name of the data directory or some other file | 91 | // The datapath must be the name of the data directory or some other file |
| 92 | // in which the data directory resides (for instance argv[0].) | 92 | // in which the data directory resides (for instance argv[0].) |
| 93 | // The configfile is the name of a file in the tessconfigs directory | 93 | // The configfile is the name of a file in the tessconfigs directory |
| 94 | // (eg batch) or NULL to run on defaults. | 94 | // (eg batch) or NULL to run on defaults. |
| 95 | // Outputbase may also be NULL, and is the basename of various output files. | 95 | // Outputbase may also be NULL, and is the basename of various output files. |
| 96 | // If the output of any of these files is enabled, then a name nmust be given. | 96 | // If the output of any of these files is enabled, then a name nmust be given. |
| 97 | // If numeric_mode is true, only possible digits and roman numbers are | 97 | // If numeric_mode is true, only possible digits and roman numbers are |
| 98 | // returned. Returns 0 if successful. Crashes if not. | 98 | // returned. Returns 0 if successful. Crashes if not. |
| 99 | // The argc and argv may be 0 and NULL respectively. They are used for | 99 | // The argc and argv may be 0 and NULL respectively. They are used for |
| 100 | // providing config files for debug/display purposes. | 100 | // providing config files for debug/display purposes. |
| 101 | // TODO(rays) get the facts straight. Is it OK to call | 101 | // TODO(rays) get the facts straight. Is it OK to call |
| 102 | // it more than once? Make it properly check for errors and return them. | 102 | // it more than once? Make it properly check for errors and return them. |
| 103 | int TessBaseAPI::Init(const char* datapath, const char* outputbase, | 103 | int TessBaseAPI::Init(const char* datapath, const char* outputbase, |
| 104 | const char* configfile, bool numeric_mode, | 104 | const char* configfile, bool numeric_mode, |
| 105 | int argc, char* argv[]) { | 105 | int argc, char* argv[]) { |
| 106 | return InitWithLanguage(datapath, outputbase, NULL, configfile, | 106 | return InitWithLanguage(datapath, outputbase, NULL, configfile, |
| 107 | numeric_mode, argc, argv); | 107 | numeric_mode, argc, argv); |
| 108 | } | 108 | } |
| 109 | 109 | ||
| 110 | // Start tesseract. | 110 | // Start tesseract. |
| 111 | // Similar to Init() except that it is possible to specify the language. | 111 | // Similar to Init() except that it is possible to specify the language. |
| 112 | // Language is the code of the language for which the data will be loaded. | 112 | // Language is the code of the language for which the data will be loaded. |
| 113 | // (Codes follow ISO 639-3.) If it is NULL, english (eng) will be loaded. | 113 | // (Codes follow ISO 639-3.) If it is NULL, english (eng) will be loaded. |
| 114 | int TessBaseAPI::InitWithLanguage(const char* datapath, const char* outputbase, | 114 | int TessBaseAPI::InitWithLanguage(const char* datapath, const char* outputbase, |
| 115 | const char* language, const char* configfile, | 115 | const char* language, const char* configfile, |
| 116 | bool numeric_mode, int argc, char* argv[]) { | 116 | bool numeric_mode, int argc, char* argv[]) { |
| 117 | int result = init_tesseract(datapath, outputbase, language, | 117 | int result = init_tesseract(datapath, outputbase, language, |
| 118 | configfile, argc, argv); | 118 | configfile, argc, argv); |
| 119 | 119 | ||
| 120 | bln_numericmode.set_value(numeric_mode); | 120 | bln_numericmode.set_value(numeric_mode); |
| 121 | return result; | 121 | return result; |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | // Init the lang model component of Tesseract | 124 | // Init the lang model component of Tesseract |
| 125 | int TessBaseAPI::InitLangMod(const char* datapath, const char* outputbase, | 125 | int TessBaseAPI::InitLangMod(const char* datapath, const char* outputbase, |
| 126 | const char* language, const char* configfile, | 126 | const char* language, const char* configfile, |
| 127 | bool numeric_mode, int argc, char* argv[]) { | 127 | bool numeric_mode, int argc, char* argv[]) { |
| 128 | return init_tesseract_lm(datapath, outputbase, language, | 128 | return init_tesseract_lm(datapath, outputbase, language, |
| 129 | configfile, argc, argv); | 129 | configfile, argc, argv); |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | // Set the name of the input file. Needed only for training and | 132 | // Set the name of the input file. Needed only for training and |
| 133 | // loading a UNLV zone file. | 133 | // loading a UNLV zone file. |
| 134 | void TessBaseAPI::SetInputName(const char* name) { | 134 | void TessBaseAPI::SetInputName(const char* name) { |
| 135 | input_file = name; | 135 | input_file = name; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | // Recognize a rectangle from an image and return the result as a string. | 138 | // Recognize a rectangle from an image and return the result as a string. |
| 139 | // May be called many times for a single Init. | 139 | // May be called many times for a single Init. |
| 140 | // Currently has no error checking. | 140 | // Currently has no error checking. |
| 141 | // Greyscale of 8 and color of 24 or 32 bits per pixel may be given. | 141 | // Greyscale of 8 and color of 24 or 32 bits per pixel may be given. |
| 142 | // Palette color images will not work properly and must be converted to | 142 | // Palette color images will not work properly and must be converted to |
| 143 | // 24 bit. | 143 | // 24 bit. |
| 144 | // Binary images of 1 bit per pixel may also be given but they must be | 144 | // Binary images of 1 bit per pixel may also be given but they must be |
| 145 | // byte packed with the MSB of the first byte being the first pixel, and a | 145 | // byte packed with the MSB of the first byte being the first pixel, and a |
| 146 | // one pixel is WHITE. For binary images set bytes_per_pixel=0. | 146 | // one pixel is WHITE. For binary images set bytes_per_pixel=0. |
| 147 | // The recognized text is returned as a char* which (in future will be coded | 147 | // The recognized text is returned as a char* which (in future will be coded |
| 148 | // as UTF8 and) must be freed with the delete [] operator. | 148 | // as UTF8 and) must be freed with the delete [] operator. |
| 149 | char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, | 149 | char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, |
| 150 | int bytes_per_pixel, | 150 | int bytes_per_pixel, |
| 151 | int bytes_per_line, | 151 | int bytes_per_line, |
| 152 | int left, int top, | 152 | int left, int top, |
| 153 | int width, int height) { | 153 | int width, int height) { |
| 154 | if (width < kMinRectSize || height < kMinRectSize) | 154 | if (width < kMinRectSize || height < kMinRectSize) |
| 155 | return NULL; // Nothing worth doing. | 155 | return NULL; // Nothing worth doing. |
| 156 | 156 | ||
| 157 | // Copy/Threshold the image to the tesseract global page_image. | 157 | // Copy/Threshold the image to the tesseract global page_image. |
| 158 | CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line, | 158 | CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line, |
| 159 | left, top, width, height); | 159 | left, top, width, height); |
| 160 | 160 | ||
| 161 | return RecognizeToString(); | 161 | return RecognizeToString(); |
| 162 | } | 162 | } |
| 163 | 163 | ||
| 164 | // As TesseractRect but produces a box file as output. | 164 | // As TesseractRect but produces a box file as output. |
| 165 | char* TessBaseAPI::TesseractRectBoxes(const unsigned char* imagedata, | 165 | char* TessBaseAPI::TesseractRectBoxes(const unsigned char* imagedata, |
| 166 | int bytes_per_pixel, | 166 | int bytes_per_pixel, |
| 167 | int bytes_per_line, | 167 | int bytes_per_line, |
| 168 | int left, int top, | 168 | int left, int top, |
| 169 | int width, int height, | 169 | int width, int height, |
| 170 | int imageheight) { | 170 | int imageheight) { |
| 171 | if (width < kMinRectSize || height < kMinRectSize) | 171 | if (width < kMinRectSize || height < kMinRectSize) |
| 172 | return NULL; // Nothing worth doing. | 172 | return NULL; // Nothing worth doing. |
| 173 | 173 | ||
| 174 | // Copy/Threshold the image to the tesseract global page_image. | 174 | // Copy/Threshold the image to the tesseract global page_image. |
| 175 | CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line, | 175 | CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line, |
| 176 | left, top, width, height); | 176 | left, top, width, height); |
| 177 | 177 | ||
| 178 | BLOCK_LIST block_list; | 178 | BLOCK_LIST block_list; |
| 179 | 179 | ||
| 180 | FindLines(&block_list); | 180 | FindLines(&block_list); |
| 181 | 181 | ||
| 182 | // Now run the main recognition. | 182 | // Now run the main recognition. |
| 183 | PAGE_RES* page_res = Recognize(&block_list, NULL); | 183 | PAGE_RES* page_res = Recognize(&block_list, NULL); |
| 184 | 184 | ||
| 185 | return TesseractToBoxText(page_res, left, imageheight - (top + height)); | 185 | return TesseractToBoxText(page_res, left, imageheight - (top + height)); |
| 186 | } | 186 | } |
| 187 | 187 | ||
| 188 | char* TessBaseAPI::TesseractRectUNLV(const unsigned char* imagedata, | 188 | char* TessBaseAPI::TesseractRectUNLV(const unsigned char* imagedata, |
| 189 | int bytes_per_pixel, | 189 | int bytes_per_pixel, |
| 190 | int bytes_per_line, | 190 | int bytes_per_line, |
| 191 | int left, int top, | 191 | int left, int top, |
| 192 | int width, int height) { | 192 | int width, int height) { |
| 193 | if (width < kMinRectSize || height < kMinRectSize) | 193 | if (width < kMinRectSize || height < kMinRectSize) |
| 194 | return NULL; // Nothing worth doing. | 194 | return NULL; // Nothing worth doing. |
| 195 | 195 | ||
| 196 | // Copy/Threshold the image to the tesseract global page_image. | 196 | // Copy/Threshold the image to the tesseract global page_image. |
| 197 | CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line, | 197 | CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line, |
| 198 | left, top, width, height); | 198 | left, top, width, height); |
| 199 | 199 | ||
| 200 | BLOCK_LIST block_list; | 200 | BLOCK_LIST block_list; |
| 201 | 201 | ||
| 202 | FindLines(&block_list); | 202 | FindLines(&block_list); |
| 203 | 203 | ||
| 204 | // Now run the main recognition. | 204 | // Now run the main recognition. |
| 205 | PAGE_RES* page_res = Recognize(&block_list, NULL); | 205 | PAGE_RES* page_res = Recognize(&block_list, NULL); |
| 206 | 206 | ||
| 207 | return TesseractToUNLV(page_res); | 207 | return TesseractToUNLV(page_res); |
| 208 | } | 208 | } |
| 209 | 209 | ||
| 210 | // Call between pages or documents etc to free up memory and forget | 210 | // Call between pages or documents etc to free up memory and forget |
| 211 | // adaptive data. | 211 | // adaptive data. |
| 212 | void TessBaseAPI::ClearAdaptiveClassifier() { | 212 | void TessBaseAPI::ClearAdaptiveClassifier() { |
| 213 | ResetAdaptiveClassifier(); | 213 | ResetAdaptiveClassifier(); |
| 214 | } | 214 | } |
| 215 | 215 | ||
| 216 | // Close down tesseract and free up memory. | 216 | // Close down tesseract and free up memory. |
| 217 | void TessBaseAPI::End() { | 217 | void TessBaseAPI::End() { |
| 218 | ResetAdaptiveClassifier(); | 218 | ResetAdaptiveClassifier(); |
| 219 | end_tesseract(); | 219 | end_tesseract(); |
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | // Dump the internal binary image to a PGM file. | 222 | // Dump the internal binary image to a PGM file. |
| 223 | void TessBaseAPI::DumpPGM(const char* filename) { | 223 | void TessBaseAPI::DumpPGM(const char* filename) { |
| 224 | IMAGELINE line; | 224 | IMAGELINE line; |
| 225 | line.init(page_image.get_xsize()); | 225 | line.init(page_image.get_xsize()); |
| 226 | FILE *fp = fopen(filename, "w"); | 226 | FILE *fp = fopen(filename, "w"); |
| 227 | fprintf(fp, "P5 " INT32FORMAT " " INT32FORMAT " 255\n", page_image.get_xsize(), | 227 | fprintf(fp, "P5 " INT32FORMAT " " INT32FORMAT " 255\n", page_image.get_xsize(), |
| 228 | page_image.get_ysize()); | 228 | page_image.get_ysize()); |
| 229 | for (int j = page_image.get_ysize()-1; j >= 0 ; --j) { | 229 | for (int j = page_image.get_ysize()-1; j >= 0 ; --j) { |
| 230 | page_image.get_line(0, j, page_image.get_xsize(), &line, 0); | 230 | page_image.get_line(0, j, page_image.get_xsize(), &line, 0); |
| 231 | for (int i = 0; i < page_image.get_xsize(); ++i) { | 231 | for (int i = 0; i < page_image.get_xsize(); ++i) { |
| 232 | uinT8 b = line.pixels[i] ? 255 : 0; | 232 | uinT8 b = line.pixels[i] ? 255 : 0; |
| 233 | fwrite(&b, 1, 1, fp); | 233 | fwrite(&b, 1, 1, fp); |
| 234 | } | 234 | } |
| 235 | } | 235 | } |
| 236 | fclose(fp); | 236 | fclose(fp); |
| 237 | } | 237 | } |
| 238 | 238 | ||
| 239 | #ifdef HAVE_LIBLEPT | 239 | #ifdef HAVE_LIBLEPT |
| 240 | // ONLY available if you have Leptonica installed. | 240 | // ONLY available if you have Leptonica installed. |
| 241 | // Get a copy of the thresholded global image from Tesseract. | 241 | // Get a copy of the thresholded global image from Tesseract. |
| 242 | Pix* TessBaseAPI::GetTesseractImage() { | 242 | Pix* TessBaseAPI::GetTesseractImage() { |
| 243 | return page_image.ToPix(); | 243 | return page_image.ToPix(); |
| 244 | } | 244 | } |
| 245 | #endif // HAVE_LIBLEPT | 245 | #endif // HAVE_LIBLEPT |
| 246 | 246 | ||
| 247 | // Copy the given image rectangle to Tesseract, with adaptive thresholding | 247 | // Copy the given image rectangle to Tesseract, with adaptive thresholding |
| 248 | // if the image is not already binary. | 248 | // if the image is not already binary. |
| 249 | void TessBaseAPI::CopyImageToTesseract(const unsigned char* imagedata, | 249 | void TessBaseAPI::CopyImageToTesseract(const unsigned char* imagedata, |
| 250 | int bytes_per_pixel, | 250 | int bytes_per_pixel, |
| 251 | int bytes_per_line, | 251 | int bytes_per_line, |
| 252 | int left, int top, | 252 | int left, int top, |
| 253 | int width, int height) { | 253 | int width, int height) { |
| 254 | if (bytes_per_pixel > 0) { | 254 | if (bytes_per_pixel > 0) { |
| 255 | // Threshold grey or color. | 255 | // Threshold grey or color. |
| 256 | int* thresholds = new int[bytes_per_pixel]; | 256 | int* thresholds = new int[bytes_per_pixel]; |
| 257 | int* hi_values = new int[bytes_per_pixel]; | 257 | int* hi_values = new int[bytes_per_pixel]; |
| 258 | 258 | ||
| 259 | // Compute the thresholds. | 259 | // Compute the thresholds. |
| 260 | OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line, | 260 | OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line, |
| 261 | left, top, left + width, top + height, | 261 | left, top, left + width, top + height, |
| 262 | thresholds, hi_values); | 262 | thresholds, hi_values); |
| 263 | 263 | ||
| 264 | // Threshold the image to the tesseract global page_image. | 264 | // Threshold the image to the tesseract global page_image. |
| 265 | ThresholdRect(imagedata, bytes_per_pixel, bytes_per_line, | 265 | ThresholdRect(imagedata, bytes_per_pixel, bytes_per_line, |
| 266 | left, top, width, height, | 266 | left, top, width, height, |
| 267 | thresholds, hi_values); | 267 | thresholds, hi_values); |
| 268 | 268 | ||
| 269 | delete [] thresholds; | 269 | delete [] thresholds; |
| 270 | delete [] hi_values; | 270 | delete [] hi_values; |
| 271 | } else { | 271 | } else { |
| 272 | CopyBinaryRect(imagedata, bytes_per_line, left, top, width, height); | 272 | CopyBinaryRect(imagedata, bytes_per_line, left, top, width, height); |
| 273 | } | 273 | } |
| 274 | } | 274 | } |
| 275 | 275 | ||
| 276 | // Compute the Otsu threshold(s) for the given image rectangle, making one | 276 | // Compute the Otsu threshold(s) for the given image rectangle, making one |
| 277 | // for each channel. Each channel is always one byte per pixel. | 277 | // for each channel. Each channel is always one byte per pixel. |
| 278 | // Returns an array of threshold values and an array of hi_values, such | 278 | // Returns an array of threshold values and an array of hi_values, such |
| 279 | // that a pixel value >threshold[channel] is considered foreground if | 279 | // that a pixel value >threshold[channel] is considered foreground if |
| 280 | // hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates | 280 | // hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates |
| 281 | // that there is no apparent foreground. At least one hi_value will not be -1. | 281 | // that there is no apparent foreground. At least one hi_value will not be -1. |
| 282 | // thresholds and hi_values are assumed to be of bytes_per_pixel size. | 282 | // thresholds and hi_values are assumed to be of bytes_per_pixel size. |
| 283 | void TessBaseAPI::OtsuThreshold(const unsigned char* imagedata, | 283 | void TessBaseAPI::OtsuThreshold(const unsigned char* imagedata, |
| 284 | int bytes_per_pixel, | 284 | int bytes_per_pixel, |
| 285 | int bytes_per_line, | 285 | int bytes_per_line, |
| 286 | int left, int top, int right, int bottom, | 286 | int left, int top, int right, int bottom, |
| 287 | int* thresholds, | 287 | int* thresholds, |
| 288 | int* hi_values) { | 288 | int* hi_values) { |
| 289 | // Of all channels with no good hi_value, keep the best so we can always | 289 | // Of all channels with no good hi_value, keep the best so we can always |
| 290 | // produce at least one answer. | 290 | // produce at least one answer. |
| 291 | int best_hi_value = 0; | 291 | int best_hi_value = 0; |
| 292 | int best_hi_index = 0; | 292 | int best_hi_index = 0; |
| 293 | bool any_good_hivalue = false; | 293 | bool any_good_hivalue = false; |
| 294 | double best_hi_dist = 0.0; | 294 | double best_hi_dist = 0.0; |
| 295 | 295 | ||
| 296 | for (int ch = 0; ch < bytes_per_pixel; ++ch) { | 296 | for (int ch = 0; ch < bytes_per_pixel; ++ch) { |
| 297 | thresholds[ch] = 0; | 297 | thresholds[ch] = 0; |
| 298 | hi_values[ch] = -1; | 298 | hi_values[ch] = -1; |
| 299 | // Compute the histogram of the image rectangle. | 299 | // Compute the histogram of the image rectangle. |
| 300 | int histogram[256]; | 300 | int histogram[256]; |
| 301 | HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line, | 301 | HistogramRect(imagedata + ch, bytes_per_pixel, bytes_per_line, |
| 302 | left, top, right, bottom, histogram); | 302 | left, top, right, bottom, histogram); |
| 303 | int H; | 303 | int H; |
| 304 | int best_omega_0; | 304 | int best_omega_0; |
| 305 | int best_t = OtsuStats(histogram, &H, &best_omega_0); | 305 | int best_t = OtsuStats(histogram, &H, &best_omega_0); |
| 306 | if (best_omega_0 == 0 || best_omega_0 == H) { | 306 | if (best_omega_0 == 0 || best_omega_0 == H) { |
| 307 | // This channel is empty. | 307 | // This channel is empty. |
| 308 | continue; | 308 | continue; |
| 309 | } | 309 | } |
| 310 | // To be a convincing foreground we must have a small fraction of H | 310 | // To be a convincing foreground we must have a small fraction of H |
| 311 | // or to be a convincing background we must have a large fraction of H. | 311 | // or to be a convincing background we must have a large fraction of H. |
| 312 | // In between we assume this channel contains no thresholding information. | 312 | // In between we assume this channel contains no thresholding information. |
| 313 | int hi_value = best_omega_0 < H * 0.5; | 313 | int hi_value = best_omega_0 < H * 0.5; |
| 314 | thresholds[ch] = best_t; | 314 | thresholds[ch] = best_t; |
| 315 | if (best_omega_0 > H * 0.75) { | 315 | if (best_omega_0 > H * 0.75) { |
| 316 | any_good_hivalue = true; | 316 | any_good_hivalue = true; |
| 317 | hi_values[ch] = 0; | 317 | hi_values[ch] = 0; |
| 318 | } | 318 | } |
| 319 | else if (best_omega_0 < H * 0.25) { | 319 | else if (best_omega_0 < H * 0.25) { |
| 320 | any_good_hivalue = true; | 320 | any_good_hivalue = true; |
| 321 | hi_values[ch] = 1; | 321 | hi_values[ch] = 1; |
| 322 | } | 322 | } |
| 323 | else { | 323 | else { |
| 324 | // In case all channels are like this, keep the best of the bad lot. | 324 | // In case all channels are like this, keep the best of the bad lot. |
| 325 | double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; | 325 | double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; |
| 326 | if (hi_dist > best_hi_dist) { | 326 | if (hi_dist > best_hi_dist) { |
| 327 | best_hi_dist = hi_dist; | 327 | best_hi_dist = hi_dist; |
| 328 | best_hi_value = hi_value; | 328 | best_hi_value = hi_value; |
| 329 | best_hi_index = ch; | 329 | best_hi_index = ch; |
| 330 | } | 330 | } |
| 331 | } | 331 | } |
| 332 | } | 332 | } |
| 333 | if (!any_good_hivalue) { | 333 | if (!any_good_hivalue) { |
| 334 | // Use the best of the ones that were not good enough. | 334 | // Use the best of the ones that were not good enough. |
| 335 | hi_values[best_hi_index] = best_hi_value; | 335 | hi_values[best_hi_index] = best_hi_value; |
| 336 | } | 336 | } |
| 337 | } | 337 | } |
| 338 | 338 | ||
| 339 | // Compute the histogram for the given image rectangle, and the given | 339 | // Compute the histogram for the given image rectangle, and the given |
| 340 | // channel. (Channel pointed to by imagedata.) Each channel is always | 340 | // channel. (Channel pointed to by imagedata.) Each channel is always |
| 341 | // one byte per pixel. | 341 | // one byte per pixel. |
| 342 | // Bytes per pixel is used to skip channels not being | 342 | // Bytes per pixel is used to skip channels not being |
| 343 | // counted with this call in a multi-channel (pixel-major) image. | 343 | // counted with this call in a multi-channel (pixel-major) image. |
| 344 | // Histogram is always a 256 element array to count occurrences of | 344 | // Histogram is always a 256 element array to count occurrences of |
| 345 | // each pixel value. | 345 | // each pixel value. |
| 346 | void TessBaseAPI::HistogramRect(const unsigned char* imagedata, | 346 | void TessBaseAPI::HistogramRect(const unsigned char* imagedata, |
| 347 | int bytes_per_pixel, | 347 | int bytes_per_pixel, |
| 348 | int bytes_per_line, | 348 | int bytes_per_line, |
| 349 | int left, int top, int right, int bottom, | 349 | int left, int top, int right, int bottom, |
| 350 | int* histogram) { | 350 | int* histogram) { |
| 351 | int width = right - left; | 351 | int width = right - left; |
| 352 | memset(histogram, 0, sizeof(*histogram) * 256); | 352 | memset(histogram, 0, sizeof(*histogram) * 256); |
| 353 | const unsigned char* pixels = imagedata + | 353 | const unsigned char* pixels = imagedata + |
| 354 | top*bytes_per_line + | 354 | top*bytes_per_line + |
| 355 | left*bytes_per_pixel; | 355 | left*bytes_per_pixel; |
| 356 | for (int y = top; y < bottom; ++y) { | 356 | for (int y = top; y < bottom; ++y) { |
| 357 | for (int x = 0; x < width; ++x) { | 357 | for (int x = 0; x < width; ++x) { |
| 358 | ++histogram[pixels[x * bytes_per_pixel]]; | 358 | ++histogram[pixels[x * bytes_per_pixel]]; |
| 359 | } | 359 | } |
| 360 | pixels += bytes_per_line; | 360 | pixels += bytes_per_line; |
| 361 | } | 361 | } |
| 362 | 362 | ||
| 363 | } | 363 | } |
| 364 | 364 | ||
| 365 | // Compute the Otsu threshold(s) for the given histogram. | 365 | // Compute the Otsu threshold(s) for the given histogram. |
| 366 | // Also returns H = total count in histogram, and | 366 | // Also returns H = total count in histogram, and |
| 367 | // omega0 = count of histogram below threshold. | 367 | // omega0 = count of histogram below threshold. |
| 368 | int TessBaseAPI::OtsuStats(const int* histogram, | 368 | int TessBaseAPI::OtsuStats(const int* histogram, |
| 369 | int* H_out, | 369 | int* H_out, |
| 370 | int* omega0_out) { | 370 | int* omega0_out) { |
| 371 | int H = 0; | 371 | int H = 0; |
| 372 | double mu_T = 0.0; | 372 | double mu_T = 0.0; |
| 373 | for (int i = 0; i < 256; ++i) { | 373 | for (int i = 0; i < 256; ++i) { |
| 374 | H += histogram[i]; | 374 | H += histogram[i]; |
| 375 | mu_T += i * histogram[i]; | 375 | mu_T += i * histogram[i]; |
| 376 | } | 376 | } |
| 377 | 377 | ||
| 378 | // Now maximize sig_sq_B over t. | 378 | // Now maximize sig_sq_B over t. |
| 379 | // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf | 379 | // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf |
| 380 | int best_t = -1; | 380 | int best_t = -1; |
| 381 | int omega_0, omega_1; | 381 | int omega_0, omega_1; |
| 382 | int best_omega_0 = 0; | 382 | int best_omega_0 = 0; |
| 383 | double best_sig_sq_B = 0.0; | 383 | double best_sig_sq_B = 0.0; |
| 384 | double mu_0, mu_1, mu_t; | 384 | double mu_0, mu_1, mu_t; |
| 385 | omega_0 = 0; | 385 | omega_0 = 0; |
| 386 | mu_t = 0.0; | 386 | mu_t = 0.0; |
| 387 | for (int t = 0; t < 255; ++t) { | 387 | for (int t = 0; t < 255; ++t) { |
| 388 | omega_0 += histogram[t]; | 388 | omega_0 += histogram[t]; |
| 389 | mu_t += t * static_cast<double>(histogram[t]); | 389 | mu_t += t * static_cast<double>(histogram[t]); |
| 390 | if (omega_0 == 0) | 390 | if (omega_0 == 0) |
| 391 | continue; | 391 | continue; |
| 392 | omega_1 = H - omega_0; | 392 | omega_1 = H - omega_0; |
| 393 | mu_0 = mu_t / omega_0; | 393 | mu_0 = mu_t / omega_0; |
| 394 | mu_1 = (mu_T - mu_t) / omega_1; | 394 | mu_1 = (mu_T - mu_t) / omega_1; |
| 395 | double sig_sq_B = mu_1 - mu_0; | 395 | double sig_sq_B = mu_1 - mu_0; |
| 396 | sig_sq_B *= sig_sq_B * omega_0 * omega_1; | 396 | sig_sq_B *= sig_sq_B * omega_0 * omega_1; |
| 397 | if (best_t < 0 || sig_sq_B > best_sig_sq_B) { | 397 | if (best_t < 0 || sig_sq_B > best_sig_sq_B) { |
| 398 | best_sig_sq_B = sig_sq_B; | 398 | best_sig_sq_B = sig_sq_B; |
| 399 | best_t = t; | 399 | best_t = t; |
| 400 | best_omega_0 = omega_0; | 400 | best_omega_0 = omega_0; |
| 401 | } | 401 | } |
| 402 | } | 402 | } |
| 403 | if (H_out != NULL) *H_out = H; | 403 | if (H_out != NULL) *H_out = H; |
| 404 | if (omega0_out != NULL) *omega0_out = best_omega_0; | 404 | if (omega0_out != NULL) *omega0_out = best_omega_0; |
| 405 | return best_t; | 405 | return best_t; |
| 406 | } | 406 | } |
| 407 | 407 | ||
| 408 | 408 | ||
| 409 | ////////////DEBAYAN//Deskew begins////////////////////// | 409 | ////////////DEBAYAN//Deskew begins////////////////////// |
| 410 | void deskew(float angle,int srcheight, int srcwidth) | 410 | void deskew(float angle,int srcheight, int srcwidth) |
| 411 | { | 411 | { |
| 412 | //angle=4; //45° for example | 412 | //angle=4; //45° for example |
| 413 | IMAGE tempimage; | 413 | IMAGE tempimage; |
| 414 | 414 | ||
| 415 | 415 | ||
| 416 | IMAGELINE line; | 416 | IMAGELINE line; |
| 417 | //Convert degrees to radians | 417 | //Convert degrees to radians |
| 418 | float radians=(2*3.1416*angle)/360; | 418 | float radians=(2*3.1416*angle)/360; |
| 419 | 419 | ||
| 420 | float cosine=(float)cos(radians); | 420 | float cosine=(float)cos(radians); |
| 421 | float sine=(float)sin(radians); | 421 | float sine=(float)sin(radians); |
| 422 | 422 | ||
| 423 | float Point1x=(srcheight*sine); | 423 | float Point1x=(srcheight*sine); |
| 424 | float Point1y=(srcheight*cosine); | 424 | float Point1y=(srcheight*cosine); |
| 425 | float Point2x=(srcwidth*cosine-srcheight*sine); | 425 | float Point2x=(srcwidth*cosine-srcheight*sine); |
| 426 | float Point2y=(srcheight*cosine+srcwidth*sine); | 426 | float Point2y=(srcheight*cosine+srcwidth*sine); |
| 427 | float Point3x=(srcwidth*cosine); | 427 | float Point3x=(srcwidth*cosine); |
| 428 | float Point3y=(srcwidth*sine); | 428 | float Point3y=(srcwidth*sine); |
| 429 | 429 | ||
| 430 | float minx=min(0,min(Point1x,min(Point2x,Point3x))); | 430 | float minx=min(0,min(Point1x,min(Point2x,Point3x))); |
| 431 | float miny=min(0,min(Point1y,min(Point2y,Point3y))); | 431 | float miny=min(0,min(Point1y,min(Point2y,Point3y))); |
| 432 | float maxx=max(Point1x,max(Point2x,Point3x)); | 432 | float maxx=max(Point1x,max(Point2x,Point3x)); |
| 433 | float maxy=max(Point1y,max(Point2y,Point3y)); | 433 | float maxy=max(Point1y,max(Point2y,Point3y)); |
| 434 | 434 | ||
| 435 | int DestWidth=(int)ceil(fabs(maxx)-minx); | 435 | int DestWidth=(int)ceil(fabs(maxx)-minx); |
| 436 | int DestHeight=(int)ceil(fabs(maxy)-miny); | 436 | int DestHeight=(int)ceil(fabs(maxy)-miny); |
| 437 | 437 | ||
| 438 | tempimage.create(DestWidth,DestHeight,1); | 438 | tempimage.create(DestWidth,DestHeight,1); |
| 439 | line.init(DestWidth); | 439 | line.init(DestWidth); |
| 440 | 440 | ||
| 441 | for(int i=0;i<DestWidth;i++){ //A white line of length=DestWidth | 441 | for(int i=0;i<DestWidth;i++){ //A white line of length=DestWidth |
| 442 | line.pixels[i]=1; | 442 | line.pixels[i]=1; |
| 443 | } | 443 | } |
| 444 | 444 | ||
| 445 | for(int y=0;y<DestHeight;y++){ //Fill the Destination image with white, else clipmatra wont work | 445 | for(int y=0;y<DestHeight;y++){ //Fill the Destination image with white, else clipmatra wont work |
| 446 | tempimage.put_line(0,y,DestWidth,&line,0); | 446 | tempimage.put_line(0,y,DestWidth,&line,0); |
| 447 | } | 447 | } |
| 448 | line.init(DestWidth); | 448 | line.init(DestWidth); |
| 449 | 449 | ||
| 450 | 450 | ||
| 451 | 451 | ||
| 452 | for(int y=0;y<DestHeight;y++) //Start filling the destination image pixels with corresponding source image pixels | 452 | for(int y=0;y<DestHeight;y++) //Start filling the destination image pixels with corresponding source image pixels |
| 453 | { | 453 | { |
| 454 | for(int x=0;x<DestWidth;x++) | 454 | for(int x=0;x<DestWidth;x++) |
| 455 | { | 455 | { |
| 456 | int Srcx=(int)((x+minx)*cosine+(y+miny)*sine); | 456 | int Srcx=(int)((x+minx)*cosine+(y+miny)*sine); |
| 457 | int Srcy=(int)((y+miny)*cosine-(x+minx)*sine); | 457 | int Srcy=(int)((y+miny)*cosine-(x+minx)*sine); |
| 458 | if(Srcx>=0&&Srcx<srcwidth&&Srcy>=0&& | 458 | if(Srcx>=0&&Srcx<srcwidth&&Srcy>=0&& |
| 459 | Srcy<srcheight) | 459 | Srcy<srcheight) |
| 460 | { | 460 | { |
| 461 | line.pixels[x]= | 461 | line.pixels[x]= |
| 462 | page_image.pixel(Srcx,Srcy); | 462 | page_image.pixel(Srcx,Srcy); |
| 463 | } | 463 | } |
| 464 | } | 464 | } |
| 465 | tempimage.put_line(0,y,DestWidth,&line,0); | 465 | tempimage.put_line(0,y,DestWidth,&line,0); |
| 466 | } | 466 | } |
| 467 | 467 | ||
| 468 | //tempimage.write("tempimage.tif"); | 468 | //tempimage.write("tempimage.tif"); |
| 469 | page_image=tempimage;//Copy deskewed image to global page image, so it can be worked on further | 469 | page_image=tempimage;//Copy deskewed image to global page image, so it can be worked on further |
| 470 | tempimage.destroy(); | 470 | tempimage.destroy(); |
| 471 | //page_image.write("page_image.tif"); | 471 | //page_image.write("page_image.tif"); |
| 472 | 472 | ||
| 473 | } | 473 | } |
| 474 | /////////////DEBAYAN//Deskew ends///////////////////// | 474 | /////////////DEBAYAN//Deskew ends///////////////////// |
| 475 | 475 | ||
| 476 | ////////////DEBAYAN//Find skew begins///////////////// | 476 | ////////////DEBAYAN//Find skew begins///////////////// |
| 477 | float findskew(int height, int width) | 477 | float findskew(int height, int width) |
| 478 | { | 478 | { |
| 479 | int topx=0,topy=0,sign,count=0,offset=1,ifcounter=0; | 479 | int topx=0,topy=0,sign,count=0,offset=1,ifcounter=0; |
| 480 | float slope=-999,avg=0; | 480 | float slope=-999,avg=0; |
| 481 | IMAGELINE line; | 481 | IMAGELINE line; |
| 482 | line.init(1); | 482 | line.init(1); |
| 483 | line.pixels[0]=0; | 483 | line.pixels[0]=0; |
| 484 | ///////Find the top most point of the page: begins/////////// | 484 | ///////Find the top most point of the page: begins/////////// |
| 485 | for(int y=height-1;y>0;y--){ | 485 | for(int y=height-1;y>0;y--){ |
| 486 | for(int x=width-1;x>0;x--){ | 486 | for(int x=width-1;x>0;x--){ |
| 487 | if(page_image.pixel(x,y)==0){ | 487 | if(page_image.pixel(x,y)==0){ |
| 488 | topx=x;topy=y; | 488 | topx=x;topy=y; |
| 489 | break; | 489 | break; |
| 490 | } | 490 | } |
| 491 | 491 | ||
| 492 | } | 492 | } |
| 493 | 493 | ||
| 494 | if(topx>0){break;}; | 494 | if(topx>0){break;}; |
| 495 | } | 495 | } |
| 496 | ///////Find the top most point of the page: ends/////////// | 496 | ///////Find the top most point of the page: ends/////////// |
| 497 | 497 | ||
| 498 | 498 | ||
| 499 | ///////To find pages with no skew: begins////////////// | 499 | ///////To find pages with no skew: begins////////////// |
| 500 | int c1,c2=0; | 500 | int c1,c2=0; |
| 501 | for(int x=1;x<.25*width;x++){ | 501 | for(int x=1;x<.25*width;x++){ |
| 502 | while(page_image.pixel((width/2)+x,c1++)==1){ } | 502 | while(page_image.pixel((width/2)+x,c1++)==1){ } |
| 503 | while(page_image.pixel((width/2)-x,c2++)==1){ } | 503 | while(page_image.pixel((width/2)-x,c2++)==1){ } |
| 504 | if(c1==c2){cout<<"0 ANGLE\n";return (0);} | 504 | if(c1==c2){cout<<"0 ANGLE\n";return (0);} |
| 505 | c1=c2=0; | 505 | c1=c2=0; |
| 506 | } | 506 | } |
| 507 | ///////To find pages with no skew: ends////////////// | 507 | ///////To find pages with no skew: ends////////////// |
| 508 | 508 | ||
| 509 | cout<<"width="<<width; | 509 | cout<<"width="<<width; |
| 510 | if(topx>0 && topx<.5*width){ | 510 | if(topx>0 && topx<.5*width){ |
| 511 | sign=1; | 511 | sign=1; |
| 512 | } | 512 | } |
| 513 | if(topx>0 && topx>.5*width){ | 513 | if(topx>0 && topx>.5*width){ |
| 514 | sign=-1; | 514 | sign=-1; |
| 515 | } | 515 | } |
| 516 | 516 | ||
| 517 | 517 | ||
| 518 | if(sign==-1){ | 518 | if(sign==-1){ |
| 519 | while((topx-offset)>width/2){ | 519 | while((topx-offset)>width/2){ |
| 520 | while(page_image.pixel(topx-offset,topy-count)==1){ | 520 | while(page_image.pixel(topx-offset,topy-count)==1){ |
| 521 | //page_image.put_line(topx-offset,topy-count,1,&line,0); | 521 | //page_image.put_line(topx-offset,topy-count,1,&line,0); |
| 522 | count++; | 522 | count++; |
| 523 | } | 523 | } |
| 524 | 524 | ||
| 525 | if((180/3.142)*atan((float)count/offset)<10){ | 525 | if((180/3.142)*atan((float)count/offset)<10){ |
| 526 | slope=(float)count/offset; | 526 | slope=(float)count/offset; |
| 527 | ifcounter++; | 527 | ifcounter++; |
| 528 | avg=(avg+slope); | 528 | avg=(avg+slope); |
| 529 | } | 529 | } |
| 530 | count=0; | 530 | count=0; |
| 531 | offset++; | 531 | offset++; |
| 532 | } | 532 | } |
| 533 | avg=(float)avg/ifcounter; | 533 | avg=(float)avg/ifcounter; |
| 534 | //cout<<"avg="<<avg<<"\n"; | 534 | //cout<<"avg="<<avg<<"\n"; |
| 535 | page_image.write("findskew.tif"); | 535 | page_image.write("findskew.tif"); |
| 536 | //cout<<"(180/3.142)*atan((float)(count/offset)="<<(180/3.142)*atan(avg)<<"\n"; | 536 | //cout<<"(180/3.142)*atan((float)(count/offset)="<<(180/3.142)*atan(avg)<<"\n"; |
| 537 | return (sign*(180/3.142)*atan(avg)); | 537 | return (sign*(180/3.142)*atan(avg)); |
| 538 | 538 | ||
| 539 | } | 539 | } |
| 540 | if(sign==1){ | 540 | if(sign==1){ |
| 541 | while((topx+offset)<width/2){ | 541 | while((topx+offset)<width/2){ |
| 542 | while(page_image.pixel(topx+offset,topy-count)==1){ | 542 | while(page_image.pixel(topx+offset,topy-count)==1){ |
| 543 | //page_image.put_line(topx+offset,topy-count,1,&line,0); | 543 | //page_image.put_line(topx+offset,topy-count,1,&line,0); |
| 544 | count++; | 544 | count++; |
| 545 | } | 545 | } |
| 546 | 546 | ||
| 547 | if((180/3.142)*atan((float)count/offset)<10){ | 547 | if((180/3.142)*atan((float)count/offset)<10){ |
| 548 | slope=(float)count/offset; | 548 | slope=(float)count/offset; |
| 549 | ifcounter++; | 549 | ifcounter++; |
| 550 | avg=(avg+slope); | 550 | avg=(avg+slope); |
| 551 | } | 551 | } |
| 552 | count=0; | 552 | count=0; |
| 553 | offset++; | 553 | offset++; |
| 554 | } | 554 | } |
| 555 | avg=(float)avg/ifcounter; | 555 | avg=(float)avg/ifcounter; |
| 556 | //cout<<"avg="<<avg<<"\n"; | 556 | //cout<<"avg="<<avg<<"\n"; |
| 557 | page_image.write("findskew.tif"); | 557 | page_image.write("findskew.tif"); |
| 558 | //cout<<"(180/3.142)*atan((float)(count/offset)="<<(180/3.142)*atan(avg)<<"\n"; | 558 | //cout<<"(180/3.142)*atan((float)(count/offset)="<<(180/3.142)*atan(avg)<<"\n"; |
| 559 | return (sign*(180/3.142)*atan(avg)); | 559 | return (sign*(180/3.142)*atan(avg)); |
| 560 | 560 | ||
| 561 | } | 561 | } |
| 562 | 562 | ||
| 563 | if(sign==0) | 563 | if(sign==0) |
| 564 | {return 0;} | 564 | {return 0;} |
| 565 | cout<<"SHIT"; | 565 | cout<<"SHIT"; |
| 566 | return (0); | 566 | return (0); |
| 567 | } | 567 | } |
| 568 | ////////////DEBAYAN//Find skew ends/////////////////// | 568 | ////////////DEBAYAN//Find skew ends/////////////////// |
| 569 | 569 | ||
| 570 | //Works on the global image page containing devnagri script. | 570 | //Works on the global image page containing devnagri script. |
| 571 | //Clips the maatraas and then makes the global image ready for the Tesseract engine. | 571 | //Clips the maatraas and then makes the global image ready for the Tesseract engine. |
| 572 | //Will be executed for all images during training, but during normal operation, will be | 572 | //Will be executed for all images during training, but during normal operation, will be |
| 573 | //used only if the language belongs to devnagri, eg, ben, hin etc. | 573 | //used only if the language belongs to devnagri, eg, ben, hin etc. |
| 574 | void TessBaseAPI::ClipMaatraa(int height, int width) | 574 | void TessBaseAPI::ClipMaatraa(int height, int width) |
| 575 | { | 575 | { |
| 576 | IMAGELINE line; | 576 | IMAGELINE line; |
| 577 | line.init(width); | 577 | line.init(width); |
| 578 | int count,count1=0,blackpixels[height-1][2],arr_row=0,maxbp=0,maxy=0,matras[100][3],char_height; | 578 | int count,count1=0,blackpixels[height-1][2],arr_row=0,maxbp=0,maxy=0,matras[100][3],char_height; |
| 579 | //cout<<"Connected Script="<<connected_script<<"\n"; | 579 | //cout<<"Connected Script="<<connected_script<<"\n"; |
| 580 | 580 | ||
| 581 | for(int y=0; y<height-1;y++){ | 581 | for(int y=0; y<height-1;y++){ |
| 582 | count=0; | 582 | count=0; |
| 583 | for(int x=0;x<width-1;x++){ | 583 | for(int x=0;x<width-1;x++){ |
| 584 | if(page_image.pixel(x,y)==0) | 584 | if(page_image.pixel(x,y)==0) |
| 585 | {count++;} | 585 | {count++;} |
| 586 | } | 586 | } |
| 587 | 587 | ||
| 588 | if(count>=.05*width){ | 588 | if(count>=.05*width){ |
| 589 | blackpixels[arr_row][0]=y; | 589 | blackpixels[arr_row][0]=y; |
| 590 | blackpixels[arr_row][1]=count; | 590 | blackpixels[arr_row][1]=count; |
| 591 | arr_row++; | 591 | arr_row++; |
| 592 | } | 592 | } |
| 593 | } | 593 | } |
| 594 | blackpixels[arr_row][0]=blackpixels[arr_row][1]='\0'; | 594 | blackpixels[arr_row][0]=blackpixels[arr_row][1]='\0'; |
| 595 | 595 | ||
| 596 | for(int x=0;x<width-1;x++){ //Black Line | 596 | for(int x=0;x<width-1;x++){ //Black Line |
| 597 | line.pixels[x]=0; | 597 | line.pixels[x]=0; |
| 598 | } | 598 | } |
| 599 | 599 | ||
| 600 | ////////////line_through_matra() begins////////////////////// | 600 | ////////////line_through_matra() begins////////////////////// |
| 601 | count=1; | 601 | count=1; |
| 602 | //cout<<"\nHeight="<<height<<" arr_row="<<arr_row<<"\n"; | 602 | //cout<<"\nHeight="<<height<<" arr_row="<<arr_row<<"\n"; |
| 603 | char_height=blackpixels[0][0]; //max character height per sentence | 603 | char_height=blackpixels[0][0]; //max character height per sentence |
| 604 | //cout<<"Char Height Init="<<char_height; | 604 | //cout<<"Char Height Init="<<char_height; |
| 605 | while(count<=arr_row){ | 605 | while(count<=arr_row){ |
| 606 | //if(count==0){max=blackpixels[count][0];} | 606 | //if(count==0){max=blackpixels[count][0];} |
| 607 | if((blackpixels[count][0]-blackpixels[count-1][0]==1) && (blackpixels[count][1]>=maxbp)){ | 607 | if((blackpixels[count][0]-blackpixels[count-1][0]==1) && (blackpixels[count][1]>=maxbp)){ |
| 608 | maxbp=blackpixels[count][1]; | 608 | maxbp=blackpixels[count][1]; |
| 609 | maxy=blackpixels[count][0]; | 609 | maxy=blackpixels[count][0]; |
| 610 | //cout<<"\nMax="<<maxy<<" bpc="<<maxbp; | 610 | //cout<<"\nMax="<<maxy<<" bpc="<<maxbp; |
| 611 | } | 611 | } |
| 612 | 612 | ||
| 613 | if((blackpixels[count][0]-blackpixels[count-1][0])!=1){ | 613 | if((blackpixels[count][0]-blackpixels[count-1][0])!=1){ |
| 614 | /////////////drawline(max)////////////////////// | 614 | /////////////drawline(max)////////////////////// |
| 615 | 615 | ||
| 616 | // cout<<"\nmax="<<maxy<<" bpc="<<maxbp; | 616 | // cout<<"\nmax="<<maxy<<" bpc="<<maxbp; |
| 617 | // page_image.put_line(0,maxy,width,&line,0); | 617 | // page_image.put_line(0,maxy,width,&line,0); |
| 618 | char_height=blackpixels[count-1][0]-char_height; | 618 | char_height=blackpixels[count-1][0]-char_height; |
| 619 | matras[count1][0]=maxy; matras[count1][1]=maxbp; matras[count1][2]=char_height; count1++; | 619 | matras[count1][0]=maxy; matras[count1][1]=maxbp; matras[count1][2]=char_height; count1++; |
| 620 | char_height=blackpixels[count][0]; | 620 | char_height=blackpixels[count][0]; |
| 621 | 621 | ||
| 622 | //////////// drawline(max)///////////////////// | 622 | //////////// drawline(max)///////////////////// |
| 623 | maxbp=blackpixels[count][1]; | 623 | maxbp=blackpixels[count][1]; |
| 624 | } | 624 | } |
| 625 | count++; | 625 | count++; |
| 626 | } | 626 | } |
| 627 | matras[count1][0]=matras[count1][1]=matras[count1][2]='\0'; | 627 | matras[count1][0]=matras[count1][1]=matras[count1][2]='\0'; |
| 628 | 628 | ||
| 629 | //delete blackpixels; | 629 | //delete blackpixels; |
| 630 | ////////////line_through_matra() ends////////////////////// | 630 | ////////////line_through_matra() ends////////////////////// |
| 631 | 631 | ||
| 632 | ////////////clip_matras() begins/////////////////////////// | 632 | ////////////clip_matras() begins/////////////////////////// |
| 633 | for(int i=0;i<100;i++){ //where 100=max number of sentences per page | 633 | for(int i=0;i<100;i++){ //where 100=max number of sentences per page |
| 634 | if(matras[i][0]=='\0'){break;} | 634 | if(matras[i][0]=='\0'){break;} |
| 635 | //cout<<"\nY="<<matras[i][0]<<" bpc="<<matras[i][1]<<" chheight="<<matras[i][2]; | 635 | //cout<<"\nY="<<matras[i][0]<<" bpc="<<matras[i][1]<<" chheight="<<matras[i][2]; |
| 636 | count=i; | 636 | count=i; |
| 637 | } | 637 | } |
| 638 | 638 | ||
| 639 | for(int i=0;i<=count;i++){ | 639 | for(int i=0;i<=count;i++){ |
| 640 | 640 | ||
| 641 | for(int x=0;x<width-1;x++){ | 641 | for(int x=0;x<width-1;x++){ |
| 642 | if(page_image.pixel(x,matras[i][0])==0){ | 642 | if(page_image.pixel(x,matras[i][0])==0){ |
| 643 | count1=0; | 643 | count1=0; |
| 644 | for(int y=0;y<matras[i][2] && count1==0;y++){ | 644 | for(int y=0;y<matras[i][2] && count1==0;y++){ |
| 645 | if(page_image.pixel(x,matras[i][0]-y)==1){count1++; | 645 | if(page_image.pixel(x,matras[i][0]-y)==1){count1++; |
| 646 | for(int z=y+1;z<matras[i][2];z++){ | 646 | for(int z=y+1;z<matras[i][2];z++){ |
| 647 | if(page_image.pixel(x,matras[i][0]-z)==1){count1++;}//black pixel encountered... stop counting. | 647 | if(page_image.pixel(x,matras[i][0]-z)==1){count1++;}//black pixel encountered... stop counting. |
| 648 | else | 648 | else |
| 649 | {break;} | 649 | {break;} |
| 650 | } | 650 | } |
| 651 | } | 651 | } |
| 652 | } | 652 | } |
| 653 | int upcount=0; | ||
| 654 | //find upcount | ||
| 655 | while(page_image.pixel(x,matras[i][0]+upcount)==0){ | ||
| 656 | upcount++; | ||
| 657 | } | ||
| 658 | //find upcount ends | ||
| 653 | //cout<<"\nWPR @ "<<x<<","<<matras[i][0]<<"="<<count1; | 659 | //cout<<"\nWPR @ "<<x<<","<<matras[i][0]<<"="<<count1; |
| 654 | if(count1>.8*matras[i][2]){ | 660 | if(count1>.8*matras[i][2]){ |
| 655 | line.init(matras[i][2]+5); | 661 | line.init(upcount+count1); |
| 656 | for(int j=0;j<matras[i][2]+5;j++){line.pixels[j]=1;} | 662 | for(int j=0;j<matras[i][2]+5;j++){line.pixels[j]=1;} |
| 657 | page_image.put_column(x,matras[i][0]-matras[i][2],matras[i][2]+5,&line,0); | 663 | page_image.put_column(x,matras[i][0]+upcount,upcount+count1,&line,0); |
| 658 | } | 664 | } |
| 659 | } | 665 | } |
| 660 | } | 666 | } |
| 661 | 667 | ||
| 662 | } | 668 | } |
| 663 | 669 | ||
| 664 | page_image.write("bentest.tif"); | 670 | page_image.write("bentest.tif"); |
| 665 | 671 | ||
| 666 | ////////////clip_matras() ends///////////////////////////// | 672 | ////////////clip_matras() ends///////////////////////////// |
| 667 | 673 | ||
| 668 | /////////DEBAYAN///////////////// | 674 | /////////DEBAYAN///////////////// |
| 669 | 675 | ||
| 670 | 676 | ||
| 671 | } | 677 | } |
| 672 | 678 | ||
| 673 | 679 | ||
| 674 | // Threshold the given grey or color image into the tesseract global | 680 | // Threshold the given grey or color image into the tesseract global |
| 675 | // image ready for recognition. Requires thresholds and hi_value | 681 | // image ready for recognition. Requires thresholds and hi_value |
| 676 | // produced by OtsuThreshold above. | 682 | // produced by OtsuThreshold above. |
| 677 | void TessBaseAPI::ThresholdRect(const unsigned char* imagedata, | 683 | void TessBaseAPI::ThresholdRect(const unsigned char* imagedata, |
| 678 | int bytes_per_pixel, | 684 | int bytes_per_pixel, |
| 679 | int bytes_per_line, | 685 | int bytes_per_line, |
| 680 | int left, int top, | 686 | int left, int top, |
| 681 | int width, int height, | 687 | int width, int height, |
| 682 | const int* thresholds, | 688 | const int* thresholds, |
| 683 | const int* hi_values) { | 689 | const int* hi_values) { |
| 684 | 690 | ||
| 685 | IMAGELINE line; | 691 | IMAGELINE line; |
| 686 | page_image.create(width, height, 1); | 692 | page_image.create(width, height, 1); |
| 687 | line.init(width); | 693 | line.init(width); |
| 688 | 694 | ||
| 689 | // For each line in the image, fill the IMAGELINE class and put it into the | 695 | // For each line in the image, fill the IMAGELINE class and put it into the |
| 690 | // Tesseract global page_image. Note that Tesseract stores images with the | 696 | // Tesseract global page_image. Note that Tesseract stores images with the |
| 691 | // bottom at y=0 and 0 is black, so we need 2 kinds of inversion. | 697 | // bottom at y=0 and 0 is black, so we need 2 kinds of inversion. |
| 692 | const unsigned char* data = imagedata + top*bytes_per_line + | 698 | const unsigned char* data = imagedata + top*bytes_per_line + |
| 693 | left*bytes_per_pixel; | 699 | left*bytes_per_pixel; |
| 694 | for (int y = height - 1 ; y >= 0; --y) { | 700 | for (int y = height - 1 ; y >= 0; --y) { |
| 695 | const unsigned char* pix = data; | 701 | const unsigned char* pix = data; |
| 696 | for (int x = 0; x < width; ++x, pix += bytes_per_pixel) { | 702 | for (int x = 0; x < width; ++x, pix += bytes_per_pixel) { |
| 697 | line.pixels[x] = 1; | 703 | line.pixels[x] = 1; |
| 698 | for (int ch = 0; ch < bytes_per_pixel; ++ch) { | 704 | for (int ch = 0; ch < bytes_per_pixel; ++ch) { |
| 699 | if (hi_values[ch] >= 0 && | 705 | if (hi_values[ch] >= 0 && |
| 700 | (pix[ch] > thresholds[ch]) == (hi_values[ch] == 0)) { | 706 | (pix[ch] > thresholds[ch]) == (hi_values[ch] == 0)) { |
| 701 | line.pixels[x] = 0; | 707 | line.pixels[x] = 0; |
| 702 | break; | 708 | break; |
| 703 | } | 709 | } |
| 704 | } | 710 | } |
| 705 | } | 711 | } |
| 706 | page_image.put_line(0, y, width, &line, 0); | 712 | page_image.put_line(0, y, width, &line, 0); |
| 707 | data += bytes_per_line; | 713 | data += bytes_per_line; |
| 708 | } | 714 | } |
| 709 | 715 | ||
| 710 | if(connected_script==true){ | 716 | if(connected_script==true){ |
| 711 | //page_image.write("benth.tif"); | 717 | //page_image.write("benth.tif"); |
| 712 | //float angle=findskew(height,width); | 718 | //float angle=findskew(height,width); |
| 713 | //cout<<"SKEW ANGLE="<<angle<<"\n"; | 719 | //cout<<"SKEW ANGLE="<<angle<<"\n"; |
| 714 | /*if(angle!=0){ | 720 | /*if(angle!=0){ |
| 715 | deskew(angle,height,width); | 721 | deskew(angle,height,width); |
| 716 | }*/ | 722 | }*/ |
| 717 | ClipMaatraa(height,width); | 723 | ClipMaatraa(height,width); |
| 718 | } | 724 | } |
| 719 | 725 | ||
| 720 | } | 726 | } |
| 721 | 727 | ||
| 722 | // Cut out the requested rectangle of the binary image to the | 728 | // Cut out the requested rectangle of the binary image to the |
| 723 | // tesseract global image ready for recognition. | 729 | // tesseract global image ready for recognition. |
| 724 | void TessBaseAPI::CopyBinaryRect(const unsigned char* imagedata, | 730 | void TessBaseAPI::CopyBinaryRect(const unsigned char* imagedata, |
| 725 | int bytes_per_line, | 731 | int bytes_per_line, |
| 726 | int left, int top, | 732 | int left, int top, |
| 727 | int width, int height) { | 733 | int width, int height) { |
| 728 | // Copy binary image, cutting out the required rectangle. | 734 | // Copy binary image, cutting out the required rectangle. |
| 729 | IMAGE image; | 735 | IMAGE image; |
| 730 | image.capture(const_cast<unsigned char*>(imagedata), | 736 | image.capture(const_cast<unsigned char*>(imagedata), |
| 731 | bytes_per_line*8, top + height, 1); | 737 | bytes_per_line*8, top + height, 1); |
| 732 | page_image.create(width, height, 1); | 738 | page_image.create(width, height, 1); |
| 733 | 739 | ||
| 734 | copy_sub_image(&image, left, 0, width, height, &page_image, 0, 0, false); | 740 | copy_sub_image(&image, left, 0, width, height, &page_image, 0, 0, false); |
| 735 | image.write("bentest.tif"); | 741 | image.write("bentest.tif"); |
| 736 | } | 742 | } |
| 737 | 743 | ||
| 738 | // Low-level function to recognize the current global image to a string. | 744 | // Low-level function to recognize the current global image to a string. |
| 739 | char* TessBaseAPI::RecognizeToString() { | 745 | char* TessBaseAPI::RecognizeToString() { |
| 740 | BLOCK_LIST block_list; | 746 | BLOCK_LIST block_list; |
| 741 | 747 | ||
| 742 | FindLines(&block_list); | 748 | FindLines(&block_list); |
| 743 | 749 | ||
| 744 | // Now run the main recognition. | 750 | // Now run the main recognition. |
| 745 | PAGE_RES* page_res = Recognize(&block_list, NULL); | 751 | PAGE_RES* page_res = Recognize(&block_list, NULL); |
| 746 | 752 | ||
| 747 | return TesseractToText(page_res); | 753 | return TesseractToText(page_res); |
| 748 | 754 | ||
| 749 | } | 755 | } |
| 750 | 756 | ||
| 751 | // Find lines from the image making the BLOCK_LIST. | 757 | // Find lines from the image making the BLOCK_LIST. |
| 752 | void TessBaseAPI::FindLines(BLOCK_LIST* block_list) { | 758 | void TessBaseAPI::FindLines(BLOCK_LIST* block_list) { |
| 753 | // The following call creates a full-page block and then runs connected | 759 | // The following call creates a full-page block and then runs connected |
| 754 | // component analysis and text line creation. | 760 | // component analysis and text line creation. |
| 755 | pgeditor_read_file(input_file, block_list); | 761 | pgeditor_read_file(input_file, block_list); |
| 756 | } | 762 | } |
| 757 | 763 | ||
| 758 | // Recognize the tesseract global image and return the result as Tesseract | 764 | // Recognize the tesseract global image and return the result as Tesseract |
| 759 | // internal structures. | 765 | // internal structures. |
| 760 | PAGE_RES* TessBaseAPI::Recognize(BLOCK_LIST* block_list, ETEXT_DESC* monitor) { | 766 | PAGE_RES* TessBaseAPI::Recognize(BLOCK_LIST* block_list, ETEXT_DESC* monitor) { |
| 761 | if (tessedit_resegment_from_boxes) | 767 | if (tessedit_resegment_from_boxes) |
| 762 | apply_boxes(block_list); | 768 | apply_boxes(block_list); |
| 763 | 769 | ||
| 764 | PAGE_RES* page_res = new PAGE_RES(block_list); | 770 | PAGE_RES* page_res = new PAGE_RES(block_list); |
| 765 | if (interactive_mode) { | 771 | if (interactive_mode) { |
| 766 | pgeditor_main(block_list); //pgeditor user I/F | 772 | pgeditor_main(block_list); //pgeditor user I/F |
| 767 | } else if (tessedit_train_from_boxes) { | 773 | } else if (tessedit_train_from_boxes) { |
| 768 | apply_box_training(block_list); | 774 | apply_box_training(block_list); |
| 769 | } else { | 775 | } else { |
| 770 | // Now run the main recognition. | 776 | // Now run the main recognition. |
| 771 | recog_all_words(page_res, monitor); | 777 | recog_all_words(page_res, monitor); |
| 772 | } | 778 | } |
| 773 | return page_res; | 779 | return page_res; |
| 774 | } | 780 | } |
| 775 | 781 | ||
| 776 | // Return the maximum length that the output text string might occupy. | 782 | // Return the maximum length that the output text string might occupy. |
| 777 | int TessBaseAPI::TextLength(PAGE_RES* page_res) { | 783 | int TessBaseAPI::TextLength(PAGE_RES* page_res) { |
| 778 | PAGE_RES_IT page_res_it(page_res); | 784 | PAGE_RES_IT page_res_it(page_res); |
| 779 | int total_length = 2; | 785 | int total_length = 2; |
| 780 | // Iterate over the data structures to extract the recognition result. | 786 | // Iterate over the data structures to extract the recognition result. |
| 781 | for (page_res_it.restart_page(); page_res_it.word () != NULL; | 787 | for (page_res_it.restart_page(); page_res_it.word () != NULL; |
| 782 | page_res_it.forward()) { | 788 | page_res_it.forward()) { |
| 783 | WERD_RES *word = page_res_it.word(); | 789 | WERD_RES *word = page_res_it.word(); |
| 784 | WERD_CHOICE* choice = word->best_choice; | 790 | WERD_CHOICE* choice = word->best_choice; |
| 785 | if (choice != NULL) { | 791 | if (choice != NULL) { |
| 786 | total_length += choice->string().length() + 1; | 792 | total_length += choice->string().length() + 1; |
| 787 | for (int i = 0; i < word->reject_map.length(); ++i) { | 793 | for (int i = 0; i < word->reject_map.length(); ++i) { |
| 788 | if (word->reject_map[i].rejected()) | 794 | if (word->reject_map[i].rejected()) |
| 789 | ++total_length; | 795 | ++total_length; |
| 790 | } | 796 | } |
| 791 | } | 797 | } |
| 792 | } | 798 | } |
| 793 | return total_length; | 799 | return total_length; |
| 794 | } | 800 | } |
| 795 | 801 | ||
| 796 | // Returns an array of all word confidences, terminated by -1. | 802 | // Returns an array of all word confidences, terminated by -1. |
| 797 | int* TessBaseAPI::AllTextConfidences(PAGE_RES* page_res) { | 803 | int* TessBaseAPI::AllTextConfidences(PAGE_RES* page_res) { |
| 798 | if (!page_res) return NULL; | 804 | if (!page_res) return NULL; |
| 799 | int n_word = 0; | 805 | int n_word = 0; |
| 800 | PAGE_RES_IT res_it(page_res); | 806 | PAGE_RES_IT res_it(page_res); |
| 801 | for (res_it.restart_page(); res_it.word () != NULL; res_it.forward()) | 807 | for (res_it.restart_page(); res_it.word () != NULL; res_it.forward()) |
| 802 | n_word++; | 808 | n_word++; |
| 803 | 809 | ||
| 804 | int* conf = new int[n_word+1]; | 810 | int* conf = new int[n_word+1]; |
| 805 | n_word = 0; | 811 | n_word = 0; |
| 806 | for (res_it.restart_page(); res_it.word () != NULL; res_it.forward()) { | 812 | for (res_it.restart_page(); res_it.word () != NULL; res_it.forward()) { |
| 807 | WERD_RES *word = res_it.word(); | 813 | WERD_RES *word = res_it.word(); |
| 808 | WERD_CHOICE* choice = word->best_choice; | 814 | WERD_CHOICE* choice = word->best_choice; |
| 809 | int w_conf = static_cast<int>(100 + 5 * choice->certainty()); | 815 | int w_conf = static_cast<int>(100 + 5 * choice->certainty()); |
| 810 | // This is the eq for converting Tesseract confidence to 1..100 | 816 | // This is the eq for converting Tesseract confidence to 1..100 |
| 811 | if (w_conf < 0) w_conf = 0; | 817 | if (w_conf < 0) w_conf = 0; |
| 812 | if (w_conf > 100) w_conf = 100; | 818 | if (w_conf > 100) w_conf = 100; |
| 813 | conf[n_word++] = w_conf; | 819 | conf[n_word++] = w_conf; |
| 814 | } | 820 | } |
| 815 | conf[n_word] = -1; | 821 | conf[n_word] = -1; |
| 816 | return conf; | 822 | return conf; |
| 817 | } | 823 | } |
| 818 | 824 | ||
| 819 | // Returns the average word confidence for Tesseract page result. | 825 | // Returns the average word confidence for Tesseract page result. |
| 820 | int TessBaseAPI::TextConf(PAGE_RES* page_res) { | 826 | int TessBaseAPI::TextConf(PAGE_RES* page_res) { |
| 821 | int* conf = AllTextConfidences(page_res); | 827 | int* conf = AllTextConfidences(page_res); |
| 822 | if (!conf) return 0; | 828 | if (!conf) return 0; |
| 823 | int sum = 0; | 829 | int sum = 0; |
| 824 | int *pt = conf; | 830 | int *pt = conf; |
| 825 | while (*pt >= 0) sum += *pt++; | 831 | while (*pt >= 0) sum += *pt++; |
| 826 | if (pt != conf) sum /= pt - conf; | 832 | if (pt != conf) sum /= pt - conf; |
| 827 | delete [] conf; | 833 | delete [] conf; |
| 828 | return sum; | 834 | return sum; |
| 829 | } | 835 | } |
| 830 | 836 | ||
| 831 | // Make a text string from the internal data structures. | 837 | // Make a text string from the internal data structures. |
| 832 | // The input page_res is deleted. | 838 | // The input page_res is deleted. |
| 833 | char* TessBaseAPI::TesseractToText(PAGE_RES* page_res) { | 839 | char* TessBaseAPI::TesseractToText(PAGE_RES* page_res) { |
| 834 | if (page_res != NULL) { | 840 | if (page_res != NULL) { |
| 835 | int total_length = TextLength(page_res); | 841 | int total_length = TextLength(page_res); |
| 836 | PAGE_RES_IT page_res_it(page_res); | 842 | PAGE_RES_IT page_res_it(page_res); |
| 837 | char* result = new char[total_length]; | 843 | char* result = new char[total_length]; |
| 838 | char* ptr = result; | 844 | char* ptr = result; |
| 839 | for (page_res_it.restart_page(); page_res_it.word () != NULL; | 845 | for (page_res_it.restart_page(); page_res_it.word () != NULL; |
| 840 | page_res_it.forward()) { | 846 | page_res_it.forward()) { |
| 841 | WERD_RES *word = page_res_it.word(); | 847 | WERD_RES *word = page_res_it.word(); |
| 842 | WERD_CHOICE* choice = word->best_choice; | 848 | WERD_CHOICE* choice = word->best_choice; |
| 843 | if (choice != NULL) { | 849 | if (choice != NULL) { |
| 844 | strcpy(ptr, choice->string().string()); | 850 | strcpy(ptr, choice->string().string()); |
| 845 | ptr += strlen(ptr); | 851 | ptr += strlen(ptr); |
| 846 | if (word->word->flag(W_EOL)) | 852 | if (word->word->flag(W_EOL)) |
| 847 | *ptr++ = '\n'; | 853 | *ptr++ = '\n'; |
| 848 | else | 854 | else |
| 849 | *ptr++ = ' '; | 855 | *ptr++ = ' '; |
| 850 | } | 856 | } |
| 851 | } | 857 | } |
| 852 | *ptr++ = '\n'; | 858 | *ptr++ = '\n'; |
| 853 | *ptr = '\0'; | 859 | *ptr = '\0'; |
| 854 | delete page_res; | 860 | delete page_res; |
| 855 | return result; | 861 | return result; |
| 856 | } | 862 | } |
| 857 | return NULL; | 863 | return NULL; |
| 858 | } | 864 | } |
| 859 | 865 | ||
| 860 | static int ConvertWordToBoxText(WERD_RES *word, | 866 | static int ConvertWordToBoxText(WERD_RES *word, |
| 861 | ROW_RES* row, | 867 | ROW_RES* row, |
| 862 | int left, | 868 | int left, |
| 863 | int bottom, | 869 | int bottom, |
| 864 | char* word_str) { | 870 | char* word_str) { |
| 865 | // Copy the output word and denormalize it back to image coords. | 871 | // Copy the output word and denormalize it back to image coords. |
| 866 | WERD copy_outword; | 872 | WERD copy_outword; |
| 867 | copy_outword = *(word->outword); | 873 | copy_outword = *(word->outword); |
| 868 | copy_outword.baseline_denormalise(&word->denorm); | 874 | copy_outword.baseline_denormalise(&word->denorm); |
| 869 | PBLOB_IT blob_it; | 875 | PBLOB_IT blob_it; |
| 870 | blob_it.set_to_list(copy_outword.blob_list()); | 876 | blob_it.set_to_list(copy_outword.blob_list()); |
| 871 | int length = copy_outword.blob_list()->length(); | 877 | int length = copy_outword.blob_list()->length(); |
| 872 | int output_size = 0; | 878 | int output_size = 0; |
| 873 | 879 | ||
| 874 | if (length > 0) { | 880 | if (length > 0) { |
| 875 | for (int index = 0, offset = 0; index < length; | 881 | for (int index = 0, offset = 0; index < length; |
| 876 | offset += word->best_choice->lengths()[index++], blob_it.forward()) { | 882 | offset += word->best_choice->lengths()[index++], blob_it.forward()) { |
| 877 | PBLOB* blob = blob_it.data(); | 883 | PBLOB* blob = blob_it.data(); |
| 878 | TBOX blob_box = blob->bounding_box(); | 884 | TBOX blob_box = blob->bounding_box(); |
| 879 | if (word->tess_failed || | 885 | if (word->tess_failed || |
| 880 | blob_box.left() < 0 || | 886 | blob_box.left() < 0 || |
| 881 | blob_box.right() > page_image.get_xsize() || | 887 | blob_box.right() > page_image.get_xsize() || |
| 882 | blob_box.bottom() < 0 || | 888 | blob_box.bottom() < 0 || |
| 883 | blob_box.top() > page_image.get_ysize()) { | 889 | blob_box.top() > page_image.get_ysize()) { |
| 884 | // Bounding boxes can be illegal when tess fails on a word. | 890 | // Bounding boxes can be illegal when tess fails on a word. |
| 885 | blob_box = word->word->bounding_box(); // Use original word as backup. | 891 | blob_box = word->word->bounding_box(); // Use original word as backup. |
| 886 | tprintf("Using substitute bounding box at (%d,%d)->(%d,%d)\n", | 892 | tprintf("Using substitute bounding box at (%d,%d)->(%d,%d)\n", |
| 887 | blob_box.left(), blob_box.bottom(), | 893 | blob_box.left(), blob_box.bottom(), |
| 888 | blob_box.right(), blob_box.top()); | 894 | blob_box.right(), blob_box.top()); |
| 889 | } | 895 | } |
| 890 | 896 | ||
| 891 | // A single classification unit can be composed of several UTF-8 | 897 | // A single classification unit can be composed of several UTF-8 |
| 892 | // characters. Append each of them to the result. | 898 | // characters. Append each of them to the result. |
| 893 | for (int sub = 0; sub < word->best_choice->lengths()[index]; ++sub) { | 899 | for (int sub = 0; sub < word->best_choice->lengths()[index]; ++sub) { |
| 894 | char ch = word->best_choice->string()[offset + sub]; | 900 | char ch = word->best_choice->string()[offset + sub]; |
| 895 | // Tesseract uses space for recognition failure. Fix to a reject | 901 | // Tesseract uses space for recognition failure. Fix to a reject |
| 896 | // character, '~' so we don't create illegal box files. | 902 | // character, '~' so we don't create illegal box files. |
| 897 | if (ch == ' ') | 903 | if (ch == ' ') |
| 898 | ch = '~'; | 904 | ch = '~'; |
| 899 | word_str[output_size++] = ch; | 905 | word_str[output_size++] = ch; |
| 900 | } | 906 | } |
| 901 | sprintf(word_str + output_size, " %d %d %d %d\n", | 907 | sprintf(word_str + output_size, " %d %d %d %d\n", |
| 902 | blob_box.left() + left, blob_box.bottom() + bottom, | 908 | blob_box.left() + left, blob_box.bottom() + bottom, |
| 903 | blob_box.right() + left, blob_box.top() + bottom); | 909 | blob_box.right() + left, blob_box.top() + bottom); |
| 904 | output_size += strlen(word_str + output_size); | 910 | output_size += strlen(word_str + output_size); |
| 905 | } | 911 | } |
| 906 | } | 912 | } |
| 907 | return output_size; | 913 | return output_size; |
| 908 | } | 914 | } |
| 909 | 915 | ||
// Multiplier for textlength assumes 4 numbers @ 5 digits and a space
// plus the newline and the original character = 4*(5+1)+2
// TesseractToBoxText multiplies TextLength() by this value to size its
// output buffer.
const int kMaxCharsPerChar = 26;
| 913 | 919 | ||
| 914 | // Make a text string from the internal data structures. | 920 | // Make a text string from the internal data structures. |
| 915 | // The input page_res is deleted. | 921 | // The input page_res is deleted. |
| 916 | // The text string takes the form of a box file as needed for training. | 922 | // The text string takes the form of a box file as needed for training. |
| 917 | char* TessBaseAPI::TesseractToBoxText(PAGE_RES* page_res, | 923 | char* TessBaseAPI::TesseractToBoxText(PAGE_RES* page_res, |
| 918 | int left, int bottom) { | 924 | int left, int bottom) { |
| 919 | if (page_res != NULL) { | 925 | if (page_res != NULL) { |
| 920 | int total_length = TextLength(page_res) * kMaxCharsPerChar; | 926 | int total_length = TextLength(page_res) * kMaxCharsPerChar; |
| 921 | PAGE_RES_IT page_res_it(page_res); | 927 | PAGE_RES_IT page_res_it(page_res); |
| 922 | char* result = new char[total_length]; | 928 | char* result = new char[total_length]; |
| 923 | char* ptr = result; | 929 | char* ptr = result; |
| 924 | for (page_res_it.restart_page(); page_res_it.word () != NULL; | 930 | for (page_res_it.restart_page(); page_res_it.word () != NULL; |
| 925 | page_res_it.forward()) { | 931 | page_res_it.forward()) { |
| 926 | WERD_RES *word = page_res_it.word(); | 932 | WERD_RES *word = page_res_it.word(); |
| 927 | ptr += ConvertWordToBoxText(word,page_res_it.row(),left, bottom, ptr); | 933 | ptr += ConvertWordToBoxText(word,page_res_it.row(),left, bottom, ptr); |
| 928 | } | 934 | } |
| 929 | *ptr = '\0'; | 935 | *ptr = '\0'; |
| 930 | delete page_res; | 936 | delete page_res; |
| 931 | return result; | 937 | return result; |
| 932 | } | 938 | } |
| 933 | return NULL; | 939 | return NULL; |
| 934 | } | 940 | } |
| 935 | 941 | ||
// Make a text string from the internal data structures.
// The input page_res is deleted. The text string is converted
// to UNLV-format: Latin-1 with specific reject and suspect codes.
// (The description above applies to TessBaseAPI::TesseractToUNLV, which is
// defined after the constants and tables that follow.)

// Character that TesseractToUNLV writes in place of anything unrecognized.
const char kUnrecognized = '~';
// Conversion table for non-latin characters.
// Maps characters out of the latin set into the latin set.
// The list is terminated by 0, and entry j is replaced by kLatinChs[j].
// Entries: euro sign, left/right double quotation marks, left/right single
// quotation marks, bullet, em dash.
// TODO(rays) incorporate this translation into unicharset.
const int kUniChs[] = {
  0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
};
// Latin chars corresponding to the unicode chars above.
// Entries: cent sign, '"', '"', '\'', '\'', middle dot, hyphen-minus.
const int kLatinChs[] = {
  0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
};
| 950 | 956 | ||
| 951 | char* TessBaseAPI::TesseractToUNLV(PAGE_RES* page_res) { | 957 | char* TessBaseAPI::TesseractToUNLV(PAGE_RES* page_res) { |
| 952 | bool tilde_crunch_written = false; | 958 | bool tilde_crunch_written = false; |
| 953 | bool last_char_was_newline = true; | 959 | bool last_char_was_newline = true; |
| 954 | bool last_char_was_tilde = false; | 960 | bool last_char_was_tilde = false; |
| 955 | 961 | ||
| 956 | if (page_res != NULL) { | 962 | if (page_res != NULL) { |
| 957 | int total_length = TextLength(page_res); | 963 | int total_length = TextLength(page_res); |
| 958 | PAGE_RES_IT page_res_it(page_res); | 964 | PAGE_RES_IT page_res_it(page_res); |
| 959 | char* result = new char[total_length]; | 965 | char* result = new char[total_length]; |
| 960 | char* ptr = result; | 966 | char* ptr = result; |
| 961 | for (page_res_it.restart_page(); page_res_it.word () != NULL; | 967 | for (page_res_it.restart_page(); page_res_it.word () != NULL; |
| 962 | page_res_it.forward()) { | 968 | page_res_it.forward()) { |
| 963 | WERD_RES *word = page_res_it.word(); | 969 | WERD_RES *word = page_res_it.word(); |
| 964 | // Process the current word. | 970 | // Process the current word. |
| 965 | if (word->unlv_crunch_mode != CR_NONE) { | 971 | if (word->unlv_crunch_mode != CR_NONE) { |
| 966 | if (word->unlv_crunch_mode != CR_DELETE && | 972 | if (word->unlv_crunch_mode != CR_DELETE && |
| 967 | (!tilde_crunch_written || | 973 | (!tilde_crunch_written || |
| 968 | (word->unlv_crunch_mode == CR_KEEP_SPACE && | 974 | (word->unlv_crunch_mode == CR_KEEP_SPACE && |
| 969 | word->word->space () > 0 && | 975 | word->word->space () > 0 && |
| 970 | !word->word->flag (W_FUZZY_NON) && | 976 | !word->word->flag (W_FUZZY_NON) && |
| 971 | !word->word->flag (W_FUZZY_SP)))) { | 977 | !word->word->flag (W_FUZZY_SP)))) { |
| 972 | if (!word->word->flag (W_BOL) && | 978 | if (!word->word->flag (W_BOL) && |
| 973 | word->word->space () > 0 && | 979 | word->word->space () > 0 && |
| 974 | !word->word->flag (W_FUZZY_NON) && | 980 | !word->word->flag (W_FUZZY_NON) && |
| 975 | !word->word->flag (W_FUZZY_SP)) { | 981 | !word->word->flag (W_FUZZY_SP)) { |
| 976 | /* Write a space to separate from preceeding good text */ | 982 | /* Write a space to separate from preceeding good text */ |
| 977 | *ptr++ = ' '; | 983 | *ptr++ = ' '; |
| 978 | last_char_was_tilde = false; | 984 | last_char_was_tilde = false; |
| 979 | } | 985 | } |
| 980 | if (!last_char_was_tilde) { | 986 | if (!last_char_was_tilde) { |
| 981 | // Write a reject char. | 987 | // Write a reject char. |
| 982 | last_char_was_tilde = true; | 988 | last_char_was_tilde = true; |
| 983 | *ptr++ = kUnrecognized; | 989 | *ptr++ = kUnrecognized; |
| 984 | tilde_crunch_written = true; | 990 | tilde_crunch_written = true; |
| 985 | last_char_was_newline = false; | 991 | last_char_was_newline = false; |
| 986 | } | 992 | } |
| 987 | } | 993 | } |
| 988 | } else { | 994 | } else { |
| 989 | // NORMAL PROCESSING of non tilde crunched words. | 995 | // NORMAL PROCESSING of non tilde crunched words. |
| 990 | tilde_crunch_written = false; | 996 | tilde_crunch_written = false; |
| 991 | 997 | ||
| 992 | if (last_char_was_tilde && | 998 | if (last_char_was_tilde && |
| 993 | word->word->space () == 0 && | 999 | word->word->space () == 0 && |
| 994 | (word->best_choice->string ()[0] == ' ')) { | 1000 | (word->best_choice->string ()[0] == ' ')) { |
| 995 | /* Prevent adjacent tilde across words - we know that adjacent tildes within | 1001 | /* Prevent adjacent tilde across words - we know that adjacent tildes within |
| 996 | words have been removed */ | 1002 | words have been removed */ |
| 997 | char* p = (char *) word->best_choice->string().string (); | 1003 | char* p = (char *) word->best_choice->string().string (); |
| 998 | strcpy (p, p + 1); //shuffle up | 1004 | strcpy (p, p + 1); //shuffle up |
| 999 | p = (char *) word->best_choice->lengths().string (); | 1005 | p = (char *) word->best_choice->lengths().string (); |
| 1000 | strcpy (p, p + 1); //shuffle up | 1006 | strcpy (p, p + 1); //shuffle up |
| 1001 | word->reject_map.remove_pos (0); | 1007 | word->reject_map.remove_pos (0); |
| 1002 | PBLOB_IT blob_it = word->outword->blob_list (); | 1008 | PBLOB_IT blob_it = word->outword->blob_list (); |
| 1003 | delete blob_it.extract (); //get rid of reject blob | 1009 | delete blob_it.extract (); //get rid of reject blob |
| 1004 | } | 1010 | } |
| 1005 | 1011 | ||
| 1006 | if (word->word->flag(W_REP_CHAR) && tessedit_consistent_reps) | 1012 | if (word->word->flag(W_REP_CHAR) && tessedit_consistent_reps) |
| 1007 | ensure_rep_chars_are_consistent(word); | 1013 | ensure_rep_chars_are_consistent(word); |
| 1008 | 1014 | ||
| 1009 | set_unlv_suspects(word); | 1015 | set_unlv_suspects(word); |
| 1010 | const char* wordstr = word->best_choice->string().string(); | 1016 | const char* wordstr = word->best_choice->string().string(); |
| 1011 | if (wordstr[0] != 0) { | 1017 | if (wordstr[0] != 0) { |
| 1012 | if (!last_char_was_newline) | 1018 | if (!last_char_was_newline) |
| 1013 | *ptr++ = ' '; | 1019 | *ptr++ = ' '; |
| 1014 | else | 1020 | else |
| 1015 | last_char_was_newline = false; | 1021 | last_char_was_newline = false; |
| 1016 | int offset = 0; | 1022 | int offset = 0; |
| 1017 | const STRING& lengths = word->best_choice->lengths(); | 1023 | const STRING& lengths = word->best_choice->lengths(); |
| 1018 | int length = lengths.length(); | 1024 | int length = lengths.length(); |
| 1019 | for (int i = 0; i < length; offset += lengths[i++]) { | 1025 | for (int i = 0; i < length; offset += lengths[i++]) { |
| 1020 | if (wordstr[offset] == ' ' || | 1026 | if (wordstr[offset] == ' ' || |
| 1021 | wordstr[offset] == '~' || | 1027 | wordstr[offset] == '~' || |
| 1022 | wordstr[offset] == '|') { | 1028 | wordstr[offset] == '|') { |
| 1023 | *ptr++ = kUnrecognized; | 1029 | *ptr++ = kUnrecognized; |
| 1024 | last_char_was_tilde = true; | 1030 | last_char_was_tilde = true; |
| 1025 | } else { | 1031 | } else { |
| 1026 | if (word->reject_map[i].rejected()) | 1032 | if (word->reject_map[i].rejected()) |
| 1027 | *ptr++ = '^'; | 1033 | *ptr++ = '^'; |
| 1028 | UNICHAR ch(wordstr + offset, lengths[i]); | 1034 | UNICHAR ch(wordstr + offset, lengths[i]); |
| 1029 | int uni_ch = ch.first_uni(); | 1035 | int uni_ch = ch.first_uni(); |
| 1030 | for (int j = 0; kUniChs[j] != 0; ++j) { | 1036 | for (int j = 0; kUniChs[j] != 0; ++j) { |
| 1031 | if (kUniChs[j] == uni_ch) { | 1037 | if (kUniChs[j] == uni_ch) { |
| 1032 | uni_ch = kLatinChs[j]; | 1038 | uni_ch = kLatinChs[j]; |
| 1033 | break; | 1039 | break; |
| 1034 | } | 1040 | } |
| 1035 | } | 1041 | } |
| 1036 | if (uni_ch <= 0xff) { | 1042 | if (uni_ch <= 0xff) { |
| 1037 | *ptr++ = static_cast<char>(uni_ch); | 1043 | *ptr++ = static_cast<char>(uni_ch); |
| 1038 | last_char_was_tilde = false; | 1044 | last_char_was_tilde = false; |
| 1039 | } else { | 1045 | } else { |
| 1040 | *ptr++ = kUnrecognized; | 1046 | *ptr++ = kUnrecognized; |
| 1041 | last_char_was_tilde = true; | 1047 | last_char_was_tilde = true; |
| 1042 | } | 1048 | } |
| 1043 | } | 1049 | } |
| 1044 | } | 1050 | } |
| 1045 | } | 1051 | } |
| 1046 | } | 1052 | } |
| 1047 | if (word->word->flag(W_EOL) && !last_char_was_newline) { | 1053 | if (word->word->flag(W_EOL) && !last_char_was_newline) { |
| 1048 | /* Add a new line output */ | 1054 | /* Add a new line output */ |
| 1049 | *ptr++ = '\n'; | 1055 | *ptr++ = '\n'; |
| 1050 | tilde_crunch_written = false; | 1056 | tilde_crunch_written = false; |
| 1051 | last_char_was_newline = true; | 1057 | last_char_was_newline = true; |
| 1052 | last_char_was_tilde = false; | 1058 | last_char_was_tilde = false; |
| 1053 | } | 1059 | } |
| 1054 | } | 1060 | } |
| 1055 | *ptr++ = '\n'; | 1061 | *ptr++ = '\n'; |
| 1056 | *ptr = '\0'; | 1062 | *ptr = '\0'; |
| 1057 | delete page_res; | 1063 | delete page_res; |
| 1058 | return result; | 1064 | return result; |
| 1059 | } | 1065 | } |
| 1060 | return NULL; | 1066 | return NULL; |
| 1061 | } | 1067 | } |
| 1062 | // ____________________________________________________________________________ | 1068 | // ____________________________________________________________________________ |
| 1063 | // Ocropus add-ons. | 1069 | // Ocropus add-ons. |
| 1064 | 1070 | ||
| 1065 | // Find lines from the image making the BLOCK_LIST. | 1071 | // Find lines from the image making the BLOCK_LIST. |
| 1066 | BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { | 1072 | BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { |
| 1067 | BLOCK_LIST *block_list = new BLOCK_LIST(); | 1073 | BLOCK_LIST *block_list = new BLOCK_LIST(); |
| 1068 | FindLines(block_list); | 1074 | FindLines(block_list); |
| 1069 | return block_list; | 1075 | return block_list; |
| 1070 | } | 1076 | } |
| 1071 | 1077 | ||
| 1072 | // Delete a block list. | 1078 | // Delete a block list. |
| 1073 | // This is to keep BLOCK_LIST pointer opaque | 1079 | // This is to keep BLOCK_LIST pointer opaque |
| 1074 | // and let go of including the other headers. | 1080 | // and let go of including the other headers. |
| 1075 | void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { | 1081 | void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { |
| 1076 | delete block_list; | 1082 | delete block_list; |
| 1077 | } | 1083 | } |
| 1078 | 1084 | ||
| 1079 | 1085 | ||
| 1080 | static ROW *make_tess_ocrrow(float baseline, | 1086 | static ROW *make_tess_ocrrow(float baseline, |
| 1081 | float xheight, | 1087 | float xheight, |
| 1082 | float descender, | 1088 | float descender, |
| 1083 | float ascender) { | 1089 | float ascender) { |
| 1084 | inT32 xstarts[] = {-32000}; | 1090 | inT32 xstarts[] = {-32000}; |
| 1085 | double quad_coeffs[] = {0,0,baseline}; | 1091 | double quad_coeffs[] = {0,0,baseline}; |
| 1086 | return new ROW(1, | 1092 | return new ROW(1, |
| 1087 | xstarts, | 1093 | xstarts, |
| 1088 | quad_coeffs, | 1094 | quad_coeffs, |
| 1089 | xheight, | 1095 | xheight, |
| 1090 | ascender - (baseline + xheight), | 1096 | ascender - (baseline + xheight), |
| 1091 | descender - baseline, | 1097 | descender - baseline, |
| 1092 | 0, | 1098 | 0, |
| 1093 | 0); | 1099 | 0); |
| 1094 | } | 1100 | } |
| 1095 | 1101 | ||
| 1096 | // Almost a copy of make_tess_row() from ccmain/tstruct.cpp. | 1102 | // Almost a copy of make_tess_row() from ccmain/tstruct.cpp. |
| 1097 | static void fill_dummy_row(float baseline, float xheight, | 1103 | static void fill_dummy_row(float baseline, float xheight, |
| 1098 | float descender, float ascender, | 1104 | float descender, float ascender, |
| 1099 | TEXTROW* tessrow) { | 1105 | TEXTROW* tessrow) { |
| 1100 | tessrow->baseline.segments = 1; | 1106 | tessrow->baseline.segments = 1; |
| 1101 | tessrow->baseline.xstarts[0] = -32767; | 1107 | tessrow->baseline.xstarts[0] = -32767; |
| 1102 | tessrow->baseline.xstarts[1] = 32767; | 1108 | tessrow->baseline.xstarts[1] = 32767; |
| 1103 | tessrow->baseline.quads[0].a = 0; | 1109 | tessrow->baseline.quads[0].a = 0; |
| 1104 | tessrow->baseline.quads[0].b = 0; | 1110 | tessrow->baseline.quads[0].b = 0; |
| 1105 | tessrow->baseline.quads[0].c = bln_baseline_offset; | 1111 | tessrow->baseline.quads[0].c = bln_baseline_offset; |
| 1106 | tessrow->xheight.segments = 1; | 1112 | tessrow->xheight.segments = 1; |
| 1107 | tessrow->xheight.xstarts[0] = -32767; | 1113 | tessrow->xheight.xstarts[0] = -32767; |
| 1108 | tessrow->xheight.xstarts[1] = 32767; | 1114 | tessrow->xheight.xstarts[1] = 32767; |
| 1109 | tessrow->xheight.quads[0].a = 0; | 1115 | tessrow->xheight.quads[0].a = 0; |
| 1110 | tessrow->xheight.quads[0].b = 0; | 1116 | tessrow->xheight.quads[0].b = 0; |
| 1111 | tessrow->xheight.quads[0].c = bln_baseline_offset + bln_x_height; | 1117 | tessrow->xheight.quads[0].c = bln_baseline_offset + bln_x_height; |
| 1112 | tessrow->lineheight = bln_x_height; | 1118 | tessrow->lineheight = bln_x_height; |
| 1113 | tessrow->ascrise = bln_x_height * (ascender - (xheight + baseline)) / xheight; | 1119 | tessrow->ascrise = bln_x_height * (ascender - (xheight + baseline)) / xheight; |
| 1114 | tessrow->descdrop = bln_x_height * (descender - baseline) / xheight; | 1120 | tessrow->descdrop = bln_x_height * (descender - baseline) / xheight; |
| 1115 | } | 1121 | } |
| 1116 | 1122 | ||
| 1117 | 1123 | ||
| 1118 | /// Return a TBLOB * from the whole page_image. | 1124 | /// Return a TBLOB * from the whole page_image. |
| 1119 | /// To be freed later with free_blob(). | 1125 | /// To be freed later with free_blob(). |
| 1120 | TBLOB *make_tesseract_blob(float baseline, float xheight, float descender, float ascender) { | 1126 | TBLOB *make_tesseract_blob(float baseline, float xheight, float descender, float ascender) { |
| 1121 | BLOCK *block = new BLOCK ("a character", | 1127 | BLOCK *block = new BLOCK ("a character", |
| 1122 | TRUE, | 1128 | TRUE, |
| 1123 | 0, 0, | 1129 | 0, 0, |
| 1124 | 0, 0, | 1130 | 0, 0, |
| 1125 | page_image.get_xsize(), | 1131 | page_image.get_xsize(), |
| 1126 | page_image.get_ysize()); | 1132 | page_image.get_ysize()); |
| 1127 | 1133 | ||
| 1128 | // Create C_BLOBs from the page | 1134 | // Create C_BLOBs from the page |
| 1129 | extract_edges(NULL, &page_image, &page_image, | 1135 | extract_edges(NULL, &page_image, &page_image, |
| 1130 | ICOORD(page_image.get_xsize(), page_image.get_ysize()), | 1136 | ICOORD(page_image.get_xsize(), page_image.get_ysize()), |
| 1131 | block); | 1137 | block); |
| 1132 | 1138 | ||
| 1133 | // Create one PBLOB from all C_BLOBs | 1139 | // Create one PBLOB from all C_BLOBs |
| 1134 | C_BLOB_LIST *list = block->blob_list(); | 1140 | C_BLOB_LIST *list = block->blob_list(); |
| 1135 | C_BLOB_IT c_blob_it(list); | 1141 | C_BLOB_IT c_blob_it(list); |
| 1136 | PBLOB *pblob = new PBLOB; // will be (hopefully) deleted by the pblob_list | 1142 | PBLOB *pblob = new PBLOB; // will be (hopefully) deleted by the pblob_list |
| 1137 | for (c_blob_it.mark_cycle_pt(); | 1143 | for (c_blob_it.mark_cycle_pt(); |
| 1138 | !c_blob_it.cycled_list(); | 1144 | !c_blob_it.cycled_list(); |
| 1139 | c_blob_it.forward()) { | 1145 | c_blob_it.forward()) { |
| 1140 | C_BLOB *c_blob = c_blob_it.data(); | 1146 | C_BLOB *c_blob = c_blob_it.data(); |
| 1141 | PBLOB c_as_p(c_blob, baseline + xheight); | 1147 | PBLOB c_as_p(c_blob, baseline + xheight); |
| 1142 | merge_blobs(pblob, &c_as_p); | 1148 | merge_blobs(pblob, &c_as_p); |
| 1143 | } | 1149 | } |
| 1144 | PBLOB_LIST *pblob_list = new PBLOB_LIST; // will be deleted by the word | 1150 | PBLOB_LIST *pblob_list = new PBLOB_LIST; // will be deleted by the word |
| 1145 | PBLOB_IT pblob_it(pblob_list); | 1151 | PBLOB_IT pblob_it(pblob_list); |
| 1146 | pblob_it.add_after_then_move(pblob); | 1152 | pblob_it.add_after_then_move(pblob); |
| 1147 | 1153 | ||
| 1148 | // Normalize PBLOB | 1154 | // Normalize PBLOB |
| 1149 | WERD word(pblob_list, 0, " "); | 1155 | WERD word(pblob_list, 0, " "); |
| 1150 | ROW *row = make_tess_ocrrow(baseline, xheight, descender, ascender); | 1156 | ROW *row = make_tess_ocrrow(baseline, xheight, descender, ascender); |
| 1151 | word.baseline_normalise(row); | 1157 | word.baseline_normalise(row); |
| 1152 | delete row; | 1158 | delete row; |
| 1153 | 1159 | ||
| 1154 | // Create a TBLOB from PBLOB | 1160 | // Create a TBLOB from PBLOB |
| 1155 | return make_tess_blob(pblob, /* flatten: */ TRUE); | 1161 | return make_tess_blob(pblob, /* flatten: */ TRUE); |
| 1156 | } | 1162 | } |
| 1157 | 1163 | ||
| 1158 | 1164 | ||
| 1159 | // Adapt to recognize the current image as the given character. | 1165 | // Adapt to recognize the current image as the given character. |
| 1160 | // The image must be preloaded and be just an image of a single character. | 1166 | // The image must be preloaded and be just an image of a single character. |
| 1161 | void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, | 1167 | void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, |
| 1162 | int length, | 1168 | int length, |
| 1163 | float baseline, | 1169 | float baseline, |
| 1164 | float xheight, | 1170 | float xheight, |
| 1165 | float descender, | 1171 | float descender, |
| 1166 | float ascender) { | 1172 | float ascender) { |
| 1167 | UNICHAR_ID id = unicharset.unichar_to_id(unichar_repr, length); | 1173 | UNICHAR_ID id = unicharset.unichar_to_id(unichar_repr, length); |
| 1168 | LINE_STATS LineStats; | 1174 | LINE_STATS LineStats; |
| 1169 | TEXTROW row; | 1175 | TEXTROW row; |
| 1170 | fill_dummy_row(baseline, xheight, descender, ascender, &row); | 1176 | fill_dummy_row(baseline, xheight, descender, ascender, &row); |
| 1171 | GetLineStatsFromRow(&row, &LineStats); | 1177 | GetLineStatsFromRow(&row, &LineStats); |
| 1172 | 1178 | ||
| 1173 | TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender); | 1179 | TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender); |
| 1174 | float threshold; | 1180 | float threshold; |
| 1175 | int best_class = 0; | 1181 | int best_class = 0; |
| 1176 | float best_rating = -100; | 1182 | float best_rating = -100; |
| 1177 | 1183 | ||
| 1178 | 1184 | ||
| 1179 | // Classify to get a raw choice. | 1185 | // Classify to get a raw choice. |
| 1180 | LIST result = AdaptiveClassifier(blob, NULL, &row); | 1186 | LIST result = AdaptiveClassifier(blob, NULL, &row); |
| 1181 | LIST p; | 1187 | LIST p; |
| 1182 | for (p = result; p != NULL; p = p->next) { | 1188 | for (p = result; p != NULL; p = p->next) { |
| 1183 | A_CHOICE *tesschoice = (A_CHOICE *) p->node; | 1189 | A_CHOICE *tesschoice = (A_CHOICE *) p->node; |
| 1184 | if (tesschoice->rating > best_rating) { | 1190 | if (tesschoice->rating > best_rating) { |
| 1185 | best_rating = tesschoice->rating; | 1191 | best_rating = tesschoice->rating; |
| 1186 | best_class = tesschoice->string[0]; | 1192 | best_class = tesschoice->string[0]; |
| 1187 | } | 1193 | } |
| 1188 | } | 1194 | } |
| 1189 | 1195 | ||
| 1190 | FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId); | 1196 | FLOAT32 GetBestRatingFor(TBLOB *Blob, LINE_STATS *LineStats, CLASS_ID ClassId); |
| 1191 | 1197 | ||
| 1192 | // We have to use char-level adaptation because otherwise | 1198 | // We have to use char-level adaptation because otherwise |
| 1193 | // someone should do forced alignment somewhere. | 1199 | // someone should do forced alignment somewhere. |
| 1194 | void AdaptToChar(TBLOB *Blob, | 1200 | void AdaptToChar(TBLOB *Blob, |
| 1195 | LINE_STATS *LineStats, | 1201 | LINE_STATS *LineStats, |
| 1196 | CLASS_ID ClassId, | 1202 | CLASS_ID ClassId, |
| 1197 | FLOAT32 Threshold); | 1203 | FLOAT32 Threshold); |
| 1198 | 1204 | ||
| 1199 | 1205 | ||
| 1200 | if (id == best_class) | 1206 | if (id == best_class) |
| 1201 | threshold = GoodAdaptiveMatch; | 1207 | threshold = GoodAdaptiveMatch; |
| 1202 | else { | 1208 | else { |
| 1203 | /* the blob was incorrectly classified - find the rating threshold | 1209 | /* the blob was incorrectly classified - find the rating threshold |
| 1204 | needed to create a template which will correct the error with | 1210 | needed to create a template which will correct the error with |
| 1205 | some margin. However, don't waste time trying to make | 1211 | some margin. However, don't waste time trying to make |
| 1206 | templates which are too tight. */ | 1212 | templates which are too tight. */ |
| 1207 | threshold = GetBestRatingFor(blob, &LineStats, id); | 1213 | threshold = GetBestRatingFor(blob, &LineStats, id); |
| 1208 | threshold *= .9; | 1214 | threshold *= .9; |
| 1209 | const float max_threshold = .125; | 1215 | const float max_threshold = .125; |
| 1210 | const float min_threshold = .02; | 1216 | const float min_threshold = .02; |
| 1211 | 1217 | ||
| 1212 | if (threshold > max_threshold) | 1218 | if (threshold > max_threshold) |
| 1213 | threshold = max_threshold; | 1219 | threshold = max_threshold; |
| 1214 | 1220 | ||
| 1215 | // I have cuddled the following line to set it out of the strike | 1221 | // I have cuddled the following line to set it out of the strike |
| 1216 | // of the coverage testing tool. I have no idea how to trigger | 1222 | // of the coverage testing tool. I have no idea how to trigger |
| 1217 | // this situation nor I have any necessity to do it. --mezhirov | 1223 | // this situation nor I have any necessity to do it. --mezhirov |
| 1218 | if (threshold < min_threshold) threshold = min_threshold; | 1224 | if (threshold < min_threshold) threshold = min_threshold; |
| 1219 | } | 1225 | } |
| 1220 | 1226 | ||
| 1221 | if (blob->outlines) | 1227 | if (blob->outlines) |
| 1222 | AdaptToChar(blob, &LineStats, id, threshold); | 1228 | AdaptToChar(blob, &LineStats, id, threshold); |
| 1223 | free_blob(blob); | 1229 | free_blob(blob); |
| 1224 | } | 1230 | } |
| 1225 | 1231 | ||
| 1226 | 1232 | ||
| 1227 | PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { | 1233 | PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { |
| 1228 | PAGE_RES *page_res = new PAGE_RES(block_list); | 1234 | PAGE_RES *page_res = new PAGE_RES(block_list); |
| 1229 | recog_all_words(page_res, NULL, NULL, 1); | 1235 | recog_all_words(page_res, NULL, NULL, 1); |
| 1230 | return page_res; | 1236 | return page_res; |
| 1231 | } | 1237 | } |
| 1232 | 1238 | ||
| 1233 | PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, | 1239 | PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, |
| 1234 | PAGE_RES* pass1_result) { | 1240 | PAGE_RES* pass1_result) { |
| 1235 | if (!pass1_result) | 1241 | if (!pass1_result) |
| 1236 | pass1_result = new PAGE_RES(block_list); | 1242 | pass1_result = new PAGE_RES(block_list); |
| 1237 | recog_all_words(pass1_result, NULL, NULL, 2); | 1243 | recog_all_words(pass1_result, NULL, NULL, 2); |
| 1238 | return pass1_result; | 1244 | return pass1_result; |
| 1239 | } | 1245 | } |
| 1240 | 1246 | ||
| 1241 | // brief Get a bounding box of a PBLOB. | 1247 | // brief Get a bounding box of a PBLOB. |
| 1242 | // TODO(mezhirov) delete this function and replace with blob->bounding_box() | 1248 | // TODO(mezhirov) delete this function and replace with blob->bounding_box() |
| 1243 | static TBOX pblob_get_bbox(PBLOB *blob) { | 1249 | static TBOX pblob_get_bbox(PBLOB *blob) { |
| 1244 | OUTLINE_LIST *outlines = blob->out_list(); | 1250 | OUTLINE_LIST *outlines = blob->out_list(); |
| 1245 | OUTLINE_IT it(outlines); | 1251 | OUTLINE_IT it(outlines); |
| 1246 | TBOX result; | 1252 | TBOX result; |
| 1247 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { | 1253 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
| 1248 | OUTLINE *outline = it.data(); | 1254 | OUTLINE *outline = it.data(); |
| 1249 | outline->compute_bb(); | 1255 | outline->compute_bb(); |
| 1250 | result.bounding_union(outline->bounding_box()); | 1256 | result.bounding_union(outline->bounding_box()); |
| 1251 | } | 1257 | } |
| 1252 | return result; | 1258 | return result; |
| 1253 | } | 1259 | } |
| 1254 | 1260 | ||
| 1255 | // TODO(mezhirov) delete this function and replace with word->bounding_box() | 1261 | // TODO(mezhirov) delete this function and replace with word->bounding_box() |
| 1256 | static TBOX c_blob_list_get_bbox(C_BLOB_LIST *cblobs) { | 1262 | static TBOX c_blob_list_get_bbox(C_BLOB_LIST *cblobs) { |
| 1257 | TBOX result; | 1263 | TBOX result; |
| 1258 | C_BLOB_IT c_it(cblobs); | 1264 | C_BLOB_IT c_it(cblobs); |
| 1259 | for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { | 1265 | for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) { |
| 1260 | C_BLOB *blob = c_it.data(); | 1266 | C_BLOB *blob = c_it.data(); |
| 1261 | //bboxes.push(tessy_rectangle(blob->bounding_box())); | 1267 | //bboxes.push(tessy_rectangle(blob->bounding_box())); |
| 1262 | result.bounding_union(blob->bounding_box()); | 1268 | result.bounding_union(blob->bounding_box()); |
| 1263 | } | 1269 | } |
| 1264 | return result; | 1270 | return result; |
| 1265 | } | 1271 | } |
| 1266 | 1272 | ||
| 1267 | struct TESS_CHAR : ELIST_LINK { | 1273 | struct TESS_CHAR : ELIST_LINK { |
| 1268 | char *unicode_repr; | 1274 | char *unicode_repr; |
| 1269 | int length; // of unicode_repr | 1275 | int length; // of unicode_repr |
| 1270 | float cost; | 1276 | float cost; |
| 1271 | TBOX box; | 1277 | TBOX box; |
| 1272 | 1278 | ||
| 1273 | TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { | 1279 | TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) { |
| 1274 | length = (len == -1 ? strlen(repr) : len); | 1280 | length = (len == -1 ? strlen(repr) : len); |
| 1275 | unicode_repr = new char[length + 1]; | 1281 | unicode_repr = new char[length + 1]; |
| 1276 | strncpy(unicode_repr, repr, length); | 1282 | strncpy(unicode_repr, repr, length); |
| 1277 | } | 1283 | } |
| 1278 | 1284 | ||
| 1279 | ~TESS_CHAR() { | 1285 | ~TESS_CHAR() { |
| 1280 | delete unicode_repr; | 1286 | delete unicode_repr; |
| 1281 | } | 1287 | } |
| 1282 | }; | 1288 | }; |
| 1283 | 1289 | ||
| 1284 | 1290 | ||
| 1285 | static void add_space(ELIST_ITERATOR *it) { | 1291 | static void add_space(ELIST_ITERATOR *it) { |
| 1286 | TESS_CHAR *t = new TESS_CHAR(0, " "); | 1292 | TESS_CHAR *t = new TESS_CHAR(0, " "); |
| 1287 | it->add_after_then_move(t); | 1293 | it->add_after_then_move(t); |
| 1288 | } | 1294 | } |
| 1289 | 1295 | ||
| 1290 | 1296 | ||
// Converts a rating into a non-negative cost: the rating is shifted up by
// 100 and anything that would still be negative is clamped to 0.
// (Ratings below -100 have not been observed, but the clamp is cheap.)
static float rating_to_cost(float rating) {
  const float shifted = 100 + rating;
  return shifted < 0 ? 0.0f : shifted;
}
| 1299 | 1305 | ||
| 1300 | 1306 | ||
| 1301 | // Extract the OCR results, costs (penalty points for uncertainty), | 1307 | // Extract the OCR results, costs (penalty points for uncertainty), |
| 1302 | // and the bounding boxes of the characters. | 1308 | // and the bounding boxes of the characters. |
| 1303 | static void extract_result(ELIST_ITERATOR *out, | 1309 | static void extract_result(ELIST_ITERATOR *out, |
| 1304 | PAGE_RES* page_res) { | 1310 | PAGE_RES* page_res) { |
| 1305 | PAGE_RES_IT page_res_it(page_res); | 1311 | PAGE_RES_IT page_res_it(page_res); |
| 1306 | int word_count = 0; | 1312 | int word_count = 0; |
| 1307 | while (page_res_it.word() != NULL) { | 1313 | while (page_res_it.word() != NULL) { |
| 1308 | WERD_RES *word = page_res_it.word(); | 1314 | WERD_RES *word = page_res_it.word(); |
| 1309 | const char *str = word->best_choice->string().string(); | 1315 | const char *str = word->best_choice->string().string(); |
| 1310 | const char *len = word->best_choice->lengths().string(); | 1316 | const char *len = word->best_choice->lengths().string(); |
| 1311 | 1317 | ||
| 1312 | if (word_count) | 1318 | if (word_count) |
| 1313 | add_space(out); | 1319 | add_space(out); |
| 1314 | TBOX bln_rect; | 1320 | TBOX bln_rect; |
| 1315 | PBLOB_LIST *blobs = word->outword->blob_list(); | 1321 | PBLOB_LIST *blobs = word->outword->blob_list(); |
| 1316 | PBLOB_IT it(blobs); | 1322 | PBLOB_IT it(blobs); |
| 1317 | int n = strlen(len); | 1323 | int n = strlen(len); |
| 1318 | TBOX** boxes_to_fix = new TBOX*[n]; | 1324 | TBOX** boxes_to_fix = new TBOX*[n]; |
| 1319 | for (int i = 0; i < n; i++) { | 1325 | for (int i = 0; i < n; i++) { |
| 1320 | PBLOB *blob = it.data(); | 1326 | PBLOB *blob = it.data(); |
| 1321 | TBOX current = pblob_get_bbox(blob); | 1327 | TBOX current = pblob_get_bbox(blob); |
| 1322 | bln_rect.bounding_union(current); | 1328 | bln_rect.bounding_union(current); |
| 1323 | 1329 | ||
| 1324 | TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), | 1330 | TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), |
| 1325 | str, *len); | 1331 | str, *len); |
| 1326 | tc->box = current; | 1332 | tc->box = current; |
| 1327 | boxes_to_fix[i] = &tc->box; | 1333 | boxes_to_fix[i] = &tc->box; |
| 1328 | 1334 | ||
| 1329 | out->add_after_then_move(tc); | 1335 | out->add_after_then_move(tc); |
| 1330 | it.forward(); | 1336 | it.forward(); |
| 1331 | str += *len; | 1337 | str += *len; |
| 1332 | len++; | 1338 | len++; |
| 1333 | } | 1339 | } |
| 1334 | 1340 | ||
| 1335 | // Find the word bbox before normalization. | 1341 | // Find the word bbox before normalization. |
| 1336 | // Here we can't use the C_BLOB bboxes directly, | 1342 | // Here we can't use the C_BLOB bboxes directly, |
| 1337 | // since connected letters are not yet cut. | 1343 | // since connected letters are not yet cut. |
| 1338 | TBOX real_rect = c_blob_list_get_bbox(word->word->cblob_list()); | 1344 | TBOX real_rect = c_blob_list_get_bbox(word->word->cblob_list()); |
| 1339 | 1345 | ||
| 1340 | // Denormalize boxes by transforming the bbox of the whole bln word | 1346 | // Denormalize boxes by transforming the bbox of the whole bln word |
| 1341 | // into the denorm bbox (`real_rect') of the whole word. | 1347 | // into the denorm bbox (`real_rect') of the whole word. |
| 1342 | double x_stretch = double(real_rect.width()) / bln_rect.width(); | 1348 | double x_stretch = double(real_rect.width()) / bln_rect.width(); |
| 1343 | double y_stretch = double(real_rect.height()) / bln_rect.height(); | 1349 | double y_stretch = double(real_rect.height()) / bln_rect.height(); |
| 1344 | for (int j = 0; j < n; j++) { | 1350 | for (int j = 0; j < n; j++) { |
| 1345 | TBOX *box = boxes_to_fix[j]; | 1351 | TBOX *box = boxes_to_fix[j]; |
| 1346 | int x0 = int(real_rect.left() + | 1352 | int x0 = int(real_rect.left() + |
| 1347 | x_stretch * (box->left() - bln_rect.left()) + 0.5); | 1353 | x_stretch * (box->left() - bln_rect.left()) + 0.5); |
| 1348 | int x1 = int(real_rect.left() + | 1354 | int x1 = int(real_rect.left() + |
| 1349 | x_stretch * (box->right() - bln_rect.left()) + 0.5); | 1355 | x_stretch * (box->right() - bln_rect.left()) + 0.5); |
| 1350 | int y0 = int(real_rect.bottom() + | 1356 | int y0 = int(real_rect.bottom() + |
| 1351 | y_stretch * (box->bottom() - bln_rect.bottom()) + 0.5); | 1357 | y_stretch * (box->bottom() - bln_rect.bottom()) + 0.5); |
| 1352 | int y1 = int(real_rect.bottom() + | 1358 | int y1 = int(real_rect.bottom() + |
| 1353 | y_stretch * (box->top() - bln_rect.bottom()) + 0.5); | 1359 | y_stretch * (box->top() - bln_rect.bottom()) + 0.5); |
| 1354 | *box = TBOX(ICOORD(x0, y0), ICOORD(x1, y1)); | 1360 | *box = TBOX(ICOORD(x0, y0), ICOORD(x1, y1)); |
| 1355 | } | 1361 | } |
| 1356 | delete [] boxes_to_fix; | 1362 | delete [] boxes_to_fix; |
| 1357 | 1363 | ||
| 1358 | page_res_it.forward(); | 1364 | page_res_it.forward(); |
| 1359 | word_count++; | 1365 | word_count++; |
| 1360 | } | 1366 | } |
| 1361 | } | 1367 | } |
| 1362 | 1368 | ||
| 1363 | 1369 | ||
| 1364 | // Extract the OCR results, costs (penalty points for uncertainty), | 1370 | // Extract the OCR results, costs (penalty points for uncertainty), |
| 1365 | // and the bounding boxes of the characters. | 1371 | // and the bounding boxes of the characters. |
| 1366 | int TessBaseAPI::TesseractExtractResult(char** string, | 1372 | int TessBaseAPI::TesseractExtractResult(char** string, |
| 1367 | int** lengths, | 1373 | int** lengths, |
| 1368 | float** costs, | 1374 | float** costs, |
| 1369 | int** x0, | 1375 | int** x0, |
| 1370 | int** y0, | 1376 | int** y0, |
| 1371 | int** x1, | 1377 | int** x1, |
| 1372 | int** y1, | 1378 | int** y1, |
| 1373 | PAGE_RES* page_res) { | 1379 | PAGE_RES* page_res) { |
| 1374 | ELIST tess_chars; | 1380 | ELIST tess_chars; |
| 1375 | ELIST_ITERATOR tess_chars_it(&tess_chars); | 1381 | ELIST_ITERATOR tess_chars_it(&tess_chars); |
| 1376 | extract_result(&tess_chars_it, page_res); | 1382 | extract_result(&tess_chars_it, page_res); |
| 1377 | tess_chars_it.move_to_first(); | 1383 | tess_chars_it.move_to_first(); |
| 1378 | int n = tess_chars.length(); | 1384 | int n = tess_chars.length(); |
| 1379 | int string_len = 0; | 1385 | int string_len = 0; |
| 1380 | *lengths = new int[n]; | 1386 | *lengths = new int[n]; |
| 1381 | *costs = new float[n]; | 1387 | *costs = new float[n]; |
| 1382 | *x0 = new int[n]; | 1388 | *x0 = new int[n]; |
| 1383 | *y0 = new int[n]; | 1389 | *y0 = new int[n]; |
| 1384 | *x1 = new int[n]; | 1390 | *x1 = new int[n]; |
| 1385 | *y1 = new int[n]; | 1391 | *y1 = new int[n]; |
| 1386 | int i = 0; | 1392 | int i = 0; |
| 1387 | for (tess_chars_it.mark_cycle_pt(); | 1393 | for (tess_chars_it.mark_cycle_pt(); |
| 1388 | !tess_chars_it.cycled_list(); | 1394 | !tess_chars_it.cycled_list(); |
| 1389 | tess_chars_it.forward(), i++) { | 1395 | tess_chars_it.forward(), i++) { |
| 1390 | TESS_CHAR *tc = (TESS_CHAR *) tess_chars_it.data(); | 1396 | TESS_CHAR *tc = (TESS_CHAR *) tess_chars_it.data(); |
| 1391 | string_len += (*lengths)[i] = tc->length; | 1397 | string_len += (*lengths)[i] = tc->length; |
| 1392 | (*costs)[i] = tc->cost; | 1398 | (*costs)[i] = tc->cost; |
| 1393 | (*x0)[i] = tc->box.left(); | 1399 | (*x0)[i] = tc->box.left(); |
| 1394 | (*y0)[i] = tc->box.bottom(); | 1400 | (*y0)[i] = tc->box.bottom(); |
| 1395 | (*x1)[i] = tc->box.right(); | 1401 | (*x1)[i] = tc->box.right(); |
| 1396 | (*y1)[i] = tc->box.top(); | 1402 | (*y1)[i] = tc->box.top(); |
| 1397 | } | 1403 | } |
| 1398 | char *p = *string = new char[string_len]; | 1404 | char *p = *string = new char[string_len]; |
| 1399 | 1405 | ||
| 1400 | tess_chars_it.move_to_first(); | 1406 | tess_chars_it.move_to_first(); |
| 1401 | for (tess_chars_it.mark_cycle_pt(); | 1407 | for (tess_chars_it.mark_cycle_pt(); |
| 1402 | !tess_chars_it.cycled_list(); | 1408 | !tess_chars_it.cycled_list(); |
| 1403 | tess_chars_it.forward()) { | 1409 | tess_chars_it.forward()) { |
| 1404 | TESS_CHAR *tc = (TESS_CHAR *) tess_chars_it.data(); | 1410 | TESS_CHAR *tc = (TESS_CHAR *) tess_chars_it.data(); |
| 1405 | strncpy(p, tc->unicode_repr, tc->length); | 1411 | strncpy(p, tc->unicode_repr, tc->length); |
| 1406 | p += tc->length; | 1412 | p += tc->length; |
| 1407 | } | 1413 | } |
| 1408 | return n; | 1414 | return n; |
| 1409 | } | 1415 | } |
| 1410 | 1416 | ||
// Check whether a word is valid according to Tesseract's language model.
// Returns 0 if the string is invalid, non-zero if valid.
// This is a thin wrapper: the result of valid_word(string) is returned
// unchanged.
int TessBaseAPI::IsValidWord(const char *string) {
  return valid_word(string);
}
| 1416 | 1422 | ||