Skip to content

Commit 023e1b3

Browse files
stweil authored and zdenop committed
Use POSIX data types and macros (#878)
* api: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <[email protected]> * ccmain: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <[email protected]> * ccstruct: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <[email protected]> * classify: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <[email protected]> * cutil: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <[email protected]> * dict: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <[email protected]> * textord: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <[email protected]> * training: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <[email protected]> * wordrec: Replace Tesseract data types by POSIX data types Signed-off-by: Stefan Weil <[email protected]> * ccutil: Replace Tesseract data types by POSIX data types Now all Tesseract data types which are no longer needed can be removed from ccutil/host.h. 
Signed-off-by: Stefan Weil <[email protected]> * ccmain: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <[email protected]> * ccstruct: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <[email protected]> * classify: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <[email protected]> * dict: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <[email protected]> * lstm: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <[email protected]> * textord: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <[email protected]> * wordrec: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Signed-off-by: Stefan Weil <[email protected]> * ccutil: Replace Tesseract's MIN_*INT, MAX_*INT* by POSIX *INT*_MIN, *INT*_MAX Remove the macros which are now unused from ccutil/host.h. Remove also the obsolete history comments. 
Signed-off-by: Stefan Weil <[email protected]> * Fix build error caused by ambiguous ClipToRange Error message from Appveyor CI: C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2672: 'ClipToRange': no matching overloaded function found [C:\projects\tesseract\build\libtesseract.vcxproj] C:\projects\tesseract\ccstruct\coutln.cpp(818): error C2782: 'T ClipToRange(const T &,const T &,const T &)': template parameter 'T' is ambiguous [C:\projects\tesseract\build\libtesseract.vcxproj] c:\projects\tesseract\ccutil\helpers.h(122): note: see declaration of 'ClipToRange' C:\projects\tesseract\ccstruct\coutln.cpp(818): note: could be 'char' C:\projects\tesseract\ccstruct\coutln.cpp(818): note: or 'int' Signed-off-by: Stefan Weil <[email protected]> * unittest: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX Signed-off-by: Stefan Weil <[email protected]> * arch: Replace Tesseract's MAX_INT8 by POSIX INT8_MAX Signed-off-by: Stefan Weil <[email protected]>
1 parent 40c71bf commit 023e1b3

File tree

222 files changed

+2505
-2529
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

222 files changed

+2505
-2529
lines changed

api/baseapi.cpp

+4-4
Original file line number | Diff line number | Diff line change
@@ -285,7 +285,7 @@ bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
285285
IntParam *p = ParamUtils::FindParam<IntParam>(
286286
name, GlobalParams()->int_params, tesseract_->params()->int_params);
287287
if (p == NULL) return false;
288-
*value = (inT32)(*p);
288+
*value = (int32_t)(*p);
289289
return true;
290290
}
291291

@@ -1697,7 +1697,7 @@ char* TessBaseAPI::GetTSVText(int page_number) {
16971697
/** The 5 numbers output for each box (the usual 4 and a page number.) */
16981698
const int kNumbersPerBlob = 5;
16991699
/**
1700-
* The number of bytes taken by each number. Since we use inT16 for ICOORD,
1700+
* The number of bytes taken by each number. Since we use int16_t for ICOORD,
17011701
* assume only 5 digits max.
17021702
*/
17031703
const int kBytesPerNumber = 5;
@@ -1707,7 +1707,7 @@ const int kBytesPerNumber = 5;
17071707
* original UTF8 characters, and one kMaxBytesPerLine for safety.
17081708
*/
17091709
const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1;
1710-
/** Max bytes in the decimal representation of inT64. */
1710+
/** Max bytes in the decimal representation of int64_t. */
17111711
const int kBytesPer64BitNumber = 20;
17121712
/**
17131713
* A maximal single box could occupy kNumbersPerBlob numbers at
@@ -2479,7 +2479,7 @@ ROW *TessBaseAPI::MakeTessOCRRow(float baseline,
24792479
float xheight,
24802480
float descender,
24812481
float ascender) {
2482-
inT32 xstarts[] = {-32000};
2482+
int32_t xstarts[] = {-32000};
24832483
double quad_coeffs[] = {0, 0, baseline};
24842484
return new ROW(1,
24852485
xstarts,

arch/intsimdmatrix.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY<int8_t>& w,
103103
int total = 0;
104104
for (int j = 0; j < num_in; ++j) total += wi[j] * u[j];
105105
// Add in the bias and correct for integer values.
106-
v[i] = (static_cast<double>(total) / MAX_INT8 + wi[num_in]) * scales[i];
106+
v[i] = (static_cast<double>(total) / INT8_MAX + wi[num_in]) * scales[i];
107107
}
108108
} else {
109109
const int8_t* w_data = shaped_w_.data();

arch/intsimdmatrixavx2.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ inline void ExtractResults(__m256i& result, __m256i& shift_id,
8484
((int32_t*)&result)[0]
8585
#endif
8686
;
87-
*v++ = (static_cast<double>(res) / MAX_INT8 + *wi++) * *scales++;
87+
*v++ = (static_cast<double>(res) / INT8_MAX + *wi++) * *scales++;
8888
// Rotate the results in int32_t units, so the next result is ready.
8989
result = _mm256_permutevar8x32_epi32(result, shift_id);
9090
}

arch/intsimdmatrixsse.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ static void PartialMatrixDotVector1(const int8_t* wi, const double* scales,
3131
double* v) {
3232
int total = IntDotProductSSE(u, wi, num_in);
3333
// Add in the bias and correct for integer values.
34-
*v = (static_cast<double>(total) / MAX_INT8 + wi[num_in]) * *scales;
34+
*v = (static_cast<double>(total) / INT8_MAX + wi[num_in]) * *scales;
3535
}
3636
#endif // __SSE4_1__
3737

ccmain/adaptions.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@
4444
namespace tesseract {
4545
BOOL8 Tesseract::word_adaptable( //should we adapt?
4646
WERD_RES *word,
47-
uinT16 mode) {
47+
uint16_t mode) {
4848
if (tessedit_adaption_debug) {
4949
tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
5050
word->best_choice == NULL ? "" :

ccmain/applybox.cpp

+2-2
Original file line number | Diff line number | Diff line change
@@ -268,11 +268,11 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
268268
}
269269
GenericVector<BLOB_CHOICE*> blob_choices;
270270
ASSERT_HOST(!word_res->chopped_word->blobs.empty());
271-
float rating = static_cast<float>(MAX_INT8);
271+
float rating = static_cast<float>(INT8_MAX);
272272
for (int i = 0; i < word_res->chopped_word->NumBlobs(); ++i) {
273273
// The rating and certainty are not quite arbitrary. Since
274274
// select_blob_to_chop uses the worst certainty to choose, they all have
275-
// to be different, so starting with MAX_INT8, subtract 1/8 for each blob
275+
// to be different, so starting with INT8_MAX, subtract 1/8 for each blob
276276
// in here, and then divide by e each time they are chopped, which
277277
// should guarantee a set of unequal values for the whole tree of blobs
278278
// produced, however much chopping is required. The chops are thus only

ccmain/control.cpp

+21-21
Original file line number | Diff line number | Diff line change
@@ -80,8 +80,8 @@ void Tesseract::recog_pseudo_word(PAGE_RES* page_res,
8080
* @param pr_it the page results iterator
8181
*/
8282
BOOL8 Tesseract::recog_interactive(PAGE_RES_IT* pr_it) {
83-
inT16 char_qual;
84-
inT16 good_char_qual;
83+
int16_t char_qual;
84+
int16_t good_char_qual;
8585

8686
WordData word_data(*pr_it);
8787
SetupWordPassN(2, &word_data);
@@ -636,8 +636,8 @@ void Tesseract::rejection_passes(PAGE_RES* page_res,
636636
stats_.doc_blob_quality += blob_quality;
637637
int outline_errs = word_outline_errs(word);
638638
stats_.doc_outline_errs += outline_errs;
639-
inT16 all_char_quality;
640-
inT16 accepted_all_char_quality;
639+
int16_t all_char_quality;
640+
int16_t accepted_all_char_quality;
641641
word_char_quality(word, page_res_it.row()->row,
642642
&all_char_quality, &accepted_all_char_quality);
643643
stats_.doc_char_quality += all_char_quality;
@@ -757,8 +757,8 @@ void Tesseract::script_pos_pass(PAGE_RES* page_res) {
757757
// Helper finds the gap between the index word and the next.
758758
static void WordGap(const PointerVector<WERD_RES>& words, int index, int* right,
759759
int* next_left) {
760-
*right = -MAX_INT32;
761-
*next_left = MAX_INT32;
760+
*right = -INT32_MAX;
761+
*next_left = INT32_MAX;
762762
if (index < words.size()) {
763763
*right = words[index]->word->bounding_box().right();
764764
if (index + 1 < words.size())
@@ -812,11 +812,11 @@ static int SelectBestWords(double rating_ratio,
812812
// Start of the current run in each.
813813
int start_b = b, start_n = n;
814814
while (b < best_words->size() || n < new_words->size()) {
815-
int b_right = -MAX_INT32;
816-
int next_b_left = MAX_INT32;
815+
int b_right = -INT32_MAX;
816+
int next_b_left = INT32_MAX;
817817
WordGap(*best_words, b, &b_right, &next_b_left);
818-
int n_right = -MAX_INT32;
819-
int next_n_left = MAX_INT32;
818+
int n_right = -INT32_MAX;
819+
int next_n_left = INT32_MAX;
820820
WordGap(*new_words, n, &n_right, &next_n_left);
821821
if (MAX(b_right, n_right) < MIN(next_b_left, next_n_left)) {
822822
// The word breaks overlap. [start_b,b] and [start_n, n] match.
@@ -1793,7 +1793,7 @@ ACCEPTABLE_WERD_TYPE Tesseract::acceptable_word_string(
17931793

17941794
BOOL8 Tesseract::check_debug_pt(WERD_RES *word, int location) {
17951795
BOOL8 show_map_detail = FALSE;
1796-
inT16 i;
1796+
int16_t i;
17971797

17981798
if (!test_pt)
17991799
return FALSE;
@@ -1880,17 +1880,17 @@ BOOL8 Tesseract::check_debug_pt(WERD_RES *word, int location) {
18801880
*/
18811881
static void find_modal_font( // good chars in word
18821882
STATS* fonts, // font stats
1883-
inT16* font_out, // output font
1883+
int16_t* font_out, // output font
18841884
int8_t* font_count // output count
18851885
) {
1886-
inT16 font; //font index
1887-
inT32 count; //pile couat
1886+
int16_t font; //font index
1887+
int32_t count; //pile count
18881888

18891889
if (fonts->get_total () > 0) {
1890-
font = (inT16) fonts->mode ();
1890+
font = (int16_t) fonts->mode ();
18911891
*font_out = font;
18921892
count = fonts->pile_count (font);
1893-
*font_count = count < MAX_INT8 ? count : MAX_INT8;
1893+
*font_count = count < INT8_MAX ? count : INT8_MAX;
18941894
fonts->add (font, -*font_count);
18951895
}
18961896
else {
@@ -1935,7 +1935,7 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
19351935
}
19361936
// Find the top and 2nd choice for the word.
19371937
int score1 = 0, score2 = 0;
1938-
inT16 font_id1 = -1, font_id2 = -1;
1938+
int16_t font_id1 = -1, font_id2 = -1;
19391939
for (int f = 0; f < fontinfo_size; ++f) {
19401940
if (tessedit_debug_fonts && font_total_score[f] > 0) {
19411941
tprintf("Font %s, total score = %d\n",
@@ -1953,10 +1953,10 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
19531953
}
19541954
word->fontinfo = font_id1 >= 0 ? &fontinfo_table_.get(font_id1) : NULL;
19551955
word->fontinfo2 = font_id2 >= 0 ? &fontinfo_table_.get(font_id2) : NULL;
1956-
// Each score has a limit of MAX_UINT16, so divide by that to get the number
1956+
// Each score has a limit of UINT16_MAX, so divide by that to get the number
19571957
// of "votes" for that font, ie number of perfect scores.
1958-
word->fontinfo_id_count = ClipToRange(score1 / MAX_UINT16, 1, MAX_INT8);
1959-
word->fontinfo_id2_count = ClipToRange(score2 / MAX_UINT16, 0, MAX_INT8);
1958+
word->fontinfo_id_count = ClipToRange<int>(score1 / UINT16_MAX, 1, INT8_MAX);
1959+
word->fontinfo_id2_count = ClipToRange<int>(score2 / UINT16_MAX, 0, INT8_MAX);
19601960
if (score1 > 0) {
19611961
FontInfo fi = fontinfo_table_.get(font_id1);
19621962
if (tessedit_debug_fonts) {
@@ -1998,7 +1998,7 @@ void Tesseract::font_recognition_pass(PAGE_RES* page_res) {
19981998
doc_fonts.add(word->fontinfo2->universal_id, word->fontinfo_id2_count);
19991999
}
20002000
}
2001-
inT16 doc_font; // modal font
2001+
int16_t doc_font; // modal font
20022002
int8_t doc_font_count; // modal font
20032003
find_modal_font(&doc_fonts, &doc_font, &doc_font_count);
20042004
if (doc_font_count == 0)

ccmain/docqual.cpp

+20-20
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,8 @@ struct DocQualCallbacks {
5252
}
5353

5454
WERD_RES* word;
55-
inT16 match_count;
56-
inT16 accepted_match_count;
55+
int16_t match_count;
56+
int16_t accepted_match_count;
5757
};
5858

5959
/*************************************************************************
@@ -62,7 +62,7 @@ struct DocQualCallbacks {
6262
* ASSUME blobs in both initial word and box_word are in ascending order of
6363
* left hand blob edge.
6464
*************************************************************************/
65-
inT16 Tesseract::word_blob_quality(WERD_RES *word, ROW *row) {
65+
int16_t Tesseract::word_blob_quality(WERD_RES *word, ROW *row) {
6666
if (word->bln_boxes == NULL ||
6767
word->rebuild_word == NULL || word->rebuild_word->blobs.empty())
6868
return 0;
@@ -74,9 +74,9 @@ inT16 Tesseract::word_blob_quality(WERD_RES *word, ROW *row) {
7474
return cb.match_count;
7575
}
7676

77-
inT16 Tesseract::word_outline_errs(WERD_RES *word) {
78-
inT16 i = 0;
79-
inT16 err_count = 0;
77+
int16_t Tesseract::word_outline_errs(WERD_RES *word) {
78+
int16_t i = 0;
79+
int16_t err_count = 0;
8080

8181
if (word->rebuild_word != NULL) {
8282
for (int b = 0; b < word->rebuild_word->NumBlobs(); ++b) {
@@ -96,8 +96,8 @@ inT16 Tesseract::word_outline_errs(WERD_RES *word) {
9696
*************************************************************************/
9797
void Tesseract::word_char_quality(WERD_RES *word,
9898
ROW *row,
99-
inT16 *match_count,
100-
inT16 *accepted_match_count) {
99+
int16_t *match_count,
100+
int16_t *accepted_match_count) {
101101
if (word->bln_boxes == NULL || word->rebuild_word == NULL ||
102102
word->rebuild_word->blobs.empty()) {
103103
*match_count = 0;
@@ -128,7 +128,7 @@ void Tesseract::unrej_good_chs(WERD_RES *word, ROW *row) {
128128
NewPermanentTessCallback(&cb, &DocQualCallbacks::AcceptIfGoodQuality));
129129
}
130130

131-
inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) {
131+
int16_t Tesseract::count_outline_errs(char c, int16_t outline_count) {
132132
int expected_outline_count;
133133

134134
if (STRING (outlines_odd).contains (c))
@@ -237,15 +237,15 @@ void Tesseract::unrej_good_quality_words( //unreject potential
237237
void Tesseract::doc_and_block_rejection( //reject big chunks
238238
PAGE_RES_IT &page_res_it,
239239
BOOL8 good_quality_doc) {
240-
inT16 block_no = 0;
241-
inT16 row_no = 0;
240+
int16_t block_no = 0;
241+
int16_t row_no = 0;
242242
BLOCK_RES *current_block;
243243
ROW_RES *current_row;
244244

245245
BOOL8 rej_word;
246246
BOOL8 prev_word_rejected;
247-
inT16 char_quality = 0;
248-
inT16 accepted_char_quality;
247+
int16_t char_quality = 0;
248+
int16_t accepted_char_quality;
249249

250250
if (page_res_it.page_res->rej_count * 100.0 /
251251
page_res_it.page_res->char_count > tessedit_reject_doc_percent) {
@@ -596,9 +596,9 @@ void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
596596
PAGE_RES_IT copy_it;
597597
BOOL8 deleting_from_bol = FALSE;
598598
BOOL8 marked_delete_point = FALSE;
599-
inT16 debug_delete_mode;
599+
int16_t debug_delete_mode;
600600
CRUNCH_MODE delete_mode;
601-
inT16 x_debug_delete_mode;
601+
int16_t x_debug_delete_mode;
602602
CRUNCH_MODE x_delete_mode;
603603

604604
page_res_it.restart_page();
@@ -896,7 +896,7 @@ GARBAGE_LEVEL Tesseract::garbage_word(WERD_RES *word, BOOL8 ok_dict_word) {
896896
* >75% of the outline BBs have longest dimension < 0.5xht
897897
*************************************************************************/
898898

899-
CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, inT16 &delete_mode) {
899+
CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, int16_t &delete_mode) {
900900
int word_len = word->reject_map.length ();
901901
float rating_per_ch;
902902
TBOX box; //BB of word
@@ -967,7 +967,7 @@ CRUNCH_MODE Tesseract::word_deletable(WERD_RES *word, inT16 &delete_mode) {
967967
return CR_NONE;
968968
}
969969

970-
inT16 Tesseract::failure_count(WERD_RES *word) {
970+
int16_t Tesseract::failure_count(WERD_RES *word) {
971971
const char *str = word->best_choice->unichar_string().string();
972972
int tess_rejs = 0;
973973

@@ -981,9 +981,9 @@ inT16 Tesseract::failure_count(WERD_RES *word) {
981981

982982
BOOL8 Tesseract::noise_outlines(TWERD *word) {
983983
TBOX box; // BB of outline
984-
inT16 outline_count = 0;
985-
inT16 small_outline_count = 0;
986-
inT16 max_dimension;
984+
int16_t outline_count = 0;
985+
int16_t small_outline_count = 0;
986+
int16_t max_dimension;
987987
float small_limit = kBlnXHeight * crunch_small_outlines_size;
988988

989989
for (int b = 0; b < word->NumBlobs(); ++b) {

ccmain/docqual.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,6 @@ enum GARBAGE_LEVEL
3030
G_TERRIBLE
3131
};
3232

33-
inT16 word_blob_quality(WERD_RES *word, ROW *row);
33+
int16_t word_blob_quality(WERD_RES *word, ROW *row);
3434
void reject_whole_page(PAGE_RES_IT &page_res_it);
3535
#endif

ccmain/equationdetect.cpp

+7-7
Original file line number | Diff line number | Diff line change
@@ -664,12 +664,12 @@ void EquationDetect::SplitCPHor(ColPartition* part,
664664
// the previous blob may have a "more right" right side.
665665
// Account for this by always keeping the largest "right"
666666
// so far.
667-
int previous_right = MIN_INT32;
667+
int previous_right = INT32_MIN;
668668

669669
// Look for the next split in the partition.
670670
for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
671671
const TBOX& box = box_it.data()->bounding_box();
672-
if (previous_right != MIN_INT32 &&
672+
if (previous_right != INT32_MIN &&
673673
box.left() - previous_right > kThreshold) {
674674
// We have a split position. Split the partition in two pieces.
675675
// Insert the left piece in the grid and keep processing the right.
@@ -708,17 +708,17 @@ void EquationDetect::SplitCPHorLite(ColPartition* part,
708708
// Account for this by always keeping the largest "right"
709709
// so far.
710710
TBOX union_box;
711-
int previous_right = MIN_INT32;
711+
int previous_right = INT32_MIN;
712712
BLOBNBOX_C_IT box_it(part->boxes());
713713
for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
714714
const TBOX& box = box_it.data()->bounding_box();
715-
if (previous_right != MIN_INT32 &&
715+
if (previous_right != INT32_MIN &&
716716
box.left() - previous_right > kThreshold) {
717717
// We have a split position.
718718
splitted_boxes->push_back(union_box);
719-
previous_right = MIN_INT32;
719+
previous_right = INT32_MIN;
720720
}
721-
if (previous_right == MIN_INT32) {
721+
if (previous_right == INT32_MIN) {
722722
union_box = box;
723723
} else {
724724
union_box += box;
@@ -728,7 +728,7 @@ void EquationDetect::SplitCPHorLite(ColPartition* part,
728728
}
729729

730730
// Add the last piece.
731-
if (previous_right != MIN_INT32) {
731+
if (previous_right != INT32_MIN) {
732732
splitted_boxes->push_back(union_box);
733733
}
734734
}

0 commit comments

Comments
 (0)