kaldi-asr · danpovey · Nov 11, 2019 · Nov 10, 2019 · jtrmal · Nov 10, 2019
diff --git a/egs/wsj/s5/steps/diagnostic/analyze_lattice_depth_stats.py b/egs/wsj/s5/steps/diagnostic/analyze_lattice_depth_stats.py
@@ -9,6 +9,15 @@
 import argparse
 import sys, os
 from collections import defaultdict
+from io import open
+import codecs
+
+# Wrap sys.stdout in a UTF-8 writer (Python 2 and 3); reference: http://www.macfreek.nl/memory/Encoding_of_Python_stdout
+if sys.version_info.major == 2:
+    sys.stdout = codecs.getwriter('utf-8')(sys.stdout, 'strict')
+else:
+    assert sys.version_info.major == 3
+    sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, 'strict')
 
 
 parser = argparse.ArgumentParser(description="This script reads stats created in analyze_lats.sh "
@@ -29,13 +38,13 @@
 # set up phone_int2text to map from phone to printed form.
 phone_int2text = {}
 try:
-    f = open(args.lang + "/phones.txt", "r");
+    f = open(args.lang + "/phones.txt", "r", encoding='utf-8')
     for line in f.readlines():
         [ word, number] = line.split()
         phone_int2text[int(number)] = word
     f.close()
 except:
-    sys.exit("analyze_lattice_depth_stats.py: error opening or reading {0}/phones.txt".format(
+    sys.exit(u"analyze_lattice_depth_stats.py: error opening or reading {0}/phones.txt".format(
             args.lang))
 # this is a special case... for begin- and end-of-sentence stats,
 # we group all nonsilence phones together.
@@ -49,14 +58,14 @@
     # open lang/phones/silence.csl-- while there are many ways of obtaining the
     # silence/nonsilence phones, we read this because it's present in graph
     # directories as well as lang directories.
-    filename = "{0}/phones/silence.csl".format(args.lang)
+    filename = u"{0}/phones/silence.csl".format(args.lang)
     f = open(filename, "r")
     line = f.readline()
     for silence_phone in line.split(":"):
         nonsilence.remove(int(silence_phone))
     f.close()
 except Exception as e:
-    sys.exit("analyze_lattice_depth_stats.py: error processing {0}/phones/silence.csl: {1}".format(
+    sys.exit(u"analyze_lattice_depth_stats.py: error processing {0}/phones/silence.csl: {1}".format(
             args.lang, str(e)))
 
 # phone_depth_counts is a dict of dicts.
@@ -80,7 +89,7 @@
         break
     a = line.split()
     if len(a) != 3:
-        sys.exit("analyze_lattice_depth_stats.py: reading stdin, could not interpret line: " + line)
+        sys.exit(u"analyze_lattice_depth_stats.py: reading stdin, could not interpret line: " + line)
     try:
         phone, depth, count = [ int(x) for x in a ]
 
@@ -92,11 +101,11 @@
         universal_phone = -1
         phone_depth_counts[universal_phone][depth] += count
     except Exception as e:
-        sys.exit("analyze_lattice_depth_stats.py: unexpected phone {0} "
-                 "seen (lang directory mismatch?): line is {1}, error is {2}".format(phone, line, str(e)))
+        sys.exit(u"analyze_lattice_depth_stats.py: unexpected phone {0} "
+                 u"seen (lang directory mismatch?): line is {1}, error is {2}".format(phone, line, str(e)))
 
 if total_frames == 0:
-    sys.exit("analyze_lattice_depth_stats.py: read no input")
+    sys.exit(u"analyze_lattice_depth_stats.py: read no input")
 
 
 # If depth_to_count is a map from depth-in-frames to count,
@@ -125,8 +134,8 @@ def GetMean(depth_to_count):
     return this_total_depth / this_total_frames
 
 
-print("The total amount of data analyzed assuming 100 frames per second "
-      "is {0} hours".format("%.1f" % (total_frames / 360000.0)))
+print(u"The total amount of data analyzed assuming 100 frames per second "
+      u"is {0} hours".format("%.1f" % (total_frames / 360000.0)))
 
 # the next block prints lines like (to give some examples):
 # Nonsilence phones as a group account for 74.4% of phone occurrences, with lattice depth (10,50,90-percentile)=(1,2,7) and mean=3.1
@@ -152,18 +161,18 @@ def GetMean(depth_to_count):
         try:
             phone_text = phone_int2text[phone]
         except:
-            sys.exit("analyze_lattice_depth_stats.py: phone {0} is not covered on phones.txt "
-                     "(lang/alignment mismatch?)".format(phone))
-        preamble = "Phone {phone_text} accounts for {percent}% of frames, with".format(
+            sys.exit(u"analyze_lattice_depth_stats.py: phone {0} is not covered on phones.txt "
+                     u"(lang/alignment mismatch?)".format(phone))
+        preamble = u"Phone {phone_text} accounts for {percent}% of frames, with".format(
             phone_text = phone_text, percent = "%.1f" % frequency_percentage)
     elif phone == 0:
-        preamble = "Nonsilence phones as a group account for {percent}% of frames, with".format(
+        preamble = u"Nonsilence phones as a group account for {percent}% of frames, with".format(
             percent = "%.1f" % frequency_percentage)
     else:
         assert phone == -1
         preamble = "Overall,";
 
-    print("{preamble} lattice depth (10,50,90-percentile)=({p10},{p50},{p90}) and mean={mean}".format(
+    print(u"{preamble} lattice depth (10,50,90-percentile)=({p10},{p50},{p90}) and mean={mean}".format(
             preamble = preamble,
             p10 = depth_percentile_10,
             p50 = depth_percentile_50,

diff --git a/egs/wsj/s5/steps/diagnostic/analyze_phone_length_stats.py b/egs/wsj/s5/steps/diagnostic/analyze_phone_length_stats.py
@@ -8,6 +8,15 @@
 import argparse
 import sys, os
 from collections import defaultdict
+from io import open
+import codecs
+
+# Wrap sys.stdout in a UTF-8 writer (Python 2 and 3); reference: http://www.macfreek.nl/memory/Encoding_of_Python_stdout
+if sys.version_info.major == 2:
+    sys.stdout = codecs.getwriter('utf-8')(sys.stdout, 'strict')
+else:
+    assert sys.version_info.major == 3
+    sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, 'strict')
 
 
 parser = argparse.ArgumentParser(description="This script reads stats created in analyze_alignments.sh "
@@ -31,7 +40,7 @@
 # set up phone_int2text to map from phone to printed form.
 phone_int2text = {}
 try:
-    f = open(args.lang + "/phones.txt", "r");
+    f = open(args.lang + "/phones.txt", "r", encoding='utf-8')
     for line in f.readlines():
         [ word, number] = line.split()
         phone_int2text[int(number)] = word
@@ -112,8 +121,8 @@
     optional_silence_phone_text = phone_int2text[optional_silence_phone]
     f.close()
     if optional_silence_phone in nonsilence:
-        print("analyze_phone_length_stats.py: was expecting the optional-silence phone to "
-              "be a member of the silence phones, it is not.  This script won't work correctly.")
+        print(u"analyze_phone_length_stats.py: was expecting the optional-silence phone to "
+              u"be a member of the silence phones, it is not.  This script won't work correctly.")
 except:
     largest_count = 0
     optional_silence_phone = 1
@@ -124,8 +133,8 @@
                 largest_count = this_count
                 optional_silence_phone = p
     optional_silence_phone_text = phone_int2text[optional_silence_phone]
-    print("analyze_phone_length_stats.py: could not get optional-silence phone from "
-          "{0}/phones/optional_silence.int, guessing that it's {1} from the stats. ".format(
+    print(u"analyze_phone_length_stats.py: could not get optional-silence phone from "
+          u"{0}/phones/optional_silence.int, guessing that it's {1} from the stats. ".format(
             args.lang, optional_silence_phone_text))
 
 
@@ -175,8 +184,8 @@ def GetMean(length_to_count):
     # maybe half a second.  If your database is not like this, you should know;
     # you may want to mess with the segmentation to add more silence.
     if frequency_percentage < 80.0:
-        print("analyze_phone_length_stats.py: WARNING: optional-silence {0} is seen only {1}% "
-              "of the time at utterance {2}.  This may not be optimal.".format(
+        print(u"analyze_phone_length_stats.py: WARNING: optional-silence {0} is seen only {1}% "
+              u"of the time at utterance {2}.  This may not be optimal.".format(
                 optional_silence_phone_text, frequency_percentage, boundary_type))
 
 
@@ -213,8 +222,8 @@ def GetMean(length_to_count):
         except:
             sys.exit("analyze_phone_length_stats.py: phone {0} is not covered on phones.txt "
                      "(lang/alignment mismatch?)".format(phone))
-        print("{text}, {phone_text} accounts for {percent}% of phone occurrences, with "
-              "duration (median, mean, 95-percentile) is ({median},{mean},{percentile95}) frames.".format(
+        print(u"{text}, {phone_text} accounts for {percent}% of phone occurrences, with "
+              u"duration (median, mean, 95-percentile) is ({median},{mean},{percentile95}) frames.".format(
                 text = text, phone_text = phone_text,
                 percent = "%.1f" % frequency_percentage,
                 median = duration_median, mean = "%.1f" % duration_mean,
@@ -245,24 +254,24 @@ def GetMean(length_to_count):
     opt_sil_total_frame_percent = total_optsil_frames * 100.0 / total_frames['all']
     internal_frame_percent = total_frames['internal'] * 100.0 / total_frames['all']
 
-    print("The optional-silence phone {0} occupies {1}% of frames overall ".format(
+    print(u"The optional-silence phone {0} occupies {1}% of frames overall ".format(
             optional_silence_phone_text, "%.1f" % opt_sil_total_frame_percent))
     hours_total = total_frames['all'] / 360000.0;
     hours_nonsil = (total_frames['all'] - total_optsil_frames) / 360000.0
-    print("Limiting the stats to the {0}% of frames not covered by an utterance-[begin/end] phone, "
-          "optional-silence {1} occupies {2}% of frames.".format("%.1f" % internal_frame_percent,
+    print(u"Limiting the stats to the {0}% of frames not covered by an utterance-[begin/end] phone, "
+          u"optional-silence {1} occupies {2}% of frames.".format("%.1f" % internal_frame_percent,
                                                                  optional_silence_phone_text,
                                                                  "%.1f" % opt_sil_internal_frame_percent))
-    print("Assuming 100 frames per second, the alignments represent {0} hours of data, "
-          "or {1} hours if {2} frames are excluded.".format(
+    print(u"Assuming 100 frames per second, the alignments represent {0} hours of data, "
+          u"or {1} hours if {2} frames are excluded.".format(
             "%.1f" % hours_total, "%.1f" % hours_nonsil, optional_silence_phone_text))
 
     opt_sil_internal_phone_percent = (sum(internal_opt_sil_phone_lengths.values()) *
                                       100.0 / total_phones['internal'])
     duration_median = GetPercentile(internal_opt_sil_phone_lengths, 0.5)
     duration_mean = GetMean(internal_opt_sil_phone_lengths)
     duration_percentile_95 = GetPercentile(internal_opt_sil_phone_lengths, 0.95)
-    print("Utterance-internal optional-silences {0} comprise {1}% of utterance-internal phones, with duration "
-          "(median, mean, 95-percentile) = ({2},{3},{4})".format(
+    print(u"Utterance-internal optional-silences {0} comprise {1}% of utterance-internal phones, with duration "
+          u"(median, mean, 95-percentile) = ({2},{3},{4})".format(
                 optional_silence_phone_text, "%.1f" % opt_sil_internal_phone_percent,
                 duration_median, "%0.1f" % duration_mean, duration_percentile_95))