Skip to content

Change the Python diagnostic scripts to accept non-ASCII UTF-8 phone set #3711

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 24 additions & 15 deletions egs/wsj/s5/steps/diagnostic/analyze_lattice_depth_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
import argparse
import sys, os
from collections import defaultdict
from io import open
import codecs

# reference: http://www.macfreek.nl/memory/Encoding_of_Python_stdout
if sys.version_info.major == 2:
sys.stdout = codecs.getwriter('utf-8')(sys.stdout, 'strict')
else:
assert sys.version_info.major == 3
sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, 'strict')


parser = argparse.ArgumentParser(description="This script reads stats created in analyze_lats.sh "
Expand All @@ -29,13 +38,13 @@
# set up phone_int2text to map from phone to printed form.
phone_int2text = {}
try:
f = open(args.lang + "/phones.txt", "r");
f = open(args.lang + "/phones.txt", "r", encoding='utf-8')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wouldn't work under Python 2, I believe (the built-in open() there does not have the encoding= option).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Yenda, thanks for having a look!
You are right that the built-in open() in Python 2 doesn't have an "encoding" option. That's what the "from io import open" line is for: it re-binds the name 'open' to io.open, which has this option and works under both Python 2 and Python 3. I've tested the scripts with Python versions 2.7 and 3.6.

for line in f.readlines():
[ word, number] = line.split()
phone_int2text[int(number)] = word
f.close()
except:
sys.exit("analyze_lattice_depth_stats.py: error opening or reading {0}/phones.txt".format(
sys.exit(u"analyze_lattice_depth_stats.py: error opening or reading {0}/phones.txt".format(
args.lang))
# this is a special case... for begin- and end-of-sentence stats,
# we group all nonsilence phones together.
Expand All @@ -49,14 +58,14 @@
# open lang/phones/silence.csl-- while there are many ways of obtaining the
# silence/nonsilence phones, we read this because it's present in graph
# directories as well as lang directories.
filename = "{0}/phones/silence.csl".format(args.lang)
filename = u"{0}/phones/silence.csl".format(args.lang)
f = open(filename, "r")
line = f.readline()
for silence_phone in line.split(":"):
nonsilence.remove(int(silence_phone))
f.close()
except Exception as e:
sys.exit("analyze_lattice_depth_stats.py: error processing {0}/phones/silence.csl: {1}".format(
sys.exit(u"analyze_lattice_depth_stats.py: error processing {0}/phones/silence.csl: {1}".format(
args.lang, str(e)))

# phone_depth_counts is a dict of dicts.
Expand All @@ -80,7 +89,7 @@
break
a = line.split()
if len(a) != 3:
sys.exit("analyze_lattice_depth_stats.py: reading stdin, could not interpret line: " + line)
sys.exit(u"analyze_lattice_depth_stats.py: reading stdin, could not interpret line: " + line)
try:
phone, depth, count = [ int(x) for x in a ]

Expand All @@ -92,11 +101,11 @@
universal_phone = -1
phone_depth_counts[universal_phone][depth] += count
except Exception as e:
sys.exit("analyze_lattice_depth_stats.py: unexpected phone {0} "
"seen (lang directory mismatch?): line is {1}, error is {2}".format(phone, line, str(e)))
sys.exit(u"analyze_lattice_depth_stats.py: unexpected phone {0} "
u"seen (lang directory mismatch?): line is {1}, error is {2}".format(phone, line, str(e)))

if total_frames == 0:
sys.exit("analyze_lattice_depth_stats.py: read no input")
sys.exit(u"analyze_lattice_depth_stats.py: read no input")


# If depth_to_count is a map from depth-in-frames to count,
Expand Down Expand Up @@ -125,8 +134,8 @@ def GetMean(depth_to_count):
return this_total_depth / this_total_frames


print("The total amount of data analyzed assuming 100 frames per second "
"is {0} hours".format("%.1f" % (total_frames / 360000.0)))
print(u"The total amount of data analyzed assuming 100 frames per second "
u"is {0} hours".format("%.1f" % (total_frames / 360000.0)))

# the next block prints lines like (to give some examples):
# Nonsilence phones as a group account for 74.4% of phone occurrences, with lattice depth (10,50,90-percentile)=(1,2,7) and mean=3.1
Expand All @@ -152,18 +161,18 @@ def GetMean(depth_to_count):
try:
phone_text = phone_int2text[phone]
except:
sys.exit("analyze_lattice_depth_stats.py: phone {0} is not covered on phones.txt "
"(lang/alignment mismatch?)".format(phone))
preamble = "Phone {phone_text} accounts for {percent}% of frames, with".format(
sys.exit(u"analyze_lattice_depth_stats.py: phone {0} is not covered on phones.txt "
u"(lang/alignment mismatch?)".format(phone))
preamble = u"Phone {phone_text} accounts for {percent}% of frames, with".format(
phone_text = phone_text, percent = "%.1f" % frequency_percentage)
elif phone == 0:
preamble = "Nonsilence phones as a group account for {percent}% of frames, with".format(
preamble = u"Nonsilence phones as a group account for {percent}% of frames, with".format(
percent = "%.1f" % frequency_percentage)
else:
assert phone == -1
preamble = "Overall,";

print("{preamble} lattice depth (10,50,90-percentile)=({p10},{p50},{p90}) and mean={mean}".format(
print(u"{preamble} lattice depth (10,50,90-percentile)=({p10},{p50},{p90}) and mean={mean}".format(
preamble = preamble,
p10 = depth_percentile_10,
p50 = depth_percentile_50,
Expand Down
41 changes: 25 additions & 16 deletions egs/wsj/s5/steps/diagnostic/analyze_phone_length_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@
import argparse
import sys, os
from collections import defaultdict
from io import open
import codecs

# reference: http://www.macfreek.nl/memory/Encoding_of_Python_stdout
if sys.version_info.major == 2:
sys.stdout = codecs.getwriter('utf-8')(sys.stdout, 'strict')
else:
assert sys.version_info.major == 3
sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, 'strict')


parser = argparse.ArgumentParser(description="This script reads stats created in analyze_alignments.sh "
Expand All @@ -31,7 +40,7 @@
# set up phone_int2text to map from phone to printed form.
phone_int2text = {}
try:
f = open(args.lang + "/phones.txt", "r");
f = open(args.lang + "/phones.txt", "r", encoding='utf-8')
for line in f.readlines():
[ word, number] = line.split()
phone_int2text[int(number)] = word
Expand Down Expand Up @@ -112,8 +121,8 @@
optional_silence_phone_text = phone_int2text[optional_silence_phone]
f.close()
if optional_silence_phone in nonsilence:
print("analyze_phone_length_stats.py: was expecting the optional-silence phone to "
"be a member of the silence phones, it is not. This script won't work correctly.")
print(u"analyze_phone_length_stats.py: was expecting the optional-silence phone to "
u"be a member of the silence phones, it is not. This script won't work correctly.")
except:
largest_count = 0
optional_silence_phone = 1
Expand All @@ -124,8 +133,8 @@
largest_count = this_count
optional_silence_phone = p
optional_silence_phone_text = phone_int2text[optional_silence_phone]
print("analyze_phone_length_stats.py: could not get optional-silence phone from "
"{0}/phones/optional_silence.int, guessing that it's {1} from the stats. ".format(
print(u"analyze_phone_length_stats.py: could not get optional-silence phone from "
u"{0}/phones/optional_silence.int, guessing that it's {1} from the stats. ".format(
args.lang, optional_silence_phone_text))


Expand Down Expand Up @@ -175,8 +184,8 @@ def GetMean(length_to_count):
# maybe half a second. If your database is not like this, you should know;
# you may want to mess with the segmentation to add more silence.
if frequency_percentage < 80.0:
print("analyze_phone_length_stats.py: WARNING: optional-silence {0} is seen only {1}% "
"of the time at utterance {2}. This may not be optimal.".format(
print(u"analyze_phone_length_stats.py: WARNING: optional-silence {0} is seen only {1}% "
u"of the time at utterance {2}. This may not be optimal.".format(
optional_silence_phone_text, frequency_percentage, boundary_type))


Expand Down Expand Up @@ -213,8 +222,8 @@ def GetMean(length_to_count):
except:
sys.exit("analyze_phone_length_stats.py: phone {0} is not covered on phones.txt "
"(lang/alignment mismatch?)".format(phone))
print("{text}, {phone_text} accounts for {percent}% of phone occurrences, with "
"duration (median, mean, 95-percentile) is ({median},{mean},{percentile95}) frames.".format(
print(u"{text}, {phone_text} accounts for {percent}% of phone occurrences, with "
u"duration (median, mean, 95-percentile) is ({median},{mean},{percentile95}) frames.".format(
text = text, phone_text = phone_text,
percent = "%.1f" % frequency_percentage,
median = duration_median, mean = "%.1f" % duration_mean,
Expand Down Expand Up @@ -245,24 +254,24 @@ def GetMean(length_to_count):
opt_sil_total_frame_percent = total_optsil_frames * 100.0 / total_frames['all']
internal_frame_percent = total_frames['internal'] * 100.0 / total_frames['all']

print("The optional-silence phone {0} occupies {1}% of frames overall ".format(
print(u"The optional-silence phone {0} occupies {1}% of frames overall ".format(
optional_silence_phone_text, "%.1f" % opt_sil_total_frame_percent))
hours_total = total_frames['all'] / 360000.0;
hours_nonsil = (total_frames['all'] - total_optsil_frames) / 360000.0
print("Limiting the stats to the {0}% of frames not covered by an utterance-[begin/end] phone, "
"optional-silence {1} occupies {2}% of frames.".format("%.1f" % internal_frame_percent,
print(u"Limiting the stats to the {0}% of frames not covered by an utterance-[begin/end] phone, "
u"optional-silence {1} occupies {2}% of frames.".format("%.1f" % internal_frame_percent,
optional_silence_phone_text,
"%.1f" % opt_sil_internal_frame_percent))
print("Assuming 100 frames per second, the alignments represent {0} hours of data, "
"or {1} hours if {2} frames are excluded.".format(
print(u"Assuming 100 frames per second, the alignments represent {0} hours of data, "
u"or {1} hours if {2} frames are excluded.".format(
"%.1f" % hours_total, "%.1f" % hours_nonsil, optional_silence_phone_text))

opt_sil_internal_phone_percent = (sum(internal_opt_sil_phone_lengths.values()) *
100.0 / total_phones['internal'])
duration_median = GetPercentile(internal_opt_sil_phone_lengths, 0.5)
duration_mean = GetMean(internal_opt_sil_phone_lengths)
duration_percentile_95 = GetPercentile(internal_opt_sil_phone_lengths, 0.95)
print("Utterance-internal optional-silences {0} comprise {1}% of utterance-internal phones, with duration "
"(median, mean, 95-percentile) = ({2},{3},{4})".format(
print(u"Utterance-internal optional-silences {0} comprise {1}% of utterance-internal phones, with duration "
u"(median, mean, 95-percentile) = ({2},{3},{4})".format(
optional_silence_phone_text, "%.1f" % opt_sil_internal_phone_percent,
duration_median, "%0.1f" % duration_mean, duration_percentile_95))