[WIP] updating AMI tdnn recipe #3578

Merged: 7 commits, Sep 29, 2019
10 changes: 10 additions & 0 deletions egs/ami/s5b/conf/mfcc_hires80.conf
@@ -0,0 +1,10 @@
# config for high-resolution MFCC features, intended for neural network training
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--num-mel-bins=80 # similar to Google's setup.
--num-ceps=80 # there is no dimensionality reduction.
--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so
# there might be some information at the low end.
--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
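
For context, this is the config that the updated run_ivector_common.sh (further down in this PR) picks up when it is invoked with --hires_suffix 80. A minimal sketch of the feature-extraction calls, with the data directory and job count as illustrative values rather than anything fixed by this PR:

# Sketch only: extract 80-dim high-resolution MFCCs with the new config.
steps/make_mfcc.sh --nj 30 --mfcc-config conf/mfcc_hires80.conf \
  --cmd "$train_cmd" data/sdm1/train_cleaned_sp_hires
steps/compute_cmvn_stats.sh data/sdm1/train_cleaned_sp_hires
utils/fix_data_dir.sh data/sdm1/train_cleaned_sp_hires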
70 changes: 39 additions & 31 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh
@@ -5,17 +5,17 @@

# local/chain/tuning/run_tdnn_1j.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned

# local/chain/compare_wer_general.sh sdm1 tdnn1h_sp_bi_ihmali tdnn1i_sp_bi_ihmali
# local/chain/compare_wer_general.sh sdm1 tdnn1i_sp_bi_ihmali tdnn1j_sp_bi_ihmali
# System tdnn1i_sp_bi_ihmali tdnn1j_sp_bi_ihmali
# WER on dev 36.6 32.8
# WER on eval 40.6 36.3
# Final train prob -0.196231 -0.131658
# Final valid prob -0.265572 -0.216094
# Final train prob (xent) -2.48061 -1.53325
# Final valid prob (xent) -2.71794 -1.96188
# WER on dev 36.6 31.7
# WER on eval 40.6 35.1
# Final train prob -0.196231 -0.114088
# Final valid prob -0.265572 -0.214282
# Final train prob (xent) -2.48061 -1.37987
# Final valid prob (xent) -2.71794 -1.8639

# steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn1j_sp_bi_ihmali
# exp/sdm1/chain_cleaned/tdnn1j_sp_bi_ihmali: num-iters=196 nj=2..12 num-params=17.7M dim=80+100->3728 combine=-0.145->-0.143 (over 5) xent:train/valid[129,195,final]=(-1.81,-1.56,-1.53/-2.13,-2.02,-1.96) logprob:train/valid[129,195,final]=(-0.164,-0.136,-0.132/-0.226,-0.222,-0.216)
# exp/sdm1/chain_cleaned/tdnn1j_sp_bi_ihmali: num-iters=327 nj=2..12 num-params=34.3M dim=80+100->3728 combine=-0.126->-0.124 (over 4) xent:train/valid[217,326,final]=(-1.69,-1.43,-1.38/-2.06,-1.93,-1.86) logprob:train/valid[217,326,final]=(-0.143,-0.120,-0.114/-0.226,-0.218,-0.214)

set -e -o pipefail
# First the options that are passed through to run_ivector_common.sh
@@ -31,7 +31,7 @@ ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true).
num_threads_ubm=32
ivector_transform_type=pca
nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
num_epochs=9
num_epochs=15
remove_egs=true

# The rest are configs specific to this script. Most of the parameters
@@ -40,7 +40,7 @@ train_stage=-10
tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
tdnn_affix=1j #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
common_egs_dir= # you can set this to use previously dumped egs.

dropout_schedule='0,0@0.20,0.5@0.50,0'

# End configuration section.
echo "$0 $@" # Print the command line for logging
@@ -61,6 +61,7 @@ fi
local/nnet3/run_ivector_common.sh --stage $stage \
--mic $mic \
--nj $nj \
--hires_suffix 80 \
--min-seg-len $min_seg_len \
--train-set $train_set \
--gmm $gmm \
@@ -171,8 +172,11 @@ if [ $stage -le 15 ]; then

num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python)
opts="l2-regularize=0.02"
output_opts="l2-regularize=0.004"
affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
prefinal_opts="l2-regularize=0.01"
output_opts="l2-regularize=0.002"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
@@ -185,25 +189,27 @@ if [ $stage -le 15 ]; then
fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

# the first splicing is moved before the lda layer, so no splicing here
relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
linear-component name=prefinal-l dim=256 $linear_opts
prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=2136
tdnnf-layer name=tdnnf2 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
tdnnf-layer name=tdnnf3 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
tdnnf-layer name=tdnnf4 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
tdnnf-layer name=tdnnf5 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=0
tdnnf-layer name=tdnnf6 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf7 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf8 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf9 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf10 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf11 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf12 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf13 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf14 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf15 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
linear-component name=prefinal-l dim=512 $linear_opts

prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=2136 small-dim=512
output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256

prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=2136 small-dim=512
output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts

EOF
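
The line that compiles this xconfig into the actual nnet3 configs lies outside the changed region of the diff; in these chain recipes it typically sits right after the heredoc and looks like the following (a sketch, not part of the changed lines):

steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
  --config-dir $dir/configs/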
@@ -226,10 +232,11 @@ if [ $stage -le 16 ]; then
--chain.l2-regularize 0.00005 \
--chain.apply-deriv-weights false \
--chain.lm-opts="--num-extra-lm-states=2000" \
--trainer.dropout-schedule $dropout_schedule \
--egs.dir "$common_egs_dir" \
--egs.opts "--frames-overlap-per-eg 0" \
--egs.chunk-width 150 \
--trainer.num-chunk-per-minibatch 128 \
--trainer.num-chunk-per-minibatch 32 \
--trainer.frames-per-iter 1500000 \
--trainer.num-epochs $num_epochs \
--trainer.optimization.num-jobs-initial 2 \
Expand All @@ -238,6 +245,7 @@ if [ $stage -le 16 ]; then
--trainer.optimization.final-effective-lrate 0.0001 \
--trainer.max-param-change 2.0 \
--cleanup.remove-egs $remove_egs \
--cleanup.preserve-model-interval 50 \
--feat-dir $train_data_dir \
--tree-dir $tree_dir \
--lat-dir $lat_dir \
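
Once training and decoding finish, the results block at the top of this script can be regenerated with the tools it already references; usage as in the header comments:

# Compare the previous 1i system against the new 1j system, then inspect the model dir.
local/chain/compare_wer_general.sh sdm1 tdnn1i_sp_bi_ihmali tdnn1j_sp_bi_ihmali
steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn1j_sp_bi_ihmali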
4 changes: 2 additions & 2 deletions egs/ami/s5b/local/nnet3/run_ivector_common.sh
@@ -21,7 +21,7 @@ num_threads_ubm=32
ivector_transform_type=lda
nnet3_affix=_cleaned # affix for exp/$mic/nnet3 directory to put iVector stuff in, so it
# becomes exp/$mic/nnet3_cleaned or whatever.

hires_suffix=
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh
@@ -72,7 +72,7 @@ if [ $stage -le 2 ]; then
utils/data/perturb_data_dir_volume.sh data/$mic/${train_set}_sp_hires

for datadir in ${train_set}_sp dev eval; do
steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \
steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires$hires_suffix.conf \
--cmd "$train_cmd" data/$mic/${datadir}_hires
steps/compute_cmvn_stats.sh data/$mic/${datadir}_hires
utils/fix_data_dir.sh data/$mic/${datadir}_hires
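
Putting the pieces together: the new hires_suffix option defaults to empty, so existing recipes keep using conf/mfcc_hires.conf, while run_tdnn_1j.sh passes --hires_suffix 80 and thereby selects the 80-dimensional config added above. A sketch of the resulting call, with option values copied from this PR and everything else left at its defaults:

# utils/parse_options.sh maps --hires_suffix 80 onto hires_suffix=80, so the
# make_mfcc step resolves its config to conf/mfcc_hires80.conf.
local/nnet3/run_ivector_common.sh --mic sdm1 --hires_suffix 80 \
  --train-set train_cleaned --gmm tri3_cleaned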