[WIP] updating AMI tdnn recipe #3578

Merged: 7 commits, Sep 29, 2019
10 changes: 10 additions & 0 deletions egs/ami/s5b/conf/mfcc_hires80.conf
@@ -0,0 +1,10 @@
# config for high-resolution MFCC features, intended for neural network training
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--num-mel-bins=80 # similar to Google's setup.
--num-ceps=80 # there is no dimensionality reduction.
--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so
# there might be some information at the low end.
--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
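
For context, this is the config that the updated run_ivector_common.sh (further down in this PR) picks up when it is invoked with --hires_suffix 80. A minimal sketch of the feature-extraction calls, with the data directory and job count as illustrative values rather than anything fixed by this PR:

# Sketch only: extract 80-dim high-resolution MFCCs with the new config.
steps/make_mfcc.sh --nj 30 --mfcc-config conf/mfcc_hires80.conf \
  --cmd "$train_cmd" data/sdm1/train_cleaned_sp_hires
steps/compute_cmvn_stats.sh data/sdm1/train_cleaned_sp_hires
utils/fix_data_dir.sh data/sdm1/train_cleaned_sp_hires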
70 changes: 39 additions & 31 deletions egs/ami/s5b/local/chain/tuning/run_tdnn_1j.sh
@@ -5,17 +5,17 @@

# local/chain/tuning/run_tdnn_1j.sh --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned

# local/chain/compare_wer_general.sh sdm1 tdnn1h_sp_bi_ihmali tdnn1i_sp_bi_ihmali
# local/chain/compare_wer_general.sh sdm1 tdnn1i_sp_bi_ihmali tdnn1j_sp_bi_ihmali
# System tdnn1i_sp_bi_ihmali tdnn1j_sp_bi_ihmali
# WER on dev 36.6 32.8
# WER on eval 40.6 36.3
# Final train prob -0.196231 -0.131658
# Final valid prob -0.265572 -0.216094
# Final train prob (xent) -2.48061 -1.53325
# Final valid prob (xent) -2.71794 -1.96188
# WER on dev 36.6 31.7
# WER on eval 40.6 35.1
# Final train prob -0.196231 -0.114088
# Final valid prob -0.265572 -0.214282
# Final train prob (xent) -2.48061 -1.37987
# Final valid prob (xent) -2.71794 -1.8639

# steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn1j_sp_bi_ihmali
# exp/sdm1/chain_cleaned/tdnn1j_sp_bi_ihmali: num-iters=196 nj=2..12 num-params=17.7M dim=80+100->3728 combine=-0.145->-0.143 (over 5) xent:train/valid[129,195,final]=(-1.81,-1.56,-1.53/-2.13,-2.02,-1.96) logprob:train/valid[129,195,final]=(-0.164,-0.136,-0.132/-0.226,-0.222,-0.216)
# exp/sdm1/chain_cleaned/tdnn1j_sp_bi_ihmali: num-iters=327 nj=2..12 num-params=34.3M dim=80+100->3728 combine=-0.126->-0.124 (over 4) xent:train/valid[217,326,final]=(-1.69,-1.43,-1.38/-2.06,-1.93,-1.86) logprob:train/valid[217,326,final]=(-0.143,-0.120,-0.114/-0.226,-0.218,-0.214)

set -e -o pipefail
# First the options that are passed through to run_ivector_common.sh
@@ -31,7 +31,7 @@ ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true).
num_threads_ubm=32
ivector_transform_type=pca
nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
num_epochs=9
num_epochs=15
remove_egs=true

# The rest are configs specific to this script. Most of the parameters
@@ -40,7 +40,7 @@ train_stage=-10
tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
tdnn_affix=1j #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
common_egs_dir= # you can set this to use previously dumped egs.

dropout_schedule='0,0@0.20,0.5@0.50,0'

# End configuration section.
echo "$0 $@" # Print the command line for logging
@@ -61,6 +61,7 @@ fi
local/nnet3/run_ivector_common.sh --stage $stage \
--mic $mic \
--nj $nj \
--hires_suffix 80 \
--min-seg-len $min_seg_len \
--train-set $train_set \
--gmm $gmm \
@@ -171,8 +172,11 @@ if [ $stage -le 15 ]; then

num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python)
opts="l2-regularize=0.02"
output_opts="l2-regularize=0.004"
affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
prefinal_opts="l2-regularize=0.01"
output_opts="l2-regularize=0.002"

mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
@@ -185,25 +189,27 @@ if [ $stage -le 15 ]; then
fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

# the first splicing is moved before the lda layer, so no splicing here
relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
linear-component name=prefinal-l dim=256 $linear_opts
prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=2136
tdnnf-layer name=tdnnf2 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
tdnnf-layer name=tdnnf3 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
tdnnf-layer name=tdnnf4 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=1
tdnnf-layer name=tdnnf5 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=0
tdnnf-layer name=tdnnf6 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf7 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf8 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf9 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf10 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf11 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf12 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf13 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf14 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
tdnnf-layer name=tdnnf15 $tdnnf_opts dim=2136 bottleneck-dim=210 time-stride=3
linear-component name=prefinal-l dim=512 $linear_opts

prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=2136 small-dim=512
output-layer name=output include-log-softmax=false dim=$num_targets $output_opts
prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256

prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=2136 small-dim=512
output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts

EOF
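
The line that compiles this xconfig into the actual nnet3 configs lies outside the changed region of the diff; in these chain recipes it typically sits right after the heredoc and looks like the following (a sketch, not part of the changed lines):

steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig \
  --config-dir $dir/configs/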
@@ -226,10 +232,11 @@ if [ $stage -le 16 ]; then
--chain.l2-regularize 0.00005 \
--chain.apply-deriv-weights false \
--chain.lm-opts="--num-extra-lm-states=2000" \
--trainer.dropout-schedule $dropout_schedule \
--egs.dir "$common_egs_dir" \
--egs.opts "--frames-overlap-per-eg 0" \
--egs.chunk-width 150 \
--trainer.num-chunk-per-minibatch 128 \
--trainer.num-chunk-per-minibatch 32 \
--trainer.frames-per-iter 1500000 \
--trainer.num-epochs $num_epochs \
--trainer.optimization.num-jobs-initial 2 \
Expand All @@ -238,6 +245,7 @@ if [ $stage -le 16 ]; then
--trainer.optimization.final-effective-lrate 0.0001 \
--trainer.max-param-change 2.0 \
--cleanup.remove-egs $remove_egs \
--cleanup.preserve-model-interval 50 \
--feat-dir $train_data_dir \
--tree-dir $tree_dir \
--lat-dir $lat_dir \
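
Once training and decoding finish, the results block at the top of this script can be regenerated with the tools it already references; usage as in the header comments:

# Compare the previous 1i system against the new 1j system, then inspect the model dir.
local/chain/compare_wer_general.sh sdm1 tdnn1i_sp_bi_ihmali tdnn1j_sp_bi_ihmali
steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn1j_sp_bi_ihmali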
4 changes: 2 additions & 2 deletions egs/ami/s5b/local/nnet3/run_ivector_common.sh
@@ -21,7 +21,7 @@ num_threads_ubm=32
ivector_transform_type=lda
nnet3_affix=_cleaned # affix for exp/$mic/nnet3 directory to put iVector stuff in, so it
# becomes exp/$mic/nnet3_cleaned or whatever.

hires_suffix=
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh
@@ -72,7 +72,7 @@ if [ $stage -le 2 ]; then
utils/data/perturb_data_dir_volume.sh data/$mic/${train_set}_sp_hires

for datadir in ${train_set}_sp dev eval; do
steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \
steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires$hires_suffix.conf \
--cmd "$train_cmd" data/$mic/${datadir}_hires
steps/compute_cmvn_stats.sh data/$mic/${datadir}_hires
utils/fix_data_dir.sh data/$mic/${datadir}_hires
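
Putting the pieces together: the new hires_suffix option defaults to empty, so existing recipes keep using conf/mfcc_hires.conf, while run_tdnn_1j.sh passes --hires_suffix 80 and thereby selects the 80-dimensional config added above. A sketch of the resulting call, with option values copied from this PR and everything else left at its defaults:

# utils/parse_options.sh maps --hires_suffix 80 onto hires_suffix=80, so the
# make_mfcc step resolves its config to conf/mfcc_hires80.conf.
local/nnet3/run_ivector_common.sh --mic sdm1 --hires_suffix 80 \
  --train-set train_cleaned --gmm tri3_cleaned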