Commit 3c61c318 authored by Yuxin Wu

some notes about TIMIT (#603)

parent bc8da700
...@@ -3,12 +3,23 @@ ...@@ -3,12 +3,23 @@
### Install Dependencies: ### Install Dependencies:
+ python binding for `lmdb` + python binding for `lmdb`
+ `pip install --user lmdb` + `pip install --user lmdb`
+ `bob.ap` package for MFCC extraction + `bob.ap` package for MFCC extraction
+ install blitz and openblas as dependencies of bob.ap + install [blitz](https://github.com/blitzpp/blitz) and openblas as dependencies of bob.ap
+ `pip install --user bob.ap` + `pip install --user bob.extension bob.blitz bob.core bob.sp bob.ap`
### Prepare Data: ### Prepare Data:
We assume the following file structure:
```
TRAIN/
DR1/
FCJF0/
*.WAV # NIST WAV file
*.TXT
*.PHN
...
```
Convert NIST wav format to RIFF wav format: Convert NIST wav format to RIFF wav format:
``` ```
cd /PATH/TO/TIMIT cd /PATH/TO/TIMIT
......
...@@ -8,12 +8,12 @@ import string ...@@ -8,12 +8,12 @@ import string
import numpy as np import numpy as np
import argparse import argparse
import bob.ap
from tensorpack.dataflow import dftools, DataFlow, LMDBDataPoint from tensorpack.dataflow import dftools, DataFlow, LMDBDataPoint
from tensorpack.utils.argtools import memoized from tensorpack.utils.argtools import memoized
from tensorpack.utils.stats import OnlineMoments from tensorpack.utils.stats import OnlineMoments
from tensorpack.utils import serialize, fs, logger from tensorpack.utils import serialize, fs, logger
from tensorpack.utils.utils import get_tqdm from tensorpack.utils.utils import get_tqdm
import bob.ap
CHARSET = set(string.ascii_lowercase + ' ') CHARSET = set(string.ascii_lowercase + ' ')
PHONEME_LIST = [ PHONEME_LIST = [
...@@ -85,7 +85,7 @@ class RawTIMIT(DataFlow): ...@@ -85,7 +85,7 @@ class RawTIMIT(DataFlow):
self.filelists = [k for k in fs.recursive_walk(self.dirname) self.filelists = [k for k in fs.recursive_walk(self.dirname)
if k.endswith('.wav')] if k.endswith('.wav')]
logger.info("Found {} wav files ...".format(len(self.filelists))) logger.info("Found {} wav files ...".format(len(self.filelists)))
assert len(self.filelists), self.filelists assert len(self.filelists), "Found no '.wav' files!"
assert label in ['phoneme', 'letter'], label assert label in ['phoneme', 'letter'], label
self.label = label self.label = label
......
...@@ -79,7 +79,7 @@ class Model(ModelDesc): ...@@ -79,7 +79,7 @@ class Model(ModelDesc):
def get_data(path, isTrain, stat_file): def get_data(path, isTrain, stat_file):
ds = LMDBDataPoint(path, shuffle=isTrain) ds = LMDBDataPoint(path, shuffle=isTrain)
mean, std = serialize.loads(open(stat_file).read()) mean, std = serialize.loads(open(stat_file, 'rb').read())
ds = MapDataComponent(ds, lambda x: (x - mean) / std) ds = MapDataComponent(ds, lambda x: (x - mean) / std)
ds = TIMITBatch(ds, BATCH) ds = TIMITBatch(ds, BATCH)
if isTrain: if isTrain:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment