mirror of
https://github.com/modelscope/FunASR
synced 2025-09-15 14:48:36 +08:00
* add modular saasr * update readme * Delete train_paraformer.yaml * update setup.py * update setup.py * update setup.py
30 lines
1.2 KiB
Python
30 lines
1.2 KiB
Python
"""Convert an RTTM file into ``segments`` and ``utt2spk`` files.

Usage:
    python SCRIPT RTTM_FILE OUTPUT_DIR MODE

MODE selects what is written in the second column of ``segments``:
    0 (diarization): the session id
    1 (asr):         the session-prefixed speaker id ("<session>-<speaker>")

``utt2spk`` always maps each utterance id to the session-prefixed speaker
id.  Segments shorter than MIN_SEGMENT_DURATION seconds are dropped.
"""
import codecs
import sys

# Segments with a duration (in seconds) below this value are skipped.
MIN_SEGMENT_DURATION = 0.3
# Number of whitespace-separated fields expected on every RTTM line.
RTTM_FIELD_COUNT = 10
MODE_ERROR = "mode only support 0 or 1!"


def parse_mode(mode):
    """Return *mode* as the integer 0 or 1.

    Raises:
        ValueError: if *mode* is not an integer-like value equal to 0 or 1.
    """
    try:
        value = int(mode)
    except (TypeError, ValueError):
        raise ValueError(MODE_ERROR)
    if value not in (0, 1):
        raise ValueError(MODE_ERROR)
    return value


def parse_rttm_line(line, mode):
    """Convert one RTTM line into its ``segments`` and ``utt2spk`` lines.

    Args:
        line: one line of the RTTM file; fields may be separated by any
            run of whitespace.
        mode: the integer 0 or 1 (see the module docstring).

    Returns:
        A ``(segments_line, utt2spk_line)`` tuple, each terminated by a
        newline, or ``None`` when the line is blank or the segment is
        shorter than MIN_SEGMENT_DURATION.

    Raises:
        ValueError: if the line does not have RTTM_FIELD_COUNT fields, the
            start time or duration is not a number, or *mode* is invalid.
    """
    # split() with no argument tolerates tabs and repeated spaces, which a
    # split(" ") would turn into spurious empty fields.
    fields = line.split()
    if not fields:
        return None
    if len(fields) != RTTM_FIELD_COUNT:
        raise ValueError(
            "expected %d fields, got %d: %r"
            % (RTTM_FIELD_COUNT, len(fields), line)
        )

    sessionid = fields[1]
    stime = float(fields[3])
    dur = float(fields[4])
    if dur < MIN_SEGMENT_DURATION:
        return None

    # The centisecond timestamps are truncated exactly as before so that
    # utterance ids stay identical to those produced by earlier runs.
    uttid = "%s-%07d-%07d" % (
        sessionid,
        int(stime * 100),
        int(stime * 100 + dur * 100),
    )
    spkid = "%s-%s" % (sessionid, fields[7])

    if mode == 0:
        recording = sessionid
    elif mode == 1:
        recording = spkid
    else:
        raise ValueError(MODE_ERROR)

    segments_line = "%s %s %.2f %.2f\n" % (uttid, recording, stime, stime + dur)
    utt2spk_line = "%s %s\n" % (uttid, spkid)
    return segments_line, utt2spk_line


def convert_rttm(rttm_file_path, segment_file_path, mode):
    """Write ``segments`` and ``utt2spk`` under *segment_file_path*.

    The mode is validated before any output file is opened, so an invalid
    mode never truncates existing output.

    Raises:
        ValueError: if *mode* is invalid or an RTTM line is malformed; for
            malformed lines the message carries the file name and line
            number.
    """
    mode = parse_mode(mode)
    with codecs.open(rttm_file_path, "r", "utf-8") as fi, \
            codecs.open(segment_file_path + "/segments", "w", "utf-8") as f1, \
            codecs.open(segment_file_path + "/utt2spk", "w", "utf-8") as f2:
        for lineno, line in enumerate(fi, 1):
            try:
                converted = parse_rttm_line(line, mode)
            except ValueError as err:
                raise ValueError("%s:%d: %s" % (rttm_file_path, lineno, err))
            if converted is None:
                continue
            segments_line, utt2spk_line = converted
            f1.write(segments_line)
            f2.write(utt2spk_line)


def main(argv=None):
    """Command-line entry point; *argv* defaults to ``sys.argv``."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 4:
        sys.exit("usage: python SCRIPT RTTM_FILE OUTPUT_DIR MODE")
    rttm_file_path = argv[1]
    segment_file_path = argv[2]
    mode = argv[3]  # 0 for diarization, 1 for asr
    try:
        convert_rttm(rttm_file_path, segment_file_path, mode)
    except ValueError as err:
        sys.exit(str(err))


if __name__ == "__main__":
    main()