diff --git a/docs/m2met2/Organizers.md b/docs/m2met2/Organizers.md index e16c803af..f5a9da2ba 100644 --- a/docs/m2met2/Organizers.md +++ b/docs/m2met2/Organizers.md @@ -1,5 +1,5 @@ # Organizers -***Lei Xie, Professor, Northwestern Polytechnical University, China*** +***Lei Xie, Professor, AISHELL foundation, China*** Email: [lxie@nwpu.edu.cn](mailto:lxie@nwpu.edu.cn) diff --git a/docs/m2met2/_build/doctrees/Organizers.doctree b/docs/m2met2/_build/doctrees/Organizers.doctree index 0f571a3b7..7ecfbdf5a 100644 Binary files a/docs/m2met2/_build/doctrees/Organizers.doctree and b/docs/m2met2/_build/doctrees/Organizers.doctree differ diff --git a/docs/m2met2/_build/doctrees/environment.pickle b/docs/m2met2/_build/doctrees/environment.pickle index ea9c740c1..4e86ccaa1 100644 Binary files a/docs/m2met2/_build/doctrees/environment.pickle and b/docs/m2met2/_build/doctrees/environment.pickle differ diff --git a/docs/m2met2/_build/html/Organizers.html b/docs/m2met2/_build/html/Organizers.html index 0a8811e78..6daffe26e 100644 --- a/docs/m2met2/_build/html/Organizers.html +++ b/docs/m2met2/_build/html/Organizers.html @@ -124,7 +124,7 @@

Organizers

-

Lei Xie, Professor, Northwestern Polytechnical University, China

+

Lei Xie, Professor, AISHELL foundation, China

Email: lxie@nwpu.edu.cn

lxie

Kong Aik Lee, Senior Scientist at Institute for Infocomm Research, A*Star, Singapore

diff --git a/docs/m2met2/_build/html/_sources/Organizers.md.txt b/docs/m2met2/_build/html/_sources/Organizers.md.txt index e16c803af..f5a9da2ba 100644 --- a/docs/m2met2/_build/html/_sources/Organizers.md.txt +++ b/docs/m2met2/_build/html/_sources/Organizers.md.txt @@ -1,5 +1,5 @@ # Organizers -***Lei Xie, Professor, Northwestern Polytechnical University, China*** +***Lei Xie, Professor, AISHELL foundation, China*** Email: [lxie@nwpu.edu.cn](mailto:lxie@nwpu.edu.cn) diff --git a/docs/m2met2/_build/html/searchindex.js b/docs/m2met2/_build/html/searchindex.js index 54443a001..ec81fa7b2 100644 --- a/docs/m2met2/_build/html/searchindex.js +++ b/docs/m2met2/_build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["Baseline", "Contact", "Dataset", "Introduction", "Organizers", "Rules", "Track_setting_and_evaluation", "index"], "filenames": ["Baseline.md", "Contact.md", "Dataset.md", "Introduction.md", "Organizers.md", "Rules.md", "Track_setting_and_evaluation.md", "index.rst"], "titles": ["Baseline", "Contact", "Datasets", "Introduction", "Organizers", "Rules", "Track & Evaluation", "ASRU 2023 MULTI-CHANNEL MULTI-PARTY MEETING TRANSCRIPTION CHALLENGE 2.0 (M2MeT2.0)"], "terms": {"we": [0, 2, 3, 7], "releas": [0, 2, 3, 6], "an": [0, 2, 3, 6], "e2": 0, "sa": 0, "asr": [0, 3, 7], "cite": 0, "kanda21b_interspeech": 0, "conduct": [0, 2], "funasr": 0, "time": [0, 6], "accord": [0, 3], "timelin": [0, 2], "The": [0, 2, 3, 5, 6], "model": [0, 2, 3, 5, 6], "architectur": 0, "i": [0, 2, 3, 5], "shown": [0, 2], "figur": [0, 6], "3": [0, 2, 3], "speakerencod": 0, "initi": 0, "pre": [0, 6], "train": [0, 3, 5, 7], "speaker": [0, 2, 3, 7], "verif": 0, "from": [0, 2, 3, 5, 6], "modelscop": [0, 6], "thi": [0, 3, 5, 6], "also": [0, 2, 6], "us": [0, 2, 5, 6], "extract": 0, "embed": 0, "profil": 0, "todo": 0, "fill": 0, "readm": 0, "md": 0, "system": [0, 3, 5, 6, 7], "ar": [0, 2, 3, 5, 6, 7], "tabl": [0, 2], "adopt": 0, "oracl": [0, 6], "dure": [0, 2, 6], "howev": [0, 3, 6], "due": [0, 3], "lack": 0, "label": [0, 5, 6], "evalu": [0, 2, 3, 7], "provid": [0, 2, 6, 7], "addit": [0, 6], "spectral": 0, "cluster": 0, "meanwhil": 0, "eval": [0, 2, 5, 6], "test": [0, 2, 3, 5, 6], "set": [0, 2, 3, 5, 6], "show": 0, "impact": 0, "accuraci": [0, 6], "If": [1, 5, 6], "you": 1, "have": [1, 3], "ani": [1, 5, 6], "question": 1, "about": 1, "m2met2": [1, 3], "0": [1, 2, 3], "challeng": [1, 3, 5, 6], "pleas": 1, "u": [1, 2], "email": [1, 3, 4], "m2met": [1, 3, 6, 7], "alimeet": [1, 6], "gmail": 1, "com": [1, 4], "wechat": 1, "group": [1, 2], "In": [2, 3, 5], "fix": [2, 3, 7], "condit": [2, 3, 7], "restrict": 2, "three": [2, 3, 6], "publicli": [2, 6], "avail": [2, 6], "corpora": 2, "name": 2, "aishel": [2, 4, 6], "4": [2, 6], "cn": [2, 4, 6], "celeb": [2, 6], "To": [2, 3, 7], "perform": [2, 3], "new": [2, 3, 6], "call": 2, "2023": [2, 3, 5, 6], "score": [2, 6], "rank": [2, 3, 6], "describ": 2, "contain": [2, 6], "118": 2, "75": 2, "hour": [2, 3, 6], "speech": [2, 3, 6, 7], "total": [2, 6], "divid": [2, 6], "104": 2, "10": [2, 3, 6], "specif": [2, 6], "212": 2, "8": [2, 3], "20": 2, "session": [2, 3, 6, 7], "respect": 2, "each": [2, 3, 6], "consist": [2, 6], "15": [2, 3], "30": 2, "minut": 2, "discuss": 2, "particip": [2, 5, 6], "number": [2, 3, 6], "456": 2, "25": 2, "60": 2, "balanc": 2, "gender": 2, "coverag": 2, "collect": 2, "13": [2, 3], "meet": [2, 3, 6], "venu": 2, "which": [2, 3, 6], "categor": 2, "type": 2, "small": 2, "medium": 2, "larg": [2, 3], "room": [2, 3], "size": 2, "rang": 2, "m": 2, "2": [2, 6], "55": 2, "differ": [2, 3, 6], "give": 2, "varieti": 2, "acoust": [2, 3, 6], "properti": 2, "layout": 2, "paramet": [2, 5], "togeth": 2, "wall": 2, "materi": 2, "cover": 2, "cement": 2, "glass": 2, "etc": 2, "other": 2, "furnish": 2, "includ": [2, 3, 5, 6], "sofa": 2, "tv": 2, "blackboard": 2, "fan": 2, "air": 2, "condition": 2, "plant": 2, "record": [2, 6], "sit": 2, "around": 2, "microphon": [2, 3], "arrai": [2, 3], "place": 2, "natur": 2, "convers": 2, "distanc": 2, "5": 2, "all": [2, 3, 5, 6], "nativ": 2, "chines": 2, "speak": [2, 3], "mandarin": [2, 3], "without": 2, "strong": 2, "accent": 2, "variou": [2, 3], "kind": 2, "indoor": 2, "nois": [2, 3, 5], "limit": [2, 3, 5], "click": 2, "keyboard": 2, "door": 2, "open": [2, 3, 7], "close": 2, "bubbl": 2, "made": [2, 3], "For": 2, "both": [2, 6], "requir": [2, 3, 6], "remain": [2, 3], "same": [2, 5], "posit": 2, "There": 2, "overlap": [2, 3], "between": [2, 6], "exampl": 2, "fig": 2, "1": 2, "within": [2, 3], "one": [2, 5], "ensur": 2, "ratio": 2, "select": [2, 3, 5, 6], "topic": 2, "medic": 2, "treatment": 2, "educ": 2, "busi": 2, "organ": [2, 3, 5, 6, 7], "manag": 2, "industri": [2, 3], "product": 2, "daili": 2, "routin": 2, "averag": 2, "42": 2, "27": 2, "34": 2, "76": 2, "more": 2, "A": [2, 4], "distribut": 2, "were": 2, "ident": [2, 6], "compris": [2, 3, 7], "therebi": 2, "share": 2, "similar": 2, "configur": 2, "field": [2, 3, 6], "signal": [2, 3], "headset": 2, "onli": [2, 5, 6], "": [2, 6], "own": 2, "transcrib": [2, 3, 6], "It": [2, 6], "worth": [2, 6], "note": [2, 6], "far": [2, 3], "audio": [2, 3, 6], "synchron": 2, "common": 2, "transcript": [2, 3, 5, 6], "prepar": 2, "textgrid": 2, "format": 2, "inform": [2, 3], "durat": 2, "id": 2, "segment": [2, 6], "timestamp": [2, 6], "mention": 2, "abov": 2, "can": [2, 3, 5, 6], "download": 2, "openslr": 2, "via": 2, "follow": [2, 5], "link": 2, "particularli": 2, "baselin": [2, 3, 7], "conveni": 2, "script": 2, "automat": [3, 7], "recognit": [3, 7], "diariz": 3, "signific": 3, "stride": 3, "recent": 3, "year": 3, "result": 3, "surg": 3, "technologi": 3, "applic": 3, "across": 3, "domain": 3, "present": 3, "uniqu": [3, 6], "complex": [3, 5], "divers": 3, "style": 3, "variabl": 3, "confer": 3, "environment": 3, "reverber": [3, 5], "over": 3, "sever": 3, "been": 3, "advanc": [3, 7], "develop": [3, 6], "rich": 3, "comput": [3, 5], "hear": 3, "multisourc": 3, "environ": 3, "chime": 3, "latest": 3, "iter": 3, "ha": 3, "particular": 3, "focu": 3, "distant": 3, "gener": 3, "topologi": 3, "scenario": 3, "while": 3, "progress": 3, "english": 3, "languag": [3, 5], "barrier": 3, "achiev": 3, "compar": 3, "non": 3, "multimod": 3, "base": 3, "process": [3, 6], "misp": 3, "multi": [3, 6], "channel": 3, "parti": [3, 6], "instrument": 3, "seek": 3, "address": 3, "problem": 3, "visual": 3, "everydai": 3, "home": 3, "focus": 3, "tackl": 3, "issu": 3, "offlin": 3, "icassp2022": 3, "two": [3, 5, 7], "main": 3, "task": [3, 6, 7], "former": 3, "involv": [3, 6], "identifi": 3, "who": 3, "spoke": 3, "when": 3, "latter": 3, "aim": 3, "multipl": [3, 6], "simultan": 3, "pose": [3, 6], "technic": 3, "difficulti": 3, "interfer": 3, "build": [3, 6, 7], "success": [3, 7], "previou": 3, "excit": 3, "propos": [3, 7], "asru2023": [3, 7], "special": [3, 5, 7], "origin": [3, 5], "metric": [3, 7], "wa": [3, 6], "independ": 3, "meant": 3, "could": 3, "determin": 3, "correspond": [3, 5], "further": 3, "current": [3, 7], "talker": [3, 7], "toward": 3, "practic": 3, "attribut": [3, 7], "sub": [3, 5, 7], "track": [3, 5, 7], "what": 3, "facilit": [3, 7], "reproduc": [3, 7], "research": [3, 4, 7], "offer": 3, "comprehens": [3, 7], "overview": [3, 7], "dataset": [3, 5, 6, 7], "rule": [3, 7], "furthermor": 3, "carefulli": 3, "curat": 3, "approxim": [3, 6], "design": 3, "enabl": 3, "valid": 3, "state": [3, 6, 7], "art": [3, 7], "area": 3, "april": 3, "29": 3, "registr": 3, "mai": 3, "deadlin": 3, "date": 3, "join": 3, "june": 3, "9": 3, "data": [3, 5, 6], "leaderboard": 3, "final": [3, 5, 6], "submiss": 3, "19": 3, "juli": 3, "paper": [3, 6], "decemb": 3, "12": 3, "16": 3, "asru": 3, "workshop": 3, "interest": 3, "whether": 3, "academia": 3, "must": [3, 5, 6], "regist": 3, "complet": 3, "googl": 3, "form": 3, "below": 3, "work": 3, "dai": 3, "send": 3, "invit": 3, "elig": [3, 5], "team": 3, "qualifi": 3, "adher": [3, 5], "publish": 3, "page": 3, "prior": 3, "submit": 3, "descript": [3, 6], "document": 3, "detail": [3, 6], "approach": [3, 5], "method": 3, "top": 3, "proceed": 3, "lei": 4, "xie": 4, "professor": 4, "northwestern": 4, "polytechn": 4, "univers": 4, "china": 4, "lxie": 4, "nwpu": 4, "edu": 4, "kong": 4, "aik": 4, "lee": 4, "senior": 4, "scientist": 4, "institut": 4, "infocomm": 4, "star": 4, "singapor": 4, "kongaik": 4, "ieee": 4, "org": 4, "zhiji": 4, "yan": 4, "princip": 4, "engin": 4, "alibaba": 4, "yzj": 4, "inc": 4, "shiliang": 4, "zhang": 4, "sly": 4, "zsl": 4, "yanmin": 4, "qian": 4, "shanghai": 4, "jiao": 4, "tong": 4, "yanminqian": 4, "sjtu": 4, "zhuo": 4, "chen": 4, "appli": 4, "microsoft": 4, "usa": 4, "zhuc": 4, "jian": 4, "wu": 4, "wujian": 4, "hui": 4, "bu": 4, "ceo": 4, "foundat": 4, "buhui": 4, "aishelldata": 4, "should": 5, "augment": 5, "allow": [5, 6], "ad": 5, "speed": 5, "perturb": 5, "tone": 5, "chang": 5, "permit": 5, "purpos": 5, "instead": [5, 6], "util": [5, 6], "tune": 5, "violat": 5, "strictli": [5, 6], "prohibit": [5, 6], "fine": 5, "cpcer": [5, 6], "lower": 5, "judg": 5, "superior": 5, "forc": 5, "align": 5, "obtain": [5, 6], "frame": 5, "level": 5, "classif": 5, "basi": 5, "shallow": 5, "fusion": 5, "end": 5, "e": [5, 6], "g": 5, "la": 5, "rnnt": 5, "transform": [5, 6], "come": 5, "right": 5, "interpret": 5, "belong": 5, "case": 5, "circumst": 5, "coordin": 5, "assign": 6, "illustr": 6, "aishell4": 6, "constrain": 6, "sourc": 6, "addition": 6, "corpu": 6, "soon": 6, "simpl": 6, "voic": 6, "activ": 6, "detect": 6, "vad": 6, "concaten": 6, "minimum": 6, "permut": 6, "charact": 6, "error": 6, "rate": 6, "calcul": 6, "step": 6, "firstli": 6, "refer": 6, "hypothesi": 6, "chronolog": 6, "order": 6, "secondli": 6, "cer": 6, "repeat": 6, "possibl": 6, "lowest": 6, "tthe": 6, "insert": 6, "Ins": 6, "substitut": 6, "delet": 6, "del": 6, "output": 6, "text": 6, "frac": 6, "mathcal": 6, "n_": 6, "100": 6, "where": 6, "usag": 6, "third": 6, "hug": 6, "face": 6, "list": 6, "clearli": 6, "privat": 6, "manual": 6, "simul": 6, "thei": 6, "mandatori": 6, "clear": 6, "scheme": 6, "delight": 7, "introduct": 7, "contact": 7}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"baselin": 0, "overview": [0, 2], "quick": 0, "start": 0, "result": 0, "contact": 1, "dataset": 2, "train": [2, 6], "data": 2, "detail": 2, "alimeet": 2, "corpu": 2, "get": 2, "introduct": 3, "call": 3, "particip": 3, "timelin": 3, "aoe": 3, "time": 3, "guidelin": 3, "organ": 4, "rule": 5, "track": 6, "evalu": 6, "speaker": 6, "attribut": 6, "asr": 6, "metric": 6, "sub": 6, "arrang": 6, "i": 6, "fix": 6, "condit": 6, "ii": 6, "open": 6, "asru": 7, "2023": 7, "multi": 7, "channel": 7, "parti": 7, "meet": 7, "transcript": 7, "challeng": 7, "2": 7, "0": 7, "m2met2": 7, "content": 7}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"Baseline": [[0, "baseline"]], "Overview": [[0, "overview"]], "Quick start": [[0, "quick-start"]], "Baseline results": [[0, "baseline-results"]], "Contact": [[1, "contact"]], "Datasets": [[2, "datasets"]], "Overview of training data": [[2, "overview-of-training-data"]], "Detail of AliMeeting corpus": [[2, "detail-of-alimeeting-corpus"]], "Get the data": [[2, "get-the-data"]], "Introduction": [[3, "introduction"]], "Call for participation": [[3, "call-for-participation"]], "Timeline(AOE Time)": [[3, "timeline-aoe-time"]], "Guidelines": [[3, "guidelines"]], "Organizers": [[4, "organizers"]], "Rules": [[5, "rules"]], "Track & Evaluation": [[6, "track-evaluation"]], "Speaker-Attributed ASR": [[6, "speaker-attributed-asr"]], "Evaluation metric": [[6, "evaluation-metric"]], "Sub-track arrangement": [[6, "sub-track-arrangement"]], "Sub-track I (Fixed Training Condition):": [[6, "sub-track-i-fixed-training-condition"]], "Sub-track II (Open Training Condition):": [[6, "sub-track-ii-open-training-condition"]], "ASRU 2023 MULTI-CHANNEL MULTI-PARTY MEETING TRANSCRIPTION CHALLENGE 2.0 (M2MeT2.0)": [[7, "asru-2023-multi-channel-multi-party-meeting-transcription-challenge-2-0-m2met2-0"]], "Contents:": [[7, null]]}, "indexentries": {}}) \ No newline at end of file +Search.setIndex({"docnames": ["Baseline", "Contact", "Dataset", "Introduction", "Organizers", "Rules", "Track_setting_and_evaluation", "index"], "filenames": ["Baseline.md", "Contact.md", "Dataset.md", "Introduction.md", "Organizers.md", "Rules.md", "Track_setting_and_evaluation.md", "index.rst"], "titles": ["Baseline", "Contact", "Datasets", "Introduction", "Organizers", "Rules", "Track & Evaluation", "ASRU 2023 MULTI-CHANNEL MULTI-PARTY MEETING TRANSCRIPTION CHALLENGE 2.0 (M2MeT2.0)"], "terms": {"we": [0, 2, 3, 7], "releas": [0, 2, 3, 6], "an": [0, 2, 3, 6], "e2": 0, "sa": 0, "asr": [0, 3, 7], "cite": 0, "kanda21b_interspeech": 0, "conduct": [0, 2], "funasr": 0, "time": [0, 6], "accord": [0, 3], "timelin": [0, 2], "The": [0, 2, 3, 5, 6], "model": [0, 2, 3, 5, 6], "architectur": 0, "i": [0, 2, 3, 5], "shown": [0, 2], "figur": [0, 6], "3": [0, 2, 3], "speakerencod": 0, "initi": 0, "pre": [0, 6], "train": [0, 3, 5, 7], "speaker": [0, 2, 3, 7], "verif": 0, "from": [0, 2, 3, 5, 6], "modelscop": [0, 6], "thi": [0, 3, 5, 6], "also": [0, 2, 6], "us": [0, 2, 5, 6], "extract": 0, "embed": 0, "profil": 0, "todo": 0, "fill": 0, "readm": 0, "md": 0, "system": [0, 3, 5, 6, 7], "ar": [0, 2, 3, 5, 6, 7], "tabl": [0, 2], "adopt": 0, "oracl": [0, 6], "dure": [0, 2, 6], "howev": [0, 3, 6], "due": [0, 3], "lack": 0, "label": [0, 5, 6], "evalu": [0, 2, 3, 7], "provid": [0, 2, 6, 7], "addit": [0, 6], "spectral": 0, "cluster": 0, "meanwhil": 0, "eval": [0, 2, 5, 6], "test": [0, 2, 3, 5, 6], "set": [0, 2, 3, 5, 6], "show": 0, "impact": 0, "accuraci": [0, 6], "If": [1, 5, 6], "you": 1, "have": [1, 3], "ani": [1, 5, 6], "question": 1, "about": 1, "m2met2": [1, 3], "0": [1, 2, 3], "challeng": [1, 3, 5, 6], "pleas": 1, "u": [1, 2], "email": [1, 3, 4], "m2met": [1, 3, 6, 7], "alimeet": [1, 6], "gmail": 1, "com": [1, 4], "wechat": 1, "group": [1, 2], "In": [2, 3, 5], "fix": [2, 3, 7], "condit": [2, 3, 7], "restrict": 2, "three": [2, 3, 6], "publicli": [2, 6], "avail": [2, 6], "corpora": 2, "name": 2, "aishel": [2, 4, 6], "4": [2, 6], "cn": [2, 4, 6], "celeb": [2, 6], "To": [2, 3, 7], "perform": [2, 3], "new": [2, 3, 6], "call": 2, "2023": [2, 3, 5, 6], "score": [2, 6], "rank": [2, 3, 6], "describ": 2, "contain": [2, 6], "118": 2, "75": 2, "hour": [2, 3, 6], "speech": [2, 3, 6, 7], "total": [2, 6], "divid": [2, 6], "104": 2, "10": [2, 3, 6], "specif": [2, 6], "212": 2, "8": [2, 3], "20": 2, "session": [2, 3, 6, 7], "respect": 2, "each": [2, 3, 6], "consist": [2, 6], "15": [2, 3], "30": 2, "minut": 2, "discuss": 2, "particip": [2, 5, 6], "number": [2, 3, 6], "456": 2, "25": 2, "60": 2, "balanc": 2, "gender": 2, "coverag": 2, "collect": 2, "13": [2, 3], "meet": [2, 3, 6], "venu": 2, "which": [2, 3, 6], "categor": 2, "type": 2, "small": 2, "medium": 2, "larg": [2, 3], "room": [2, 3], "size": 2, "rang": 2, "m": 2, "2": [2, 6], "55": 2, "differ": [2, 3, 6], "give": 2, "varieti": 2, "acoust": [2, 3, 6], "properti": 2, "layout": 2, "paramet": [2, 5], "togeth": 2, "wall": 2, "materi": 2, "cover": 2, "cement": 2, "glass": 2, "etc": 2, "other": 2, "furnish": 2, "includ": [2, 3, 5, 6], "sofa": 2, "tv": 2, "blackboard": 2, "fan": 2, "air": 2, "condition": 2, "plant": 2, "record": [2, 6], "sit": 2, "around": 2, "microphon": [2, 3], "arrai": [2, 3], "place": 2, "natur": 2, "convers": 2, "distanc": 2, "5": 2, "all": [2, 3, 5, 6], "nativ": 2, "chines": 2, "speak": [2, 3], "mandarin": [2, 3], "without": 2, "strong": 2, "accent": 2, "variou": [2, 3], "kind": 2, "indoor": 2, "nois": [2, 3, 5], "limit": [2, 3, 5], "click": 2, "keyboard": 2, "door": 2, "open": [2, 3, 7], "close": 2, "bubbl": 2, "made": [2, 3], "For": 2, "both": [2, 6], "requir": [2, 3, 6], "remain": [2, 3], "same": [2, 5], "posit": 2, "There": 2, "overlap": [2, 3], "between": [2, 6], "exampl": 2, "fig": 2, "1": 2, "within": [2, 3], "one": [2, 5], "ensur": 2, "ratio": 2, "select": [2, 3, 5, 6], "topic": 2, "medic": 2, "treatment": 2, "educ": 2, "busi": 2, "organ": [2, 3, 5, 6, 7], "manag": 2, "industri": [2, 3], "product": 2, "daili": 2, "routin": 2, "averag": 2, "42": 2, "27": 2, "34": 2, "76": 2, "more": 2, "A": [2, 4], "distribut": 2, "were": 2, "ident": [2, 6], "compris": [2, 3, 7], "therebi": 2, "share": 2, "similar": 2, "configur": 2, "field": [2, 3, 6], "signal": [2, 3], "headset": 2, "onli": [2, 5, 6], "": [2, 6], "own": 2, "transcrib": [2, 3, 6], "It": [2, 6], "worth": [2, 6], "note": [2, 6], "far": [2, 3], "audio": [2, 3, 6], "synchron": 2, "common": 2, "transcript": [2, 3, 5, 6], "prepar": 2, "textgrid": 2, "format": 2, "inform": [2, 3], "durat": 2, "id": 2, "segment": [2, 6], "timestamp": [2, 6], "mention": 2, "abov": 2, "can": [2, 3, 5, 6], "download": 2, "openslr": 2, "via": 2, "follow": [2, 5], "link": 2, "particularli": 2, "baselin": [2, 3, 7], "conveni": 2, "script": 2, "automat": [3, 7], "recognit": [3, 7], "diariz": 3, "signific": 3, "stride": 3, "recent": 3, "year": 3, "result": 3, "surg": 3, "technologi": 3, "applic": 3, "across": 3, "domain": 3, "present": 3, "uniqu": [3, 6], "complex": [3, 5], "divers": 3, "style": 3, "variabl": 3, "confer": 3, "environment": 3, "reverber": [3, 5], "over": 3, "sever": 3, "been": 3, "advanc": [3, 7], "develop": [3, 6], "rich": 3, "comput": [3, 5], "hear": 3, "multisourc": 3, "environ": 3, "chime": 3, "latest": 3, "iter": 3, "ha": 3, "particular": 3, "focu": 3, "distant": 3, "gener": 3, "topologi": 3, "scenario": 3, "while": 3, "progress": 3, "english": 3, "languag": [3, 5], "barrier": 3, "achiev": 3, "compar": 3, "non": 3, "multimod": 3, "base": 3, "process": [3, 6], "misp": 3, "multi": [3, 6], "channel": 3, "parti": [3, 6], "instrument": 3, "seek": 3, "address": 3, "problem": 3, "visual": 3, "everydai": 3, "home": 3, "focus": 3, "tackl": 3, "issu": 3, "offlin": 3, "icassp2022": 3, "two": [3, 5, 7], "main": 3, "task": [3, 6, 7], "former": 3, "involv": [3, 6], "identifi": 3, "who": 3, "spoke": 3, "when": 3, "latter": 3, "aim": 3, "multipl": [3, 6], "simultan": 3, "pose": [3, 6], "technic": 3, "difficulti": 3, "interfer": 3, "build": [3, 6, 7], "success": [3, 7], "previou": 3, "excit": 3, "propos": [3, 7], "asru2023": [3, 7], "special": [3, 5, 7], "origin": [3, 5], "metric": [3, 7], "wa": [3, 6], "independ": 3, "meant": 3, "could": 3, "determin": 3, "correspond": [3, 5], "further": 3, "current": [3, 7], "talker": [3, 7], "toward": 3, "practic": 3, "attribut": [3, 7], "sub": [3, 5, 7], "track": [3, 5, 7], "what": 3, "facilit": [3, 7], "reproduc": [3, 7], "research": [3, 4, 7], "offer": 3, "comprehens": [3, 7], "overview": [3, 7], "dataset": [3, 5, 6, 7], "rule": [3, 7], "furthermor": 3, "carefulli": 3, "curat": 3, "approxim": [3, 6], "design": 3, "enabl": 3, "valid": 3, "state": [3, 6, 7], "art": [3, 7], "area": 3, "april": 3, "29": 3, "registr": 3, "mai": 3, "deadlin": 3, "date": 3, "join": 3, "june": 3, "9": 3, "data": [3, 5, 6], "leaderboard": 3, "final": [3, 5, 6], "submiss": 3, "19": 3, "juli": 3, "paper": [3, 6], "decemb": 3, "12": 3, "16": 3, "asru": 3, "workshop": 3, "interest": 3, "whether": 3, "academia": 3, "must": [3, 5, 6], "regist": 3, "complet": 3, "googl": 3, "form": 3, "below": 3, "work": 3, "dai": 3, "send": 3, "invit": 3, "elig": [3, 5], "team": 3, "qualifi": 3, "adher": [3, 5], "publish": 3, "page": 3, "prior": 3, "submit": 3, "descript": [3, 6], "document": 3, "detail": [3, 6], "approach": [3, 5], "method": 3, "top": 3, "proceed": 3, "lei": 4, "xie": 4, "professor": 4, "foundat": 4, "china": 4, "lxie": 4, "nwpu": 4, "edu": 4, "kong": 4, "aik": 4, "lee": 4, "senior": 4, "scientist": 4, "institut": 4, "infocomm": 4, "star": 4, "singapor": 4, "kongaik": 4, "ieee": 4, "org": 4, "zhiji": 4, "yan": 4, "princip": 4, "engin": 4, "alibaba": 4, "yzj": 4, "inc": 4, "shiliang": 4, "zhang": 4, "sly": 4, "zsl": 4, "yanmin": 4, "qian": 4, "shanghai": 4, "jiao": 4, "tong": 4, "univers": 4, "yanminqian": 4, "sjtu": 4, "zhuo": 4, "chen": 4, "appli": 4, "microsoft": 4, "usa": 4, "zhuc": 4, "jian": 4, "wu": 4, "wujian": 4, "hui": 4, "bu": 4, "ceo": 4, "buhui": 4, "aishelldata": 4, "should": 5, "augment": 5, "allow": [5, 6], "ad": 5, "speed": 5, "perturb": 5, "tone": 5, "chang": 5, "permit": 5, "purpos": 5, "instead": [5, 6], "util": [5, 6], "tune": 5, "violat": 5, "strictli": [5, 6], "prohibit": [5, 6], "fine": 5, "cpcer": [5, 6], "lower": 5, "judg": 5, "superior": 5, "forc": 5, "align": 5, "obtain": [5, 6], "frame": 5, "level": 5, "classif": 5, "basi": 5, "shallow": 5, "fusion": 5, "end": 5, "e": [5, 6], "g": 5, "la": 5, "rnnt": 5, "transform": [5, 6], "come": 5, "right": 5, "interpret": 5, "belong": 5, "case": 5, "circumst": 5, "coordin": 5, "assign": 6, "illustr": 6, "aishell4": 6, "constrain": 6, "sourc": 6, "addition": 6, "corpu": 6, "soon": 6, "simpl": 6, "voic": 6, "activ": 6, "detect": 6, "vad": 6, "concaten": 6, "minimum": 6, "permut": 6, "charact": 6, "error": 6, "rate": 6, "calcul": 6, "step": 6, "firstli": 6, "refer": 6, "hypothesi": 6, "chronolog": 6, "order": 6, "secondli": 6, "cer": 6, "repeat": 6, "possibl": 6, "lowest": 6, "tthe": 6, "insert": 6, "Ins": 6, "substitut": 6, "delet": 6, "del": 6, "output": 6, "text": 6, "frac": 6, "mathcal": 6, "n_": 6, "100": 6, "where": 6, "usag": 6, "third": 6, "hug": 6, "face": 6, "list": 6, "clearli": 6, "privat": 6, "manual": 6, "simul": 6, "thei": 6, "mandatori": 6, "clear": 6, "scheme": 6, "delight": 7, "introduct": 7, "contact": 7}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"baselin": 0, "overview": [0, 2], "quick": 0, "start": 0, "result": 0, "contact": 1, "dataset": 2, "train": [2, 6], "data": 2, "detail": 2, "alimeet": 2, "corpu": 2, "get": 2, "introduct": 3, "call": 3, "particip": 3, "timelin": 3, "aoe": 3, "time": 3, "guidelin": 3, "organ": 4, "rule": 5, "track": 6, "evalu": 6, "speaker": 6, "attribut": 6, "asr": 6, "metric": 6, "sub": 6, "arrang": 6, "i": 6, "fix": 6, "condit": 6, "ii": 6, "open": 6, "asru": 7, "2023": 7, "multi": 7, "channel": 7, "parti": 7, "meet": 7, "transcript": 7, "challeng": 7, "2": 7, "0": 7, "m2met2": 7, "content": 7}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"Baseline": [[0, "baseline"]], "Overview": [[0, "overview"]], "Quick start": [[0, "quick-start"]], "Baseline results": [[0, "baseline-results"]], "Contact": [[1, "contact"]], "Datasets": [[2, "datasets"]], "Overview of training data": [[2, "overview-of-training-data"]], "Detail of AliMeeting corpus": [[2, "detail-of-alimeeting-corpus"]], "Get the data": [[2, "get-the-data"]], "Introduction": [[3, "introduction"]], "Call for participation": [[3, "call-for-participation"]], "Timeline(AOE Time)": [[3, "timeline-aoe-time"]], "Guidelines": [[3, "guidelines"]], "Organizers": [[4, "organizers"]], "Rules": [[5, "rules"]], "Track & Evaluation": [[6, "track-evaluation"]], "Speaker-Attributed ASR": [[6, "speaker-attributed-asr"]], "Evaluation metric": [[6, "evaluation-metric"]], "Sub-track arrangement": [[6, "sub-track-arrangement"]], "Sub-track I (Fixed Training Condition):": [[6, "sub-track-i-fixed-training-condition"]], "Sub-track II (Open Training Condition):": [[6, "sub-track-ii-open-training-condition"]], "ASRU 2023 MULTI-CHANNEL MULTI-PARTY MEETING TRANSCRIPTION CHALLENGE 2.0 (M2MeT2.0)": [[7, "asru-2023-multi-channel-multi-party-meeting-transcription-challenge-2-0-m2met2-0"]], "Contents:": [[7, null]]}, "indexentries": {}}) \ No newline at end of file diff --git a/docs/m2met2_cn/_build/doctrees/environment.pickle b/docs/m2met2_cn/_build/doctrees/environment.pickle index fb92f83a0..91ebda67a 100644 Binary files a/docs/m2met2_cn/_build/doctrees/environment.pickle and b/docs/m2met2_cn/_build/doctrees/environment.pickle differ diff --git a/docs/m2met2_cn/_build/doctrees/简介.doctree b/docs/m2met2_cn/_build/doctrees/简介.doctree index 373ee340a..bcc005b07 100644 Binary files a/docs/m2met2_cn/_build/doctrees/简介.doctree and b/docs/m2met2_cn/_build/doctrees/简介.doctree differ diff --git a/docs/m2met2_cn/_build/html/_sources/简介.md.txt b/docs/m2met2_cn/_build/html/_sources/简介.md.txt index 52df97d6b..f43dbabec 100644 --- a/docs/m2met2_cn/_build/html/_sources/简介.md.txt +++ b/docs/m2met2_cn/_build/html/_sources/简介.md.txt @@ -4,7 +4,7 @@ 为了推动会议场景语音识别的发展,已经有很多相关的挑战赛,如 Rich Transcription evaluation 和 CHIME(Computational Hearing in Multisource Environments) 挑战赛。最新的CHIME挑战赛关注于远距离自动语音识别和开发能在各种不同拓扑结构的阵列和应用场景中通用的系统。然而不同语言之间的差异限制了非英语会议转录的进展。MISP(Multimodal Information Based Speech Processing)和M2MeT(Multi-Channel Multi-Party Meeting Transcription)挑战赛为推动普通话会议场景语音识别做出了贡献。MISP挑战赛侧重于用视听多模态的方法解决日常家庭环境中的远距离多麦克风信号处理问题,而M2MeT挑战则侧重于解决离线会议室中会议转录的语音重叠问题。 -ASSP2022 M2MeT挑战的侧重点是会议场景,它包括两个赛道:说话人日记和多说话人自动语音识别。前者涉及识别“谁在什么时候说了话”,而后者旨在同时识别来自多个说话人的语音,语音重叠和各种噪声带来了巨大的技术困难。 +IASSP2022 M2MeT挑战的侧重点是会议场景,它包括两个赛道:说话人日记和多说话人自动语音识别。前者涉及识别“谁在什么时候说了话”,而后者旨在同时识别来自多个说话人的语音,语音重叠和各种噪声带来了巨大的技术困难。 在上一届M2MET成功举办的基础上,我们将在ASRU2023上继续举办M2MET2.0挑战赛。在上一届M2MET挑战赛中,评估指标是说话人无关的,我们只能得到识别文本,而不能确定相应的说话人。 为了解决这一局限性并将现在的多说话人语音识别系统推向实用化,M2MET2.0挑战赛将在说话人相关的人物上评估,并且同时设立限定数据与不限定数据两个子赛道。通过将语音归属于特定的说话人,这项任务旨在提高多说话人ASR系统在真实世界环境中的准确性和适用性。 diff --git a/docs/m2met2_cn/_build/html/searchindex.js b/docs/m2met2_cn/_build/html/searchindex.js index c9fe167cc..c2af3e243 100644 --- a/docs/m2met2_cn/_build/html/searchindex.js +++ b/docs/m2met2_cn/_build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["index", "\u57fa\u7ebf", "\u6570\u636e\u96c6", "\u7b80\u4ecb", "\u7ec4\u59d4\u4f1a", "\u8054\u7cfb\u65b9\u5f0f", "\u89c4\u5219", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30"], "filenames": ["index.rst", "\u57fa\u7ebf.md", "\u6570\u636e\u96c6.md", "\u7b80\u4ecb.md", "\u7ec4\u59d4\u4f1a.md", "\u8054\u7cfb\u65b9\u5f0f.md", "\u89c4\u5219.md", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30.md"], "titles": ["ASRU 2023 \u591a\u901a\u9053\u591a\u65b9\u4f1a\u8bae\u8f6c\u5f55\u6311\u6218 2.0", "\u57fa\u7ebf", "\u6570\u636e\u96c6", "\u7b80\u4ecb", "\u7ec4\u59d4\u4f1a", "\u8054\u7cfb\u65b9\u5f0f", "\u7ade\u8d5b\u89c4\u5219", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30"], "terms": {"m2met": [0, 3, 5, 7], "asru2023": [0, 3], "m2met2": [0, 3, 5, 7], "funasr": 1, "sa": 1, "asr": [1, 3, 7], "speakerencod": 1, "modelscop": [1, 7], "todo": 1, "fill": 1, "with": 1, "the": 1, "readm": 1, "md": 1, "of": 1, "baselin": [1, 2], "aishel": [2, 7], "cn": [2, 4, 7], "celeb": [2, 7], "test": [2, 6, 7], "2023": [2, 3, 6, 7], "118": 2, "75": 2, "104": 2, "train": 2, "eval": [2, 6], "10": [2, 3, 7], "212": 2, "15": [2, 3], "30": 2, "456": 2, "25": 2, "13": [2, 3], "55": 2, "42": 2, "27": 2, "34": 2, "76": 2, "20": 2, "textgrid": 2, "id": 2, "openslr": 2, "automat": 3, "speech": 3, "recognit": 3, "speaker": 3, "diariz": 3, "rich": 3, "transcript": 3, "evalu": 3, "chime": 3, "comput": 3, "hear": 3, "in": 3, "multisourc": 3, "environ": 3, "misp": 3, "multimod": 3, "inform": 3, "base": 3, "process": 3, "multi": 3, "channel": 3, "parti": 3, "meet": 3, "assp2022": 3, "29": 3, "19": 3, "12": 3, "asru": 3, "workshop": 3, "challeng": 3, "session": 3, "lxie": 4, "nwpu": 4, "edu": 4, "kong": 4, "aik": 4, "lee": 4, "star": 4, "kongaik": 4, "ieee": 4, "org": 4, "zhiji": 4, "yzj": 4, "alibaba": 4, "inc": 4, "com": [4, 5], "sli": 4, "zsl": 4, "yanminqian": 4, "sjtu": 4, "zhuc": 4, "microsoft": 4, "wujian": 4, "ceo": 4, "buhui": 4, "aishelldata": 4, "alimeet": [5, 7], "gmail": 5, "cpcer": [6, 7], "las": 6, "rnnt": 6, "transform": 6, "aishell4": 7, "vad": 7, "cer": 7, "ins": 7, "sub": 7, "del": 7, "text": 7, "frac": 7, "mathcal": 7, "n_": 7, "total": 7, "time": 7, "100": 7, "hug": 7, "face": 7}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"asru": 0, "2023": 0, "alimeet": 2, "aoe": 3}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"ASRU 2023 \u591a\u901a\u9053\u591a\u65b9\u4f1a\u8bae\u8f6c\u5f55\u6311\u6218 2.0": [[0, "asru-2023-2-0"]], "\u76ee\u5f55:": [[0, null]], "\u57fa\u7ebf": [[1, "id1"]], "\u57fa\u7ebf\u6982\u8ff0": [[1, "id2"]], "\u5feb\u901f\u5f00\u59cb": [[1, "id3"]], "\u57fa\u7ebf\u7ed3\u679c": [[1, "id4"]], "\u6570\u636e\u96c6": [[2, "id1"]], "\u6570\u636e\u96c6\u6982\u8ff0": [[2, "id2"]], "Alimeeting\u6570\u636e\u96c6\u4ecb\u7ecd": [[2, "alimeeting"]], "\u83b7\u53d6\u6570\u636e": [[2, "id3"]], "\u7b80\u4ecb": [[3, "id1"]], "\u7ade\u8d5b\u4ecb\u7ecd": [[3, "id2"]], "\u65f6\u95f4\u5b89\u6392(AOE\u65f6\u95f4)": [[3, "aoe"]], "\u7ade\u8d5b\u62a5\u540d": [[3, "id3"]], "\u7ec4\u59d4\u4f1a": [[4, "id1"]], "\u8054\u7cfb\u65b9\u5f0f": [[5, "id1"]], "\u7ade\u8d5b\u89c4\u5219": [[6, "id1"]], "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30": [[7, "id1"]], "\u8bf4\u8bdd\u4eba\u76f8\u5173\u7684\u8bed\u97f3\u8bc6\u522b": [[7, "id2"]], "\u8bc4\u4f30\u65b9\u6cd5": [[7, "id3"]], "\u5b50\u8d5b\u9053\u8bbe\u7f6e": [[7, "id4"]], "\u5b50\u8d5b\u9053\u4e00 (\u9650\u5b9a\u8bad\u7ec3\u6570\u636e):": [[7, "id5"]], "\u5b50\u8d5b\u9053\u4e8c (\u5f00\u653e\u8bad\u7ec3\u6570\u636e):": [[7, "id6"]]}, "indexentries": {}}) \ No newline at end of file +Search.setIndex({"docnames": ["index", "\u57fa\u7ebf", "\u6570\u636e\u96c6", "\u7b80\u4ecb", "\u7ec4\u59d4\u4f1a", "\u8054\u7cfb\u65b9\u5f0f", "\u89c4\u5219", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30"], "filenames": ["index.rst", "\u57fa\u7ebf.md", "\u6570\u636e\u96c6.md", "\u7b80\u4ecb.md", "\u7ec4\u59d4\u4f1a.md", "\u8054\u7cfb\u65b9\u5f0f.md", "\u89c4\u5219.md", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30.md"], "titles": ["ASRU 2023 \u591a\u901a\u9053\u591a\u65b9\u4f1a\u8bae\u8f6c\u5f55\u6311\u6218 2.0", "\u57fa\u7ebf", "\u6570\u636e\u96c6", "\u7b80\u4ecb", "\u7ec4\u59d4\u4f1a", "\u8054\u7cfb\u65b9\u5f0f", "\u7ade\u8d5b\u89c4\u5219", "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30"], "terms": {"m2met": [0, 3, 5, 7], "asru2023": [0, 3], "m2met2": [0, 3, 5, 7], "funasr": 1, "sa": 1, "asr": [1, 3, 7], "speakerencod": 1, "modelscop": [1, 7], "todo": 1, "fill": 1, "with": 1, "the": 1, "readm": 1, "md": 1, "of": 1, "baselin": [1, 2], "aishel": [2, 7], "cn": [2, 4, 7], "celeb": [2, 7], "test": [2, 6, 7], "2023": [2, 3, 6, 7], "118": 2, "75": 2, "104": 2, "train": 2, "eval": [2, 6], "10": [2, 3, 7], "212": 2, "15": [2, 3], "30": 2, "456": 2, "25": 2, "13": [2, 3], "55": 2, "42": 2, "27": 2, "34": 2, "76": 2, "20": 2, "textgrid": 2, "id": 2, "openslr": 2, "automat": 3, "speech": 3, "recognit": 3, "speaker": 3, "diariz": 3, "rich": 3, "transcript": 3, "evalu": 3, "chime": 3, "comput": 3, "hear": 3, "in": 3, "multisourc": 3, "environ": 3, "misp": 3, "multimod": 3, "inform": 3, "base": 3, "process": 3, "multi": 3, "channel": 3, "parti": 3, "meet": 3, "iassp2022": 3, "29": 3, "19": 3, "12": 3, "asru": 3, "workshop": 3, "challeng": 3, "session": 3, "lxie": 4, "nwpu": 4, "edu": 4, "kong": 4, "aik": 4, "lee": 4, "star": 4, "kongaik": 4, "ieee": 4, "org": 4, "zhiji": 4, "yzj": 4, "alibaba": 4, "inc": 4, "com": [4, 5], "sli": 4, "zsl": 4, "yanminqian": 4, "sjtu": 4, "zhuc": 4, "microsoft": 4, "wujian": 4, "ceo": 4, "buhui": 4, "aishelldata": 4, "alimeet": [5, 7], "gmail": 5, "cpcer": [6, 7], "las": 6, "rnnt": 6, "transform": 6, "aishell4": 7, "vad": 7, "cer": 7, "ins": 7, "sub": 7, "del": 7, "text": 7, "frac": 7, "mathcal": 7, "n_": 7, "total": 7, "time": 7, "100": 7, "hug": 7, "face": 7}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"asru": 0, "2023": 0, "alimeet": 2, "aoe": 3}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"ASRU 2023 \u591a\u901a\u9053\u591a\u65b9\u4f1a\u8bae\u8f6c\u5f55\u6311\u6218 2.0": [[0, "asru-2023-2-0"]], "\u76ee\u5f55:": [[0, null]], "\u57fa\u7ebf": [[1, "id1"]], "\u57fa\u7ebf\u6982\u8ff0": [[1, "id2"]], "\u5feb\u901f\u5f00\u59cb": [[1, "id3"]], "\u57fa\u7ebf\u7ed3\u679c": [[1, "id4"]], "\u6570\u636e\u96c6": [[2, "id1"]], "\u6570\u636e\u96c6\u6982\u8ff0": [[2, "id2"]], "Alimeeting\u6570\u636e\u96c6\u4ecb\u7ecd": [[2, "alimeeting"]], "\u83b7\u53d6\u6570\u636e": [[2, "id3"]], "\u7b80\u4ecb": [[3, "id1"]], "\u7ade\u8d5b\u4ecb\u7ecd": [[3, "id2"]], "\u65f6\u95f4\u5b89\u6392(AOE\u65f6\u95f4)": [[3, "aoe"]], "\u7ade\u8d5b\u62a5\u540d": [[3, "id3"]], "\u7ec4\u59d4\u4f1a": [[4, "id1"]], "\u8054\u7cfb\u65b9\u5f0f": [[5, "id1"]], "\u7ade\u8d5b\u89c4\u5219": [[6, "id1"]], "\u8d5b\u9053\u8bbe\u7f6e\u4e0e\u8bc4\u4f30": [[7, "id1"]], "\u8bf4\u8bdd\u4eba\u76f8\u5173\u7684\u8bed\u97f3\u8bc6\u522b": [[7, "id2"]], "\u8bc4\u4f30\u65b9\u6cd5": [[7, "id3"]], "\u5b50\u8d5b\u9053\u8bbe\u7f6e": [[7, "id4"]], "\u5b50\u8d5b\u9053\u4e00 (\u9650\u5b9a\u8bad\u7ec3\u6570\u636e):": [[7, "id5"]], "\u5b50\u8d5b\u9053\u4e8c (\u5f00\u653e\u8bad\u7ec3\u6570\u636e):": [[7, "id6"]]}, "indexentries": {}}) \ No newline at end of file diff --git a/docs/m2met2_cn/_build/html/简介.html b/docs/m2met2_cn/_build/html/简介.html index f1da18e63..b53850647 100644 --- a/docs/m2met2_cn/_build/html/简介.html +++ b/docs/m2met2_cn/_build/html/简介.html @@ -130,7 +130,7 @@

竞赛介绍

语音识别(Automatic Speech Recognition)、说话人日志(Speaker Diarization)等语音处理技术的最新发展激发了众多智能语音的广泛应用。然而会议场景由于其复杂的声学条件和不同的讲话风格,包括重叠的讲话、不同数量的发言者、大会议室的远场信号以及环境噪声和混响,仍然属于一项极具挑战性的任务。

为了推动会议场景语音识别的发展,已经有很多相关的挑战赛,如 Rich Transcription evaluation 和 CHIME(Computational Hearing in Multisource Environments) 挑战赛。最新的CHIME挑战赛关注于远距离自动语音识别和开发能在各种不同拓扑结构的阵列和应用场景中通用的系统。然而不同语言之间的差异限制了非英语会议转录的进展。MISP(Multimodal Information Based Speech Processing)和M2MeT(Multi-Channel Multi-Party Meeting Transcription)挑战赛为推动普通话会议场景语音识别做出了贡献。MISP挑战赛侧重于用视听多模态的方法解决日常家庭环境中的远距离多麦克风信号处理问题,而M2MeT挑战则侧重于解决离线会议室中会议转录的语音重叠问题。

-

ASSP2022 M2MeT挑战的侧重点是会议场景,它包括两个赛道:说话人日记和多说话人自动语音识别。前者涉及识别“谁在什么时候说了话”,而后者旨在同时识别来自多个说话人的语音,语音重叠和各种噪声带来了巨大的技术困难。

+

IASSP2022 M2MeT挑战的侧重点是会议场景,它包括两个赛道:说话人日记和多说话人自动语音识别。前者涉及识别“谁在什么时候说了话”,而后者旨在同时识别来自多个说话人的语音,语音重叠和各种噪声带来了巨大的技术困难。

在上一届M2MET成功举办的基础上,我们将在ASRU2023上继续举办M2MET2.0挑战赛。在上一届M2MET挑战赛中,评估指标是说话人无关的,我们只能得到识别文本,而不能确定相应的说话人。 为了解决这一局限性并将现在的多说话人语音识别系统推向实用化,M2MET2.0挑战赛将在说话人相关的人物上评估,并且同时设立限定数据与不限定数据两个子赛道。通过将语音归属于特定的说话人,这项任务旨在提高多说话人ASR系统在真实世界环境中的准确性和适用性。 我们对数据集、规则、基线系统和评估方法进行了详细介绍,以进一步促进多说话人语音识别领域研究的发展。此外,我们将根据时间表发布一个全新的测试集,包括大约10小时的音频。

diff --git a/docs/m2met2_cn/简介.md b/docs/m2met2_cn/简介.md index 52df97d6b..f43dbabec 100644 --- a/docs/m2met2_cn/简介.md +++ b/docs/m2met2_cn/简介.md @@ -4,7 +4,7 @@ 为了推动会议场景语音识别的发展,已经有很多相关的挑战赛,如 Rich Transcription evaluation 和 CHIME(Computational Hearing in Multisource Environments) 挑战赛。最新的CHIME挑战赛关注于远距离自动语音识别和开发能在各种不同拓扑结构的阵列和应用场景中通用的系统。然而不同语言之间的差异限制了非英语会议转录的进展。MISP(Multimodal Information Based Speech Processing)和M2MeT(Multi-Channel Multi-Party Meeting Transcription)挑战赛为推动普通话会议场景语音识别做出了贡献。MISP挑战赛侧重于用视听多模态的方法解决日常家庭环境中的远距离多麦克风信号处理问题,而M2MeT挑战则侧重于解决离线会议室中会议转录的语音重叠问题。 -ASSP2022 M2MeT挑战的侧重点是会议场景,它包括两个赛道:说话人日记和多说话人自动语音识别。前者涉及识别“谁在什么时候说了话”,而后者旨在同时识别来自多个说话人的语音,语音重叠和各种噪声带来了巨大的技术困难。 +IASSP2022 M2MeT挑战的侧重点是会议场景,它包括两个赛道:说话人日记和多说话人自动语音识别。前者涉及识别“谁在什么时候说了话”,而后者旨在同时识别来自多个说话人的语音,语音重叠和各种噪声带来了巨大的技术困难。 在上一届M2MET成功举办的基础上,我们将在ASRU2023上继续举办M2MET2.0挑战赛。在上一届M2MET挑战赛中,评估指标是说话人无关的,我们只能得到识别文本,而不能确定相应的说话人。 为了解决这一局限性并将现在的多说话人语音识别系统推向实用化,M2MET2.0挑战赛将在说话人相关的人物上评估,并且同时设立限定数据与不限定数据两个子赛道。通过将语音归属于特定的说话人,这项任务旨在提高多说话人ASR系统在真实世界环境中的准确性和适用性。