WEKO3
アイテム
{"_buckets": {"deposit": "35d28d4d-ff44-482b-ab44-48bbd172ce3d"}, "_deposit": {"created_by": 4, "id": "4709", "owners": [4], "pid": {"revision_id": 0, "type": "depid", "value": "4709"}, "status": "published"}, "_oai": {"id": "oai:naist.repo.nii.ac.jp:00004709", "sets": ["36"]}, "author_link": ["12330", "341", "12331", "12332"], "item_1698715929687": {"attribute_name": "会議情報", "attribute_value_mlt": [{"subitem_conference_country": "USA", "subitem_conference_date": {"subitem_conference_date_language": "en", "subitem_conference_period": "September 17-21, 2006"}, "subitem_conference_names": [{"subitem_conference_name": "INTERSPEECH2006: the 9th International Conference on Spoken Language Processing (ICSLP)", "subitem_conference_name_language": "en"}], "subitem_conference_places": [{"subitem_conference_place": "Pittsburgh Pennsylvania", "subitem_conference_place_language": "en"}]}]}, "item_9_biblio_info_7": {"attribute_name": "書誌情報", "attribute_value_mlt": [{"bibliographicIssueDates": {"bibliographicIssueDate": "2006-09", "bibliographicIssueDateType": "Issued"}, "bibliographicPageEnd": "1725", "bibliographicPageStart": "1722"}]}, "item_9_description_5": {"attribute_name": "抄録", "attribute_value_mlt": [{"subitem_description": "The construction of high-performance acoustic models for certain speech recognition tasks is very costly and time-consuming, since it most often requires the collection and transcription of large amounts of task-specific speech data. In this paper acoustic modeling for spoken dialogue systems based on unsupervised selective training is examined. The main idea is to select those training utterances from an (untranscribed) speech data pool, so that the likelihood of a separate small (transcribed) development speech data set is maximized. If only the selected data are employed to retrain the initial acoustic models, a better performance is achieved than when retraining with all collected data. 
Using the proposed approach it is also possible to considerably reduce the costs for human-labeling of the speech data without compromising the performance. Furthermore, the method provides means for automatic task-adaptation of acoustic models, e.g. to adult or children speech. This is important, since detailed information about each automatically collected utterance is usually not available.", "subitem_description_language": "en", "subitem_description_type": "Abstract"}]}, "item_9_rights_14": {"attribute_name": "権利", "attribute_value_mlt": [{"subitem_rights": "Copyright 2006 ISCA", "subitem_rights_language": "en"}]}, "item_9_text_21": {"attribute_name": "NAIST ID", "attribute_value_mlt": [{"subitem_text_value": "73292716"}]}, "item_9_text_22": {"attribute_name": "artnum", "attribute_value_mlt": [{"subitem_text_value": "1481"}]}, "item_9_version_type_16": {"attribute_name": "著者版フラグ", "attribute_value_mlt": [{"subitem_version_resource": "http://purl.org/coar/version/c_970fb48d4fbd8a85", "subitem_version_type": "VoR"}]}, "item_access_right": {"attribute_name": "アクセス権", "attribute_value_mlt": [{"subitem_access_right": "open access", "subitem_access_right_uri": "http://purl.org/coar/access_right/c_abf2"}]}, "item_creator": {"attribute_name": "著者", "attribute_type": "creator", "attribute_value_mlt": [{"creatorNames": [{"creatorName": "Cincarek, Tobias", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "12330", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Toda, Tomoki", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "341", "nameIdentifierScheme": "WEKO"}, {"nameIdentifier": "90403328", "nameIdentifierScheme": "e-Rad", "nameIdentifierURI": "https://kaken.nii.ac.jp/ja/search/?qm=90403328"}]}, {"creatorNames": [{"creatorName": "Saruwatari, Hiroshi", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "12331", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Shikano, Kiyohiro", 
"creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "12332", "nameIdentifierScheme": "WEKO"}]}]}, "item_files": {"attribute_name": "ファイル情報", "attribute_type": "file", "attribute_value_mlt": [{"accessrole": "open_date", "date": [{"dateType": "Available", "dateValue": "2023-03-02"}], "displaytype": "detail", "download_preview_message": "", "file_order": 0, "filename": "INTERSPEECH_2006_1722.pdf", "filesize": [{"value": "661.8 kB"}], "format": "application/pdf", "future_date_message": "", "is_thumbnail": false, "licensetype": "license_note", "mimetype": "application/pdf", "size": 661800.0, "url": {"label": "fulltext", "objectType": "fulltext", "url": "https://naist.repo.nii.ac.jp/record/4709/files/INTERSPEECH_2006_1722.pdf"}, "version_id": "d26a8b63-5452-440a-a3f9-e17b700f63a0"}]}, "item_keyword": {"attribute_name": "キーワード", "attribute_value_mlt": [{"subitem_subject": "speech recognition", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}, {"subitem_subject": "acoustic model", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}, {"subitem_subject": "unsupervised training", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}, {"subitem_subject": "utterance-based selection", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}, {"subitem_subject": "spoken dialogue system", "subitem_subject_language": "en", "subitem_subject_scheme": "Other"}]}, "item_language": {"attribute_name": "言語", "attribute_value_mlt": [{"subitem_language": "eng"}]}, "item_resource_type": {"attribute_name": "資源タイプ", "attribute_value_mlt": [{"resourcetype": "conference paper", "resourceuri": "http://purl.org/coar/resource_type/c_5794"}]}, "item_title": "Acoustic Modeling for Spoken Dialogue Systems Based on Unsupervised Utterance-based Selective Training", "item_titles": {"attribute_name": "タイトル", "attribute_value_mlt": [{"subitem_title": "Acoustic Modeling for Spoken Dialogue Systems Based on Unsupervised 
Utterance-based Selective Training", "subitem_title_language": "en"}]}, "item_type_id": "9", "owner": "4", "path": ["36"], "permalink_uri": "http://hdl.handle.net/10061/8141", "pubdate": {"attribute_name": "PubDate", "attribute_value": "2012-08-22"}, "publish_date": "2012-08-22", "publish_status": "0", "recid": "4709", "relation": {}, "relation_version_is_last": true, "title": ["Acoustic Modeling for Spoken Dialogue Systems Based on Unsupervised Utterance-based Selective Training"], "weko_shared_id": -1}
Acoustic Modeling for Spoken Dialogue Systems Based on Unsupervised Utterance-based Selective Training
http://hdl.handle.net/10061/8141
http://hdl.handle.net/10061/8141 41e3bdb6-77f2-4d1f-ad07-541506b7d211
名前 / ファイル | ライセンス | アクション |
---|---|---|
fulltext (661.8 kB) | | |
Item type | 会議発表論文 / Conference Paper(1) | |||||
---|---|---|---|---|---|---|
公開日 | 2012-08-22 | |||||
タイトル | ||||||
タイトル | Acoustic Modeling for Spoken Dialogue Systems Based on Unsupervised Utterance-based Selective Training | |||||
言語 | ||||||
言語 | eng | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | speech recognition | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | acoustic model | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | unsupervised training | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | utterance-based selection | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | spoken dialogue system | |||||
資源タイプ | ||||||
資源タイプ | conference paper | |||||
アクセス権 | ||||||
アクセス権 | open access | |||||
著者 | Cincarek, Tobias; Toda, Tomoki; Saruwatari, Hiroshi; Shikano, Kiyohiro | |||||
抄録 | ||||||
内容記述タイプ | Abstract | |||||
内容記述 | The construction of high-performance acoustic models for certain speech recognition tasks is very costly and time-consuming, since it most often requires the collection and transcription of large amounts of task-specific speech data. In this paper acoustic modeling for spoken dialogue systems based on unsupervised selective training is examined. The main idea is to select those training utterances from an (untranscribed) speech data pool, so that the likelihood of a separate small (transcribed) development speech data set is maximized. If only the selected data are employed to retrain the initial acoustic models, a better performance is achieved than when retraining with all collected data. Using the proposed approach it is also possible to considerably reduce the costs for human-labeling of the speech data without compromising the performance. Furthermore, the method provides means for automatic task-adaptation of acoustic models, e.g. to adult or children speech. This is important, since detailed information about each automatically collected utterance is usually not available. | |||||
書誌情報 | p. 1722-1725, 発行日 2006-09 | |||||
artnum | ||||||
1481 | ||||||
会議情報 | ||||||
会議名 | INTERSPEECH2006: the 9th International Conference on Spoken Language Processing (ICSLP) | |||||
開催期間 | September 17-21, 2006 | |||||
開催地 | Pittsburgh Pennsylvania | |||||
開催国 | USA | |||||
権利 | ||||||
権利情報 | Copyright 2006 ISCA | |||||
著者版フラグ | ||||||
出版タイプ | VoR |