WEKO3
アイテム
{"_buckets": {"deposit": "547a7e0c-c343-4a12-b589-5932d4b0ed63"}, "_deposit": {"created_by": 4, "id": "4462", "owners": [4], "pid": {"revision_id": 0, "type": "depid", "value": "4462"}, "status": "published"}, "_oai": {"id": "oai:naist.repo.nii.ac.jp:00004462", "sets": ["36"]}, "author_link": ["11282", "331", "11283", "11284", "11285"], "item_1698715929687": {"attribute_name": "会議情報", "attribute_value_mlt": [{"subitem_conference_country": "SGP", "subitem_conference_date": {"subitem_conference_date_language": "en", "subitem_conference_period": "December 14-17, 2010"}, "subitem_conference_names": [{"subitem_conference_name": "APSIPA Annual Summit and Conference 2010", "subitem_conference_name_language": "en"}], "subitem_conference_places": [{"subitem_conference_place": "Biopolis", "subitem_conference_place_language": "en"}]}]}, "item_9_biblio_info_7": {"attribute_name": "書誌情報", "attribute_value_mlt": [{"bibliographicIssueDates": {"bibliographicIssueDate": "2010-12", "bibliographicIssueDateType": "Issued"}, "bibliographicPageEnd": "489", "bibliographicPageStart": "486"}]}, "item_9_description_5": {"attribute_name": "抄録", "attribute_value_mlt": [{"subitem_description": "In this work, we address the classification in topics of utterances in Japanese received by a speech-oriented guidance system operating in a real environment. The implementation of this kind of systems requires the collection and manual labeling of actual user\u0027s utterances, which is a costly process. Because of this, we are interested in evaluating the influence of the amount of data for training in the context of topic classification. For this, we compared the performance of a Support Vector Machine and a Maximum Entropy classifier using training data of different sizes. We used actual data collected by the speech-oriented guidance system Takemaru-kun, from adults and children, and also evaluated the effect of automatic speech recognition (ASR) errors in the classification performance. 
To deal with the shortness of the utterances we proposed to use characters as features, which is possible with the Japanese language due to the presence of kanji; ideograms from Chinese characters that represent not only sound but meaning. Experimental results show an average performance decrease of 4.6% for ASR results of utterances from adults, and 2.8% for children, when reducing the amount of data for training to its 25%; and a classification performance improvement from 92.2% to 94.1% for adults and 87.2% to 88.3% for children, when using character as features instead of words.", "subitem_description_language": "en", "subitem_description_type": "Abstract"}]}, "item_9_rights_14": {"attribute_name": "権利", "attribute_value_mlt": [{"subitem_rights": "Copyright 2010 APSIPA", "subitem_rights_language": "en"}]}, "item_9_text_21": {"attribute_name": "NAIST ID", "attribute_value_mlt": [{"subitem_text_value": "73292492"}]}, "item_9_version_type_16": {"attribute_name": "著者版フラグ", "attribute_value_mlt": [{"subitem_version_resource": "http://purl.org/coar/version/c_970fb48d4fbd8a85", "subitem_version_type": "VoR"}]}, "item_access_right": {"attribute_name": "アクセス権", "attribute_value_mlt": [{"subitem_access_right": "open access", "subitem_access_right_uri": "http://purl.org/coar/access_right/c_abf2"}]}, "item_creator": {"attribute_name": "著者", "attribute_type": "creator", "attribute_value_mlt": [{"creatorNames": [{"creatorName": "Torres, Rafael", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "11282", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Kawanami, Hiromichi", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "331", "nameIdentifierScheme": "WEKO"}, {"nameIdentifier": "80335489", "nameIdentifierScheme": "e-Rad", "nameIdentifierURI": "https://kaken.nii.ac.jp/ja/search/?qm=80335489"}]}, {"creatorNames": [{"creatorName": "Matsui, Tomoko", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "11283", 
"nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Saruwatari, Hiroshi", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "11284", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Shikano, Kiyohiro", "creatorNameLang": "en"}], "nameIdentifiers": [{"nameIdentifier": "11285", "nameIdentifierScheme": "WEKO"}]}]}, "item_files": {"attribute_name": "ファイル情報", "attribute_type": "file", "attribute_value_mlt": [{"accessrole": "open_date", "date": [{"dateType": "Available", "dateValue": "2023-03-02"}], "displaytype": "detail", "download_preview_message": "", "file_order": 0, "filename": "APSIPA_2010_486.pdf", "filesize": [{"value": "1.5 MB"}], "format": "application/pdf", "future_date_message": "", "is_thumbnail": false, "licensetype": "license_note", "mimetype": "application/pdf", "size": 1500000.0, "url": {"label": "fulltext", "objectType": "fulltext", "url": "https://naist.repo.nii.ac.jp/record/4462/files/APSIPA_2010_486.pdf"}, "version_id": "c7480431-9c5a-4a97-89f6-c030f5bc99e9"}]}, "item_language": {"attribute_name": "言語", "attribute_value_mlt": [{"subitem_language": "eng"}]}, "item_resource_type": {"attribute_name": "資源タイプ", "attribute_value_mlt": [{"resourcetype": "conference paper", "resourceuri": "http://purl.org/coar/resource_type/c_5794"}]}, "item_title": "Training Data Size Requirements for Topic Classification in a Speech-Oriented Guidance System", "item_titles": {"attribute_name": "タイトル", "attribute_value_mlt": [{"subitem_title": "Training Data Size Requirements for Topic Classification in a Speech-Oriented Guidance System", "subitem_title_language": "en"}]}, "item_type_id": "9", "owner": "4", "path": ["36"], "permalink_uri": "http://hdl.handle.net/10061/7894", "pubdate": {"attribute_name": "PubDate", "attribute_value": "2012-08-22"}, "publish_date": "2012-08-22", "publish_status": "0", "recid": "4462", "relation": {}, "relation_version_is_last": true, "title": ["Training Data Size Requirements for Topic 
Classification in a Speech-Oriented Guidance System"], "weko_shared_id": -1}
Training Data Size Requirements for Topic Classification in a Speech-Oriented Guidance System
http://hdl.handle.net/10061/7894
http://hdl.handle.net/10061/7894 02cb8457-57ac-4a82-964a-05426c437371
名前 / ファイル | ライセンス | アクション |
---|---|---|
fulltext (1.5 MB)
|
|
Item type | 会議発表論文 / Conference Paper(1) | |||||
---|---|---|---|---|---|---|
公開日 | 2012-08-22 | |||||
タイトル | ||||||
タイトル | Training Data Size Requirements for Topic Classification in a Speech-Oriented Guidance System | |||||
言語 | ||||||
言語 | eng | |||||
資源タイプ | ||||||
資源タイプ | conference paper | |||||
アクセス権 | ||||||
アクセス権 | open access | |||||
著者 |
Torres, Rafael
× Torres, Rafael × Kawanami, Hiromichi × Matsui, Tomoko × Saruwatari, Hiroshi × Shikano, Kiyohiro |
|||||
抄録 | ||||||
内容記述タイプ | Abstract | |||||
内容記述 | In this work, we address the classification in topics of utterances in Japanese received by a speech-oriented guidance system operating in a real environment. The implementation of this kind of systems requires the collection and manual labeling of actual user's utterances, which is a costly process. Because of this, we are interested in evaluating the influence of the amount of data for training in the context of topic classification. For this, we compared the performance of a Support Vector Machine and a Maximum Entropy classifier using training data of different sizes. We used actual data collected by the speech-oriented guidance system Takemaru-kun, from adults and children, and also evaluated the effect of automatic speech recognition (ASR) errors in the classification performance. To deal with the shortness of the utterances we proposed to use characters as features, which is possible with the Japanese language due to the presence of kanji; ideograms from Chinese characters that represent not only sound but meaning. Experimental results show an average performance decrease of 4.6% for ASR results of utterances from adults, and 2.8% for children, when reducing the amount of data for training to its 25%; and a classification performance improvement from 92.2% to 94.1% for adults and 87.2% to 88.3% for children, when using character as features instead of words. | |||||
書誌情報 |
p. 486-489, 発行日 2010-12 |
|||||
会議情報 | ||||||
会議名 | APSIPA Annual Summit and Conference 2010 | |||||
開催期間 | December 14-17, 2010 | |||||
開催地 | Biopolis | |||||
開催国 | SGP | |||||
権利 | ||||||
権利情報 | Copyright 2010 APSIPA | |||||
著者版フラグ | ||||||
出版タイプ | VoR |