
{"id":2586,"date":"2023-12-17T19:32:00","date_gmt":"2023-12-17T11:32:00","guid":{"rendered":"https:\/\/infernews.com\/?p=2586"},"modified":"2024-05-17T19:36:09","modified_gmt":"2024-05-17T11:36:09","slug":"%e5%a4%a7%e8%a6%8f%e6%a8%a1%e4%b8%ad%e6%96%87%e8%87%aa%e7%84%b6%e8%aa%9e%e8%a8%80%e8%99%95%e7%90%86-nlp_chinese_corpus","status":"publish","type":"post","link":"https:\/\/infernews.com\/blog\/%e5%a4%a7%e8%a6%8f%e6%a8%a1%e4%b8%ad%e6%96%87%e8%87%aa%e7%84%b6%e8%aa%9e%e8%a8%80%e8%99%95%e7%90%86-nlp_chinese_corpus\/","title":{"rendered":"\u5927\u898f\u6a21\u4e2d\u6587\u81ea\u7136\u8a9e\u8a00\u8655\u7406 nlp_chinese_corpus"},"content":{"rendered":"<div class=\"vlp-link-container vlp-layout-spotlight-clone wp-block-visual-link-preview-link\"><a href=\"https:\/\/github.com\/brightmart\/nlp_chinese_corpus\" class=\"vlp-link\" title=\"\" rel=\"nofollow\" target=\"_blank\"><\/a><div class=\"vlp-layout-zone-main\"><div class=\"vlp-block-1 vlp-link-summary\">\u5927\u89c4\u6a21\u4e2d\u6587\u81ea\u7136\u8bed\u8a00\u5904\u7406\u8bed\u6599 Large Scale Chinese Corpus for NLP. 
Contribute to brightmart\/nlp_chinese_corpus development by creating an account on GitHub.<\/div><div class=\"vlp-block-2 vlp-link-image\"><img decoding=\"async\" src=\"https:\/\/opengraph.githubassets.com\/0647eb9e53a4bf342e63699c36e92d63a07c661d181353c62957556815af1c19\/brightmart\/nlp_chinese_corpus\" style=\"max-width: 1024px; max-height: 1024px\" \/><\/div><\/div><\/div>\n\n\n<p>\u8a9e\u6599\u5eab\u5c07\u6703\u4e0d\u65b7\u64f4\u5145\u3002\u3002\u3002<\/p>\n\n\n\n<p>\u4e00\u671f\u76ee\u6a19\uff1a10\u500b\u767e\u842c\u7d1a\u4e2d\u6587\u8a9e\u6599 &amp; 3\u500b\u5343\u842c\u7d1a\u4e2d\u6587\u8a9e\u6599(2019\u5e745\u67081\u865f)<\/p>\n\n\n\n<p>\u4e8c\u671f\u76ee\u6a19\uff1a30\u500b\u767e\u842c\u7d1a\u4e2d\u6587\u8a9e\u6599 &amp; 10\u500b\u5343\u842c\u7d1a\u4e2d\u6587\u8a9e\u6599 &amp; 1\u500b\u5104\u7d1a\u4e2d\u6587\u8a9e\u6599\uff082019\u5e7412\u670831\u65e5\uff09<\/p>\n\n\n\n<p>Update\uff1a \u589e\u52a0\u9ad8\u8cea\u91cf\u793e\u5340\u554f\u7b54json\u7248(webtext2019zh)\uff0c\u53ef\u7528\u65bc\u8a13\u7df4\u8d85\u5927\u898f\u6a21NLP\u6a21\u578b\uff1b\u6dfb\u52a0520\u842c\u7ffb\u8b6f\u8a9e\u6599(translation2019zh)\u3002<\/p>\n\n\n\n<p>1.<strong>\u7dad\u57fa\u767e\u79d1<\/strong>(wiki2019zh)<strong>\uff0c<\/strong>100<strong>\u842c\u500b\u7d50\u69cb\u826f\u597d\u7684\u4e2d\u6587\u8a5e\u689d<\/strong><a href=\"https:\/\/github.com\/brightmart\/nlp_chinese_corpus#1%E7%BB%B4%E5%9F%BA%E7%99%BE%E7%A7%91wiki2019zh100%E4%B8%87%E4%B8%AA%E7%BB%93%E6%9E%84%E8%89%AF%E5%A5%BD%E7%9A%84%E4%B8%AD%E6%96%87%E8%AF%8D%E6%9D%A1\"><\/a><\/p>\n\n\n\n<p>2.<strong>\u65b0\u805e\u8a9e\u6599<\/strong>(news2016zh)<strong>\uff0c<\/strong>250<strong>\u842c\u7bc7\u65b0\u805e\uff0c\u542b\u95dc\u9375\u8a5e\u3001\u63cf\u8ff0<\/strong><a 
href=\"https:\/\/github.com\/brightmart\/nlp_chinese_corpus#2%E6%96%B0%E9%97%BB%E8%AF%AD%E6%96%99news2016zh250%E4%B8%87%E7%AF%87%E6%96%B0%E9%97%BB%E5%90%AB%E5%85%B3%E9%94%AE%E8%AF%8D%E6%8F%8F%E8%BF%B0\"><\/a><\/p>\n\n\n\n<p>3.<strong>\u767e\u79d1\u554f\u7b54<\/strong>(baike2018qa)<strong>\uff0c<\/strong>150<strong>\u842c\u500b\u5e36\u554f\u984c\u985e\u578b\u7684\u554f\u7b54<\/strong><a href=\"https:\/\/github.com\/brightmart\/nlp_chinese_corpus#3%E7%99%BE%E7%A7%91%E9%97%AE%E7%AD%94baike2018qa150%E4%B8%87%E4%B8%AA%E5%B8%A6%E9%97%AE%E9%A2%98%E7%B1%BB%E5%9E%8B%E7%9A%84%E9%97%AE%E7%AD%94\"><\/a><\/p>\n\n\n\n<p>4.<strong>\u793e\u5340\u554f\u7b54<\/strong>json<strong>\u7248<\/strong>(webtext2019zh)<strong>\uff0c<\/strong>410<strong>\u842c\u500b\u9ad8\u8cea\u91cf\u793e\u5340\u554f\u7b54\uff0c\u9069\u5408\u8a13\u7df4\u8d85\u5927\u6a21\u578b<\/strong><a href=\"https:\/\/github.com\/brightmart\/nlp_chinese_corpus#4%E7%A4%BE%E5%8C%BA%E9%97%AE%E7%AD%94json%E7%89%88webtext2019zh410%E4%B8%87%E4%B8%AA%E9%AB%98%E8%B4%A8%E9%87%8F%E7%A4%BE%E5%8C%BA%E9%97%AE%E7%AD%94%E9%80%82%E5%90%88%E8%AE%AD%E7%BB%83%E8%B6%85%E5%A4%A7%E6%A8%A1%E5%9E%8B\"><\/a><\/p>\n\n\n\n<p>5.<strong>\u7ffb\u8b6f\u8a9e\u6599<\/strong>(translation2019zh)<strong>\uff0c<\/strong>520<strong>\u842c\u500b\u4e2d\u82f1\u6587\u53e5\u5b50\u5c0d<\/strong><a href=\"https:\/\/github.com\/brightmart\/nlp_chinese_corpus#5%E7%BF%BB%E8%AF%91%E8%AF%AD%E6%96%99translation2019zh520%E4%B8%87%E4%B8%AA%E4%B8%AD%E8%8B%B1%E6%96%87%E5%8F%A5%E5%AD%90%E5%AF%B9\"><\/a><\/p>\n\n\n\n<p><strong>\u70ba\u4ec0\u9ebc\u9700\u8981\u9019\u500b\u9805\u76ee<\/strong><a 
href=\"https:\/\/github.com\/brightmart\/nlp_chinese_corpus#%E4%B8%BA%E4%BB%80%E4%B9%88%E9%9C%80%E8%A6%81%E8%BF%99%E4%B8%AA%E9%A1%B9%E7%9B%AE\"><\/a><\/p>\n\n\n\n<p>\u4e2d\u6587\u7684\u4fe1\u606f\u7121\u8655\u4e0d\u5728\uff0c\u4f46\u5982\u679c\u60f3\u8981\u7372\u5f97\u5927\u91cf\u7684\u4e2d\u6587\u8a9e\u6599\uff0c\u537b\u662f\u4e0d\u592a\u5bb9\u6613\uff0c\u6709\u6642\u751a\u81f3\u975e\u5e38\u56f0\u96e3\u3002\u57282019\u5e74\u521d\u9019\u500b\u6642\u9ede\u4e0a\uff0c<\/p>\n\n\n\n<p>\u666e\u901a\u7684\u5f9e\u696d\u8005\u3001\u7814\u7a76\u4eba\u54e1\u6216\u5b78\u751f\uff0c\u4e26\u6c92\u6709\u4e00\u500b\u6bd4\u8f03\u597d\u7684\u6e20\u9053\u7372\u5f97\u6975\u5927\u91cf\u7684\u4e2d\u6587\u8a9e\u6599\u3002\u7b46\u8005\u60f3\u8981\u8a13\u7df4\u4e00\u500b\u4e2d\u6587\u7684\u8a5e\u5411\u91cf\uff0c<\/p>\n\n\n\n<p>\u5728\u767e\u5ea6\u548cgithub\u4e0a\u641c\u7d22\u4e86\u597d\u4e45\uff0c\u6536\u7a6b\u537b\u5f88\u5c11\uff1a\u8981\u9ebc\u8a9e\u6599\u7684\u91cf\u7d1a\u592a\u5c0f\uff0c\u8981\u9ebc\u6578\u64da\u904e\u65bc\u9673\u820a\uff0c\u6216\u9700\u8981\u7684\u8655\u7406\u592a\u8907\u96dc\u3002<\/p>\n\n\n\n<p>\u4e0d\u77e5\u9053\u4f60\u662f\u5426\u4e5f\u9047\u5230\u4e86\u9019\u6a23\u7684\u554f\u984c\uff1f<\/p>\n\n\n\n<p>\u6211\u5011\u9019\u500b\u9805\u76ee\uff0c\u5c31\u662f\u70ba\u4e86\u89e3\u6c7a\u9019\u4e00\u554f\u984c\u8ca2\u737b\u5fae\u8584\u4e4b\u529b\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u8a9e\u6599\u5eab\u5c07\u6703\u4e0d\u65b7\u64f4\u5145\u3002\u3002\u3002 \u4e00\u671f\u76ee\u6a19\uff1a10\u500b\u767e\u842c\u7d1a\u4e2d\u6587\u8a9e\u6599 &amp; 3\u500b\u5343\u842c\u7d1a\u4e2d\u6587\u8a9e\u6599(2019\u5e745\u67081\u865f) \u4e8c\u671f\u76ee\u6a19\uff1a30\u500b\u767e\u842c\u7d1a\u4e2d\u6587\u8a9e\u6599 &amp; 10\u500b\u5343\u842c\u7d1a\u4e2d\u6587\u8a9e\u6599 &amp; 1\u500b\u5104\u7d1a\u4e2d\u6587\u8a9e\u6599\uff082019\u5e7412\u670831\u65e5\uff09 Update\uff1a 
\u589e\u52a0\u9ad8\u8cea\u91cf\u793e\u5340\u554f\u7b54json\u7248(webtext2019zh)\uff0c\u53ef\u7528\u65bc\u8a13\u7df4\u8d85\u5927\u898f\u6a21NLP\u6a21\u578b\uff1b\u6dfb\u52a0520\u842c\u7ffb\u8b6f\u8a9e\u6599(translation2019zh)\u3002 1.\u7dad\u57fa\u767e\u79d1(wiki2019zh)\uff0c100\u842c\u500b\u7d50\u69cb\u826f\u597d\u7684\u4e2d\u6587\u8a5e\u689d 2.\u65b0\u805e\u8a9e\u6599(news2016zh)\uff0c250\u842c\u7bc7\u65b0\u805e\uff0c\u542b\u95dc\u9375\u8a5e\u3001\u63cf\u8ff0 3.\u767e\u79d1\u554f\u7b54(baike2018qa)\uff0c150\u842c\u500b\u5e36\u554f\u984c\u985e\u578b\u7684\u554f\u7b54 4.\u793e\u5340\u554f\u7b54json\u7248(webtext2019zh)\uff0c410\u842c\u500b\u9ad8\u8cea\u91cf\u793e\u5340\u554f\u7b54\uff0c\u9069\u5408\u8a13\u7df4\u8d85\u5927\u6a21\u578b 5.\u7ffb\u8b6f\u8a9e\u6599(translation2019zh)\uff0c520\u842c\u500b\u4e2d\u82f1\u6587\u53e5\u5b50\u5c0d \u70ba\u4ec0\u9ebc\u9700\u8981\u9019\u500b\u9805\u76ee \u4e2d\u6587\u7684\u4fe1\u606f\u7121\u8655\u4e0d\u5728\uff0c\u4f46\u5982\u679c\u60f3\u8981\u7372\u5f97\u5927\u91cf\u7684\u4e2d\u6587\u8a9e\u6599\uff0c\u537b\u662f\u4e0d\u592a\u5bb9\u6613\uff0c\u6709\u6642\u751a\u81f3\u975e\u5e38\u56f0\u96e3\u3002\u57282019\u5e74\u521d\u9019\u500b\u6642\u9ede\u4e0a\uff0c \u666e\u901a\u7684\u5f9e\u696d\u8005\u3001\u7814\u7a76\u4eba\u54e1\u6216\u5b78\u751f\uff0c\u4e26\u6c92\u6709\u4e00\u500b\u6bd4\u8f03\u597d\u7684\u6e20\u9053\u7372\u5f97\u6975\u5927\u91cf\u7684\u4e2d\u6587\u8a9e\u6599\u3002\u7b46\u8005\u60f3\u8981\u8a13\u7df4\u4e00\u500b\u4e2d\u6587\u7684\u8a5e\u5411\u91cf\uff0c \u5728\u767e\u5ea6\u548cgithub\u4e0a\u641c\u7d22\u4e86\u597d\u4e45\uff0c\u6536\u7a6b\u537b\u5f88\u5c11\uff1a\u8981\u9ebc\u8a9e\u6599\u7684\u91cf\u7d1a\u592a\u5c0f\uff0c\u8981\u9ebc\u6578\u64da\u904e\u65bc\u9673\u820a\uff0c\u6216\u9700\u8981\u7684\u8655\u7406\u592a\u8907\u96dc\u3002 \u4e0d\u77e5\u9053\u4f60\u662f\u5426\u4e5f\u9047\u5230\u4e86\u9019\u6a23\u7684\u554f\u984c\uff1f 
\u6211\u5011\u9019\u500b\u9805\u76ee\uff0c\u5c31\u662f\u70ba\u4e86\u89e3\u6c7a\u9019\u4e00\u554f\u984c\u8ca2\u737b\u5fae\u8584\u4e4b\u529b\u3002<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"googlesitekit_rrm_CAowvqSiDA:productID":"","footnotes":""},"categories":[27],"tags":[44,59],"class_list":["post-2586","post","type-post","status-publish","format-standard","hentry","category-paper","tag-github","tag-llm"],"_links":{"self":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/posts\/2586","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/comments?post=2586"}],"version-history":[{"count":0,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/posts\/2586\/revisions"}],"wp:attachment":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/media?parent=2586"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/categories?post=2586"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/tags?post=2586"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}