commit 6bb4a090f40305d4ab476ebce65f2d7d6f7cfec2 Author: Cheng Mingwei Date: Tue Jul 29 19:43:25 2025 +0800 init commit diff --git a/ChID/OpenDataLab___ChID/README.md b/ChID/OpenDataLab___ChID/README.md new file mode 100644 index 0000000..d0096fa --- /dev/null +++ b/ChID/OpenDataLab___ChID/README.md @@ -0,0 +1,14 @@ + ## 简介 + ChID 是一个用于完形填空测试的大规模中文成语数据集。 ChID 包含 581K 段落和 729K 空白,涵盖多个领域。在 ChID 中,段落中的成语被替换为空白符号。对于每个空白,提供包括黄金成语在内的候选成语列表作为选择。 + ## 类定义 + null + ## 引文 + ``` +@article{zheng2019chid, + title={ChID: A large-scale Chinese IDiom dataset for cloze test}, + author={Zheng, Chujie and Huang, Minlie and Sun, Aixin}, + journal={arXiv preprint arXiv:1906.01265}, + year={2019} +} +``` + ‌​‌‌​​​​‌​​​‌‌‌‌‌​​‌‌​‌​‌​​‌​​​‌‌​‌‌‌​‌‌‌​​‌‌‌‌​‌​​​‌​‌‌‌​​‌‌‌‌​‌​‌‌​​‌‌‌​​‌‌‌‌​‌​​‌‌‌​‌ \ No newline at end of file diff --git a/ChID/OpenDataLab___ChID/metafile.yaml b/ChID/OpenDataLab___ChID/metafile.yaml new file mode 100644 index 0000000..6128fe4 --- /dev/null +++ b/ChID/OpenDataLab___ChID/metafile.yaml @@ -0,0 +1,20 @@ +displayName: ChID(Chinese IDiom dataset) +labelTypes: +- Chinese Corpus +license: +- Apache 2.0 +mediaTypes: +- Text +paperUrl: https://arxiv.org/pdf/1906.01265v3.pdf +publishDate: "2019-01-01" +publishUrl: https://github.com/chujiezheng/ChID-Dataset +publisher: +- Nanyang Technological University +- Tsinghua University +- Beijing National Research Center for Information Science and Technology +tags: [] +taskTypes: +- Machine Reading Comprehension +- Reading Comprehension +- Language Modelling +‌​‌‌​​​​‌​​​‌‌‌‌‌​​‌‌​‌​‌​​‌​​​‌‌​‌‌‌​‌‌‌​​‌‌‌‌​‌​​​‌​‌‌‌​​‌‌‌‌​‌​‌‌​​‌‌‌​​‌‌‌‌​‌​​‌‌‌​‌ \ No newline at end of file diff --git a/ChID/OpenDataLab___ChID/raw/ChID.tar.gz b/ChID/OpenDataLab___ChID/raw/ChID.tar.gz new file mode 100644 index 0000000..90a4dcc Binary files /dev/null and b/ChID/OpenDataLab___ChID/raw/ChID.tar.gz differ