init commit
This commit is contained in:
commit
1792ad18e7
24
V3Det/V3Det___V3Det/README.md
Normal file
24
V3Det/V3Det___V3Det/README.md
Normal file
@ -0,0 +1,24 @@
|
||||
<img src="https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/cover.png" alt="Cover Image" style="width: 820px;">
|
||||
|
||||
## Introduction
|
||||
V3Det is a Vast Vocabulary Visual Detection Dataset with more than 13,000 accurately annotated object categories, empowering more comprehensive research in object detection.
|
||||
1) Vast Vocabulary: V3Det contains bounding boxes of objects from more than 13,000 categories on real-world images.
|
||||
2) Hierarchical Category Organization: V3Det is organized by a hierarchical category tree which annotates the inclusion relationship among categories.
|
||||
3) Rich Annotations: V3Det comprises precisely annotated objects in 245k images and professional descriptions of each category written by human experts and ChatGPT.
|
||||
|
||||
### Data
|
||||

|
||||
|
||||
## Citation
|
||||
Please cite the following paper when using V3Det
|
||||
```
|
||||
@misc{wang2023v3det,
|
||||
title={V3Det: Vast Vocabulary Visual Detection Dataset},
|
||||
author={Jiaqi Wang and Pan Zhang and Tao Chu and Yuhang Cao and Yujie Zhou and Tong Wu and Bin Wang and Conghui He and Dahua Lin},
|
||||
year={2023},
|
||||
eprint={2304.03752},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
```
|
||||
|
||||
16
V3Det/V3Det___V3Det/metafile.yaml
Normal file
16
V3Det/V3Det___V3Det/metafile.yaml
Normal file
@ -0,0 +1,16 @@
|
||||
displayName: V3Det
|
||||
taskTypes:
|
||||
- Object Detection
|
||||
labelTypes:
|
||||
- Box2d
|
||||
mediaTypes:
|
||||
- Image
|
||||
license:
|
||||
- CC BY 4.0
|
||||
publisher:
|
||||
- Shanghai Artificial Intelligence Laboratory
|
||||
tags: []
|
||||
publishDate: '2023-06-30'
|
||||
publishUrl: https://v3det.openxlab.org.cn/
|
||||
paperUrl: https://arxiv.org/pdf/2304.03752.pdf
|
||||
|
||||
13204
V3Det/V3Det___V3Det/raw/category_name_13204_v3det_2023_v1.txt
Normal file
13204
V3Det/V3Det___V3Det/raw/category_name_13204_v3det_2023_v1.txt
Normal file
File diff suppressed because it is too large
Load Diff
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_category_tree.json
Normal file
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_category_tree.json
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_train.json
Normal file
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_train.json
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_val.json
Normal file
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_val.json
Normal file
File diff suppressed because one or more lines are too long
133
V3Det/V3Det___V3Det/raw/v3det_exemplar_image_download.py
Normal file
133
V3Det/V3Det___V3Det/raw/v3det_exemplar_image_download.py
Normal file
@ -0,0 +1,133 @@
|
||||
import io
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import requests
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
# Command-line interface: output directory, retry budget for failed
# downloads, and the size of the download thread pool.
parser = argparse.ArgumentParser()
parser.add_argument("--output_folder", type=str, default="V3Det")
parser.add_argument("--max_retries", type=int, default=3)
parser.add_argument("--max_workers", type=int, default=16)
args = parser.parse_args()
# Browser-like User-Agent so image hosts don't reject the requests as bots.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'}
|
||||
|
||||
|
||||
def cache(response):
    """Read *response* to exhaustion in 8 KiB chunks and return an
    io.BytesIO containing the complete payload."""
    buf = io.BytesIO()
    chunk_size = 8192
    # The walrus form folds the read / emptiness-test / break cycle into
    # a single loop condition; an empty bytes object ends the loop.
    while chunk := response.read(chunk_size):
        buf.write(chunk)
    return buf
|
||||
|
||||
def download_image(url, path, timeout):
    """Download one image to ``args.output_folder/path``.

    The URL is first probed with a plain GET; any 4xx/5xx status marks it
    as permanently "expired".  Otherwise the image is fetched and written
    to disk.  HTTP errors are treated as expired URLs; any other failure
    (connection error, timeout, ...) is retried up to ``args.max_retries``
    times and then reported as "timeout".

    Returns a dict: {"status": "success"|"expired"|"timeout",
    "url": url, "path": path}.
    """
    result = {
        "status": "",
        "url": url,
        "path": path,
    }
    cnt = 0
    while True:
        try:
            # FIX: requests.get() has NO default timeout and could hang
            # forever, defeating this function's `timeout` parameter.
            if requests.get(url, timeout=timeout).status_code >= 400:
                result["status"] = "expired"
                return result

            response = urllib.request.urlopen(
                urllib.request.Request(url=url, headers=headers), timeout=timeout)
            image_path = os.path.join(args.output_folder, path)
            os.makedirs(os.path.dirname(image_path), exist_ok=True)
            f = cache(response)
            with open(image_path, "wb") as fp:
                fp.write(f.getvalue())
            result["status"] = "success"
        except Exception as e:
            # HTTP errors are permanent; everything else gets retried.
            if not isinstance(e, urllib.error.HTTPError):
                cnt += 1
                if cnt <= args.max_retries:
                    continue
            if isinstance(e, urllib.error.HTTPError):
                result["status"] = "expired"
            else:
                result["status"] = "timeout"
        break
    return result
|
||||
|
||||
|
||||
def main():
    """Download every exemplar image listed in the V3Det resource index.

    Supports resuming: previously successful downloads recorded in the
    checkpoint file are skipped, while expired/timed-out ones are retried.
    """
    import ast  # local import: only this function parses the remote index

    start = time.time()
    # Confirm before writing into a non-empty output directory.
    if os.path.exists(args.output_folder) and os.listdir(args.output_folder):
        try:
            c = input(
                f"'{args.output_folder}' already exists and is not an empty directory, continue? (y/n) "
            )
            if c.lower() not in ["y", "yes"]:
                exit(0)
        except KeyboardInterrupt:
            exit(0)
    if not os.path.exists(args.output_folder):
        os.makedirs(args.output_folder)
    image_folder_path = os.path.join(args.output_folder, "images")
    record_path = os.path.join(args.output_folder, "records_examplar.json")
    record = {'success': [], 'expired': [], 'timeout': []}
    if os.path.isfile(record_path):
        try:
            with open(record_path, encoding="utf8") as f:
                old_record = json.load(f)
                # Only keep successes that were not also marked failed.
                success = set(old_record['success']) - set(old_record['expired']) - set(old_record['timeout'])
                record['success'] = list(success)
        except Exception:
            # A corrupt or partial record file just means a fresh start.
            pass
    if not os.path.exists(image_folder_path):
        os.makedirs(image_folder_path)

    list_url = 'https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/download_list_exemplar.txt'
    response = urllib.request.urlopen(urllib.request.Request(url=list_url, headers=headers), timeout=10)
    url_list = [url for url in response.read().decode('utf-8').split('\n') if len(url) > 0]
    image2url = {}
    for url in url_list:
        response = urllib.request.urlopen(urllib.request.Request(url=url, headers=headers), timeout=10)
        # FIX: ast.literal_eval parses the downloaded dict literal without
        # the arbitrary-code-execution risk of eval() on remote content.
        image2url.update(ast.literal_eval(response.read().decode('utf-8')))

    data = []
    rec_suc = set(record['success'])
    for image, url in image2url.items():
        if image not in rec_suc:
            data.append((url, image))
    with tqdm(total=len(data)) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            # Submit up to `chunk_size` tasks at a time to avoid too many pending tasks.
            chunk_size = min(5000, args.max_workers * 500)
            for i in range(0, len(data), chunk_size):
                futures = [
                    executor.submit(download_image, url, path, 10)
                    for url, path in data[i: i + chunk_size]
                ]
                for future in concurrent.futures.as_completed(futures):
                    r = future.result()
                    record[r["status"]].append(r["path"])
                    pbar.update(1)
                # Checkpoint after each chunk so an interrupted run resumes.
                with open(record_path, "w", encoding="utf8") as f:
                    json.dump(record, f, indent=2)

    end = time.time()
    print(f"consuming time {end - start:.1f} sec")
    print(f"{len(record['success'])} images downloaded.")
    print(f"{len(record['timeout'])} urls failed due to request timeout.")
    print(f"{len(record['expired'])} urls failed due to url expiration.")
    if len(record['success']) == len(image2url):
        os.remove(record_path)
        print('All images have been downloaded!')
    else:
        print('Please run this file again to download failed image!')


if __name__ == "__main__":
    main()
|
||||
126
V3Det/V3Det___V3Det/raw/v3det_image_download.py
Normal file
126
V3Det/V3Det___V3Det/raw/v3det_image_download.py
Normal file
@ -0,0 +1,126 @@
|
||||
import io
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
# Command-line interface: output directory, retry budget for failed
# downloads, and the size of the download thread pool.
parser = argparse.ArgumentParser()
parser.add_argument("--output_folder", type=str, default="V3Det")
parser.add_argument("--max_retries", type=int, default=3)
parser.add_argument("--max_workers", type=int, default=16)
args = parser.parse_args()
# Browser-like User-Agent so image hosts don't reject the requests as bots.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'}
|
||||
|
||||
|
||||
def cache(response):
    """Read *response* to exhaustion in 8 KiB chunks and return an
    io.BytesIO containing the complete payload."""
    buf = io.BytesIO()
    chunk_size = 8192
    # The walrus form folds the read / emptiness-test / break cycle into
    # a single loop condition; an empty bytes object ends the loop.
    while chunk := response.read(chunk_size):
        buf.write(chunk)
    return buf
|
||||
|
||||
def download_image(url, path, timeout):
    """Fetch one image and store it under ``args.output_folder/path``.

    Returns a dict with keys "status" ("success" | "expired" | "timeout"),
    "url" and "path".  An HTTPError means the URL is permanently expired;
    any other failure is retried up to args.max_retries times before being
    reported as "timeout".
    """
    outcome = {"status": "", "url": url, "path": path}
    attempts = 0
    while True:
        try:
            request = urllib.request.Request(url=url, headers=headers)
            response = urllib.request.urlopen(request, timeout=timeout)
            target = os.path.join(args.output_folder, path)
            os.makedirs(os.path.dirname(target), exist_ok=True)
            payload = cache(response)
            with open(target, "wb") as sink:
                sink.write(payload.getvalue())
            outcome["status"] = "success"
        except Exception as exc:
            permanent = isinstance(exc, urllib.error.HTTPError)
            # Only transient (non-HTTP) failures consume the retry budget.
            if not permanent:
                attempts += 1
                if attempts <= args.max_retries:
                    continue
            outcome["status"] = "expired" if permanent else "timeout"
        break
    return outcome
|
||||
|
||||
|
||||
def main():
    """Download every image listed in the V3Det resource index.

    Supports resuming: downloads recorded as successful in the checkpoint
    file are skipped on subsequent runs.
    """
    import ast  # local import: only this function parses the remote index

    start = time.time()
    # Confirm before writing into a non-empty output directory.
    if os.path.exists(args.output_folder) and os.listdir(args.output_folder):
        try:
            c = input(
                f"'{args.output_folder}' already exists and is not an empty directory, continue? (y/n) "
            )
            if c.lower() not in ["y", "yes"]:
                exit(0)
        except KeyboardInterrupt:
            exit(0)
    if not os.path.exists(args.output_folder):
        os.makedirs(args.output_folder)
    image_folder_path = os.path.join(args.output_folder, "images")
    record_path = os.path.join(args.output_folder, "records.json")
    record = {'success': [], 'expired': [], 'timeout': []}
    if os.path.isfile(record_path):
        try:
            with open(record_path, encoding="utf8") as f:
                record['success'] = json.load(f)['success']
        except Exception:
            # A corrupt or partial record file just means a fresh start.
            pass
    if not os.path.exists(image_folder_path):
        os.makedirs(image_folder_path)

    list_url = 'https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/download_list.txt'
    response = urllib.request.urlopen(urllib.request.Request(url=list_url, headers=headers), timeout=10)
    url_list = [url for url in response.read().decode('utf-8').split('\n') if len(url) > 0]
    image2url = {}
    for url in url_list:
        response = urllib.request.urlopen(urllib.request.Request(url=url, headers=headers), timeout=10)
        # FIX: ast.literal_eval parses the downloaded dict literal without
        # the arbitrary-code-execution risk of eval() on remote content.
        image2url.update(ast.literal_eval(response.read().decode('utf-8')))

    data = []
    rec_suc = set(record['success'])
    for image, url in image2url.items():
        if image not in rec_suc:
            data.append((url, image))
    with tqdm(total=len(data)) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            # Submit up to `chunk_size` tasks at a time to avoid too many pending tasks.
            chunk_size = min(5000, args.max_workers * 500)
            for i in range(0, len(data), chunk_size):
                futures = [
                    executor.submit(download_image, url, path, 10)
                    for url, path in data[i: i + chunk_size]
                ]
                for future in concurrent.futures.as_completed(futures):
                    r = future.result()
                    record[r["status"]].append(r["path"])
                    pbar.update(1)
                # Checkpoint after each chunk so an interrupted run resumes.
                with open(record_path, "w", encoding="utf8") as f:
                    json.dump(record, f, indent=2)

    end = time.time()
    print(f"consuming time {end - start:.1f} sec")
    print(f"{len(record['success'])} images downloaded.")
    print(f"{len(record['timeout'])} urls failed due to request timeout.")
    print(f"{len(record['expired'])} urls failed due to url expiration.")
    if len(record['success']) == len(image2url):
        os.remove(record_path)
        print('All images have been downloaded!')
    else:
        print('Please run this file again to download failed image!')


if __name__ == "__main__":
    main()
|
||||
133
V3Det/V3Det___V3Det/raw/v3det_test_image_download.py
Normal file
133
V3Det/V3Det___V3Det/raw/v3det_test_image_download.py
Normal file
@ -0,0 +1,133 @@
|
||||
import io
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import requests
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
# Command-line interface: output directory, retry budget for failed
# downloads, and the size of the download thread pool.
parser = argparse.ArgumentParser()
parser.add_argument("--output_folder", type=str, default="V3Det")
parser.add_argument("--max_retries", type=int, default=3)
parser.add_argument("--max_workers", type=int, default=16)
args = parser.parse_args()
# Browser-like User-Agent so image hosts don't reject the requests as bots.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'}
|
||||
|
||||
|
||||
def cache(response):
    """Read *response* to exhaustion in 8 KiB chunks and return an
    io.BytesIO containing the complete payload."""
    buf = io.BytesIO()
    chunk_size = 8192
    # The walrus form folds the read / emptiness-test / break cycle into
    # a single loop condition; an empty bytes object ends the loop.
    while chunk := response.read(chunk_size):
        buf.write(chunk)
    return buf
|
||||
|
||||
def download_image(url, path, timeout):
    """Download one image to ``args.output_folder/path``.

    The URL is first probed with a plain GET; any 4xx/5xx status marks it
    as permanently "expired".  Otherwise the image is fetched and written
    to disk.  HTTP errors are treated as expired URLs; any other failure
    (connection error, timeout, ...) is retried up to ``args.max_retries``
    times and then reported as "timeout".

    Returns a dict: {"status": "success"|"expired"|"timeout",
    "url": url, "path": path}.
    """
    result = {
        "status": "",
        "url": url,
        "path": path,
    }
    cnt = 0
    while True:
        try:
            # FIX: requests.get() has NO default timeout and could hang
            # forever, defeating this function's `timeout` parameter.
            if requests.get(url, timeout=timeout).status_code >= 400:
                result["status"] = "expired"
                return result

            response = urllib.request.urlopen(
                urllib.request.Request(url=url, headers=headers), timeout=timeout)
            image_path = os.path.join(args.output_folder, path)
            os.makedirs(os.path.dirname(image_path), exist_ok=True)
            f = cache(response)
            with open(image_path, "wb") as fp:
                fp.write(f.getvalue())
            result["status"] = "success"
        except Exception as e:
            # HTTP errors are permanent; everything else gets retried.
            if not isinstance(e, urllib.error.HTTPError):
                cnt += 1
                if cnt <= args.max_retries:
                    continue
            if isinstance(e, urllib.error.HTTPError):
                result["status"] = "expired"
            else:
                result["status"] = "timeout"
        break
    return result
|
||||
|
||||
|
||||
def main():
    """Download every test-set image listed in the V3Det resource index.

    Supports resuming: previously successful downloads recorded in the
    checkpoint file are skipped, while expired/timed-out ones are retried.
    """
    import ast  # local import: only this function parses the remote index

    start = time.time()
    # Confirm before writing into a non-empty output directory.
    if os.path.exists(args.output_folder) and os.listdir(args.output_folder):
        try:
            c = input(
                f"'{args.output_folder}' already exists and is not an empty directory, continue? (y/n) "
            )
            if c.lower() not in ["y", "yes"]:
                exit(0)
        except KeyboardInterrupt:
            exit(0)
    if not os.path.exists(args.output_folder):
        os.makedirs(args.output_folder)
    image_folder_path = os.path.join(args.output_folder, "images")
    record_path = os.path.join(args.output_folder, "records_test.json")
    record = {'success': [], 'expired': [], 'timeout': []}
    if os.path.isfile(record_path):
        try:
            with open(record_path, encoding="utf8") as f:
                old_record = json.load(f)
                # Only keep successes that were not also marked failed.
                success = set(old_record['success']) - set(old_record['expired']) - set(old_record['timeout'])
                record['success'] = list(success)
        except Exception:
            # A corrupt or partial record file just means a fresh start.
            pass
    if not os.path.exists(image_folder_path):
        os.makedirs(image_folder_path)

    list_url = 'https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/download_list_test.txt'
    response = urllib.request.urlopen(urllib.request.Request(url=list_url, headers=headers), timeout=10)
    url_list = [url for url in response.read().decode('utf-8').split('\n') if len(url) > 0]
    image2url = {}
    for url in url_list:
        response = urllib.request.urlopen(urllib.request.Request(url=url, headers=headers), timeout=10)
        # FIX: ast.literal_eval parses the downloaded dict literal without
        # the arbitrary-code-execution risk of eval() on remote content.
        image2url.update(ast.literal_eval(response.read().decode('utf-8')))

    data = []
    rec_suc = set(record['success'])
    for image, url in image2url.items():
        if image not in rec_suc:
            data.append((url, image))
    with tqdm(total=len(data)) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            # Submit up to `chunk_size` tasks at a time to avoid too many pending tasks.
            chunk_size = min(5000, args.max_workers * 500)
            for i in range(0, len(data), chunk_size):
                futures = [
                    executor.submit(download_image, url, path, 10)
                    for url, path in data[i: i + chunk_size]
                ]
                for future in concurrent.futures.as_completed(futures):
                    r = future.result()
                    record[r["status"]].append(r["path"])
                    pbar.update(1)
                # Checkpoint after each chunk so an interrupted run resumes.
                with open(record_path, "w", encoding="utf8") as f:
                    json.dump(record, f, indent=2)

    end = time.time()
    print(f"consuming time {end - start:.1f} sec")
    print(f"{len(record['success'])} images downloaded.")
    print(f"{len(record['timeout'])} urls failed due to request timeout.")
    print(f"{len(record['expired'])} urls failed due to url expiration.")
    if len(record['success']) == len(image2url):
        os.remove(record_path)
        print('All images have been downloaded!')
    else:
        print('Please run this file again to download failed image!')


if __name__ == "__main__":
    main()
|
||||
316
V3Det/V3Det___V3Det/raw/v3det_visualize_tree.py
Normal file
316
V3Det/V3Det___V3Det/raw/v3det_visualize_tree.py
Normal file
File diff suppressed because one or more lines are too long
BIN
V3Det/V3Det___V3Det/sample/image/1.jpg
Normal file
BIN
V3Det/V3Det___V3Det/sample/image/1.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 436 KiB |
BIN
V3Det/V3Det___V3Det/sample/image/2.jpg
Normal file
BIN
V3Det/V3Det___V3Det/sample/image/2.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 481 KiB |
BIN
V3Det/V3Det___V3Det/sample/image/3.jpg
Normal file
BIN
V3Det/V3Det___V3Det/sample/image/3.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 499 KiB |
BIN
V3Det/V3Det___V3Det/sample/image/4.jpg
Normal file
BIN
V3Det/V3Det___V3Det/sample/image/4.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 555 KiB |
Loading…
x
Reference in New Issue
Block a user