init commit

2025-07-29 19:02:48 +08:00 · 2025-07-29 19:02:48 +08:00 · 1792ad18e7
commit 1792ad18e7
16 changed files with 13957 additions and 0 deletions
--- a/V3Det/V3Det___V3Det/README.md
+++ b/V3Det/V3Det___V3Det/README.md
@ -0,0 +1,24 @@
 <img src="https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/cover.png" alt="Cover Image" style="width: 820px;">
  ## Introduction
  V3Det is a Vast Vocabulary Visual Detection Dataset with more than 13,000 accurately annotated object categories, empowering more comprehensive research in object detection.  
  1)   Vast Vocabulary：V3Det contains bounding boxes of objects from more than 13,000 categories on real-world images.
  2)   Hierarchical Category Organization：V3Det is organized by a hierarchical category tree which annotates the inclusion relationship among categories.
  3)   Rich Annotations：V3Det comprises precisely annotated objects in 245k images and professional descriptions of each category written by human experts and ChatGPT.
  ###  Data
 ![](https://github.com/ztayty/ztayty.github.io/blob/main/image/%E6%95%B0%E6%8D%AE%EF%BC%88%E8%BF%90%E8%90%A5%E6%89%8B%E5%8A%A8%E4%B8%8A%E6%9E%B6%E5%88%B0%E7%B1%BB%E5%AE%9A%E4%B9%89%EF%BC%89.jpg?raw=true)
  ## Citation
  Please cite the following paper when using V3Det:
  ```
  @misc{wang2023v3det,  
      title={V3Det: Vast Vocabulary Visual Detection Dataset},   
      author={Jiaqi Wang and Pan Zhang and Tao Chu and Yuhang Cao and Yujie Zhou and Tong Wu and Bin Wang and Conghui He and Dahua Lin},  
      year={2023},  
      eprint={2304.03752},  
      archivePrefix={arXiv},  
      primaryClass={cs.CV}  
 }
 ```
  ‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌
--- a/V3Det/V3Det___V3Det/metafile.yaml
+++ b/V3Det/V3Det___V3Det/metafile.yaml
@ -0,0 +1,16 @@
 displayName: V3Det
 taskTypes:
  - Object Detection
 labelTypes:
  - Box2d
 mediaTypes:
  - Image
 license:
  - CC BY 4.0
 publisher:
  - Shanghai Artificial Intelligence Laboratory
 tags: []
 publishDate: '2023-06-30'
 publishUrl: https://v3det.openxlab.org.cn/
 paperUrl: https://arxiv.org/pdf/2304.03752.pdf
 ‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌‌
--- a/V3Det/V3Det___V3Det/raw/category_name_13204_v3det_2023_v1.txt
+++ b/V3Det/V3Det___V3Det/raw/category_name_13204_v3det_2023_v1.txt
--- a/V3Det/V3Det___V3Det/raw/v3det_2023_v1_category_tree.json
+++ b/V3Det/V3Det___V3Det/raw/v3det_2023_v1_category_tree.json
--- a/V3Det/V3Det___V3Det/raw/v3det_2023_v1_test_image_info.json
+++ b/V3Det/V3Det___V3Det/raw/v3det_2023_v1_test_image_info.json
--- a/V3Det/V3Det___V3Det/raw/v3det_2023_v1_train.json
+++ b/V3Det/V3Det___V3Det/raw/v3det_2023_v1_train.json
--- a/V3Det/V3Det___V3Det/raw/v3det_2023_v1_train_ovd_base.json
+++ b/V3Det/V3Det___V3Det/raw/v3det_2023_v1_train_ovd_base.json
--- a/V3Det/V3Det___V3Det/raw/v3det_2023_v1_val.json
+++ b/V3Det/V3Det___V3Det/raw/v3det_2023_v1_val.json
--- a/V3Det/V3Det___V3Det/raw/v3det_exemplar_image_download.py
+++ b/V3Det/V3Det___V3Det/raw/v3det_exemplar_image_download.py
@ -0,0 +1,133 @@
 import io
 import argparse
 import concurrent.futures
 import json
 import os
 import time
 import urllib.error
 import urllib.request
 import requests
 from tqdm import tqdm
 # Command-line options. They are parsed at module level so that `args` is a
 # global read by download_image() and main():
 #   --output_folder: root directory for the downloaded images and the record file
 #   --max_retries:   how many times a non-HTTP failure is retried per image
 #   --max_workers:   size of the download thread pool
 parser = argparse.ArgumentParser()
 parser.add_argument("--output_folder", type=str, default="V3Det")
 parser.add_argument("--max_retries", type=int, default=3)
 parser.add_argument("--max_workers", type=int, default=16)
 args = parser.parse_args()
 # Browser-like User-Agent attached to every urllib request issued by this script.
 headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'}
 def cache(response):
    """Drain ``response`` into memory and return it as an ``io.BytesIO``.

    ``response`` only needs a ``read(size)`` method; it is read in 8192-byte
    pieces until ``read`` returns something falsy. The returned buffer is
    positioned at its end, so callers use ``getvalue()`` to get the bytes.
    """
    piece_size = 8192
    sink = io.BytesIO()
    piece = response.read(piece_size)
    while piece:
        sink.write(piece)
        piece = response.read(piece_size)
    return sink
 def download_image(url, path, timeout):
    """Download one image into ``args.output_folder`` and report the outcome.

    Args:
        url: Address of the image.
        path: Destination path relative to ``args.output_folder``.
        timeout: Per-request timeout in seconds, used for both the status
            probe and the actual download.

    Returns:
        A dict with keys ``"status"``, ``"url"`` and ``"path"``. ``status`` is
        ``"success"`` when the file was written, ``"expired"`` when the server
        answered with an HTTP error (status >= 400), and ``"timeout"`` when any
        other error persisted after ``args.max_retries`` retries.
    """
    result = {
        "status": "",
        "url": url,
        "path": path,
    }
    failures = 0
    while True:
        try:
            # Status probe. It carries the same headers as the real download,
            # has a timeout (requests waits forever by default) and uses
            # stream=True so the body is not fetched twice.
            with requests.get(url, headers=headers, timeout=timeout, stream=True) as probe:
                if probe.status_code >= 400:
                    result["status"] = "expired"
                    return result
            request = urllib.request.Request(url=url, headers=headers)
            # The context manager closes the connection even if reading fails.
            with urllib.request.urlopen(request, timeout=timeout) as response:
                image_path = os.path.join(args.output_folder, path)
                os.makedirs(os.path.dirname(image_path), exist_ok=True)
                payload = cache(response)
            # The file is only created once the whole body is in memory.
            with open(image_path, "wb") as fp:
                fp.write(payload.getvalue())
            result["status"] = "success"
            return result
        except urllib.error.HTTPError:
            # HTTP errors are treated as permanent: no retry.
            result["status"] = "expired"
            return result
        except Exception:
            # Anything else (network errors, timeouts, I/O errors) is retried.
            failures += 1
            if failures > args.max_retries:
                result["status"] = "timeout"
                return result
 def main():
    """Download all exemplar images listed in the V3Det resource repository.

    Steps:
      1. Ask for confirmation when ``args.output_folder`` is non-empty.
      2. Load ``records_examplar.json`` from a previous run, if present, so
         images already downloaded successfully are skipped.
      3. Fetch the list of mapping files and merge them into one
         ``{image_path: url}`` dict.
      4. Download the missing images with a thread pool, writing the record
         file after every chunk.
      5. Print a summary; remove the record file once everything succeeded.

    Raises:
        SystemExit: when the user declines to continue or presses Ctrl-C at
            the confirmation prompt.
    """
    start = time.time()
    if os.path.exists(args.output_folder) and os.listdir(args.output_folder):
        try:
            answer = input(
                f"'{args.output_folder}' already exists and is not an empty directory, continue? (y/n) "
            )
        except KeyboardInterrupt:
            raise SystemExit(0) from None
        if answer.lower() not in ("y", "yes"):
            raise SystemExit(0)
    image_folder_path = os.path.join(args.output_folder, "images")
    record_path = os.path.join(args.output_folder, "records_examplar.json")
    # Creates the output folder as well, since it is the parent of "images".
    os.makedirs(image_folder_path, exist_ok=True)
    record = {'success': [], 'expired': [], 'timeout': []}
    if os.path.isfile(record_path):
        try:
            with open(record_path, encoding="utf8") as f:
                old_record = json.load(f)
            success = set(old_record['success']) - set(old_record['expired']) - set(old_record['timeout'])
            record['success'] = list(success)
        except (OSError, ValueError, KeyError, TypeError) as e:
            # Best effort: an unreadable or malformed record only means that
            # everything is downloaded again. Ctrl-C is no longer swallowed.
            print(f"Could not read '{record_path}' ({e}); starting with an empty record.")
    list_url = 'https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/download_list_exemplar.txt'
    with urllib.request.urlopen(urllib.request.Request(url=list_url, headers=headers), timeout=10) as response:
        url_list = [line for line in response.read().decode('utf-8').split('\n') if len(line) > 0]
    image2url = {}
    for url in url_list:
        with urllib.request.urlopen(urllib.request.Request(url=url, headers=headers), timeout=10) as response:
            # SECURITY NOTE(review): eval() executes whatever the remote file
            # contains. If the files are plain dict literals, ast.literal_eval
            # would be a safe replacement - confirm the format before switching.
            image2url.update(eval(response.read().decode('utf-8')))
    rec_suc = set(record['success'])
    data = [(url, image) for image, url in image2url.items() if image not in rec_suc]
    with tqdm(total=len(data)) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            # Submit up to `chunk_size` tasks at a time to avoid too many pending tasks.
            chunk_size = min(5000, args.max_workers * 500)
            for i in range(0, len(data), chunk_size):
                futures = [
                    executor.submit(download_image, url, path, 10)
                    for url, path in data[i: i + chunk_size]
                ]
                for future in concurrent.futures.as_completed(futures):
                    r = future.result()
                    record[r["status"]].append(r["path"])
                    pbar.update(1)
                # Persist progress after each chunk so an interrupted run can resume.
                with open(record_path, "w", encoding="utf8") as f:
                    json.dump(record, f, indent=2)
    end = time.time()
    print(f"consuming time {end - start:.1f} sec")
    print(f"{len(record['success'])} images downloaded.")
    print(f"{len(record['timeout'])} urls failed due to request timeout.")
    print(f"{len(record['expired'])} urls failed due to url expiration.")
    if len(record['success']) == len(image2url):
        # The record file does not exist if nothing had to be downloaded and
        # no earlier run left one behind.
        if os.path.isfile(record_path):
            os.remove(record_path)
        print('All images have been downloaded!')
    else:
        print('Please run this file again to download failed image!')
 if __name__ == "__main__":
    main()
--- a/V3Det/V3Det___V3Det/raw/v3det_image_download.py
+++ b/V3Det/V3Det___V3Det/raw/v3det_image_download.py
@ -0,0 +1,126 @@
 import io
 import argparse
 import concurrent.futures
 import json
 import os
 import time
 import urllib.error
 import urllib.request
 from tqdm import tqdm
 # Command-line options. They are parsed at module level so that `args` is a
 # global read by download_image() and main():
 #   --output_folder: root directory for the downloaded images and the record file
 #   --max_retries:   how many times a non-HTTP failure is retried per image
 #   --max_workers:   size of the download thread pool
 parser = argparse.ArgumentParser()
 parser.add_argument("--output_folder", type=str, default="V3Det")
 parser.add_argument("--max_retries", type=int, default=3)
 parser.add_argument("--max_workers", type=int, default=16)
 args = parser.parse_args()
 # Browser-like User-Agent attached to every urllib request issued by this script.
 headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'}
 def cache(response):
    """Read ``response`` to exhaustion and return the bytes in an ``io.BytesIO``.

    Data is pulled through ``response.read`` 8192 bytes at a time; the loop
    ends on the first falsy chunk. The buffer's position is left at the end,
    so use ``getvalue()`` to retrieve the content.
    """
    piece_size = 8192
    sink = io.BytesIO()
    piece = response.read(piece_size)
    while piece:
        sink.write(piece)
        piece = response.read(piece_size)
    return sink
 def download_image(url, path, timeout):
    """Download one image into ``args.output_folder`` and report the outcome.

    Args:
        url: Address of the image.
        path: Destination path relative to ``args.output_folder``.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        A dict with keys ``"status"``, ``"url"`` and ``"path"``. ``status`` is
        ``"success"`` when the file was written, ``"expired"`` when ``urlopen``
        raised ``HTTPError``, and ``"timeout"`` when any other error persisted
        after ``args.max_retries`` retries.
    """
    result = {
        "status": "",
        "url": url,
        "path": path,
    }
    failures = 0
    while True:
        try:
            request = urllib.request.Request(url=url, headers=headers)
            # The context manager closes the connection even if reading fails.
            with urllib.request.urlopen(request, timeout=timeout) as response:
                image_path = os.path.join(args.output_folder, path)
                os.makedirs(os.path.dirname(image_path), exist_ok=True)
                payload = cache(response)
            # The file is only created once the whole body is in memory.
            with open(image_path, "wb") as fp:
                fp.write(payload.getvalue())
            result["status"] = "success"
            return result
        except urllib.error.HTTPError:
            # HTTP errors are treated as permanent: no retry.
            result["status"] = "expired"
            return result
        except Exception:
            # Anything else (network errors, timeouts, I/O errors) is retried.
            failures += 1
            if failures > args.max_retries:
                result["status"] = "timeout"
                return result
 def main():
    """Download all images listed in the V3Det resource repository.

    Steps:
      1. Ask for confirmation when ``args.output_folder`` is non-empty.
      2. Load ``records.json`` from a previous run, if present, so images
         already downloaded successfully are skipped.
      3. Fetch the list of mapping files and merge them into one
         ``{image_path: url}`` dict.
      4. Download the missing images with a thread pool, writing the record
         file after every chunk.
      5. Print a summary; remove the record file once everything succeeded.

    Raises:
        SystemExit: when the user declines to continue or presses Ctrl-C at
            the confirmation prompt.
    """
    start = time.time()
    if os.path.exists(args.output_folder) and os.listdir(args.output_folder):
        try:
            answer = input(
                f"'{args.output_folder}' already exists and is not an empty directory, continue? (y/n) "
            )
        except KeyboardInterrupt:
            raise SystemExit(0) from None
        if answer.lower() not in ("y", "yes"):
            raise SystemExit(0)
    image_folder_path = os.path.join(args.output_folder, "images")
    record_path = os.path.join(args.output_folder, "records.json")
    # Creates the output folder as well, since it is the parent of "images".
    os.makedirs(image_folder_path, exist_ok=True)
    record = {'success': [], 'expired': [], 'timeout': []}
    if os.path.isfile(record_path):
        try:
            with open(record_path, encoding="utf8") as f:
                record['success'] = json.load(f)['success']
        except (OSError, ValueError, KeyError, TypeError) as e:
            # Best effort: an unreadable or malformed record only means that
            # everything is downloaded again. Ctrl-C is no longer swallowed.
            print(f"Could not read '{record_path}' ({e}); starting with an empty record.")
    list_url = 'https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/download_list.txt'
    with urllib.request.urlopen(urllib.request.Request(url=list_url, headers=headers), timeout=10) as response:
        url_list = [line for line in response.read().decode('utf-8').split('\n') if len(line) > 0]
    image2url = {}
    for url in url_list:
        with urllib.request.urlopen(urllib.request.Request(url=url, headers=headers), timeout=10) as response:
            # SECURITY NOTE(review): eval() executes whatever the remote file
            # contains. If the files are plain dict literals, ast.literal_eval
            # would be a safe replacement - confirm the format before switching.
            image2url.update(eval(response.read().decode('utf-8')))
    rec_suc = set(record['success'])
    data = [(url, image) for image, url in image2url.items() if image not in rec_suc]
    with tqdm(total=len(data)) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            # Submit up to `chunk_size` tasks at a time to avoid too many pending tasks.
            chunk_size = min(5000, args.max_workers * 500)
            for i in range(0, len(data), chunk_size):
                futures = [
                    executor.submit(download_image, url, path, 10)
                    for url, path in data[i: i + chunk_size]
                ]
                for future in concurrent.futures.as_completed(futures):
                    r = future.result()
                    record[r["status"]].append(r["path"])
                    pbar.update(1)
                # Persist progress after each chunk so an interrupted run can resume.
                with open(record_path, "w", encoding="utf8") as f:
                    json.dump(record, f, indent=2)
    end = time.time()
    print(f"consuming time {end - start:.1f} sec")
    print(f"{len(record['success'])} images downloaded.")
    print(f"{len(record['timeout'])} urls failed due to request timeout.")
    print(f"{len(record['expired'])} urls failed due to url expiration.")
    if len(record['success']) == len(image2url):
        # The record file does not exist if nothing had to be downloaded and
        # no earlier run left one behind.
        if os.path.isfile(record_path):
            os.remove(record_path)
        print('All images have been downloaded!')
    else:
        print('Please run this file again to download failed image!')
 if __name__ == "__main__":
    main()
--- a/V3Det/V3Det___V3Det/raw/v3det_test_image_download.py
+++ b/V3Det/V3Det___V3Det/raw/v3det_test_image_download.py
@ -0,0 +1,133 @@
 import io
 import argparse
 import concurrent.futures
 import json
 import os
 import time
 import urllib.error
 import urllib.request
 import requests
 from tqdm import tqdm
 # Command-line options. They are parsed at module level so that `args` is a
 # global read by download_image() and main():
 #   --output_folder: root directory for the downloaded images and the record file
 #   --max_retries:   how many times a non-HTTP failure is retried per image
 #   --max_workers:   size of the download thread pool
 parser = argparse.ArgumentParser()
 parser.add_argument("--output_folder", type=str, default="V3Det")
 parser.add_argument("--max_retries", type=int, default=3)
 parser.add_argument("--max_workers", type=int, default=16)
 args = parser.parse_args()
 # Browser-like User-Agent attached to every urllib request issued by this script.
 headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'}
 def cache(response):
    """Copy everything ``response.read`` yields into a fresh ``io.BytesIO``.

    Reads happen in 8192-byte pieces and stop at the first falsy result. The
    returned buffer is left positioned at its end; call ``getvalue()`` to
    obtain the collected bytes.
    """
    piece_size = 8192
    sink = io.BytesIO()
    piece = response.read(piece_size)
    while piece:
        sink.write(piece)
        piece = response.read(piece_size)
    return sink
 def download_image(url, path, timeout):
    """Download one image into ``args.output_folder`` and report the outcome.

    Args:
        url: Address of the image.
        path: Destination path relative to ``args.output_folder``.
        timeout: Per-request timeout in seconds, used for both the status
            probe and the actual download.

    Returns:
        A dict with keys ``"status"``, ``"url"`` and ``"path"``. ``status`` is
        ``"success"`` when the file was written, ``"expired"`` when the server
        answered with an HTTP error (status >= 400), and ``"timeout"`` when any
        other error persisted after ``args.max_retries`` retries.
    """
    result = {
        "status": "",
        "url": url,
        "path": path,
    }
    failures = 0
    while True:
        try:
            # Status probe. It carries the same headers as the real download,
            # has a timeout (requests waits forever by default) and uses
            # stream=True so the body is not fetched twice.
            with requests.get(url, headers=headers, timeout=timeout, stream=True) as probe:
                if probe.status_code >= 400:
                    result["status"] = "expired"
                    return result
            request = urllib.request.Request(url=url, headers=headers)
            # The context manager closes the connection even if reading fails.
            with urllib.request.urlopen(request, timeout=timeout) as response:
                image_path = os.path.join(args.output_folder, path)
                os.makedirs(os.path.dirname(image_path), exist_ok=True)
                payload = cache(response)
            # The file is only created once the whole body is in memory.
            with open(image_path, "wb") as fp:
                fp.write(payload.getvalue())
            result["status"] = "success"
            return result
        except urllib.error.HTTPError:
            # HTTP errors are treated as permanent: no retry.
            result["status"] = "expired"
            return result
        except Exception:
            # Anything else (network errors, timeouts, I/O errors) is retried.
            failures += 1
            if failures > args.max_retries:
                result["status"] = "timeout"
                return result
 def main():
    """Download all test images listed in the V3Det resource repository.

    Steps:
      1. Ask for confirmation when ``args.output_folder`` is non-empty.
      2. Load ``records_test.json`` from a previous run, if present, so images
         already downloaded successfully are skipped.
      3. Fetch the list of mapping files and merge them into one
         ``{image_path: url}`` dict.
      4. Download the missing images with a thread pool, writing the record
         file after every chunk.
      5. Print a summary; remove the record file once everything succeeded.

    Raises:
        SystemExit: when the user declines to continue or presses Ctrl-C at
            the confirmation prompt.
    """
    start = time.time()
    if os.path.exists(args.output_folder) and os.listdir(args.output_folder):
        try:
            answer = input(
                f"'{args.output_folder}' already exists and is not an empty directory, continue? (y/n) "
            )
        except KeyboardInterrupt:
            raise SystemExit(0) from None
        if answer.lower() not in ("y", "yes"):
            raise SystemExit(0)
    image_folder_path = os.path.join(args.output_folder, "images")
    record_path = os.path.join(args.output_folder, "records_test.json")
    # Creates the output folder as well, since it is the parent of "images".
    os.makedirs(image_folder_path, exist_ok=True)
    record = {'success': [], 'expired': [], 'timeout': []}
    if os.path.isfile(record_path):
        try:
            with open(record_path, encoding="utf8") as f:
                old_record = json.load(f)
            success = set(old_record['success']) - set(old_record['expired']) - set(old_record['timeout'])
            record['success'] = list(success)
        except (OSError, ValueError, KeyError, TypeError) as e:
            # Best effort: an unreadable or malformed record only means that
            # everything is downloaded again. Ctrl-C is no longer swallowed.
            print(f"Could not read '{record_path}' ({e}); starting with an empty record.")
    list_url = 'https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/download_list_test.txt'
    with urllib.request.urlopen(urllib.request.Request(url=list_url, headers=headers), timeout=10) as response:
        url_list = [line for line in response.read().decode('utf-8').split('\n') if len(line) > 0]
    image2url = {}
    for url in url_list:
        with urllib.request.urlopen(urllib.request.Request(url=url, headers=headers), timeout=10) as response:
            # SECURITY NOTE(review): eval() executes whatever the remote file
            # contains. If the files are plain dict literals, ast.literal_eval
            # would be a safe replacement - confirm the format before switching.
            image2url.update(eval(response.read().decode('utf-8')))
    rec_suc = set(record['success'])
    data = [(url, image) for image, url in image2url.items() if image not in rec_suc]
    with tqdm(total=len(data)) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            # Submit up to `chunk_size` tasks at a time to avoid too many pending tasks.
            chunk_size = min(5000, args.max_workers * 500)
            for i in range(0, len(data), chunk_size):
                futures = [
                    executor.submit(download_image, url, path, 10)
                    for url, path in data[i: i + chunk_size]
                ]
                for future in concurrent.futures.as_completed(futures):
                    r = future.result()
                    record[r["status"]].append(r["path"])
                    pbar.update(1)
                # Persist progress after each chunk so an interrupted run can resume.
                with open(record_path, "w", encoding="utf8") as f:
                    json.dump(record, f, indent=2)
    end = time.time()
    print(f"consuming time {end - start:.1f} sec")
    print(f"{len(record['success'])} images downloaded.")
    print(f"{len(record['timeout'])} urls failed due to request timeout.")
    print(f"{len(record['expired'])} urls failed due to url expiration.")
    if len(record['success']) == len(image2url):
        # The record file does not exist if nothing had to be downloaded and
        # no earlier run left one behind.
        if os.path.isfile(record_path):
            os.remove(record_path)
        print('All images have been downloaded!')
    else:
        print('Please run this file again to download failed image!')
 if __name__ == "__main__":
    main()
--- a/V3Det/V3Det___V3Det/raw/v3det_visualize_tree.py
+++ b/V3Det/V3Det___V3Det/raw/v3det_visualize_tree.py
--- a/V3Det/V3Det___V3Det/sample/image/1.jpg
+++ b/V3Det/V3Det___V3Det/sample/image/1.jpg
--- a/V3Det/V3Det___V3Det/sample/image/2.jpg
+++ b/V3Det/V3Det___V3Det/sample/image/2.jpg
--- a/V3Det/V3Det___V3Det/sample/image/3.jpg
+++ b/V3Det/V3Det___V3Det/sample/image/3.jpg
--- a/V3Det/V3Det___V3Det/sample/image/4.jpg
+++ b/V3Det/V3Det___V3Det/sample/image/4.jpg