init commit
This commit is contained in:
commit
1792ad18e7
24
V3Det/V3Det___V3Det/README.md
Normal file
24
V3Det/V3Det___V3Det/README.md
Normal file
@ -0,0 +1,24 @@
|
||||
<img src="https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/cover.png" alt="Cover Image" style="width: 820px;">
|
||||
|
||||
## Introduction
|
||||
V3Det is a Vast Vocabulary Visual Detection Dataset with more than 13,000 accurately annotated object categories, empowering more comprehensive research in object detection.
|
||||
1) Vast Vocabulary: V3Det contains bounding boxes of objects from more than 13,000 categories on real-world images.
|
||||
2) Hierarchical Category Organization: V3Det is organized by a hierarchical category tree which annotates the inclusion relationship among categories.
|
||||
3) Rich Annotations: V3Det comprises precisely annotated objects in 245k images and professional descriptions of each category written by human experts and ChatGPT.
|
||||
|
||||
### Data
|
||||

|
||||
|
||||
## Citation
|
||||
Please cite the following paper when using V3Det
|
||||
```
|
||||
@misc{wang2023v3det,
|
||||
title={V3Det: Vast Vocabulary Visual Detection Dataset},
|
||||
author={Jiaqi Wang and Pan Zhang and Tao Chu and Yuhang Cao and Yujie Zhou and Tong Wu and Bin Wang and Conghui He and Dahua Lin},
|
||||
year={2023},
|
||||
eprint={2304.03752},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CV}
|
||||
}
|
||||
```
|
||||
|
||||
16
V3Det/V3Det___V3Det/metafile.yaml
Normal file
16
V3Det/V3Det___V3Det/metafile.yaml
Normal file
@ -0,0 +1,16 @@
|
||||
displayName: V3Det
|
||||
taskTypes:
|
||||
- Object Detection
|
||||
labelTypes:
|
||||
- Box2d
|
||||
mediaTypes:
|
||||
- Image
|
||||
license:
|
||||
- CC BY 4.0
|
||||
publisher:
|
||||
- Shanghai Artificial Intelligence Laboratory
|
||||
tags: []
|
||||
publishDate: '2023-06-30'
|
||||
publishUrl: https://v3det.openxlab.org.cn/
|
||||
paperUrl: https://arxiv.org/pdf/2304.03752.pdf
|
||||
|
||||
13204
V3Det/V3Det___V3Det/raw/category_name_13204_v3det_2023_v1.txt
Normal file
13204
V3Det/V3Det___V3Det/raw/category_name_13204_v3det_2023_v1.txt
Normal file
File diff suppressed because it is too large
Load Diff
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_category_tree.json
Normal file
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_category_tree.json
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_train.json
Normal file
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_train.json
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_val.json
Normal file
1
V3Det/V3Det___V3Det/raw/v3det_2023_v1_val.json
Normal file
File diff suppressed because one or more lines are too long
133
V3Det/V3Det___V3Det/raw/v3det_exemplar_image_download.py
Normal file
133
V3Det/V3Det___V3Det/raw/v3det_exemplar_image_download.py
Normal file
@ -0,0 +1,133 @@
|
||||
import io
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import requests
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
# Command-line interface: output directory, retry budget for failed
# downloads, and the size of the download thread pool.
parser = argparse.ArgumentParser()
parser.add_argument("--output_folder", type=str, default="V3Det")
parser.add_argument("--max_retries", type=int, default=3)
parser.add_argument("--max_workers", type=int, default=16)
args = parser.parse_args()
# Browser-like User-Agent so image hosts don't reject the requests as bots.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'}
|
||||
|
||||
|
||||
def cache(response):
    """Read *response* to exhaustion in 8 KiB chunks and return an
    io.BytesIO containing the complete payload."""
    buf = io.BytesIO()
    chunk_size = 8192
    # The walrus form folds the read / emptiness-test / break cycle into
    # a single loop condition; an empty bytes object ends the loop.
    while chunk := response.read(chunk_size):
        buf.write(chunk)
    return buf
|
||||
|
||||
def download_image(url, path, timeout):
    """Download one image to ``args.output_folder/path``.

    The URL is first probed with a plain GET; any 4xx/5xx status marks it
    as permanently "expired".  Otherwise the image is fetched and written
    to disk.  HTTP errors are treated as expired URLs; any other failure
    (connection error, timeout, ...) is retried up to ``args.max_retries``
    times and then reported as "timeout".

    Returns a dict: {"status": "success"|"expired"|"timeout",
    "url": url, "path": path}.
    """
    result = {
        "status": "",
        "url": url,
        "path": path,
    }
    cnt = 0
    while True:
        try:
            # FIX: requests.get() has NO default timeout and could hang
            # forever, defeating this function's `timeout` parameter.
            if requests.get(url, timeout=timeout).status_code >= 400:
                result["status"] = "expired"
                return result

            response = urllib.request.urlopen(
                urllib.request.Request(url=url, headers=headers), timeout=timeout)
            image_path = os.path.join(args.output_folder, path)
            os.makedirs(os.path.dirname(image_path), exist_ok=True)
            f = cache(response)
            with open(image_path, "wb") as fp:
                fp.write(f.getvalue())
            result["status"] = "success"
        except Exception as e:
            # HTTP errors are permanent; everything else gets retried.
            if not isinstance(e, urllib.error.HTTPError):
                cnt += 1
                if cnt <= args.max_retries:
                    continue
            if isinstance(e, urllib.error.HTTPError):
                result["status"] = "expired"
            else:
                result["status"] = "timeout"
        break
    return result
|
||||
|
||||
|
||||
def main():
    """Download every exemplar image listed in the V3Det resource index.

    Supports resuming: previously successful downloads recorded in the
    checkpoint file are skipped, while expired/timed-out ones are retried.
    """
    import ast  # local import: only this function parses the remote index

    start = time.time()
    # Confirm before writing into a non-empty output directory.
    if os.path.exists(args.output_folder) and os.listdir(args.output_folder):
        try:
            c = input(
                f"'{args.output_folder}' already exists and is not an empty directory, continue? (y/n) "
            )
            if c.lower() not in ["y", "yes"]:
                exit(0)
        except KeyboardInterrupt:
            exit(0)
    if not os.path.exists(args.output_folder):
        os.makedirs(args.output_folder)
    image_folder_path = os.path.join(args.output_folder, "images")
    record_path = os.path.join(args.output_folder, "records_examplar.json")
    record = {'success': [], 'expired': [], 'timeout': []}
    if os.path.isfile(record_path):
        try:
            with open(record_path, encoding="utf8") as f:
                old_record = json.load(f)
                # Only keep successes that were not also marked failed.
                success = set(old_record['success']) - set(old_record['expired']) - set(old_record['timeout'])
                record['success'] = list(success)
        except Exception:
            # A corrupt or partial record file just means a fresh start.
            pass
    if not os.path.exists(image_folder_path):
        os.makedirs(image_folder_path)

    list_url = 'https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/download_list_exemplar.txt'
    response = urllib.request.urlopen(urllib.request.Request(url=list_url, headers=headers), timeout=10)
    url_list = [url for url in response.read().decode('utf-8').split('\n') if len(url) > 0]
    image2url = {}
    for url in url_list:
        response = urllib.request.urlopen(urllib.request.Request(url=url, headers=headers), timeout=10)
        # FIX: ast.literal_eval parses the downloaded dict literal without
        # the arbitrary-code-execution risk of eval() on remote content.
        image2url.update(ast.literal_eval(response.read().decode('utf-8')))

    data = []
    rec_suc = set(record['success'])
    for image, url in image2url.items():
        if image not in rec_suc:
            data.append((url, image))
    with tqdm(total=len(data)) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            # Submit up to `chunk_size` tasks at a time to avoid too many pending tasks.
            chunk_size = min(5000, args.max_workers * 500)
            for i in range(0, len(data), chunk_size):
                futures = [
                    executor.submit(download_image, url, path, 10)
                    for url, path in data[i: i + chunk_size]
                ]
                for future in concurrent.futures.as_completed(futures):
                    r = future.result()
                    record[r["status"]].append(r["path"])
                    pbar.update(1)
                # Checkpoint after each chunk so an interrupted run resumes.
                with open(record_path, "w", encoding="utf8") as f:
                    json.dump(record, f, indent=2)

    end = time.time()
    print(f"consuming time {end - start:.1f} sec")
    print(f"{len(record['success'])} images downloaded.")
    print(f"{len(record['timeout'])} urls failed due to request timeout.")
    print(f"{len(record['expired'])} urls failed due to url expiration.")
    if len(record['success']) == len(image2url):
        os.remove(record_path)
        print('All images have been downloaded!')
    else:
        print('Please run this file again to download failed image!')


if __name__ == "__main__":
    main()
|
||||
126
V3Det/V3Det___V3Det/raw/v3det_image_download.py
Normal file
126
V3Det/V3Det___V3Det/raw/v3det_image_download.py
Normal file
@ -0,0 +1,126 @@
|
||||
import io
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
# Command-line interface: output directory, retry budget for failed
# downloads, and the size of the download thread pool.
parser = argparse.ArgumentParser()
parser.add_argument("--output_folder", type=str, default="V3Det")
parser.add_argument("--max_retries", type=int, default=3)
parser.add_argument("--max_workers", type=int, default=16)
args = parser.parse_args()
# Browser-like User-Agent so image hosts don't reject the requests as bots.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'}
|
||||
|
||||
|
||||
def cache(response):
    """Read *response* to exhaustion in 8 KiB chunks and return an
    io.BytesIO containing the complete payload."""
    buf = io.BytesIO()
    chunk_size = 8192
    # The walrus form folds the read / emptiness-test / break cycle into
    # a single loop condition; an empty bytes object ends the loop.
    while chunk := response.read(chunk_size):
        buf.write(chunk)
    return buf
|
||||
|
||||
def download_image(url, path, timeout):
    """Fetch one image and store it under ``args.output_folder/path``.

    Returns a dict with keys "status" ("success" | "expired" | "timeout"),
    "url" and "path".  An HTTPError means the URL is permanently expired;
    any other failure is retried up to args.max_retries times before being
    reported as "timeout".
    """
    outcome = {"status": "", "url": url, "path": path}
    attempts = 0
    while True:
        try:
            request = urllib.request.Request(url=url, headers=headers)
            response = urllib.request.urlopen(request, timeout=timeout)
            target = os.path.join(args.output_folder, path)
            os.makedirs(os.path.dirname(target), exist_ok=True)
            payload = cache(response)
            with open(target, "wb") as sink:
                sink.write(payload.getvalue())
            outcome["status"] = "success"
        except Exception as exc:
            permanent = isinstance(exc, urllib.error.HTTPError)
            # Only transient (non-HTTP) failures consume the retry budget.
            if not permanent:
                attempts += 1
                if attempts <= args.max_retries:
                    continue
            outcome["status"] = "expired" if permanent else "timeout"
        break
    return outcome
|
||||
|
||||
|
||||
def main():
    """Download every image listed in the V3Det resource index.

    Supports resuming: downloads recorded as successful in the checkpoint
    file are skipped on subsequent runs.
    """
    import ast  # local import: only this function parses the remote index

    start = time.time()
    # Confirm before writing into a non-empty output directory.
    if os.path.exists(args.output_folder) and os.listdir(args.output_folder):
        try:
            c = input(
                f"'{args.output_folder}' already exists and is not an empty directory, continue? (y/n) "
            )
            if c.lower() not in ["y", "yes"]:
                exit(0)
        except KeyboardInterrupt:
            exit(0)
    if not os.path.exists(args.output_folder):
        os.makedirs(args.output_folder)
    image_folder_path = os.path.join(args.output_folder, "images")
    record_path = os.path.join(args.output_folder, "records.json")
    record = {'success': [], 'expired': [], 'timeout': []}
    if os.path.isfile(record_path):
        try:
            with open(record_path, encoding="utf8") as f:
                record['success'] = json.load(f)['success']
        except Exception:
            # A corrupt or partial record file just means a fresh start.
            pass
    if not os.path.exists(image_folder_path):
        os.makedirs(image_folder_path)

    list_url = 'https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/download_list.txt'
    response = urllib.request.urlopen(urllib.request.Request(url=list_url, headers=headers), timeout=10)
    url_list = [url for url in response.read().decode('utf-8').split('\n') if len(url) > 0]
    image2url = {}
    for url in url_list:
        response = urllib.request.urlopen(urllib.request.Request(url=url, headers=headers), timeout=10)
        # FIX: ast.literal_eval parses the downloaded dict literal without
        # the arbitrary-code-execution risk of eval() on remote content.
        image2url.update(ast.literal_eval(response.read().decode('utf-8')))

    data = []
    rec_suc = set(record['success'])
    for image, url in image2url.items():
        if image not in rec_suc:
            data.append((url, image))
    with tqdm(total=len(data)) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            # Submit up to `chunk_size` tasks at a time to avoid too many pending tasks.
            chunk_size = min(5000, args.max_workers * 500)
            for i in range(0, len(data), chunk_size):
                futures = [
                    executor.submit(download_image, url, path, 10)
                    for url, path in data[i: i + chunk_size]
                ]
                for future in concurrent.futures.as_completed(futures):
                    r = future.result()
                    record[r["status"]].append(r["path"])
                    pbar.update(1)
                # Checkpoint after each chunk so an interrupted run resumes.
                with open(record_path, "w", encoding="utf8") as f:
                    json.dump(record, f, indent=2)

    end = time.time()
    print(f"consuming time {end - start:.1f} sec")
    print(f"{len(record['success'])} images downloaded.")
    print(f"{len(record['timeout'])} urls failed due to request timeout.")
    print(f"{len(record['expired'])} urls failed due to url expiration.")
    if len(record['success']) == len(image2url):
        os.remove(record_path)
        print('All images have been downloaded!')
    else:
        print('Please run this file again to download failed image!')


if __name__ == "__main__":
    main()
|
||||
133
V3Det/V3Det___V3Det/raw/v3det_test_image_download.py
Normal file
133
V3Det/V3Det___V3Det/raw/v3det_test_image_download.py
Normal file
@ -0,0 +1,133 @@
|
||||
import io
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import requests
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
# Command-line interface: output directory, retry budget for failed
# downloads, and the size of the download thread pool.
parser = argparse.ArgumentParser()
parser.add_argument("--output_folder", type=str, default="V3Det")
parser.add_argument("--max_retries", type=int, default=3)
parser.add_argument("--max_workers", type=int, default=16)
args = parser.parse_args()
# Browser-like User-Agent so image hosts don't reject the requests as bots.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36'}
|
||||
|
||||
|
||||
def cache(response):
    """Read *response* to exhaustion in 8 KiB chunks and return an
    io.BytesIO containing the complete payload."""
    buf = io.BytesIO()
    chunk_size = 8192
    # The walrus form folds the read / emptiness-test / break cycle into
    # a single loop condition; an empty bytes object ends the loop.
    while chunk := response.read(chunk_size):
        buf.write(chunk)
    return buf
|
||||
|
||||
def download_image(url, path, timeout):
    """Download one image to ``args.output_folder/path``.

    The URL is first probed with a plain GET; any 4xx/5xx status marks it
    as permanently "expired".  Otherwise the image is fetched and written
    to disk.  HTTP errors are treated as expired URLs; any other failure
    (connection error, timeout, ...) is retried up to ``args.max_retries``
    times and then reported as "timeout".

    Returns a dict: {"status": "success"|"expired"|"timeout",
    "url": url, "path": path}.
    """
    result = {
        "status": "",
        "url": url,
        "path": path,
    }
    cnt = 0
    while True:
        try:
            # FIX: requests.get() has NO default timeout and could hang
            # forever, defeating this function's `timeout` parameter.
            if requests.get(url, timeout=timeout).status_code >= 400:
                result["status"] = "expired"
                return result

            response = urllib.request.urlopen(
                urllib.request.Request(url=url, headers=headers), timeout=timeout)
            image_path = os.path.join(args.output_folder, path)
            os.makedirs(os.path.dirname(image_path), exist_ok=True)
            f = cache(response)
            with open(image_path, "wb") as fp:
                fp.write(f.getvalue())
            result["status"] = "success"
        except Exception as e:
            # HTTP errors are permanent; everything else gets retried.
            if not isinstance(e, urllib.error.HTTPError):
                cnt += 1
                if cnt <= args.max_retries:
                    continue
            if isinstance(e, urllib.error.HTTPError):
                result["status"] = "expired"
            else:
                result["status"] = "timeout"
        break
    return result
|
||||
|
||||
|
||||
def main():
    """Download every test-set image listed in the V3Det resource index.

    Supports resuming: previously successful downloads recorded in the
    checkpoint file are skipped, while expired/timed-out ones are retried.
    """
    import ast  # local import: only this function parses the remote index

    start = time.time()
    # Confirm before writing into a non-empty output directory.
    if os.path.exists(args.output_folder) and os.listdir(args.output_folder):
        try:
            c = input(
                f"'{args.output_folder}' already exists and is not an empty directory, continue? (y/n) "
            )
            if c.lower() not in ["y", "yes"]:
                exit(0)
        except KeyboardInterrupt:
            exit(0)
    if not os.path.exists(args.output_folder):
        os.makedirs(args.output_folder)
    image_folder_path = os.path.join(args.output_folder, "images")
    record_path = os.path.join(args.output_folder, "records_test.json")
    record = {'success': [], 'expired': [], 'timeout': []}
    if os.path.isfile(record_path):
        try:
            with open(record_path, encoding="utf8") as f:
                old_record = json.load(f)
                # Only keep successes that were not also marked failed.
                success = set(old_record['success']) - set(old_record['expired']) - set(old_record['timeout'])
                record['success'] = list(success)
        except Exception:
            # A corrupt or partial record file just means a fresh start.
            pass
    if not os.path.exists(image_folder_path):
        os.makedirs(image_folder_path)

    list_url = 'https://raw.githubusercontent.com/V3Det/v3det_resource/main/resource/download_list_test.txt'
    response = urllib.request.urlopen(urllib.request.Request(url=list_url, headers=headers), timeout=10)
    url_list = [url for url in response.read().decode('utf-8').split('\n') if len(url) > 0]
    image2url = {}
    for url in url_list:
        response = urllib.request.urlopen(urllib.request.Request(url=url, headers=headers), timeout=10)
        # FIX: ast.literal_eval parses the downloaded dict literal without
        # the arbitrary-code-execution risk of eval() on remote content.
        image2url.update(ast.literal_eval(response.read().decode('utf-8')))

    data = []
    rec_suc = set(record['success'])
    for image, url in image2url.items():
        if image not in rec_suc:
            data.append((url, image))
    with tqdm(total=len(data)) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
            # Submit up to `chunk_size` tasks at a time to avoid too many pending tasks.
            chunk_size = min(5000, args.max_workers * 500)
            for i in range(0, len(data), chunk_size):
                futures = [
                    executor.submit(download_image, url, path, 10)
                    for url, path in data[i: i + chunk_size]
                ]
                for future in concurrent.futures.as_completed(futures):
                    r = future.result()
                    record[r["status"]].append(r["path"])
                    pbar.update(1)
                # Checkpoint after each chunk so an interrupted run resumes.
                with open(record_path, "w", encoding="utf8") as f:
                    json.dump(record, f, indent=2)

    end = time.time()
    print(f"consuming time {end - start:.1f} sec")
    print(f"{len(record['success'])} images downloaded.")
    print(f"{len(record['timeout'])} urls failed due to request timeout.")
    print(f"{len(record['expired'])} urls failed due to url expiration.")
    if len(record['success']) == len(image2url):
        os.remove(record_path)
        print('All images have been downloaded!')
    else:
        print('Please run this file again to download failed image!')


if __name__ == "__main__":
    main()
|
||||
316
V3Det/V3Det___V3Det/raw/v3det_visualize_tree.py
Normal file
316
V3Det/V3Det___V3Det/raw/v3det_visualize_tree.py
Normal file
File diff suppressed because one or more lines are too long
BIN
V3Det/V3Det___V3Det/sample/image/1.jpg
Normal file
BIN
V3Det/V3Det___V3Det/sample/image/1.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 436 KiB |
BIN
V3Det/V3Det___V3Det/sample/image/2.jpg
Normal file
BIN
V3Det/V3Det___V3Det/sample/image/2.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 481 KiB |
BIN
V3Det/V3Det___V3Det/sample/image/3.jpg
Normal file
BIN
V3Det/V3Det___V3Det/sample/image/3.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 499 KiB |
BIN
V3Det/V3Det___V3Det/sample/image/4.jpg
Normal file
BIN
V3Det/V3Det___V3Det/sample/image/4.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 555 KiB |
Loading…
x
Reference in New Issue
Block a user