diff --git a/.jenkins/check/config/filter_linklint.txt b/.jenkins/check/config/filter_linklint.txt
index 4da3291ad0a50fc3dedf0d69159a14eb67b1b096..25b2a3941472bb466cac6d27909cec4bcc751e1c 100644
--- a/.jenkins/check/config/filter_linklint.txt
+++ b/.jenkins/check/config/filter_linklint.txt
@@ -1,3 +1,5 @@
 http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth
 https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py_key
 https://persagen.com/files/misc/wang2014knowledge.pdf
+https://s3.amazonaws.com/google-landmark/metadata
+https://s3.amazonaws.com/google-landmark/md5sum
diff --git a/official/cv/retinanet/src/lr_schedule.py b/official/cv/retinanet/src/lr_schedule.py
index 65a846f14b7aa93cfa99aa26978a2045498857c0..3c992737c68078069528bc6df23fdd650a6babbe 100644
--- a/official/cv/retinanet/src/lr_schedule.py
+++ b/official/cv/retinanet/src/lr_schedule.py
@@ -45,7 +45,7 @@ def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs1, warmup_epochs2,
     warmup_steps4 = warmup_steps3 + steps_per_epoch * warmup_epochs4
     warmup_steps5 = warmup_steps4 + steps_per_epoch * warmup_epochs5
     step_radio = [1e-4, 1e-3, 1e-2, 0.1]
-    if hasattr(config, finetune) and config.finetune:
+    if hasattr(config, "finetune") and config.finetune:
         step_radio = [1e-4, 1e-2, 0.1, 1]
     for i in range(total_steps):
         if i < warmup_steps1:
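
Note on the retinanet change above: `hasattr` takes the attribute name as a string, so the unquoted `finetune` raised a `NameError` whenever this branch ran. A minimal sketch of the failure mode, using a hypothetical `Config` class:

```python
class Config:
    """Hypothetical stand-in for the real retinanet config object."""
    finetune = True

config = Config()

# Correct: the attribute name is passed as a string.
print(hasattr(config, "finetune"))    # True

# Buggy form: `finetune` is looked up as a variable before hasattr runs.
try:
    hasattr(config, finetune)
except NameError as err:
    print(err)                        # name 'finetune' is not defined
```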
diff --git a/research/cv/delf/README_CN.md b/research/cv/delf/README_CN.md
index 125025cb5cc16fde056c80ecdb601b3787bd5d54..f39dda859e71bcc26de9d8dd4be1707397062cea 100755
--- a/research/cv/delf/README_CN.md
+++ b/research/cv/delf/README_CN.md
@@ -98,9 +98,27 @@
 
 ```shell
 # Download the Google Landmarks Dataset v2 TRAIN split and convert it to mindrecord files
-# [Note] Prepare at least 1.1 TB of storage; if space is insufficient, the optional parameter [NEED_ROMOVE_TAR] can be set to 'y', after which about 633 GB is used
-bash scripts/download_gldv2.sh 500 [DATASET_PATH] [NEED_ROMOVE_TAR]
-# example: bash scripts/download_gldv2.sh 500 /home/gldv2 y
+
+# [Note] In total 4 csv files, 500 tar files and 500 md5 files are downloaded, occupying about 633 GB; reserve enough storage space
+# Downloading takes a long time, and a single run may fail because of network fluctuations; the three parameters of download_gldv2.sh
+# are the smallest archive index to download, the largest archive index, and the save path
+bash scripts/download_gldv2.sh 0 499 [DATASET_PATH]
+# example: bash scripts/download_gldv2.sh 0 499 /home/gldv2
+# After downloading, compare the md5 of each downloaded tar file with the matching md5 file; if they agree, the download is correct, otherwise it failed and must be repeated
+# When re-downloading, set the first two parameters to the archives you need, e.g. passing '1 1' downloads only images_001.tar; if train.csv, train_clean.csv,
+# train_attribution.csv and train_label_to_category.csv were already downloaded successfully, comment out the matching lines as noted in the script
+
+cd [DATASET_PATH]/train
+# Extract the 500 downloaded tar files
+tar xvf images_xxx.tar # 000, 001, 002, 003, ...
+
+python3 src/build_image_dataset.py \
+--train_csv_path=[DATASET_PATH]/train/train.csv \
+--train_clean_csv_path=[DATASET_PATH]/train/train_clean.csv \
+--train_directory=[DATASET_PATH]/train/*/*/*/ \
+--output_directory=[DATASET_PATH]/mindrecord/ \
+--num_shards=128 \
+--validation_split_size=0.2
 
 # Download Oxford5k and Paris6k together with their corresponding ground truth files
 bash scripts/download_oxf.sh [DATASET_PATH]
@@ -276,12 +294,10 @@
 • The following command downloads the TRAIN split of the `Google Landmarks Dataset v2` dataset and automatically extracts its clean subset (for the exact definition, see the paper cited under [数据集](#数据集)), converting it to mindrecord format:
 
 ```shell
-bash scripts/download_gldv2.sh 500 [DATASET_PATH] [NEED_ROMOVE_TAR]
-# example: bash scripts/download_gldv2.sh 500 /home/gldv2 y
+bash scripts/download_gldv2.sh 0 499 [DATASET_PATH]
+# example: bash scripts/download_gldv2.sh 0 499 /home/gldv2
 ```
 
-• Prepare at least 1.1 TB of storage. If space is insufficient, the optional parameter `[NEED_ROMOVE_TAR]` can be set to `y`, which deletes each tar archive right after it is extracted, leaving the dataset at about 633 GB. If that is still not enough, search `src/build_image_dataset.py` for the commented-out `os.remove` statements and restore them, so the source images are deleted while they are converted to mindrecord format; the dataset then occupies about 450 GB. Once the conversion has finished, the `train` directory can be deleted outright if space is scarce, and the remaining `mindrecord` directory occupies about 103 GB.
-
 • Directory layout and description:
 
 ```shell
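
The rewritten download script below no longer verifies checksums itself; the README instead asks the user to compare each tar archive against its md5 file. A minimal sketch of that comparison, assuming the `images_XXX.tar` / `md5.images_XXX.txt` layout produced by the script (this helper is illustrative and not part of the patch):

```python
import hashlib
import sys
from pathlib import Path

def md5_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB tar archives are not loaded into memory."""
    digest = hashlib.md5()
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

def verify(train_dir: Path) -> list:
    """Return the indices of archives that are missing or fail the md5 check."""
    failed = []
    for idx in range(500):
        tar_path = train_dir / f"images_{idx:03d}.tar"
        md5_path = train_dir / f"md5.images_{idx:03d}.txt"
        if not tar_path.exists() or not md5_path.exists():
            failed.append(idx)
            continue
        # md5sum file format: "<hex digest>  <file name>"
        expected = md5_path.read_text().split()[0]
        if md5_of(tar_path) != expected:
            failed.append(idx)
    return failed

if __name__ == "__main__":
    bad = verify(Path(sys.argv[1]))  # e.g. python3 verify_md5.py /home/gldv2/train
    print("re-download:", bad if bad else "nothing, all archives match")
```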
diff --git a/research/cv/delf/scripts/download_gldv2.sh b/research/cv/delf/scripts/download_gldv2.sh
old mode 100755
new mode 100644
index ba9a4c922b67245de5021f1af5cd1642c2446c38..4528410c7a542b802215a0ea31faba62f28fc63a
--- a/research/cv/delf/scripts/download_gldv2.sh
+++ b/research/cv/delf/scripts/download_gldv2.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021 Huawei Technologies Co., Ltd
+# Copyright 2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,234 +16,46 @@
 # This script downloads the Google Landmarks v2 dataset TRAIN split. To download the dataset
 # run the script like in the following example:
-#   bash download_gldv2.sh 500 [DATASET_PATH] [NEED_ROMOVE_TAR]
+#   bash download_gldv2.sh 0 499 [DATASET_PATH]
 #
 # The script takes the following parameters, in order:
-# - number of image files from the TRAIN split to download (maximum 500)
-# - path for dataset
-if [[ $# -lt 2 || $# -gt 3 ]]
+# - index of the first image tar file to download (0 to 499)
+# - index of the last image tar file to download (0 to 499)
+# - path for the dataset
+if [[ $# -ne 3 ]]
 then
-    echo "Usage: bash download_gldv2.sh 500 [DATASET_PATH] [NEED_ROMOVE_TAR]
-    NEED_ROMOVE_TAR is optional, whether remove tar after extracting the images, choices: 'y' and 'n', default 'n' "
+    echo "Usage: bash download_gldv2.sh [BEGIN_IDX] [END_IDX] [DATASET_PATH]"
     exit 1
 fi
-need_remove_tar="n"
-if [ $# == 3 ]
-then
-  if [ "$3" == "y" ] || [ "$3" == "n" ];then
-    need_remove_tar=$3
-  else
-    echo "weather need remove tar or not, it's value must be in [y, n]"
-    exit 1
-  fi
-fi
-
-image_files_train=$1 # Number of image files to download from the TRAIN split
-dataset_root_folder=$2
-
+begin_idx=$1
+end_idx=$2
+dataset_root_folder=$3
 split="train"
 metadata_url="https://s3.amazonaws.com/google-landmark/metadata"
-csv_train=("${metadata_url}/train.csv" "${metadata_url}/train_clean.csv" "${metadata_url}/train_attribution.csv" "${metadata_url}/train_label_to_category.csv")
-export csv_train
-
 images_tar_file_base_url="https://s3.amazonaws.com/google-landmark"
 images_md5_file_base_url="https://s3.amazonaws.com/google-landmark/md5sum"
-num_processes=8
-
-make_folder() {
-  # Creates a folder and checks if it exists. Exits if folder creation fails.
-  local folder=$1
-  if [ -d "${folder}" ]; then
-    echo "Folder ${folder} already exists. Skipping folder creation."
-  else
-    echo "Creating folder ${folder}."
-    if mkdir -p ${folder}; then
-      echo "Successfully created folder ${folder}."
-    else
-      echo "Failed to create folder ${folder}. Exiting."
-      exit 1
-    fi
-  fi
-}
-
-download_file() {
-  # Downloads a file from an URL into a specified folder.
-  local file_url=$1
-  local folder=$2
-  # local file_path="${folder}/`basename ${file_url}`"
-  echo "Downloading file ${file_url} to folder ${folder}."
-  pushd . > /dev/null
-  cd ${folder}
-  curl -Os -C - --retry 10 ${file_url}
-  popd > /dev/null
-}
-
-validate_md5_checksum() {
-  # Validate the MD5 checksum of a downloaded file.
-  local content_file=$1
-  local md5_file=$2
-  echo "Checking MD5 checksum of file ${content_file} against ${md5_file}"
-  if [[ "${OSTYPE}" == "linux-gnu" ]]; then
-    content_md5=`md5sum ${content_file}`
-  elif [[ "${OSTYPE}" == "darwin"* ]]; then
-    content_md5=`md5 -r "${content_file}"`
-  fi
-  content_md5=`cut -d' ' -f1<<<"${content_md5}"`
-  expected_md5=`cut -d' ' -f1<<<cat "${md5_file}"`
-  if [[ "$content_md5" != "" && "$content_md5" = "$expected_md5" ]]; then
-    echo "Check ${content_file} passed."
-    return 0
-  else
-    echo "Check failed. MD5 checksums don't match. Exiting."
-    return 1
-  fi
-}
-
-extract_tar_file() {
-  # Extracts the content of a tar file to a specified folder.
-  local tar_file=$1
-  local folder=$2
-  echo "Extracting file ${tar_file} to folder ${folder}"
-  tar -C ${folder} -xf ${tar_file}
-  if [ $need_remove_tar == "y" ]; then
-    rm -rf ${tar_file}
-  fi
-}
-
-download_image_file() {
-  # Downloads one image file of a split and untar it.
-  local split=$1
-  local idx=`printf "%03g" $2`
-  local split_folder=$3
-
-  local images_md5_file=md5.images_${idx}.txt
-  local images_md5_file_url=${images_md5_file_base_url}/${split}/${images_md5_file}
-  local images_md5_file_path=${split_folder}/${images_md5_file}
-
-  download_file "${images_md5_file_url}" "${split_folder}"
-
-  local images_tar_file=images_${idx}.tar
-  local images_tar_file_url=${images_tar_file_base_url}/${split}/${images_tar_file}
-  local images_tar_file_path=${split_folder}/${images_tar_file}
-
-  download_file "${images_tar_file_url}" "${split_folder}"
-  if validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}" ; then
-    echo "${images_tar_file_path} error for wrong md5 file"
-    download_file "${images_md5_file_url}" "${split_folder}"
-    validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}"
-  fi
-  #extract_tar_file "${images_tar_file_path}" "${split_folder}"
-
-}
-
-check_image_file() {
-  # Downloads one image file of a split and untar it.
-  local split=$1
-  local idx=`printf "%03g" $2`
-  local split_folder=$3
-
-  local images_md5_file=md5.images_${idx}.txt
-  local images_md5_file_url=${images_md5_file_base_url}/${split}/${images_md5_file}
-  local images_md5_file_path=${split_folder}/${images_md5_file}
-  if ! [ -f "${images_md5_file_path}" ]; then
-    echo "${images_md5_file_path} not found!"
-    download_file "${images_md5_file_url}" "${split_folder}"
-  else
-    local filesize=`wc -c < "${images_md5_file_path}" `
-    echo "md5file size is ${filesize}"
-    if [[ "${filesize}" -lt 40 ]]; then
-      echo "${images_md5_file_path} not complete"
-      download_file "${images_md5_file_url}" "${split_folder}"
-    fi
-  fi
-
-  local images_tar_file=images_${idx}.tar
-  local images_tar_file_url=${images_tar_file_base_url}/${split}/${images_tar_file}
-  local images_tar_file_path=${split_folder}/${images_tar_file}
-  if ! [ -f "${images_tar_file_path}" ]; then
-    echo "${images_tar_file_path} not found!"
-    download_file "${images_tar_file_url}" "${split_folder}"
-    if validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}" ; then
-      echo "${images_tar_file_path} error for wrong md5 file"
-      download_file "${images_md5_file_url}" "${split_folder}"
-      validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}"
-    fi
-
-  else
-    if ! validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}" ; then
-      echo "${images_tar_file_path} not complete "
-      download_file "${images_tar_file_url}" "${split_folder}"
-      validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}"
-    fi
-  fi
-  extract_tar_file "${images_tar_file_path}" "${split_folder}"
-}
-
-
-download_image_files() {
-  # Downloads all image files of a split and untars them.
-  local split=$1
-  local split_folder=$2
-  local max_idx=$(expr ${image_files_train} - 1)
-  echo "Downloading ${image_files_train} files form the split ${split} in the folder ${split_folder}."
-  for i in $(seq 0 ${num_processes} ${max_idx}); do
-    local curr_max_idx=$(expr ${i} + ${num_processes} - 1)
-    local last_idx=$((${curr_max_idx}>${max_idx}?${max_idx}:${curr_max_idx}))
-    for j in $(seq ${i} 1 ${last_idx}); do download_image_file "${split}" "${j}" "${split_folder}" & done
-    wait
-  done
-}
-
-check_image_files() {
-  # Downloads all image files of a split and untars them.
-  local split=$1
-  local split_folder=$2
-  local max_idx=$(expr ${image_files_train} - 1)
-  echo "Downloading ${image_files_train} files form the split ${split} in the folder ${split_folder}."
-  for i in $(seq 0 1 ${max_idx}); do
-    local curr_max_idx=$(expr ${i} + 1 - 1)
-    local last_idx=$((${curr_max_idx}>${max_idx}?${max_idx}:${curr_max_idx}))
-    for j in $(seq ${i} 1 ${last_idx}); do check_image_file "${split}" "${j}" "${split_folder}" & done
-    wait
-  done
-}
-
-download_csv_files() {
-  # Downloads all medatada CSV files of a split.
-  local split=$1
-  local split_folder=$2
-  local csv_list="csv_${split}[*]"
-  for csv_file in ${!csv_list}; do
-    download_file "${csv_file}" "${split_folder}"
-  done
-}
-
-download_split() {
-  # Downloads all artifacts, metadata CSV files and image files of a single split.
-  local split=$1
-  local split_folder=${dataset_root_folder}/${split}
-  make_folder "${split_folder}"
-  download_csv_files "${split}" "${split_folder}"
-  download_image_files "${split}" "${split_folder}"
-  check_image_files "${split}" "${split_folder}"
-}
-
-download_all_splits() {
-  # Downloads all artifacts, metadata CSV files and image files of all splits.
-  make_folder "${dataset_root_folder}"
-  download_split "${split}"
-}
-
-download_all_splits
-python3 src/build_image_dataset.py \
-  --train_csv_path=${dataset_root_folder}/train/train.csv \
-  --train_clean_csv_path=${dataset_root_folder}/train/train_clean.csv \
-  --train_directory=${dataset_root_folder}/train/*/*/*/ \
-  --output_directory=${dataset_root_folder}/mindrecord/ \
-  --num_shards=128 \
-  --validation_split_size=0.2
-
-exit 0
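+# Create the destination folder for the train split; all csv, tar and md5 files are saved here.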
- download_file "${images_tar_file_url}" "${split_folder}" - if validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}" ; then - echo "${images_tar_file_path} error for wrong md5 file" - download_file "${images_md5_file_url}" "${split_folder}" - validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}" - fi - - else - if ! validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}" ; then - echo "${images_tar_file_path} not complete " - download_file "${images_tar_file_url}" "${split_folder}" - validate_md5_checksum "${images_tar_file_path}" "${images_md5_file_path}" - fi - fi - extract_tar_file "${images_tar_file_path}" "${split_folder}" -} - - -download_image_files() { - # Downloads all image files of a split and untars them. - local split=$1 - local split_folder=$2 - local max_idx=$(expr ${image_files_train} - 1) - echo "Downloading ${image_files_train} files form the split ${split} in the folder ${split_folder}." - for i in $(seq 0 ${num_processes} ${max_idx}); do - local curr_max_idx=$(expr ${i} + ${num_processes} - 1) - local last_idx=$((${curr_max_idx}>${max_idx}?${max_idx}:${curr_max_idx})) - for j in $(seq ${i} 1 ${last_idx}); do download_image_file "${split}" "${j}" "${split_folder}" & done - wait - done -} - -check_image_files() { - # Downloads all image files of a split and untars them. - local split=$1 - local split_folder=$2 - local max_idx=$(expr ${image_files_train} - 1) - echo "Downloading ${image_files_train} files form the split ${split} in the folder ${split_folder}." - for i in $(seq 0 1 ${max_idx}); do - local curr_max_idx=$(expr ${i} + 1 - 1) - local last_idx=$((${curr_max_idx}>${max_idx}?${max_idx}:${curr_max_idx})) - for j in $(seq ${i} 1 ${last_idx}); do check_image_file "${split}" "${j}" "${split_folder}" & done - wait - done -} - -download_csv_files() { - # Downloads all medatada CSV files of a split. - local split=$1 - local split_folder=$2 - local csv_list="csv_${split}[*]" - for csv_file in ${!csv_list}; do - download_file "${csv_file}" "${split_folder}" - done -} - -download_split() { - # Downloads all artifacts, metadata CSV files and image files of a single split. - local split=$1 - local split_folder=${dataset_root_folder}/${split} - make_folder "${split_folder}" - download_csv_files "${split}" "${split_folder}" - download_image_files "${split}" "${split_folder}" - check_image_files "${split}" "${split_folder}" -} - -download_all_splits() { - # Downloads all artifacts, metadata CSV files and image files of all splits. - make_folder "${dataset_root_folder}" - download_split "${split}" -} - -download_all_splits -python3 src/build_image_dataset.py \ - --train_csv_path=${dataset_root_folder}/train/train.csv \ - --train_clean_csv_path=${dataset_root_folder}/train/train_clean.csv \ - --train_directory=${dataset_root_folder}/train/*/*/*/ \ - --output_directory=${dataset_root_folder}/mindrecord/ \ - --num_shards=128 \ - --validation_split_size=0.2 - -exit 0 +mkdir -p ${dataset_root_folder}/${split} + +# if csv files have downloaded success, please comment next 7 lines. +csv_train="train.csv train_clean.csv train_attribution.csv train_label_to_category.csv" +for file_name in ${csv_train}; do + echo "filename $file_name" + file_url=${metadata_url}/${file_name} + echo "Download $file_url to ${dataset_root_folder}/${split}/${file_name} ..." 
+for i in $(seq ${begin_idx} 1 ${end_idx}); do
+    idx=`printf "%03g" $i`
+    images_md5_file=md5.images_${idx}.txt
+    images_tar_file=images_${idx}.tar
+    images_tar_file_url=${images_tar_file_base_url}/${split}/${images_tar_file}
+    images_md5_file_url=${images_md5_file_base_url}/${split}/${images_md5_file}
+
+    echo "Download ${images_tar_file_url} to ${dataset_root_folder}/${split}/${images_tar_file} ..."
+    wget ${images_tar_file_url} -t 10 -O ${dataset_root_folder}/${split}/${images_tar_file}
+    echo "Download ${images_md5_file_url} to ${dataset_root_folder}/${split}/${images_md5_file} ..."
+    wget ${images_md5_file_url} -t 10 -O ${dataset_root_folder}/${split}/${images_md5_file}
+done
\ No newline at end of file
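
Usage note: a failed archive can be re-fetched on its own by narrowing the index range, e.g. `bash scripts/download_gldv2.sh 7 7 /home/gldv2` downloads only images_007.tar and md5.images_007.txt.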