diff --git a/.ci/benchmark.py b/.ci/benchmark.py
index ccd0268..f6c9400 100644
--- a/.ci/benchmark.py
+++ b/.ci/benchmark.py
@@ -1,15 +1,64 @@
+import argparse
 import os
 import sys
 
 from src.benchmark.utils import read_metrics, to_markdown_table
 
-if __name__ == "__main__":
-    # Generate statistics report
-    statistics_path = sys.argv[1]
-    metrics = read_metrics(statistics_path, metric="accuracy")
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--path", type=str, required=True, help="Report path.")
+    parser.add_argument("--write-gh-job-summary", action="store_true", help="Write to GitHub job summary.")
+    parser.add_argument("--update-readme", action="store_true", help="Update statistics report in README.md.")
+    return parser.parse_args()
+
+
+def generate_report(path: str):
+    metrics = read_metrics(path, metric="accuracy")
     html_table = to_markdown_table(metrics)
+    return html_table
 
-    # Write to workflow job summary
+
+def write_job_summary(report):
     summary_path = os.environ["GITHUB_STEP_SUMMARY"]
     with open(summary_path, "a") as f:
         f.write("## Torchbenchmark statistics report\n")
-        f.write(html_table)
+        f.write(report)
+
+
+def update_readme(report):
+    project_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    readme_path = os.path.join(project_path, "README.md")
+    print(readme_path)
+    with open(readme_path, "r") as f:
+        readme_content = f.read()
+
+    start_marker = "<!-- benchmark report start -->"
+    end_marker = "<!-- benchmark report end -->"
+    start_index = readme_content.find(start_marker)
+    end_index = readme_content.find(end_marker)
+    assert start_index != -1
+    assert end_index != -1
+
+    start_index += len(start_marker)
+    new_readme_content = (
+        readme_content[:start_index] + "\n\n"
+        + report + "\n\n"
+        + readme_content[end_index:]
+    )
+    with open(readme_path, "w") as f:
+        f.write(new_readme_content)
+
+
+if __name__ == "__main__":
+    args = parse_args()
+
+    # Generate statistics report
+    report = generate_report(args.path)
+
+    # Write to workflow job summary
+    if args.write_gh_job_summary:
+        write_job_summary(report)
+
+    # Update README.md
+    if args.update_readme:
+        update_readme(report)
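The rewritten entry point is flag-driven, e.g. `python .ci/benchmark.py --path benchmark/ascend_npu_benchmark.json --write-gh-job-summary`. Note that the `<!-- benchmark report start -->`/`<!-- benchmark report end -->` strings above are reconstructed placeholders (the original HTML comments were stripped in transit); `update_readme` only requires that the same pair appears in README.md. A minimal standalone sketch of the splice behavior, under that assumption:

```python
# Sketch of the splice in update_readme(): keep everything up to and
# including the start marker, drop the old body, resume at the end marker.
# The marker strings are placeholders and must match README.md exactly.
start_marker = "<!-- benchmark report start -->"
end_marker = "<!-- benchmark report end -->"

readme = f"intro\n{start_marker}\nold table\n{end_marker}\noutro"
report = "| model | npu |\n|---|---|\n| resnet50 | ✅ |"

start = readme.find(start_marker)
end = readme.find(end_marker)
assert start != -1 and end != -1

# Advance past the start marker so it survives the rewrite.
start += len(start_marker)
print(readme[:start] + "\n\n" + report + "\n\n" + readme[end:])
```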
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..8b68555
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,10 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      # Check for updates to GitHub Actions every week
+      interval: "weekly"
+    open-pull-requests-limit: 2
+    reviewers:
+      - "shink"
diff --git a/.github/workflows/_ascend_npu_benchmark.yml b/.github/workflows/_ascend_npu_benchmark.yml
index 0449354..6592cd4 100644
--- a/.github/workflows/_ascend_npu_benchmark.yml
+++ b/.github/workflows/_ascend_npu_benchmark.yml
@@ -100,6 +100,8 @@ jobs:
         run: |
           pip install -r benchmark/requirements.txt --constraint ascend_npu/requirements.txt "numpy==1.*"
           python benchmark/install.py --userbenchmark test_bench --continue_on_fail
+        env:
+          HF_ENDPOINT: https://hf-mirror.com
 
       - name: Install project dependencies
         run: |
@@ -130,19 +132,26 @@ jobs:
           overwrite: true
 
       - name: Write to workflow job summary
-        id: report
         run: |
-          set -x
-          realpath benchmark/ascend_npu_benchmark.json
-          ls benchmark
-          cat benchmark/ascend_npu_benchmark.json
+          python .ci/benchmark.py --write-gh-job-summary --path benchmark/ascend_npu_benchmark.json
 
-          output_path=$(realpath benchmark/ascend_npu_benchmark.json)
-          python .ci/benchmark.py ${output_path}
-
-      # TODO(shink)
       - name: Update README.md
-        if: ${{ github.event_name == 'push' }}
+        if: ${{ github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
+        id: update-readme
        run: |
-          echo "${{ github.event_name }}"
-          echo "${{ github.event_name == 'push' }}"
+          python .ci/benchmark.py --update-readme --path benchmark/ascend_npu_benchmark.json
+          if git diff --quiet README.md; then
+            echo "changed=false" >> $GITHUB_OUTPUT
+          else
+            echo "changed=true" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Create a pull request for changes to README.md
+        if: ${{ steps.update-readme.outputs.changed == 'true' }}
+        uses: peter-evans/create-pull-request@v7
+        with:
+          add-paths: README.md
+          branch: ascend-npu/benchmark
+          title: "[Ascend NPU] Update torchbenchmark report in README.md"
+          commit-message: "Update README.md"
+          reviewers: shink
diff --git a/.github/workflows/_ascend_npu_test.yml b/.github/workflows/_ascend_npu_ut.yml
similarity index 99%
rename from .github/workflows/_ascend_npu_test.yml
rename to .github/workflows/_ascend_npu_ut.yml
index fcfcac8..13b1e1b 100644
--- a/.github/workflows/_ascend_npu_test.yml
+++ b/.github/workflows/_ascend_npu_ut.yml
@@ -1,4 +1,4 @@
-name: '_ascend_npu_test'
+name: '_ascend_npu_ut'
 
 on:
   workflow_call:
diff --git a/.github/workflows/ascend_npu_test.yml b/.github/workflows/ascend_npu_test.yml
index 5d60ee9..abeb818 100644
--- a/.github/workflows/ascend_npu_test.yml
+++ b/.github/workflows/ascend_npu_test.yml
@@ -4,18 +4,34 @@ on:
   push:
     branches:
       - 'main'
-
+    paths:
+      - '.github/workflows/ascend_npu_test.yml'
+      - '.github/workflows/_ascend_npu_build.yml'
+      - '.github/workflows/_ascend_npu_ut.yml'
+      - '.github/workflows/_ascend_npu_benchmark.yml'
+      - '.github/actions/**'
+      - '.ci/**'
+      - 'ascend_npu/**'
+      - 'src/**'
+      - '!**/*.md'
   pull_request:
     branches:
       - 'main'
-
+    paths:
+      - '.github/workflows/ascend_npu_test.yml'
+      - '.github/workflows/_ascend_npu_build.yml'
+      - '.github/workflows/_ascend_npu_ut.yml'
+      - '.github/workflows/_ascend_npu_benchmark.yml'
+      - '.github/actions/**'
+      - '.ci/**'
+      - 'ascend_npu/**'
+      - 'src/**'
+      - '!**/*.md'
   release:
     types:
       - 'published'
-
   schedule:
     - cron: '0 12 * * *'
-
   workflow_dispatch:
     inputs:
       runner:
@@ -87,7 +103,7 @@ jobs:
     needs:
      - prepare
      - build
-    uses: ./.github/workflows/_ascend_npu_test.yml
+    uses: ./.github/workflows/_ascend_npu_ut.yml
    with:
      runner: ${{ needs.prepare.outputs.runner }}
      image: ${{ needs.prepare.outputs.image }}
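For reference, the `changed` flag in `_ascend_npu_benchmark.yml` above relies on two standard mechanisms: `git diff --quiet` exits 0 when README.md is unchanged and 1 when it differs, and step outputs are `key=value` lines appended to the file named by `$GITHUB_OUTPUT`. A rough Python equivalent of that shell step, for illustration only:

```python
import os
import subprocess

# git diff --quiet exits 0 if README.md is unchanged, 1 if it differs.
changed = subprocess.run(["git", "diff", "--quiet", "README.md"]).returncode != 0

# Step outputs are appended to the file named by $GITHUB_OUTPUT.
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
    f.write(f"changed={str(changed).lower()}\n")
```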
diff --git a/README.md b/README.md
index f30065e..6699136 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,11 @@ across various devices by running comprehensive GitHub workflows.
 
 ## Accelerator Integration Test Results
 
-
+<details>
+
+<summary>Torchbenchmark statistics report</summary>
+
+<!-- benchmark report start -->
 
 | | [torch_npu][1] |
 |---------------------------------|----------------|
@@ -121,7 +125,9 @@ across various devices by running comprehensive GitHub workflows.
 
 [3]: https://github.com/cosdt/pytorch-integration-tests/actions/workflows/ascend_npu_test.yml
 
-
+<!-- benchmark report end -->
+
+</details>
 
 ## Overview
diff --git a/ascend_npu/metadata.yml b/ascend_npu/metadata.yml
new file mode 100644
index 0000000..c5e2cca
--- /dev/null
+++ b/ascend_npu/metadata.yml
@@ -0,0 +1,10 @@
+device: "npu"
+backend_extension: "torch_npu"
+link: https://github.com/Ascend/pytorch
+torchbenchmark:
+  test:
+    - train
+    - eval
+  models:
+    skip:
+      - llava
diff --git a/ascend_npu/metadata.json b/ascend_npu/metadata.json
deleted file mode 100644
index f41a975..0000000
--- a/ascend_npu/metadata.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-  "device": "npu",
-  "test": [
-    "train",
-    "eval"
-  ],
-  "models": [
-    "BERT_pytorch",
-    "hf_GPT2"
-  ]
-}
\ No newline at end of file
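The new YAML metadata keeps the device and test fields of the deleted JSON file but swaps the explicit model allow-list for a `torchbenchmark.models.skip` deny-list. A minimal sketch of loading it (assuming PyYAML is available in the environment):

```python
import yaml  # assumes PyYAML is installed

with open("ascend_npu/metadata.yml") as f:
    metadata = yaml.safe_load(f)

# Field layout as declared in the file above.
assert metadata["device"] == "npu"
assert metadata["torchbenchmark"]["test"] == ["train", "eval"]
assert metadata["torchbenchmark"]["models"]["skip"] == ["llava"]
```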
"model=functorch_maml_omniglot, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass", + "model=hf_Reformer, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_two_runs_differ", + "model=detectron2_fasterrcnn_r_101_fpn, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail", + "model=pytorch_stargan, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass", + "model=hf_T5_large, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass", + "model=detectron2_fasterrcnn_r_50_fpn, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail", + "model=hf_Bert_large, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass", + "model=detectron2_fasterrcnn_r_50_c4, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail", + "model=pyhpc_isoneutral_mixing, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass", + "model=hf_T5, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass", + "model=hf_Bart, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass", + "model=detectron2_fasterrcnn_r_50_dc5, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail", "model=BERT_pytorch, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass", - "model=hf_GPT2, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass" + "model=hf_GPT2, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass", + "model=moco, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "not_implemented", + "model=phlippe_densenet, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass", + "model=llava, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "NPU out of memory. 
diff --git a/test/benchmark/ascend_npu_benchmark.json b/test/benchmark/ascend_npu_benchmark.json
index 2316f39..9f0c413 100644
--- a/test/benchmark/ascend_npu_benchmark.json
+++ b/test/benchmark/ascend_npu_benchmark.json
@@ -1,11 +1,112 @@
 {
   "name": "test_bench",
   "environ": {
-    "pytorch_git_version": "dd2e6d61409aac22198ec771560a38adb0018ba2",
-    "pytorch_version": "2.6.0.dev20241120"
+    "pytorch_git_version": "81d824d17afb18b3c696f3e5a4799a246d29cfde",
+    "pytorch_version": "2.6.0.dev20241031+cpu"
   },
   "metrics": {
+    "model=pytorch_CycleGAN_and_pix2pix, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_Whisper, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=dcgan, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=basic_gnn_edgecnn, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=functorch_maml_omniglot, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_Reformer, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_two_runs_differ",
+    "model=detectron2_fasterrcnn_r_101_fpn, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
+    "model=pytorch_stargan, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_T5_large, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=detectron2_fasterrcnn_r_50_fpn, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
+    "model=hf_Bert_large, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=detectron2_fasterrcnn_r_50_c4, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
+    "model=pyhpc_isoneutral_mixing, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_T5, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_Bart, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=detectron2_fasterrcnn_r_50_dc5, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
     "model=BERT_pytorch, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
-    "model=hf_GPT2, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass"
+    "model=hf_GPT2, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=moco, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "not_implemented",
+    "model=phlippe_densenet, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=llava, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "NPU out of memory. Tried to allocate 174.00 MiB (NPU 0; 29.50 GiB total capacity; 28.26 GiB already allocated; 28.26 GiB current active; 34.06 MiB free; 29.10 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.",
+    "model=squeezenet1_1, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=functorch_dp_cifar10, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=Background_Matting, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_Bert, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=doctr_reco_predictor, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=torch_multimodal_clip, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=drq, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=LearningToPaint, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=detectron2_maskrcnn_r_50_c4, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
+    "model=detectron2_maskrcnn_r_50_fpn, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
+    "model=tacotron2, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_two_runs_differ",
+    "model=mobilenet_v2_quantized_qat, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "not_implemented",
+    "model=hf_GPT2_large, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=detectron2_maskrcnn, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
+    "model=timm_regnet, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=Super_SloMo, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=lennard_jones, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=phlippe_resnet, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=llama, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_distil_whisper, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=resnet50_quantized_qat, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "not_implemented",
+    "model=fastNLP_Bert, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=timm_vision_transformer_large, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=demucs, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=pyhpc_equation_of_state, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_T5_base, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_BigBird, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_T5_generate, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=detectron2_maskrcnn_r_101_fpn, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
+    "model=hf_Longformer, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=sam, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=nanogpt, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=densenet121, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=tts_angular, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=vision_maskrcnn, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "module 'torch' has no attribute '_six'",
+    "model=basic_gnn_gin, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_DistilBert, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=detectron2_fasterrcnn_r_101_dc5, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
+    "model=cm3leon_generate, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=dlrm, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=basic_gnn_sage, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=nvidia_deeprecommender, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "'DeepRecommenderInferenceBenchmark' object has no attribute 'rencoder'",
+    "model=sam_fast, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "No module named 'segment_anything_fast'",
+    "model=simple_gpt, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "not_implemented",
+    "model=detectron2_fcos_r_50_fpn, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
+    "model=hf_Albert, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=maml, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_clip, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=opacus_cifar10, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=speech_transformer, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=yolov3, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=mobilenet_v3_large, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=timm_resnest, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=timm_vision_transformer, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=timm_efficientdet, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "\nobject has no attribute nms:\n File \"/usr/local/python3.10/lib/python3.10/site-packages/torchvision/ops/boxes.py\", line 41\n _log_api_usage_once(nms)\n _assert_has_ops()\n return torch.ops.torchvision.nms(boxes, scores, iou_threshold)\n ~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE\n'nms' is being compiled since it was called from '_batched_nms_vanilla'\n File \"/usr/local/python3.10/lib/python3.10/site-packages/torchvision/ops/boxes.py\", line 109\n for class_id in torch.unique(idxs):\n curr_indices = torch.where(idxs == class_id)[0]\n curr_keep_indices = nms(boxes[curr_indices], scores[curr_indices], iou_threshold)\n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE\n keep_mask[curr_indices[curr_keep_indices]] = True\n keep_indices = torch.where(keep_mask)[0]\n'_batched_nms_vanilla' is being compiled since it was called from 'batched_nms'\n File \"/usr/local/python3.10/lib/python3.10/site-packages/torchvision/ops/boxes.py\", line 73\n # https://github.com/pytorch/vision/issues/1311#issuecomment-781329339\n if boxes.numel() > (4000 if boxes.device.type == \"cpu\" else 20000) and not torchvision._is_tracing():\n return _batched_nms_vanilla(boxes, scores, idxs, iou_threshold)\n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE\n else:\n return _batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold)\n'batched_nms' is being compiled since it was called from 'generate_detections'\n File \"/usr/local/python3.10/lib/python3.10/site-packages/effdet/anchors.py\", line 140\n scores[top_detection_idx] = soft_scores\n else:\n top_detection_idx = batched_nms(boxes, scores, classes, iou_threshold=0.5)\n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <--- HERE\n\n # keep only top max_det_per_image scoring predictions\n'generate_detections' is being compiled since it was called from '_batch_detection'\n File \"/usr/local/python3.10/lib/python3.10/site-packages/effdet/bench.py\", line 82\n img_scale_i = None if img_scale is None else img_scale[i]\n img_size_i = None if img_size is None else img_size[i]\n detections = generate_detections(\n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n class_out[i],\n ~~~~~~~~~~~~~\n box_out[i],\n ~~~~~~~~~~~\n anchor_boxes,\n ~~~~~~~~~~~~~\n indices[i],\n ~~~~~~~~~~~\n classes[i],\n ~~~~~~~~~~~\n img_scale_i,\n ~~~~~~~~~~~~\n img_size_i,\n ~~~~~~~~~~~\n max_det_per_image=max_det_per_image,\n ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n soft_nms=soft_nms,\n ~~~~~~~~~~~~~~~~~ <--- HERE\n )\n batch_detections.append(detections)\n",
+    "model=timm_vovnet, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=vgg16, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=moondream, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_2nd_run_fail",
+    "model=resnet18, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=basic_gnn_gcn, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=detectron2_fasterrcnn_r_101_c4, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
+    "model=shufflenet_v2_x1_0, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=timm_efficientnet, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=soft_actor_critic, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=alexnet, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=mobilenet_v2, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=detectron2_maskrcnn_r_101_c4, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "eager_1st_run_fail",
+    "model=stable_diffusion_text_encoder, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "not_implemented",
+    "model=timm_nfnet, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=simple_gpt_tp_manual, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "not_implemented",
+    "model=resnet152, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=pyhpc_turbulent_kinetic_energy, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=maml_omniglot, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=stable_diffusion_unet, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "not_implemented",
+    "model=pytorch_unet, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=doctr_det_predictor, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=resnet50, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=mnasnet1_0, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=llama_v2_7b_16h, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "not_implemented",
+    "model=microbench_unbacked_tolist_sum, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=resnext50_32x4d, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass",
+    "model=hf_Roberta_base, test=eval, device=npu, bs=None, extra_args=['--accuracy'], metric=accuracy": "pass"
   }
 }
\ No newline at end of file
diff --git a/test/benchmark/test_ascend_npu.py b/test/benchmark/test_ascend_npu.py
index f78e48d..57c4b9e 100644
--- a/test/benchmark/test_ascend_npu.py
+++ b/test/benchmark/test_ascend_npu.py
@@ -7,10 +7,9 @@ class TestBenchmark(unittest.TestCase):
 
     def test_read_metrics(self):
         metrics = utils.read_metrics(path, metric="accuracy")
-        self.assertTrue(len(metrics) == 2)
+        self.assertTrue(len(metrics) == 103)
         for metric in metrics:
             self.assertEqual(metric.key.device, "npu")
-            self.assertEqual(metric.value, "pass")
 
     def test_to_markdown_table(self):
         metrics = utils.read_metrics(path, metric="accuracy")
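The updated assertion expects 103 metrics, matching the fixture above: the 2 pre-existing entries plus 101 new ones. A quick way to verify the count and see the status breakdown, run from the repository root:

```python
import json
from collections import Counter

with open("test/benchmark/ascend_npu_benchmark.json") as f:
    metrics = json.load(f)["metrics"]

print(len(metrics))  # 103
# Group everything that is neither "pass" nor "skip" under "fail".
print(Counter(v if v in ("pass", "skip") else "fail" for v in metrics.values()))
```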