Posted on 2024-11-2 13:15:01
I tried to use the baseline provided on Kaggle:

    # Model parameter settings
    from transformers import AutoTokenizer, AutoConfig
    class Config:
        debug = False
        num_workers = 4
        llm_backbone = "/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold4/1/microsoft-deberta-xlarge_fold4_best.pth"
        tokenizer_path = '/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold0/2/microsoft-deberta-xlarge_fold0_best.pth'
        tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_path, use_fast=True, trust_remote_code=True,
        )
        batch_size = 8
        max_len = 512
        seed = 42
        num_labels = 1
        gradient_checkpointing = False
    CFG = Config()
The error message is as follows:

    ---------------------------------------------------------------------------
    HFValidationError                        Traceback (most recent call last)
    File /opt/conda/lib/python3.10/site-packages/transformers/utils/hub.py:403, in cached_file(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)
        401 try:
        402     # Load from URL or cache if already cached
    --> 403     resolved_file = hf_hub_download(
        404         path_or_repo_id,
        405         filename,
        406         subfolder=None if len(subfolder) == 0 else subfolder,
        407         repo_type=repo_type,
        408         revision=revision,
        409         cache_dir=cache_dir,
        410         user_agent=user_agent,
        411         force_download=force_download,
        412         proxies=proxies,
        413         resume_download=resume_download,
        414         token=token,
        415         local_files_only=local_files_only,
        416     )
        417 except GatedRepoError as e:

    File /opt/conda/lib/python3.10/site-packages/huggingface_hub/utils/_deprecation.py:101, in _deprecate_arguments.<locals>._inner_deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
        100     warnings.warn(message, FutureWarning)
    --> 101 return f(*args, **kwargs)

    File /opt/conda/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:106, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
        105 if arg_name in ["repo_id", "from_id", "to_id"]:
    --> 106     validate_repo_id(arg_value)
        108 elif arg_name == "token" and arg_value is not None:

    File /opt/conda/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:154, in validate_repo_id(repo_id)
        153 if repo_id.count("/") > 1:
    --> 154     raise HFValidationError(
        155         "Repo id must be in the form 'repo_name' or 'namespace/repo_name':"
        156         f" '{repo_id}'. Use `repo_type` argument if needed."
        157     )
        159 if not REPO_ID_REGEX.match(repo_id):

    HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': '/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold0/2/microsoft-deberta-xlarge_fold0_best.pth'. Use `repo_type` argument if needed.

    The above exception was the direct cause of the following exception:

    OSError                                  Traceback (most recent call last)
    Cell In[39], line 3
          1 # Model parameter settings
          2 from transformers import AutoTokenizer, AutoConfig
    ----> 3 class Config:
          4     debug = False
          5     num_workers = 4

    Cell In[39], line 8, in Config()
          6 llm_backbone = "/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold4/1/microsoft-deberta-xlarge_fold4_best.pth"
          7 tokenizer_path = '/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold0/2/microsoft-deberta-xlarge_fold0_best.pth'
    ----> 8 tokenizer = AutoTokenizer.from_pretrained(
          9     tokenizer_path, use_fast=True, trust_remote_code=True,
         10 )
         11 batch_size = 8
         12 max_len = 512

    File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:844, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
        841     return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
        843 # Next, let's try to use the tokenizer_config file to get the tokenizer class.
    --> 844 tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
        845 if "_commit_hash" in tokenizer_config:
        846     kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]

    File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:676, in get_tokenizer_config(pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, **kwargs)
        673     token = use_auth_token
        675 commit_hash = kwargs.get("_commit_hash", None)
    --> 676 resolved_config_file = cached_file(
        677     pretrained_model_name_or_path,
        678     TOKENIZER_CONFIG_FILE,
        679     cache_dir=cache_dir,
        680     force_download=force_download,
        681     resume_download=resume_download,
        682     proxies=proxies,
        683     token=token,
        684     revision=revision,
        685     local_files_only=local_files_only,
        686     subfolder=subfolder,
        687     _raise_exceptions_for_gated_repo=False,
        688     _raise_exceptions_for_missing_entries=False,
        689     _raise_exceptions_for_connection_errors=False,
        690     _commit_hash=commit_hash,
        691 )
        692 if resolved_config_file is None:
        693     logger.info("Could not locate the tokenizer configuration file, will try to use the model config instead.")

    File /opt/conda/lib/python3.10/site-packages/transformers/utils/hub.py:469, in cached_file(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)
        467     raise EnvironmentError(f"There was a specific connection error when trying to load {path_or_repo_id}:\n{err}")
        468 except HFValidationError as e:
    --> 469     raise EnvironmentError(
        470         f"Incorrect path_or_model_id: '{path_or_repo_id}'. Please provide either the path to a local folder or the repo_id of a model on the Hub."
        471     ) from e
        472 return resolved_file

    OSError: Incorrect path_or_model_id: '/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold0/2/microsoft-deberta-xlarge_fold0_best.pth'. Please provide either the path to a local folder or the repo_id of a model on the Hub.
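
From the traceback, the cause appears to be that `AutoTokenizer.from_pretrained` only accepts a Hub repo id (like `microsoft/deberta-xlarge`) or a path to a local folder containing the tokenizer files (`tokenizer_config.json`, vocab, etc.), while `tokenizer_path` points at a single `.pth` weight checkpoint. Below is a minimal sketch of one possible fix, assuming the `.pth` files are plain DeBERTa-xlarge state dicts and that the tokenizer can be taken from the base model; in an offline Kaggle notebook you would point at a local copy of the `microsoft/deberta-xlarge` files instead:

    # Sketch only, not the competition's official baseline: load the tokenizer
    # from the base model and the fine-tuned weights from the .pth checkpoint
    # separately.
    import torch
    from transformers import AutoTokenizer, AutoModelForSequenceClassification

    class Config:
        debug = False
        num_workers = 4
        # Path to the fine-tuned weights (a state dict, not a model folder)
        llm_backbone = "/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold4/1/microsoft-deberta-xlarge_fold4_best.pth"
        # Assumption: the base model's tokenizer matches the fine-tuned checkpoint
        tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-xlarge", use_fast=True)
        batch_size = 8
        max_len = 512
        seed = 42
        num_labels = 1
        gradient_checkpointing = False

    CFG = Config()

    # Build the architecture first, then load the checkpoint's weights into it.
    model = AutoModelForSequenceClassification.from_pretrained(
        "microsoft/deberta-xlarge", num_labels=CFG.num_labels
    )
    state_dict = torch.load(CFG.llm_backbone, map_location="cpu")
    # Assumption: some training scripts wrap the weights, e.g. {"model": ...};
    # unwrap before loading if that is the case here.
    if isinstance(state_dict, dict) and "model" in state_dict:
        state_dict = state_dict["model"]
    model.load_state_dict(state_dict, strict=False)

Whether `strict=False` is actually needed depends on how the checkpoint was saved; if its key names do not match the stock `AutoModelForSequenceClassification` layout, the baseline's own model class should be rebuilt instead and the state dict loaded into that.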