|
楼主 |
发表于 2024-11-2 13:15:01
|
显示全部楼层
我试图使用kaggle提供的baseline,#模型参数设置
from transformers import AutoTokenizer, AutoConfig
class Config:
debug = False
num_workers = 4
llm_backbone = "/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold4/1/microsoft-deberta-xlarge_fold4_best.pth"
tokenizer_path = '/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold0/2/microsoft-deberta-xlarge_fold0_best.pth'
tokenizer = AutoTokenizer.from_pretrained(
tokenizer_path, use_fast=True, trust_remote_code=True,
)
batch_size = 8
max_len = 512
seed = 42
num_labels = 1
gradient_checkpointing = False
CFG = Config()
报错提示如下---------------------------------------------------------------------------
HFValidationError Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/transformers/utils/hub.py:403, in cached_file(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)
401 try:
402 # Load from URL or cache if already cached
--> 403 resolved_file = hf_hub_download(
404 path_or_repo_id,
405 filename,
406 subfolder=None if len(subfolder) == 0 else subfolder,
407 repo_type=repo_type,
408 revision=revision,
409 cache_dir=cache_dir,
410 user_agent=user_agent,
411 force_download=force_download,
412 proxies=proxies,
413 resume_download=resume_download,
414 token=token,
415 local_files_only=local_files_only,
416 )
417 except GatedRepoError as e:
File /opt/conda/lib/python3.10/site-packages/huggingface_hub/utils/_deprecation.py:101, in _deprecate_arguments.<locals>._inner_deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
100 warnings.warn(message, FutureWarning)
--> 101 return f(*args, **kwargs)
File /opt/conda/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:106, in validate_hf_hub_args.<locals>._inner_fn(*args, **kwargs)
105 if arg_name in ["repo_id", "from_id", "to_id"]:
--> 106 validate_repo_id(arg_value)
108 elif arg_name == "token" and arg_value is not None:
File /opt/conda/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:154, in validate_repo_id(repo_id)
153 if repo_id.count("/") > 1:
--> 154 raise HFValidationError(
155 "Repo id must be in the form 'repo_name' or 'namespace/repo_name':"
156 f" '{repo_id}'. Use `repo_type` argument if needed."
157 )
159 if not REPO_ID_REGEX.match(repo_id):
HFValidationError: Repo id must be in the form 'repo_name' or 'namespace/repo_name': '/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold0/2/microsoft-deberta-xlarge_fold0_best.pth'. Use `repo_type` argument if needed.
The above exception was the direct cause of the following exception:
OSError Traceback (most recent call last)
Cell In[39], line 3
1 #模型参数设置
2 from transformers import AutoTokenizer, AutoConfig
----> 3 class Config:
4 debug = False
5 num_workers = 4
Cell In[39], line 8, in Config()
6 llm_backbone = "/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold4/1/microsoft-deberta-xlarge_fold4_best.pth"
7 tokenizer_path = '/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold0/2/microsoft-deberta-xlarge_fold0_best.pth'
----> 8 tokenizer = AutoTokenizer.from_pretrained(
9 tokenizer_path, use_fast=True, trust_remote_code=True,
10 )
11 batch_size = 8
12 max_len = 512
File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:844, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
841 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
843 # Next, let's try to use the tokenizer_config file to get the tokenizer class.
--> 844 tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs)
845 if "_commit_hash" in tokenizer_config:
846 kwargs["_commit_hash"] = tokenizer_config["_commit_hash"]
File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:676, in get_tokenizer_config(pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, **kwargs)
673 token = use_auth_token
675 commit_hash = kwargs.get("_commit_hash", None)
--> 676 resolved_config_file = cached_file(
677 pretrained_model_name_or_path,
678 TOKENIZER_CONFIG_FILE,
679 cache_dir=cache_dir,
680 force_download=force_download,
681 resume_download=resume_download,
682 proxies=proxies,
683 token=token,
684 revision=revision,
685 local_files_only=local_files_only,
686 subfolder=subfolder,
687 _raise_exceptions_for_gated_repo=False,
688 _raise_exceptions_for_missing_entries=False,
689 _raise_exceptions_for_connection_errors=False,
690 _commit_hash=commit_hash,
691 )
692 if resolved_config_file is None:
693 logger.info("Could not locate the tokenizer configuration file, will try to use the model config instead.")
File /opt/conda/lib/python3.10/site-packages/transformers/utils/hub.py:469, in cached_file(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)
467 raise EnvironmentError(f"There was a specific connection error when trying to load {path_or_repo_id}:\n{err}")
468 except HFValidationError as e:
--> 469 raise EnvironmentError(
470 f"Incorrect path_or_model_id: '{path_or_repo_id}'. Please provide either the path to a local folder or the repo_id of a model on the Hub."
471 ) from e
472 return resolved_file
OSError: Incorrect path_or_model_id: '/kaggle/input/llm-detect-deberta-xlarge/pytorch/fold0/2/microsoft-deberta-xlarge_fold0_best.pth'. Please provide either the path to a local folder or the repo_id of a model on the Hub.
|
|