languagemodels.models
```python
import re

from huggingface_hub import hf_hub_download, snapshot_download
from tokenizers import Tokenizer
import ctranslate2

from languagemodels.config import config, models


modelcache = {}


class ModelException(Exception):
    pass


def get_model_info(model_type="instruct"):
    """Gets info about the current model in use

    >>> get_model_info('instruct')
    {'name': 'LaMini-Flan-T5-248M', 'tuning': 'instruct'...
    """
    model_name = config[f"{model_type}_model"]

    m = [m for m in models if m["name"] == model_name][0]

    param_bits = int(re.search(r"\d+", m["quantization"]).group(0))

    m["size_gb"] = m["params"] * param_bits / 8 / 1e9

    if "/" in m["name"]:
        m["path"] = m["name"]
    else:
        m["path"] = f"jncraton/{m['name']}-{m['backend']}-{m['quantization']}"

    return m


def initialize_tokenizer(model_type, model_name):
    model_info = get_model_info(model_type)
    rev = model_info.get("revision", None)

    tok_config = hf_hub_download(
        model_info["path"], "tokenizer.json", revision=rev, local_files_only=True
    )
    tokenizer = Tokenizer.from_file(tok_config)

    if model_type == "embedding":
        tokenizer.no_padding()
        tokenizer.no_truncation()

    return tokenizer


def initialize_model(model_type, model_name, tokenizer_only=False):
    model_info = get_model_info(model_type)

    allowed = ["*.bin", "*.txt", "*.json"]
    rev = model_info.get("revision", None)

    # snapshot_download checks for updates by default, which can cause
    # significant lag in offline use cases or on high-latency networks.
    # To avoid this penalty, we try the local cache first and only
    # attempt a download if the files are not available.
    try:
        path = snapshot_download(
            model_info["path"],
            max_workers=1,
            allow_patterns=allowed,
            revision=rev,
            local_files_only=True,
        )
    except FileNotFoundError:
        path = snapshot_download(
            model_info["path"], max_workers=1, allow_patterns=allowed, revision=rev
        )

    if tokenizer_only:
        return None

    if model_info["architecture"] == "encoder-only-transformer":
        return ctranslate2.Encoder(
            path,
            "cpu",
            compute_type="int8",
        )
    elif model_info["architecture"] == "decoder-only-transformer":
        return ctranslate2.Generator(path, config["device"], compute_type="int8")
    else:
        return ctranslate2.Translator(path, config["device"], compute_type="int8")


def get_model(model_type, tokenizer_only=False):
    """Gets a model from the loaded model cache

    If tokenizer_only is set, the model itself will not be (re)loaded

    >>> tokenizer, model = get_model("instruct")
    >>> type(tokenizer)
    <class 'tokenizers.Tokenizer'>

    >>> type(model)
    <class 'ctranslate2._ext.Translator'>

    >>> tokenizer, model = get_model("embedding")
    >>> type(tokenizer)
    <class 'tokenizers.Tokenizer'>

    >>> type(model)
    <class 'ctranslate2._ext.Encoder'>
    """
    model_name = config[f"{model_type}_model"]

    if config["max_ram"] < 4 and not tokenizer_only:
        # In low-memory configurations, unload any other cached models
        for model in modelcache:
            if model != model_name:
                try:
                    modelcache[model][1].unload_model()
                except AttributeError:
                    # Encoder-only models can't be unloaded by ctranslate2
                    pass

    if model_name not in modelcache:
        model = initialize_model(model_type, model_name, tokenizer_only)
        tokenizer = initialize_tokenizer(model_type, model_name)
        modelcache[model_name] = (tokenizer, model)
    elif not tokenizer_only:
        # Make sure the model is loaded if we've never loaded it
        if not modelcache[model_name][1]:
            modelcache[model_name] = (
                modelcache[model_name][0],
                initialize_model(model_type, model_name),
            )
        # Make sure the model is reloaded if we've unloaded it
        try:
            modelcache[model_name][1].load_model()
        except AttributeError:
            # Encoder-only models can't be unloaded or reloaded by ctranslate2
            pass

    return modelcache[model_name]
```
modelcache = {}
class ModelException(builtins.Exception):

Common base class for all non-exit exceptions.

Inherited Members
- builtins.Exception: Exception
- builtins.BaseException: with_traceback, args
def get_model_info(model_type='instruct'):
Gets info about the current model in use
>>> get_model_info('instruct')
{'name': 'LaMini-Flan-T5-248M', 'tuning': 'instruct'...
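A short usage sketch (the field values shown are illustrative and depend on the active config):

```python
# Sketch: reading the metadata returned by get_model_info.
info = get_model_info("instruct")

print(info["name"])     # configured model name, e.g. 'LaMini-Flan-T5-248M'
print(info["path"])     # Hugging Face repo id, 'jncraton/<name>-<backend>-<quantization>'
                        # unless the name already contains a '/'
print(info["size_gb"])  # params * quantization bits / 8, in GB
```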
def initialize_tokenizer(model_type, model_name):
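A minimal usage sketch, assuming tokenizer.json for the configured model is already in the local Hugging Face cache (the function passes local_files_only=True, so it never downloads):

```python
# Sketch: encoding text with the tokenizer for the configured instruct model.
tokenizer = initialize_tokenizer("instruct", config["instruct_model"])

encoding = tokenizer.encode("Hello world")
print(encoding.tokens)  # subword token strings
print(encoding.ids)     # integer ids for the same tokens
```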
def initialize_model(model_type, model_name, tokenizer_only=False):
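The cache-first download logic above is a reusable pattern with huggingface_hub; here it is as a standalone sketch (the helper name is illustrative, not part of this module):

```python
from huggingface_hub import snapshot_download


def cached_snapshot(repo_id, allow_patterns, revision=None):
    """Return a local snapshot path, touching the network only when needed."""
    try:
        # local_files_only=True raises a FileNotFoundError subclass
        # (LocalEntryNotFoundError) when the files aren't fully cached yet
        return snapshot_download(
            repo_id,
            allow_patterns=allow_patterns,
            revision=revision,
            local_files_only=True,
        )
    except FileNotFoundError:
        # Fall back to a real download; requires network access
        return snapshot_download(
            repo_id, allow_patterns=allow_patterns, revision=revision
        )
```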
def get_model(model_type, tokenizer_only=False):
Gets a model from the loaded model cache.

If tokenizer_only is set, the model itself will not be (re)loaded.
>>> tokenizer, model = get_model("instruct")
>>> type(tokenizer)
<class 'tokenizers.Tokenizer'>
>>> type(model)
<class 'ctranslate2._ext.Translator'>
>>> tokenizer, model = get_model("embedding")
>>> type(tokenizer)
<class 'tokenizers.Tokenizer'>
>>> type(model)
<class 'ctranslate2._ext.Encoder'>
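As a rough end-to-end sketch, the pair returned for the default seq2seq instruct model can be used for generation as follows (the prompt and decoding length are illustrative; a decoder-only model would return a ctranslate2.Generator and use generate_batch instead):

```python
# Sketch: generation with the default seq2seq instruct model.
tokenizer, model = get_model("instruct")

prompt = "What color is the sky?"
tokens = tokenizer.encode(prompt).tokens  # translate_batch expects token strings
results = model.translate_batch([tokens], max_decoding_length=64)

output_tokens = results[0].hypotheses[0]
ids = [tokenizer.token_to_id(t) for t in output_tokens]
print(tokenizer.decode(ids))
```

For the embedding model, the returned ctranslate2.Encoder instead exposes forward_batch for computing hidden states.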