languagemodels.models

import re
from huggingface_hub import hf_hub_download, snapshot_download
from tokenizers import Tokenizer
import ctranslate2

from languagemodels.config import config, models


# Cache of loaded (tokenizer, model) pairs keyed by model name
modelcache = {}


class ModelException(Exception):
    pass


def get_model_info(model_type="instruct"):
    """Gets info about the current model in use

    >>> get_model_info('instruct')
    {'name': 'LaMini-Flan-T5-248M', 'tuning': 'instruct'...
    """
    model_name = config[f"{model_type}_model"]

    m = [m for m in models if m["name"] == model_name][0]

    # Extract the bit width from the quantization label, e.g. "int8" -> 8
    param_bits = int(re.search(r"\d+", m["quantization"]).group(0))

    m["size_gb"] = m["params"] * param_bits / 8 / 1e9
    if "/" in m["name"]:
        m["path"] = m["name"]
    else:
        # Bare names resolve to pre-converted repos under the jncraton namespace
        m["path"] = f"jncraton/{m['name']}-{m['backend']}-{m['quantization']}"

    return m


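As a worked example of the size and path computation above (a sketch only; the params, backend, and quantization values are assumed here, since the real entries live in languagemodels.config):

entry = {
    "name": "LaMini-Flan-T5-248M",  # default instruct model per the doctest
    "params": 248e6,                # assumed parameter count
    "backend": "ct2",               # assumed backend tag
    "quantization": "int8",         # assumed; re.search extracts 8
}
param_bits = int(re.search(r"\d+", entry["quantization"]).group(0))  # 8
size_gb = entry["params"] * param_bits / 8 / 1e9  # 248e6 * 8 / 8 / 1e9 ≈ 0.248
path = f"jncraton/{entry['name']}-{entry['backend']}-{entry['quantization']}"
# -> "jncraton/LaMini-Flan-T5-248M-ct2-int8"
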
def initialize_tokenizer(model_type, model_name):
    model_info = get_model_info(model_type)
    rev = model_info.get("revision", None)

    tok_config = hf_hub_download(
        model_info["path"], "tokenizer.json", revision=rev, local_files_only=True
    )
    tokenizer = Tokenizer.from_file(tok_config)

    if model_type == "embedding":
        tokenizer.no_padding()
        tokenizer.no_truncation()

    return tokenizer


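A minimal usage sketch for the returned tokenizer, using the standard tokenizers API (the model files must already be in the local Hugging Face cache, since hf_hub_download is called with local_files_only=True; the token strings shown are illustrative):

tokenizer = initialize_tokenizer("instruct", config["instruct_model"])
encoding = tokenizer.encode("What color is the sky?")
print(encoding.tokens)  # token strings, e.g. ['▁What', '▁color', ...]
print(encoding.ids)     # matching vocabulary ids
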
def initialize_model(model_type, model_name, tokenizer_only=False):
    model_info = get_model_info(model_type)

    allowed = ["*.bin", "*.txt", "*.json"]
    rev = model_info.get("revision", None)

    # snapshot_download checks for updates by default, which can cause
    # significant lag in offline use cases or on high-latency networks.
    # To avoid this penalty, we try the local cache first and only
    # attempt a download if the files are not available locally.
    try:
        path = snapshot_download(
            model_info["path"],
            max_workers=1,
            allow_patterns=allowed,
            revision=rev,
            local_files_only=True,
        )
    except FileNotFoundError:
        path = snapshot_download(
            model_info["path"], max_workers=1, allow_patterns=allowed, revision=rev
        )

    if tokenizer_only:
        return None

    if model_info["architecture"] == "encoder-only-transformer":
        # Encoders are pinned to the CPU here
        return ctranslate2.Encoder(
            path,
            "cpu",
            compute_type="int8",
        )
    elif model_info["architecture"] == "decoder-only-transformer":
        return ctranslate2.Generator(path, config["device"], compute_type="int8")
    else:
        return ctranslate2.Translator(path, config["device"], compute_type="int8")


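A minimal generation sketch, assuming the configured instruct model is a sequence-to-sequence model so that initialize_model returns a ctranslate2.Translator; translate_batch consumes and produces lists of token strings:

tokenizer = initialize_tokenizer("instruct", config["instruct_model"])
translator = initialize_model("instruct", config["instruct_model"])

tokens = tokenizer.encode("What color is the sky?").tokens
results = translator.translate_batch([tokens], max_decoding_length=64)
output_tokens = results[0].hypotheses[0]
ids = [tokenizer.token_to_id(t) for t in output_tokens]
print(tokenizer.decode(ids, skip_special_tokens=True))
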
def get_model(model_type, tokenizer_only=False):
    """Gets a model from the loaded model cache

    If tokenizer_only, the model itself will not be (re)loaded

    >>> tokenizer, model = get_model("instruct")
    >>> type(tokenizer)
    <class 'tokenizers.Tokenizer'>

    >>> type(model)
    <class 'ctranslate2._ext.Translator'>

    >>> tokenizer, model = get_model("embedding")
    >>> type(tokenizer)
    <class 'tokenizers.Tokenizer'>

    >>> type(model)
    <class 'ctranslate2._ext.Encoder'>
    """

    model_name = config[f"{model_type}_model"]

    # In low-memory configurations, unload any other cached models first
    if config["max_ram"] < 4 and not tokenizer_only:
        for model in modelcache:
            if model != model_name:
                try:
                    modelcache[model][1].unload_model()
                except AttributeError:
                    # Encoder-only models can't be unloaded by ctranslate2
                    pass

    if model_name not in modelcache:
        model = initialize_model(model_type, model_name, tokenizer_only)
        tokenizer = initialize_tokenizer(model_type, model_name)
        modelcache[model_name] = (tokenizer, model)
    elif not tokenizer_only:
        # Make sure the model is loaded if we've never loaded it
        if not modelcache[model_name][1]:
            modelcache[model_name] = (
                modelcache[model_name][0],
                initialize_model(model_type, model_name),
            )
        # Make sure the model is reloaded if we've unloaded it
        try:
            modelcache[model_name][1].load_model()
        except AttributeError:
            # Encoder-only models can't be unloaded by ctranslate2
            pass

    return modelcache[model_name]
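
A sketch of the caching behavior (mirroring the doctests above): repeated calls return the same objects, and tokenizer_only requests leave the model slot empty until a full load is requested:

tokenizer, model = get_model("instruct")    # first call loads tokenizer and model
tokenizer2, model2 = get_model("instruct")  # served from modelcache
assert tokenizer is tokenizer2 and model is model2

# With tokenizer_only=True on a cold cache, the model slot is None
tok, mdl = get_model("embedding", tokenizer_only=True)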