languagemodels

import requests
import datetime
import json
import re
from typing import overload

from languagemodels.config import config
from languagemodels.inference import (
    generate,
    rank_instruct,
    parse_chat,
    list_tokens,
)
from languagemodels import embeddings

docs = embeddings.RetrievalContext()


def complete(prompt: str) -> str:
    """Provide one completion for a given open-ended prompt

    :param prompt: Prompt to use as input to the model
    :return: Completion returned from the language model

    Examples:

    >>> complete("Luke thought that he") #doctest: +SKIP
    'was going to be a doctor.'

    >>> complete("There are many mythical creatures who") #doctest: +SKIP
    'are able to fly'

    >>> complete("She hid in her room until") #doctest: +SKIP
    'she was sure she was safe'
    """

    result = generate(
        ["Write a sentence"],
        prefix=prompt,
        max_tokens=config["max_tokens"],
        temperature=0.7,
        topk=40,
    )[0]

    # Strip the prompt from the output if the model echoed it back
    if result.startswith(prompt):
        prefix_length = len(prompt)
        return result[prefix_length:]
    else:
        return result


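# Because `complete` samples with temperature 0.7, repeated calls yield varied
# continuations. A minimal sketch of collecting several candidates;
# `sample_completions` is illustrative and not part of this module's API.
def sample_completions(prompt: str, n: int = 3) -> list:
    """Return `n` independently sampled continuations of `prompt`

    >>> sample_completions("She hid in her room until") # doctest: +SKIP
    ['she was sure she was safe', ...]
    """
    return [complete(prompt) for _ in range(n)]

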
@overload
def do(prompt: list) -> list:
    ...


@overload
def do(prompt: str) -> str:
    ...


def do(prompt, choices=None):
    """Follow a single-turn instructional prompt

    :param prompt: Instructional prompt(s) to follow
    :param choices: If provided, outputs are restricted to values in choices
    :return: Completion returned from the language model

    Note that this function is overloaded to return a list of results if
    a list of prompts is provided and a single string if a single
    prompt is provided as a string

    Examples:

    >>> do("Translate Spanish to English: Hola mundo!") #doctest: +SKIP
    'Hello world!'

    >>> do("Pick the planet from the list: baseball, Texas, Saturn")
    '...Saturn...'

    >>> do("Answer: What is the capital of England?")
    '...London...'

    >>> do(["Pick the planet from the list: baseball, Texas, Saturn"] * 2)
    ['...Saturn...', '...Saturn...']

    >>> do(["Say red", "Say blue"], choices=["red", "blue"])
    ['red', 'blue']

    >>> do("Classify as positive or negative: LLMs are bad",
    ... choices=["Positive", "Negative"])
    'Negative'

    >>> do("Classify as positive or negative: LLMs are great",
    ... choices=["Positive", "Negative"])
    'Positive'
    """

    prompts = [prompt] if isinstance(prompt, str) else prompt

    if choices:
        results = [r[0] for r in rank_instruct(prompts, choices)]
    else:
        results = generate(prompts, max_tokens=config["max_tokens"], topk=1)

        # Normalize single-word answers to title case with closing punctuation
        for i, result in enumerate(results):
            if len(result.split()) == 1:
                results[i] = result.title()

                if result[-1] not in (".", "!", "?"):
                    results[i] = results[i] + "."

    return results[0] if isinstance(prompt, str) else results


@overload
def embed(doc: list) -> list:
    ...


@overload
def embed(doc: str) -> list:
    ...


def embed(doc):
    """Create embedding for a document

    :param doc: Document(s) to embed
    :return: Embedding

    Note that this function is overloaded to return a list of embeddings if
    a list of docs is provided and a single embedding if a single
    doc is provided as a string

    Examples:

    >>> embed("Hello, world")
    [-0.0...]

    >>> embed(["Hello", "world"])
    [[-0.0...]]
    """

    docs = [doc] if isinstance(doc, str) else doc

    # Create embeddings and convert to lists of floats
    emb = [[float(n) for n in e] for e in embeddings.embed(docs)]

    return emb[0] if isinstance(doc, str) else emb


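# Embeddings returned by `embed` can be compared with cosine similarity to
# measure semantic relatedness. A minimal sketch; the helper name `similarity`
# is illustrative and not part of this module's API.
def similarity(doc1: str, doc2: str) -> float:
    """Compute cosine similarity between the embeddings of two documents

    >>> similarity("I like cats", "I love felines") > similarity(
    ...     "I like cats", "Interest rates rose today") # doctest: +SKIP
    True
    """
    import math

    a, b = embed(doc1), embed(doc2)
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(x * x for x in b))
    return dot / norm

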
def chat(prompt: str) -> str:
    """Get new message from chat-optimized language model

    The `prompt` for this model is provided as a series of messages as a single
    plain-text string. Several special tokens are used to delineate chat
    messages.

    - `system:` - Indicates the start of a system message providing
    instructions about how the assistant should behave.
    - `user:` - Indicates the start of a prompter (typically user)
    message.
    - `assistant:` - Indicates the start of an assistant message.

    A complete prompt may look something like this:

    ```
    Assistant is helpful and harmless

    User: What is the capital of Germany?

    Assistant: The capital of Germany is Berlin.

    User: How many people live there?

    Assistant:
    ```

    The completion from the language model is returned.

    :param prompt: Prompt using formatting described above
    :return: Completion returned from the language model

    Examples:

    >>> response = chat('''
    ...      System: Respond as a helpful assistant. It is 5:00pm.
    ...
    ...      User: What time is it?
    ...
    ...      Assistant:
    ...      ''') # doctest: +SKIP
    "It's 5:00pm."
    """

    messages = parse_chat(prompt)

    # Suppress starts of all assistant and user messages to avoid
    # repeat generation
    suppress = [
        "Assistant: " + m["content"].split(" ")[0]
        for m in messages
        if m["role"] in ["assistant", "user"]
    ]

    # Suppress all user messages to avoid repeating them
    suppress += [m["content"] for m in messages if m["role"] == "user"]

    system_msgs = [m for m in messages if m["role"] == "system"]
    assistant_msgs = [m for m in messages if m["role"] == "assistant"]
    user_msgs = [m for m in messages if m["role"] == "user"]

    # The current model is tuned on instructions and tends to get
    # lost if it sees too many questions.
    # Use only the most recent user and assistant message for context
    # and keep all system messages.
    messages = system_msgs + assistant_msgs[-1:] + user_msgs[-1:]

    rolemap = {
        "system": "System",
        "user": "Question",
        "assistant": "Assistant",
    }

    messages = [f"{rolemap[m['role']]}: {m['content']}" for m in messages]

    prompt = "\n\n".join(messages) + "\n\n" + "Assistant:"

    if prompt.startswith("System:"):
        prompt = prompt[7:].strip()

    response = generate(
        [prompt],
        max_tokens=config["max_tokens"],
        temperature=0.3,
        topk=40,
        prefix="Assistant:",
        suppress=suppress,
    )[0]

    # Remove duplicate "Assistant:" prefix if it was generated again
    if response.startswith("Assistant:"):
        response = response[10:]

    return response.strip()


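# `chat` is stateless, so the caller accumulates the transcript between turns
# using the message format described above. A minimal multi-turn sketch;
# `ChatSession` is an illustrative wrapper, not part of this module's API.
class ChatSession:
    """Accumulate a chat transcript and generate replies with `chat`

    >>> session = ChatSession("Respond as a helpful assistant.")
    >>> session.say("What is the capital of Germany?") # doctest: +SKIP
    'The capital of Germany is Berlin.'
    """

    def __init__(self, system: str):
        self.transcript = f"System: {system}\n\n"

    def say(self, message: str) -> str:
        self.transcript += f"User: {message}\n\nAssistant:"
        reply = chat(self.transcript)
        self.transcript += f" {reply}\n\n"
        return reply

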
def code(prompt: str) -> str:
    """Complete a code prompt

    This assumes that users are expecting Python completions. Default models
    are fine-tuned on Python where applicable.

    :param prompt: Code context to complete
    :return: Completion returned from the language model

    Examples:

    >>> code("# Print Hello, world!\\n")
    'print("Hello, world!")\\n'

    >>> code("def return_4():")
    '...return 4...'
    """
    return generate([prompt], max_tokens=config["max_tokens"], topk=1, model="code")[0]


def extract_answer(question: str, context: str) -> str:
    """Extract an answer to a `question` from a provided `context`

    The returned answer will always be a substring extracted from `context`.
    It may not always be a correct or meaningful answer, but it will never be
    an arbitrary hallucination.

    :param question: A question to answer using knowledge from context
    :param context: Knowledge used to answer the question
    :return: Answer to the question

    Examples:

    >>> context = "There is a green ball and a red box"
    >>> extract_answer("What color is the ball?", context).lower()
    '...green...'

    >>> extract_answer("Who created Python?", get_wiki('Python')) #doctest: +SKIP
    '...Guido van Rossum...'
    """

    return generate([f"{context}\n\n{question}"])[0]


def classify(doc: str, label1: str, label2: str) -> str:
    """Performs binary classification on an input

    :param doc: A plain text input document to classify
    :param label1: The first label to classify against
    :param label2: The second label to classify against
    :return: The closest matching class. The return value will always be
    `label1` or `label2`

    Examples:

    >>> classify("That book was good.", "positive", "negative")
    'positive'
    >>> classify("That movie was terrible.", "positive", "negative")
    'negative'
    """

    return do(
        f"Classify as {label1} or {label2}: {doc}\n\nClassification:",
        choices=[label1, label2],
    )


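# `classify` is limited to two labels, but `do` with `choices` generalizes the
# same prompt pattern to any label set. A minimal sketch; `classify_multi` is
# illustrative and not part of this module's API.
def classify_multi(doc: str, labels: list) -> str:
    """Classify `doc` as the closest match from `labels`

    >>> classify_multi("That book was good.",
    ...     ["positive", "negative", "neutral"]) # doctest: +SKIP
    'positive'
    """
    return do(
        f"Classify as {', '.join(labels)}: {doc}\n\nClassification:",
        choices=labels,
    )

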
def store_doc(doc: str, name: str = "") -> None:
    """Store document for later retrieval

    :param doc: A plain text document to store.
    :param name: Optional name for the document. This is used as a chunk prefix.

    Examples:

    >>> store_doc("The sky is blue.")
    """
    docs.store(doc, name)


def load_doc(query: str) -> str:
    """Load a matching document

    A single document that best matches `query` will be returned.

    :param query: Query to compare to stored documents
    :return: Content of the closest matching document

    Examples:

    >>> store_doc("Paris is in France.")
    >>> store_doc("The sky is blue.")
    >>> load_doc("Where is Paris?")
    'Paris is in France.'
    """
    return docs.get_match(query)


def get_doc_context(query: str) -> str:
    """Loads context from documents

    A string representing the most relevant content from all stored documents
    will be returned. This may be a blend of chunks from multiple documents.

    :param query: Query to compare to stored documents
    :return: Up to 128 tokens of context

    Examples:

    >>> store_doc("Paris is in France.")
    >>> store_doc("Paris is nice.")
    >>> store_doc("The sky is blue.")
    >>> get_doc_context("Where is Paris?")
    'Paris is in France.\\n\\nParis is nice.'
    """
    return docs.get_context(query)


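# The storage and retrieval helpers above compose into a simple
# retrieval-augmented generation loop: store documents, retrieve the most
# relevant context for a question, then answer from that context. A minimal
# sketch; `answer_from_docs` is illustrative, not part of this module's API.
def answer_from_docs(question: str) -> str:
    """Answer a question using previously stored documents as context

    >>> store_doc("Paris is in France.")
    >>> answer_from_docs("Where is Paris?") # doctest: +SKIP
    '...France...'
    """
    context = get_doc_context(question)
    return extract_answer(question, context)

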
def get_wiki(topic: str) -> str:
    """
    Return Wikipedia summary for a topic

    This function ignores the complexity of disambiguation pages and simply
    returns the first result that is not a disambiguation page

    :param topic: Topic to search for on Wikipedia
    :return: Text content of the lead section of the most popular matching article

    Examples:

    >>> get_wiki('Python language')
    'Python is a high-level...'

    >>> get_wiki('Chemistry')
    'Chemistry is the scientific study...'
    """

    url = "https://api.wikimedia.org/core/v1/wikipedia/en/search/title"
    response = requests.get(url, params={"q": topic, "limit": 5})
    response = json.loads(response.text)

    for page in response["pages"]:
        # Pass the title as a query parameter so it is properly URL-encoded
        wiki_result = requests.get(
            "https://en.wikipedia.org/w/api.php",
            params={
                "action": "query",
                "prop": "extracts|pageprops",
                "exintro": 1,
                "redirects": 1,
                "titles": page["title"],
                "format": "json",
            },
        ).json()

        first = wiki_result["query"]["pages"].popitem()[1]
        # Pages without pageprops would otherwise raise a KeyError
        if "disambiguation" in first.get("pageprops", {}):
            continue

        summary = first["extract"]

        cutoffs = [
            "See_also",
            "Notes",
            "References",
            "Further_reading",
            "External_links",
        ]

        for cutoff in cutoffs:
            summary = summary.split(f'<span id="{cutoff}">', 1)[0]

        # Convert paragraph tags to blank lines and strip remaining HTML
        summary = re.sub(r"<p>", "\n\n", summary, flags=re.I)
        summary = re.sub(r"<!--.*?-->", "", summary, flags=re.I | re.DOTALL)
        summary = re.sub(r"<.*?>", "", summary, flags=re.I)
        summary = re.sub(r"\s*[\n\r]+\s*[\r\n]+[\s\r\n]*", "\n\n", summary, flags=re.I)
        summary = summary.strip()
        return summary
    else:
        return "No matching wiki page found."


def get_weather(latitude, longitude):
    """Fetch the current weather for a supplied longitude and latitude

    Weather is provided by the US government and this function only supports
    locations in the United States.

    :param latitude: Latitude value representing this location
    :param longitude: Longitude value representing this location
    :return: Plain text description of the current weather forecast

    Examples:

    >>> get_weather(41.8, -87.6) # doctest: +SKIP
    'Scattered showers and thunderstorms before 1pm with a high of 73.'
    """

    # Look up the forecast URL for this grid point, then fetch the forecast
    res = requests.get(f"https://api.weather.gov/points/{latitude},{longitude}")
    points = json.loads(res.text)
    forecast_url = points["properties"]["forecast"]

    res = requests.get(forecast_url)
    forecast = json.loads(res.text)
    current = forecast["properties"]["periods"][0]

    return current["detailedForecast"]


def get_date() -> str:
    """Returns the current date and time in natural language

    >>> get_date() # doctest: +SKIP
    'Friday, May 12, 2023 at 09:27AM'
    """

    now = datetime.datetime.now()

    return now.strftime("%A, %B %d, %Y at %I:%M%p")


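# Helpers like `get_date`, `get_weather`, and `get_wiki` can ground a chat in
# current information by injecting their output into the system message. A
# minimal sketch; `chat_with_tools` is illustrative and not part of this
# module's API.
def chat_with_tools(message: str) -> str:
    """Answer a chat message with the current date available as context

    >>> chat_with_tools("What day of the week is it?") # doctest: +SKIP
    'It is Friday.'
    """
    return chat(
        f"System: Respond as a helpful assistant. It is {get_date()}.\n\n"
        f"User: {message}\n\n"
        f"Assistant:"
    )

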
def print_tokens(prompt: str) -> None:
    """Prints a list of tokens in a prompt

    :param prompt: Prompt to use as input to tokenizer
    :return: Nothing

    Examples:

    >>> print_tokens("Hello world")
    ' Hello' (token 8774)
    ' world' (token 296)

    >>> print_tokens("Hola mundo")
    ' Hol' (token 5838)
    'a' (token 9)
    ' mun' (token 13844)
    'd' (token 26)
    'o' (token 32)
    """

    tokens = list_tokens(prompt)

    # Replace the sentencepiece word-boundary marker with a space for display
    for token in tokens:
        print(f"'{token[0].replace('▁', ' ')}' (token {token[1]})")


def count_tokens(prompt: str) -> int:
    """Counts tokens in a prompt

    :param prompt: Prompt to use as input to tokenizer
    :return: Number of tokens in the prompt

    Examples:

    >>> count_tokens("Hello world")
    2

    >>> count_tokens("Hola mundo")
    5
    """

    return len(list_tokens(prompt))


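# `count_tokens` is useful for keeping prompts within a token budget before
# generation. A minimal sketch, assuming sentence-level truncation is
# acceptable; `truncate_to_budget` and the 512-token default are illustrative,
# not part of this module's API.
def truncate_to_budget(text: str, budget: int = 512) -> str:
    """Drop trailing sentences from `text` until it fits in `budget` tokens

    >>> truncate_to_budget("The sky is blue. Paris is in France.", 5) # doctest: +SKIP
    'The sky is blue'
    """
    sentences = text.split(". ")
    while len(sentences) > 1 and count_tokens(". ".join(sentences)) > budget:
        sentences.pop()
    return ". ".join(sentences)

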
def set_max_ram(value):
    """Sets max allowed RAM

    This value takes priority over environment variables

    Returns the numeric value set in GB

    >>> set_max_ram(16)
    16.0

    >>> set_max_ram('512mb')
    0.5
    """

    config["max_ram"] = value

    return config["max_ram"]


def require_model_license(match_re):
    """Require models to match supplied regex

    This can be used to enforce certain licensing constraints when using this
    package.
    """
    config["model_license"] = match_re
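

# Global configuration is typically applied once at program start. A minimal
# sketch; `configure_runtime` and the example license pattern are illustrative,
# not part of this module's API.
def configure_runtime(max_ram="512mb", license_match="apache|mit|bsd"):
    """Constrain RAM usage and model licensing for this process

    >>> configure_runtime() # doctest: +SKIP
    """
    set_max_ram(max_ram)
    require_model_license(license_match)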