LLM By Examples — Get started with OpenAI (part 1 of 3)

M&E Technical Solutions Ltd.
5 min readMay 18, 2024

--

OpenAI is a leading AI research organization founded in December 2015 by tech visionaries such as Elon Musk and Sam Altman. Dedicated to the ethical and safe advancement of AI, OpenAI focuses on transparency and collaboration. Notable achievements include the GPT series of models, which has significantly advanced natural language processing, transforming industries and enhancing human-computer interaction.

Install

To install the OpenAI Python client, use the command below:

pip install openai

Get Started

If you haven’t used OpenAI before, you need to sign up with a valid email address. Next, you need to create an API key to access the OpenAI APIs. Once you have the API key, set it as the environment variable OPENAI_API_KEY. For example:

# Linux or MacOS
export OPENAI_API_KEY='your-api-key-here'

# Windows
setx OPENAI_API_KEY "your-api-key-here"

Alternatively, you can pass the API key as an argument when creating the OpenAI object.

client = OpenAI(api_key="your-api-key-here")

All code samples below assume the environment variable is set.

Overview

OpenAI provides an API reference, documentation, a cookbook, tutorials, GitHub repositories, a playground, ChatGPT, and more, where you can find everything you need. Because these resources change frequently, we found it very helpful to maintain a set of “quick start” samples here. Below are the libraries and versions needed to run all samples in this article.

openai==1.30.1
requests==2.31.0
backoff==2.2.1
tenacity==8.2.3
tiktoken==0.5.1
pandas==2.1.3

List Available Models in your account

from openai import OpenAI

# The client takes its API key from the OPENAI_API_KEY environment variable.
client = OpenAI()

# Fetch the models this account can use and print one model id per line.
models = client.models.list()
for available_model in models:
    print(available_model.id)

Text Generation / Chat

from openai import OpenAI

client = OpenAI()

# Conversation so far: a system prompt, one completed exchange, and the
# follow-up question the model should answer.
conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who won the world series in 2020?"},
    {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
    {"role": "user", "content": "Where was it played?"},
]

# With stream=False the reply is read from a single response object below.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=conversation,
    temperature=0,
    stream=False,
)

# The reply text sits on the first choice; usage holds the token total.
first_choice = response.choices[0]
response_message = first_choice.message.content
tokens_used = response.usage.total_tokens

print(response_message)
print(tokens_used)

Text Generation / Chat with Streaming output

import os
import tiktoken
from openai import OpenAI

client = OpenAI()
model = "gpt-4o"

# Chat history sent with the request; the streamed assistant reply is appended
# to this list once the stream has been consumed.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who won the world series in 2020?"},
    {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
    {"role": "user", "content": "Where was it played?"},
]

# stream=True: the result is iterated chunk by chunk instead of being read
# as one complete message.
response = client.chat.completions.create(
    model=model,
    messages=messages,
    temperature=0,
    stream=True,
)

# Consume the stream: echo every text fragment as it arrives and keep a copy
# so the complete reply can be added to the chat history afterwards.
reply_parts = []
for chunk in response:
    # Chunks without any choices carry nothing to print.
    if not chunk.choices:
        continue
    choice = chunk.choices[0]
    if choice.finish_reason == "length":
        raise Exception(
            "The chat session exceeds maximum allowed tokens! To reduce total tokens, modify and reload chat history or message. See details from Help command."
        )
    # A choice without a delta, or a delta without content, counts as "no text".
    text = getattr(getattr(choice, "delta", None), "content", None)
    if text:
        reply_parts.append(text)
        print(text, end='')
all_content = ''.join(reply_parts)
messages.append({"role": "assistant", "content": all_content})
print()

# Count used tokens
# Use the tokenizer registered for the model; fall back to cl100k_base when
# tiktoken does not recognise the model name.
try:
    encoding = tiktoken.encoding_for_model(model)
except KeyError:
    encoding = tiktoken.get_encoding("cl100k_base")

tokens_per_message = 3
tokens_per_name = 1

# Start from the 3 tokens that prime every reply
# (<|start|>assistant<|message|>), then add the cost of each message.
tokens_used = 3
for message in messages:
    tokens_used += tokens_per_message
    tokens_used += sum(len(encoding.encode(value)) for value in message.values())
    if "name" in message:
        tokens_used += tokens_per_name

print(tokens_used)

Function Calling

import os
import json
from openai import OpenAI

# Shared client for this sample; per the setup notes earlier in the article,
# the API key comes from the OPENAI_API_KEY environment variable.
client = OpenAI()

def get_current_weather(location, unit="fahrenheit"):
    """Get the current weather in a given location.

    This is a canned stand-in for a real weather service: it recognises a
    few hard-coded cities and returns fixed temperatures for them.

    Args:
        location: Free-form place name. It is matched case-insensitively by
            substring against the known cities. ``None`` is treated as an
            unknown location instead of raising.
        unit: Unit label echoed back in the result. ``None`` falls back to
            ``"fahrenheit"`` so that callers forwarding an optional argument
            do not produce ``"unit": null``.

    Returns:
        A JSON string. Known cities yield ``location``, ``temperature`` and
        ``unit``; anything else yields the given ``location`` with a
        ``temperature`` of ``"unknown"``.
    """
    if unit is None:
        unit = "fahrenheit"

    # (substring to look for, display name, canned temperature), checked in order.
    known_cities = (
        ("tokyo", "Tokyo", "10"),
        ("san francisco", "San Francisco", "72"),
        ("paris", "Paris", "22"),
    )
    needle = (location or "").lower()
    for key, display_name, temperature in known_cities:
        if key in needle:
            return json.dumps(
                {"location": display_name, "temperature": temperature, "unit": unit}
            )
    return json.dumps({"location": location, "temperature": "unknown"})

# The question mentions three cities, so the reply may contain several tool
# calls; the loop below handles each of them.
messages = [{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}]

# JSON-schema description of the single tool offered to the model.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]
response = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=tools,
    tool_choice="auto",
)
response_message = response.choices[0].message
tool_calls = response_message.tool_calls
# Tokens of the first request always count towards the total.
tokens_used = response.usage.total_tokens
if tool_calls:
    print('...tool calls...')
    available_functions = {
        "get_current_weather": get_current_weather,
    }
    # Keep the assistant's tool-call request in the history ahead of the
    # tool results that answer it.
    messages.append(response_message)
    for tool_call in tool_calls:
        function_name = tool_call.function.name
        function_to_call = available_functions[function_name]
        function_args = json.loads(tool_call.function.arguments)
        # "unit" is optional in the schema: forward it only when the model
        # supplied it, so the function's own default is not replaced by None.
        call_kwargs = {"location": function_args.get("location")}
        if function_args.get("unit") is not None:
            call_kwargs["unit"] = function_args["unit"]
        function_response = function_to_call(**call_kwargs)
        messages.append(
            {
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                "content": function_response,
            }
        )
    second_response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
    )
    response_message = second_response.choices[0].message.content
    # Both requests consumed tokens; report the sum.
    tokens_used += second_response.usage.total_tokens
else:
    print('...no tool calls...')
    # Print the reply text, not the message object, as in the tool branch.
    response_message = response_message.content

print(response_message)
print(tokens_used)

Embedding Creation

import os
import numpy as np
import pandas as pd
from openai import OpenAI


def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector of ``text`` produced by ``model``.

    Newlines in ``text`` are replaced with spaces before the request is
    sent. The request goes through the module-level ``client``.
    """
    single_line = text.replace("\n", " ")
    result = client.embeddings.create(input=[single_line], model=model)
    return result.data[0].embedding


# 1. Create testing data
data = [
    ['PROD1', 'USER1', 5, 'Good Quality Dog Food', 'I have bought several of the Vitality canned'],
    ['PROD1', 'USER2', 15, 'Not as Advertised', 'Product arrived labeled as Jumbo Salted Peanut']
]
df = pd.DataFrame(data, columns=['ProductId', 'UserId', 'Score', 'Summary', 'Comments'])
print(df)
print('-'*20)
# Merge title and body into the single text that gets embedded.
df["combined"] = (
    "Title: " + df.Summary.str.strip() + "; Content: " + df.Comments.str.strip()
)
print(df)
print()
print()

# 2. Embedding
client = OpenAI()
df['ada_embedding'] = df.combined.apply(lambda x: get_embedding(x, model='text-embedding-3-small'))
print(df)
print('-'*20)

# 3. Persist and Load back
csv_path = 'data/embedded_1k_reviews.csv'
# Create the target directory first; to_csv does not create missing folders.
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
df.to_csv(csv_path, index=False)
df = pd.read_csv(csv_path)
# The CSV holds each embedding as the text of a Python list, so it has to be
# parsed back before conversion to an array.
# NOTE(security): eval executes arbitrary code. It is tolerable here only
# because this script wrote the file itself; for files from any other source
# parse with ast.literal_eval or json.loads instead.
df['ada_embedding'] = df.ada_embedding.apply(eval).apply(np.array)

print(df)

Embedding dimensions Reduction

# Very helpful when you need to load embeddings into limited memory
import os
import sys
import numpy as np
from openai import OpenAI

# Shared client for this sample; per the setup notes earlier in the article,
# the API key comes from the OPENAI_API_KEY environment variable.
client = OpenAI()

def normalize_l2(x):
    """Scale a vector, or each row of a matrix, to unit L2 norm.

    Args:
        x: Array-like input; it is converted with ``np.array``.

    Returns:
        For 1-D input, ``x / norm``, or ``x`` unchanged when its norm is 0.
        For other inputs the norm is taken along axis 1 (per row for a 2-D
        array) and each row is divided by its own norm; rows whose norm is 0
        are left as they are.
    """
    x = np.array(x)
    if x.ndim == 1:
        norm = np.linalg.norm(x)
        return x if norm == 0 else x / norm

    norm = np.linalg.norm(x, 2, axis=1, keepdims=True)
    # Divide zero-norm rows by 1 instead of 0: they stay unchanged and no
    # divide-by-zero RuntimeWarning is raised for them.
    safe_norm = np.where(norm == 0, 1, norm)
    return x / safe_norm


response = client.embeddings.create(
    model="text-embedding-3-large", input="Testing 123", encoding_format="float"
)

# Number of leading components to keep; lower it to save more memory.
target_dim = 256

# Reduce the embedding by keeping only its first target_dim components.
cut_dim = response.data[0].embedding[:target_dim]
# sys.getsizeof is shallow: for a list it does not include the float objects
# the list refers to, so treat this figure as a lower bound.
print(sys.getsizeof(cut_dim))

# Re-normalize so the shortened vector has unit length again.
norm_dim = normalize_l2(cut_dim)
print(sys.getsizeof(norm_dim))

Embedding Semantic Search

import os
import numpy as np
import pandas as pd
from typing import List, Optional
from openai import OpenAI

# Shared client for this sample; per the setup notes earlier in the article,
# the API key comes from the OPENAI_API_KEY environment variable.
client = OpenAI()

# Ref: https://github.com/openai/openai-cookbook/blob/main/examples/utils/embeddings_utils.py
def get_embedding(text: str, model="text-embedding-3-small", **kwargs) -> List[float]:
    """Return the embedding vector of ``text`` produced by ``model``.

    Newlines in ``text`` are replaced with spaces before the request is
    sent. Extra keyword arguments are passed through unchanged to
    ``client.embeddings.create``.
    """
    single_line = text.replace("\n", " ")
    first_item = client.embeddings.create(input=[single_line], model=model, **kwargs).data[0]
    return first_item.embedding

def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Args:
        a: First vector (array-like).
        b: Second vector (array-like) of the same length.

    Returns:
        ``dot(a, b) / (|a| * |b|)``. When either vector has zero length the
        result is ``0.0`` rather than nan, so it can still be sorted and
        compared.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # A zero vector has no direction; report "no similarity".
        return 0.0
    return np.dot(a, b) / denominator

# Two sample reviews: product id, user id, score, summary (title) and comment text.
data = [
    ['PROD1', 'USER1', 5, 'Good Quality Dog Food', 'I have bought several of the Vitality canned'],
    ['PROD1', 'USER2', 15, 'Not as Advertised', 'Product arrived labeled as Jumbo Salted Peanut'],
]
df = pd.DataFrame(data, columns=['ProductId', 'UserId', 'Score', 'Summary', 'Comments'])

# Merge the stripped title and body into the single text that gets embedded.
stripped_summary = df["Summary"].str.strip()
stripped_comments = df["Comments"].str.strip()
df["combined"] = "Title: " + stripped_summary + "; Content: " + stripped_comments

def search_reviews(df, product_description, n=3, pprint=True):
    """Return the ``n`` rows of ``df`` most similar to ``product_description``.

    The description and every ``df.combined`` text are embedded with
    'text-embedding-3-small'. The embeddings and their cosine similarity to
    the description are written to the 'ada_embedding' and 'similarities'
    columns of ``df`` itself. ``pprint`` is accepted but not used.
    """
    embedding_model = 'text-embedding-3-small'
    query_vector = get_embedding(product_description, model=embedding_model)

    df['ada_embedding'] = df.combined.apply(
        lambda text: get_embedding(text, model=embedding_model)
    )
    df['similarities'] = df.ada_embedding.apply(
        lambda row_vector: cosine_similarity(row_vector, query_vector)
    )
    ranked = df.sort_values('similarities', ascending=False)
    return ranked.head(n)

# Example query: fetch the single review most similar to 'Good Quality'.
res = search_reviews(df, 'Good Quality', n=1)
print(res)

For more samples, please check the link below:

Enjoy!

--

--

M&E Technical Solutions Ltd.

Digital Transformation | FinOps | DevOps | Software Architecture/Solutions | Microservices | Data Lake | Kubernetes | Python | SpringBoot | Certifications