LLM By Examples — Get started with OpenAI (part 3 of 3)

MB20261
5 min read · May 18, 2024

--

To continue from:

Assistant Calls with Text Generation / Chat

import os
import time
from pprint import pprint
from openai import OpenAI
from openai.types.beta.threads import (
ImageFileContentBlock,
TextContentBlock
)

# By default the SDK reads the API key from the OPENAI_API_KEY environment variable.
client = OpenAI()

def __assist_wait_on_run(run, thread_id):
    """Poll a run until it leaves the transient 'queued'/'in_progress' states.

    Parameters:
        run: the Run object returned by client.beta.threads.runs.create().
        thread_id: id of the thread the run belongs to.

    Returns:
        The final Run object (terminal status, e.g. 'completed' or 'failed').
    """
    # Membership test instead of the original chained `or` comparison.
    while run.status in ("queued", "in_progress"):
        run = client.beta.threads.runs.retrieve(
            thread_id=thread_id,
            run_id=run.id,
        )
        # Throttle polling so we do not hammer the API.
        time.sleep(0.5)
    return run

def __assist_save_output_file(file_id, write_path):
    """Download an API-hosted file by id and store its bytes at *write_path*."""
    content = client.files.content(file_id)
    with open(write_path, "wb") as out:
        out.write(content.read())

def __upload_file(file_path):
    """Upload a local file for assistant use and return its file id.

    Parameters:
        file_path: path of the local file to upload.

    Returns:
        The id assigned to the uploaded file by the API.
    """
    # `with` guarantees the handle is closed even if the upload raises;
    # the original leaked the open file object.
    with open(file_path, "rb") as fh:
        uploaded = client.files.create(file=fh, purpose='assistants')
    return uploaded.id

def __create_assistant(name, description: str = None, model: str = 'gpt-4o',
                       instructions: str = None,
                       use_code_interpreter: bool = False,
                       use_file_search: bool = False, file_ids=None):
    """Create an assistant and return its id.

    Exactly one tool mode is honored, in priority order:
    code_interpreter, then file_search, then a plain (tool-less) assistant.

    Parameters:
        name: assistant display name.
        description: assistant description (reused as instructions for
            file_search when `instructions` is None).
        model: model name, defaults to 'gpt-4o'.
        instructions: system instructions (file_search branch only).
        use_code_interpreter: attach `file_ids` to the code_interpreter tool.
        use_file_search: index `file_ids` into a vector store for file_search.
        file_ids: ids of previously uploaded files (defaults to no files).

    Returns:
        The created assistant's id.
    """
    # Avoid the shared-mutable-default pitfall (`file_ids=[]` in the original).
    file_ids = [] if file_ids is None else file_ids
    tools = []
    tool_resources = {}
    if use_code_interpreter:
        tools.append({"type": "code_interpreter"})
        tool_resources["code_interpreter"] = {"file_ids": file_ids}
        assistant = client.beta.assistants.create(
            name=name,
            description=description,
            model=model,
            tools=tools,
            tool_resources=tool_resources,
        )
        return assistant.id
    if use_file_search:
        tools.append({"type": "file_search"})
        if instructions is None:
            instructions = description
        assistant = client.beta.assistants.create(
            name=name,
            instructions=instructions,
            model=model,
            tools=tools,
            tool_resources=tool_resources,
        )
        # file_search reads from a vector store: create one, index the files
        # (create_and_poll blocks until indexing finishes), then attach it.
        vector_store = client.beta.vector_stores.create(name=name)
        client.beta.vector_stores.file_batches.create_and_poll(
            vector_store_id=vector_store.id, file_ids=file_ids
        )
        assistant = client.beta.assistants.update(
            assistant_id=assistant.id,
            tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
        )
        return assistant.id
    # No tools requested: plain assistant.
    assistant = client.beta.assistants.create(
        name=name,
        description=description,
        model=model,
    )
    return assistant.id

def __chat_with_assistant(assistant_id, messages):
    """Run *messages* through the assistant on a fresh thread and collect replies.

    Returns a list of {"role": "assistant", "content": [...]} dicts. Any
    image files the assistant produced are also saved locally.
    """
    thread = client.beta.threads.create(messages=messages)
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant_id,
    )
    run = __assist_wait_on_run(run=run, thread_id=thread.id)

    replies = []
    listing = client.beta.threads.messages.list(thread_id=thread.id, order="asc")
    for msg in listing:
        # Only assistant-authored messages are returned to the caller.
        if msg.role != 'assistant':
            continue
        parts = []
        for block in msg.content:
            if isinstance(block, TextContentBlock):
                parts.append({"type": "text", "text": block.text.value})
            elif isinstance(block, ImageFileContentBlock):
                image_id = block.image_file.file_id
                # Persist the generated image locally as '<file_id>.png'.
                __assist_save_output_file(image_id, f'{image_id}.png')
                parts.append(
                    {"type": "image_file", "image_file": {"file_id": image_id}}
                )
        replies.append({"role": "assistant", "content": parts})
    return replies

# REF: https://platform.openai.com/docs/assistants/how-it-works/managing-threads-and-messages
# You can attach a maximum of 20 files to code_interpreter and 10,000 files to file_search (using vector_store objects).
# Each file can be at most 512 MB in size and have a maximum of 5,000,000 tokens.

# Upload the dataset, then build a code_interpreter assistant that can read it.
file_id = __upload_file("data/fine_food_reviews_1k.csv")
assistant_id = __create_assistant(
    name="Data visualizer",
    description="You are great at creating beautiful data visualizations. You analyze data present in .csv files, understand trends, and come up with data visualizations relevant to those trends. You also share a brief text summary of the trends observed.",
    model="gpt-4o",
    use_code_interpreter=True,
    file_ids=[file_id],
)

# Attach the uploaded file to the user message so code_interpreter can open it.
messages = [
    {
        "role": "user",
        "content": "Create 3 data visualizations based on the trends in this file.",
        "attachments": [
            {"file_id": file_id, "tools": [{"type": "code_interpreter"}]},
        ],
    }
]
response_messages = __chat_with_assistant(assistant_id=assistant_id, messages=messages)
pprint(response_messages)

Assistant Calls with Images / Multimodal

import os
import time
from pprint import pprint
from openai import OpenAI
from openai.types.beta.threads import (
ImageFileContentBlock,
TextContentBlock
)

# By default the SDK reads the API key from the OPENAI_API_KEY environment variable.
client = OpenAI()

def __assist_wait_on_run(run, thread_id):
    """Poll a run until it leaves the transient 'queued'/'in_progress' states.

    Parameters:
        run: the Run object returned by client.beta.threads.runs.create().
        thread_id: id of the thread the run belongs to.

    Returns:
        The final Run object (terminal status, e.g. 'completed' or 'failed').
    """
    # Membership test instead of the original chained `or` comparison.
    while run.status in ("queued", "in_progress"):
        run = client.beta.threads.runs.retrieve(
            thread_id=thread_id,
            run_id=run.id,
        )
        # Throttle polling so we do not hammer the API.
        time.sleep(0.5)
    return run

def __assist_save_output_file(file_id, write_path):
    """Download an API-hosted file by id and store its bytes at *write_path*."""
    content = client.files.content(file_id)
    with open(write_path, "wb") as out:
        out.write(content.read())

def __upload_file(file_path):
    """Upload a local file for assistant use and return its file id.

    Parameters:
        file_path: path of the local file to upload.

    Returns:
        The id assigned to the uploaded file by the API.
    """
    # `with` guarantees the handle is closed even if the upload raises;
    # the original leaked the open file object.
    with open(file_path, "rb") as fh:
        uploaded = client.files.create(file=fh, purpose='assistants')
    return uploaded.id

def __create_assistant(name, description: str = None, model: str = 'gpt-4o',
                       instructions: str = None,
                       use_code_interpreter: bool = False,
                       use_file_search: bool = False, file_ids=None):
    """Create an assistant and return its id.

    Exactly one tool mode is honored, in priority order:
    code_interpreter, then file_search, then a plain (tool-less) assistant.

    Parameters:
        name: assistant display name.
        description: assistant description (reused as instructions for
            file_search when `instructions` is None).
        model: model name, defaults to 'gpt-4o'.
        instructions: system instructions (file_search branch only).
        use_code_interpreter: attach `file_ids` to the code_interpreter tool.
        use_file_search: index `file_ids` into a vector store for file_search.
        file_ids: ids of previously uploaded files (defaults to no files).

    Returns:
        The created assistant's id.
    """
    # Avoid the shared-mutable-default pitfall (`file_ids=[]` in the original).
    file_ids = [] if file_ids is None else file_ids
    tools = []
    tool_resources = {}
    if use_code_interpreter:
        tools.append({"type": "code_interpreter"})
        tool_resources["code_interpreter"] = {"file_ids": file_ids}
        assistant = client.beta.assistants.create(
            name=name,
            description=description,
            model=model,
            tools=tools,
            tool_resources=tool_resources,
        )
        return assistant.id
    if use_file_search:
        tools.append({"type": "file_search"})
        if instructions is None:
            instructions = description
        assistant = client.beta.assistants.create(
            name=name,
            instructions=instructions,
            model=model,
            tools=tools,
            tool_resources=tool_resources,
        )
        # file_search reads from a vector store: create one, index the files
        # (create_and_poll blocks until indexing finishes), then attach it.
        vector_store = client.beta.vector_stores.create(name=name)
        client.beta.vector_stores.file_batches.create_and_poll(
            vector_store_id=vector_store.id, file_ids=file_ids
        )
        assistant = client.beta.assistants.update(
            assistant_id=assistant.id,
            tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
        )
        return assistant.id
    # No tools requested: plain assistant.
    assistant = client.beta.assistants.create(
        name=name,
        description=description,
        model=model,
    )
    return assistant.id

def __chat_with_assistant(assistant_id, messages):
    """Run *messages* through the assistant on a fresh thread and collect replies.

    Returns a list of {"role": "assistant", "content": [...]} dicts. Any
    image files the assistant produced are also saved locally.
    """
    thread = client.beta.threads.create(messages=messages)
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant_id,
    )
    run = __assist_wait_on_run(run=run, thread_id=thread.id)

    replies = []
    listing = client.beta.threads.messages.list(thread_id=thread.id, order="asc")
    for msg in listing:
        # Only assistant-authored messages are returned to the caller.
        if msg.role != 'assistant':
            continue
        parts = []
        for block in msg.content:
            if isinstance(block, TextContentBlock):
                parts.append({"type": "text", "text": block.text.value})
            elif isinstance(block, ImageFileContentBlock):
                image_id = block.image_file.file_id
                # Persist the generated image locally as '<file_id>.png'.
                __assist_save_output_file(image_id, f'{image_id}.png')
                parts.append(
                    {"type": "image_file", "image_file": {"file_id": image_id}}
                )
        replies.append({"role": "assistant", "content": parts})
    return replies

# REF: https://platform.openai.com/docs/assistants/how-it-works/managing-threads-and-messages
# You can attach a maximum of 20 files to code_interpreter and 10,000 files to file_search (using vector_store objects).
# Each file can be at most 512 MB in size and have a maximum of 5,000,000 tokens.

# Upload both images so the assistant can reference them by file id.
file_id1 = __upload_file(file_path="data/04_origin.jpg")
file_id2 = __upload_file(file_path="data/04_generated.png")

assistant_id = __create_assistant(
    name="Image Compare",
    description="You will be asked to compare images",
    model="gpt-4o",
    use_code_interpreter=False,
)

# First question: compare the two uploaded images in a single message.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is the difference between these images?"},
            {"type": "image_file", "image_file": {"file_id": file_id1}},
            {"type": "image_file", "image_file": {"file_id": file_id2}},
        ],
    }
]
response_messages = __chat_with_assistant(assistant_id=assistant_id, messages=messages)
pprint(response_messages)
print('-'*30)
print()

# Second question: single image, requesting high-detail vision processing.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is this an image of?"},
            {"type": "image_file", "image_file": {"file_id": file_id1, "detail": "high"}},
        ],
    }
]
response_messages = __chat_with_assistant(assistant_id=assistant_id, messages=messages)
pprint(response_messages)

Assistant Calls with Q&A on files

import os
import time
from pprint import pprint
from openai import OpenAI
from openai.types.beta.threads import (
ImageFileContentBlock,
TextContentBlock
)

# By default the SDK reads the API key from the OPENAI_API_KEY environment variable.
client = OpenAI()

def __assist_wait_on_run(run, thread_id):
    """Poll a run until it leaves the transient 'queued'/'in_progress' states.

    Parameters:
        run: the Run object returned by client.beta.threads.runs.create().
        thread_id: id of the thread the run belongs to.

    Returns:
        The final Run object (terminal status, e.g. 'completed' or 'failed').
    """
    # Membership test instead of the original chained `or` comparison.
    while run.status in ("queued", "in_progress"):
        run = client.beta.threads.runs.retrieve(
            thread_id=thread_id,
            run_id=run.id,
        )
        # Throttle polling so we do not hammer the API.
        time.sleep(0.5)
    return run

def __assist_save_output_file(file_id, write_path):
    """Download an API-hosted file by id and store its bytes at *write_path*."""
    content = client.files.content(file_id)
    with open(write_path, "wb") as out:
        out.write(content.read())

def __upload_file(file_path):
    """Upload a local file for assistant use and return its file id.

    Parameters:
        file_path: path of the local file to upload.

    Returns:
        The id assigned to the uploaded file by the API.
    """
    # `with` guarantees the handle is closed even if the upload raises;
    # the original leaked the open file object.
    with open(file_path, "rb") as fh:
        uploaded = client.files.create(file=fh, purpose='assistants')
    return uploaded.id

def __create_assistant(name, description: str = None, model: str = 'gpt-4o',
                       instructions: str = None,
                       use_code_interpreter: bool = False,
                       use_file_search: bool = False, file_ids=None):
    """Create an assistant and return its id.

    Exactly one tool mode is honored, in priority order:
    code_interpreter, then file_search, then a plain (tool-less) assistant.

    Parameters:
        name: assistant display name.
        description: assistant description (reused as instructions for
            file_search when `instructions` is None).
        model: model name, defaults to 'gpt-4o'.
        instructions: system instructions (file_search branch only).
        use_code_interpreter: attach `file_ids` to the code_interpreter tool.
        use_file_search: index `file_ids` into a vector store for file_search.
        file_ids: ids of previously uploaded files (defaults to no files).

    Returns:
        The created assistant's id.
    """
    # Avoid the shared-mutable-default pitfall (`file_ids=[]` in the original).
    file_ids = [] if file_ids is None else file_ids
    tools = []
    tool_resources = {}
    if use_code_interpreter:
        tools.append({"type": "code_interpreter"})
        tool_resources["code_interpreter"] = {"file_ids": file_ids}
        assistant = client.beta.assistants.create(
            name=name,
            description=description,
            model=model,
            tools=tools,
            tool_resources=tool_resources,
        )
        return assistant.id
    if use_file_search:
        tools.append({"type": "file_search"})
        if instructions is None:
            instructions = description
        assistant = client.beta.assistants.create(
            name=name,
            instructions=instructions,
            model=model,
            tools=tools,
            tool_resources=tool_resources,
        )
        # file_search reads from a vector store: create one, index the files
        # (create_and_poll blocks until indexing finishes), then attach it.
        vector_store = client.beta.vector_stores.create(name=name)
        client.beta.vector_stores.file_batches.create_and_poll(
            vector_store_id=vector_store.id, file_ids=file_ids
        )
        assistant = client.beta.assistants.update(
            assistant_id=assistant.id,
            tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
        )
        return assistant.id
    # No tools requested: plain assistant.
    assistant = client.beta.assistants.create(
        name=name,
        description=description,
        model=model,
    )
    return assistant.id

def __chat_with_assistant(assistant_id, messages):
    """Run *messages* through the assistant on a fresh thread and collect replies.

    Returns a list of {"role": "assistant", "content": [...]} dicts. Any
    image files the assistant produced are also saved locally.
    """
    thread = client.beta.threads.create(messages=messages)
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant_id,
    )
    run = __assist_wait_on_run(run=run, thread_id=thread.id)

    replies = []
    listing = client.beta.threads.messages.list(thread_id=thread.id, order="asc")
    for msg in listing:
        # Only assistant-authored messages are returned to the caller.
        if msg.role != 'assistant':
            continue
        parts = []
        for block in msg.content:
            if isinstance(block, TextContentBlock):
                parts.append({"type": "text", "text": block.text.value})
            elif isinstance(block, ImageFileContentBlock):
                image_id = block.image_file.file_id
                # Persist the generated image locally as '<file_id>.png'.
                __assist_save_output_file(image_id, f'{image_id}.png')
                parts.append(
                    {"type": "image_file", "image_file": {"file_id": image_id}}
                )
        replies.append({"role": "assistant", "content": parts})
    return replies

# REF: https://platform.openai.com/docs/assistants/how-it-works/managing-threads-and-messages
# You can attach a maximum of 20 files to code_interpreter and 10,000 files to file_search (using vector_store objects).
# Each file can be at most 512 MB in size and have a maximum of 5,000,000 tokens.

# Upload the PDF knowledge base and create a file_search assistant over it.
# NOTE(review): the path spelling 'finacial' must match the actual file on disk.
file_id = __upload_file("data/finacial_sample_reports.pdf")

assistant_id = __create_assistant(
    name="Financial Analyst Assistant",
    instructions="You are an expert financial analyst. Use you knowledge base to answer questions about audited financial statements.",
    model="gpt-4o",
    use_file_search=True,
    file_ids=[file_id],
)

# Attaching the file with the file_search tool grounds the answer in the PDF.
messages = [
    {
        "role": "user",
        "content": "Could you summarize detailed ratio analysis for Library Medical Group?",
        "attachments": [
            {"file_id": file_id, "tools": [{"type": "file_search"}]},
        ],
    }
]
response_messages = __chat_with_assistant(assistant_id=assistant_id, messages=messages)
pprint(response_messages)
print('-'*30)
print()

Enjoy!

--

--

MB20261

Digital Transformation | FinOps | DevOps | AI | Software Architecture/Solutions | Microservices | Data Lake | Kubernetes | Python | SpringBoot | Certifications