LLM By Examples — Get started with OpenAI (part 2 of 3)

MB20261
2 min read · May 18, 2024

--

To continue from:

Image Generation

import os
import shutil
import requests
from openai import OpenAI

# Reads the API key from the OPENAI_API_KEY environment variable.
client = OpenAI()

# Request a single HD-quality image from DALL-E 3.
response = client.images.generate(
    model="dall-e-3",
    prompt="a white siamese cat",
    size="1792x1024",
    quality="hd",
    n=1,
)

# The API returns a temporary URL pointing at the generated image.
url = response.data[0].url
output_image_file = 'cat.png'

# Stream the download to disk so the image is never held fully in memory.
r = requests.get(url, stream=True)
if r.status_code == 200:
    with open(output_image_file, 'wb') as f:
        # Ensure gzip/deflate content is decompressed while copying.
        r.raw.decode_content = True
        shutil.copyfileobj(r.raw, f)

Image Recognition / Vision

import os
import base64
import requests
from openai import OpenAI

def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The result is suitable for embedding in a ``data:`` URL sent to the
    vision API.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

base64_image = encode_image('cat.png')

client = OpenAI()

# Ask the vision-capable model to describe the embedded image.
# NOTE: the source file is a PNG, so the data URL must declare image/png
# (the original snippet incorrectly said image/jpeg).
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What’s in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}"
                    }
                },
            ],
        }
    ],
    max_tokens=300,
)

print(response.choices[0])

Image Comparison / Vision

import os
import base64
from openai import OpenAI


def encode_image(image_path):
    """Base64-encode the binary contents of *image_path*.

    Returns a UTF-8 string ready for use inside a ``data:`` URL.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

base64_image1 = encode_image('data/sample2.png')
base64_image2 = encode_image('data/sample3.png')

client = OpenAI()

# Send both images in a single user message so the model can compare them.
# NOTE: the inputs are PNG files, so the data URLs declare image/png
# (the original snippet incorrectly said image/jpeg).
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What are in these images? Is there any difference between them?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image1}"
                    }
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image2}"
                    }
                },
            ],
        }
    ],
    max_tokens=300,
)

print(response.choices[0])

Text to Speech

import os
from openai import OpenAI

# Reads the API key from the OPENAI_API_KEY environment variable.
client = OpenAI()

speech_file_path = "speech.mp3"
# Synthesize speech with the standard-quality TTS model.
response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",  # alloy, echo, fable, onyx, nova, and shimmer
    input="Today is a wonderful day to build something people love!",
)

# Persist the generated audio to an MP3 file.
response.write_to_file(speech_file_path)

Speech to Text

import os
from openai import OpenAI

# Reads the API key from the OPENAI_API_KEY environment variable.
client = OpenAI()

speech_file_path = "speech.mp3"

# Use a context manager so the audio file handle is always closed
# (the original snippet opened the file and never closed it).
with open(speech_file_path, "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )
print(transcription.text)

For assistant examples, please continue with the next article in this series:

Enjoy!

--

--

MB20261

Digital Transformation | FinOps | DevOps | AI | Software Architecture/Solutions | Microservices | Data Lake | Kubernetes | Python | SpringBoot | Certifications