(SwiftUI) 利用 ARKit 製作能讀懂你情緒變化的聊天機器人
目的:串接 OpenAI 的 API 製作聊天機器人,並且在得到回覆後透過 ARKit 偵測使用者情緒變化
情緒偵測:滿意、不滿、憤怒
製作 TextRowView
import SwiftUI
/// One chat bubble row. `bool == true` renders a bot reply (avatar +
/// purple bubble, typewriter animation); `bool == false` renders a
/// right-aligned user bubble with the text shown immediately.
struct TextRowView: View {
    var text = "What is CNN in ML??"
    var image: Image
    // true = bot reply, false = user message.
    @State var bool = true
    // Typewriter-effect state.
    @State private var displayText = ""
    @State private var currentIndex = 0
    @State private var isDisplayingText = false

    var body: some View {
        HStack(alignment: .top) {
            // User bubbles are pushed to the trailing edge.
            if bool == false {
                Spacer()
            }
            image
                .resizable()
                .scaledToFill()
                .frame(width: bool ? 50 : 0, height: bool ? 50 : 0)
                .clipShape(Circle())
            Text("\(bool ? displayText : text)")
                .padding()
                .background(bool ? Color(red: 102/255, green: 66/255, blue: 1) : Color(red: 239/255, green: 239/255, blue: 239/255))
                .foregroundColor(bool ? Color.white : Color.black)
                .cornerRadius(15)
                .padding(EdgeInsets(top: 5, leading: 10, bottom: 5, trailing: 10))
            // Bot bubbles (with avatar) hug the leading edge.
            if bool {
                Spacer()
            }
        }
        .onAppear {
            startDisplayingText(text: text)
        }
    }

    /// Reveals `text` one character at a time (typewriter effect).
    /// Only bot replies animate; user rows display `text` directly, so
    /// no timer is scheduled for them.
    func startDisplayingText(text: String) {
        currentIndex = 0
        isDisplayingText = true
        // Bug fix: the original scheduled a repeating timer even for user
        // rows, whose displayText is never rendered.
        guard bool else {
            isDisplayingText = false
            return
        }
        Timer.scheduledTimer(withTimeInterval: 0.005, repeats: true) { timer in
            if currentIndex < text.count {
                currentIndex += 1
                // prefix(_:) avoids recomputing a String.Index offset from
                // the start of the string on every tick (accidental O(n²)).
                displayText = String(text.prefix(currentIndex))
            } else {
                timer.invalidate()
                isDisplayingText = false
            }
        }
    }
}
在程式碼中會有 bool
來控制顯示的對話框,若為 Chat 回答,將會逐字顯示每一個字元
製作 ChatViewModel
使用 SPM (Swift Package Manager) 將 OpenAISwift 加入到專案中
File -> Add Packages..
GitHub
將上方連結輸入後,點選 Add Package
申請 OpenAI API
申請後將產生的 API Key 保存起來,例如:sk-SeEWRME6qGD27cLy3wNYT3Blxxxxxxxxxxxxx(請妥善保管,切勿公開或提交到版本控制)
在 ChatViewModel 新增以下程式碼
import SwiftUI
import OpenAISwift
/// Thin wrapper around the OpenAISwift client.
final class ViewModel: ObservableObject {
    init() {}

    private var client: OpenAISwift?

    /// Creates the OpenAI client.
    /// SECURITY: never ship a real API key in source control — load it
    /// from a config file, the keychain, or your own backend instead.
    func setup() {
        client = OpenAISwift(authToken: "sk-SeEWRME6qGD27cLy3wNYT3Blxxxxxxxxxxxxx")
    }

    /// Sends `text` to the completion endpoint (maxTokens fixed at 200).
    /// - Parameters:
    ///   - text: The prompt to send.
    ///   - completion: Called with the first choice's text on success;
    ///     not called on failure (callers simply see no reply).
    func send(text: String,
              completion: @escaping (String) -> Void) {
        client?.sendCompletion(with: text,
                               maxTokens: 200,
                               completionHandler: { result in
            switch result {
            case .success(let model):
                let output = model.choices?.first?.text ?? ""
                completion(output)
            case .failure(let error):
                // Bug fix: the original swallowed errors silently, which
                // makes API problems invisible. Log for debugging.
                print("OpenAI request failed: \(error)")
            }
        })
    }
}
設計畫面
// NOTE(review): despite its name, this is a SwiftUI View, not a view model.
struct ChatViewModel: View {
    // Bug fix: @StateObject instead of @ObservedObject. An @ObservedObject
    // instantiated inline is re-created every time SwiftUI re-initializes
    // this view, dropping the configured API client; @StateObject ties the
    // object's lifetime to the view's identity.
    @StateObject var viewModel = ViewModel()
    // Current contents of the input field.
    @State var text = ""
    // Transcript: even indices are user messages, odd indices are replies.
    @State var models = [String]()

    var body: some View {
        VStack(alignment: .leading, spacing: 20) {
            ScrollView {
                VStack(alignment: .leading, spacing: 20) {
                    ForEach(models.indices, id: \.self) { index in
                        if (index % 2) != 0 {
                            // Odd index → bot reply with avatar.
                            TextRowView(text: models[index], image: Image("openAI"), bool: true)
                        } else {
                            TextRowView(text: models[index], image: Image(""), bool: false)
                        }
                    }
                    Spacer()
                }
            }
            HStack {
                TextField("Message", text: $text, onCommit: {
                    send()
                })
                .textFieldStyle(.roundedBorder)
                if text == "" {
                    // Gray, non-interactive icon while the field is empty.
                    Image(systemName: "arrow.up.circle.fill")
                        .foregroundColor(.gray)
                } else {
                    Button {
                        send()
                        print(models)
                    } label: {
                        Image(systemName: "arrow.up.circle.fill")
                            .foregroundColor(Color(red: 102/255, green: 66/255, blue: 1))
                    }
                }
            }
        }
        .onAppear {
            viewModel.setup()
        }
        .padding()
    }

    /// Appends the user's message, queries the model, then appends the
    /// cleaned-up reply on the main queue.
    func send() {
        guard !text.trimmingCharacters(in: .whitespaces).isEmpty else {
            return
        }
        models.append("\(text)")
        let chatMessage = text
        viewModel.send(text: chatMessage) { response in
            DispatchQueue.main.async {
                // Work on a mutable copy; `response` is a let constant.
                var newresponse = response
                // The API sometimes prefixes stray punctuation — strip it.
                if newresponse.hasPrefix("?") || newresponse.hasPrefix("!") {
                    newresponse.removeFirst()
                }
                // Trim surrounding whitespace/newlines before displaying.
                self.models.append(newresponse.trimmingCharacters(in: .whitespacesAndNewlines))
                print(newresponse.trimmingCharacters(in: .whitespacesAndNewlines))
                // Clear the input field for the next message.
                self.text = ""
            }
        }
    }
}
有時候 OpenAI 會產生一些奇怪的符號和大量的空格,這可能會導致顯示上的問題。因此,我們需要對回應 response
進行處理
// newresponse 保存最初的 response
var newresponse = response
// 有時候最前面會產生 ? 必須移除
// response 定義是 let 不能移除
if newresponse.hasPrefix("?") || newresponse.hasPrefix("!") {
newresponse.removeFirst()
}
// 將空白部分移除
self.models.append(newresponse.trimmingCharacters(in: .whitespacesAndNewlines))
製作語音功能
匯入以下 Library:
import AVFoundation
import NaturalLanguage
新增以下程式碼
/// Speaks text aloud, picking a synthesis voice that matches the
/// detected language of the text (English / Japanese / Chinese).
class SpeechManager: NSObject, ObservableObject {
    // Published so the UI can show a "Speaking..." state.
    @Published var isSpeaking = false
    let synthesizer = AVSpeechSynthesizer()

    override init() {
        super.init()
        // Delegate callbacks (didStart/didFinish) drive `isSpeaking`.
        synthesizer.delegate = self
    }

    /// Speaks `text` with a voice matched to its dominant language.
    func speak(text: String) {
        let utterance = AVSpeechUtterance(string: text)
        utterance.voice = AVSpeechSynthesisVoice(language: returnLanguage(text))
        synthesizer.speak(utterance)
    }

    /// Maps the detected dominant language of `input_text` to a BCP-47
    /// identifier for AVSpeechSynthesisVoice. Both Simplified and
    /// Traditional Chinese are spoken with the zh-TW voice.
    func returnLanguage(_ input_text: String) -> String {
        let languageRecognizer = NLLanguageRecognizer()
        languageRecognizer.processString(input_text)
        // Bug fix: default to "en-US" instead of "" — an empty language
        // identifier yields a nil voice when no dominant language is found.
        var language = "en-US"
        if let dominantLanguage = languageRecognizer.dominantLanguage {
            switch dominantLanguage.rawValue {
            case "en":
                language = "en-US"
            case "ja":
                language = "ja-JP"
            // Recognition sometimes reports Simplified Chinese; speak both
            // scripts with the Taiwanese voice.
            case "zh-Hant", "zh-Hans":
                language = "zh-TW"
            default:
                language = "en-US"
            }
        }
        return language
    }
}
// MARK: - AVSpeechSynthesizerDelegate
extension SpeechManager: AVSpeechSynthesizerDelegate {
    /// Mutates the @Published flag on the main thread, where SwiftUI
    /// expects observable state changes to happen.
    private func updateSpeakingState(_ speaking: Bool) {
        DispatchQueue.main.async {
            self.isSpeaking = speaking
        }
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didStart utterance: AVSpeechUtterance) {
        updateSpeakingState(true)
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        updateSpeakingState(false)
    }
}
returnLanguage
會先判斷為哪一種語言,最後回傳對應要產生的 language 給 AVSpeechSynthesisVoice
在 ChatViewModel 新增 speechManager
struct ChatViewModel: View {
...
@StateObject private var speechManager = SpeechManager()
...
在 send function 新增以下程式碼
func send() {
...
viewModel.send(text: chatMessage) { response in
DispatchQueue.main.async {
...
speechManager.speak(text: newresponse.trimmingCharacters(in: .whitespacesAndNewlines))
...
}
}
}
這樣一來你的機器人就會說三種語言:中文、日語、英語
製作 LanguageViewModel
新增一個 Swift File SegmentControlView
,並新增以下程式碼
import Foundation
import SwiftUI
// Generic segmented control: one equal-width button per element of
// `segments`, with a shared selection background that slides between
// segments via matchedGeometryEffect.
struct SegmentControlView<ID: Identifiable, Content: View, Background: Shape>: View {
// The items to render, one segment each.
let segments: [ID]
// Currently selected item, owned by the caller.
@Binding var selected: ID
var titleNormalColor: Color
var titleSelectedColor: Color
var bgColor: Color
// Animation used when the selection moves.
let animation: Animation
// Produces the label content for a given segment.
@ViewBuilder var content: (ID) -> Content
// Produces the shape used for both the track and the selection pill.
@ViewBuilder var background: () -> Background
// Shared namespace so the selection pill animates between buttons.
@Namespace private var namespace
var body: some View {
GeometryReader { bounds in
HStack(spacing: 0) {
ForEach(segments) { segment in
NewSegmentButtonView(id: segment,
selectedId: $selected,
titleNormalColor: titleNormalColor,
titleSelectedColor: titleSelectedColor,
bgColor: bgColor,
animation: animation,
namespace: namespace) {
content(segment)
} background: {
background()
}
// Divide the available width evenly among segments.
.frame(width: bounds.size.width / CGFloat(segments.count))
}
}
// Faint track: tinted fill plus a slightly stronger outline.
.background {
background()
.fill(bgColor.opacity(0.1))
.overlay(
background()
.stroke(style: StrokeStyle(lineWidth: 1.5))
.foregroundColor(bgColor.opacity(0.2))
)
}
}
}
}
// A single segment button. Selecting it animates `selectedId`; the
// selected button shows the shared background pill (matched geometry).
fileprivate struct NewSegmentButtonView<ID: Identifiable, Content: View, Background: Shape> : View {
let id: ID
@Binding var selectedId: ID
var titleNormalColor: Color
var titleSelectedColor: Color
var bgColor: Color
var animation: Animation
// Namespace shared with the parent control so the pill can slide.
var namespace: Namespace.ID
@ViewBuilder var content: () -> Content
@ViewBuilder var background: () -> Background
var body: some View {
GeometryReader { bounds in
Button {
withAnimation(animation) {
selectedId = id
}
} label: {
content()
}
.frame(width: bounds.size.width, height: bounds.size.height)
// Unselected segments are rendered slightly smaller.
.scaleEffect(selectedId.id == id.id ? 1 : 0.8)
.clipShape(background())
.foregroundColor(selectedId.id == id.id ? titleSelectedColor : titleNormalColor)
.background(buttonBackground)
}
}
// Only the selected segment draws the pill; matchedGeometryEffect
// animates it between buttons sharing the same namespace.
@ViewBuilder private var buttonBackground: some View {
if selectedId.id == id.id {
background()
.fill(bgColor)
.matchedGeometryEffect(id: "SelectedTab", in: namespace)
}
}
}
/// Reply languages offered in the settings sheet.
enum Segment: Identifiable, CaseIterable {
    case English, TraditionalChinese, Japanese

    /// Stable identity for ForEach — each case has a unique title.
    var id: String { title }

    /// Label shown on the segment button.
    var title: String {
        switch self {
        case .English: return "English"
        case .TraditionalChinese: return "中文"
        case .Japanese: return "日本"
        }
    }
}
新增一個 SwiftUI View LanguageViewModel
,並新增以下程式碼
import SwiftUI
/// Settings sheet: picks the chat reply language and the maxTokens budget.
struct LanguageViewModel: View {
    @Binding var selectedSegment1: Segment
    @State var animation: Animation = .spring(response: 0.6, dampingFraction: 0.6, blendDuration: 0.6)
    @State var themeColor: Color = Color(red: 102/255, green: 66/255, blue: 1)
    @State var cornerRadius: CGFloat = 20
    @State var selectedAnimationIndex: Int = 0
    // Currently selected language, mirrored back to the chat view.
    @Binding var language: String
    // Dismissal flag owned by the presenting view.
    @Binding var closeBool: Bool
    // maxTokens to request; non-English replies need a larger budget.
    @Binding var token: Int

    var body: some View {
        NavigationStack {
            GeometryReader { bounds in
                VStack(spacing: 10) {
                    Text("Select chat reply language")
                        .font(.title)
                    VStack(spacing: 20) {
                        SegmentControlView(segments: Segment.allCases,
                                           selected: $selectedSegment1,
                                           titleNormalColor: themeColor,
                                           titleSelectedColor: .white,
                                           bgColor: themeColor,
                                           animation: animation) { segment in
                            Text(segment.title)
                                .font(.system(size: 20, weight: .semibold, design: .rounded))
                                .padding(.horizontal)
                                .padding(.vertical, 8)
                        } background: {
                            RoundedRectangle(cornerRadius: cornerRadius, style: .continuous)
                        }
                        .frame(height: 37)
                        Text("maxToken : \(token)")
                        // Bridge the Int token to the Double the Slider needs.
                        Slider(value: Binding<Double>(
                            get: { Double(token) },
                            set: { token = Int($0) }
                        ), in: 200...1000)
                        .accentColor(Color(red: 102/255, green: 66/255, blue: 1))
                    }
                    .padding()
                    .background(Color.white.cornerRadius(20))
                    .onChange(of: selectedSegment1) { newValue in
                        language = "\(newValue)"
                        // Fix: switch on the enum directly instead of its
                        // string description — exhaustive and typo-proof
                        // (the original matched on "\(newValue)" strings).
                        switch newValue {
                        case .English:
                            token = 200
                        case .TraditionalChinese, .Japanese:
                            // Non-English replies need more tokens to finish.
                            token = 700
                        }
                    }
                }
                .padding(.top, 0)
                .padding(.horizontal)
                .toolbar {
                    // Close button: asks the presenter to dismiss the sheet.
                    Button {
                        closeBool = false
                    } label: {
                        Image(systemName: "x.circle.fill")
                            .resizable()
                            .font(.title3)
                            .foregroundColor(Color(red: 128/255, green: 128/255, blue: 132/255))
                    }
                }
            }
        }
    }
}
// Xcode canvas preview using constant bindings.
struct LanguageViewModel_Previews: PreviewProvider {
static var previews: some View {
LanguageViewModel(selectedSegment1: .constant(Segment.English), language: .constant("English"), closeBool: .constant(true), token: .constant(200))
}
}
回到 ChatViewModel ,在 ChatViewModel 新增以下變數
// 回覆的語言,預設為 英文
@State private var replyLanguage = "English"
@State var selectedSegment1: Segment = .English
@State var token = 200
@State private var showBool = false
接下來使用 NavigationStack 將原本的畫面包起來
// 原本的
var body: some View {
VStack(alignment: .leading, spacing: 20) {
...
}
}
// 使用 NavigationStack 包起來
var body: some View {
NavigationStack {
VStack(alignment: .leading, spacing: 20) {
...
}
}
}
在 onAppear 底下新增 toolbar ,並新增以下程式碼
var body: some View {
NavigationStack {
...
}
.onAppear {
viewModel.setup()
}
.toolbar {
Button {
showBool.toggle()
} label: {
Image(systemName: "gearshape")
}
.sheet(isPresented: $showBool) {
LanguageViewModel(selectedSegment1: $selectedSegment1, language: $replyLanguage, closeBool: $showBool, token: $token)
.presentationDetents([.height(250), .medium])
}
}
修改 send function,新增 maxTokens 來控制產生的 Token 長度
/// Sends `text` to the completion endpoint.
/// - Parameters:
///   - text: The prompt to send.
///   - maxTokens: Upper bound on the reply length; non-English replies
///     need a larger budget (700 for zh/ja vs 200 for en in this app).
///   - completion: Called with the first choice's text on success;
///     not called on failure.
func send(text: String, maxTokens: Int,
          completion: @escaping (String) -> Void) {
    client?.sendCompletion(with: text,
                           maxTokens: maxTokens,
                           completionHandler: { result in
        switch result {
        case .success(let model):
            let output = model.choices?.first?.text ?? ""
            completion(output)
        case .failure(let error):
            // Bug fix: don't swallow errors silently — at least log them.
            print("OpenAI request failed: \(error)")
        }
    })
}
viewModel.send 中的 maxTokens 輸入 token
viewModel.send(text: chatMessage, maxTokens: token) { response in
DispatchQueue.main.async {
當使用除了英文回覆的語言外,必須給較大的 token 才能產生完整的回覆,所以當切換中文或是日語時,預設的 maxToken 都會為 700
新增語音的終止按鍵,將下方的 TextField 放入下方的判斷式中
// Whether AVSpeechSynthesizer is currently speaking.
if speechManager.isSpeaking {
HStack(spacing: 50) {
Spacer()
Text("Speaking...")
Spacer()
// Stop button: cuts off the current utterance immediately.
Button {
speechManager.synthesizer.stopSpeaking(at: .immediate)
} label: {
Image(systemName: "stop.circle")
.foregroundColor(.red)
}
}
}else {
// Normal input row while not speaking.
HStack {
TextField("Message", text: $text, onCommit: {
send()
})
.textFieldStyle(.roundedBorder)
if text == "" {
// Gray, non-interactive icon while the field is empty.
Image(systemName: "arrow.up.circle.fill")
.foregroundColor(.gray)
}else {
Button {
send()
print(models)
} label: {
Image(systemName: "arrow.up.circle.fill")
.foregroundColor(Color(red: 102/255, green: 66/255, blue: 1))
}
}
}
}
製作情緒偵測功能
新增一個 Swift File ARModel
,並新增以下程式碼
import Foundation
import RealityKit
import ARKit
import SwiftUI
/// Receives ARKit face-tracking updates and classifies the user's
/// expression into one of three mood strings.
class Coordinator: NSObject, ARSCNViewDelegate {
    // Human-readable emotion result shown by the SwiftUI layer.
    @Binding var analysis: String
    // Gate so frames are only analyzed while detection is active.
    @Binding var isDetecting: Bool

    init(analysis: Binding<String>, isDetecting: Binding<Bool>) {
        _analysis = analysis
        _isDetecting = isDetecting
    }

    func renderer(_ renderer: SCNSceneRenderer, didUpdate node: SCNNode, for anchor: ARAnchor) {
        guard isDetecting else { return }
        if let faceAnchor = anchor as? ARFaceAnchor {
            // Bug fix: this delegate fires on a background render thread,
            // but `analysis` is a SwiftUI binding and must be mutated on
            // the main thread. The original mutated it off-main inside
            // expression(anchor:) and then performed a pointless no-op
            // `self.analysis = self.analysis` on the main queue.
            DispatchQueue.main.async {
                self.expression(anchor: faceAnchor)
            }
        }
    }

    /// Classifies the face from blend-shape weights. The three checks run
    /// in order, so a strong smile (evaluated last) overrides the neutral
    /// "< 0.4" result for the same frame.
    func expression(anchor: ARFaceAnchor) {
        let smileLeft = anchor.blendShapes[.mouthSmileLeft]
        let smileRight = anchor.blendShapes[.mouthSmileRight]
        let frownLeft = anchor.blendShapes[.browDownLeft]
        let frownRight = anchor.blendShapes[.browDownRight]
        // Barely smiling → mildly dissatisfied.
        if ((smileLeft?.decimalValue ?? 0.0) + (smileRight?.decimalValue ?? 0.0)) < 0.4 {
            self.analysis = "不是很滿意🙂"
        }
        // Brows strongly lowered → angry.
        if ((frownLeft?.decimalValue ?? 0.0) + (frownRight?.decimalValue ?? 0.0)) > 0.9 {
            self.analysis = "非常不滿意😡"
        }
        // Strong smile → satisfied.
        if ((smileLeft?.decimalValue ?? 0.0) + (smileRight?.decimalValue ?? 0.0)) > 0.9 {
            self.analysis = "滿意答覆😄"
        }
    }
}
這裡使用了 smileLift, smileRight, frownLeft, frownRight 臉部的數值,來判斷你現在的情緒變化
新增一個 Swift File ARViewModel
,並新增以下程式碼
import Foundation
import RealityKit
import ARKit
import SwiftUI
/// Hosts an ARSCNView running face tracking; results flow back to
/// SwiftUI through the bound values via the Coordinator delegate.
struct ARViewContainer: UIViewRepresentable {
    @Binding var analysis: String
    @Binding var isDetecting: Bool

    func makeCoordinator() -> Coordinator {
        Coordinator(analysis: $analysis, isDetecting: $isDetecting)
    }

    func makeUIView(context: Context) -> ARSCNView {
        let sceneView = ARSCNView()
        sceneView.delegate = context.coordinator
        // White backdrop behind the (tiny, effectively hidden) AR view.
        sceneView.scene.background.contents = UIColor.white
        sceneView.session.run(ARFaceTrackingConfiguration())
        return sceneView
    }

    func updateUIView(_ uiView: ARSCNView, context: Context) {
        // Nothing to update incrementally; state flows through bindings.
    }
}
授予 App 訪問相機的權限
點選你的專案點選 Info,並將鼠標移動到任意位置會出現 + ,點選後新增 Privacy — Camera Usage Description
若是沒有給予相機權限,使用 APP 相機時會閃退
回到 ChatViewModel ,並新增以下變數
// AR
@State private var analysis = "analysis"
@State private var isDetecting = false
@State var showAR: Bool = false
回到 TextRowView ,新增以下變數,並修改 startDisplayingText
function
// 判斷 Chat 是否已經產生完字
@Binding var doneBool: Bool
// Reveal `text` one character at a time (typewriter effect); signals
// `doneBool` when the full reply has been rendered so the chat view can
// open the AR emotion-detection window.
func startDisplayingText(text: String) {
currentIndex = 0
isDisplayingText = true
// Only bot replies (bool == true) animate; user rows render directly.
if bool {
Timer.scheduledTimer(withTimeInterval: 0.005, repeats: true) { timer in
if currentIndex < text.count {
let index = text.index(text.startIndex, offsetBy: currentIndex)
displayText = String(text[...index])
currentIndex += 1
} else {
timer.invalidate()
isDisplayingText = false
// Tell ChatViewModel the reply finished displaying.
doneBool = true
}
}
}
}
最後回到 ChatViewModel 修改 TextRowView
ForEach(models.indices, id: \.self) { index in
if (index % 2) != 0 {
TextRowView(text: models[index], image: Image("openAI"), bool: true, doneBool: $showAR)
}else {
TextRowView(text: models[index], image: Image(""), bool: false, doneBool: $showAR)
}
}
在NavigationStack 底層新增 ZStack
var body: some View {
NavigationStack {
ZStack {
VStack(alignment: .leading, spacing: 20) {
....
並在 toolbar 底下新增以下程式碼(臉部辨識的視窗)
.toolbar {
... }
.padding()
// 第二層
if showAR {
VStack {
Text(analysis)
.font(.title2)
.padding()
.background(.black)
.foregroundColor(.green)
.cornerRadius(25)
Spacer()
ARViewContainer(analysis: $analysis, isDetecting: $showAR)
.frame(width: 1, height: 1) // 設置 ARView 的高度
.onAppear {
DispatchQueue.main.asyncAfter(deadline: .now() + 3) {
showAR = false
}
}
}
}
執行結果
OpenAI 其他應用
情緒偵測也有其缺點。例如,當手機角度傾斜較大時,情緒偵測可能會變得不太準確。此外,每個人的臉部參數都不盡相同,因此我的臉部參數可能無法代表所有人的參數。
為了解決這些問題,未來可以新增一個調整參數的頁面,讓使用者可以根據自己的需求進行參數調整,從而提升情緒偵測的準確性和可靠性。這樣的設計可以讓聊天機器人更加貼近用戶需求,提升使用者體驗。