(SwiftUI) 利用 ARKit 製作能讀懂你情緒變化的聊天機器人

目的:串接 OpenAI 的 API 製作聊天機器人,並且在得到回覆後透過 ARKit 偵測使用者情緒變化

情緒偵測:滿意、不滿、憤怒

製作 TextRowView

import SwiftUI

/// A single chat bubble row.
///
/// `bool == true`  → reply from the chat bot: left-aligned, avatar shown,
/// text typed out character by character.
/// `bool == false` → user message: right-aligned, shown immediately.
struct TextRowView: View {

    var text = "What is CNN in ML??"
    var image: Image

    // true = bot reply, false = user message.
    @State var bool = true

    // State backing the character-by-character "typing" animation.
    @State private var displayText = ""
    @State private var currentIndex = 0
    @State private var isDisplayingText = false

    var body: some View {

        HStack(alignment: .top) {

            // Push user messages to the trailing edge.
            if bool == false {
                Spacer()
            }

            image
                .resizable()
                .scaledToFill()
                // Avatar is only visible for bot replies (zero-sized otherwise).
                .frame(width: bool ? 50 : 0, height: bool ? 50 : 0)
                .clipShape(Circle())

            Text("\(bool ? displayText : text)")
                .padding()
                .background(bool ? Color(red: 102/255, green: 66/255, blue: 1) : Color(red: 239/255, green: 239/255, blue: 239/255))
                .foregroundColor(bool ? Color.white : Color.black)
                .cornerRadius(15)
                .padding(EdgeInsets(top: 5, leading: 10, bottom: 5, trailing: 10))

            // Keep bot replies pinned to the leading edge.
            if bool {
                Spacer()
            }
        }
        .onAppear {
            startDisplayingText(text: text)
        }
    }

    /// Reveals `text` one character at a time into `displayText`.
    ///
    /// Fix: only runs for bot replies — user messages render `text`
    /// directly, so scheduling a timer for them was wasted work.
    func startDisplayingText(text: String) {
        guard bool else { return }

        currentIndex = 0
        isDisplayingText = true

        Timer.scheduledTimer(withTimeInterval: 0.005, repeats: true) { timer in
            if currentIndex < text.count {
                let index = text.index(text.startIndex, offsetBy: currentIndex)
                displayText = String(text[...index])
                currentIndex += 1
            } else {
                timer.invalidate()
                isDisplayingText = false
            }
        }
    }
}
左:Chat 回答, 右:使用者輸入

在程式碼中會有 bool 來控制顯示的對話框:若為 Chat 回答,將會逐字顯示每一個字元

製作 ChatViewModel

使用 SPM (Swift Package Manager) 將 OpenAISwift 加入到專案中

File -> Add Packages..

GitHub

將上方連結輸入後,點選 Add Package

申請 OpenAI API

申請後將產生的 API Key 妥善保存起來(格式類似 sk-SeEWRME6qGD27cLy3wNYT3Blxxxxxxxxxxxxx)。注意:不要將 Key 公開或提交到版本控制中

在 ChatViewModel 新增以下程式碼

import SwiftUI
import OpenAISwift


/// Thin wrapper around the OpenAISwift client used by the chat screen.
final class ViewModel: ObservableObject {
    init() {}

    private var client: OpenAISwift?

    /// Creates the API client.
    ///
    /// SECURITY: never ship a hard-coded API key in source — load it from
    /// a configuration file excluded from version control, or proxy the
    /// request through your own backend.
    func setup() {
        client = OpenAISwift(authToken: "sk-SeEWRME6qGD27cLy3wNYT3Blxxxxxxxxxxxxx")
    }

    /// Sends `text` to the completion endpoint.
    /// - Parameters:
    ///   - text: the prompt.
    ///   - completion: invoked on success with the first choice's text
    ///     (empty string when no choices are returned). Not invoked on
    ///     failure.
    func send(text: String,
              completion: @escaping (String) -> Void) {
        client?.sendCompletion(with: text,
                               maxTokens: 200,
                               completionHandler: { result in
            switch result {
            case .success(let model):
                let output = model.choices?.first?.text ?? ""
                completion(output)
            case .failure(let error):
                // Fix: surface failures instead of silently dropping them
                // (`break` made failed requests invisible during development).
                print("OpenAI request failed: \(error)")
            }
        })
    }
}

設計畫面

/// Main chat screen: scrolling message list plus input field.
/// (Despite the name, this type is a View, not a view model.)
struct ChatViewModel: View {

    // Fix: use @StateObject, not @ObservedObject — this view *owns* the
    // model. An @ObservedObject created inline is re-created every time
    // SwiftUI re-initialises the view, discarding the configured client.
    @StateObject var viewModel = ViewModel()
    @State var text = ""
    // Conversation history: even indices = user, odd indices = bot reply.
    @State var models = [String]()

    var body: some View {
        VStack(alignment: .leading, spacing: 20) {
            ScrollView {
                VStack(alignment: .leading, spacing: 20) {

                    // Alternate bubble styles: odd = bot, even = user.
                    ForEach(models.indices, id: \.self) { index in
                        if (index % 2) != 0 {
                            TextRowView(text: models[index], image: Image("openAI"), bool: true)
                        } else {
                            TextRowView(text: models[index], image: Image(""), bool: false)
                        }
                    }
                    Spacer()
                }
            }

            HStack {
                TextField("Message", text: $text, onCommit: {
                    send()
                })
                .textFieldStyle(.roundedBorder)

                if text == "" {
                    // Greyed-out send icon while the field is empty.
                    Image(systemName: "arrow.up.circle.fill")
                        .foregroundColor(.gray)
                } else {
                    Button {
                        send()
                        print(models)
                    } label: {
                        Image(systemName: "arrow.up.circle.fill")
                            .foregroundColor(Color(red: 102/255, green: 66/255, blue: 1))
                    }
                }
            }
        }
        .onAppear {
            viewModel.setup()
        }
        .padding()
    }

    /// Appends the user's message, clears the field, and requests a reply.
    func send() {
        guard !text.trimmingCharacters(in: .whitespaces).isEmpty else {
            return
        }

        let chatMessage = text
        models.append(chatMessage)
        // Fix: clear the field immediately so the user can keep typing while
        // the request is in flight (previously cleared only after the reply).
        text = ""

        viewModel.send(text: chatMessage) { response in
            DispatchQueue.main.async {

                // `response` stays untouched; mutate a copy.
                var newresponse = response

                // The API sometimes prefixes a stray "?" or "!" — drop it.
                if newresponse.hasPrefix("?") || newresponse.hasPrefix("!") {
                    newresponse.removeFirst()
                }
                // Trim surrounding whitespace/newlines before displaying.
                self.models.append(newresponse.trimmingCharacters(in: .whitespacesAndNewlines))

                print(newresponse.trimmingCharacters(in: .whitespacesAndNewlines))
            }
        }
    }
}

有時候 OpenAI 會產生一些奇怪的符號和大量的空格,這可能會導致顯示上的問題。因此,我們需要對回應 response 進行處理

                // newresponse 保存最初的 response
var newresponse = response

// 有時候最前面會產生 ? 必須移除
// response 定義是 let 不能移除
if newresponse.hasPrefix("?") || newresponse.hasPrefix("!") {
newresponse.removeFirst()
}
// 將空白部分移除
self.models.append(newresponse.trimmingCharacters(in: .whitespacesAndNewlines))

製作語音功能

import Library

import AVFoundation
import NaturalLanguage

新增以下程式碼

/// Wraps AVSpeechSynthesizer and publishes whether speech is in progress.
class SpeechManager: NSObject, ObservableObject {

    @Published var isSpeaking = false
    let synthesizer = AVSpeechSynthesizer()

    override init() {
        super.init()
        // Delegate callbacks drive the published `isSpeaking` flag.
        synthesizer.delegate = self
    }

    /// Speaks `text` using a voice matched to its detected language.
    func speak(text: String) {
        let utterance = AVSpeechUtterance(string: text)
        utterance.voice = AVSpeechSynthesisVoice(language: returnLanguage(text))
        synthesizer.speak(utterance)
    }

    /// Returns the BCP-47 voice identifier for the dominant language of
    /// `input_text`.
    ///
    /// Fix: falls back to "en-US" when no dominant language is detected —
    /// the previous empty-string result produced a nil voice and silent
    /// playback.
    func returnLanguage(_ input_text: String) -> String {
        let languageRecognizer = NLLanguageRecognizer()
        languageRecognizer.processString(input_text)

        guard let dominantLanguage = languageRecognizer.dominantLanguage else {
            return "en-US"
        }

        switch dominantLanguage.rawValue {
        case "en":
            return "en-US"
        case "ja":
            return "ja-JP"
        // The recognizer may report either Chinese script; both are spoken
        // with the Taiwanese Mandarin voice.
        case "zh-Hant", "zh-Hans":
            return "zh-TW"
        default:
            return "en-US"
        }
    }
}

// MARK: - AVSpeechSynthesizerDelegate
extension SpeechManager: AVSpeechSynthesizerDelegate {
    /// Flips `isSpeaking` on when an utterance begins (hop to main for UI).
    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didStart utterance: AVSpeechUtterance) {
        DispatchQueue.main.async { self.isSpeaking = true }
    }

    /// …and back off once the utterance has finished.
    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        DispatchQueue.main.async { self.isSpeaking = false }
    }
}

returnLanguage 會先判斷輸入文字為哪一種語言,最後回傳對應的 language 代碼,供 AVSpeechSynthesisVoice(language:) 產生對應的聲音

在 ChatViewModel 新增 speechManager

struct ChatViewModel: View {

...

@StateObject private var speechManager = SpeechManager()

...

在 send function 新增以下程式碼

func send() {
...

viewModel.send(text: chatMessage) { response in
DispatchQueue.main.async {

...

speechManager.speak(text: newresponse.trimmingCharacters(in: .whitespacesAndNewlines))

...
}
}
}

這樣一來你的機器人就會說三種語言:中文、日語、英語

製作 LanguageViewModel

新增一個 Swift File SegmentControlView ,並新增以下程式碼

import Foundation
import SwiftUI


/// Generic segmented control: lays out one button per segment, equally
/// sized, with a shared selection indicator animated between segments.
/// - `ID`: segment model (must be Identifiable).
/// - `Content`: label view built per segment.
/// - `Background`: shape used for both the selection pill and the track.
struct SegmentControlView<ID: Identifiable, Content: View, Background: Shape>: View {
    let segments: [ID]
    @Binding var selected: ID
    var titleNormalColor: Color
    var titleSelectedColor: Color
    var bgColor: Color
    let animation: Animation
    @ViewBuilder var content: (ID) -> Content
    @ViewBuilder var background: () -> Background

    // Shared namespace for the matchedGeometryEffect selection indicator.
    @Namespace private var namespace

    var body: some View {
        GeometryReader { bounds in
            HStack(spacing: 0) {
                ForEach(segments) { segment in
                    NewSegmentButtonView(id: segment,
                                         selectedId: $selected,
                                         titleNormalColor: titleNormalColor,
                                         titleSelectedColor: titleSelectedColor,
                                         bgColor: bgColor,
                                         animation: animation,
                                         namespace: namespace) {
                        content(segment)
                    } background: {
                        background()
                    }
                    // Divide the available width evenly among segments.
                    .frame(width: bounds.size.width / CGFloat(segments.count))
                }
            }
            // Track: tinted fill plus a subtle outline, both using the
            // caller-provided shape.
            .background {
                background()
                    .fill(bgColor.opacity(0.1))
                    .overlay(
                        background()
                            .stroke(style: StrokeStyle(lineWidth: 1.5))
                            .foregroundColor(bgColor.opacity(0.2))
                    )
            }
        }
    }
}

/// One button inside SegmentControlView. The selected segment draws a
/// filled background tagged with a matchedGeometryEffect so the pill
/// slides between segments when the selection changes.
fileprivate struct NewSegmentButtonView<ID: Identifiable, Content: View, Background: Shape> : View {
    let id: ID
    @Binding var selectedId: ID
    var titleNormalColor: Color
    var titleSelectedColor: Color
    var bgColor: Color
    var animation: Animation
    // Namespace owned by the parent control; shared across all segments.
    var namespace: Namespace.ID
    @ViewBuilder var content: () -> Content
    @ViewBuilder var background: () -> Background

    var body: some View {
        GeometryReader { bounds in
            Button {
                // Animate the selection change so the pill glides over.
                withAnimation(animation) {
                    selectedId = id
                }
            } label: {
                content()
            }
            .frame(width: bounds.size.width, height: bounds.size.height)
            // Unselected segments are drawn slightly smaller.
            .scaleEffect(selectedId.id == id.id ? 1 : 0.8)
            .clipShape(background())
            .foregroundColor(selectedId.id == id.id ? titleSelectedColor : titleNormalColor)
            .background(buttonBackground)
        }
    }

    /// The sliding selection pill; only present on the selected segment.
    @ViewBuilder private var buttonBackground: some View {
        if selectedId.id == id.id {
            background()
                .fill(bgColor)
                .matchedGeometryEffect(id: "SelectedTab", in: namespace)
        }
    }
}

/// Reply languages offered by the language picker.
/// The raw value doubles as both the stable `Identifiable` id and the
/// title displayed on each segment.
enum Segment: String, Identifiable, CaseIterable {
    case English = "English"
    case TraditionalChinese = "中文"
    case Japanese = "日本"

    var id: String { rawValue }

    var title: String { rawValue }
}

新增一個 SwiftUI View LanguageViewModel ,並新增以下程式碼

import SwiftUI

/// Sheet for picking the bot's reply language and the max-token budget.
struct LanguageViewModel: View {

    @Binding var selectedSegment1: Segment

    @State var animation: Animation = .spring(response: 0.6, dampingFraction: 0.6, blendDuration: 0.6)
    @State var themeColor: Color = Color(red: 102/255, green: 66/255, blue: 1)
    @State var cornerRadius: CGFloat = 20
    @State var selectedAnimationIndex: Int = 0
    // Currently selected language, written back to the parent view.
    @Binding var language: String

    // Set to false to dismiss the sheet.
    @Binding var closeBool: Bool

    // maxTokens forwarded to the OpenAI request.
    @Binding var token: Int

    var body: some View {

        NavigationStack {
            GeometryReader { bounds in
                VStack(spacing: 10) {
                    Text("Select chat reply language")
                        .font(.title)
                    VStack(spacing: 20) {
                        SegmentControlView(segments: Segment.allCases,
                                           selected: $selectedSegment1,
                                           titleNormalColor: themeColor,
                                           titleSelectedColor: .white,
                                           bgColor: themeColor,
                                           animation: animation) { segment in
                            Text(segment.title)
                                .font(.system(size: 20, weight: .semibold, design: .rounded))
                                .padding(.horizontal)
                                .padding(.vertical, 8)

                        } background: {
                            RoundedRectangle(cornerRadius: cornerRadius, style: .continuous)
                        }
                        .frame(height: 37)

                        Text("maxToken : \(token)")

                        // Bridge Int <-> Double so the slider can drive `token`.
                        Slider(value: Binding<Double>(
                            get: { Double(token) },
                            set: { token = Int($0) }
                        ), in: 200...1000)
                        .accentColor(Color(red: 102/255, green: 66/255, blue: 1))

                    }
                    .padding()
                    .background(Color.white.cornerRadius(20))
                    .onChange(of: selectedSegment1) { newValue in
                        language = "\(newValue)"
                        // Fix: switch on the enum itself instead of its string
                        // form — exhaustive, so a newly added case becomes a
                        // compile error rather than silently hitting a default.
                        switch newValue {
                        case .English:
                            token = 200
                        // CJK replies need a larger token budget to complete.
                        case .TraditionalChinese, .Japanese:
                            token = 700
                        }
                    }

                }
                .padding(.top, 0)
                .padding(.horizontal)
                .toolbar {
                    // Close button for the sheet.
                    Button {
                        closeBool = false
                    } label: {
                        Image(systemName: "x.circle.fill")
                            .resizable()
                            .font(.title3)
                            .foregroundColor(Color(red: 128/255, green: 128/255, blue: 132/255))
                    }
                }
            }
        }
    }

}

/// Xcode canvas preview with constant bindings (English, 200 tokens).
struct LanguageViewModel_Previews: PreviewProvider {
    static var previews: some View {
        LanguageViewModel(selectedSegment1: .constant(Segment.English), language: .constant("English"), closeBool: .constant(true), token: .constant(200))
    }
}

回到 ChatViewModel ,在 ChatViewModel 新增以下變數

    // 回覆的語言,預設為 英文
@State private var replyLanguage = "English"
@State var selectedSegment1: Segment = .English
@State var token = 200
@State private var showBool = false

接下來使用 NavigationStack 將原本的畫面包起來

// 原本的
var body: some View {
VStack(alignment: .leading, spacing: 20) {
...
}

}
// 使用 NavigationStack 包起來
var body: some View {
NavigationStack {
VStack(alignment: .leading, spacing: 20) {
...
}
}
}

在 onAppear 底下新增 toolbar ,並新增以下程式碼

var body: some View {
NavigationStack {
...
}
.onAppear {
viewModel.setup()
}
.toolbar {
Button {
showBool.toggle()
} label: {
Image(systemName: "gearshape")
}
.sheet(isPresented: $showBool) {
LanguageViewModel(selectedSegment1: $selectedSegment1, language: $replyLanguage, closeBool: $showBool, token: $token)
.presentationDetents([.height(250), .medium])
}
}

修改 send function,新增 maxTokens 來控制產生的 Token 長度

/// Sends `text` to the completion endpoint with a caller-chosen token budget.
/// - Parameters:
///   - text: the prompt.
///   - maxTokens: upper bound on the generated completion length.
///   - completion: invoked on success with the first choice's text
///     (empty string when no choices are returned). Not invoked on failure.
func send(text: String, maxTokens: Int,
          completion: @escaping (String) -> Void) {
    client?.sendCompletion(with: text,
                           maxTokens: maxTokens,
                           completionHandler: { result in
        switch result {
        case .success(let model):
            let output = model.choices?.first?.text ?? ""
            completion(output)
        case .failure(let error):
            // Fix: surface failures instead of silently dropping them.
            print("OpenAI request failed: \(error)")
        }
    })
}

viewModel.send 中的 maxTokens 輸入 token

viewModel.send(text: chatMessage, maxTokens: token) { response in
DispatchQueue.main.async {

當使用除了英文回覆的語言外,必須給較大的 token 才能產生完整的回覆,所以當切換中文或是日語時,預設的 maxToken 都會為 700

新增語音的終止按鍵,將下方的 TextField 放入下方的判斷式中

                // 判斷是否有在使用 AVSpeechSynthesizer 說話
if speechManager.isSpeaking {
HStack(spacing: 50) {
Spacer()
Text("Speaking...")
Spacer()
Button {
speechManager.synthesizer.stopSpeaking(at: .immediate)
} label: {
Image(systemName: "stop.circle")
.foregroundColor(.red)
}
}
}else {
HStack {
TextField("Message", text: $text, onCommit: {
send()
})
.textFieldStyle(.roundedBorder)

if text == "" {
Image(systemName: "arrow.up.circle.fill")
.foregroundColor(.gray)
}else {
Button {
send()
print(models)
} label: {
Image(systemName: "arrow.up.circle.fill")
.foregroundColor(Color(red: 102/255, green: 66/255, blue: 1))
}
}
}
}

製作情緒偵測功能

新增一個 Swift File ARModel,並新增以下程式碼

import Foundation
import RealityKit
import ARKit
import SwiftUI

/// ARSCNView delegate that classifies the user's facial expression from
/// ARKit blend shapes and publishes the result through `analysis`.
class Coordinator: NSObject, ARSCNViewDelegate {
    @Binding var analysis: String
    @Binding var isDetecting: Bool

    init(analysis: Binding<String>, isDetecting: Binding<Bool>) {
        _analysis = analysis
        _isDetecting = isDetecting
    }

    /// Called on ARKit's render thread whenever a tracked anchor updates.
    func renderer(_ renderer: SCNSceneRenderer, didUpdate node: SCNNode, for anchor: ARAnchor) {
        guard isDetecting else { return }
        guard let faceAnchor = anchor as? ARFaceAnchor else { return }
        expression(anchor: faceAnchor)
    }

    /// Maps blend-shape coefficients to one of three moods.
    /// The checks intentionally run in order so a later, stronger signal
    /// (anger, then a big smile) overrides an earlier weak-smile reading.
    func expression(anchor: ARFaceAnchor) {
        let smileLeft = anchor.blendShapes[.mouthSmileLeft]
        let smileRight = anchor.blendShapes[.mouthSmileRight]

        let frownLeft = anchor.blendShapes[.browDownLeft]
        let frownRight = anchor.blendShapes[.browDownRight]

        var detected: String?

        if ((smileLeft?.decimalValue ?? 0.0) + (smileRight?.decimalValue ?? 0.0)) < 0.4 {
            // Barely smiling → dissatisfied.
            detected = "不是很滿意🙂"
        }

        if ((frownLeft?.decimalValue ?? 0.0) + (frownRight?.decimalValue ?? 0.0)) > 0.9 {
            // Both brows pulled down → angry.
            detected = "非常不滿意😡"
        }

        if ((smileLeft?.decimalValue ?? 0.0) + (smileRight?.decimalValue ?? 0.0)) > 0.9 {
            // Strong smile → satisfied.
            detected = "滿意答覆😄"
        }

        if let detected {
            // Fix: this delegate runs off the main thread. Publish the
            // SwiftUI binding update on main — the original code assigned
            // `analysis` on the render thread and then performed a no-op
            // `self.analysis = self.analysis` on the main queue.
            DispatchQueue.main.async {
                self.analysis = detected
            }
        }
    }
}

這裡使用了 smileLeft, smileRight, frownLeft, frownRight 臉部的數值,來判斷你現在的情緒變化

新增一個 Swift File ARViewModel,並新增以下程式碼

import Foundation
import RealityKit
import ARKit
import SwiftUI


/// SwiftUI wrapper around an ARSCNView running face tracking.
struct ARViewContainer: UIViewRepresentable {
    @Binding var analysis: String
    @Binding var isDetecting: Bool

    func makeUIView(context: Context) -> ARSCNView {
        let arView = ARSCNView()
        arView.delegate = context.coordinator

        // Fix: face tracking requires a TrueDepth camera. Running the
        // configuration on unsupported hardware fails the session, so
        // only start it when the device supports it.
        if ARFaceTrackingConfiguration.isSupported {
            let configuration = ARFaceTrackingConfiguration()
            arView.session.run(configuration)
        }

        // White backdrop so the (1x1 pt) view blends into the chat UI.
        arView.scene.background.contents = UIColor.white

        return arView
    }

    func updateUIView(_ uiView: ARSCNView, context: Context) {
        // No per-update work: the coordinator pushes analysis changes.
    }

    func makeCoordinator() -> Coordinator {
        Coordinator(analysis: $analysis, isDetecting: $isDetecting)
    }

}

授予 App 訪問相機的權限

點選你的專案點選 Info,並將鼠標移動到任意位置會出現 + ,點選後新增 Privacy — Camera Usage Description

若是沒有給予相機權限,使用 APP 相機時會閃退

回到 ChatViewModel ,並新增以下變數

    // AR
@State private var analysis = "analysis"
@State private var isDetecting = false
@State var showAR: Bool = false

回到 TextRowView ,新增以下變數,並修改 startDisplayingText function

    // 判斷 Chat 是否已經產生完字
@Binding var doneBool: Bool
    // 逐個字元進行顯示
// Reveals `text` one character at a time into `displayText`.
// Sets `doneBool` once the whole reply has been shown; the parent view
// binds this to `showAR` to trigger the emotion-detection window.
func startDisplayingText(text: String) {
    currentIndex = 0
    isDisplayingText = true

    // Only the chat reply (bool == true) gets the typing animation;
    // user messages are displayed in full immediately.
    if bool {
        Timer.scheduledTimer(withTimeInterval: 0.005, repeats: true) { timer in
            if currentIndex < text.count {
                let index = text.index(text.startIndex, offsetBy: currentIndex)
                displayText = String(text[...index])
                currentIndex += 1
            } else {
                timer.invalidate()
                isDisplayingText = false
                // Signal completion so the AR view can appear.
                doneBool = true
            }
        }
    }
}

最後回到 ChatViewModel 修改 TextRowView

ForEach(models.indices, id: \.self) { index in
if (index % 2) != 0 {
TextRowView(text: models[index], image: Image("openAI"), bool: true, doneBool: $showAR)
}else {
TextRowView(text: models[index], image: Image(""), bool: false, doneBool: $showAR)
}
}

在NavigationStack 底層新增 ZStack

var body: some View {
NavigationStack {
ZStack {
VStack(alignment: .leading, spacing: 20) {
....

並在 toolbar 底下新增以下程式碼(臉部辨識的視窗)

                .toolbar {
... }
.padding()
// 第二層
if showAR {
VStack {
Text(analysis)
.font(.title2)
.padding()
.background(.black)
.foregroundColor(.green)
.cornerRadius(25)
Spacer()
ARViewContainer(analysis: $analysis, isDetecting: $showAR)
.frame(width: 1, height: 1) // 設置 ARView 的高度
.onAppear {
DispatchQueue.main.asyncAfter(deadline: .now() + 3) {
showAR = false


}

}
}
}

執行結果

OpenAI 其他應用

情緒偵測也有其缺點。例如,當手機角度傾斜較大時,情緒偵測可能會變得不太準確。此外,每個人的臉部參數都不盡相同,因此我的臉部參數可能無法代表所有人的參數。

為了解決這些問題,未來可以新增一個調整參數的頁面,讓使用者可以根據自己的需求進行參數調整,從而提升情緒偵測的準確性和可靠性。這樣的設計可以讓聊天機器人更加貼近用戶需求,提升使用者體驗。

Reference

--

--