JavaScript 中的 Chat GPT 加 Eleven Labs





5.00/5 (6 票)
使用 Eleven Labs 文本转语音与 Chat GPT 对话的 Web 应用程序
- 下载ChatGpt11Labs.zip - 3.3 KB
引言
这个应用程序是我尝试使用 JavaScript 创建的一个与 Chat GPT 通信的客户端应用。我的目标是演示如何使用 Chat GPT API、浏览器的语音转文本(语音识别)功能,以及 Eleven Labs 的文本转语音功能。这意味着你可以对着浏览器说话,而浏览器会用类似人类的声音回复你。
背景
这篇文章是我的上一篇文章 Chat GPT in JavaScript 的续篇。
使用代码
- 从 https://beta.openai.com/account/api-keys 获取 OPENAI_API_KEY。
- 打开 ChatGpt_11Labs.js,在第一行填入该 API Key。
- 从 https://beta.elevenlabs.io/speech-synthesis > Profile > API Key 获取 ELEVEN_LABS_API_KEY。
- 打开 ChatGpt_11Labs.js,在第二行填入该 API Key。
这是代码。基本上,它使用 XMLHttpRequest 将 JSON 以 POST 方式发送到 OpenAI 的端点,然后将 Chat GPT 的回复以 POST 方式发送到 Eleven Labs 的端点。
ChatGpt_11Labs.js 的代码
var OPENAI_API_KEY = ""; // OpenAI API key; sent as "Authorization: Bearer <key>" by SetModels() and Send()
var ELEVEN_LABS_API_KEY = ""; // Eleven Labs API key; sent as the "xi-api-key" header by GetVoiceList() and TextToSpeech()
var sVoiceId = "21m00Tcm4TlvDq8ikWAM"; // Default voice id (Rachel); TextToSpeech() overwrites it with the selVoices selection when one exists
var bSpeechInProgress = false; // NOTE(review): not referenced by any function in the code shown here
var oSpeechRecognizer = null // webkitSpeechRecognition instance; stays null until the first SpeechToText() call creates it
// Page-load handler (wired to <body onload>). Hides the "Listen" label when the
// browser does not expose webkitSpeechRecognition, then loads the voice list.
function OnLoad() {
    var bCanListen = "webkitSpeechRecognition" in window;
    if (!bCanListen) {
        //speech to text not supported
        lblSpeak.style.display = "none";
    }
    GetVoiceList();
}
// Copies the language chosen in the selLang dropdown onto the speech recognizer.
// Does nothing while no recognizer exists (SpeechToText() has not created one yet).
//   o - the value passed by the onchange handler; not used.
function ChangeLang(o) {
    if (!oSpeechRecognizer) {
        return;
    }
    oSpeechRecognizer.lang = selLang.value;
}
// Requests the voice list from Eleven Labs and appends one <option> per voice
// (text = voice name, value = voice_id) to the selVoices dropdown.
// A body that is not valid JSON is reported in txtOutput. A body that parses
// but carries no "voices" array (for example an error payload) adds no options
// instead of throwing inside the callback.
function GetVoiceList() {
    var oHttp = new XMLHttpRequest();
    oHttp.open("GET", "https://api.elevenlabs.io/v1/voices");
    oHttp.setRequestHeader("Accept", "application/json");
    oHttp.setRequestHeader("Content-Type", "application/json");
    oHttp.setRequestHeader("xi-api-key", ELEVEN_LABS_API_KEY);
    oHttp.onreadystatechange = function () {
        // 4 = DONE; earlier states carry no complete body
        if (oHttp.readyState !== 4) {
            return;
        }
        var oJson = null;
        try {
            oJson = JSON.parse(oHttp.responseText);
        } catch (ex) {
            txtOutput.value += "Error: " + ex.message;
        }
        // JSON.parse can succeed and still yield null, a primitive, or an
        // object without a voices array, so validate before iterating.
        var aVoices = (oJson && Array.isArray(oJson.voices)) ? oJson.voices : [];
        for (var i = 0; i < aVoices.length; i++) {
            selVoices.options[selVoices.length] = new Option(aVoices[i].name, aVoices[i].voice_id);
        }
    };
    oHttp.send();
}
// Speaks the current contents of the txtMsg box through TextToSpeech().
// When the box is empty it only moves focus to the box.
function SayIt() {
    var sText = txtMsg.value;
    if (sText != "") {
        TextToSpeech(sText);
        return;
    }
    txtMsg.focus();
}
// Sends text to the Eleven Labs text-to-speech endpoint and plays the returned
// audio/mpeg data.
//   s - the text to speak.
// Does nothing when the Mute checkbox is ticked. Uses the voice selected in
// selVoices when the list has a selection; otherwise the current sVoiceId.
// spMsg shows a progress message that is cleared on success; on an HTTP or
// network failure it shows a short error and nothing is played.
function TextToSpeech(s) {
    if (chkMute.checked) return;
    if (selVoices.length > 0 && selVoices.selectedIndex != -1) {
        sVoiceId = selVoices.value;
    }
    spMsg.innerHTML = "Eleven labs text-to-speech...";
    var oHttp = new XMLHttpRequest();
    oHttp.open("POST", "https://api.elevenlabs.io/v1/text-to-speech/" + sVoiceId);
    oHttp.setRequestHeader("Accept", "audio/mpeg");
    oHttp.setRequestHeader("Content-Type", "application/json");
    oHttp.setRequestHeader("xi-api-key", ELEVEN_LABS_API_KEY);
    oHttp.responseType = "arraybuffer";
    oHttp.onload = function () {
        // onload also fires for 4xx/5xx responses; their bodies are error
        // payloads, not audio, so do not hand them to the player.
        if (oHttp.status < 200 || oHttp.status >= 300) {
            spMsg.innerHTML = "Eleven labs text-to-speech failed (HTTP " + oHttp.status + ")";
            return;
        }
        spMsg.innerHTML = "";
        var oBlob = new Blob([oHttp.response], { "type": "audio/mpeg" });
        var audioURL = window.URL.createObjectURL(oBlob);
        // An object URL keeps its Blob alive until revoked, so release it once
        // playback has finished or failed.
        var fnRelease = function () {
            window.URL.revokeObjectURL(audioURL);
        };
        var audio = new Audio();
        audio.onended = fnRelease;
        audio.onerror = fnRelease;
        audio.src = audioURL;
        var oPlaying = audio.play();
        // play() returns a promise in current browsers; it rejects when
        // playback is blocked, in which case the URL is released as well.
        if (oPlaying && typeof oPlaying.catch === "function") {
            oPlaying.catch(fnRelease);
        }
    };
    oHttp.onerror = function () {
        // Without this the progress message would stay on screen forever.
        spMsg.innerHTML = "Eleven labs text-to-speech failed (network error)";
    };
    var data = {
        text: s,
        voice_settings: { stability: 0, similarity_boost: 0 }
    };
    oHttp.send(JSON.stringify(data));
}
// Replaces the contents of the selModel dropdown with the ids returned by the
// OpenAI "list models" endpoint, sorted alphabetically, and preselects
// "text-davinci-003" when it is in the list.
// The existing options are only discarded once a usable list has arrived, so a
// failed request leaves the dropdown as it was. Invalid JSON and API error
// payloads are reported in txtOutput.
function SetModels() {
    var oHttp = new XMLHttpRequest();
    oHttp.open("GET", "https://api.openai.com/v1/models");
    oHttp.setRequestHeader("Accept", "application/json");
    oHttp.setRequestHeader("Content-Type", "application/json");
    oHttp.setRequestHeader("Authorization", "Bearer " + OPENAI_API_KEY);
    oHttp.onreadystatechange = function () {
        // 4 = DONE; earlier states carry no complete body
        if (oHttp.readyState !== 4) {
            return;
        }
        var oJson = null;
        try {
            oJson = JSON.parse(oHttp.responseText);
        } catch (ex) {
            txtOutput.value += "Error: " + ex.message;
            return;
        }
        if (oJson && oJson.error && oJson.error.message) {
            txtOutput.value += "Error: " + oJson.error.message;
            return;
        }
        // Anything without a data array cannot be turned into options.
        if (!oJson || !Array.isArray(oJson.data)) {
            return;
        }
        var aIds = [];
        for (var i = 0; i < oJson.data.length; i++) {
            aIds.push(oJson.data[i].id);
        }
        aIds.sort();
        selModel.length = 0;
        for (var j = 0; j < aIds.length; j++) {
            selModel.options[selModel.length] = new Option(aIds[j], aIds[j]);
            if (aIds[j] == "text-davinci-003") {
                selModel.selectedIndex = j;
            }
        }
    };
    oHttp.send();
}
// Returns true when the model id has to be called through the chat completions
// endpoint (a "messages" payload) instead of the legacy completions endpoint.
// gpt-3.5-turbo and gpt-4 variants are chat models; ids containing "instruct"
// (such as gpt-3.5-turbo-instruct) are legacy completion models.
function IsChatModel(sModel) {
    if (sModel.indexOf("instruct") != -1) {
        return false;
    }
    return sModel.indexOf("gpt-3.5-turbo") != -1 || sModel.indexOf("gpt-4") != -1;
}

// Sends the text in txtMsg to OpenAI using the model selected in selModel,
// appends the question and the answer to txtOutput, and speaks the answer via
// TextToSpeech(). Alerts and returns when txtMsg is empty.
// spMsg shows a progress message while the request is pending. Invalid JSON
// and API error payloads are written to txtOutput as "Error: ...". An empty
// answer is shown as "No response" and is not spoken.
function Send() {
    var sQuestion = txtMsg.value;
    if (sQuestion == "") {
        alert("Type in your question!");
        txtMsg.focus();
        return;
    }
    spMsg.innerHTML = "Chat GPT is thinking...";
    var sModel = selModel.value;
    var bChat = IsChatModel(sModel);
    var sUrl = bChat
        ? "https://api.openai.com/v1/chat/completions"
        : "https://api.openai.com/v1/completions";
    var oHttp = new XMLHttpRequest();
    oHttp.open("POST", sUrl);
    oHttp.setRequestHeader("Accept", "application/json");
    oHttp.setRequestHeader("Content-Type", "application/json");
    oHttp.setRequestHeader("Authorization", "Bearer " + OPENAI_API_KEY);
    oHttp.onreadystatechange = function () {
        // 4 = DONE; earlier states carry no complete body
        if (oHttp.readyState !== 4) {
            return;
        }
        spMsg.innerHTML = "";
        if (txtOutput.value != "") txtOutput.value += "\n";
        var oJson = {};
        try {
            // "|| {}" covers a body that parses to null
            oJson = JSON.parse(oHttp.responseText) || {};
        } catch (ex) {
            txtOutput.value += "Error: " + ex.message;
            return;
        }
        if (oJson.error && oJson.error.message) {
            txtOutput.value += "Error: " + oJson.error.message;
            return;
        }
        if (!oJson.choices) {
            return;
        }
        // choices may be empty; treat that like an empty answer
        var oChoice = oJson.choices[0] || {};
        var s = "";
        if (oChoice.text) {
            // legacy completions response
            s = oChoice.text;
        } else if (oChoice.message && oChoice.message.content) {
            // chat completions response
            s = oChoice.message.content;
        }
        if (selLang.value != "en-US") {
            // For non-English input, when the answer has the form
            // "<text>?\n<rest>", keep only the part after the first line.
            var a = s.split("?\n");
            if (a.length == 2) {
                s = a[1];
            }
        }
        if (s == "") {
            txtOutput.value += "Chat GPT: No response";
        } else {
            txtOutput.value += "Chat GPT: " + s;
            TextToSpeech(s);
        }
    };
    var iMaxTokens = 2048;
    var sUserId = "1";
    var dTemperature = 0.5;
    var data;
    if (bChat) {
        data = {
            "model": sModel,
            "messages": [
                {
                    "role": "user", //system,user,assistant
                    "content": sQuestion
                }
            ]
        };
    } else {
        data = {
            model: sModel,
            prompt: sQuestion,
            max_tokens: iMaxTokens,
            user: sUserId,
            temperature: dTemperature,
            frequency_penalty: 0.0, //Number between -2.0 and 2.0. Positive values decrease the model's likelihood to repeat the same line verbatim.
            presence_penalty: 0.0, //Number between -2.0 and 2.0. Positive values increase the model's likelihood to talk about new topics.
            stop: ["#", ";"] //Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
        };
    }
    oHttp.send(JSON.stringify(data));
    if (txtOutput.value != "") txtOutput.value += "\n";
    txtOutput.value += "Me: " + sQuestion;
    txtMsg.value = "";
}
// Shows or hides the voice dropdown to match the Mute checkbox.
//   b - true hides selVoices, false restores its default display.
function Mute(b) {
    selVoices.style.display = b ? "none" : "";
}
// Click handler of the "Listen" checkbox.
// First call: creates a continuous webkitSpeechRecognition with interim
// results in the language selected in selLang, and starts it.
// Later calls: start or stop the existing recognizer to match chkSpeak.
// Each final result is copied into txtMsg and submitted with Send(); interim
// results are previewed in grey inside the idText element.
function SpeechToText() {
    if (oSpeechRecognizer) {
        if (chkSpeak.checked) {
            oSpeechRecognizer.start();
        } else {
            oSpeechRecognizer.stop();
        }
        return;
    }
    oSpeechRecognizer = new webkitSpeechRecognition();
    oSpeechRecognizer.continuous = true;
    oSpeechRecognizer.interimResults = true;
    oSpeechRecognizer.lang = selLang.value;
    oSpeechRecognizer.onresult = function (event) {
        var interimTranscripts = "";
        for (var i = event.resultIndex; i < event.results.length; i++) {
            var transcript = event.results[i][0].transcript;
            if (event.results[i].isFinal) {
                txtMsg.value = transcript;
                Send();
            } else {
                // Strings are immutable: replace() returns the converted copy,
                // so its result has to be used. The regex converts every
                // newline, not just the first one.
                interimTranscripts += transcript.replace(/\n/g, "<br>");
            }
            var oDiv = document.getElementById("idText");
            oDiv.innerHTML = '<span style="color: #999;">' + interimTranscripts + '</span>';
        }
    };
    // Recognition errors are currently ignored.
    oSpeechRecognizer.onerror = function (event) {
    };
    oSpeechRecognizer.start();
}
HTML 页面 ChatGpt_11Labs.html 的代码
<!DOCTYPE html>
<html>
<head>
<!-- Single-page UI for the chat client; all behaviour lives in ChatGpt_11Labs.js. -->
<title>Chat GPT Plus Eleven Labs</title>
<!-- The ?v=6 query string is a version suffix on the script URL. -->
<script src="ChatGpt_11Labs.js?v=6"></script>
</head>
<!-- OnLoad() hides the Listen control when speech recognition is unavailable and loads the voice list. -->
<body onload="OnLoad()">
<div id="idContainer">
<!-- Conversation log: the script appends "Me: ...", "Chat GPT: ..." and "Error: ..." lines here. -->
<textarea id="txtOutput" rows="10" style="margin-top: 10px; width: 100%;" placeholder="Output"></textarea>
<div>
<!-- Send() submits the text in txtMsg to OpenAI. -->
<button type="button" onclick="Send()" id="btnSend">Send</button>
<!-- Hidden button: SayIt() speaks the text in txtMsg without sending it to OpenAI. -->
<button type="button" onclick="SayIt()" style="display: none">Say It</button>
<!-- Listen toggles speech recognition; Mute disables text-to-speech and hides the voice list. -->
<label id="lblSpeak"><input id="chkSpeak" type="checkbox" onclick="SpeechToText()" />Listen</label>
<label id="lblMute"><input id="chkMute" type="checkbox" onclick="Mute(this.checked)" />Mute</label>
<!-- Initial model choices; the "..." button replaces them with the list fetched by SetModels(). -->
<select id="selModel">
<option value="text-davinci-003">text-davinci-003</option>
<option value="text-davinci-002">text-davinci-002</option>
<option value="code-davinci-002">code-davinci-002</option>
<option value="gpt-3.5-turbo">gpt-3.5-turbo</option>
<option value="gpt-3.5-turbo-0301">gpt-3.5-turbo-0301</option>
</select>
<button type="button" onclick="SetModels()" id="btnSetModels" title="Load all models">...</button>
<!-- Speech recognition language; the option values are assigned to the recognizer's lang property. -->
<select id="selLang" onchange="ChangeLang(this)">
<option value="en-US">English (United States)</option>
<option value="fr-FR">French (France)</option>
<option value="ru-RU">Russian (Russia)</option>
<option value="pt-BR">Portuguese (Brazil)</option>
<option value="es-ES">Spanish (Spain)</option>
<option value="de-DE">German (Germany)</option>
<option value="it-IT">Italian (Italy)</option>
<option value="pl-PL">Polish (Poland)</option>
<option value="nl-NL">Dutch (Netherlands)</option>
</select>
<!-- Filled by GetVoiceList() with the Eleven Labs voices. -->
<select id="selVoices"></select>
<!-- Status line for "thinking..." / "text-to-speech..." progress messages. -->
<span id="spMsg"></span>
</div>
<!-- Question input; also receives the final speech recognition transcript. -->
<textarea id="txtMsg" rows="5" wrap="soft" style="width: 98%; margin-left: 3px; margin-top: 6px" placeholder="Input Text"></textarea>
<!-- Grey preview of interim speech recognition results. -->
<div id="idText"></div>
</div>
</body>
</html>
关注点
并非所有浏览器都支持语音转文本。Chrome 和 Edge 似乎支持它,而 Firefox 不支持。这就是为什么在 Firefox 中“收听”复选框将被隐藏的原因。
历史
- 2023年3月20日:创建版本 1
- 2023年5月24日,支持 Chat GPT4