C# 中的简单鹦鹉图灵算法






2.63/5 (5投票s)
2006 年 4 月 18 日
3分钟阅读

31987
本文实现了一个简单的聊天机器人,试图通过图灵测试,但结果惨败。
引言
我对机器学习非常感兴趣。然而,实现神经网络和贝叶斯图等可能需要很长时间,并且会受到领域定义问题的限制。虽然它们是最前沿的,但有时坐下来放松一下,做一些理性的计算机科学家不会做的事情:实现一个毫无价值的算法,也是不错的。
实际上,该算法并非毫无价值。设计代码并运行它揭示了该方法的弱点,这使得更容易概念化更高级的方法并准确定义领域。
该算法会将键入的消息分解为单词,分析单词的频率,然后根据单词的频率、最小句子大小和最大句子大小生成随机响应。一旦实现,它就是一个有趣的算法,可以玩玩(为人类交互实现一个 IParticipant
),因为它有时会产生一些有趣的单词组合。
关于代码的评论
除非绝对必要,否则我不喜欢附加包含大量代码的 Zip 文件。类似这样的东西,一半的乐趣在于实现。它也有助于理解。此外,我讨厌必须下载 Zip 文件才能找到我想要的答案。如果我在本教程中提出一个问题并给出一个答案,那么它就在本教程中,而无需下载、解压缩和 grep!
背景
如果您不熟悉图灵测试,请访问:图灵测试。其他有趣的图灵问题是 验证码,实现它们也很有趣。事实上,我的下一个教程可能是关于验证码。
使用代码
下面是一些未解释的代码,这些代码应该是显而易见的,但可能需要理解本教程的其他方面。由于我不明白的原因,我让 IParticpant
调用 Conversation.addParticipant(IParticipant)
。可能有一个更好的设计模式实现,我只是没有花时间在设计问题上。
public struct Message{
public IParticipant speaker;
public Conversation conversation;
public DateTime timeStamp;
public string message;
/**
* <summary>
* Provides a tidy string rep of this object
* </summary>
*/
public override string ToString() {
return timeStamp.ToShortTimeString() +
" " + speaker.getName() + ": " + message;
}//end ToString
/**
* <summary />
*/
public Message(IParticipant speaker, Conversation
conversation, string message){
this.timeStamp = System.DateTime.Now;
this.speaker = speaker;
this.conversation = conversation ;
this.message = message;
}//end Message
}//end Class Message
public interface IParticipant{
string getName();
void newMessage(Message message);
void joinConversation(Conversation conversation);
}//end IParticipant
public class Conversation{
List<IParticipant> participants = new List<IParticipant>();
//Allows the message log to be saved
//List<Message> messages = new List<Message>();
/**
* <summary>
* This method will allow a message to be "spoken"
* </summary>
*/
public void speak(Message message){
//messages.Add(message);
foreach(IParticipant p in participants){
//Notify all particpants
//of a new message in this conversation
p.newMessage(message);
}//end foreach
}//end speak
/**
* <summary>
* This method will add a participant to the conversation
* </summary>
*/
public void addParticipant(IParticipant participant){
if(!participants.Contains(participant)){
participants.Add(participant);
}
}//end addParticipant
}//end Conversation
下面是实现鹦鹉逻辑的类。在大多数情况下,此代码简单明了,因此我不会用大量的解释来烦扰读者。
/**
* <summary>
* This class holds word frequencies
* </summary>
*/
public class Word : IComparer<Word>{
private string word;
private int frequency = 0;
public event Action<Word> FrequencyChanged;
/**
* <summary>
* This method will return the word
* </summary>
*/
public string getWord(){
return word;
}//end getWord
/**
* <summary>
* This method will return the frequency
* </summary>
*/
public int getFrequency(){
return frequency;
}//end getFrequency
/**
* <summary>
* This method will increment the frequency
* </summary>
*/
public void incrementFrequency(){
frequency++;
onFrequencyChanged();
}//end incrementFrequency
/**
* <summary>
* This method is called when the frequency is changed
* </summary>
*/
protected void onFrequencyChanged(){
if(FrequencyChanged != null){
FrequencyChanged(this);
}//end if
}//end OnFrequencyChanged
/**
* <summary>
* Compares words by frequncy
* </summary>
*/
public int Compare(Word x, Word y) {
return x.getFrequency().CompareTo(y.getFrequency());
}//end Compare
/**
* <summary />
*/
public Word(string word){
this.frequency = 1;
this.word = word;
}//end word
}//end word
/**
* <summary>
* The bob algorithm is a trivial turing test
* </summary>
*/
public class ParrotAlgorithm : IParticipant{
private static int instanceCount = 0;
private string name =
"Parrot #" + instanceCount.ToString();
private Conversation conversation = null;
private List<Word> list = new List<Word>();
private Dictionary<string, Word> hash =
new Dictionary<string,Word>();
private int minMessageSize = 3;
private int maxMessageSize = 10;
private Random random = new Random();
private double chaos = .2d;
private int wordCount = 0;
/**
* <summary>
* This method will return the name of the speaker
* </summary>
*/
public string getName(){
return name;
}//end getName
/**
* <summary>
* This method will add a word
* </summary>
*/
private void addWord(string wordString){
wordCount++;
Word word = null;
if(!hash.ContainsKey(wordString)) {
word = new Word(wordString);
hash.Add(wordString, word);
list.Add(word);
word.FrequencyChanged +=
new Action<Word>(word_FrequencyChanged);
}//end if
else{
word = hash[wordString];
word.incrementFrequency();
}
}//end add word
/**
* <summary>
* This method is executed when the word frequency is updated
* </summary>
*/
public void word_FrequencyChanged(Word obj){
/*
* Resort using quick sort
* MSDN:
* On average, this method is an O(n log n) operation,
* where n is Count; in the worst case
* it is an O(n ^ 2) operation.
*/
list.Sort(obj);
}//end word
/**
* <summary>
* This method will generate a repsonse
* </summary>
*/
private void respond(){
//Only respond if necessary
int wordLength = (int)(minMessageSize +
(random.NextDouble() *
(maxMessageSize - minMessageSize)));
double runningTotal = 0.0d;
string message = "";
for(int i=0;i<wordLength;i++){
if(random.NextDouble() < chaos){
//Just pick a random word
message += list[(int)(random.NextDouble()
* list.Count)].getWord() + " ";
}
else{
double requiredOdds = random.NextDouble();
//Pick a word based on frequency
runningTotal = 0.0d;
foreach(Word w in list){
runningTotal += ((double)w.getFrequency()/
(double)wordCount);
if(runningTotal > requiredOdds){
message += w.getWord() + " ";
break;
}
}//end foreach
}
}//end for
if(message.Length > 0){
Message mm = new Message(this,
this.conversation, message);
conversation.speak(mm);
}
}//end respond
/**
* <summary>
* This method receive a new message
* </summary>
* <remarks>
* ParrotAlgorithm is a stupid parrot.
* He repeats what he hears most often
* </remarks>
*/
public void newMessage(Message message){
//ignore own messages
if(message.speaker != this){
string[] tokens =
message.message.Split(new char[]{' '});
if(tokens.Length > maxMessageSize)
maxMessageSize = tokens.Length;
foreach(string s in tokens){
addWord(s);
}//end foreach
respond();
}//end if
}//end new Message
/**
* <summary>
* This method will instruct this
* class to join a conversation
* </summary>
*/
public void joinConversation(Conversation conversation){
this.conversation = conversation;
conversation.addParticipant(this);
}//end joinConversation
/**
* <summary />
*/
public ParrotAlgorithm() {
instanceCount++;
}//end Constructor
}//end ParrotAlgorithm
首先要解释的是 Word
类。它的用途是什么?你为什么这样做?它包装消息中的单词或标记,并存储相关信息,例如频率。每当频率更新时,都会调用事件 FrequencyChanged
,以允许列表被重新排序。这可能不是最有效的方法。添加所有新单词然后对列表进行排序可能更有效。
private void addWord(string wordString){
wordCount++;
Word word = null;
if(!hash.ContainsKey(wordString)) {
word = new Word(wordString);
hash.Add(wordString, word);
list.Add(word);
word.FrequencyChanged +=
new Action<Word>(word_FrequencyChanged);
}//end if
else{
word = hash[wordString];
word.incrementFrequency();
}
}//end add word
上面的代码会将单词添加到记忆库中。一个字典对象,我称之为 hash
,用于更新已经使用和存储的单词的频率。该列表是一个 List<Word>
对象(.NET 2.0 泛型),它按排序顺序存储单词。这对于稍后获取随机单词非常重要。
/**
* <summary>
* This method will generate a repsonse
* </summary>
*/
private void respond(){
//Only respond if necessary
int wordLength = (int)(minMessageSize +
(random.NextDouble() * (maxMessageSize - minMessageSize)));
double runningTotal = 0.0d;
string message = "";
for(int i=0;i<wordLength;i++){
if(random.NextDouble() < chaos){
//Just pick a random word
message += list[(int)(random.NextDouble() *
list.Count)].getWord() + " ";
}
else{
double requiredOdds = random.NextDouble();
//Pick a word based on frequency
runningTotal = 0.0d;
foreach(Word w in list){
runningTotal += ((double)w.getFrequency()/
(double)wordCount);
if(runningTotal > requiredOdds){
message += w.getWord() + " ";
break;
}
}//end foreach
}
}//end for
if(message.Length > 0){
Message mm = new Message(this, this.conversation, message);
conversation.speak(mm);
}
}//end respond
此方法包含鹦鹉背后的所有大脑。
int wordLength = (int)(minMessageSize +
(random.NextDouble() * (maxMessageSize - minMessageSize)));
随机选择响应的长度。此代码保证了最小句子大小,并且除非它的同伴也这样做,否则不会让鹦鹉用巨大的句子来回应。
if(random.NextDouble() < chaos){
随机性至关重要。这就是让事情变得真正有趣的原因。必须要有突变。删除此代码或更改混沌值以查看它如何影响结果。
runningTotal += ((double)w.getFrequency()/(double)wordCount);
此函数将频率值标准化,使所有频率的总和 == 1(可能)。因此,您可以循环,从最低频率开始,然后将法线加起来,与您想要的随机数进行比较。老实说,我还没有证明这个陈述,但它似乎在直觉上是正确的。欢迎有统计学书籍和空闲时间的人来证明或反驳它。在过渡时期,它对我来说有效。