解析和解码一些电子邮件消息字段的值
基于正则表达式的解决方案,用于解码电子邮件地址字段(“From”、“To”)和“Subject”字段的值
引言
假设您需要自动处理邮件消息。
您正在使用 POP3 客户端自动获取消息,并且在客户端获取消息后,您拥有原始邮件消息源。
任何邮件消息的原始源都是纯文本,仅包含 ASCII 字符,并包括所有邮件消息头以及邮件消息正文和附件(如果有)。
根据 RFC 标准,邮件消息只能包含 ASCII 字符。任何非 ASCII 字符都应使用 MIME Base64 或 MIME Quoted-Printable 算法进行编码,因此邮件消息的原始源可能包含如下经过编码的字符串:
From: =?gb2312?B?p6+n2qfcp9qn5KfRIKezp+Sn0afip+Cn1aflp9Kn6KfWp9M=?=
To: =?gb2312?B?J0NvbWl0qKYgTm9ydWVnbyBkZWwgTm9iZWwn?=
Subject: =?gb2312?B?TGEgaW5jcmWoqmJsZSB5IHRyaXN0ZSBoaXN0b3JpYSBkZSBsYSBj?=
 =?gb2312?B?qKJuZGlkYSBFcqimbmRpcmEgeSBkZSBzdSBhYnVlbGEgZGVzYWxtYWRh?=
或
From: =?UTF-8?Q?Garc=C3=ADa_M=C3=A1rquez?=
To: =?UTF-8?Q?Comit=C3=A9_Noruego_del_Nobel?=
Subject: La =?UTF-8?Q?incre=C3=ADble=20y=20triste=20historia=20de=20la=20c=C3=A1nd?=
 =?UTF-8?Q?ida=20Er=C3=A9ndira=20y=20de=20su=20abuela=20desalmada?=
但是,任何邮件程序都会向用户显示以下内容
From: García Márquez
To: Comité Noruego del Nobel
Subject: La increíble y triste historia de la cándida Eréndira y de su abuela desalmada
如何使用 C# 代码获得相同的结果?
建议使用名为“decodeMailPropertyValue”的方法来完成此操作。
该方法通过字段名称查找电子邮件字段值并解码找到的值。
它不仅在通过 POP3 客户端获取邮件后处理电子邮件时,而且在从电子邮件消息的原始源中提取电子邮件字段值时也很有用。
例如,您可能需要在本地机器上的数据库或文件系统中处理存储的电子邮件。
使用代码
假设您正在使用某个 POP3 客户端,并创建了该客户端的 PopHandler 类的实例:
PopHandler MyHandler = new PopHandler(server, port, user, password, false);
获取 PopMail 类对象
PopMail mail = MyHandler.GetMail(i);
其中 i 是从 MyHandler.GetList() 获取的邮件列表中任何邮件消息的索引。
现在您可以像这样调用方法“decodeMailPropertyValue”:
string From = decodeMailPropertyValue(mail.Source, "FROM");
string To = decodeMailPropertyValue(mail.Source, "TO");
string Subject = decodeMailPropertyValue(mail.Source, "SUBJECT", false);
这是该方法本身
/// <summary>
/// Finds a header field ("From", "To", "Subject", ...) in the raw source of an
/// email message and returns its value with MIME encoded words
/// (<c>=?charset?Q?...?=</c> and <c>=?charset?B?...?=</c>) decoded.
/// </summary>
/// <param name="mailSource">Raw source of the email message.</param>
/// <param name="fieldName">
/// Header field name, matched case-insensitively and literally (regex
/// metacharacters in the name have no special meaning).
/// </param>
/// <param name="addressField">
/// <c>true</c> for address fields ("From", "To"): angle brackets around addresses
/// are removed and, when the value has no encoded words, unescaped double quotes
/// are removed as well. <c>false</c> for other fields ("Subject"). Default is <c>true</c>.
/// </param>
/// <returns>
/// The decoded field value; an empty string when the field is not present or
/// <paramref name="fieldName"/> is null or empty.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="mailSource"/> is null.</exception>
private string decodeMailPropertyValue(string mailSource, string fieldName, bool addressField = true)
{
    if (mailSource == null)
    {
        throw new ArgumentNullException("mailSource");
    }
    if (string.IsNullOrEmpty(fieldName))
    {
        return string.Empty;
    }

    string fieldValue = ExtractRawFieldValue(mailSource, fieldName);
    if (fieldValue.Trim().Length == 0)
    {
        // Field missing or blank: nothing to decode.
        return fieldValue;
    }

    if (addressField)
    {
        fieldValue = StripAddressBrackets(fieldValue);
    }

    if (EncodedWordRunPattern.IsMatch(fieldValue))
    {
        return EncodedWordRunPattern.Replace(fieldValue, run => DecodeRun(run, addressField));
    }

    // Quotes are only stripped from address fields that contain no encoded words.
    return addressField ? UnquoteDisplayNames(fieldValue) : fieldValue;
}

// "<user@host>" preceded by start-of-string or whitespace.
// Group 1 = address with brackets, group 2 = address without them.
private static readonly Regex BracketedAddressPattern = new Regex(
    @"(?:\A|\s+)(<([\w-\.]+@(?:(?:\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|" +
    @"(?:(?:[\w-]+\.)+))(?:[a-zA-Z]{2,63}|[0-9]{1,3})(?:\]?))>)",
    RegexOptions.IgnoreCase);

// One or more adjacent encoded words (Q or B), each followed by optional whitespace
// ("gap"), then the non-whitespace text that directly follows the run ("tail").
// The named groups record one capture per encoded word, so their indexes line up.
private static readonly Regex EncodedWordRunPattern = new Regex(
    @"(?:=\?(?<charset>[\w\s-_]+)\?(?<encoding>[QB])\?(?<text>[^\?]+)\?=(?<gap>\s*))+(?<tail>\S*)",
    RegexOptions.IgnoreCase);

// Returns the raw value of the first "fieldName: value" line plus its folded
// continuation lines, or an empty string when there is no such line.
private static string ExtractRawFieldValue(string mailSource, string fieldName)
{
    // Continuation lines must start with a space or a tab. Using \s here would also
    // swallow the "\r" of the empty line that separates the headers from the body.
    string pattern =
        @"(?:\A|\r?\n)" + Regex.Escape(fieldName) +
        @": ([^\r\n]+)(?:\r?\n|\z)(?:([ \t][^\r\n]*)(?:\r?\n|\z))*";

    Match header = Regex.Match(mailSource, pattern, RegexOptions.IgnoreCase);
    if (!header.Success)
    {
        return string.Empty;
    }

    StringBuilder value = new StringBuilder(header.Groups[1].Value);
    foreach (Capture continuation in header.Groups[2].Captures)
    {
        // Each continuation keeps its leading whitespace character; line breaks are dropped.
        value.Append(continuation.Value);
    }
    return value.ToString();
}

// Replaces every "<address>" found by BracketedAddressPattern with "address".
private static string StripAddressBrackets(string value)
{
    string result = value;
    foreach (Match address in BracketedAddressPattern.Matches(value))
    {
        result = result.Replace(address.Groups[1].Value, address.Groups[2].Value);
    }
    return result;
}

// Decodes one run of adjacent encoded words and re-attaches the text that followed it.
private static string DecodeRun(Match run, bool addressField)
{
    CaptureCollection charsets = run.Groups["charset"].Captures;
    CaptureCollection encodings = run.Groups["encoding"].Captures;
    CaptureCollection texts = run.Groups["text"].Captures;
    CaptureCollection gaps = run.Groups["gap"].Captures;
    string tail = run.Groups["tail"].Value;

    // Whitespace between adjacent encoded words is not part of the text, so the
    // decoded words are concatenated directly.
    StringBuilder result = new StringBuilder(run.Length);
    for (int i = 0; i < texts.Count; i++)
    {
        result.Append(DecodeEncodedWord(charsets[i].Value, encodings[i].Value, texts[i].Value));
    }

    // Keep the decoded text separated from what follows it: always for address fields
    // (name followed by address), otherwise only when the source had whitespace there.
    // Nothing is appended when the run ends the value, so no trailing blank is produced.
    bool hadGap = gaps.Count > 0 && gaps[gaps.Count - 1].Length > 0;
    if (tail.Length > 0 && (addressField || hadGap))
    {
        result.Append(' ');
    }
    return result.Append(tail).ToString();
}

// Decodes a single encoded word. When the charset is unknown or the payload is not
// valid Base64, the encoded word is returned in its original form instead of throwing.
private static string DecodeEncodedWord(string charset, string encoding, string text)
{
    try
    {
        Encoding target = Encoding.GetEncoding(charset.Trim());
        byte[] raw = string.Equals(encoding, "B", StringComparison.OrdinalIgnoreCase)
            ? Convert.FromBase64String(text)
            : DecodeQText(StripSurroundingQuotes(text), target);
        return target.GetString(raw);
    }
    catch (FormatException)
    {
        return "=?" + charset + "?" + encoding + "?" + text + "?=";
    }
    catch (ArgumentException)
    {
        return "=?" + charset + "?" + encoding + "?" + text + "?=";
    }
    catch (NotSupportedException)
    {
        return "=?" + charset + "?" + encoding + "?" + text + "?=";
    }
}

// Removes one pair of double quotes wrapping the whole Q-encoded text, if present.
private static string StripSurroundingQuotes(string text)
{
    bool quoted = text.Length >= 3 && text[0] == '"' && text[text.Length - 1] == '"';
    return quoted ? text.Substring(1, text.Length - 2) : text;
}

// Converts Q-encoded text to bytes: "_" is a space, "=XX" is the byte with hex value XX,
// any other character stands for itself.
private static byte[] DecodeQText(string text, Encoding charset)
{
    var bytes = new System.Collections.Generic.List<byte>(text.Length);
    for (int i = 0; i < text.Length; i++)
    {
        char current = text[i];
        if (current == '=' && i + 2 < text.Length)
        {
            int high = HexValue(text[i + 1]);
            int low = HexValue(text[i + 2]);
            if (high >= 0 && low >= 0)
            {
                bytes.Add((byte)((high << 4) | low));
                i += 2;
                continue;
            }
        }

        if (current == '_')
        {
            bytes.Add(0x20);
        }
        else if (current < 0x80)
        {
            bytes.Add((byte)current);
        }
        else
        {
            // Not expected in an encoded word; keep the character rather than lose it.
            bytes.AddRange(charset.GetBytes(current.ToString()));
        }
    }
    return bytes.ToArray();
}

// Returns the numeric value of a hexadecimal digit, or -1 when c is not one.
private static int HexValue(char c)
{
    if (c >= '0' && c <= '9')
    {
        return c - '0';
    }
    if (c >= 'A' && c <= 'F')
    {
        return c - 'A' + 10;
    }
    if (c >= 'a' && c <= 'f')
    {
        return c - 'a' + 10;
    }
    return -1;
}

// Removes double quotes that are not preceded by a backslash, then resolves
// backslash escapes ("\x" becomes "x"). A trailing lone backslash is left as is.
private static string UnquoteDisplayNames(string value)
{
    string withoutQuotes = Regex.Replace(value, @"(?<!\\)""", string.Empty);
    return Regex.Replace(withoutQuotes, @"\\(.)", "$1", RegexOptions.Singleline);
}