使用 .NET 3.5 解析压缩的纯文本文件并将结果保存到本地数据库
使用 .NET 3.5、LINQ to SQL、后台工作线程和 UI 解析压缩的纯文本文件并将结果保存到本地数据库。
引言
我编写这个应用程序是为了处理大约 3500 个包含一些纯文本文件的压缩文件。每个纯文本文件都包含一个错误描述。我的任务是解析这些压缩的纯文本文件,并将提取的数据保存到本地数据库中,以便稍后进行分析。我使用 .NET 创建了这个应用程序。
背景
这个应用程序处理的是错误报告。错误(bug)是邪恶的!
使用代码
我所做的是创建了一个 Windows 窗体应用程序,并附加了一个本地 MS SQL Server 数据库文件。(您可以使用 MS SQL Express 创建数据库和表来保存记录。)然后生成 LINQ to SQL 数据库上下文,即下文代码中使用的 DAO.DataClassesUHEEDataContext 及其实体类 DAO.Uhee。
定义一个解析接口,并创建实现该接口的解析类
/// <summary>
/// Contract for a parser that turns one text file into a <c>DAO.Uhee</c> record.
/// </summary>
public interface IParcer
{
/// <summary>
/// Parses the file at the given path.
/// </summary>
/// <param name="file">Path of the text file to parse.</param>
/// <returns>The record built from the file's contents.</returns>
DAO.Uhee Parse(string file);
}
...以及这个
/// <summary>
/// Tells <c>UHEEParser.GetValue</c> what kind of value follows the ':' on a report line.
/// </summary>
public enum ExpectedDataType
{
// The value itself contains ':' characters (a time of day), so the split pieces are re-joined.
DateTime,
// The value is returned as the text after the label's ':'.
String,
// Handled exactly like String by GetValue; no caller in this file passes it.
Integer
}
/// <summary>
/// Parses one unpacked error-report text file into a <c>DAO.Uhee</c> record.
/// </summary>
/// <remarks>
/// The report is treated as a list of "label: value" lines and the fields are
/// picked from fixed line positions (zero-based indices 2-4, 7, 8, 10-12, 14-16).
/// </remarks>
public class UHEEParser : IParcer
{
    // Highest zero-based line index that Parse reads; shorter files cannot be parsed.
    private const int LastRequiredLineIndex = 16;

    /// <summary>
    /// Reads the report at <paramref name="file"/> and maps its lines to a new record.
    /// </summary>
    /// <param name="file">Path of the unpacked report file.</param>
    /// <returns>
    /// The populated record, or <c>null</c> when the file does not exist.
    /// </returns>
    /// <exception cref="FormatException">
    /// The file has fewer lines than the report layout requires, or the
    /// date/time line cannot be parsed.
    /// </exception>
    public DAO.Uhee Parse(string file)
    {
        if (!File.Exists(file))
        {
            return null;
        }

        // ReadAllLines opens, reads and closes the file in one call, so no
        // reader is left undisposed and an empty file yields an empty array.
        string[] lines = File.ReadAllLines(file);

        if (lines.Length <= LastRequiredLineIndex)
        {
            throw new FormatException(
                "The report file '" + file + "' has " + lines.Length.ToString() +
                " line(s); at least " + (LastRequiredLineIndex + 1).ToString() +
                " are required.");
        }

        DAO.Uhee uhee = new DAO.Uhee();
        uhee.ID = Guid.NewGuid();
        uhee.User = GetValue(lines[2], ExpectedDataType.String);
        uhee.UserDevice = GetValue(lines[3], ExpectedDataType.String);
        // NOTE(review): DateTime.Parse uses the current culture; confirm that
        // matches the format the reports are written in.
        uhee.ErrorDateTime =
            DateTime.Parse(GetValue(lines[4], ExpectedDataType.DateTime));
        uhee.AppName = GetValue(lines[7], ExpectedDataType.String);
        uhee.Version = GetValue(lines[8], ExpectedDataType.String);
        uhee.Host = GetValue(lines[10], ExpectedDataType.String);
        uhee.Port = GetValue(lines[11], ExpectedDataType.String);
        uhee.WebApp = GetValue(lines[12], ExpectedDataType.String);
        uhee.CurrentForm = GetValue(lines[14], ExpectedDataType.String);
        uhee.Message = GetValue(lines[15], ExpectedDataType.String);
        uhee.StackTrace = GetValue(lines[16], ExpectedDataType.String);
        uhee.ParsingTime = DateTime.Now;
        return uhee;
    }

    /// <summary>
    /// Returns the text that follows the first ':' on a "label: value" line.
    /// </summary>
    /// <param name="line">One line of the report.</param>
    /// <param name="dataType">
    /// Kind of value expected. Every kind is extracted the same way now that
    /// the whole remainder of the line is returned; the parameter is kept so
    /// the signature stays unchanged.
    /// </param>
    /// <returns>
    /// Everything after the first ':' (not trimmed), or an empty string when
    /// the line contains no ':'.
    /// </returns>
    string GetValue(string line, ExpectedDataType dataType)
    {
        // Only the first ':' separates label from value. Values that contain
        // ':' themselves (times, URLs, stack traces) must be kept whole, which
        // splitting on every ':' did not do.
        int separatorIndex = line.IndexOf(':');
        if (separatorIndex < 0)
        {
            return string.Empty;
        }

        return line.Substring(separatorIndex + 1);
    }
}
现在是时候下载 DotNetZip dll(在 C#、VB 和任何 .NET 语言中进行压缩和解压缩:http://dotnetzip.codeplex.com/)并在您的项目中添加对其引用。
最后一步是在主窗体类下添加一些代码
这是代码
public partial class FormMain : Form
{
#region Properties
// Folder chosen in btnZIPFolder_Click; bw_DoWork searches it for archives.
string zipFolder { set; get; }
// Search pattern passed to Directory.GetFiles in bw_DoWork.
const string searchMask = "*.zip";
// Scratch directory that bw_DoWork recreates before extraction and deletes afterwards.
const string unpackDirectory = @"C:\Windows\Temp\UnpackDirectory";
// Lower-cased, trimmed text of tbFileNameContains; entry-name filter used by ExtractFiles.
string fileNameContainsCharacters { set; get; }
// Lower-cased, trimmed text of tbFileExtention; entry-suffix filter used by ExtractFiles.
string fileExtention { set; get; }
// Paths of extracted files: filled by ExtractFiles, read by ParseFiles, cleared by TransferData.
List<string> files = new List<string>();
// Parsed records: filled by ParseFiles, saved and cleared by TransferData.
List<DAO.Uhee> UHEEs = new List<DAO.Uhee>();
// Connection string read from the application configuration in the constructor.
string connectionString { set; get; }
// Binding source that GetData attaches to dgv1 and bindingNavigator1.
BindingSource bindingSource1 = new BindingSource();
#endregion
#region Init the app
/// <summary>
/// Builds the form, loads the connection string from the application
/// configuration and wires up the background worker.
/// </summary>
public FormMain()
{
    InitializeComponent();

    var settings = ConfigurationManager.ConnectionStrings[
        "UHEEParser.Properties.Settings.UHEEConnectionString"];
    connectionString = settings.ConnectionString;

    // Nothing is running yet, so there is nothing to cancel.
    btCancel.Enabled = false;

    // The worker reports progress and can be cancelled from the UI.
    bw.WorkerReportsProgress = true;
    bw.WorkerSupportsCancellation = true;
    bw.DoWork += bw_DoWork;
    bw.ProgressChanged += bw_ProgressChanged;
    bw.RunWorkerCompleted += bw_RunWorkerCompleted;
}
#endregion
#region Form Events
/// <summary>
/// Click handler that empties the log box.
/// </summary>
private void btCleanLog_Click(object sender, EventArgs e)
{
    this.rtbLog.Clear();
}
/// <summary>
/// Click handler that lets the user pick the folder containing the ZIP archives.
/// </summary>
/// <remarks>
/// The selection is applied, and the Start button enabled, only when the
/// dialog is confirmed. Dismissing the dialog leaves the current folder and
/// button state untouched.
/// </remarks>
private void btnZIPFolder_Click(object sender, EventArgs e)
{
    if (fbd.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    zipFolder = fbd.SelectedPath;
    tbZipFolder.Text = zipFolder;
    btStart.Enabled = true;
}
/// <summary>
/// Click handler that requests cancellation of the running background job.
/// </summary>
private void btCancel_Click(object sender, EventArgs e)
{
    this.CancelParsing();
}
/// <summary>
/// Asks the background worker to cancel, provided it supports cancellation.
/// </summary>
void CancelParsing()
{
    if (!bw.WorkerSupportsCancellation)
    {
        return;
    }

    bw.CancelAsync();
}
/// <summary>
/// Click handler that captures the current filter settings and starts the
/// background job unless one is already running.
/// </summary>
private void btStart_Click(object sender, EventArgs e)
{
    fileNameContainsCharacters = tbFileNameContains.Text.Trim().ToLower();
    fileExtention = tbFileExtention.Text.Trim().ToLower();

    if (bw.IsBusy)
    {
        return;
    }

    // The constructor disables the Cancel button; it has to be enabled while
    // a job is running, otherwise the job can never be cancelled from it.
    btCancel.Enabled = true;

    // Start the asynchronous operation.
    bw.RunWorkerAsync();
}
#endregion
#region Background Worker Job
/// <summary>
/// Background job: finds the ZIP archives in <c>zipFolder</c>, extracts the
/// matching entries into the temp directory, parses them, saves the records
/// and removes the temp directory again.
/// </summary>
/// <param name="sender">The <see cref="BackgroundWorker"/> running the job.</param>
/// <param name="e">Receives <c>Cancel = true</c> when the job was cancelled.</param>
private void bw_DoWork(object sender, DoWorkEventArgs e)
{
    BackgroundWorker worker = (BackgroundWorker)sender;
    WriteLog("--- Start parsing ---");
    WriteLog("Get ZIP files...");
    // Process the list of files found in the directory.
    string[] fileEntries = Directory.GetFiles(zipFolder, searchMask);
    WriteLog("Get ZIP files... Found: " + fileEntries.Length.ToString());
    WriteLog("Get ZIP files... DONE");

    // Start from an empty temp directory.
    if (!Directory.Exists(unpackDirectory))
    {
        Directory.CreateDirectory(unpackDirectory);
        WriteLog("Creating temp directory... DONE: " + unpackDirectory);
    }
    else
    {
        try
        {
            Directory.Delete(unpackDirectory, true);
            WriteLog("Deleting temp directory... DONE: " + unpackDirectory);
            Directory.CreateDirectory(unpackDirectory);
            WriteLog("Creating temp directory... DONE: " + unpackDirectory);
        }
        catch (Exception ex)
        {
            // Best effort: the run continues with whatever is left in place.
            WriteLog("Deleting temp directory... " +
                unpackDirectory + " ERROR: " + ex.Message);
        }
    }

    int processed = 0;
    SetupProgressBar(fileEntries.Length);
    foreach (string fileName in fileEntries)
    {
        if (worker.CancellationPending)
        {
            WriteLog("Processing file... CANCELED OPERATION: " + fileName);
            e.Cancel = true;
            break;
        }

        WriteLog("Processing file... " + fileName);
        ExtractFiles(fileName);

        // Report a real percentage (0-100); the handler displays the value
        // followed by "%". Length is non-zero inside the loop.
        processed++;
        worker.ReportProgress(processed * 100 / fileEntries.Length);
    }

    // NOTE(review): after a cancellation the files extracted so far are still
    // parsed and saved; TransferData is also what clears the working lists.
    ParseFiles();
    TransferData();

    if (Directory.Exists(unpackDirectory))
    {
        try
        {
            Directory.Delete(unpackDirectory, true);
            WriteLog("Deleting temp directory... DONE: " + unpackDirectory);
        }
        catch (Exception ex)
        {
            WriteLog("Deleting temp directory... " +
                unpackDirectory + " ERROR: " + ex.Message);
        }
    }
    WriteLog("--- End parsing ---");
}
/// <summary>
/// Saves the parsed records to the database one by one, logs how many were
/// actually saved, then clears the record and file lists.
/// </summary>
/// <remarks>
/// A failure stops the transfer at the failing record; the error is logged
/// and records saved before it stay in the database.
/// </remarks>
private void TransferData()
{
    if (UHEEs.Count > 0)
    {
        WriteLog("There are " + UHEEs.Count.ToString() + " records to transfer");

        // Counts successful submits so the final log line is accurate even
        // when the loop is interrupted by an error.
        int transferred = 0;
        try
        {
            // The data context holds a connection and is disposable.
            using (DAO.DataClassesUHEEDataContext db =
                new UHEEParser.DAO.DataClassesUHEEDataContext(connectionString))
            {
                foreach (DAO.Uhee u in UHEEs)
                {
                    db.Uhees.InsertOnSubmit(u);
                    db.SubmitChanges();
                    transferred++;
                }
            }
        }
        catch (Exception ex)
        {
            WriteLog("Saving UHEE... ERROR: " + ex.Message);
        }
        WriteLog("There are " + transferred.ToString() +
            " records has been transfered");
    }
    else
    {
        WriteLog("There are no records to transfer");
    }

    UHEEs.Clear();
    files.Clear();
}
/// <summary>
/// Parses every extracted file and collects the resulting records in <c>UHEEs</c>.
/// </summary>
/// <remarks>
/// A file that cannot be parsed is logged and skipped so that one malformed
/// report does not abort the whole run.
/// </remarks>
private void ParseFiles()
{
    if (files.Count > 0)
    {
        WriteLog("There are " + files.Count.ToString() + " files to parce.");
        UHEEParser.ENTITIES.UHEEParser parser =
            new UHEEParser.ENTITIES.UHEEParser();
        foreach (string file in files)
        {
            WriteLog("Parsing file... " + file);
            try
            {
                DAO.Uhee uhee = parser.Parse(file);
                if (uhee != null)
                {
                    UHEEs.Add(uhee);
                }
                WriteLog("Parsing file... " + " DONE: " + file);
            }
            catch (Exception ex)
            {
                // Keep going: the remaining files are still worth parsing, and
                // the caller's cleanup must run.
                WriteLog("Parsing file... ERROR: " + file + " " + ex.Message);
            }
        }
    }
    else
    {
        WriteLog("There are no files to parce.");
    }
}
/// <summary>
/// Shows the outcome of the background job (cancelled, failed or done) and
/// disables the Cancel button.
/// </summary>
private void bw_RunWorkerCompleted(object sender,
    RunWorkerCompletedEventArgs e)
{
    if (e.Cancelled)
    {
        this.tbProgress.Text = "Canceled!";
    }
    else if (e.Error != null)
    {
        this.tbProgress.Text = ("Error: " + e.Error.Message);
    }
    else
    {
        this.tbProgress.Text = "Done!";
    }

    // The job is over whichever way it ended, so there is nothing to cancel.
    this.btCancel.Enabled = false;
}
/// <summary>
/// Shows the reported progress value followed by "%" and advances the
/// progress bar by one step.
/// </summary>
private void bw_ProgressChanged(object sender, ProgressChangedEventArgs e)
{
    string progressText = e.ProgressPercentage.ToString() + "%";
    this.tbProgress.Text = progressText;
    this.pb1.PerformStep();
}
#endregion
#region Extract files from ZIP
/// <summary>
/// Extracts the entries of one ZIP archive that match the current file-name
/// and extension filters into the temp directory and records their paths.
/// </summary>
/// <param name="zipToUnpack">Path of the ZIP archive to read.</param>
/// <returns>
/// The shared <c>files</c> list, which accumulates the extracted paths of
/// every archive processed in the current run.
/// </returns>
/// <remarks>
/// Both filters are matched case-insensitively. When both are set an entry
/// must satisfy both. When neither is set no entry is extracted.
/// NOTE(review): entry names are used as given by the archive; confirm the
/// DotNetZip version in use rejects names that would escape the temp directory.
/// </remarks>
private List<string> ExtractFiles(string zipToUnpack)
{
    bool hasNameFilter = !string.IsNullOrEmpty(fileNameContainsCharacters);
    bool hasExtensionFilter = !string.IsNullOrEmpty(fileExtention);

    using (ZipFile zip1 = ZipFile.Read(zipToUnpack))
    {
        foreach (ZipEntry e in zip1)
        {
            string completePathExtractedFile = unpackDirectory +
                @"\" + e.FileName.Replace(@"/", @"\");
            string fileName = Path.GetFileName(completePathExtractedFile);

            // At least one filter must be configured for anything to match.
            if (!hasNameFilter && !hasExtensionFilter)
            {
                continue;
            }

            if (hasNameFilter &&
                fileName.IndexOf(fileNameContainsCharacters,
                    StringComparison.OrdinalIgnoreCase) < 0)
            {
                continue;
            }

            // The filter text is lower-cased by the caller, so the suffix
            // test has to ignore case to accept names such as "REPORT.TXT".
            if (hasExtensionFilter &&
                !fileName.EndsWith(fileExtention,
                    StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            WriteLog("Extracting from ZIP archive... ");
            e.Extract(unpackDirectory,
                ExtractExistingFileAction.OverwriteSilently);
            files.Add(completePathExtractedFile);
            WriteLog("Extracting from ZIP archive... DONE: " +
                completePathExtractedFile);
        }
    }
    return files;
}
#endregion
#region Write Log
// Signature used to marshal WriteLog calls onto the UI thread.
private delegate void stringDelegate(string s);

/// <summary>
/// Appends one line to the log box; safe to call from the worker thread.
/// </summary>
/// <param name="text">Message to append; a line break is added after it.</param>
private void WriteLog(string text)
{
    if (rtbLog.InvokeRequired)
    {
        // Called off the UI thread: re-enter on the thread that owns the control.
        this.Invoke(new stringDelegate(WriteLog), new object[] { text });
        return;
    }

    // AppendText adds to the end of the existing content instead of reading
    // and re-assigning the whole log for every line.
    rtbLog.AppendText(text + Environment.NewLine);
}
#endregion
#region Setup Forms Controls
// Signature used to marshal SetupProgressBar calls onto the UI thread.
private delegate void integerDelegate(int i);

/// <summary>
/// Resets the progress bar for a run of <paramref name="i"/> work items;
/// safe to call from the worker thread.
/// </summary>
/// <param name="i">Number of items (ZIP archives) that will be processed.</param>
private void SetupProgressBar(int i)
{
    if (pb1.InvokeRequired)
    {
        // Called off the UI thread: re-enter on the thread that owns the control.
        this.Invoke(new integerDelegate(SetupProgressBar), new object[] { i });
        return;
    }

    pb1.Maximum = i;
    // The bar is advanced with PerformStep once per item, so one step must
    // equal one item for the bar to fill exactly when the last item is done.
    pb1.Step = 1;
    pb1.Value = 0;
}
#endregion
#region Statistics
/// <summary>
/// Click handler that reloads the grid from the database.
/// </summary>
private void btGetData_Click(object sender, EventArgs e)
{
    this.GetData();
}
/// <summary>
/// Binds the grid and the navigator to all stored records, newest error first.
/// </summary>
void GetData()
{
    // The context is deliberately left open: the query is deferred and is
    // evaluated by the binding, not here.
    var db = new UHEEParser.DAO.DataClassesUHEEDataContext(connectionString);
    var newestFirst = db.Uhees.OrderByDescending(c => c.ErrorDateTime);

    bindingSource1.DataSource = newestFirst;
    bindingNavigator1.BindingSource = bindingSource1;
    dgv1.DataSource = bindingSource1;
}
/// <summary>
/// Click handler that deletes every stored record and then reloads the grid.
/// </summary>
private void btnClean_Click(object sender, EventArgs e)
{
    // The delete is executed inside this block, so the context can be
    // disposed as soon as the changes are submitted.
    using (DAO.DataClassesUHEEDataContext db =
        new UHEEParser.DAO.DataClassesUHEEDataContext(connectionString))
    {
        var itemsUHEE = from c in db.Uhees
                        select c;
        db.Uhees.DeleteAllOnSubmit(itemsUHEE);
        db.SubmitChanges();
    }

    GetData();
}
#endregion
#region Menu Actions
/// <summary>
/// Menu handler that requests cancellation of any running job and then
/// exits the application.
/// </summary>
private void exitToolStripMenuItem_Click(object sender, EventArgs e)
{
    this.CancelParsing();
    Application.Exit();
}
#endregion
}
您可以随意添加任何您希望改进的代码,例如
- 使用正则表达式解析文本
- 导出到 MS Excel
- 甚至将此应用程序转换为 Windows 服务 ;-)
感谢您的阅读,祝您编码愉快!
历史
这是应用程序的第一个版本。