HTML 转数据库转换器应用程序






3.03/5 (19投票s)
2004年9月3日

112297

3953
VB.NET 中的 HTML 到数据库转换器应用程序
引言
这是一个小型应用程序,可用于将 HTML 页面转换为数据库格式。
详细说明
通常,我们从互联网获取的数据并非以数据库可存储的格式存在。此应用程序附带源代码和包含数据库的演示文件,将演示如何从 HTML 页面提取字段并将其提交到 Access 数据库。它可以循环运行以将数据库转换为 Access。它不会使您的计算机卡顿,并且可以完成所需的文件,但 HTML 页面的格式应相同,它们应具有相同的字段、表等。
部分源代码
Public con As OleDb.OleDbConnection
Private Sub enableParseButton()
btnParse.Enabled = (txtDocumentName.Text.Length > 0)
End Sub
Private Function ExtractEmailAddressesFromString(
ByVal source As String) As String()
On Error Resume Next
Dim mc As MatchCollection
Dim i As Integer
mc = Regex.Matches(source,
"([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})")
Dim results(mc.Count - 1) As String
For i = 0 To results.Length - 1
results(i) = mc(i).Value
Next
Return results
End Function
Private Function ExtractheadFromString(ByVal a As String) As String
On Error Resume Next
Dim i, mypos, mypos1, mypos2 As Integer
Dim b As String
mypos = InStr(a, "Agency head", CompareMethod.Text)
mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
mypos1 = InStr(mypos + 1, a, vbCr, CompareMethod.Text)
b = Mid(a, mypos + 2, (mypos1 - mypos - 2))
Return b
End Function
Private Function ExtractAgencyFromString(ByVal a As String) As String
On Error Resume Next
Dim i, mypos, mypos1, mypos2 As Integer
Dim b As String
mypos = InStr(a, "Agency listings -", CompareMethod.Text)
mypos = InStr(mypos, a, "-", CompareMethod.Text)
mypos1 = InStr(mypos, a, vbCr, CompareMethod.Text)
b = Mid(a, mypos + 1, (mypos1 - mypos - 1))
Return b
End Function
Private Function ExtractheadoffFromString(ByVal a As String) As String
On Error Resume Next
Dim i, mypos, mypos1, mypos2 As Integer
Dim b As String
mypos = InStr(a, "Head office add", CompareMethod.Text)
mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
mypos1 = InStr(mypos + 5, a, vbCr, CompareMethod.Text)
b = Mid(a, mypos + 2, (mypos1 - mypos - 2))
Return b
End Function
Private Function ExtracttfaxtelFromString(ByVal a As String) As String()
Dim i, mypos, mypos1, mypos2, mypos3 As Integer
Dim b(5) As String
On Error Resume Next
mypos = InStr(a, "Tel/Fax/email", CompareMethod.Text)
mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
mypos1 = InStr(mypos, a, ";", CompareMethod.Text)
b(0) = Mid(a, mypos + 2, (mypos1 - mypos - 1)) 'tel
'b(0) = "jkjh"
mypos = InStr(a, "Fax:", CompareMethod.Text)
mypos = InStr(mypos, a, " ", CompareMethod.Text)
mypos1 = InStr(mypos, a, ";", CompareMethod.Text)
b(1) = Mid(a, mypos, (mypos1 - mypos - 1)) 'fax
mypos2 = InStr(mypos1 + 1, a, ";", CompareMethod.Text)
b(2) = Mid(a, mypos1 + 1, (mypos2 - mypos1 - 1)) 'email
mypos3 = InStr(mypos2 + 1, a, vbCr, CompareMethod.Text)
b(3) = Mid(a, mypos2 + 1, (mypos3 - mypos2 - 1))
mypos1 = InStr(mypos1, a, vbCr, CompareMethod.Text) 'url
Return b
End Function
Private Sub Form1_Load(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles MyBase.Load
txtDocumentName.Text = ""
enableParseButton()
End Sub
Private Sub txtDocumentName_TextChanged(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles txtDocumentName.TextChanged
enableParseButton()
End Sub
Private Sub btnParse_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles btnParse.Click
' Dim app As Word.Application
' Dim doc As Word.Document
Dim app As Object
Dim doc As Object
Dim docFileName As String
Dim docPath As String
Dim contents As String
Cursor.Current = Cursors.WaitCursor
Try
' init UI controls
Lblfindcount.Text = ""
Txtresults.Text = ""
txtDocContents.Text = ""
' validate file name
docFileName = txtDocumentName.Text
If docFileName.Length = 0 Then
MsgBox("Please enter a file name")
txtDocumentName.Focus()
Return
End If
' if no path use APP_BASE
docPath = Path.GetDirectoryName(docFileName)
If docPath.Length = 0 Then
docFileName = Application.StartupPath & "\" & docFileName
End If
' ensure file exists
If Not File.Exists(docFileName) Then
MsgBox("File does not exist")
txtDocumentName.SelectAll()
txtDocumentName.Focus()
Return
End If
' extract contents of file
contents = ""
If Path.GetExtension(docFileName).ToLower = ".txt" Then
Dim fs As StreamReader
Try
fs = New StreamReader(docFileName)
contents = fs.ReadToEnd
Catch ex As Exception
MsgBox("Unable to read from text input file")
contents = ""
Finally
If Not fs Is Nothing Then fs.Close()
End Try
Else
Try
Try
'app = New Word.Application
app = CreateObject("Word.Application")
Catch ex As Exception
MsgBox("Unable to start Word")
Throw ex
End Try
Try
doc = app.Documents.Open(docFileName)
Catch ex As Exception
MsgBox("Unable to load document in Word")
Throw ex
End Try
contents = doc.Content.Text
Catch ex As Exception
contents = ""
Finally
If Not app Is Nothing Then app.Quit()
End Try
End If
If contents.Length = 0 Then Return
' search for email addresses
Dim emails, aglist As String()
Dim email As String
Dim results As New StringBuilder()
Dim results1 As New StringBuilder()
emails = ExtractEmailAddressesFromString(contents)
For Each email In emails
results.Append(email)
Next
Dim i As Integer
' display results
Lblfindcount.Text = String.Format("{0} match(es) found.", emails.Length)
Txtresults.Text = results.ToString
TextBox8.Text = ExtractAgencyFromString(contents)
TextBox9.Text = ExtractheadFromString(contents)
TextBox6.Text = ExtractheadoffFromString(contents)
TextBox5.Text = (ExtracttfaxtelFromString(contents))(0)
TextBox2.Text = (ExtracttfaxtelFromString(contents))(1)
TextBox3.Text = (ExtracttfaxtelFromString(contents))(2)
'TextBox1.Text = (ExtracttfaxtelFromString(contents))(3)
'TextBox1.Text = results1.ToString
txtDocContents.Text = contents
Finally
Cursor.Current = Cursors.Default
End Try
End Sub
Private Sub btnBrowse_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles btnBrowse.Click
Dim ofd As OpenFileDialog
Try
ofd = New OpenFileDialog()
ofd.CheckFileExists = True
ofd.CheckPathExists = True
'ofd.Filter = "*.*|*.doc|Rich Text Documents
'(*.rtf)|*.rtf|Text Documents (*.txt)|*.txt"
ofd.Title = "Select Document"
If ofd.ShowDialog = DialogResult.OK Then
txtDocumentName.Text = ofd.FileName
End If
Finally
If Not ofd Is Nothing Then ofd.Dispose()
End Try
End Sub
Public Function CREATE_CON_MS(ByVal DATA_SOURCE As String) As Boolean
con = New OleDbConnection("Provider=Microsoft.jet.OLEDB.4.0;Data Source="
+ DATA_SOURCE + ";Persist Security Info=False")
con.Open()
If con.State = ConnectionState.Open Then
Dim ss As String = "pixel.gif"
Dim SqlIns As String = "Insert into FashionDesigner(FD_Name,FD_ID,
FD_Address,FD_Phone,FD_Fax,FD_Email,FD_URL,FD_Logo,FD_ContactPerson)
values ('" & TextBox8.Text & "','" & TextBox4.Text & "','"
& TextBox6.Text & "','" & TextBox5.Text & "','" &
TextBox2.Text & "','" & Txtresults.Text & "','"
& TextBox1.Text & "','" & ss & "','" & TextBox9.Text & "')"
Dim MyCmd1 As OleDb.OleDbCommand = New OleDb.OleDbCommand(SqlIns, con)
Dim MyDataR1 As OleDb.OleDbDataReader
MyDataR1 = MyCmd1.ExecuteReader
con.Close()
con.Dispose()
Return True
Else
Return False
End If
End Function
Private Sub btnExit_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles btnExit.Click
Me.Close()
End Sub
Private Sub Button1_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles Button1.Click
CREATE_CON_MS("c:\DisplayDatsap.mdb")
End Sub
Private Sub OleDbDataAdapter1_RowUpdated(ByVal sender As System.Object,
ByVal e As System.Data.OleDb.OleDbRowUpdatedEventArgs)
End Sub
Private Sub TabPage3_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles TabPage3.Click
End Sub
Private Sub Label12_Click(ByVal sender As System.Object,
ByVal e As System.EventArgs) Handles Label12.Click
End Sub
End Class