65.9K
CodeProject 正在变化。 阅读更多。
Home

HTML 转数据库转换器应用程序

starIconstarIconstarIcon
emptyStarIcon
starIcon
emptyStarIcon

3.03/5 (19投票s)

2004年9月3日

viewsIcon

112297

downloadIcon

3953

VB.NET 中的 HTML 到数据库转换器应用程序

引言

这是一个小型应用程序,可用于将 HTML 页面转换为数据库格式。

详细说明

通常,我们从互联网获取的数据并非以数据库可存储的格式存在。此应用程序附带源代码和包含数据库的演示文件,将演示如何从 HTML 页面提取字段并将其提交到 Access 数据库。它可以循环运行以将数据库转换为 Access。它不会使您的计算机卡顿,并且可以完成所需的文件,但 HTML 页面的格式应相同,它们应具有相同的字段、表等。

部分源代码

 Public con As OleDb.OleDbConnection
    Private Sub enableParseButton()
        btnParse.Enabled = (txtDocumentName.Text.Length > 0)
    End Sub
    Private Function ExtractEmailAddressesFromString(
     ByVal source As String) As String()
        On Error Resume Next
        Dim mc As MatchCollection
        Dim i As Integer
        mc = Regex.Matches(source, 
         "([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})")
        Dim results(mc.Count - 1) As String
        For i = 0 To results.Length - 1
            results(i) = mc(i).Value
        Next
        Return results
    End Function
    Private Function ExtractheadFromString(ByVal a As String) As String
        On Error Resume Next
        Dim i, mypos, mypos1, mypos2 As Integer
        Dim b As String
        mypos = InStr(a, "Agency head", CompareMethod.Text)
        mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
        mypos1 = InStr(mypos + 1, a, vbCr, CompareMethod.Text)
        b = Mid(a, mypos + 2, (mypos1 - mypos - 2))
        

        Return b
    End Function
    Private Function ExtractAgencyFromString(ByVal a As String) As String
        On Error Resume Next
        Dim i, mypos, mypos1, mypos2 As Integer
        Dim b As String
        mypos = InStr(a, "Agency listings -", CompareMethod.Text)
        mypos = InStr(mypos, a, "-", CompareMethod.Text)
        mypos1 = InStr(mypos, a, vbCr, CompareMethod.Text)
        b = Mid(a, mypos + 1, (mypos1 - mypos - 1))
       
        Return b
    End Function
    Private Function ExtractheadoffFromString(ByVal a As String) As String
        On Error Resume Next
        Dim i, mypos, mypos1, mypos2 As Integer
        Dim b As String
        mypos = InStr(a, "Head office add", CompareMethod.Text)
        mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
        mypos1 = InStr(mypos + 5, a, vbCr, CompareMethod.Text)
        b = Mid(a, mypos + 2, (mypos1 - mypos - 2))
        Return b
    End Function
    Private Function ExtracttfaxtelFromString(ByVal a As String) As String()
        Dim i, mypos, mypos1, mypos2, mypos3 As Integer
        Dim b(5) As String
        On Error Resume Next
        mypos = InStr(a, "Tel/Fax/email", CompareMethod.Text)
        mypos = InStr(mypos, a, vbCr, CompareMethod.Text)
        mypos1 = InStr(mypos, a, ";", CompareMethod.Text)
        b(0) = Mid(a, mypos + 2, (mypos1 - mypos - 1)) 'tel
        'b(0) = "jkjh"
        mypos = InStr(a, "Fax:", CompareMethod.Text)
        mypos = InStr(mypos, a, " ", CompareMethod.Text)
        mypos1 = InStr(mypos, a, ";", CompareMethod.Text)
        b(1) = Mid(a, mypos, (mypos1 - mypos - 1))  'fax
        mypos2 = InStr(mypos1 + 1, a, ";", CompareMethod.Text)
        b(2) = Mid(a, mypos1 + 1, (mypos2 - mypos1 - 1)) 'email
        mypos3 = InStr(mypos2 + 1, a, vbCr, CompareMethod.Text)
        b(3) = Mid(a, mypos2 + 1, (mypos3 - mypos2 - 1))
        mypos1 = InStr(mypos1, a, vbCr, CompareMethod.Text) 'url
        Return b
    End Function

    Private Sub Form1_Load(ByVal sender As System.Object, 
     ByVal e As System.EventArgs) Handles MyBase.Load
        txtDocumentName.Text = ""
        enableParseButton()
    End Sub
    Private Sub txtDocumentName_TextChanged(ByVal sender As System.Object, 
     ByVal e As System.EventArgs) Handles txtDocumentName.TextChanged
        enableParseButton()
    End Sub
    Private Sub btnParse_Click(ByVal sender As System.Object,
     ByVal e As System.EventArgs) Handles btnParse.Click
        ' Dim app As Word.Application
        ' Dim doc As Word.Document
        Dim app As Object
        Dim doc As Object
        Dim docFileName As String
        Dim docPath As String
        Dim contents As String
        Cursor.Current = Cursors.WaitCursor
        Try
            ' init UI controls
            Lblfindcount.Text = ""
            Txtresults.Text = ""
            txtDocContents.Text = ""
            ' validate file name
            docFileName = txtDocumentName.Text
            If docFileName.Length = 0 Then
                MsgBox("Please enter a file name")
                txtDocumentName.Focus()
                Return
            End If
            ' if no path use APP_BASE
            docPath = Path.GetDirectoryName(docFileName)
            If docPath.Length = 0 Then
                docFileName = Application.StartupPath & "\" & docFileName
            End If
            ' ensure file exists
            If Not File.Exists(docFileName) Then
                MsgBox("File does not exist")
                txtDocumentName.SelectAll()
                txtDocumentName.Focus()
                Return
            End If
            ' extract contents of file
            contents = ""
            If Path.GetExtension(docFileName).ToLower = ".txt" Then
                Dim fs As StreamReader
                Try
                    fs = New StreamReader(docFileName)
                    contents = fs.ReadToEnd
                Catch ex As Exception
                    MsgBox("Unable to read from text input file")
                    contents = ""
                Finally
                    If Not fs Is Nothing Then fs.Close()
                End Try
            Else
                Try
                    Try
                        'app = New Word.Application
                        app = CreateObject("Word.Application")
                    Catch ex As Exception
                        MsgBox("Unable to start Word")
                        Throw ex
                    End Try
                    Try
                        doc = app.Documents.Open(docFileName)
                    Catch ex As Exception
                        MsgBox("Unable to load document in Word")
                        Throw ex
                    End Try
                    contents = doc.Content.Text
                Catch ex As Exception
                    contents = ""
                Finally
                    If Not app Is Nothing Then app.Quit()
                End Try
            End If
            If contents.Length = 0 Then Return
            ' search for email addresses
            Dim emails, aglist As String()
            Dim email As String
            Dim results As New StringBuilder()
            Dim results1 As New StringBuilder()
            emails = ExtractEmailAddressesFromString(contents)

            For Each email In emails
                results.Append(email)
            Next
            
            Dim i As Integer
            ' display results
            Lblfindcount.Text = String.Format("{0} match(es) found.", emails.Length)
            Txtresults.Text = results.ToString
            TextBox8.Text = ExtractAgencyFromString(contents)
            TextBox9.Text = ExtractheadFromString(contents)
            TextBox6.Text = ExtractheadoffFromString(contents)
            TextBox5.Text = (ExtracttfaxtelFromString(contents))(0)
            TextBox2.Text = (ExtracttfaxtelFromString(contents))(1)
            TextBox3.Text = (ExtracttfaxtelFromString(contents))(2)
            'TextBox1.Text = (ExtracttfaxtelFromString(contents))(3)
            'TextBox1.Text = results1.ToString
            txtDocContents.Text = contents
        Finally
            Cursor.Current = Cursors.Default
        End Try
    End Sub
    Private Sub btnBrowse_Click(ByVal sender As System.Object,
     ByVal e As System.EventArgs) Handles btnBrowse.Click
        Dim ofd As OpenFileDialog
        Try
            ofd = New OpenFileDialog()
            ofd.CheckFileExists = True
            ofd.CheckPathExists = True
            'ofd.Filter = "*.*|*.doc|Rich Text Documents 
            '(*.rtf)|*.rtf|Text Documents (*.txt)|*.txt"
            ofd.Title = "Select Document"
            If ofd.ShowDialog = DialogResult.OK Then
                txtDocumentName.Text = ofd.FileName
            End If
        Finally
            If Not ofd Is Nothing Then ofd.Dispose()
        End Try
    End Sub
    Public Function CREATE_CON_MS(ByVal DATA_SOURCE As String) As Boolean
        con = New OleDbConnection("Provider=Microsoft.jet.OLEDB.4.0;Data Source=" 
         + DATA_SOURCE + ";Persist Security Info=False")
        con.Open()
        If con.State = ConnectionState.Open Then
            
            Dim ss As String = "pixel.gif"

            Dim SqlIns As String = "Insert into FashionDesigner(FD_Name,FD_ID,
             FD_Address,FD_Phone,FD_Fax,FD_Email,FD_URL,FD_Logo,FD_ContactPerson) 
             values ('" & TextBox8.Text & "','" & TextBox4.Text & "','" 
             & TextBox6.Text & "','" & TextBox5.Text & "','" & 
             TextBox2.Text & "','" & Txtresults.Text & "','" 
             & TextBox1.Text & "','" & ss & "','" & TextBox9.Text & "')"
        Dim MyCmd1 As OleDb.OleDbCommand = New OleDb.OleDbCommand(SqlIns, con)
        Dim MyDataR1 As OleDb.OleDbDataReader
        MyDataR1 = MyCmd1.ExecuteReader
            con.Close()
            con.Dispose()
            Return True
        Else
            Return False
        End If

    End Function



    Private Sub btnExit_Click(ByVal sender As System.Object, 
    ByVal e As System.EventArgs) Handles btnExit.Click
       
        Me.Close()
    End Sub
    Private Sub Button1_Click(ByVal sender As System.Object, 
     ByVal e As System.EventArgs) Handles Button1.Click
        CREATE_CON_MS("c:\DisplayDatsap.mdb")
    End Sub
    Private Sub OleDbDataAdapter1_RowUpdated(ByVal sender As System.Object,
     ByVal e As System.Data.OleDb.OleDbRowUpdatedEventArgs)
    End Sub
    Private Sub TabPage3_Click(ByVal sender As System.Object,
     ByVal e As System.EventArgs) Handles TabPage3.Click
    End Sub
    Private Sub Label12_Click(ByVal sender As System.Object,
     ByVal e As System.EventArgs) Handles Label12.Click
    End Sub
End Class
© . All rights reserved.