65.9K
CodeProject 正在变化。 阅读更多。
Home

从旧的 SharePoint 服务器迁移? 需要将什么导入 SharePoint 2013?

emptyStarIconemptyStarIconemptyStarIconemptyStarIconemptyStarIcon

0/5 (0投票)

2014 年 6 月 28 日

CPOL
viewsIcon

13434

从旧的 SharePoint 服务器迁移? 需要将什么导入 SharePoint 2013?

引言

本文描述了一种将数据从旧系统快速、免费且安全地迁移到 SharePoint 2013 的方法。
您也可以使用此技术将 Frontpage、静态 HTML、PHP 等迁移到 SharePoint 2013。

必备组件

Internet Explorer 知识、C#、SharePoint 2013

使用代码

让我们从经典的 Program.cs 开始

namespace SharePoint.Import.SpiderAgent
{
    /// <summary>
    /// Hosts the process entry point of the spider agent.
    /// </summary>
    internal static class Program
    {
        /// <summary>
        /// Application entry point: enables visual styles, selects the default
        /// text rendering mode and runs the application with a new
        /// <see cref="SpiderAgent"/> instance.
        /// </summary>
        [STAThread]
        private static void Main()
        {
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            SpiderAgent agent = new SpiderAgent();
            Application.Run(agent);
        }
    }
}

现在我们需要一个能够浏览旧应用程序服务器的工具。我们将封装 WebClient 类,并为其配置带凭据的 WebProxy,以便通过公司的安全限制(通过这种方式,您可以“合法地”通过所有安全验证请求)

namespace SharePoint.Import.SpiderAgent
{
    /// <summary>
    /// A <see cref="WebClient"/> that uses the default credentials and, when a
    /// proxy user is supplied, sends its requests through an authenticated proxy.
    /// </summary>
    public class PersonalWebClient : WebClient
    {
        /// <summary>
        /// Address ("host:port") of the proxy created by <see cref="setProxy"/>.
        /// NOTE(review): this looks like a placeholder - replace it with the real
        /// proxy address before use.
        /// </summary>
        private const string ProxyAddress = "intranet proxy:8080";

        /// <summary>
        /// Initializes a new instance of the <see cref="PersonalWebClient"/> class.
        /// The proxy is configured only when <paramref name="proxyUser"/> is not
        /// null or empty.
        /// </summary>
        /// <param name="proxyUser">The proxy user; null or empty to skip proxy setup.</param>
        /// <param name="proxyPassword">The proxy password.</param>
        public PersonalWebClient(string proxyUser, string proxyPassword)
        {
            try
            {
                this.UseDefaultCredentials = true;

                if (!string.IsNullOrEmpty(proxyUser))
                {
                    this.Proxy = setProxy(proxyUser, proxyPassword);
                }
            }
            catch (System.Exception ex)
            {
                // Construction stays best-effort (it never throws, as before), but
                // the failure is now traced instead of being silently discarded, so
                // a bad proxy configuration can be diagnosed.
                System.Diagnostics.Trace.TraceWarning(
                    "PersonalWebClient: proxy setup failed: {0}", ex);
            }
        }

        /// <summary>
        /// Creates the proxy for <see cref="ProxyAddress"/> (bypassed for local
        /// addresses) with the given credentials.
        /// </summary>
        /// <param name="proxyUser">The proxy user, e.g. "Windows Domain\user".</param>
        /// <param name="proxyPassword">The proxy password.</param>
        /// <returns>The configured <see cref="WebProxy"/>.</returns>
        public static WebProxy setProxy(string proxyUser, string proxyPassword)
        {
            WebProxy proxy = new WebProxy(ProxyAddress, true);
            proxy.Credentials = new System.Net.NetworkCredential(proxyUser, proxyPassword);
            return proxy;
        }
    }
}

现在我们需要 HtmlParser——一个很酷的工具 :D

这段代码是爬虫的摘录,只是为了说明算法

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.IO;
using System.Security.Cryptography;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using mshtml;
using System.Net;
using HtmlAgilityPack;
using System.Web;
using System.Xml.Linq;
using System.Xml;
using System.Net.Mime;
using System.Windows.Forms;
using System.Threading;
using System.Diagnostics;

namespace SharePoint.Import.SpiderAgent
{
    /// <summary>
    /// The Parser class
    /// </summary>
    public class HtmlParser
    {
        static SortedList<string, Uri> md5VisitedPages;
        // ... (remaining members omitted from this excerpt)

        /// <summary>
        /// Reduces an HTML fragment to single-spaced plain text: markup tags are
        /// removed, newlines and tabs become spaces, the "&amp;nbsp" entity prefix is
        /// dropped, every ';' becomes a space and runs of spaces are collapsed.
        /// A simple way to remove noise from link captions.
        /// </summary>
        /// <param name="input">The text to clean; may be null.</param>
        /// <returns>
        /// The cleaned text, or an empty string when <paramref name="input"/> is
        /// null or empty.
        /// </returns>
        public static string RemoveSpecialCharacters(string input)
        {
            if (string.IsNullOrEmpty(input))
                return string.Empty;

            // Strip anything that looks like a tag.
            string text = Regex.Replace(input, @"<[^>]*>", String.Empty);

            // Note the order: "&nbsp" is removed first, then every ';' (including the
            // one that terminated the entity) is turned into a space.
            text = text.Replace(Environment.NewLine, " ")
                       .Replace('\t', ' ')
                       .Replace(@"&nbsp", "")
                       .Replace(';', ' ')
                       .Trim();

            // Collapse runs of two or more spaces into a single space.
            return Regex.Replace(text, @"[ ]{2,}", @" ");
        }

        /// <summary>
        /// Walks every anchor that has an href in the document. When the current
        /// <c>type</c> is "html" or "htm", each titled link becomes a folder that is
        /// pushed on the stack and crawled through <c>retrieveHTML</c>; otherwise the
        /// link is downloaded as a file and attached to the folder on top of the stack.
        /// Folders pushed by this call are popped again before it returns.
        /// </summary>
        /// <param name="htmlDoc">The HTML document.</param>
        /// <param name="url">The URL of the document; links are resolved against its directory.</param>
        /// <param name="lt">Not used by this method.</param>
        /// <param name="looker">Not used by this method.</param>
        private void processHTMLBody(HtmlAgilityPack.HtmlDocument htmlDoc, Uri url, LockerType lt, string looker)
        {
            // Number of folders this call pushed and still has to pop.
            int pushed = 0;
            try
            {
                try
                {
                    // SelectNodes yields null (not an empty collection) when nothing
                    // matches; a page without links is not an error.
                    HtmlNodeCollection links = htmlDoc.DocumentNode.SelectNodes("//a[@href]");
                    if (links == null)
                        return;

                    string baseUrl = url.ToString();
                    // Everything up to and including the last '/' of the page URL.
                    string baseFolder = baseUrl.Substring(0, baseUrl.LastIndexOf('/') + 1);

                    foreach (HtmlNode link in links)
                    {
                        HtmlAttribute att = link.Attributes["href"];
                        string sUrl = att.Value;

                        if (type == "html" || type == "htm")
                        {
                            FileOrFolder fof = new FileOrFolder(FileOrFolderType.Folder);
                            fof.title = RemoveSpecialCharacters(att.OwnerNode.InnerText).Trim();
                            if (String.IsNullOrEmpty(fof.title))
                                continue;
                            stack.Peek().Children.Add(fof);
                            stack.Push(fof);
                            // The original never counted its pushes, so the pop loop
                            // was dead code and the stack kept growing.
                            pushed++;
                            fof.sourceUrl = baseUrl;
                            retrieveHTML(baseFolder + sUrl, LockerType.Leaves, "*");
                            continue;
                        }

                        FileOrFolder fs = new FileOrFolder(FileOrFolderType.File);
                        fs.sourceUrl = baseFolder + sUrl;

                        // File name = part of the href after its last '/' or '\'.
                        string strs = sUrl.Replace('/', '\\');
                        string fileName = strs.Substring(strs.LastIndexOf('\\') + 1);
                        string fileTarget = prefixFilePath + "\\" + fileName;
                        fs.fileName = fileName;
                        notify("Downloading " + fs.sourceUrl);
                        try
                        {
                            fs.title = RemoveSpecialCharacters(att.OwnerNode.InnerText).Trim();
                            string str = downloadFileAvoidDuplicates(fs, fileTarget);
                            if (string.IsNullOrEmpty(str))
                                continue;
                            fs.fileName = str;
                            stack.Peek().Children.Add(fs);
                        }
                        catch (Exception exe)
                        {
                            // A failed download must not stop the remaining links.
                            notify(exe, url.ToString());
                        }
                    }
                }
                finally
                {
                    // Restore the stack even when the crawl above failed half-way.
                    for (; pushed > 0; pushed--)
                        stack.Pop();
                }
            }
            catch (Exception exe)
            {
                try
                {
                    notify(exe, url.ToString());
                }
                catch { } // reporting is best-effort; never let it escape
            }
        }

        /// <summary>
        /// Processes a page of "/url?q=" result links: each such link that has a
        /// non-empty caption becomes a folder under the current stack top and its
        /// target is crawled through <c>retrieveHTML</c>.
        /// </summary>
        /// <param name="htmlDoc">The HTML document.</param>
        /// <param name="url">The URL of the document; stored as the folder's source URL.</param>
        private void processWeb(HtmlAgilityPack.HtmlDocument htmlDoc, Uri url)
        {
            const string sResult = "/url?q=";
            try
            {
                // SelectNodes yields null (not an empty collection) when nothing matches.
                HtmlNodeCollection links = htmlDoc.DocumentNode.SelectNodes("//a[@href]");
                if (links == null)
                    return;

                foreach (HtmlNode link in links)
                {
                    HtmlAttribute att = link.Attributes["href"];
                    string sUrl = att.Value;

                    if (!sUrl.StartsWith(sResult, StringComparison.Ordinal))
                        continue;

                    // Keep the target only: drop the prefix and any trailing "&..."
                    // parameters. A link without '&' used to throw here and abort
                    // the whole page.
                    sUrl = sUrl.Substring(sResult.Length);
                    int amp = sUrl.IndexOf('&');
                    if (amp >= 0)
                        sUrl = sUrl.Substring(0, amp);

                    if (bGoogle
                        && (sUrl.IndexOf("webcache", StringComparison.OrdinalIgnoreCase) >= 0
                            || sUrl.IndexOf(@"q=related", StringComparison.OrdinalIgnoreCase) >= 0))
                        continue;

                    FileOrFolder fof = new FileOrFolder(FileOrFolderType.Folder);
                    fof.title = RemoveSpecialCharacters(att.OwnerNode.InnerText).Trim();
                    if (String.IsNullOrEmpty(fof.title))
                        continue;

                    stack.Peek().Children.Add(fof);
                    stack.Push(fof);
                    try
                    {
                        fof.sourceUrl = url.ToString();
                        notify("Navigating " + sUrl);
                        retrieveHTML(sUrl, LockerType.Words, null);
                    }
                    finally
                    {
                        // Always undo the push, even when the crawl fails.
                        stack.Pop();
                    }
                }
            }
            catch (Exception exe)
            {
                try
                {
                    notify(exe, url.ToString());
                }
                catch { } // reporting is best-effort; never let it escape
            }
        }

        /// <summary>
        /// Scans the nodes selected by <paramref name="looker"/> and appends, for each
        /// of their child nodes, the child's text plus (when it can be parsed) a tag
        /// type and a link to <c>documents</c>. After each selected node, the text of
        /// the second child of its parent is appended as well.
        /// </summary>
        /// <param name="htmlDoc">The HTML document.</param>
        /// <param name="url">The URL of the document; only used when reporting errors.</param>
        /// <param name="lt">Not used by this method.</param>
        /// <param name="looker">The expression passed to <c>SelectNodes</c>.</param>
        private void processHTMLClass(HtmlAgilityPack.HtmlDocument htmlDoc, Uri url, LockerType lt, string looker)
        {
            try
            {
                var nomedocumento = from foo in htmlDoc.DocumentNode.SelectNodes(looker) select foo;
                foreach (var nodes in nomedocumento)
                {
                    foreach (var childNode in nodes.ChildNodes)
                    {
                        string sValue = childNode.InnerText;
                        try
                        {
                            // Any failure in this block (too-short markup, missing '>',
                            // invalid URI) falls through to the catch below, which
                            // records the text only.
                            string link = childNode.InnerHtml;
                            // Skip the first 9 characters - presumably the leading
                            // `<a href="` of the child's markup (TODO confirm).
                            link = link.Substring(9);
                            // Classify the entry by the icon file name found in the markup.
                            string tagType = string.Empty;
                            if (link.Contains("pdf.png") || link.Contains("pdf.gif"))
                                tagType = "pdf";
                            else if (link.Contains("link.png") || 
                            link.Contains("link.gif") || link.Contains("folder.gif"))
                                tagType = "link";
                            else if (link.Contains("txt.png") || link.Contains("txt.gif"))
                                tagType = "txt";
                            // Keep what precedes the first '>' minus one more character
                            // (presumably the closing quote of the last attribute).
                            int pos = link.IndexOf(">");
                            link = link.Substring(0, pos - 1);
                            if (link.EndsWith("\" target=\"_blank"))
                            {
                                // Strip the trailing target attribute; the Uri is built
                                // only to validate the link as absolute (throws if not).
                                pos = link.LastIndexOf("\" target=\"_blank");
                                link = link.Substring(0, pos); { Uri a = new Uri(link, UriKind.Absolute); }
                            }
                            // Otherwise the link must be a valid relative URI (throws if not).
                            else { Uri a = new Uri(link, UriKind.Relative); }
                            notify("Parsing: " + sValue + " " + tagType + " " + link);
                            // Three consecutive entries per parsed child: text, type, link.
                            documents.Add(sValue);
                            documents.Add(tagType);
                            documents.Add(link);
                        }
                        catch
                        {
                            // Child could not be parsed as a link: keep its text only.
                            documents.Add(sValue);
                        }
                    }
                    // Append the text of the second child of the selected node's parent.
                    notify("Adding: " + nodes.ParentNode.ChildNodes[1].InnerText);
                    documents.Add(nodes.ParentNode.ChildNodes[1].InnerText);
                }
            }
            catch (Exception exe)
            {
                try
                {
                    notify(exe, url.ToString());
                }
                catch { } // reporting is best-effort; never let it escape
            }
        }

        /// <summary>
        /// Processes a leaf page: registers a folder for the page under the current
        /// stack top, then downloads every linked file (except links ending in
        /// "leaft.html") and attaches it to that folder.
        /// </summary>
        /// <param name="htmlDoc">The HTML document.</param>
        /// <param name="url">The URL of the leaf page.</param>
        private void processLeaves(HtmlAgilityPack.HtmlDocument htmlDoc, Uri url)
        {
            FileOrFolder leafFolder = new FileOrFolder(FileOrFolderType.Folder);
            string pageUrl = url.ToString();
            string[] segments = pageUrl.Split('/');

            leafFolder.sourceUrl = pageUrl;
            // The folder is named after the second-to-last '/'-separated URL segment.
            leafFolder.fileName = segments[segments.Length - 2];
            stack.Peek().Children.Add(leafFolder);
            stack.Push(leafFolder);

            try
            {
                FileOrFolder target = stack.Peek();

                foreach (HtmlNode anchor in htmlDoc.DocumentNode.SelectNodes("//a[@href]"))
                {
                    string href = anchor.Attributes["href"].Value;

                    // Links that point at a leaf page itself are skipped.
                    if (href.ToLower().EndsWith("leaft.html"))
                        continue;

                    FileOrFolder leaf = new FileOrFolder(FileOrFolderType.File);
                    leaf.bLeaf = true;
                    // Replace the page's "leaft.html" suffix with the link target.
                    leaf.sourceUrl = pageUrl.Substring(0, pageUrl.ToLower().LastIndexOf("leaft.html")) + href;
                    notify("Downloading " + leaf.sourceUrl);

                    try
                    {
                        string saved = downloadFile(leaf.sourceUrl, prefixFilePath + "\\" + href);
                        if (!string.IsNullOrEmpty(saved))
                        {
                            leaf.fileName = saved;
                            target.Children.Add(leaf);
                        }
                    }
                    catch (Exception downloadError)
                    {
                        // One failed download does not stop the remaining links.
                        notify(downloadError, leaf.sourceUrl);
                    }
                }
            }
            catch (Exception pageError)
            {
                try
                {
                    notify(pageError, pageUrl);
                }
                catch { }
            }

            stack.Pop();
        }

        /// <summary>
        /// Enumerates every header/data cell of the table(s) whose id equals
        /// <paramref name="looker"/>. Cells containing the PDF icon are detected but
        /// nothing is done with them yet.
        /// </summary>
        /// <param name="htmlDoc">The HTML document.</param>
        /// <param name="url">The URL of the document; only used when reporting errors.</param>
        /// <param name="lt">Not used by this method.</param>
        /// <param name="looker">The id of the table to scan.</param>
        private void processHTMLTable(HtmlAgilityPack.HtmlDocument htmlDoc, Uri url, LockerType lt, string looker)
        {
            try
            {
                string tableXPath = string.Concat("//table[@id='", looker, "']");

                // table -> rows -> cells, flattened in document order.
                IEnumerable<HtmlNode> cells = htmlDoc.DocumentNode.SelectNodes(tableXPath).Cast<HtmlNode>()
                    .SelectMany(table => table.SelectNodes("tr").Cast<HtmlNode>())
                    .SelectMany(row => row.SelectNodes("th|td").Cast<HtmlNode>());

                foreach (HtmlNode cell in cells)
                {
                    string cellHtml = cell.OuterHtml;
                    if (cellHtml.Contains("../immagini/pdf.gif"))
                    {
                        // Intentionally empty: no handling for PDF cells yet.
                    }
                }
            }
            catch (Exception failure)
            {
                try
                {
                    notify(failure, url.ToString());
                }
                catch { }
            }
        }

        /// <summary>
        /// Reads the href attribute of <paramref name="element"/>, lower-cased and
        /// trimmed. Stylesheet links, googleapis.com links and a bare "#" yield an
        /// empty string.
        /// </summary>
        /// <param name="element">The element.</param>
        /// <returns>
        /// The normalized href, an empty string for ignored links, or null when the
        /// element has no href (that case is also reported through <c>notify</c>).
        /// </returns>
        string extractAttribute(HtmlAgilityPack.HtmlNode element)
        {
            string href = string.Empty;
            try
            {
                href = element.GetAttributeValue("href", null);
                href = href.ToLower().Trim();

                bool ignored = href == @"#"
                    || href.Contains("googleapis.com")
                    || href.EndsWith(".css");

                if (ignored)
                    href = string.Empty;
            }
            catch (Exception failure)
            {
                try
                {
                    notify(failure, element.OuterHtml);
                }
                catch { }
            }
            return href;
        }

        /// <summary>
        /// Turns user input into a URL using a dot-count heuristic:
        /// no dot - rejected; one dot ("site.com") - prefixed with "http://www.";
        /// two dots ("www.site.com") - prefixed with "http://"; more dots - returned
        /// as is. Input that already starts with "http://" or "https://" is never
        /// prefixed again.
        /// </summary>
        /// <param name="iUrl">The URL or host name as typed; may be null.</param>
        /// <returns>
        /// The prepared URL, or null when <paramref name="iUrl"/> is null, blank or
        /// contains no dot.
        /// </returns>
        private string prepareUrl(string iUrl)
        {
            if (string.IsNullOrWhiteSpace(iUrl))
                return null;

            int dots = iUrl.Split('.').Length - 1;
            if (dots == 0)
                return null;

            // Previously a one-dot URL with a scheme became "http://www.http://...",
            // and the two-dot case accepted "http" anywhere in the string.
            bool hasScheme =
                iUrl.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                iUrl.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
            if (hasScheme)
                return iUrl;

            switch (dots)
            {
                case 1:
                    return @"http://www." + iUrl;
                case 2:
                    return @"http://" + iUrl;
                default:
                    return iUrl;
            }
        }

        /// <summary>
        /// Rewrites a SharePoint URL: every "Shared%20Documents/" becomes "/" and
        /// every ".aspx" becomes "/Forms/AllItems.aspx".
        /// </summary>
        /// <param name="url">The URL.</param>
        /// <returns>The rewritten URL.</returns>
        private string TrasformSharePointUrl(string url)
        {
            return url
                .Replace("Shared%20Documents/", "/")
                .Replace(".aspx", "/Forms/AllItems.aspx");
        }

        /// <summary>
        /// Checks whether a URL can be fetched: opens it through a
        /// <see cref="PersonalWebClient"/> and reads the whole response.
        /// </summary>
        /// <param name="proxyUser">The proxy user.</param>
        /// <param name="proxyPassword">The proxy password.</param>
        /// <param name="url">The URL.</param>
        /// <returns>
        /// <c>true</c> when the response was read completely; <c>false</c> when any
        /// step threw.
        /// </returns>
        public static bool TestUrl(string proxyUser, string proxyPassword, string url)
        {
            try
            {
                using (PersonalWebClient client = new PersonalWebClient(proxyUser, proxyPassword))
                {
                    using (Stream response = client.OpenRead(new Uri(url)))
                    {
                        using (StreamReader text = new StreamReader(response))
                        {
                            // The content itself is not needed; reading it to the end
                            // only proves that the resource can be fetched. The using
                            // blocks close the reader and the stream.
                            text.ReadToEnd();
                            return true;
                        }
                    }
                }
            }
            catch
            {
                return false;
            }
        }
    }
}

完整代码和咨询

如有需要,我可以发送完整的代码,并为您提供几个小时的支持;作为回报,请从我的亚马逊愿望清单中选一件礼物送给我:http://www.amazon.it/registry/wishlist/3DUGGYP0KMLF8

© . All rights reserved.