65.9K
CodeProject 正在变化。 阅读更多。
Home

字符串压缩 - SevenZip vs GZip

starIconstarIconstarIconstarIcon
emptyStarIcon
starIcon

4.80/5 (6 票)

2016年9月18日

CPOL

1分钟阅读

viewsIcon

23250

downloadIcon

10

一种更快、压缩能力更强的字符串压缩/解压缩替代方案

引言

GZipStream 是一个提供压缩/解压缩流的方法的类,包含在 .NET Framework 2.0 及更高版本中。

SevenZip 是一个用于压缩/解压缩的程序集,压缩能力更强(下文代码使用其中的 LZMA 编码器)。

使用代码

using Encoder = SevenZip.Sdk.Compression.Lzma.Encoder;

/// <summary>
/// Compresses <paramref name="text"/> with the SevenZip LZMA encoder and returns the
/// compressed bytes packed into a string, logging the elapsed time to the console.
/// </summary>
/// <remarks>
/// Layout of the compressed bytes: the encoder properties written by
/// <c>WriteCoderProperties</c>, then the uncompressed byte count as an 8-byte Int64,
/// then the LZMA payload. <c>Decompress</c> reads the same layout back.
/// NOTE(review): the compressed bytes are converted to a string with
/// <c>Encoding.Default</c> because <c>Decompress</c> converts them back the same way.
/// That round trip is only lossless when the default code page maps every byte value
/// to a character and back; a binary-safe transport such as Base64 would be more
/// robust, but it has to be changed in <c>Compress</c> and <c>Decompress</c> together.
/// </remarks>
/// <param name="text">The text to compress. Must not be null.</param>
/// <returns>The compressed data as an <c>Encoding.Default</c>-decoded string.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public static string Compress(string text) {
    if (text == null) {
        throw new ArgumentNullException(nameof(text));
    }

    var watcher = new Watcher();
    watcher.Start();

    string result;

    using (var input = new MemoryStream(Encoding.Default.GetBytes(text)))
    using (var output = new MemoryStream()) {
        var encoder = new Encoder();

        // Header: coder properties followed by the uncompressed size (Int64, 8 bytes).
        encoder.WriteCoderProperties(output);
        output.Write(BitConverter.GetBytes(input.Length), 0, sizeof(long));

        // -1 as the output size: the compressed size is not known in advance.
        encoder.Code(input, output, input.Length, -1, null);

        // ToArray copies the whole buffer regardless of Position; the using
        // statements dispose both streams, so no explicit Close is needed.
        result = Encoding.Default.GetString(output.ToArray());
    }

    watcher.Stop();

    // Kept on one line: a regular interpolated string cannot contain a raw newline.
    Console.WriteLine($"{watcher.TotalMilliseconds()}ms in compress {text.Length} chars with SevenZip! = {result.Length} new length");

    return result;
}

现在 GZipStream

/// <summary>
/// Compresses <paramref name="text"/> with <c>GZipStream</c> and returns the result as
/// a Base64 string, logging the elapsed time to the console.
/// </summary>
/// <remarks>
/// Layout of the Base64-encoded bytes: the uncompressed byte count as a 4-byte Int32,
/// followed by the gzip data.
/// NOTE(review): the callers shown elsewhere in this file invoke
/// <c>CompressGZipStream(text)</c>; this snippet is presumably that method under a
/// shortened name — confirm before renaming.
/// </remarks>
/// <param name="text">The text to compress. Must not be null.</param>
/// <returns>Base64 of the length prefix plus the gzip-compressed bytes.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public static string Compress(string text) {
    if (text == null) {
        throw new ArgumentNullException(nameof(text));
    }

    var watcher = new Watcher();
    watcher.Start();

    var buffer = Encoding.Default.GetBytes(text);

    byte[] compressed;
    using (var target = new MemoryStream()) {
        // leaveOpen: true so the target stream survives disposing the GZipStream;
        // disposing the GZipStream is what flushes the final gzip block.
        using (var gzip = new GZipStream(target, CompressionMode.Compress, true)) {
            gzip.Write(buffer, 0, buffer.Length);
        }

        compressed = target.ToArray();
    }

    // Prefix the gzip data with the original byte count (Int32, 4 bytes).
    var gzBuffer = new byte[compressed.Length + sizeof(int)];
    Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gzBuffer, 0, sizeof(int));
    Buffer.BlockCopy(compressed, 0, gzBuffer, sizeof(int), compressed.Length);

    // The timer is stopped before the Base64 conversion, so only compression is measured.
    watcher.Stop();
    var result = Convert.ToBase64String(gzBuffer);

    // Kept on one line: a regular interpolated string cannot contain a raw newline.
    Console.WriteLine($"{watcher.TotalMilliseconds()}ms in compress {text.Length} chars with GZipStream! = {result.Length} new length");

    return result;
}

解压缩

using Decoder = SevenZip.Sdk.Compression.Lzma.Decoder;

/// <summary>
/// Reverses <c>Compress</c> (SevenZip): decodes the LZMA data carried in
/// <paramref name="text"/> and returns the original text, logging the elapsed time.
/// </summary>
/// <remarks>
/// Expected layout of the bytes obtained from <paramref name="text"/> via
/// <c>Encoding.Default</c>: 5 bytes of decoder properties, the uncompressed byte count
/// as an 8-byte Int64, then the LZMA payload.
/// The decompressed bytes are decoded with <c>Encoding.Default</c> because that is the
/// encoding <c>Compress</c> used to turn the text into bytes.
/// </remarks>
/// <param name="text">The string produced by <c>Compress</c>. Must not be null.</param>
/// <returns>The decompressed text.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="InvalidDataException">
/// The input is shorter than the 13-byte header.
/// </exception>
public static string Decompress(string text) {
    if (text == null) {
        throw new ArgumentNullException(nameof(text));
    }

    var watcher = new Watcher();
    watcher.Start();

    string result;

    using (var input = new MemoryStream(Encoding.Default.GetBytes(text)))
    using (var output = new MemoryStream()) {
        var properties = new byte[5];
        var lengthBytes = new byte[sizeof(long)];

        // Fail loudly on a truncated header instead of decoding zero-filled buffers.
        if (input.Read(properties, 0, properties.Length) != properties.Length
            || input.Read(lengthBytes, 0, lengthBytes.Length) != lengthBytes.Length) {
            throw new InvalidDataException("Compressed text is too short to contain the LZMA header.");
        }

        var uncompressedLength = BitConverter.ToInt64(lengthBytes, 0);

        var decoder = new Decoder();
        decoder.SetDecoderProperties(properties);
        decoder.Code(input, output, input.Length, uncompressedLength, null);

        // Decode with the same encoding Compress used to encode the text;
        // a default StreamReader would interpret the bytes as UTF-8 instead.
        result = Encoding.Default.GetString(output.ToArray());
    }

    watcher.Stop();

    // Kept on one line: a regular interpolated string cannot contain a raw newline.
    Console.WriteLine($"{watcher.TotalMilliseconds()} ms in decompress {text.Length} chars with SevenZip! = {result.Length} new length");

    return result;
}

结果!

我们将进行多次测试:)

尝试

  • 操作系统:Windows 10 版本 10.0.14393
  • 内存:3GB DDR2
  • CPU:奔腾双核 2.70GHz (2 核)

尝试

  • 操作系统:Windows Server 2012 版本 6.3.9600
  • 内存:3.75GB
  • CPU:Intel Xeon 2.30GHz (2 核)

最后,我使用一个包含 150,000 个字符的真实文档文件进行了测试。该文档可在文章的资源中下载。

代码

/// <summary>
/// Benchmark driver. Runs three scenarios and leaves the console open at the end:
/// one synchronous round on 1,000 generated characters, a parallel round over five
/// generated texts (30,000 to 90,000 characters, at most two at a time), and, when
/// the file "150,000.txt" exists in the current directory, one synchronous round on
/// its contents.
/// </summary>
static void Main()
{
    //======================= 1 runs sync!

    Console.WriteLine($"{Environment.NewLine}1 runs sync!");
    var text = Generate(1000);

    // The decompressed text is not used; the call is made for its timing output.
    var compressed = Compress(text);
    Decompress(compressed);
    CompressGZipStream(text);

    //=================== 2 runs Parallel!

    Console.WriteLine($"{Environment.NewLine}2 runs Parallel!");

    var list = new List<string>();
    for (var i = 2; i <= 6; i++)
    {
        list.Add(Generate(15000 * i));
    }

    Parallel.ForEach(
        list,
        new ParallelOptions { MaxDegreeOfParallelism = 2 },
        x => CompressParallel(x));

    //========= 1 run sync with real-text!

    // Path.Combine picks the correct directory separator for the platform.
    var path = Path.Combine(Environment.CurrentDirectory, "150,000.txt");

    // The real-text round is optional. When the file is missing it is skipped, but
    // execution still reaches the final ReadLine so the earlier results stay visible.
    if (File.Exists(path))
    {
        Console.WriteLine($"{Environment.NewLine}1 run sync with real-text!");
        text = File.ReadAllText(path);

        var compressedReal = Compress(text);
        Decompress(compressedReal);
        CompressGZipStream(text);
    }

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}

并行方法

/// <summary>
/// Work item for the parallel benchmark: compresses the same text with
/// <c>Compress</c> and then with <c>CompressGZipStream</c>, and finally writes an
/// empty line to the console.
/// </summary>
/// <param name="text">The text handed to both compression methods.</param>
static void CompressParallel(string text)
{
    Compress(text);
    CompressGZipStream(text);

    Console.WriteLine();
}

计时器(Watcher)

/// <summary>
/// Minimal timer used by the benchmarks: call <see cref="Start"/>, run the work, call
/// <see cref="Stop"/>, then read <see cref="TotalMilliseconds"/>.
/// </summary>
/// <remarks>
/// Backed by <c>System.Diagnostics.Stopwatch</c> rather than wall-clock
/// <c>DateTime.Now</c> differences, so readings are not affected by system clock
/// adjustments and are not limited to the system clock's resolution.
/// </remarks>
public class Watcher
{
    private readonly System.Diagnostics.Stopwatch _stopwatch = new System.Diagnostics.Stopwatch();

    // True only between a Stop() call and the next Start() call.
    private bool _stopped;

    /// <summary>Begins a new measurement, discarding any previous one.</summary>
    public void Start()
    {
        _stopped = false;
        _stopwatch.Restart();
    }

    /// <summary>Ends the current measurement.</summary>
    public void Stop()
    {
        _stopwatch.Stop();
        _stopped = true;
    }

    /// <summary>
    /// Returns the duration of the last completed measurement in milliseconds.
    /// </summary>
    /// <returns>
    /// The elapsed milliseconds between the last <see cref="Start"/> and
    /// <see cref="Stop"/>, or 0 when no measurement has been stopped yet.
    /// </returns>
    public double TotalMilliseconds()
    {
        return _stopped ? _stopwatch.Elapsed.TotalMilliseconds : 0;
    }
}

生成随机文本

// One generator shared by every Generate call. Creating a new Random per call can
// give several calls made in quick succession the same time-based seed, and therefore
// the same character sequence.
static readonly Random _random = new Random();

/// <summary>
/// Builds a random string of <paramref name="length"/> characters drawn from the
/// upper-case letters A-Z and the digits 0-9.
/// </summary>
/// <param name="length">Number of characters to generate; 0 yields an empty string.</param>
/// <returns>The generated string.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> is negative.
/// </exception>
static string Generate(int length) {
    const string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

    if (length < 0) {
        throw new ArgumentOutOfRangeException(nameof(length));
    }

    var buffer = new char[length];

    // Random instances are not thread-safe; the lock keeps the shared generator
    // usable from concurrent callers, as the per-call instances were.
    lock (_random) {
        for (var i = 0; i < buffer.Length; i++) {
            buffer[i] = chars[_random.Next(chars.Length)];
        }
    }

    return new string(buffer);
}

结论

GZipStream 的压缩速度极快,快了近 60%,但压缩效果并不理想。

SevenZip 的压缩能力强得多,但速度并不出众。

如果需要异步压缩/解压缩数千条数据,使用 GZipStream 会很有用,因为无论硬件如何,其算法都不需要很多资源。

SevenZip 非常适合将大量数据发送到服务器,例如图像、文件、文档等的字节,甚至即时通讯工具。

© . All rights reserved.