65.9K
CodeProject 正在变化。 阅读更多。
Home

MatchKit 库

starIconstarIconstarIconstarIconstarIcon

5.00/5 (16 票)

2013 年 4 月 21 日

Ms-PL

2分钟阅读

viewsIcon

27736

MatchKit 是一个 .NET 库,提供一组类用于构建模式以匹配简单和复杂的字符串

引言

MatchKit 是一个 .NET 库,帮助匹配简单和复杂的字符串。 它提供:

  • 一个灵活且可扩展的基础设施,用于自定义匹配过程并创建自定义模式
  • 一组内置模式,用于匹配常见的字符集
  • 一种通过类似反序列化的机制,使用内置和自定义模式匹配字符串的方法

.NET Framework 提供了正则表达式框架,但通常模式的构建非常复杂,在某些情况下,编写自定义代码来解析字符串更好。

下载

通过示例学习

学习使用 MatchKit 的最佳方法是浏览文档中的示例库,下面选取了其中的一部分示例。

查找数字

此示例使用内置的 DigitPattern 和 Find 扩展方法来搜索字符串中的单个数字。

// Built-in pattern used here to find single digits.
IPattern digitPattern = new DigitPattern();
string content = "0, 1, 2, 99";

// Find is the extension method that searches the string for the pattern.
IEnumerable<Match> matches = content.Find(digitPattern);

// Expected MatchedString of each match, in order:
//    "0", "1", "2", "9", "9"
// Note that "99" is reported as two separate single-digit matches.

匹配一个数字

此示例使用内置的 NumberPattern 来完全匹配一个数字。

// Built-in pattern used to match one complete number.
IPattern numberPattern = new NumberPattern();
string content = "+101.56";

Match match = content.Match(numberPattern);

// Expected result:
//    match.MatchedString is "+101.56"
//    match.Value is 101.56 (a decimal)

提取协议和端口

此示例通过组合多个简单模式来构建一个复杂模式,用于从 URL 中提取协议和端口号。

string url = "http://www.domain.com:8080/readme.html";

// Build each piece of the composite pattern separately, in the order it is matched.
var protocol = new WordPattern().CaptureAs("proto");
var schemeSeparator = new StringPattern("://", false, false);
var upToColonOrSlash = new TillPattern(':', '/');

// A colon followed by a number captured as "port"; AsOptional marks the whole part as optional.
var optionalPort = new SequencePattern(
    Symbols.Colon,
    new NumberPattern().CaptureAs("port")
    ).AsOptional();

IPattern pattern = new SequencePattern(
    protocol,
    schemeSeparator,
    upToColonOrSlash,
    optionalPort,
    Symbols.Slash
    );

Match m = url.Match(pattern);

// Expected captures:
//    m["proto"].Value is "http"
//    m["port"].Value is 8080

扫描 HTML HRef

此示例通过组合多个简单模式来构建一个复杂模式,用于从 HTML 字符串中提取 HRef。

public void RunSample()
{
    // HTML fragment with three anchors whose HREF values are to be extracted.
    string html = "My favorite web sites include:</P>" +
                "<A HREF=\"http://msdn2.microsoft.com\">" +
                "MSDN Home Page</A></P>" +
                "<A HREF=\"http://www.microsoft.com\">" +
                "Microsoft Corporation Home Page</A></P>" +
                "<A HREF=\"http://blogs.msdn.com/bclteam\">" +
                ".NET Base Class Library blog</A></P>";

    Match[] hrefMatches = DumpHRefs(html);

    // Expected "url" capture of each match, i.e. hrefMatches[i].GetCapture("url"):
    //    [0] Value = "http://msdn2.microsoft.com",    Location.Index = 42
    //    [1] Value = "http://www.microsoft.com",      Location.Index = 101
    //    [2] Value = "http://blogs.msdn.com/bclteam", Location.Index = 175
}
 
// Finds every href attribute in inputString and returns the matches as an array.
// The attribute value of each match is captured under the name "url".
private Match[] DumpHRefs(string inputString)
{
    // Pieces of the pattern, created in the order they are matched.
    // NOTE(review): the sample input spells the attribute "HREF", so the `true`
    // flag presumably enables case-insensitive matching — confirm.
    var attributeName = new StringPattern("href", true, false);
    var spacesBeforeEquals = Spaces.ZeroOrMore();
    var equalsSign = Symbols.Equal;
    var spacesAfterEquals = Spaces.ZeroOrMore();

    // The value is a literal delimited by either double or single quotes.
    var quotedUrl = new ExclusivePattern(
        new LiteralPattern('"'),
        new LiteralPattern('\'')
        ).CaptureAs("url");

    var pattern = new SequencePattern(
        attributeName,
        spacesBeforeEquals,
        equalsSign,
        spacesAfterEquals,
        quotedUrl
        );

    return inputString.Find(pattern).ToArray();
}
 

使用自定义模式匹配十六进制字符串

此示例创建一个自定义模式来匹配十六进制字符串。

public void RunSample()
{
    // Repeat the custom single-byte HexPattern to cover the whole input.
    IPattern hexBytes = new RepeaterPattern(new HexPattern());

    string content = "FFAA";
    MatchContext context = new MatchContext(content);

    Match match = hexBytes.Match(context);

    // Expected result:
    //    match.MatchedString is "FFAA"
    //    match.Value is new byte[] { 0xFF, 0xAA }
}
 
// Custom pattern that matches a single hex byte, "00" through "FF".
class HexPattern : BasePattern
{
    // Reads the current character and the next one; succeeds with the parsed
    // byte when both are hex digits, otherwise reports an error.
    protected override Match OnMatch(MatchContext context)
    {
        // Two-character candidate: the current character followed by the next one.
        string pair = context.Current.ToString() + context.NextCharacter();

        bool bothHex = IsHexChar(pair[0]) && IsHexChar(pair[1]);
        if (!bothHex)
        {
            return Error(context);
        }

        // Presumably steps past the second digit so the next match starts after this byte.
        context.NextCharacter();
        return Success(context, Convert.ToByte(pair, 16));
    }

    // True for '0'-'9', 'A'-'F' and 'a'-'f'.
    private bool IsHexChar(char ch)
    {
        if (ch >= '0' && ch <= '9')
        {
            return true;
        }

        if (ch >= 'A' && ch <= 'F')
        {
            return true;
        }

        return ch >= 'a' && ch <= 'f';
    }

    // Short description of what this pattern matches.
    public override string HelpString
    {
        get
        {
            return "Hex byte";
        }
    }
}

取消异步匹配过程

此示例显示如何取消一个耗时较长的异步匹配过程。

string content = "very long string";
IPattern pattern = new StringPattern("string");

// The flag is handed to the search through MatchOptions so it can be cancelled later.
CancellationFlag cancellation = new CancellationFlag();
MatchOptions options = new MatchOptions { CancellationFlag = cancellation };

// Start the asynchronous search, then request cancellation straight away.
IAsyncResult pendingFind = content.BeginFind(pattern, options);
cancellation.Cancel();

try
{
    var matches = content.EndFind(pendingFind);
}
catch (MatchCanceledException)
{
    // Handle the cancelled search here.
    // some code
}
 

清理输入字符串

此示例显示如何清理输入字符串:将其中的无用字符(空白、句点和减号)替换为空字符串。

string content = "a simple text. a bit -dirty";

// Characters to strip: white space, dots and minus signs.
IPattern unwanted = new ExclusivePattern(
    new WhiteSpacePattern(),
    Symbols.Dot,
    Symbols.Minus);

// Replace each match with the empty string.
string replaced = content.Replace(unwanted, "");

// Expected: replaced is "asimpletextabitdirty"

映射一系列固定数字

此示例显示了如何将一系列固定数量的数字映射到可匹配的自定义类。

public void NumberPatternPattern()
{
    // Three space-separated numbers, one for each public field of NumberPatternClass.
    string content = "101 -35.95 15";

    var mapped = ObjectMapper.Map<NumberPatternClass>(content);

    // Expected field values after mapping:
    //    mapped.LongValue    is 101
    //    mapped.DecimalValue is -35.95
    //    mapped.ByteValue    is 15
}
 
// Mapping target for an input of the form "<long> <decimal> <byte>",
// e.g. "101 -35.95 15"; ObjectMapper.Map<NumberPatternClass> fills the public fields.
// NOTE(review): the MatchableMember index presumably sets the order in which
// members are matched — confirm against the MatchKit documentation.
[MatchableClass]
class NumberPatternClass
{
    // First number of the input.
    [MatchableMember(0)]
    public long LongValue;
 
    // Constant member: the single space between the first and second numbers.
    [MatchableMember(1)]
    private const char _space = ' ';
 
    // Second number of the input.
    [MatchableMember(2)]
    public decimal DecimalValue;
 
    // Constant member: the single space between the second and third numbers.
    [MatchableMember(3)]
    private const char _space2 = ' ';
 
    // Third number of the input.
    [MatchableMember(4)]
    public byte ByteValue;
}

映射一系列可变数字

此示例显示了如何将一系列数量可变的数字映射到可匹配的自定义类。

public void RepeaterPattern()
{
    // Comma-separated list; each number becomes one element of mapped.Numbers.
    string content = "10,20,45,102";

    var mapped = ObjectMapper.Map<RepeaterPatternClass>(content);

    // Expected mapped.Numbers[i].Number values, in order:
    //    [0] 10
    //    [1] 20
    //    [2] 45
    //    [3] 102
}
 
// Mapping target for a variable-length list of numbers such as "10,20,45,102".
[MatchableClass]
class RepeaterPatternClass
{
    // One RepeatableClass element per number found in the input.
    [MatchableMember(0)]
    public RepeatableClass[] Numbers;
}
 
// A single item of the repeated list: an integer plus the comma that separates items.
[MatchableClass]
class RepeatableClass
{
    // The numeric value of this item.
    [MatchableMember(0)]
    public int Number;
 
    // Constant comma flagged as a separator element.
    // NOTE(review): the sample input "10,20,45,102" has no trailing comma, so the
    // separator is presumably required only between items — confirm.
    [MatchableMember(1, IsSeparatorElement = true)]
    private const char _comma = ',';
}
 

替换可匹配的类

此示例显示了如何通过已映射的可匹配自定义类替换源文本中的内容。

public void RunSample()
{
    string code = "public class Sample { }";

    // The same match bag is given to the mapper and, afterwards, to the replacer.
    IMatchBag matchBag = new MatchBag();
    ObjectMapperOptions mapperOptions = new ObjectMapperOptions
    {
        EndsWithEos = true,
        IgnoreBlanks = IgnoreBlank.All,
        MatchBag = matchBag
    };

    CSharpClass mapped = ObjectMapper.Map<CSharpClass>(code, mapperOptions);

    // Replace the text that was mapped to Name; Apply() returns the resulting string.
    var replacer = new ObjectReplacer(code, matchBag);
    replacer.Replace(mapped, o => o.Name, "RenamedClass");
    var renamedCode = replacer.Apply();

    // Expected: renamedCode is "public class RenamedClass { }"
}
 
// Mapping target for a minimal C# class declaration such as "public class Sample { }".
// Constant members describe fixed text; Name receives the class identifier.
[MatchableClass]
class CSharpClass
{
    // Fixed keyword "public".
    [MatchableMember(0)]
    private const string @public = "public";
 
    // Fixed keyword "class".
    [MatchableMember(1)]
    private const string @class = "class";
 
    // The class name found in the source text (e.g. "Sample").
    [MatchableMember(2)]
    public string Name;
 
    // Opening brace of the class body.
    [MatchableMember(3)]
    private const char open = '{';
 
    // Closing brace of the class body.
    [MatchableMember(4)]
    private const char close = '}';
}
 

一个复杂的例子 - 映射数学表达式

此示例创建一组可自定义的可匹配类,以映射数学表达式。

测试代码

// Expression to map: nested parentheses, comparison and arithmetic operators,
// numbers and identifiers.
string expr = "(150 != (var / 32)) - _k2 + 98 * (90 / 123 + (_j5 * (15.5 - 0.32)))";

var settings = new ObjectMapperOptions
{
    IgnoreBlanks = IgnoreBlank.All,
    EndsWithEos = true
};

// Map the whole expression onto the Expr object graph.
Expr mapped = ObjectMapper.Map<Expr>(expr, settings);

可匹配的类

// Root of a mapped math expression: a first operand followed by any number of
// operator/operand items.
[MatchableClass]
class Expr
{
    // An expression starts with either an enclosed (parenthesized) expression
    // or a single value; the two ObjectPattern attributes list those alternatives.

    [MatchableMember(0)]
    [ObjectPattern(0, typeof(EnclosedExpr))]
    [ObjectPattern(1, typeof(Value))]
    public object Value;
 
    // The first operand is followed by a list of operator/operand items.

    [MatchableMember(1)]
    public Item[] Values;
}
 
// One operator followed by its right-hand operand.
[MatchableClass]
class Item
{
    // The operator text; each StringPattern attribute lists one accepted operator.
    // Two-character operators are listed before the one-character operators that
    // are their prefixes ("<=" before "<", ">=" before ">").
    // NOTE(review): presumably alternatives are tried in index order, so the
    // longer operator must come first — confirm.
    [MatchableMember(0)]
    [StringPattern(0, "+")]
    [StringPattern(1, "-")]
    [StringPattern(2, "*")]
    [StringPattern(3, "/")]
    [StringPattern(4, "==")]
    [StringPattern(5, "!=")]
    [StringPattern(6, "<=")]
    [StringPattern(7, "<")]
    [StringPattern(8, ">=")]
    [StringPattern(9, ">")]
    [StringPattern(10, "&&")]
    [StringPattern(11, "||")]
    public string Operator;
 
    // The operand: either an enclosed (parenthesized) expression or a single value.
    [MatchableMember(1)]
    [ObjectPattern(0, typeof(EnclosedExpr))]
    [ObjectPattern(1, typeof(Value))]
    public object Value;
}
 

// Leaf operand of an expression.
[MatchableClass]
class Value
{
    // A value is either a number (NumberPattern) or an identifier matched by
    // the custom IdentifierPattern.

    [MatchableMember(0)]
    [NumberPattern(0)]
    [Pattern(1, typeof(IdentifierPattern))]
    public object Val;
}
 
// A parenthesized sub-expression: '(' Expr ')'.
[MatchableClass]
class EnclosedExpr
{
    // Opening parenthesis, flagged as a point of no return.
    // NOTE(review): presumably once '(' has matched the mapper commits to this
    // alternative instead of trying another one — confirm.
    [MatchableMember(0, IsPointOfNoReturn = true)]
    private const char _open = '(';
 
    // The expression inside the parentheses (recursive).
    [MatchableMember(1)]
    public Expr Value;
 
    // Closing parenthesis.
    [MatchableMember(2)]
    private const char _close = ')';
}
 
// Custom pattern that matches an identifier.
sealed class IdentifierPattern : BasePattern
{
    // Fails unless the current character is a letter or an underscore; otherwise
    // moves on using a letter/digit/underscore predicate and succeeds with the
    // matched string as the value.
    protected override Match OnMatch(MatchContext context)
    {
        bool validStart = context.IsLetter || context.IsUnderscore;
        if (!validStart)
        {
            return Error(context);
        }

        // NOTE(review): NextTo presumably advances while the predicate holds — confirm.
        context.NextTo(c => char.IsLetterOrDigit(c) || c == '_');
        return Success(context, MatchedString.Value);
    }

    // Short description of what this pattern matches.
    public override string HelpString
    {
        get
        {
            return "Identifier";
        }
    }
}

上述示例未展示的其他功能

MatchKit 还提供了其他功能,例如:

  • 使用跟踪器来转储匹配过程
  • 处理可匹配类中的成员分配以更改匹配的值或取消整个过程
  • (仅适用于可匹配的自定义类)使用匹配包检索特定实例中已映射成员的匹配信息(行、列、索引等)
  • 自定义 .NET 数据类型的默认模式

文档示例库中提供的其他复杂示例显示了如何匹配命令行、INI 文件、XML、JSON 字符串、SQL 查询以及 C# 风格的源代码。

© . All rights reserved.