65.9K
CodeProject 正在变化。 阅读更多。
Home

展开 SQL 层次结构:二元方法

starIconstarIcon
emptyStarIcon
starIcon
emptyStarIconemptyStarIcon

2.40/5 (5投票s)

2006年10月18日

MIT

3分钟阅读

viewsIcon

24989

本文讨论了一种从关系数据库检索分层数据的方法。

引言

是的,这又是存储在关系数据库中的层次结构的老生常谈了。网页邮件内容、公司组织架构、任务依赖关系等等。我在为一个网络门户设计一个迷你论坛组件时遇到了这个问题,并且在网上找不到解决方案。所以,这是我的方法。我使用了 SQL Server 2000 和 2005。

背景

假设你有一个表,其中存储了一些实体(消息、员工等)。每个实体实例都可以是许多其他实例的父级。一些实例没有子节点 - 它们是树形层次结构中的叶节点。一些实例没有父节点 - 它们是根节点。考虑一个简单的表

create table Messages
(
    [MessageID] [uniqueidentifier] primary key clustered NOT NULL ,
    [ParentID] [uniqueidentifier] NULL ,
    [ReceivedDate] [datetime] NOT NULL,
    [Subject] varchar(256) NULL,
    CONSTRAINT [FK_Messages_Messages] FOREIGN KEY ([ParentID]) 
REFERENCES Messages ([MessageID]) )

是的,主键是一个 GUID,所以你不会想用它来进行排序。不,没有一个名为 "NestedLevel" 的字段来保存生成级别。是的,MessageID 是一个聚集主键,所以记录在数据库中以随机顺序存储。我们不要作弊。父子关系由一对 GUID 定义,所有排序都使用 datetime 字段执行。就是这样。Subject 字段只是一个装饰性的功能。

让我们用一些有意义的数据填充我们的表格

insert into Messages (MessageID, ParentID, ReceivedDate, Subject) 
values ('F24B7175-1905-4783-A8D6-0DA6F1759F2C', NULL, getdate(), 'Message 1') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('E10B2FC6-E860-4BFF-8642-29A318CDB6CE',
'F24B7175-1905-4783-A8D6-0DA6F1759F2C', dateadd(day, 1, getdate()),
'Message 1-1') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('E931AC59-4356-48AD-A587-9F0246532733',
'F24B7175-1905-4783-A8D6-0DA6F1759F2C', dateadd(day, 2, getdate()),
'Message 1-2') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('FAEF6F5B-54AF-452F-A72E-3DBD21F643FD',
'E931AC59-4356-48AD-A587-9F0246532733', dateadd(day, 2, getdate()),
'Message 1-2-1') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('5B1D05CE-7889-4BD9-8ABC-20A1269668F5',
'FAEF6F5B-54AF-452F-A72E-3DBD21F643FD', dateadd(day, 2, getdate()),
'Message 1-2-1-1') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('A3E5F551-6531-4D12-8A44-7C45C81C181A',
'FAEF6F5B-54AF-452F-A72E-3DBD21F643FD', dateadd(day, 3, getdate()),
'Message 1-2-1-2') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('298D6A02-5BB6-4EDD-8030-7E15CD16A021',
'A3E5F551-6531-4D12-8A44-7C45C81C181A', dateadd(day, 3, getdate()),
'Message 1-2-1-2-1') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('CBCE69FF-19C1-47E7-842A-4944BA146BAA',
'A3E5F551-6531-4D12-8A44-7C45C81C181A', dateadd(day, 4, getdate()),
'Message 1-2-1-2-2') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('3C521C3A-AA40-49D0-83FA-E5101913A0E3',
'CBCE69FF-19C1-47E7-842A-4944BA146BAA', dateadd(day, 4, getdate()),
'Message 1-2-1-2-2-1') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('50D2CA2B-A484-482F-A41F-ADFF1A636BE3',
'A3E5F551-6531-4D12-8A44-7C45C81C181A', dateadd(day, 5, getdate()),
'Message 1-2-1-2-3') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('71AD6FBD-EE92-473A-A038-C59CB7FF658A',
'E931AC59-4356-48AD-A587-9F0246532733', dateadd(day, 3, getdate()),
'Message 1-2-2') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('C300611C-D8EF-419B-8736-97AAE62108D8',
'F24B7175-1905-4783-A8D6-0DA6F1759F2C', dateadd(day, 3, getdate()),
'Message 1-3') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('A4E8E001-99F1-450F-A4D7-3E0A491A085E',
'C300611C-D8EF-419B-8736-97AAE62108D8', dateadd(day, 3, getdate()),
'Message 1-3-1') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('CD7F8A67-B735-4590-A92C-D91F0341DD7F',
'C300611C-D8EF-419B-8736-97AAE62108D8', dateadd(day, 4, getdate()),
'Message 1-3-2') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('31A05919-1BCB-403E-807A-56B50B776C4E', NULL, getdate(), 'Message 2') insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('A731F5DC-88F7-49E7-BCBB-F912C69BFC7C',
'31A05919-1BCB-403E-807A-56B50B776C4E', dateadd(day, 1, getdate()),
'Message 2-1')

我们正在寻找检索这些记录的方法,以便它们可以很容易地显示在例如网页邮件应用程序页面上

Id                                   Indent      Subject             
------------------------------------ ----------- -------------------
F24B7175-1905-4783-A8D6-0DA6F1759F2C 1           Message 1
E10B2FC6-E860-4BFF-8642-29A318CDB6CE 2           Message 1-1
E931AC59-4356-48AD-A587-9F0246532733 2           Message 1-2
FAEF6F5B-54AF-452F-A72E-3DBD21F643FD 3           Message 1-2-1
5B1D05CE-7889-4BD9-8ABC-20A1269668F5 4           Message 1-2-1-1
A3E5F551-6531-4D12-8A44-7C45C81C181A 4           Message 1-2-1-2
298D6A02-5BB6-4EDD-8030-7E15CD16A021 5           Message 1-2-1-2-1
CBCE69FF-19C1-47E7-842A-4944BA146BAA 5           Message 1-2-1-2-2
3C521C3A-AA40-49D0-83FA-E5101913A0E3 6           Message 1-2-1-2-2-1
50D2CA2B-A484-482F-A41F-ADFF1A636BE3 5           Message 1-2-1-2-3
71AD6FBD-EE92-473A-A038-C59CB7FF658A 3           Message 1-2-2
C300611C-D8EF-419B-8736-97AAE62108D8 2           Message 1-3
A4E8E001-99F1-450F-A4D7-3E0A491A085E 3           Message 1-3-1
CD7F8A67-B735-4590-A92C-D91F0341DD7F 3           Message 1-3-2
31A05919-1BCB-403E-807A-56B50B776C4E 1           Message 2
A731F5DC-88F7-49E7-BCBB-F912C69BFC7C 2           Message 2-1
        

你想要它快吗?

在网上搜索:到处都是使用游标、递归和堆栈表的示例。它们有效。但我想让它快速工作。我喜欢连接(joins)。SQL Server 擅长做连接,让我们使用它们。

我们可以编写一些 SQL 代码,将前几个级别的所有消息连接起来。

-- Retrieve the whole hierarchy from level 1 to level 4
-- Temp table @t: store UIDs of 3 parent 
declare @t table (
l1 uniqueidentifier, l2 uniqueidentifier, l3 uniqueidentifier,  
l4 uniqueidentifier, id uniqueidentifier, depth int) -- Populate @t: expand msg hierarchy for levels 1-4 insert into @t select m1.MessageID, m2.MessageID, m3.MessageID, m4.MessageID, null, 1 from Messages as m1 left outer join Messages m2 on m1.MessageID=m2.ParentID left outer join Messages m3 on m2.MessageID=m3.ParentID left outer join Messages m4 on m3.MessageID=m4.ParentID where m1.ParentID is NULL -- Calculate node level for each node and get tree depth declare @depth int update @t set depth = depth + 1 where l2 is not null update @t set depth = depth + 1 where l3 is not null update @t set depth = depth + 1 where l4 is not null select @depth = max(depth) from @t -- Since we have made several joins, we have only leaf nodes of level 4 in
-- @t. Add missing leaf nodes of level 1
insert into @t select distinct l1, NULL, NULL, NULL, NULL, 1 from @t
where l2 is not NULL -- Add missing leaf nodes of level 2 insert into @t select distinct l1, l2, NULL, NULL, NULL, 2 from @t
where l3 is not NULL -- Add missing leaf nodes of level 3 insert into @t select distinct l1, l2, l3, NULL, NULL, 3 from @t
where l4 is not NULL -- Populate id field, get the rightmost msg id from @t update @t set id=coalesce(l4, l3, l2, l1) -- Final select: order items select id, depth, m.Subject from @t as t left outer join Messages as m1 on m1.MessageID=t.l1 left outer join Messages as m2 on m2.MessageID=t.l2 left outer join Messages as m3 on m3.MessageID=t.l3 left outer join Messages as m4 on m4.MessageID=t.l4 left outer join Messages as m on m.MessageID=t.id order by m1.ReceivedDate, l1, m2.ReceivedDate, l2, m3.ReceivedDate, l3,
m4.ReceivedDate, l4

它似乎工作正常。看看结果,从 1 到 4 的所有级别都以正确的顺序显示

id                                   depth       Subject         
------------------------------------ ----------- -----------
F24B7175-1905-4783-A8D6-0DA6F1759F2C 1           Message 1
E10B2FC6-E860-4BFF-8642-29A318CDB6CE 2           Message 1-1
E931AC59-4356-48AD-A587-9F0246532733 2           Message 1-2
FAEF6F5B-54AF-452F-A72E-3DBD21F643FD 3           Message 1-2-1
5B1D05CE-7889-4BD9-8ABC-20A1269668F5 4           Message 1-2-1-1
A3E5F551-6531-4D12-8A44-7C45C81C181A 4           Message 1-2-1-2
71AD6FBD-EE92-473A-A038-C59CB7FF658A 3           Message 1-2-2
C300611C-D8EF-419B-8736-97AAE62108D8 2           Message 1-3
A4E8E001-99F1-450F-A4D7-3E0A491A085E 3           Message 1-3-1
CD7F8A67-B735-4590-A92C-D91F0341DD7F 3           Message 1-3-2
31A05919-1BCB-403E-807A-56B50B776C4E 1           Message 2
A731F5DC-88F7-49E7-BCBB-F912C69BFC7C 2           Message 2-1        

你想要它处理大量数据吗?

主要问题:此代码可以处理的级别数是硬编码的。看来是时候从你的袖子里拿出王牌,编写一个简单的代码生成器,它构建 SQL 代码,进行十个、二十个、三十个连接,并在一次操作中将整个层次结构带给你。我就是这么做的。而且它奏效了。但是

  • 仅适用于适量的数据和深度; 30 条消息深的论坛线程并不罕见,并且 20 个连接对于 SQL Server 来说已经很繁重了;
  • 仅适用于有限的深度:使用 19 个连接的查询不适合 varchar(8000) 变量,无法执行。

我甚至不打算在这里发布这段代码,相信我:它有效,但它看起来真的很可怕。不使用代码生成器的第三个原因是……好吧,你可以称之为宗教信仰:我就是讨厌动态 SQL。每次我必须调用 exec(@sql) 时,我都会感觉我释放了一个小野兽,它最终会让程序员的生活变得痛苦。

你想要它工作吗?

是时候记住游标和递归了。我已经实现了一个递归函数,因为 SQL Server 允许表值函数,并且代码看起来很流畅。你的 SQL 方言可能会提供其他递归的机会,所以不要犹豫去尝试它们。这个想法很简单:一次性提取几个(四个)层次结构级别,分析,如果需要使用游标和递归向下钻取。

CREATE FUNCTION BrowseMessages
( @rootMessageID uniqueidentifier,
  @rootDepth int,
  @isDrillDown bit )
RETURNS @RtnValue table (Id uniqueidentifier, Depth int, 
Subject varchar(256)) AS begin declare @t table ( l1 uniqueidentifier, l2 uniqueidentifier, l3 uniqueidentifier,
l4 uniqueidentifier, id uniqueidentifier, depth int) insert into @t select m1.MessageID, m2.MessageID, m3.MessageID, m4.MessageID, null, 1 from Messages as m1 left outer join Messages m2 on m1.MessageID=m2.ParentID left outer join Messages m3 on m2.MessageID=m3.ParentID left outer join Messages m4 on m3.MessageID=m4.ParentID -- Add your filtering and paging here where (@rootMessageID is NULL and m1.ParentID is NULL) or
(@rootMessageID is NOT NULL and m1.ParentID=@rootMessageID) -- Get tree depth declare @depth int update @t set depth = depth + 1 where l2 is not null update @t set depth = depth + 1 where l3 is not null update @t set depth = depth + 1 where l4 is not null select @depth = max(depth) from @t -- Add missing leaf nodes of levels 1,2,3 insert into @t select distinct l1, NULL, NULL, NULL, NULL, 1 from @t
where l2 is not NULL insert into @t select distinct l1, l2, NULL, NULL, NULL, 2 from @t
where l3 is not NULL insert into @t select distinct l1, l2, l3, NULL, NULL, 3 from @t
where l4 is not NULL -- Populate id field update @t set id=coalesce(l4, l3, l2, l1) -- Collect result nodes in @tt and: -- - add @rootDepth -- - order them declare @tt table (Id uniqueidentifier, Depth int, Subject varchar(256)) insert into @tt select Id=id, Depth=depth+@rootDepth, m.Subject from @t as t left outer join Messages as m1 on m1.MessageID=t.l1 left outer join Messages as m2 on m2.MessageID=t.l2 left outer join Messages as m3 on m3.MessageID=t.l3 left outer join Messages as m4 on m4.MessageID=t.l4 left outer join Messages as m on m.MessageID=t.id order by m1.ReceivedDate, l1, m2.ReceivedDate, l2, m3.ReceivedDate,
l3, m4.ReceivedDate, l4 -- Check depth: if it doesn't exceed 3, then we do not want to drill
-- down further
declare @maxCurrentDepth int select @maxCurrentDepth=max(Depth)-@rootDepth from @tt -- Drill down or simply select the whole bunch if (@isDrillDown <> 1 or @maxCurrentDepth < 4) begin insert into @RtnValue select * from @tt end else begin declare crs cursor read_only fast_forward for select Id, Depth,
Subject from @tt open crs declare @nextId uniqueidentifier, @nextDepth int,
@nextSubject varchar(256) while 1=1 begin fetch next from crs into @nextId, @nextDepth, @nextSubject if @@fetch_status <> 0 break insert into @RtnValue values (@nextId, @nextDepth, @nextSubject) if (@nextDepth-@rootDepth = 4) begin insert into @RtnValue select * from
BrowseMessages(@nextId, @nextDepth, 1) end end close crs deallocate crs end return end

它工作了(它检索了第 5 级和第 6 级的那些节点)

select * from BrowseMessages(NULL, 0, 1)



Id                                   Depth       Subject            
------------------------------------ ----------- -------------------
F24B7175-1905-4783-A8D6-0DA6F1759F2C 1           Message 1
E10B2FC6-E860-4BFF-8642-29A318CDB6CE 2           Message 1-1
E931AC59-4356-48AD-A587-9F0246532733 2           Message 1-2
FAEF6F5B-54AF-452F-A72E-3DBD21F643FD 3           Message 1-2-1
5B1D05CE-7889-4BD9-8ABC-20A1269668F5 4           Message 1-2-1-1
A3E5F551-6531-4D12-8A44-7C45C81C181A 4           Message 1-2-1-2
298D6A02-5BB6-4EDD-8030-7E15CD16A021 5           Message 1-2-1-2-1
CBCE69FF-19C1-47E7-842A-4944BA146BAA 5           Message 1-2-1-2-2
3C521C3A-AA40-49D0-83FA-E5101913A0E3 6           Message 1-2-1-2-2-1
50D2CA2B-A484-482F-A41F-ADFF1A636BE3 5           Message 1-2-1-2-3
71AD6FBD-EE92-473A-A038-C59CB7FF658A 3           Message 1-2-2
C300611C-D8EF-419B-8736-97AAE62108D8 2           Message 1-3
A4E8E001-99F1-450F-A4D7-3E0A491A085E 3           Message 1-3-1
CD7F8A67-B735-4590-A92C-D91F0341DD7F 3           Message 1-3-2
31A05919-1BCB-403E-807A-56B50B776C4E 1           Message 2
A731F5DC-88F7-49E7-BCBB-F912C69BFC7C 2           Message 2-1
        
        

接下来呢?

根据你的具体需求调整此代码。例如,如果你有几个根节点和很深的层次结构,请考虑增加连接数。如果你有成千上万个只有一级子节点的根节点,请考虑仅连接前三个级别而不是四个级别。添加分页和过滤。使用你在层次结构表中的其他有用的字段。使用适当的索引。如果你预计会有非常深的树,并且 4x32=128 级不够(4 是连接数,SQL Server 2000 将嵌套调用或嵌套级别的数量限制为 32),请增加连接数或不要使用递归函数。

尽情享用!

© . All rights reserved.