展开 SQL 层次结构:二元方法






2.40/5 (5投票s)
本文讨论了一种从关系数据库检索分层数据的方法。
引言
是的,这又是存储在关系数据库中的层次结构的老生常谈了。网页邮件内容、公司组织架构、任务依赖关系等等。我在为一个网络门户设计一个迷你论坛组件时遇到了这个问题,并且在网上找不到解决方案。所以,这是我的方法。我使用了 SQL Server 2000 和 2005。
背景
假设你有一个表,其中存储了一些实体(消息、员工等)。每个实体实例都可以是许多其他实例的父级。一些实例没有子节点 - 它们是树形层次结构中的叶节点。一些实例没有父节点 - 它们是根节点。考虑一个简单的表
create table Messages
(
[MessageID] [uniqueidentifier] primary key clustered NOT NULL ,
[ParentID] [uniqueidentifier] NULL ,
[ReceivedDate] [datetime] NOT NULL,
[Subject] varchar(256) NULL,
CONSTRAINT [FK_Messages_Messages] FOREIGN KEY ([ParentID])
REFERENCES Messages ([MessageID])
)
是的,主键是一个 GUID,所以你不会想用它来进行排序。不,没有一个名为 "NestedLevel" 的字段来保存生成级别。是的,MessageID 是一个聚集主键,所以记录在数据库中以随机顺序存储。我们不要作弊。父子关系由一对 GUID 定义,所有排序都使用 datetime 字段执行。就是这样。Subject 字段只是一个装饰性的功能。
让我们用一些有意义的数据填充我们的表格
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('F24B7175-1905-4783-A8D6-0DA6F1759F2C', NULL, getdate(), 'Message 1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('E10B2FC6-E860-4BFF-8642-29A318CDB6CE',
'F24B7175-1905-4783-A8D6-0DA6F1759F2C', dateadd(day, 1, getdate()),
'Message 1-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('E931AC59-4356-48AD-A587-9F0246532733',
'F24B7175-1905-4783-A8D6-0DA6F1759F2C', dateadd(day, 2, getdate()),
'Message 1-2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('FAEF6F5B-54AF-452F-A72E-3DBD21F643FD',
'E931AC59-4356-48AD-A587-9F0246532733', dateadd(day, 2, getdate()),
'Message 1-2-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('5B1D05CE-7889-4BD9-8ABC-20A1269668F5',
'FAEF6F5B-54AF-452F-A72E-3DBD21F643FD', dateadd(day, 2, getdate()),
'Message 1-2-1-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('A3E5F551-6531-4D12-8A44-7C45C81C181A',
'FAEF6F5B-54AF-452F-A72E-3DBD21F643FD', dateadd(day, 3, getdate()),
'Message 1-2-1-2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('298D6A02-5BB6-4EDD-8030-7E15CD16A021',
'A3E5F551-6531-4D12-8A44-7C45C81C181A', dateadd(day, 3, getdate()),
'Message 1-2-1-2-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('CBCE69FF-19C1-47E7-842A-4944BA146BAA',
'A3E5F551-6531-4D12-8A44-7C45C81C181A', dateadd(day, 4, getdate()),
'Message 1-2-1-2-2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('3C521C3A-AA40-49D0-83FA-E5101913A0E3',
'CBCE69FF-19C1-47E7-842A-4944BA146BAA', dateadd(day, 4, getdate()),
'Message 1-2-1-2-2-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('50D2CA2B-A484-482F-A41F-ADFF1A636BE3',
'A3E5F551-6531-4D12-8A44-7C45C81C181A', dateadd(day, 5, getdate()),
'Message 1-2-1-2-3')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('71AD6FBD-EE92-473A-A038-C59CB7FF658A',
'E931AC59-4356-48AD-A587-9F0246532733', dateadd(day, 3, getdate()),
'Message 1-2-2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('C300611C-D8EF-419B-8736-97AAE62108D8',
'F24B7175-1905-4783-A8D6-0DA6F1759F2C', dateadd(day, 3, getdate()),
'Message 1-3')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('A4E8E001-99F1-450F-A4D7-3E0A491A085E',
'C300611C-D8EF-419B-8736-97AAE62108D8', dateadd(day, 3, getdate()),
'Message 1-3-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('CD7F8A67-B735-4590-A92C-D91F0341DD7F',
'C300611C-D8EF-419B-8736-97AAE62108D8', dateadd(day, 4, getdate()),
'Message 1-3-2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('31A05919-1BCB-403E-807A-56B50B776C4E', NULL, getdate(), 'Message 2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)
values ('A731F5DC-88F7-49E7-BCBB-F912C69BFC7C',
'31A05919-1BCB-403E-807A-56B50B776C4E', dateadd(day, 1, getdate()),
'Message 2-1')
我们正在寻找检索这些记录的方法,以便它们可以很容易地显示在例如网页邮件应用程序页面上
Id Indent Subject
------------------------------------ ----------- -------------------
F24B7175-1905-4783-A8D6-0DA6F1759F2C 1 Message 1
E10B2FC6-E860-4BFF-8642-29A318CDB6CE 2 Message 1-1
E931AC59-4356-48AD-A587-9F0246532733 2 Message 1-2
FAEF6F5B-54AF-452F-A72E-3DBD21F643FD 3 Message 1-2-1
5B1D05CE-7889-4BD9-8ABC-20A1269668F5 4 Message 1-2-1-1
A3E5F551-6531-4D12-8A44-7C45C81C181A 4 Message 1-2-1-2
298D6A02-5BB6-4EDD-8030-7E15CD16A021 5 Message 1-2-1-2-1
CBCE69FF-19C1-47E7-842A-4944BA146BAA 5 Message 1-2-1-2-2
3C521C3A-AA40-49D0-83FA-E5101913A0E3 6 Message 1-2-1-2-2-1
50D2CA2B-A484-482F-A41F-ADFF1A636BE3 5 Message 1-2-1-2-3
71AD6FBD-EE92-473A-A038-C59CB7FF658A 3 Message 1-2-2
C300611C-D8EF-419B-8736-97AAE62108D8 2 Message 1-3
A4E8E001-99F1-450F-A4D7-3E0A491A085E 3 Message 1-3-1
CD7F8A67-B735-4590-A92C-D91F0341DD7F 3 Message 1-3-2
31A05919-1BCB-403E-807A-56B50B776C4E 1 Message 2
A731F5DC-88F7-49E7-BCBB-F912C69BFC7C 2 Message 2-1
你想要它快吗?
在网上搜索:到处都是使用游标、递归和堆栈表的示例。它们有效。但我想让它快速工作。我喜欢连接(joins)。SQL Server 擅长做连接,让我们使用它们。
我们可以编写一些 SQL 代码,将前几个级别的所有消息连接起来。
-- Retrieve the whole hierarchy from level 1 to level 4
-- Temp table @t: store UIDs of 3 parent
declare @t table (
l1 uniqueidentifier, l2 uniqueidentifier, l3 uniqueidentifier,
l4 uniqueidentifier,
id uniqueidentifier, depth int)
-- Populate @t: expand msg hierarchy for levels 1-4
insert into @t
select m1.MessageID, m2.MessageID, m3.MessageID, m4.MessageID, null, 1
from Messages as m1
left outer join Messages m2 on m1.MessageID=m2.ParentID
left outer join Messages m3 on m2.MessageID=m3.ParentID
left outer join Messages m4 on m3.MessageID=m4.ParentID
where m1.ParentID is NULL
-- Calculate node level for each node and get tree depth
declare @depth int
update @t set depth = depth + 1 where l2 is not null
update @t set depth = depth + 1 where l3 is not null
update @t set depth = depth + 1 where l4 is not null
select @depth = max(depth) from @t
-- Since we have made several joins, we have only leaf nodes of level 4 in
-- @t. Add missing leaf nodes of level 1
insert into @t select distinct l1, NULL, NULL, NULL, NULL, 1 from @t
where l2 is not NULL
-- Add missing leaf nodes of level 2
insert into @t select distinct l1, l2, NULL, NULL, NULL, 2 from @t
where l3 is not NULL
-- Add missing leaf nodes of level 3
insert into @t select distinct l1, l2, l3, NULL, NULL, 3 from @t
where l4 is not NULL
-- Populate id field, get the rightmost msg id from @t
update @t set id=coalesce(l4, l3, l2, l1)
-- Final select: order items
select id, depth, m.Subject
from @t as t
left outer join Messages as m1 on m1.MessageID=t.l1
left outer join Messages as m2 on m2.MessageID=t.l2
left outer join Messages as m3 on m3.MessageID=t.l3
left outer join Messages as m4 on m4.MessageID=t.l4
left outer join Messages as m on m.MessageID=t.id
order by m1.ReceivedDate, l1, m2.ReceivedDate, l2, m3.ReceivedDate, l3,
m4.ReceivedDate, l4
它似乎工作正常。看看结果,从 1 到 4 的所有级别都以正确的顺序显示
id depth Subject
------------------------------------ ----------- -----------
F24B7175-1905-4783-A8D6-0DA6F1759F2C 1 Message 1
E10B2FC6-E860-4BFF-8642-29A318CDB6CE 2 Message 1-1
E931AC59-4356-48AD-A587-9F0246532733 2 Message 1-2
FAEF6F5B-54AF-452F-A72E-3DBD21F643FD 3 Message 1-2-1
5B1D05CE-7889-4BD9-8ABC-20A1269668F5 4 Message 1-2-1-1
A3E5F551-6531-4D12-8A44-7C45C81C181A 4 Message 1-2-1-2
71AD6FBD-EE92-473A-A038-C59CB7FF658A 3 Message 1-2-2
C300611C-D8EF-419B-8736-97AAE62108D8 2 Message 1-3
A4E8E001-99F1-450F-A4D7-3E0A491A085E 3 Message 1-3-1
CD7F8A67-B735-4590-A92C-D91F0341DD7F 3 Message 1-3-2
31A05919-1BCB-403E-807A-56B50B776C4E 1 Message 2
A731F5DC-88F7-49E7-BCBB-F912C69BFC7C 2 Message 2-1
你想要它处理大量数据吗?
主要问题:此代码可以处理的级别数是硬编码的。看来是时候从你的袖子里拿出王牌,编写一个简单的代码生成器,它构建 SQL 代码,进行十个、二十个、三十个连接,并在一次操作中将整个层次结构带给你。我就是这么做的。而且它奏效了。但是
- 仅适用于适量的数据和深度; 30 条消息深的论坛线程并不罕见,并且 20 个连接对于 SQL Server 来说已经很繁重了;
- 仅适用于有限的深度:使用 19 个连接的查询不适合 varchar(8000) 变量,无法执行。
我甚至不打算在这里发布这段代码,相信我:它有效,但它看起来真的很可怕。不使用代码生成器的第三个原因是……好吧,你可以称之为宗教信仰:我就是讨厌动态 SQL。每次我必须调用 exec(@sql)
时,我都会感觉我释放了一个小野兽,它最终会让程序员的生活变得痛苦。
你想要它工作吗?
是时候记住游标和递归了。我已经实现了一个递归函数,因为 SQL Server 允许表值函数,并且代码看起来很流畅。你的 SQL 方言可能会提供其他递归的机会,所以不要犹豫去尝试它们。这个想法很简单:一次性提取几个(四个)层次结构级别,分析,如果需要使用游标和递归向下钻取。
CREATE FUNCTION BrowseMessages
( @rootMessageID uniqueidentifier,
@rootDepth int,
@isDrillDown bit )
RETURNS @RtnValue table (Id uniqueidentifier, Depth int,
Subject varchar(256))
AS
begin
declare @t table (
l1 uniqueidentifier, l2 uniqueidentifier, l3 uniqueidentifier,
l4 uniqueidentifier,
id uniqueidentifier, depth int)
insert into @t
select
m1.MessageID, m2.MessageID, m3.MessageID, m4.MessageID, null, 1
from Messages as m1
left outer join Messages m2 on m1.MessageID=m2.ParentID
left outer join Messages m3 on m2.MessageID=m3.ParentID
left outer join Messages m4 on m3.MessageID=m4.ParentID
-- Add your filtering and paging here
where (@rootMessageID is NULL and m1.ParentID is NULL) or
(@rootMessageID is NOT NULL and m1.ParentID=@rootMessageID)
-- Get tree depth
declare @depth int
update @t set depth = depth + 1 where l2 is not null
update @t set depth = depth + 1 where l3 is not null
update @t set depth = depth + 1 where l4 is not null
select @depth = max(depth) from @t
-- Add missing leaf nodes of levels 1,2,3
insert into @t select distinct l1, NULL, NULL, NULL, NULL, 1 from @t
where l2 is not NULL
insert into @t select distinct l1, l2, NULL, NULL, NULL, 2 from @t
where l3 is not NULL
insert into @t select distinct l1, l2, l3, NULL, NULL, 3 from @t
where l4 is not NULL
-- Populate id field
update @t set id=coalesce(l4, l3, l2, l1)
-- Collect result nodes in @tt and:
-- - add @rootDepth
-- - order them
declare @tt table (Id uniqueidentifier, Depth int, Subject varchar(256))
insert into @tt
select Id=id, Depth=depth+@rootDepth, m.Subject
from @t as t
left outer join Messages as m1 on m1.MessageID=t.l1
left outer join Messages as m2 on m2.MessageID=t.l2
left outer join Messages as m3 on m3.MessageID=t.l3
left outer join Messages as m4 on m4.MessageID=t.l4
left outer join Messages as m on m.MessageID=t.id
order by m1.ReceivedDate, l1, m2.ReceivedDate, l2, m3.ReceivedDate,
l3, m4.ReceivedDate, l4
-- Check depth: if it doesn't exceed 3, then we do not want to drill
-- down further
declare @maxCurrentDepth int
select @maxCurrentDepth=max(Depth)-@rootDepth from @tt
-- Drill down or simply select the whole bunch
if (@isDrillDown <> 1 or @maxCurrentDepth < 4)
begin
insert into @RtnValue select * from @tt
end
else
begin
declare crs cursor read_only fast_forward for select Id, Depth,
Subject from @tt
open crs
declare @nextId uniqueidentifier, @nextDepth int,
@nextSubject varchar(256)
while 1=1
begin
fetch next from crs into @nextId, @nextDepth, @nextSubject
if @@fetch_status <> 0 break
insert into @RtnValue values (@nextId, @nextDepth, @nextSubject)
if (@nextDepth-@rootDepth = 4)
begin
insert into @RtnValue select * from
BrowseMessages(@nextId, @nextDepth, 1)
end
end
close crs
deallocate crs
end
return
end
它工作了(它检索了第 5 级和第 6 级的那些节点)
select * from BrowseMessages(NULL, 0, 1) Id Depth Subject ------------------------------------ ----------- ------------------- F24B7175-1905-4783-A8D6-0DA6F1759F2C 1 Message 1 E10B2FC6-E860-4BFF-8642-29A318CDB6CE 2 Message 1-1 E931AC59-4356-48AD-A587-9F0246532733 2 Message 1-2 FAEF6F5B-54AF-452F-A72E-3DBD21F643FD 3 Message 1-2-1 5B1D05CE-7889-4BD9-8ABC-20A1269668F5 4 Message 1-2-1-1 A3E5F551-6531-4D12-8A44-7C45C81C181A 4 Message 1-2-1-2 298D6A02-5BB6-4EDD-8030-7E15CD16A021 5 Message 1-2-1-2-1 CBCE69FF-19C1-47E7-842A-4944BA146BAA 5 Message 1-2-1-2-2 3C521C3A-AA40-49D0-83FA-E5101913A0E3 6 Message 1-2-1-2-2-1 50D2CA2B-A484-482F-A41F-ADFF1A636BE3 5 Message 1-2-1-2-3 71AD6FBD-EE92-473A-A038-C59CB7FF658A 3 Message 1-2-2 C300611C-D8EF-419B-8736-97AAE62108D8 2 Message 1-3 A4E8E001-99F1-450F-A4D7-3E0A491A085E 3 Message 1-3-1 CD7F8A67-B735-4590-A92C-D91F0341DD7F 3 Message 1-3-2 31A05919-1BCB-403E-807A-56B50B776C4E 1 Message 2 A731F5DC-88F7-49E7-BCBB-F912C69BFC7C 2 Message 2-1
接下来呢?
根据你的具体需求调整此代码。例如,如果你有几个根节点和很深的层次结构,请考虑增加连接数。如果你有成千上万个只有一级子节点的根节点,请考虑仅连接前三个级别而不是四个级别。添加分页和过滤。使用你在层次结构表中的其他有用的字段。使用适当的索引。如果你预计会有非常深的树,并且 4x32=128 级不够(4 是连接数,SQL Server 2000 将嵌套调用或嵌套级别的数量限制为 32),请增加连接数或不要使用递归函数。
尽情享用!