展开 SQL 层次结构:二元方法






2.40/5 (5投票s)
本文讨论了一种从关系数据库检索分层数据的方法。
引言
是的,这又是存储在关系数据库中的层次结构的老生常谈了。网页邮件内容、公司组织架构、任务依赖关系等等。我在为一个网络门户设计一个迷你论坛组件时遇到了这个问题,并且在网上找不到解决方案。所以,这是我的方法。我使用了 SQL Server 2000 和 2005。
背景
假设你有一个表,其中存储了一些实体(消息、员工等)。每个实体实例都可以是许多其他实例的父级。一些实例没有子节点 - 它们是树形层次结构中的叶节点。一些实例没有父节点 - 它们是根节点。考虑一个简单的表
create table Messages
(
    [MessageID] [uniqueidentifier] primary key clustered NOT NULL ,
    [ParentID] [uniqueidentifier] NULL ,
    [ReceivedDate] [datetime] NOT NULL,
    [Subject] varchar(256) NULL,
    CONSTRAINT [FK_Messages_Messages] FOREIGN KEY ([ParentID]) 
    REFERENCES Messages ([MessageID])
)
是的,主键是一个 GUID,所以你不会想用它来进行排序。不,没有一个名为 "NestedLevel" 的字段来保存生成级别。是的,MessageID 是一个聚集主键,所以记录在数据库中以随机顺序存储。我们不要作弊。父子关系由一对 GUID 定义,所有排序都使用 datetime 字段执行。就是这样。Subject 字段只是一个装饰性的功能。
让我们用一些有意义的数据填充我们的表格
insert into Messages (MessageID, ParentID, ReceivedDate, Subject) 
values ('F24B7175-1905-4783-A8D6-0DA6F1759F2C', NULL, getdate(), 'Message 1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject) 
values ('E10B2FC6-E860-4BFF-8642-29A318CDB6CE',  
'F24B7175-1905-4783-A8D6-0DA6F1759F2C', dateadd(day, 1, getdate()),  
'Message 1-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject) 
values ('E931AC59-4356-48AD-A587-9F0246532733',  
'F24B7175-1905-4783-A8D6-0DA6F1759F2C', dateadd(day, 2, getdate()),  
'Message 1-2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('FAEF6F5B-54AF-452F-A72E-3DBD21F643FD',  
'E931AC59-4356-48AD-A587-9F0246532733', dateadd(day, 2, getdate()),  
'Message 1-2-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('5B1D05CE-7889-4BD9-8ABC-20A1269668F5',  
'FAEF6F5B-54AF-452F-A72E-3DBD21F643FD', dateadd(day, 2, getdate()),  
'Message 1-2-1-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('A3E5F551-6531-4D12-8A44-7C45C81C181A',  
'FAEF6F5B-54AF-452F-A72E-3DBD21F643FD', dateadd(day, 3, getdate()),  
'Message 1-2-1-2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('298D6A02-5BB6-4EDD-8030-7E15CD16A021',  
'A3E5F551-6531-4D12-8A44-7C45C81C181A', dateadd(day, 3, getdate()),  
'Message 1-2-1-2-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('CBCE69FF-19C1-47E7-842A-4944BA146BAA',  
'A3E5F551-6531-4D12-8A44-7C45C81C181A', dateadd(day, 4, getdate()),  
'Message 1-2-1-2-2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('3C521C3A-AA40-49D0-83FA-E5101913A0E3',  
'CBCE69FF-19C1-47E7-842A-4944BA146BAA', dateadd(day, 4, getdate()),  
'Message 1-2-1-2-2-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('50D2CA2B-A484-482F-A41F-ADFF1A636BE3',  
'A3E5F551-6531-4D12-8A44-7C45C81C181A', dateadd(day, 5, getdate()),  
'Message 1-2-1-2-3')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('71AD6FBD-EE92-473A-A038-C59CB7FF658A',  
'E931AC59-4356-48AD-A587-9F0246532733', dateadd(day, 3, getdate()),  
'Message 1-2-2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('C300611C-D8EF-419B-8736-97AAE62108D8',  
'F24B7175-1905-4783-A8D6-0DA6F1759F2C', dateadd(day, 3, getdate()),  
'Message 1-3')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('A4E8E001-99F1-450F-A4D7-3E0A491A085E',  
'C300611C-D8EF-419B-8736-97AAE62108D8', dateadd(day, 3, getdate()),  
'Message 1-3-1')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('CD7F8A67-B735-4590-A92C-D91F0341DD7F',  
'C300611C-D8EF-419B-8736-97AAE62108D8', dateadd(day, 4, getdate()), 
'Message 1-3-2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('31A05919-1BCB-403E-807A-56B50B776C4E', NULL, getdate(), 'Message 2')
insert into Messages (MessageID, ParentID, ReceivedDate, Subject)  
values ('A731F5DC-88F7-49E7-BCBB-F912C69BFC7C',  
'31A05919-1BCB-403E-807A-56B50B776C4E', dateadd(day, 1, getdate()),  
'Message 2-1')
        
我们正在寻找检索这些记录的方法,以便它们可以很容易地显示在例如网页邮件应用程序页面上
Id                                   Indent      Subject             
------------------------------------ ----------- -------------------
F24B7175-1905-4783-A8D6-0DA6F1759F2C 1           Message 1
E10B2FC6-E860-4BFF-8642-29A318CDB6CE 2           Message 1-1
E931AC59-4356-48AD-A587-9F0246532733 2           Message 1-2
FAEF6F5B-54AF-452F-A72E-3DBD21F643FD 3           Message 1-2-1
5B1D05CE-7889-4BD9-8ABC-20A1269668F5 4           Message 1-2-1-1
A3E5F551-6531-4D12-8A44-7C45C81C181A 4           Message 1-2-1-2
298D6A02-5BB6-4EDD-8030-7E15CD16A021 5           Message 1-2-1-2-1
CBCE69FF-19C1-47E7-842A-4944BA146BAA 5           Message 1-2-1-2-2
3C521C3A-AA40-49D0-83FA-E5101913A0E3 6           Message 1-2-1-2-2-1
50D2CA2B-A484-482F-A41F-ADFF1A636BE3 5           Message 1-2-1-2-3
71AD6FBD-EE92-473A-A038-C59CB7FF658A 3           Message 1-2-2
C300611C-D8EF-419B-8736-97AAE62108D8 2           Message 1-3
A4E8E001-99F1-450F-A4D7-3E0A491A085E 3           Message 1-3-1
CD7F8A67-B735-4590-A92C-D91F0341DD7F 3           Message 1-3-2
31A05919-1BCB-403E-807A-56B50B776C4E 1           Message 2
A731F5DC-88F7-49E7-BCBB-F912C69BFC7C 2           Message 2-1
        
你想要它快吗?
在网上搜索:到处都是使用游标、递归和堆栈表的示例。它们有效。但我想让它快速工作。我喜欢连接(joins)。SQL Server 擅长做连接,让我们使用它们。
我们可以编写一些 SQL 代码,将前几个级别的所有消息连接起来。
-- Retrieve the whole hierarchy from level 1 to level 4
-- Temp table @t: store UIDs of 3 parent 
declare @t table (
l1 uniqueidentifier, l2 uniqueidentifier, l3 uniqueidentifier,  
l4 uniqueidentifier,
id uniqueidentifier, depth int)
-- Populate @t: expand msg hierarchy for levels 1-4
insert into @t
select m1.MessageID, m2.MessageID, m3.MessageID, m4.MessageID, null, 1
from Messages as m1
left outer join Messages m2 on m1.MessageID=m2.ParentID
left outer join Messages m3 on m2.MessageID=m3.ParentID
left outer join Messages m4 on m3.MessageID=m4.ParentID
where m1.ParentID is NULL
-- Calculate node level for each node and get tree depth
declare @depth int
update @t set depth = depth + 1 where l2 is not null
update @t set depth = depth + 1 where l3 is not null
update @t set depth = depth + 1 where l4 is not null
select @depth = max(depth) from @t
-- Since we have made several joins, we have only leaf nodes of level 4 in  
-- @t. Add missing leaf nodes of level 1
insert into @t select distinct l1, NULL, NULL, NULL, NULL, 1 from @t 
where l2 is not NULL
-- Add missing leaf nodes of level 2
insert into @t select distinct l1, l2, NULL, NULL, NULL, 2 from @t
where l3 is not NULL
-- Add missing leaf nodes of level 3
insert into @t select distinct l1, l2, l3, NULL, NULL, 3 from @t 
where l4 is not NULL
-- Populate id field, get the rightmost msg id from @t
update @t set id=coalesce(l4, l3, l2, l1)
-- Final select: order items
select id, depth, m.Subject
from @t as t
left outer join Messages as m1 on m1.MessageID=t.l1
left outer join Messages as m2 on m2.MessageID=t.l2
left outer join Messages as m3 on m3.MessageID=t.l3
left outer join Messages as m4 on m4.MessageID=t.l4
left outer join Messages as m on m.MessageID=t.id
order by m1.ReceivedDate, l1, m2.ReceivedDate, l2, m3.ReceivedDate, l3, 
      m4.ReceivedDate, l4
        
它似乎工作正常。看看结果,从 1 到 4 的所有级别都以正确的顺序显示
id                                   depth       Subject         
------------------------------------ ----------- -----------
F24B7175-1905-4783-A8D6-0DA6F1759F2C 1           Message 1
E10B2FC6-E860-4BFF-8642-29A318CDB6CE 2           Message 1-1
E931AC59-4356-48AD-A587-9F0246532733 2           Message 1-2
FAEF6F5B-54AF-452F-A72E-3DBD21F643FD 3           Message 1-2-1
5B1D05CE-7889-4BD9-8ABC-20A1269668F5 4           Message 1-2-1-1
A3E5F551-6531-4D12-8A44-7C45C81C181A 4           Message 1-2-1-2
71AD6FBD-EE92-473A-A038-C59CB7FF658A 3           Message 1-2-2
C300611C-D8EF-419B-8736-97AAE62108D8 2           Message 1-3
A4E8E001-99F1-450F-A4D7-3E0A491A085E 3           Message 1-3-1
CD7F8A67-B735-4590-A92C-D91F0341DD7F 3           Message 1-3-2
31A05919-1BCB-403E-807A-56B50B776C4E 1           Message 2
A731F5DC-88F7-49E7-BCBB-F912C69BFC7C 2           Message 2-1        
你想要它处理大量数据吗?
主要问题:此代码可以处理的级别数是硬编码的。看来是时候从你的袖子里拿出王牌,编写一个简单的代码生成器,它构建 SQL 代码,进行十个、二十个、三十个连接,并在一次操作中将整个层次结构带给你。我就是这么做的。而且它奏效了。但是
- 仅适用于适量的数据和深度; 30 条消息深的论坛线程并不罕见,并且 20 个连接对于 SQL Server 来说已经很繁重了;
- 仅适用于有限的深度:使用 19 个连接的查询不适合 varchar(8000) 变量,无法执行。
我甚至不打算在这里发布这段代码,相信我:它有效,但它看起来真的很可怕。不使用代码生成器的第三个原因是……好吧,你可以称之为宗教信仰:我就是讨厌动态 SQL。每次我必须调用 exec(@sql) 时,我都会感觉我释放了一个小野兽,它最终会让程序员的生活变得痛苦。
你想要它工作吗?
是时候记住游标和递归了。我已经实现了一个递归函数,因为 SQL Server 允许表值函数,并且代码看起来很流畅。你的 SQL 方言可能会提供其他递归的机会,所以不要犹豫去尝试它们。这个想法很简单:一次性提取几个(四个)层次结构级别,分析,如果需要使用游标和递归向下钻取。
CREATE FUNCTION BrowseMessages
( @rootMessageID uniqueidentifier,
  @rootDepth int,
  @isDrillDown bit )
RETURNS @RtnValue table (Id uniqueidentifier, Depth int, 
        Subject varchar(256))
AS
begin
    declare @t table (
    l1 uniqueidentifier, l2 uniqueidentifier, l3 uniqueidentifier, 
    l4 uniqueidentifier,
    id uniqueidentifier, depth int)
    insert into @t
    select
    m1.MessageID, m2.MessageID, m3.MessageID, m4.MessageID, null, 1
    from Messages as m1
    left outer join Messages m2 on m1.MessageID=m2.ParentID
    left outer join Messages m3 on m2.MessageID=m3.ParentID
    left outer join Messages m4 on m3.MessageID=m4.ParentID
    -- Add your filtering and paging here
    where (@rootMessageID is NULL and m1.ParentID is NULL) or 
          (@rootMessageID is NOT NULL and m1.ParentID=@rootMessageID)
    
    -- Get tree depth
    declare @depth int
    update @t set depth = depth + 1 where l2 is not null
    update @t set depth = depth + 1 where l3 is not null
    update @t set depth = depth + 1 where l4 is not null
    select @depth = max(depth) from @t
    
    -- Add missing leaf nodes of levels 1,2,3
    insert into @t select distinct l1, NULL, NULL,  NULL, NULL, 1 from @t 
    where l2 is not NULL
    insert into @t select distinct l1, l2, NULL, NULL, NULL, 2 from @t 
    where l3 is not NULL
    insert into @t select distinct l1, l2, l3, NULL, NULL, 3 from @t 
    where l4 is not NULL
    
    -- Populate id field
    update @t set id=coalesce(l4, l3, l2, l1)
    
    -- Collect result nodes in @tt and:
    -- - add @rootDepth
    -- - order them
    declare @tt table (Id uniqueidentifier, Depth int, Subject varchar(256))
    insert into @tt
    select Id=id, Depth=depth+@rootDepth, m.Subject
    from @t as t
    left outer join Messages as m1 on m1.MessageID=t.l1
    left outer join Messages as m2 on m2.MessageID=t.l2
    left outer join Messages as m3 on m3.MessageID=t.l3
    left outer join Messages as m4 on m4.MessageID=t.l4
    left outer join Messages as m on m.MessageID=t.id
    order by m1.ReceivedDate, l1, m2.ReceivedDate, l2, m3.ReceivedDate, 
         l3, m4.ReceivedDate, l4
    -- Check depth: if it doesn't exceed 3, then we do not want to drill 
    -- down further
    declare @maxCurrentDepth int
    select @maxCurrentDepth=max(Depth)-@rootDepth from @tt
    
    -- Drill down or simply select the whole bunch
    if (@isDrillDown <> 1 or @maxCurrentDepth < 4)
    begin
        insert into @RtnValue select * from @tt
    end
    else
    begin
        declare crs cursor read_only fast_forward for select Id, Depth, 
                Subject from @tt
        open crs
        declare @nextId uniqueidentifier, @nextDepth int, 
                @nextSubject varchar(256)
        while 1=1
        begin
            fetch next from crs into @nextId, @nextDepth, @nextSubject
            if @@fetch_status <> 0 break
            insert into @RtnValue values (@nextId, @nextDepth, @nextSubject)
            if (@nextDepth-@rootDepth = 4)
            begin
                insert into @RtnValue select * from 
                                     BrowseMessages(@nextId, @nextDepth, 1)
            end
        end
        close crs
        deallocate crs
    end
    return
end
        
它工作了(它检索了第 5 级和第 6 级的那些节点)
select * from BrowseMessages(NULL, 0, 1)
Id                                   Depth       Subject            
------------------------------------ ----------- -------------------
F24B7175-1905-4783-A8D6-0DA6F1759F2C 1           Message 1
E10B2FC6-E860-4BFF-8642-29A318CDB6CE 2           Message 1-1
E931AC59-4356-48AD-A587-9F0246532733 2           Message 1-2
FAEF6F5B-54AF-452F-A72E-3DBD21F643FD 3           Message 1-2-1
5B1D05CE-7889-4BD9-8ABC-20A1269668F5 4           Message 1-2-1-1
A3E5F551-6531-4D12-8A44-7C45C81C181A 4           Message 1-2-1-2
298D6A02-5BB6-4EDD-8030-7E15CD16A021 5           Message 1-2-1-2-1
CBCE69FF-19C1-47E7-842A-4944BA146BAA 5           Message 1-2-1-2-2
3C521C3A-AA40-49D0-83FA-E5101913A0E3 6           Message 1-2-1-2-2-1
50D2CA2B-A484-482F-A41F-ADFF1A636BE3 5           Message 1-2-1-2-3
71AD6FBD-EE92-473A-A038-C59CB7FF658A 3           Message 1-2-2
C300611C-D8EF-419B-8736-97AAE62108D8 2           Message 1-3
A4E8E001-99F1-450F-A4D7-3E0A491A085E 3           Message 1-3-1
CD7F8A67-B735-4590-A92C-D91F0341DD7F 3           Message 1-3-2
31A05919-1BCB-403E-807A-56B50B776C4E 1           Message 2
A731F5DC-88F7-49E7-BCBB-F912C69BFC7C 2           Message 2-1
        
        
接下来呢?
根据你的具体需求调整此代码。例如,如果你有几个根节点和很深的层次结构,请考虑增加连接数。如果你有成千上万个只有一级子节点的根节点,请考虑仅连接前三个级别而不是四个级别。添加分页和过滤。使用你在层次结构表中的其他有用的字段。使用适当的索引。如果你预计会有非常深的树,并且 4x32=128 级不够(4 是连接数,SQL Server 2000 将嵌套调用或嵌套级别的数量限制为 32),请增加连接数或不要使用递归函数。
尽情享用!

