patches nested_set to add tree rebuilding methods that are not using

save

The original nested_set implementation uses save on each node (issue,
project). With the callbacks defined on issues, this can lead to stale
object errors when an issues has been loaded by the rebuilding method
but changed by a callback.

The rebuild_silently! method can also take root nodes as their
parameter. With such provided the method will only fix their trees.

This is build upon by the selectively_rebuild_silently! method which
first looks for invalid nodes, determines their root node and then calls
rebuild_silently! with those root nodes. This should speed up rebuilding
on large trees.

Additionally, methods where added as a byproduct to retreive nodes
who's tree attributes are invalid.
pull/291/head
Jens Ulferts 11 years ago
parent 242601f6f4
commit b9590aff9a
  1. 72
      app/models/issue.rb
  2. 164
      lib/open_project/nested_set/rebuild_patch.rb
  3. 221
      lib/open_project/nested_set/root_id_handling.rb
  4. 104
      lib/open_project/nested_set/root_id_rebuilding.rb
  5. 204
      lib/open_project/nested_set/with_root_id_scope.rb
  6. 12
      spec/models/work_package_rebuild_nested_set.rb

@ -67,26 +67,6 @@ class Issue < WorkPackage
title << ')'
end
# find all issues
# * having set a parent_id where the root_id
# 1) points to self
# 2) points to an issue with a parent
# 3) points to an issue having a different root_id
# * having not set a parent_id but a root_id
# This unfortunately does not find the issue with the id 3 in the following example
# | id | parent_id | root_id |
# | 1 | | 1 |
# | 2 | 1 | 2 |
# | 3 | 2 | 2 |
# This would only be possible using recursive statements
#scope :invalid_root_ids, { :conditions => "(issues.parent_id IS NOT NULL AND " +
# "(issues.root_id = issues.id OR " +
# "(issues.root_id = parent_issues.id AND parent_issues.parent_id IS NOT NULL) OR " +
# "(issues.root_id != parent_issues.root_id))" +
# ") OR " +
# "(issues.parent_id IS NULL AND issues.root_id != issues.id)",
# :joins => "LEFT OUTER JOIN issues parent_issues ON parent_issues.id = issues.parent_id" }
before_create :default_assign
before_save :close_duplicates, :update_done_ratio_from_issue_status
before_destroy :remove_attachments
@ -566,58 +546,6 @@ class Issue < WorkPackage
projects
end
# method from acts_as_nested_set
def self.valid?
super && invalid_root_ids.empty?
end
def self.all_invalid
(super + invalid_root_ids).uniq
end
def self.rebuild_silently!(roots = nil)
invalid_root_ids_to_fix = if roots.is_a? Array
roots
elsif roots.present?
[roots]
else
[]
end
known_issue_parents = Hash.new do |hash, ancestor_id|
hash[ancestor_id] = Issue.find_by_id(ancestor_id)
end
fix_known_invalid_root_ids = lambda do
issues = invalid_root_ids
issues_roots = []
issues.each do |issue|
# At this point we can not trust nested set methods as the root_id is invalid.
# Therefore we trust the parent_id to fetch all ancestors until we find the root
ancestor = issue
while ancestor.parent_id do
ancestor = known_issue_parents[ancestor.parent_id]
end
issues_roots << ancestor
if invalid_root_ids_to_fix.empty? || invalid_root_ids_to_fix.map(&:id).include?(ancestor.id)
Issue.update_all({ :root_id => ancestor.id },
{ :id => issue.id })
end
end
fix_known_invalid_root_ids.call unless (issues_roots.map(&:id) & invalid_root_ids_to_fix.map(&:id)).empty?
end
fix_known_invalid_root_ids.call
super
end
private

@ -0,0 +1,164 @@
#-- encoding: UTF-8
#-- copyright
# OpenProject is a project management system.
#
# Copyright (C) 2012-2013 the OpenProject Team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 3.
#
# See doc/COPYRIGHT.rdoc for more details.
#++
# When included, it adds the ability to rebuild nested sets, thus fixing
# corrupted trees.
#
# AwesomeNestedSet has this functionality as well but it fixes the sets with
# running the callbacks defined in the model. This has two drawbacks:
#
# * It is prone to fail when a validation fails that has nothing to do with
# nested sets.
# * It is slow.
#
# The methods included are purely sql based. The code in here is partly copied
# over from awesome_nested_set's non sql methods.
module OpenProject::NestedSet::RebuildPatch
def self.included(base)
base.class_eval do
scope :invalid_left_and_rights,
:joins => "LEFT OUTER JOIN #{quoted_table_name} AS parent ON " +
"#{quoted_table_name}.#{quoted_parent_column_name} = parent.#{primary_key}",
:conditions =>
"#{quoted_table_name}.#{quoted_left_column_name} IS NULL OR " +
"#{quoted_table_name}.#{quoted_right_column_name} IS NULL OR " +
"#{quoted_table_name}.#{quoted_left_column_name} >= " +
"#{quoted_table_name}.#{quoted_right_column_name} OR " +
"(#{quoted_table_name}.#{quoted_parent_column_name} IS NOT NULL AND " +
"(#{quoted_table_name}.#{quoted_left_column_name} <= parent.#{quoted_left_column_name} OR " +
"#{quoted_table_name}.#{quoted_right_column_name} >= parent.#{quoted_right_column_name}))"
scope :invalid_duplicates_in_columns, lambda {
scope_string = Array(acts_as_nested_set_options[:scope]).map do |c|
"#{quoted_table_name}.#{connection.quote_column_name(c)} = duplicates.#{connection.quote_column_name(c)}"
end.join(" AND ")
scope_string = scope_string.size > 0 ? scope_string + " AND " : ""
{ :joins => "LEFT OUTER JOIN #{quoted_table_name} AS duplicates ON " +
scope_string +
"#{quoted_table_name}.#{primary_key} != duplicates.#{primary_key} AND " +
"(#{quoted_table_name}.#{quoted_left_column_name} = duplicates.#{quoted_left_column_name} OR " +
"#{quoted_table_name}.#{quoted_right_column_name} = duplicates.#{quoted_right_column_name})",
:conditions => "duplicates.#{primary_key} IS NOT NULL" }
}
scope :invalid_roots, lambda {
scope_string = Array(acts_as_nested_set_options[:scope]).map do |c|
"#{quoted_table_name}.#{connection.quote_column_name(c)} = other.#{connection.quote_column_name(c)}"
end.join(" AND ")
scope_string = scope_string.size > 0 ? scope_string + " AND " : ""
{ :joins => "LEFT OUTER JOIN #{quoted_table_name} AS other ON " +
"#{quoted_table_name}.#{primary_key} != other.#{primary_key} AND " +
"#{quoted_table_name}.#{parent_column_name} IS NULL AND " +
"other.#{parent_column_name} IS NULL AND " +
scope_string +
"#{quoted_table_name}.#{quoted_left_column_name} <= other.#{quoted_right_column_name} AND " +
"#{quoted_table_name}.#{quoted_right_column_name} >= other.#{quoted_left_column_name}",
:conditions => "other.#{primary_key} IS NOT NULL",
:order => quoted_left_column_name }
}
extend(ClassMethods)
end
end
module ClassMethods
def selectively_rebuild_silently!
all_invalid
invalid_roots, invalid_descendants = all_invalid.partition{ |node| node.send(parent_column_name).nil? }
while invalid_descendants.size > 0 do
invalid_descendants_parents = invalid_descendants.map{ |node| find(node.send(parent_column_name)) }
new_invalid_roots, invalid_descendants = invalid_descendants_parents.partition{ |node| node.send(parent_column_name).nil? }
invalid_roots += new_invalid_roots
invalid_descendants.uniq!
end
rebuild_silently!(invalid_roots.uniq)
end
# Rebuilds the left & rights if unset or invalid. Also very useful for converting from acts_as_tree.
# Very similar to original nested_set implementation but uses update_all so that callbacks are not triggered
def rebuild_silently!(roots = nil)
# Don't rebuild a valid tree.
return true if valid?
scope = lambda{ |node| }
if acts_as_nested_set_options[:scope]
scope = lambda{ |node|
scope_column_names.inject(""){|str, column_name|
str << "AND #{connection.quote_column_name(column_name)} = #{connection.quote(node.send(column_name.to_sym))} "
}
}
end
# setup index
indices = Hash.new do |h, k|
h[k] = 0
end
set_left_and_rights = lambda do |node|
# set left
node[left_column_name] = indices[scope.call(node)] += 1
# find
children = all(:conditions => ["#{quoted_parent_column_name} = ? #{scope.call(node)}", node],
:order => [quoted_left_column_name,
quoted_right_column_name,
acts_as_nested_set_options[:order]].compact.join(", "))
children.each{ |n| set_left_and_rights.call(n) }
# set right
node[right_column_name] = indices[scope.call(node)] += 1
changes = node.changes.inject({}) do |hash, (attribute, values)|
hash[attribute] = node.send(attribute.to_s)
hash
end
update_all(changes, { :id => node.id }) unless changes.empty?
end
# Find root node(s)
# or take provided
root_nodes = if roots.is_a? Array
roots
elsif roots.present?
[roots]
else
all(:conditions => "#{quoted_parent_column_name} IS NULL",
:order => [quoted_left_column_name,
quoted_right_column_name,
acts_as_nested_set_options[:order]].compact.join(", "))
end
root_nodes.each do |root_node|
set_left_and_rights.call(root_node)
end
end
def all_invalid
invalid = invalid_roots + invalid_left_and_rights + invalid_duplicates_in_columns
invalid.uniq
end
end
end

@ -0,0 +1,221 @@
#-- encoding: UTF-8
#-- copyright
# OpenProject is a project management system.
#
# Copyright (C) 2012-2013 the OpenProject Team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 3.
#
# See doc/COPYRIGHT.rdoc for more details.
#++
# When included it adds the nested_set behaviour scoped by the attribute
# 'root_id'
#
# AwesomeNestedSet offers beeing scoped but does not handle inserting and
# updating with the scoped beeing set right. This module adds this.
#
# When beeing scoped, we no longer have one big set over the the entire table
# but a forest of sets instead.
#
# The idea of this extension is to always place the node in the correct set
# before standard awesome_nested_set does something. This is necessary as all
# awesome_nested_set methods check for the scope. Operations crossing the
# border of a set are not supported.
#
# One goal of this implementation is to avoid using move_to of
# awesome_nested_set so that the callbacks defined for move_to (:before_move,
# :after_move and :around_move) can safely be used.
module OpenProject::NestedSet
module RootIdHandling
def self.included(base)
base.class_eval do
after_save :manage_root_id
acts_as_nested_set :scope => 'root_id', :dependent => :destroy
# callback from awesome_nested_set
# we call it by hand as we have to set the scope first
skip_callback :create, :before, :set_default_left_and_right
validate :validate_correct_parent
include InstanceMethods
end
end
module InstanceMethods
# The number of "items" this issue spans in it's nested set
#
# A parent issue would span all of it's children + 1 left + 1 right (3)
#
# | parent |
# || child ||
#
# A child would span only itself (1)
#
# |child|
def nested_set_span
rgt - lft
end
# Does this issue have children?
def children?
!leaf?
end
def validate_correct_parent
# Checks parent issue assignment
if parent
if !Setting.cross_project_issue_relations? && parent.project_id != self.project_id
errors.add :parent_id, :not_a_valid_parent
elsif !new_record?
# moving an existing issue
if parent.root_id != root_id
# we can always move to another tree
elsif move_possible?(parent)
# move accepted inside tree
else
errors.add :parent_id, :not_a_valid_parent
end
end
end
end
def parent_issue_id=(arg)
warn "[DEPRECATION] No longer use parent_issue_id= - Use parent_id= instead."
self.parent_id = arg
end
def parent_issue_id
warn "[DEPRECATION] No longer use parent_issue_id - Use parent_id instead."
parent_id
end
private
def manage_root_id
if root_id.nil? # new node
initial_root_id
elsif parent_id_changed?
update_root_id
end
end
# Places the node in the correct set upon creation.
#
# If a parent is provided on creation, the new node is placed in the set
# of the parent. If no parent is provided, the new node defines it's own
# set.
def initial_root_id
if parent_id
self.root_id = parent.root_id
else
self.root_id = id
end
set_default_left_and_right
persist_nested_set_attributes
end
# Places the node in a new set when necessary, so that it can be assigned
# to a different parent.
#
# This method does nothing if the new parent is within the same set. The
# method puts the node and all it's descendants in the set of the
# designated parent if the designated parent is within another set.
def update_root_id
new_root_id = parent_id.nil? ? id : parent.root_id
if new_root_id != root_id
# as the following actions depend on the
# node having current values, we reload them here
self.reload_nested_set
# and save them in order to be save between removing the node from
# the set and fixing the former set's attributes
old_root_id = root_id
old_rgt = rgt
moved_span = nested_set_span + 1
move_subtree_to_new_set(new_root_id)
correct_former_set_attributes(old_root_id, moved_span, old_rgt)
end
end
def persist_nested_set_attributes
self.class.update_all("root_id = #{root_id}, " +
"#{quoted_left_column_name} = #{lft}, " +
"#{quoted_right_column_name} = #{rgt}",
["id = ?", id])
end
# Moves the node and all it's descendants to the set with the provided
# root_id. It does not change the parent/child relationships.
#
# The subtree is placed to the right of the existing tree. All the
# subtree's nodes receive new lft/rgt values that are higher than the
# maximum rgt value of the set.
#
# The set than has two roots. As such this method should only be used
# internally and the results should only be persisted for a short time.
def move_subtree_to_new_set(new_root_id)
old_root_id = self.root_id
self.root_id = new_root_id
target_maxright = nested_set_scope.maximum(right_column_name) || 0
offset = target_maxright + 1 - lft
# update all the sutree's nodes. The lft and right values are incremented
# by the maximum of the set's right value.
self.class.update_all("root_id = #{root_id}, " +
"#{quoted_left_column_name} = lft + #{offset}, " +
"#{quoted_right_column_name} = rgt + #{offset}",
["root_id = ? AND " +
"#{quoted_left_column_name} >= ? AND " +
"#{quoted_right_column_name} <= ? ", old_root_id, lft, rgt])
self[left_column_name] = lft + offset
self[right_column_name] = rgt + offset
end
# Update all nodes left and right values in the former set having a right
# value larger than self's former right value.
#
# It calculates what will have to be subtracted from the left and right
# values of the nodes in question. Then it will always subtract this
# value from the right value of every node. It will only subtract the
# value from the left value if the left value is larger than the removed
# node's right value.
#
# Given a set:
# 1*6
# / \
# 2*3 4*5
# for wich the node with lft = 2 and rgt = 3 is self and was removed, the
# resulting set will be:
# 1*4
# |
# 2*3
def correct_former_set_attributes(old_root_id, removed_span, rgt_offset)
# As every node takes two integers we can multiply the amount of
# removed_nodes by 2 to calculate the value by which right and left
# will have to be reduced.
#removed_span = removed_nodes * 2
self.class.update_all("#{quoted_right_column_name} = #{quoted_right_column_name} - #{removed_span}, " +
"#{quoted_left_column_name} = CASE " +
"WHEN #{quoted_left_column_name} > #{rgt_offset} " +
"THEN #{quoted_left_column_name} - #{removed_span} " +
"ELSE #{quoted_left_column_name} END",
["root_id = ? AND #{quoted_right_column_name} > ?", old_root_id, rgt_offset])
end
end
end
end

@ -0,0 +1,104 @@
#-- encoding: UTF-8
#-- copyright
# OpenProject is a project management system.
#
# Copyright (C) 2012-2013 the OpenProject Team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 3.
#
# See doc/COPYRIGHT.rdoc for more details.
#++
# This module, when included, adds the ability to rebuild nested sets that are
# scoped by a root_id attribute.
#
# For the details of rebuilding see the included RebuildPatch.
module OpenProject::NestedSet
module RootIdRebuilding
def self.included(base)
base.class_eval do
include RebuildPatch
# find all nodes
# * having set a parent_id where the root_id
# 1) points to self
# 2) points to a node with a parent
# 3) points to a node having a different root_id
# * having not set a parent_id but a root_id
# This unfortunately does not find the node with the id 3 in the following example
# | id | parent_id | root_id |
# | 1 | | 1 |
# | 2 | 1 | 2 |
# | 3 | 2 | 2 |
# This would only be possible using recursive statements
scope :invalid_root_ids, { :conditions => "(#{quoted_parent_column_full_name} IS NOT NULL AND " +
"(#{quoted_table_name}.root_id = #{quoted_table_name}.id OR " +
"(#{quoted_table_name}.root_id = parents.#{quoted_primary_key} AND parents.#{quoted_parent_column_name} IS NOT NULL) OR " +
"(#{quoted_table_name}.root_id != parents.root_id))" +
") OR " +
"(#{quoted_table_name}.parent_id IS NULL AND #{quoted_table_name}.root_id != #{quoted_table_name}.#{quoted_primary_key})",
:joins => "LEFT OUTER JOIN #{quoted_table_name} parents ON parents.#{quoted_primary_key} = #{quoted_parent_column_full_name}" }
extend ClassMethods
end
end
module ClassMethods
# method from acts_as_nested_set
def valid?
super && invalid_root_ids.empty?
end
def all_invalid
(super + invalid_root_ids).uniq
end
def rebuild_silently!(roots = nil)
invalid_root_ids_to_fix = if roots.is_a? Array
roots
elsif roots.present?
[roots]
else
[]
end
known_node_parents = Hash.new do |hash, ancestor_id|
hash[ancestor_id] = find_by_id(ancestor_id)
end
fix_known_invalid_root_ids = lambda do
invalid_nodes = invalid_root_ids
invalid_roots = []
invalid_nodes.each do |node|
# At this point we can not trust nested set methods as the root_id is invalid.
# Therefore we trust the parent_id to fetch all ancestors until we find the root
ancestor = node
while ancestor.parent_id do
ancestor = known_node_parents[ancestor.parent_id]
end
invalid_roots << ancestor
if invalid_root_ids_to_fix.empty? || invalid_root_ids_to_fix.map(&:id).include?(ancestor.id)
update_all({ :root_id => ancestor.id },
{ :id => node.id })
end
end
fix_known_invalid_root_ids.call unless (invalid_roots.map(&:id) & invalid_root_ids_to_fix.map(&:id)).empty?
end
fix_known_invalid_root_ids.call
super
end
end
end
end

@ -10,212 +10,12 @@
# See doc/COPYRIGHT.rdoc for more details.
#++
# When included it adds the nested_set behaviour scoped by the attribute
# 'root_id'
#
# AwesomeNestedSet offers beeing scoped but does not handle inserting and
# updating with the scoped beeing set right. This module adds this.
#
# When beeing scoped, we no longer have one big set over the the entire table
# but a forest of sets instead.
#
# The idea of this extension is to always place the node in the correct set
# before standard awesome_nested_set does something. This is necessary as all
# awesome_nested_set methods check for the scope. Operations crossing the
# border of a set are not supported.
#
# One goal of this implementation is to avoid using move_to of
# awesome_nested_set so that the callbacks defined for move_to (:before_move,
# :after_move and :around_move) can safely be used.
module OpenProject::NestedSet
module WithRootIdScope
def self.included(base)
base.class_eval do
after_save :manage_root_id
acts_as_nested_set :scope => 'root_id', :dependent => :destroy
# callback from awesome_nested_set
# we call it by hand as we have to set the scope first
skip_callback :create, :before, :set_default_left_and_right
validate :validate_correct_parent
include InstanceMethods
end
end
module InstanceMethods
# The number of "items" this issue spans in it's nested set
#
# A parent issue would span all of it's children + 1 left + 1 right (3)
#
# | parent |
# || child ||
#
# A child would span only itself (1)
#
# |child|
def nested_set_span
rgt - lft
end
# Does this issue have children?
def children?
!leaf?
end
def validate_correct_parent
# Checks parent issue assignment
if parent
if !Setting.cross_project_issue_relations? && parent.project_id != self.project_id
errors.add :parent_id, :not_a_valid_parent
elsif !new_record?
# moving an existing issue
if parent.root_id != root_id
# we can always move to another tree
elsif move_possible?(parent)
# move accepted inside tree
else
errors.add :parent_id, :not_a_valid_parent
end
end
end
end
def parent_issue_id=(arg)
warn "[DEPRECATION] No longer use parent_issue_id= - Use parent_id= instead."
self.parent_id = arg
end
def parent_issue_id
warn "[DEPRECATION] No longer use parent_issue_id - Use parent_id instead."
parent_id
end
private
def manage_root_id
if root_id.nil? # new node
initial_root_id
elsif parent_id_changed?
update_root_id
end
end
# Places the node in the correct set upon creation.
#
# If a parent is provided on creation, the new node is placed in the set
# of the parent. If no parent is provided, the new node defines it's own
# set.
def initial_root_id
if parent_id
self.root_id = parent.root_id
else
self.root_id = id
end
set_default_left_and_right
persist_nested_set_attributes
end
# Places the node in a new set when necessary, so that it can be assigned
# to a different parent.
#
# This method does nothing if the new parent is within the same set. The
# method puts the node and all it's descendants in the set of the
# designated parent if the designated parent is within another set.
def update_root_id
new_root_id = parent_id.nil? ? id : parent.root_id
if new_root_id != root_id
# as the following actions depend on the
# node having current values, we reload them here
self.reload_nested_set
# and save them in order to be save between removing the node from
# the set and fixing the former set's attributes
old_root_id = root_id
old_rgt = rgt
moved_span = nested_set_span + 1
move_subtree_to_new_set(new_root_id)
correct_former_set_attributes(old_root_id, moved_span, old_rgt)
end
end
def persist_nested_set_attributes
self.class.update_all("root_id = #{root_id}, " +
"#{quoted_left_column_name} = #{lft}, " +
"#{quoted_right_column_name} = #{rgt}",
["id = ?", id])
end
# Moves the node and all it's descendants to the set with the provided
# root_id. It does not change the parent/child relationships.
#
# The subtree is placed to the right of the existing tree. All the
# subtree's nodes receive new lft/rgt values that are higher than the
# maximum rgt value of the set.
#
# The set than has two roots. As such this method should only be used
# internally and the results should only be persisted for a short time.
def move_subtree_to_new_set(new_root_id)
old_root_id = self.root_id
self.root_id = new_root_id
target_maxright = nested_set_scope.maximum(right_column_name) || 0
offset = target_maxright + 1 - lft
# update all the sutree's nodes. The lft and right values are incremented
# by the maximum of the set's right value.
self.class.update_all("root_id = #{root_id}, " +
"#{quoted_left_column_name} = lft + #{offset}, " +
"#{quoted_right_column_name} = rgt + #{offset}",
["root_id = ? AND " +
"#{quoted_left_column_name} >= ? AND " +
"#{quoted_right_column_name} <= ? ", old_root_id, lft, rgt])
self[left_column_name] = lft + offset
self[right_column_name] = rgt + offset
end
# Update all nodes left and right values in the former set having a right
# value larger than self's former right value.
#
# It calculates what will have to be subtracted from the left and right
# values of the nodes in question. Then it will always subtract this
# value from the right value of every node. It will only subtract the
# value from the left value if the left value is larger than the removed
# node's right value.
#
# Given a set:
# 1*6
# / \
# 2*3 4*5
# for wich the node with lft = 2 and rgt = 3 is self and was removed, the
# resulting set will be:
# 1*4
# |
# 2*3
def correct_former_set_attributes(old_root_id, removed_span, rgt_offset)
# As every node takes two integers we can multiply the amount of
# removed_nodes by 2 to calculate the value by which right and left
# will have to be reduced.
#removed_span = removed_nodes * 2
self.class.update_all("#{quoted_right_column_name} = #{quoted_right_column_name} - #{removed_span}, " +
"#{quoted_left_column_name} = CASE " +
"WHEN #{quoted_left_column_name} > #{rgt_offset} " +
"THEN #{quoted_left_column_name} - #{removed_span} " +
"ELSE #{quoted_left_column_name} END",
["root_id = ? AND #{quoted_right_column_name} > ?", old_root_id, rgt_offset])
include RootIdHandling
include RootIdRebuilding
end
end
end

@ -1,3 +1,15 @@
#-- encoding: UTF-8
#-- copyright
# OpenProject is a project management system.
#
# Copyright (C) 2012-2013 the OpenProject Team
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 3.
#
# See doc/COPYRIGHT.rdoc for more details.
#++
require File.dirname(__FILE__) + '/../spec_helper'
describe WorkPackage, "rebuilding nested set" do

Loading…
Cancel
Save