Skip to content
Snippets Groups Projects
Unverified Commit 5c9abdef authored by Eugen Rochko's avatar Eugen Rochko Committed by GitHub
Browse files

Add retention policy for cached content and media (#19232)

parent 3e0999cd
No related branches found
No related tags found
No related merge requests found
Showing
with 329 additions and 93 deletions
...@@ -7,9 +7,7 @@ class RedisConfiguration ...@@ -7,9 +7,7 @@ class RedisConfiguration
@pool = ConnectionPool.new(size: new_pool_size) { new.connection } @pool = ConnectionPool.new(size: new_pool_size) { new.connection }
end end
def with delegate :with, to: :pool
pool.with { |redis| yield redis }
end
def pool def pool
@pool ||= establish_pool(pool_size) @pool ||= establish_pool(pool_size)
...@@ -17,7 +15,7 @@ class RedisConfiguration ...@@ -17,7 +15,7 @@ class RedisConfiguration
def pool_size def pool_size
if Sidekiq.server? if Sidekiq.server?
Sidekiq.options[:concurrency] Sidekiq[:concurrency]
else else
ENV['MAX_THREADS'] || 5 ENV['MAX_THREADS'] || 5
end end
......
# frozen_string_literal: true
# Namespace for the vacuum (cleanup) operation classes such as
# Vacuum::BackupsVacuum and Vacuum::StatusesVacuum.
module Vacuum
end
# frozen_string_literal: true # frozen_string_literal: true
class Scheduler::DoorkeeperCleanupScheduler class Vacuum::AccessTokensVacuum
include Sidekiq::Worker def perform
vacuum_revoked_access_tokens!
vacuum_revoked_access_grants!
end
sidekiq_options retry: 0 private
def perform def vacuum_revoked_access_tokens!
Doorkeeper::AccessToken.where('revoked_at IS NOT NULL').where('revoked_at < NOW()').delete_all Doorkeeper::AccessToken.where('revoked_at IS NOT NULL').where('revoked_at < NOW()').delete_all
end
def vacuum_revoked_access_grants!
Doorkeeper::AccessGrant.where('revoked_at IS NOT NULL').where('revoked_at < NOW()').delete_all Doorkeeper::AccessGrant.where('revoked_at IS NOT NULL').where('revoked_at < NOW()').delete_all
SystemKey.expired.delete_all
end end
end end
# frozen_string_literal: true
# Destroys Backup records whose +created_at+ is older than a configurable
# retention period.
class Vacuum::BackupsVacuum
  # @param retention_period [#ago, nil] how long backups are kept; a blank
  #   value disables the vacuum. Presumably an ActiveSupport::Duration --
  #   confirm against the caller.
  def initialize(retention_period)
    @retention_period = retention_period
  end

  # Runs the vacuum. Does nothing when no retention period is configured.
  def perform
    return unless retention_period?

    vacuum_expired_backups!
  end

  private

  # True when a retention period has been supplied.
  def retention_period?
    @retention_period.present?
  end

  # Unscoped relation of backups created before the retention cutoff.
  def backups_past_retention_period
    created_at = Backup.arel_table[:created_at]

    Backup.unscoped.where(created_at.lt(@retention_period.ago))
  end

  # Destroys the expired backups batch by batch.
  def vacuum_expired_backups!
    expired = backups_past_retention_period
    expired.in_batches.destroy_all
  end
end
# frozen_string_literal: true
# Cleans the home and list feeds that belong to confirmed but inactive users
# by handing their ids to FeedManager#clean_feeds!.
class Vacuum::FeedsVacuum
  # Cleans home feeds first, then list feeds.
  def perform
    vacuum_inactive_home_feeds!
    vacuum_inactive_list_feeds!
  end

  private

  # Singleton FeedManager used for the actual cleaning.
  def feed_manager
    FeedManager.instance
  end

  # Users matched by the +confirmed+ and +inactive+ scopes.
  def inactive_users
    User.confirmed.inactive
  end

  # Lists owned by the accounts of inactive users.
  def inactive_users_lists
    owner_ids = inactive_users.select(:account_id)
    List.where(account_id: owner_ids)
  end

  # Walks inactive users in batches and cleans each batch's home feeds.
  def vacuum_inactive_home_feeds!
    inactive_users.select(:id, :account_id).find_in_batches do |batch|
      feed_manager.clean_feeds!(:home, batch.map { |user| user.account_id })
    end
  end

  # Walks the lists of inactive users in batches and cleans their feeds.
  def vacuum_inactive_list_feeds!
    inactive_users_lists.select(:id).find_in_batches do |batch|
      feed_manager.clean_feeds!(:list, batch.map { |list| list.id })
    end
  end
end
# frozen_string_literal: true
# Removes cached files of old remote media attachments and destroys media
# attachment records that were never attached to anything.
class Vacuum::MediaAttachmentsVacuum
  # Minimum age of an unattached media attachment before it is destroyed.
  TTL = 1.day.freeze

  # @param retention_period [#ago, nil] how long cached remote files are
  #   kept; a blank value skips the cached-file cleanup only.
  def initialize(retention_period)
    @retention_period = retention_period
  end

  # Cleans cached files when a retention period is set; orphaned records are
  # always cleaned.
  def perform
    vacuum_cached_files! if retention_period?
    vacuum_orphaned_records!
  end

  private

  # True when a retention period has been supplied.
  def retention_period?
    @retention_period.present?
  end

  # Records matched by the +remote+ and +cached+ scopes whose created_at and
  # updated_at are both older than the retention period.
  def media_attachments_past_retention_period
    columns = MediaAttachment.arel_table

    MediaAttachment
      .unscoped
      .remote
      .cached
      .where(columns[:created_at].lt(@retention_period.ago))
      .where(columns[:updated_at].lt(@retention_period.ago))
  end

  # Unattached records created more than TTL ago.
  def orphaned_media_attachments
    created_at = MediaAttachment.arel_table[:created_at]

    MediaAttachment.unscoped.unattached.where(created_at.lt(TTL.ago))
  end

  # Destroys the file and thumbnail of each expired attachment, then saves
  # the record; the record itself is kept.
  def vacuum_cached_files!
    media_attachments_past_retention_period.find_each do |attachment|
      attachment.file.destroy
      attachment.thumbnail.destroy
      attachment.save
    end
  end

  # Destroys orphaned records batch by batch.
  def vacuum_orphaned_records!
    orphans = orphaned_media_attachments
    orphans.in_batches.destroy_all
  end
end
# frozen_string_literal: true
# Removes cached images of old preview cards and destroys preview cards that
# are not referenced by any status.
class Vacuum::PreviewCardsVacuum
  # Minimum age of an unreferenced preview card before it is destroyed.
  TTL = 1.day.freeze

  # @param retention_period [#ago, nil] how long cached images are kept; a
  #   blank value skips the cached-image cleanup only.
  def initialize(retention_period)
    @retention_period = retention_period
  end

  # Cleans cached images when a retention period is set; orphaned records
  # are always cleaned.
  def perform
    vacuum_cached_images! if retention_period?
    vacuum_orphaned_records!
  end

  private

  # True when a retention period has been supplied.
  def retention_period?
    @retention_period.present?
  end

  # Cards matched by the +cached+ scope that were last updated before the
  # retention cutoff.
  def preview_cards_past_retention_period
    updated_at = PreviewCard.arel_table[:updated_at]

    PreviewCard.cached.where(updated_at.lt(@retention_period.ago))
  end

  # Cards with no row in preview_cards_statuses, created more than TTL ago.
  def orphaned_preview_cards
    created_at = PreviewCard.arel_table[:created_at]

    PreviewCard
      .where('NOT EXISTS (SELECT 1 FROM preview_cards_statuses WHERE preview_cards_statuses.preview_card_id = preview_cards.id)')
      .where(created_at.lt(TTL.ago))
  end

  # Destroys the image of each expired card, then saves the record; the
  # record itself is kept.
  def vacuum_cached_images!
    preview_cards_past_retention_period.find_each do |card|
      card.image.destroy
      card.save
    end
  end

  # Destroys orphaned cards batch by batch.
  def vacuum_orphaned_records!
    orphans = orphaned_preview_cards
    orphans.in_batches.destroy_all
  end
end
# frozen_string_literal: true
# Deletes statuses from remote accounts once they are older than a
# configurable retention period.
class Vacuum::StatusesVacuum
  include Redisable

  # @param retention_period [#ago, nil] how long remote statuses are kept; a
  #   blank value disables the vacuum.
  def initialize(retention_period)
    @retention_period = retention_period
  end

  # Runs the vacuum. Does nothing when no retention period is configured.
  def perform
    return unless retention_period?

    vacuum_statuses!
  end

  private

  # True when a retention period has been supplied.
  def retention_period?
    @retention_period.present?
  end

  # Processes the expired statuses batch by batch.
  def vacuum_statuses!
    statuses_scope.find_in_batches do |batch|
      # Clean-up that foreign keys cannot do for us (conversations, search
      # index) has to happen before the rows disappear.
      remove_from_account_conversations(batch)
      remove_from_search_index(batch)

      # Most associated records go away through foreign keys. Media
      # attachments are left behind as orphans.
      Status.where(id: batch.map { |status| status.id }).delete_all
    end
  end

  # Kept statuses of remote accounts with an id below the cutoff id. Only
  # the id and visibility columns are loaded.
  def statuses_scope
    id_column = Status.arel_table[:id]

    Status
      .unscoped
      .kept
      .where(account: Account.remote)
      .where(id_column.lt(retention_period_as_id))
      .select(:id, :visibility)
  end

  # Snowflake id for the retention cutoff time, generated without the
  # random component.
  def retention_period_as_id
    Mastodon::Snowflake.id_at(@retention_period.ago, with_random: false)
  end

  # Runs ANALYZE on the statuses table. Not called anywhere in this class.
  def analyze_statuses!
    ActiveRecord::Base.connection.execute('ANALYZE statuses')
  end

  # Reloads the direct-visibility statuses of the batch with their account
  # and mentioned accounts, then unlinks each from its conversations.
  def remove_from_account_conversations(statuses)
    direct_ids = statuses.select(&:direct_visibility?).map(&:id)

    Status
      .where(id: direct_ids)
      .includes(:account, mentions: :account)
      .each { |status| status.unlink_from_conversations }
  end

  # Adds the batch's ids to the 'chewy:queue:StatusesIndex' Redis set, but
  # only when Chewy is enabled.
  def remove_from_search_index(statuses)
    return unless Chewy.enabled?

    with_redis do |redis|
      redis.sadd('chewy:queue:StatusesIndex', statuses.map(&:id))
    end
  end
end
# frozen_string_literal: true
# Deletes the SystemKey records matched by the +expired+ scope.
class Vacuum::SystemKeysVacuum
  # Runs the vacuum.
  def perform
    vacuum_expired_system_keys!
  end

  private

  # Deletes expired keys with a single delete_all on the relation.
  def vacuum_expired_system_keys!
    expired_keys = SystemKey.expired
    expired_keys.delete_all
  end
end
# frozen_string_literal: true
# Turns the retention-period settings (a number of days) into durations.
# Each reader returns nil when the setting does not hold a positive whole
# number, which callers treat as "no retention period".
class ContentRetentionPolicy
  # @return [ContentRetentionPolicy] a fresh policy backed by Setting
  def self.current
    new
  end

  # @return [Object, nil] Setting.media_cache_retention_period as days, or nil
  def media_cache_retention_period
    retention_period Setting.media_cache_retention_period
  end

  # @return [Object, nil] Setting.content_cache_retention_period as days, or nil
  def content_cache_retention_period
    retention_period Setting.content_cache_retention_period
  end

  # @return [Object, nil] Setting.backups_retention_period as days, or nil
  def backups_retention_period
    retention_period Setting.backups_retention_period
  end

  private

  # Converts a raw setting value into a number of days.
  #
  # Accepts an Integer or a String holding a base-10 integer ("7"). The
  # string case matters because a value entered through a text form field
  # may be stored as a String (presumably the case for the admin settings
  # form -- confirm how it persists values); without the coercion such a
  # value would silently disable the retention policy.
  #
  # @param value [Object] raw setting value
  # @return [Object, nil] +days.days+ for a positive integer, otherwise nil
  def retention_period(value)
    # Base 10 is forced so "0x10" or "0b11" are rejected instead of being
    # parsed with a radix prefix. A base may only be passed for strings.
    days = value.is_a?(String) ? Integer(value, 10, exception: false) : value

    days.days if days.is_a?(Integer) && days.positive?
  end
end
...@@ -32,6 +32,9 @@ class Form::AdminSettings ...@@ -32,6 +32,9 @@ class Form::AdminSettings
show_domain_blocks_rationale show_domain_blocks_rationale
noindex noindex
require_invite_text require_invite_text
media_cache_retention_period
content_cache_retention_period
backups_retention_period
).freeze ).freeze
BOOLEAN_KEYS = %i( BOOLEAN_KEYS = %i(
...@@ -64,6 +67,7 @@ class Form::AdminSettings ...@@ -64,6 +67,7 @@ class Form::AdminSettings
validates :bootstrap_timeline_accounts, existing_username: { multiple: true } validates :bootstrap_timeline_accounts, existing_username: { multiple: true }
validates :show_domain_blocks, inclusion: { in: %w(disabled users all) } validates :show_domain_blocks, inclusion: { in: %w(disabled users all) }
validates :show_domain_blocks_rationale, inclusion: { in: %w(disabled users all) } validates :show_domain_blocks_rationale, inclusion: { in: %w(disabled users all) }
validates :media_cache_retention_period, :content_cache_retention_period, :backups_retention_period, numericality: { only_integer: true }
def initialize(_attributes = {}) def initialize(_attributes = {})
super super
......
...@@ -45,7 +45,6 @@ ...@@ -45,7 +45,6 @@
.fields-group .fields-group
= f.input :require_invite_text, as: :boolean, wrapper: :with_label, label: t('admin.settings.registrations.require_invite_text.title'), hint: t('admin.settings.registrations.require_invite_text.desc_html'), disabled: !approved_registrations? = f.input :require_invite_text, as: :boolean, wrapper: :with_label, label: t('admin.settings.registrations.require_invite_text.title'), hint: t('admin.settings.registrations.require_invite_text.desc_html'), disabled: !approved_registrations?
.fields-group
%hr.spacer/ %hr.spacer/
...@@ -100,5 +99,12 @@ ...@@ -100,5 +99,12 @@
= f.input :site_terms, wrapper: :with_block_label, as: :text, label: t('admin.settings.site_terms.title'), hint: t('admin.settings.site_terms.desc_html'), input_html: { rows: 8 } = f.input :site_terms, wrapper: :with_block_label, as: :text, label: t('admin.settings.site_terms.title'), hint: t('admin.settings.site_terms.desc_html'), input_html: { rows: 8 }
= f.input :custom_css, wrapper: :with_block_label, as: :text, input_html: { rows: 8 }, label: t('admin.settings.custom_css.title'), hint: t('admin.settings.custom_css.desc_html') = f.input :custom_css, wrapper: :with_block_label, as: :text, input_html: { rows: 8 }, label: t('admin.settings.custom_css.title'), hint: t('admin.settings.custom_css.desc_html')
%hr.spacer/
.fields-group
= f.input :media_cache_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
= f.input :content_cache_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
= f.input :backups_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
.actions .actions
= f.button :button, t('generic.save_changes'), type: :submit = f.button :button, t('generic.save_changes'), type: :submit
# frozen_string_literal: true
# Sidekiq job that destroys Backup records created more than 7 days ago.
class Scheduler::BackupCleanupScheduler
  include Sidekiq::Worker

  sidekiq_options retry: 0

  # Destroys every old backup one by one; destroy! raises on failure.
  def perform
    old_backups.reorder(nil).find_each { |backup| backup.destroy! }
  end

  private

  # Backups created before the 7-day cutoff.
  def old_backups
    cutoff = 7.days.ago
    Backup.where('created_at < ?', cutoff)
  end
end
# frozen_string_literal: true
# Sidekiq job that cleans the home and list feeds of confirmed but inactive
# users through FeedManager#clean_feeds!.
class Scheduler::FeedCleanupScheduler
  include Sidekiq::Worker
  include Redisable

  sidekiq_options retry: 0

  # Cleans home feeds first, then list feeds.
  def perform
    clean_home_feeds!
    clean_list_feeds!
  end

  private

  # Singleton FeedManager used for the actual cleaning.
  def feed_manager
    FeedManager.instance
  end

  # Account ids of inactive users; plucked once and memoized.
  def inactive_account_ids
    @inactive_account_ids ||= User.confirmed.inactive.pluck(:account_id)
  end

  # Ids of the lists owned by inactive accounts.
  def inactive_list_ids
    lists = List.where(account_id: inactive_account_ids)
    lists.pluck(:id)
  end

  # Cleans the home feeds of all inactive accounts in one call.
  def clean_home_feeds!
    feed_manager.clean_feeds!(:home, inactive_account_ids)
  end

  # Cleans the list feeds of all inactive accounts in one call.
  def clean_list_feeds!
    feed_manager.clean_feeds!(:list, inactive_list_ids)
  end
end
# frozen_string_literal: true
# Sidekiq job that destroys unattached media attachments created more than
# one day ago.
class Scheduler::MediaCleanupScheduler
  include Sidekiq::Worker

  sidekiq_options retry: 0

  # Destroys every matching media attachment one by one.
  def perform
    unattached_media.find_each { |media| media.destroy }
  end

  private

  # Unattached media older than one day, with any ordering removed.
  def unattached_media
    cutoff = 1.day.ago
    MediaAttachment.reorder(nil).unattached.where('created_at < ?', cutoff)
  end
end
# frozen_string_literal: true
# Sidekiq job that runs every vacuum operation in sequence. A failure in one
# operation is logged and does not stop the remaining ones.
class Scheduler::VacuumScheduler
  include Sidekiq::Worker

  sidekiq_options retry: 0

  # Performs each vacuum operation in turn.
  def perform
    vacuum_operations.each do |operation|
      operation.perform
    rescue => e
      # Log and carry on so one broken vacuum cannot block the others.
      Rails.logger.error("Error while running #{operation.class.name}: #{e}")
    end
  end

  private

  # Operations to run, in execution order.
  def vacuum_operations
    [
      statuses_vacuum,
      media_attachments_vacuum,
      preview_cards_vacuum,
      backups_vacuum,
      access_tokens_vacuum,
      # Without this entry nothing in this job purges expired system keys.
      system_keys_vacuum,
      feeds_vacuum,
    ]
  end

  def statuses_vacuum
    Vacuum::StatusesVacuum.new(content_retention_policy.content_cache_retention_period)
  end

  def media_attachments_vacuum
    Vacuum::MediaAttachmentsVacuum.new(content_retention_policy.media_cache_retention_period)
  end

  # Preview card images follow the media cache retention period.
  def preview_cards_vacuum
    Vacuum::PreviewCardsVacuum.new(content_retention_policy.media_cache_retention_period)
  end

  def backups_vacuum
    Vacuum::BackupsVacuum.new(content_retention_policy.backups_retention_period)
  end

  def access_tokens_vacuum
    Vacuum::AccessTokensVacuum.new
  end

  def system_keys_vacuum
    Vacuum::SystemKeysVacuum.new
  end

  def feeds_vacuum
    Vacuum::FeedsVacuum.new
  end

  # Policy object that supplies the configured retention periods.
  def content_retention_policy
    ContentRetentionPolicy.current
  end
end
...@@ -73,6 +73,10 @@ en: ...@@ -73,6 +73,10 @@ en:
actions: actions:
hide: Completely hide the filtered content, behaving as if it did not exist hide: Completely hide the filtered content, behaving as if it did not exist
warn: Hide the filtered content behind a warning mentioning the filter's title warn: Hide the filtered content behind a warning mentioning the filter's title
form_admin_settings:
backups_retention_period: Keep generated user archives for the specified number of days.
content_cache_retention_period: Posts from other servers will be deleted after the specified number of days when set to a positive value. This may be irreversible.
media_cache_retention_period: Downloaded media files will be deleted after the specified number of days when set to a positive value, and re-downloaded on demand.
form_challenge: form_challenge:
current_password: You are entering a secure area current_password: You are entering a secure area
imports: imports:
...@@ -207,6 +211,10 @@ en: ...@@ -207,6 +211,10 @@ en:
actions: actions:
hide: Hide completely hide: Hide completely
warn: Hide with a warning warn: Hide with a warning
form_admin_settings:
backups_retention_period: User archive retention period
content_cache_retention_period: Content cache retention period
media_cache_retention_period: Media cache retention period
interactions: interactions:
must_be_follower: Block notifications from non-followers must_be_follower: Block notifications from non-followers
must_be_following: Block notifications from people you don't follow must_be_following: Block notifications from people you don't follow
......
...@@ -70,6 +70,7 @@ defaults: &defaults ...@@ -70,6 +70,7 @@ defaults: &defaults
show_domain_blocks: 'disabled' show_domain_blocks: 'disabled'
show_domain_blocks_rationale: 'disabled' show_domain_blocks_rationale: 'disabled'
require_invite_text: false require_invite_text: false
backups_retention_period: 7
development: development:
<<: *defaults <<: *defaults
......
...@@ -25,22 +25,14 @@ ...@@ -25,22 +25,14 @@
every: '5m' every: '5m'
class: Scheduler::IndexingScheduler class: Scheduler::IndexingScheduler
queue: scheduler queue: scheduler
media_cleanup_scheduler: vacuum_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *' cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
class: Scheduler::MediaCleanupScheduler class: Scheduler::VacuumScheduler
queue: scheduler
feed_cleanup_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(0..2) %> * * *'
class: Scheduler::FeedCleanupScheduler
queue: scheduler queue: scheduler
follow_recommendations_scheduler: follow_recommendations_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(6..9) %> * * *' cron: '<%= Random.rand(0..59) %> <%= Random.rand(6..9) %> * * *'
class: Scheduler::FollowRecommendationsScheduler class: Scheduler::FollowRecommendationsScheduler
queue: scheduler queue: scheduler
doorkeeper_cleanup_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(0..2) %> * * 0'
class: Scheduler::DoorkeeperCleanupScheduler
queue: scheduler
user_cleanup_scheduler: user_cleanup_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(4..6) %> * * *' cron: '<%= Random.rand(0..59) %> <%= Random.rand(4..6) %> * * *'
class: Scheduler::UserCleanupScheduler class: Scheduler::UserCleanupScheduler
...@@ -49,10 +41,6 @@ ...@@ -49,10 +41,6 @@
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *' cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
class: Scheduler::IpCleanupScheduler class: Scheduler::IpCleanupScheduler
queue: scheduler queue: scheduler
backup_cleanup_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
class: Scheduler::BackupCleanupScheduler
queue: scheduler
pghero_scheduler: pghero_scheduler:
cron: '0 0 * * *' cron: '0 0 * * *'
class: Scheduler::PgheroScheduler class: Scheduler::PgheroScheduler
......
# Test fabricator that builds Doorkeeper::AccessGrant records.
Fabricator(:access_grant, from: 'Doorkeeper::AccessGrant') do
  # No explicit value given; presumably filled in by an :application
  # fabricator -- confirm that one exists.
  application

  # Each grant gets its own freshly fabricated user as resource owner.
  resource_owner_id { Fabricate(:user).id }

  expires_in 3_600

  redirect_uri { Doorkeeper.configuration.native_redirect_uri }
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment