# Options and one-line summary for the `remove` command.
option :vacuum,
       type: :boolean,
       default: false,
       desc: 'Reduce the file size and update the statistics. This option locks the table for a long time, so run it offline'
option :batch_size,
       type: :numeric,
       default: 1_000,
       aliases: [:b],
       desc: 'Number of records in each batch'
desc 'remove', 'Remove unreferenced statuses'
long_desc<<~LONG_DESC
Remove statuses that are not referenced by local user activity, such as
...
...
@@ -25,52 +28,89 @@ module Mastodon
indices before commencing, and removes them afterward.
LONG_DESC
defremove
ifoptions[:batch_size]<1
say('Cannot run with this batch_size setting, must be at least 1',:red)
exit(1)
end
say('Creating temporary database indices...')
ActiveRecord::Base.connection.add_index(:accounts,:id,name: :index_accounts_local,where: 'domain is null',algorithm: :concurrently)unlessActiveRecord::Base.connection.index_name_exists?(:accounts,:index_accounts_local)
ActiveRecord::Base.connection.add_index(:media_attachments,:remote_url,name: :index_media_attachments_remote_url,where: 'remote_url is not null',algorithm: :concurrently)unlessActiveRecord::Base.connection.index_name_exists?(:media_attachments,:index_media_attachments_remote_url)
ActiveRecord::Base.connection.add_index(:accounts,:id,name: :index_accounts_local,where: 'domain is null',algorithm: :concurrently,if_not_exists: true)
ActiveRecord::Base.connection.add_index(:media_attachments,:remote_url,name: :index_media_attachments_remote_url,where: 'remote_url is not null',algorithm: :concurrently,if_not_exists: true)
SELECT statuses.id FROM statuses WHERE deleted_at IS NULL AND NOT local AND uri IS NOT NULL AND (id < $1)
AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)
AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local))
AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= $1))
AND NOT EXISTS (SELECT 1 FROM status_pins WHERE statuses.id = status_id)
AND NOT EXISTS (SELECT 1 FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
AND NOT EXISTS (SELECT 1 FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
AND NOT EXISTS (SELECT 1 FROM bookmarks WHERE statuses.id = bookmarks.status_id AND bookmarks.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
#{clean_followed_sql}
SQL
say('Removing temporary database indices to restore write performance...')
say('Beginning removal... This might take a while...')
scope=Status.remote.where('id < ?',max_id)
# Skip reblogs of local statuses
scope=scope.where('reblog_of_id NOT IN (SELECT statuses1.id FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local))')
# Skip statuses that are pinned on profiles
scope=scope.where('id NOT IN (SELECT status_pins.status_id FROM status_pins WHERE statuses.id = status_id)')
# Skip statuses that mention local accounts
scope=scope.where('id NOT IN (SELECT mentions.status_id FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))')
# Skip statuses which have replies
scope=scope.where('id NOT IN (SELECT statuses1.in_reply_to_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)')
# Skip statuses reblogged by local accounts or with recent boosts
scope=scope.where('id NOT IN (SELECT statuses1.reblog_of_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= ?))',max_id)
# Skip statuses favourited by local users
scope=scope.where('id NOT IN (SELECT favourites.status_id FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))')
# Skip statuses bookmarked by local users
scope=scope.where('id NOT IN (SELECT bookmarks.status_id FROM bookmarks WHERE statuses.id = bookmarks.status_id AND bookmarks.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))')
unlessoptions[:clean_followed]
# Skip accounts followed by local accounts
scope=scope.where('account_id NOT IN (SELECT follows.target_account_id FROM follows WHERE statuses.account_id = follows.target_account_id)')