Skip to content
Snippets Groups Projects
Unverified Commit e7aa2be8 authored by Eugen Rochko's avatar Eugen Rochko Committed by GitHub
Browse files

Change how hashtags are normalized (#18795)

* Change how hashtags are normalized

* Fix tests
parent 12ed2d79
No related branches found
No related tags found
No related merge requests found
Showing
with 114 additions and 36 deletions
......@@ -16,6 +16,8 @@ module Admin
if @tag.update(tag_params.merge(reviewed_at: Time.now.utc))
redirect_to admin_tag_path(@tag.id), notice: I18n.t('admin.tags.updated_msg')
else
@time_period = (6.days.ago.to_date...Time.now.utc.to_date)
render :show
end
end
......@@ -27,7 +29,7 @@ module Admin
end
def tag_params
params.require(:tag).permit(:name, :trendable, :usable, :listable)
params.require(:tag).permit(:name, :display_name, :trendable, :usable, :listable)
end
end
end
......@@ -13,9 +13,7 @@ class Api::V1::FeaturedTagsController < Api::BaseController
end
def create
@featured_tag = current_account.featured_tags.new(featured_tag_params)
@featured_tag.reset_data
@featured_tag.save!
@featured_tag = current_account.featured_tags.create!(featured_tag_params)
render json: @featured_tag, serializer: REST::FeaturedTagSerializer
end
......
......@@ -11,7 +11,6 @@ class Settings::FeaturedTagsController < Settings::BaseController
def create
@featured_tag = current_account.featured_tags.new(featured_tag_params)
@featured_tag.reset_data
if @featured_tag.save
redirect_to settings_featured_tags_path
......
......@@ -606,7 +606,20 @@ function insertIntoTagHistory(recognizedTags, text) {
const state = getState();
const oldHistory = state.getIn(['compose', 'tagHistory']);
const me = state.getIn(['meta', 'me']);
const names = recognizedTags.map(tag => text.match(new RegExp(`#${tag.name}`, 'i'))[0].slice(1));
// FIXME: Matching input hashtags with recognized hashtags has become more
// complicated because of new normalization rules, it's no longer just
// a case sensitivity issue
const names = recognizedTags.map(tag => {
const matches = text.match(new RegExp(`#${tag.name}`, 'i'));
if (matches && matches.length > 0) {
return matches[0].slice(1);
} else {
return tag.name;
}
});
const intersectedOldHistory = oldHistory.filter(name => names.findIndex(newName => newName.toLowerCase() === name.toLowerCase()) === -1);
names.push(...intersectedOldHistory.toJS());
......
# frozen_string_literal: true
class ASCIIFolding
NON_ASCII_CHARS = 'ÀÁÂÃÄÅàáâãäåĀāĂ㥹ÇçĆćĈĉĊċČčÐðĎďĐđÈÉÊËèéêëĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħÌÍÎÏìíîïĨĩĪīĬĭĮįİıĴĵĶķĸĹĺĻļĽľĿŀŁłÑñŃńŅņŇňʼnŊŋÒÓÔÕÖØòóôõöøŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞşŠšſŢţŤťŦŧÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųŴŵÝýÿŶŷŸŹźŻżŽž'
EQUIVALENT_ASCII_CHARS = 'AAAAAAaaaaaaAaAaAaCcCcCcCcCcDdDdDdEEEEeeeeEeEeEeEeEeGgGgGgGgHhHhIIIIiiiiIiIiIiIiIiJjKkkLlLlLlLlLlNnNnNnNnnNnOOOOOOooooooOoOoOoRrRrRrSsSsSsSssTtTtTtUUUUuuuuUuUuUuUuUuUuWwYyyYyYZzZzZz'
def fold(str)
str.tr(NON_ASCII_CHARS, EQUIVALENT_ASCII_CHARS)
end
end
# frozen_string_literal: true
class HashtagNormalizer
def normalize(str)
remove_invalid_characters(ascii_folding(lowercase(cjk_width(str))))
end
private
def remove_invalid_characters(str)
str.gsub(/[^[:alnum:]#{Tag::HASHTAG_SEPARATORS}]/, '')
end
def ascii_folding(str)
ASCIIFolding.new.fold(str)
end
def lowercase(str)
str.mb_chars.downcase.to_s
end
def cjk_width(str)
str.unicode_normalize(:nfkc)
end
end
......@@ -62,7 +62,7 @@ class Account < ApplicationRecord
)
USERNAME_RE = /[a-z0-9_]+([a-z0-9_\.-]+[a-z0-9_]+)?/i
MENTION_RE = /(?<=^|[^\/[:word:]])@((#{USERNAME_RE})(?:@[[:word:]\.\-]+[[:word:]]+)?)/i
MENTION_RE = /(?<=^|[^\/[:word:]])@((#{USERNAME_RE})(?:@[[:alnum:]\.\-]+[[:alnum:]]+)?)/i
URL_PREFIX_RE = /\Ahttp(s?):\/\/[^\/]+/
include Attachmentable
......
......@@ -3,14 +3,14 @@
#
# Table name: custom_filters
#
# id :bigint not null, primary key
# account_id :bigint
# id :bigint(8) not null, primary key
# account_id :bigint(8)
# expires_at :datetime
# phrase :text default(""), not null
# context :string default([]), not null, is an Array
# created_at :datetime not null
# updated_at :datetime not null
# action :integer default(0), not null
# action :integer default("warn"), not null
#
class CustomFilter < ApplicationRecord
......
......@@ -3,8 +3,8 @@
#
# Table name: custom_filter_keywords
#
# id :bigint not null, primary key
# custom_filter_id :bigint not null
# id :bigint(8) not null, primary key
# custom_filter_id :bigint(8) not null
# keyword :text default(""), not null
# whole_word :boolean default(TRUE), not null
# created_at :datetime not null
......
......@@ -13,17 +13,19 @@
#
class FeaturedTag < ApplicationRecord
belongs_to :account, inverse_of: :featured_tags, required: true
belongs_to :tag, inverse_of: :featured_tags, required: true
belongs_to :account, inverse_of: :featured_tags
belongs_to :tag, inverse_of: :featured_tags, optional: true # Set after validation
delegate :name, to: :tag, allow_nil: true
validates_associated :tag, on: :create
validates :name, presence: true, on: :create
validate :validate_tag_name, on: :create
validate :validate_featured_tags_limit, on: :create
def name=(str)
self.tag = Tag.find_or_create_by_names(str.strip)&.first
before_create :set_tag
before_create :reset_data
attr_writer :name
def name
tag_id.present? ? tag.name : @name
end
def increment(timestamp)
......@@ -34,14 +36,23 @@ class FeaturedTag < ApplicationRecord
update(statuses_count: [0, statuses_count - 1].max, last_status_at: account.statuses.where(visibility: %i(public unlisted)).tagged_with(tag).where.not(id: deleted_status_id).select(:created_at).first&.created_at)
end
private
def set_tag
self.tag = Tag.find_or_create_by_names(@name)&.first
end
def reset_data
self.statuses_count = account.statuses.where(visibility: %i(public unlisted)).tagged_with(tag).count
self.last_status_at = account.statuses.where(visibility: %i(public unlisted)).tagged_with(tag).select(:created_at).first&.created_at
end
private
def validate_featured_tags_limit
errors.add(:base, I18n.t('featured_tags.errors.limit')) if account.featured_tags.count >= 10
end
def validate_tag_name
errors.add(:name, :blank) if @name.blank?
errors.add(:name, :invalid) unless @name.match?(/\A(#{Tag::HASHTAG_NAME_RE})\z/i)
end
end
......@@ -15,6 +15,7 @@
# last_status_at :datetime
# max_score :float
# max_score_at :datetime
# display_name :string
#
class Tag < ApplicationRecord
......@@ -24,11 +25,12 @@ class Tag < ApplicationRecord
has_many :featured_tags, dependent: :destroy, inverse_of: :tag
HASHTAG_SEPARATORS = "_\u00B7\u200c"
HASHTAG_NAME_RE = "([[:word:]_][[:word:]#{HASHTAG_SEPARATORS}]*[[:alpha:]#{HASHTAG_SEPARATORS}][[:word:]#{HASHTAG_SEPARATORS}]*[[:word:]_])|([[:word:]_]*[[:alpha:]][[:word:]_]*)"
HASHTAG_NAME_RE = "([[:alnum:]_][[:alnum:]#{HASHTAG_SEPARATORS}]*[[:alpha:]#{HASHTAG_SEPARATORS}][[:alnum:]#{HASHTAG_SEPARATORS}]*[[:alnum:]_])|([[:alnum:]_]*[[:alpha:]][[:alnum:]_]*)"
HASHTAG_RE = /(?:^|[^\/\)\w])#(#{HASHTAG_NAME_RE})/i
validates :name, presence: true, format: { with: /\A(#{HASHTAG_NAME_RE})\z/i }
validate :validate_name_change, if: -> { !new_record? && name_changed? }
validate :validate_display_name_change, if: -> { !new_record? && display_name_changed? }
scope :reviewed, -> { where.not(reviewed_at: nil) }
scope :unreviewed, -> { where(reviewed_at: nil) }
......@@ -46,6 +48,10 @@ class Tag < ApplicationRecord
name
end
def display_name
attributes['display_name'] || name
end
def usable
boolean_with_default('usable', true)
end
......@@ -90,8 +96,10 @@ class Tag < ApplicationRecord
class << self
def find_or_create_by_names(name_or_names)
Array(name_or_names).map(&method(:normalize)).uniq { |str| str.mb_chars.downcase.to_s }.map do |normalized_name|
tag = matching_name(normalized_name).first || create(name: normalized_name)
names = Array(name_or_names).map { |str| [normalize(str), str] }.uniq(&:first)
names.map do |(normalized_name, display_name)|
tag = matching_name(normalized_name).first || create(name: normalized_name, display_name: display_name)
yield tag if block_given?
......@@ -129,7 +137,7 @@ class Tag < ApplicationRecord
end
def normalize(str)
str.gsub(/\A#/, '')
HashtagNormalizer.new.normalize(str)
end
end
......@@ -138,4 +146,8 @@ class Tag < ApplicationRecord
def validate_name_change
errors.add(:name, I18n.t('tags.does_not_match_previous_name')) unless name_was.mb_chars.casecmp(name.mb_chars).zero?
end
def validate_display_name_change
errors.add(:display_name, I18n.t('tags.does_not_match_previous_name')) unless HashtagNormalizer.new.normalize(display_name).casecmp(name.mb_chars).zero?
end
end
......@@ -10,11 +10,11 @@ class ActivityPub::HashtagSerializer < ActivityPub::Serializer
end
def name
"##{object.name}"
"##{object.display_name}"
end
def href
if object.class.name == 'FeaturedTag'
if object.instance_of?(FeaturedTag)
short_account_tag_url(object.account, object.tag)
else
tag_url(object)
......
......@@ -12,4 +12,8 @@ class REST::FeaturedTagSerializer < ActiveModel::Serializer
def url
short_account_tag_url(object.account, object.tag)
end
def name
object.display_name
end
end
......@@ -8,4 +8,8 @@ class REST::TagSerializer < ActiveModel::Serializer
def url
tag_url(object)
end
def name
object.display_name
end
end
......@@ -75,7 +75,7 @@
= link_to short_account_tag_path(@account, featured_tag.tag) do
%h4
= fa_icon 'hashtag'
= featured_tag.name
= featured_tag.display_name
%small
- if featured_tag.last_status_at.nil?
= t('accounts.nothing_here')
......
......@@ -28,7 +28,7 @@ RSS::Builder.build do |doc|
end
status.tags.each do |tag|
item.category(tag.name)
item.category(tag.display_name)
end
end
end
......
......@@ -2,7 +2,7 @@
= javascript_pack_tag 'admin', async: true, crossorigin: 'anonymous'
- content_for :page_title do
= "##{@tag.name}"
= "##{@tag.display_name}"
- if current_user.can?(:view_dashboard)
- content_for :heading_actions do
......@@ -53,7 +53,7 @@
= render 'shared/error_messages', object: @tag
.fields-group
= f.input :name, wrapper: :with_block_label
= f.input :display_name, wrapper: :with_block_label
.fields-group
= f.input :usable, as: :boolean, wrapper: :with_label
......
......@@ -6,7 +6,7 @@
.pending-account__header
= link_to admin_tag_path(tag.id) do
= fa_icon 'hashtag'
= tag.name
= tag.display_name
%br/
......
<%= raw t('admin_mailer.new_trends.new_trending_tags.title') %>
<% @tags.each do |tag| %>
- #<%= tag.name %>
- #<%= tag.display_name %>
<%= raw t('admin.trends.tags.usage_comparison', today: tag.history.get(Time.now.utc).accounts, yesterday: tag.history.get(Time.now.utc - 1.day).accounts) %><%= t('admin.trends.tags.current_score', score: Trends.tags.score(tag.id).round(2)) %>
<% end %>
<% if @lowest_trending_tag %>
<%= raw t('admin_mailer.new_trends.new_trending_tags.requirements', lowest_tag_name: @lowest_trending_tag.name, lowest_tag_score: Trends.tags.score(@lowest_trending_tag.id).round(2), rank: Trends.tags.options[:review_threshold]) %>
<%= raw t('admin_mailer.new_trends.new_trending_tags.requirements', lowest_tag_name: @lowest_trending_tag.display_name, lowest_tag_score: Trends.tags.score(@lowest_trending_tag.id).round(2), rank: Trends.tags.options[:review_threshold]) %>
<% else %>
<%= raw t('admin_mailer.new_trends.new_trending_tags.no_approved_tags') %>
<% end %>
......
......@@ -9,7 +9,7 @@
= render 'shared/error_messages', object: @featured_tag
.fields-group
= f.input :name, wrapper: :with_block_label, hint: safe_join([t('simple_form.hints.featured_tag.name'), safe_join(@recently_used_tags.map { |tag| link_to("##{tag.name}", settings_featured_tags_path(featured_tag: { name: tag.name }), method: :post) }, ', ')], ' ')
= f.input :name, wrapper: :with_block_label, hint: safe_join([t('simple_form.hints.featured_tag.name'), safe_join(@recently_used_tags.map { |tag| link_to("##{tag.display_name}", settings_featured_tags_path(featured_tag: { name: tag.name }), method: :post) }, ', ')], ' ')
.actions
= f.button :button, t('featured_tags.add_new'), type: :submit
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment