From 40e5d2303ba1edc51beae66cc15263675980106a Mon Sep 17 00:00:00 2001
From: Akihiko Odaki <akihiko.odaki.4i@stu.hosei.ac.jp>
Date: Mon, 26 Mar 2018 21:02:10 +0900
Subject: [PATCH] Validate HTTP response length while receiving (#6891)

to_s method of HTTP::Response keeps blocking while it receives the whole
content, no matter how it is big. This means it may waste time to receive
unacceptably large files. It may also consume memory and disk in the
process. This solves the inefficency by checking response length while
receiving.
---
 app/helpers/jsonld_helper.rb                  |  2 +-
 app/lib/exceptions.rb                         |  1 +
 app/lib/provider_discovery.rb                 |  2 +-
 app/lib/request.rb                            | 31 +++++++++++-
 app/models/account.rb                         |  1 -
 app/models/application_record.rb              |  1 +
 app/models/concerns/account_avatar.rb         |  4 +-
 app/models/concerns/account_header.rb         |  4 +-
 app/models/concerns/remotable.rb              |  6 +--
 app/models/custom_emoji.rb                    |  6 ++-
 app/models/media_attachment.rb                |  7 +--
 app/models/preview_card.rb                    |  5 +-
 app/services/fetch_atom_service.rb            | 11 +++--
 app/services/fetch_link_card_service.rb       |  2 +-
 app/services/resolve_account_service.rb       |  2 +-
 .../pubsubhubbub/confirmation_worker.rb       |  2 +-
 spec/lib/request_spec.rb                      | 49 +++++++++++++++++++
 spec/models/concerns/remotable_spec.rb        |  5 +-
 18 files changed, 115 insertions(+), 26 deletions(-)

diff --git a/app/helpers/jsonld_helper.rb b/app/helpers/jsonld_helper.rb
index 957a2cbc98..dfb8fcb8b1 100644
--- a/app/helpers/jsonld_helper.rb
+++ b/app/helpers/jsonld_helper.rb
@@ -61,7 +61,7 @@ module JsonLdHelper
 
   def fetch_resource_without_id_validation(uri)
     build_request(uri).perform do |response|
-      response.code == 200 ? body_to_json(response.to_s) : nil
+      response.code == 200 ? body_to_json(response.body_with_limit) : nil
     end
   end
 
diff --git a/app/lib/exceptions.rb b/app/lib/exceptions.rb
index 95e3365c2b..e88e98eae5 100644
--- a/app/lib/exceptions.rb
+++ b/app/lib/exceptions.rb
@@ -5,6 +5,7 @@ module Mastodon
   class NotPermittedError < Error; end
   class ValidationError < Error; end
   class HostValidationError < ValidationError; end
+  class LengthValidationError < ValidationError; end
   class RaceConditionError < Error; end
 
   class UnexpectedResponseError < Error
diff --git a/app/lib/provider_discovery.rb b/app/lib/provider_discovery.rb
index bbd3a2d43e..3bec7211bd 100644
--- a/app/lib/provider_discovery.rb
+++ b/app/lib/provider_discovery.rb
@@ -18,7 +18,7 @@ class ProviderDiscovery < OEmbed::ProviderDiscovery
              else
                Request.new(:get, url).perform do |res|
                  raise OEmbed::NotFound, url if res.code != 200 || res.mime_type != 'text/html'
-                 Nokogiri::HTML(res.to_s)
+                 Nokogiri::HTML(res.body_with_limit)
                end
              end
 
diff --git a/app/lib/request.rb b/app/lib/request.rb
index 8a127c65f2..dca93a6e95 100644
--- a/app/lib/request.rb
+++ b/app/lib/request.rb
@@ -40,7 +40,7 @@ class Request
     end
 
     begin
-      yield response
+      yield response.extend(ClientLimit)
     ensure
       http_client.close
     end
@@ -99,6 +99,33 @@ class Request
     @http_client ||= HTTP.timeout(:per_operation, timeout).follow(max_hops: 2)
   end
 
+  module ClientLimit
+    def body_with_limit(limit = 1.megabyte)
+      raise Mastodon::LengthValidationError if content_length.present? && content_length > limit
+
+      if charset.nil?
+        encoding = Encoding::BINARY
+      else
+        begin
+          encoding = Encoding.find(charset)
+        rescue ArgumentError
+          encoding = Encoding::BINARY
+        end
+      end
+
+      contents = String.new(encoding: encoding)
+
+      while (chunk = readpartial)
+        contents << chunk
+        chunk.clear
+
+        raise Mastodon::LengthValidationError if contents.bytesize > limit
+      end
+
+      contents
+    end
+  end
+
   class Socket < TCPSocket
     class << self
       def open(host, *args)
@@ -118,5 +145,5 @@ class Request
     end
   end
 
-  private_constant :Socket
+  private_constant :ClientLimit, :Socket
 end
diff --git a/app/models/account.rb b/app/models/account.rb
index 9a83d979fb..25e7d7436f 100644
--- a/app/models/account.rb
+++ b/app/models/account.rb
@@ -55,7 +55,6 @@ class Account < ApplicationRecord
   include AccountHeader
   include AccountInteractions
   include Attachmentable
-  include Remotable
   include Paginable
 
   enum protocol: [:ostatus, :activitypub]
diff --git a/app/models/application_record.rb b/app/models/application_record.rb
index 71fbba5b32..83134d41a4 100644
--- a/app/models/application_record.rb
+++ b/app/models/application_record.rb
@@ -2,4 +2,5 @@
 
 class ApplicationRecord < ActiveRecord::Base
   self.abstract_class = true
+  include Remotable
 end
diff --git a/app/models/concerns/account_avatar.rb b/app/models/concerns/account_avatar.rb
index 9e34a94617..2d5ebfca35 100644
--- a/app/models/concerns/account_avatar.rb
+++ b/app/models/concerns/account_avatar.rb
@@ -4,6 +4,7 @@ module AccountAvatar
   extend ActiveSupport::Concern
 
   IMAGE_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif'].freeze
+  LIMIT = 2.megabytes
 
   class_methods do
     def avatar_styles(file)
@@ -19,7 +20,8 @@ module AccountAvatar
     # Avatar upload
     has_attached_file :avatar, styles: ->(f) { avatar_styles(f) }, convert_options: { all: '-strip' }, processors: [:lazy_thumbnail]
     validates_attachment_content_type :avatar, content_type: IMAGE_MIME_TYPES
-    validates_attachment_size :avatar, less_than: 2.megabytes
+    validates_attachment_size :avatar, less_than: LIMIT
+    remotable_attachment :avatar, LIMIT
   end
 
   def avatar_original_url
diff --git a/app/models/concerns/account_header.rb b/app/models/concerns/account_header.rb
index 04c576b289..ef40b8126b 100644
--- a/app/models/concerns/account_header.rb
+++ b/app/models/concerns/account_header.rb
@@ -4,6 +4,7 @@ module AccountHeader
   extend ActiveSupport::Concern
 
   IMAGE_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif'].freeze
+  LIMIT = 2.megabytes
 
   class_methods do
     def header_styles(file)
@@ -19,7 +20,8 @@ module AccountHeader
     # Header upload
     has_attached_file :header, styles: ->(f) { header_styles(f) }, convert_options: { all: '-strip' }, processors: [:lazy_thumbnail]
     validates_attachment_content_type :header, content_type: IMAGE_MIME_TYPES
-    validates_attachment_size :header, less_than: 2.megabytes
+    validates_attachment_size :header, less_than: LIMIT
+    remotable_attachment :header, LIMIT
   end
 
   def header_original_url
diff --git a/app/models/concerns/remotable.rb b/app/models/concerns/remotable.rb
index 0f18c5d963..3b8c507c31 100644
--- a/app/models/concerns/remotable.rb
+++ b/app/models/concerns/remotable.rb
@@ -3,8 +3,8 @@
 module Remotable
   extend ActiveSupport::Concern
 
-  included do
-    attachment_definitions.each_key do |attachment_name|
+  class_methods do
+    def remotable_attachment(attachment_name, limit)
       attribute_name  = "#{attachment_name}_remote_url".to_sym
       method_name     = "#{attribute_name}=".to_sym
       alt_method_name = "reset_#{attachment_name}!".to_sym
@@ -33,7 +33,7 @@ module Remotable
                         File.extname(filename)
                       end
 
-            send("#{attachment_name}=", StringIO.new(response.to_s))
+            send("#{attachment_name}=", StringIO.new(response.body_with_limit(limit)))
             send("#{attachment_name}_file_name=", basename + extname)
 
             self[attribute_name] = url if has_attribute?(attribute_name)
diff --git a/app/models/custom_emoji.rb b/app/models/custom_emoji.rb
index a77b53c98b..476178e86e 100644
--- a/app/models/custom_emoji.rb
+++ b/app/models/custom_emoji.rb
@@ -19,6 +19,8 @@
 #
 
 class CustomEmoji < ApplicationRecord
+  LIMIT = 50.kilobytes
+
   SHORTCODE_RE_FRAGMENT = '[a-zA-Z0-9_]{2,}'
 
   SCAN_RE = /(?<=[^[:alnum:]:]|\n|^)
@@ -29,14 +31,14 @@ class CustomEmoji < ApplicationRecord
 
   has_attached_file :image, styles: { static: { format: 'png', convert_options: '-coalesce -strip' } }
 
-  validates_attachment :image, content_type: { content_type: 'image/png' }, presence: true, size: { in: 0..50.kilobytes }
+  validates_attachment :image, content_type: { content_type: 'image/png' }, presence: true, size: { less_than: LIMIT }
   validates :shortcode, uniqueness: { scope: :domain }, format: { with: /\A#{SHORTCODE_RE_FRAGMENT}\z/ }, length: { minimum: 2 }
 
   scope :local,      -> { where(domain: nil) }
   scope :remote,     -> { where.not(domain: nil) }
   scope :alphabetic, -> { order(domain: :asc, shortcode: :asc) }
 
-  include Remotable
+  remotable_attachment :image, LIMIT
 
   def local?
     domain.nil?
diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb
index a4d9cd9d12..ac2aa7ed21 100644
--- a/app/models/media_attachment.rb
+++ b/app/models/media_attachment.rb
@@ -56,6 +56,8 @@ class MediaAttachment < ApplicationRecord
     },
   }.freeze
 
+  LIMIT = 8.megabytes
+
   belongs_to :account, inverse_of: :media_attachments, optional: true
   belongs_to :status,  inverse_of: :media_attachments, optional: true
 
@@ -64,10 +66,9 @@ class MediaAttachment < ApplicationRecord
                     processors: ->(f) { file_processors f },
                     convert_options: { all: '-quality 90 -strip' }
 
-  include Remotable
-
   validates_attachment_content_type :file, content_type: IMAGE_MIME_TYPES + VIDEO_MIME_TYPES
-  validates_attachment_size :file, less_than: 8.megabytes
+  validates_attachment_size :file, less_than: LIMIT
+  remotable_attachment :file, LIMIT
 
   validates :account, presence: true
   validates :description, length: { maximum: 420 }, if: :local?
diff --git a/app/models/preview_card.rb b/app/models/preview_card.rb
index 86eecdfe5b..0c82f06ce0 100644
--- a/app/models/preview_card.rb
+++ b/app/models/preview_card.rb
@@ -26,6 +26,7 @@
 
 class PreviewCard < ApplicationRecord
   IMAGE_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif'].freeze
+  LIMIT = 1.megabytes
 
   self.inheritance_column = false
 
@@ -36,11 +37,11 @@ class PreviewCard < ApplicationRecord
   has_attached_file :image, styles: { original: { geometry: '400x400>', file_geometry_parser: FastGeometryParser } }, convert_options: { all: '-quality 80 -strip' }
 
   include Attachmentable
-  include Remotable
 
   validates :url, presence: true, uniqueness: true
   validates_attachment_content_type :image, content_type: IMAGE_MIME_TYPES
-  validates_attachment_size :image, less_than: 1.megabytes
+  validates_attachment_size :image, less_than: LIMIT
+  remotable_attachment :image, LIMIT
 
   before_save :extract_dimensions, if: :link?
 
diff --git a/app/services/fetch_atom_service.rb b/app/services/fetch_atom_service.rb
index 48ad5dcd37..62dea8298a 100644
--- a/app/services/fetch_atom_service.rb
+++ b/app/services/fetch_atom_service.rb
@@ -38,13 +38,14 @@ class FetchAtomService < BaseService
     return nil if response.code != 200
 
     if response.mime_type == 'application/atom+xml'
-      [@url, { prefetched_body: response.to_s }, :ostatus]
+      [@url, { prefetched_body: response.body_with_limit }, :ostatus]
     elsif ['application/activity+json', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'].include?(response.mime_type)
-      json = body_to_json(response.to_s)
+      body = response.body_with_limit
+      json = body_to_json(body)
       if supported_context?(json) && json['type'] == 'Person' && json['inbox'].present?
-        [json['id'], { prefetched_body: response.to_s, id: true }, :activitypub]
+        [json['id'], { prefetched_body: body, id: true }, :activitypub]
       elsif supported_context?(json) && json['type'] == 'Note'
-        [json['id'], { prefetched_body: response.to_s, id: true }, :activitypub]
+        [json['id'], { prefetched_body: body, id: true }, :activitypub]
       else
         @unsupported_activity = true
         nil
@@ -61,7 +62,7 @@ class FetchAtomService < BaseService
   end
 
   def process_html(response)
-    page = Nokogiri::HTML(response.to_s)
+    page = Nokogiri::HTML(response.body_with_limit)
 
     json_link = page.xpath('//link[@rel="alternate"]').find { |link| ['application/activity+json', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'].include?(link['type']) }
     atom_link = page.xpath('//link[@rel="alternate"]').find { |link| link['type'] == 'application/atom+xml' }
diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb
index 26deb5ecc4..d5920a417e 100644
--- a/app/services/fetch_link_card_service.rb
+++ b/app/services/fetch_link_card_service.rb
@@ -45,7 +45,7 @@ class FetchLinkCardService < BaseService
 
     Request.new(:get, @url).perform do |res|
       if res.code == 200 && res.mime_type == 'text/html'
-        @html = res.to_s
+        @html = res.body_with_limit
         @html_charset = res.charset
       else
         @html = nil
diff --git a/app/services/resolve_account_service.rb b/app/services/resolve_account_service.rb
index 034821dc07..744ea24f4d 100644
--- a/app/services/resolve_account_service.rb
+++ b/app/services/resolve_account_service.rb
@@ -181,7 +181,7 @@ class ResolveAccountService < BaseService
 
     @atom_body = Request.new(:get, atom_url).perform do |response|
       raise Mastodon::UnexpectedResponseError, response unless response.code == 200
-      response.to_s
+      response.body_with_limit
     end
   end
 
diff --git a/app/workers/pubsubhubbub/confirmation_worker.rb b/app/workers/pubsubhubbub/confirmation_worker.rb
index cc2d1225bf..c0e7b677e4 100644
--- a/app/workers/pubsubhubbub/confirmation_worker.rb
+++ b/app/workers/pubsubhubbub/confirmation_worker.rb
@@ -57,7 +57,7 @@ class Pubsubhubbub::ConfirmationWorker
 
   def callback_get_with_params
     Request.new(:get, subscription.callback_url, params: callback_params).perform do |response|
-      @callback_response_body = response.body.to_s
+      @callback_response_body = response.body_with_limit
     end
   end
 
diff --git a/spec/lib/request_spec.rb b/spec/lib/request_spec.rb
index 4d6b20dd53..939ac006ae 100644
--- a/spec/lib/request_spec.rb
+++ b/spec/lib/request_spec.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require 'rails_helper'
+require 'securerandom'
 
 describe Request do
   subject { Request.new(:get, 'http://example.com') }
@@ -64,6 +65,12 @@ describe Request do
         expect_any_instance_of(HTTP::Client).to receive(:close)
         expect { |block| subject.perform &block }.to yield_control
       end
+
+      it 'returns response which implements body_with_limit' do
+        subject.perform do |response|
+          expect(response).to respond_to :body_with_limit
+        end
+      end
     end
 
     context 'with private host' do
@@ -81,4 +88,46 @@ describe Request do
       end
     end
   end
+
+  describe "response's body_with_limit method" do
+    it 'rejects body more than 1 megabyte by default' do
+      stub_request(:any, 'http://example.com').to_return(body: SecureRandom.random_bytes(2.megabytes))
+      expect { subject.perform { |response| response.body_with_limit } }.to raise_error Mastodon::LengthValidationError
+    end
+
+    it 'accepts body less than 1 megabyte by default' do
+      stub_request(:any, 'http://example.com').to_return(body: SecureRandom.random_bytes(2.kilobytes))
+      expect { subject.perform { |response| response.body_with_limit } }.not_to raise_error
+    end
+
+    it 'rejects body by given size' do
+      stub_request(:any, 'http://example.com').to_return(body: SecureRandom.random_bytes(2.kilobytes))
+      expect { subject.perform { |response| response.body_with_limit(1.kilobyte) } }.to raise_error Mastodon::LengthValidationError
+    end
+
+    it 'rejects too large chunked body' do
+      stub_request(:any, 'http://example.com').to_return(body: SecureRandom.random_bytes(2.megabytes), headers: { 'Transfer-Encoding' => 'chunked' })
+      expect { subject.perform { |response| response.body_with_limit } }.to raise_error Mastodon::LengthValidationError
+    end
+
+    it 'rejects too large monolithic body' do
+      stub_request(:any, 'http://example.com').to_return(body: SecureRandom.random_bytes(2.megabytes), headers: { 'Content-Length' => 2.megabytes })
+      expect { subject.perform { |response| response.body_with_limit } }.to raise_error Mastodon::LengthValidationError
+    end
+
+    it 'uses binary encoding if Content-Type does not tell encoding' do
+      stub_request(:any, 'http://example.com').to_return(body: '', headers: { 'Content-Type' => 'text/html' })
+      expect(subject.perform { |response| response.body_with_limit.encoding }).to eq Encoding::BINARY
+    end
+
+    it 'uses binary encoding if Content-Type tells unknown encoding' do
+      stub_request(:any, 'http://example.com').to_return(body: '', headers: { 'Content-Type' => 'text/html; charset=unknown' })
+      expect(subject.perform { |response| response.body_with_limit.encoding }).to eq Encoding::BINARY
+    end
+
+    it 'uses encoding specified by Content-Type' do
+      stub_request(:any, 'http://example.com').to_return(body: '', headers: { 'Content-Type' => 'text/html; charset=UTF-8' })
+      expect(subject.perform { |response| response.body_with_limit.encoding }).to eq Encoding::UTF_8
+    end
+  end
 end
diff --git a/spec/models/concerns/remotable_spec.rb b/spec/models/concerns/remotable_spec.rb
index 0b2dad23f0..b39233739c 100644
--- a/spec/models/concerns/remotable_spec.rb
+++ b/spec/models/concerns/remotable_spec.rb
@@ -29,7 +29,10 @@ RSpec.describe Remotable do
 
   context 'Remotable module is included' do
     before do
-      class Foo; include Remotable; end
+      class Foo
+        include Remotable
+        remotable_attachment :hoge, 1.kilobyte
+      end
     end
 
     let(:attribute_name) { "#{hoge}_remote_url".to_sym }
-- 
GitLab