From 6755eacf6bf09d5cf481a56c9d76e455c1d8ca5e Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Thu, 29 Mar 2018 13:10:55 +0200 Subject: [PATCH 1/7] feat: validate new document writes against max_http_request_size The validation path is now the following: If a new doc body is > max_document_size, we throw an error. If a new attachment is > max_attachment_size, we throw an error. If the new doc body in combination with new and/or existing attachments is > max_http_request_size, we throw an error. This also sets the max_document_size to 2 GB, to restore 1.x and 2.0.x compatibility. Closes #1200 --- rel/overlay/etc/default.ini | 2 +- src/couch/src/couch_att.erl | 6 +- src/couch/src/couch_doc.erl | 15 +++++ src/couch/test/couch_doc_json_tests.erl | 88 ++++++++++++++++++++++++- 4 files changed, 107 insertions(+), 4 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index df438773543..e37cba123d7 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -135,7 +135,7 @@ enable_xframe_options = false ; x_forwarded_proto = X-Forwarded-Proto ; x_forwarded_ssl = X-Forwarded-Ssl ; Maximum allowed http request size. Applies to both clustered and local port. -max_http_request_size = 67108864 ; 64 MB +max_http_request_size = 4294967296 ; 2 GB ; [httpd_design_handlers] ; _view = diff --git a/src/couch/src/couch_att.erl b/src/couch/src/couch_att.erl index 16edd66cea7..500ac220dbc 100644 --- a/src/couch/src/couch_att.erl +++ b/src/couch/src/couch_att.erl @@ -47,7 +47,8 @@ -export([ upgrade/1, - downgrade/1 + downgrade/1, + to_tuple/1 ]). -export([ @@ -708,6 +709,9 @@ upgrade(#att{} = Att) -> upgrade(Att) -> Att. +to_tuple(#att{name=Name, att_len=Len, type=Type, encoding=Encoding}) -> + {att, Name, Len, Type, Encoding}. + %% Downgrade is exposed for interactive convenience. In practice, unless done %% manually, upgrades are always one-way. 
diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index f960ec5c2d9..b80354d5eb2 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -136,12 +136,27 @@ from_json_obj_validate(EJson, DbName) -> case couch_ejson_size:encoded_size(Doc#doc.body) =< MaxSize of true -> validate_attachment_sizes(Doc#doc.atts), + validate_total_document_size(Doc), Doc; false -> throw({request_entity_too_large, Doc#doc.id}) end. +% sum up the json body size + attachment body size and +% make sure it is < max_http_request_size +validate_total_document_size(#doc{id=DocId, body=Body, atts=Atts0}) -> + MaxReqSize = config:get_integer("httpd", "max_http_request_size", 4294967296), % 2 GB + Boundary = couch_uuids:random(), % mock boundary, is only used for the length + Atts = lists:map(fun couch_att:to_tuple/1, Atts0), + {_, DocSum} = couch_httpd_multipart:length_multipart_stream(Boundary, + ?JSON_ENCODE(Body), Atts), + case DocSum =< MaxReqSize of + true -> ok; + false -> throw({request_entity_too_large, DocId}) + end. + + validate_attachment_sizes([]) -> ok; validate_attachment_sizes(Atts) -> diff --git a/src/couch/test/couch_doc_json_tests.erl b/src/couch/test/couch_doc_json_tests.erl index bcff0646a57..484acdf3e8d 100644 --- a/src/couch/test/couch_doc_json_tests.erl +++ b/src/couch/test/couch_doc_json_tests.erl @@ -38,8 +38,11 @@ mock(couch_log) -> ok; mock(config) -> meck:new(config, [passthrough]), - meck:expect(config, get_integer, - fun("couchdb", "max_document_size", 4294967296) -> 1024 end), + meck:expect(config, get_integer, fun + ("couchdb", "max_document_size", 4294967296) -> 1024; + ("httpd", "max_http_request_size", 4294967296) -> 1024 + end), + meck:expect(config, get, fun(_, _) -> undefined end), meck:expect(config, get, fun(_, _, Default) -> Default end), ok. @@ -124,6 +127,44 @@ from_json_success_cases() -> ]}, "Attachments are parsed correctly." }, + % see if we count our bytes correctly. 
This doc should be *exactly* 1024 bytes + { + {[ + {<<"_attachments">>, {[ + {<<"big.xml">>, {[ + {<<"content_type">>, <<"xml/yay">>}, + {<<"revpos">>, 1}, + {<<"length">>, 319}, + {<<"stub">>, true} + ]}}, + {<<"big.json">>, {[ + {<<"content_type">>, <<"json/ftw">>}, + {<<"revpos">>, 1}, + {<<"length">>, 319}, + {<<"stub">>, true} + ]}} + ]}} + ]}, + #doc{atts = [ + couch_att:new([ + {name, <<"big.xml">>}, + {data, stub}, + {type, <<"xml/yay">>}, + {att_len, 319}, + {disk_len, 319}, + {revpos, 1} + ]), + couch_att:new([ + {name, <<"big.json">>}, + {data, stub}, + {type, <<"json/ftw">>}, + {att_len, 319}, + {disk_len, 319}, + {revpos, 1} + ]) + ]}, + "Document and attachments == max_http_request_size" + }, { {[{<<"_deleted">>, true}]}, #doc{deleted = true}, @@ -281,6 +322,49 @@ from_json_error_cases() -> end, {request_entity_too_large, <<"large_doc">>}, "Document too large." + }, + % doc json body and each attachment are small enough, but combined are > + % max_http_request_size + { + {[ + {<<"_id">>, <<"normal_doc_with_atts">>}, + {<<"_attachments">>, {[ + {<<"big.xml">>, {[ + {<<"content_type">>, <<"xml/yay">>}, + {<<"revpos">>, 1}, + {<<"length">>, 768}, + {<<"stub">>, true} + ]}}, + {<<"big.json">>, {[ + {<<"content_type">>, <<"json/ftw">>}, + {<<"revpos">>, 1}, + {<<"length">>, 768}, + {<<"stub">>, true} + ]}} + ]}} + ]}, + {request_entity_too_large, <<"normal_doc_with_atts">>}, + "Document too large because of attachments." + }, + % see if we count our bytes correctly. 
This doc should be *exactly* 1025 bytes + { + {[ + {<<"_attachments">>, {[ + {<<"big.xml">>, {[ + {<<"content_type">>, <<"xml/yay">>}, + {<<"revpos">>, 1}, + {<<"length">>, 320}, + {<<"stub">>, true} + ]}}, + {<<"big.json">>, {[ + {<<"content_type">>, <<"json/ftw">>}, + {<<"revpos">>, 1}, + {<<"length">>, 319}, + {<<"stub">>, true} + ]}} + ]}} + ]}, + "Document and attachments == max_http_request_size + 1" } ], From ee70b5698f880ade0cd60b6a445ac80171952018 Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Thu, 29 Mar 2018 18:32:40 +0200 Subject: [PATCH 2/7] wip --- rel/overlay/etc/local.ini | 2 ++ src/couch/src/couch_doc.erl | 3 ++- test/javascript/tests/attachments.js | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/rel/overlay/etc/local.ini b/rel/overlay/etc/local.ini index 6b46f0fa114..5ee0867b709 100644 --- a/rel/overlay/etc/local.ini +++ b/rel/overlay/etc/local.ini @@ -46,6 +46,8 @@ [query_servers] ;nodejs = /usr/local/bin/couchjs-node /path/to/couchdb/share/server/main.js +[log] +level = debug [httpd_global_handlers] ;_google = {couch_httpd_proxy, handle_proxy_req, <<"http://www.google.com">>} diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index b80354d5eb2..601a6f062c4 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -145,7 +145,8 @@ from_json_obj_validate(EJson, DbName) -> % sum up the json body size + attachment body size and % make sure it is < max_http_request_size -validate_total_document_size(#doc{id=DocId, body=Body, atts=Atts0}) -> +validate_total_document_size(#doc{id=DocId, body=Body, atts=Atts0}=Doc) -> + couch_log:debug("~nData: ~p~n", [Doc]), MaxReqSize = config:get_integer("httpd", "max_http_request_size", 4294967296), % 2 GB Boundary = couch_uuids:random(), % mock boundary, is only used for the length Atts = lists:map(fun couch_att:to_tuple/1, Atts0), diff --git a/test/javascript/tests/attachments.js b/test/javascript/tests/attachments.js index 2e831a731eb..bd446e9916c 
100644 --- a/test/javascript/tests/attachments.js +++ b/test/javascript/tests/attachments.js @@ -291,7 +291,8 @@ couchTests.attachments= function(debug) { _attachments:{ "foo.txt": { content_type:"text/plain", - data: "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ=" + data: "VGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHRleHQ=", + length: "This is a base64 encoded text".length } } }; From 0e06697fa01c8eb42d74985f3bd45135db89e5a6 Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Fri, 13 Jul 2018 16:25:18 +0200 Subject: [PATCH 3/7] fix comment --- rel/overlay/etc/default.ini | 2 +- src/couch/src/couch_doc.erl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index e37cba123d7..1d61fffacf6 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -135,7 +135,7 @@ enable_xframe_options = false ; x_forwarded_proto = X-Forwarded-Proto ; x_forwarded_ssl = X-Forwarded-Ssl ; Maximum allowed http request size. Applies to both clustered and local port. 
-max_http_request_size = 4294967296 ; 2 GB +max_http_request_size = 4294967296 ; 4 GB ; [httpd_design_handlers] ; _view = diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index 601a6f062c4..56020bb0985 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -147,7 +147,7 @@ from_json_obj_validate(EJson, DbName) -> % make sure it is < max_http_request_size validate_total_document_size(#doc{id=DocId, body=Body, atts=Atts0}=Doc) -> couch_log:debug("~nData: ~p~n", [Doc]), - MaxReqSize = config:get_integer("httpd", "max_http_request_size", 4294967296), % 2 GB + MaxReqSize = config:get_integer("httpd", "max_http_request_size", 4294967296), % 4 GB Boundary = couch_uuids:random(), % mock boundary, is only used for the length Atts = lists:map(fun couch_att:to_tuple/1, Atts0), {_, DocSum} = couch_httpd_multipart:length_multipart_stream(Boundary, From 616ec02aefbddff4bb37f87d2edf2539127c40f7 Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Fri, 13 Jul 2018 16:25:31 +0200 Subject: [PATCH 4/7] hardcode mock uuid --- src/couch/src/couch_doc.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index 56020bb0985..e62b58eadd4 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -148,7 +148,7 @@ from_json_obj_validate(EJson, DbName) -> validate_total_document_size(#doc{id=DocId, body=Body, atts=Atts0}=Doc) -> couch_log:debug("~nData: ~p~n", [Doc]), MaxReqSize = config:get_integer("httpd", "max_http_request_size", 4294967296), % 4 GB - Boundary = couch_uuids:random(), % mock boundary, is only used for the length + Boundary = <<"d07e231b4fc27759fd822449377fcba7">>, Atts = lists:map(fun couch_att:to_tuple/1, Atts0), {_, DocSum} = couch_httpd_multipart:length_multipart_stream(Boundary, ?JSON_ENCODE(Body), Atts), From 797c6d7b2fa00d2d4a196c39cfe27798aaf95560 Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Fri, 13 Jul 2018 16:58:45 +0200 
Subject: [PATCH 5/7] remove debug log --- src/couch/src/couch_doc.erl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index e62b58eadd4..465d3d72f4b 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -146,7 +146,6 @@ from_json_obj_validate(EJson, DbName) -> % sum up the json body size + attachment body size and % make sure it is < max_http_request_size validate_total_document_size(#doc{id=DocId, body=Body, atts=Atts0}=Doc) -> - couch_log:debug("~nData: ~p~n", [Doc]), MaxReqSize = config:get_integer("httpd", "max_http_request_size", 4294967296), % 4 GB Boundary = <<"d07e231b4fc27759fd822449377fcba7">>, Atts = lists:map(fun couch_att:to_tuple/1, Atts0), From e6ae447368d1f8683846fffcbd6283cb11ac63a5 Mon Sep 17 00:00:00 2001 From: Jan Lehnardt Date: Fri, 13 Jul 2018 17:55:22 +0200 Subject: [PATCH 6/7] smarter uuid --- src/couch/src/couch_doc.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index 465d3d72f4b..508603ae01b 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -147,7 +147,7 @@ from_json_obj_validate(EJson, DbName) -> % make sure it is < max_http_request_size validate_total_document_size(#doc{id=DocId, body=Body, atts=Atts0}=Doc) -> MaxReqSize = config:get_integer("httpd", "max_http_request_size", 4294967296), % 4 GB - Boundary = <<"d07e231b4fc27759fd822449377fcba7">>, + Boundary = <<"00000000000000000000000000000000">>, Atts = lists:map(fun couch_att:to_tuple/1, Atts0), {_, DocSum} = couch_httpd_multipart:length_multipart_stream(Boundary, ?JSON_ENCODE(Body), Atts), From 244442cd1d81af8a180e0ed74372d84eca6b909a Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Fri, 13 Jul 2018 12:33:04 -0400 Subject: [PATCH 7/7] Better total (body + attachments) size checking for documents Use the already computed (conservative) body size. 
Switch multipart length calculation to accept body and boundary sizes. Issue #1200 Issue #1253 --- src/couch/src/couch_doc.erl | 20 +++++++++++++------- src/couch/src/couch_httpd_multipart.erl | 15 ++++++++------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/couch/src/couch_doc.erl b/src/couch/src/couch_doc.erl index 508603ae01b..c55725a3e9b 100644 --- a/src/couch/src/couch_doc.erl +++ b/src/couch/src/couch_doc.erl @@ -133,10 +133,11 @@ from_json_obj_validate(EJson) -> from_json_obj_validate(EJson, DbName) -> MaxSize = config:get_integer("couchdb", "max_document_size", 4294967296), Doc = from_json_obj(EJson, DbName), - case couch_ejson_size:encoded_size(Doc#doc.body) =< MaxSize of + BodySize = couch_ejson_size:encoded_size(Doc#doc.body), + case BodySize =< MaxSize of true -> validate_attachment_sizes(Doc#doc.atts), - validate_total_document_size(Doc), + validate_total_document_size(Doc, BodySize), Doc; false -> throw({request_entity_too_large, Doc#doc.id}) @@ -145,12 +146,11 @@ from_json_obj_validate(EJson, DbName) -> % sum up the json body size + attachment body size and % make sure it is < max_http_request_size -validate_total_document_size(#doc{id=DocId, body=Body, atts=Atts0}=Doc) -> +validate_total_document_size(#doc{id=DocId, atts=Atts0}=Doc, BodySize) -> MaxReqSize = config:get_integer("httpd", "max_http_request_size", 4294967296), % 4 GB - Boundary = <<"00000000000000000000000000000000">>, Atts = lists:map(fun couch_att:to_tuple/1, Atts0), - {_, DocSum} = couch_httpd_multipart:length_multipart_stream(Boundary, - ?JSON_ENCODE(Body), Atts), + {_, DocSum} = couch_httpd_multipart:length_multipart_stream(32, BodySize, + Atts), case DocSum =< MaxReqSize of true -> ok; false -> throw({request_entity_too_large, DocId}) @@ -436,7 +436,13 @@ merge_stubs(#doc{id=Id,atts=MemBins}=StubsDoc, #doc{atts=DiskBins}) -> len_doc_to_multi_part_stream(Boundary, JsonBytes, Atts, SendEncodedAtts) -> AttsToInclude = lists:filter(fun(Att) -> not 
couch_att:is_stub(Att) end, Atts), AttsDecoded = decode_attributes(AttsToInclude, SendEncodedAtts), - couch_httpd_multipart:length_multipart_stream(Boundary, JsonBytes, AttsDecoded). + case couch_httpd_multipart:length_multipart_stream(byte_size(Boundary), + iolist_size(JsonBytes), AttsDecoded) of + {json, Len} -> + {<<"application/json">>, Len}; + {multipart, Len} -> + {<<"multipart/related; boundary=\"", Boundary/binary, "\"">>, Len} + end. doc_to_multi_part_stream(Boundary, JsonBytes, Atts, WriteFun, diff --git a/src/couch/src/couch_httpd_multipart.erl b/src/couch/src/couch_httpd_multipart.erl index 33795a3a1bf..e0b313e77d3 100644 --- a/src/couch/src/couch_httpd_multipart.erl +++ b/src/couch/src/couch_httpd_multipart.erl @@ -263,14 +263,15 @@ atts_to_mp([{Att, Name, Len, Type, Encoding} | RestAtts], Boundary, WriteFun, WriteFun(<<"\r\n--", Boundary/binary>>), atts_to_mp(RestAtts, Boundary, WriteFun, AttFun). -length_multipart_stream(Boundary, JsonBytes, Atts) -> +length_multipart_stream(BoundarySize, JsonByteSize, Atts) when + is_integer(BoundarySize), is_integer(JsonByteSize) -> AttsSize = lists:foldl(fun({_Att, Name, Len, Type, Encoding}, AccAttsSize) -> AccAttsSize + 4 + % "\r\n\r\n" length(integer_to_list(Len)) + Len + 4 + % "\r\n--" - size(Boundary) + + BoundarySize + % attachment headers % (the length of the Content-Length has already been set) size(Name) + @@ -287,15 +288,15 @@ length_multipart_stream(Boundary, JsonBytes, Atts) -> end end, 0, Atts), if AttsSize == 0 -> - {<<"application/json">>, iolist_size(JsonBytes)}; + {json, JsonByteSize}; true -> - {<<"multipart/related; boundary=\"", Boundary/binary, "\"">>, + {multipart, 2 + % "--" - size(Boundary) + + BoundarySize + 36 + % "\r\ncontent-type: application/json\r\n\r\n" - iolist_size(JsonBytes) + + JsonByteSize + 4 + % "\r\n--" - size(Boundary) + + BoundarySize + + AttsSize + 2 % "--" }