-
Notifications
You must be signed in to change notification settings - Fork 2
/
pdf_controller.rb
338 lines (280 loc) · 13 KB
/
pdf_controller.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
class PDFController
def index
@bates_numbering = %w{Center Left Right}
@bates_numbering << "None (merge without indexing)"
@notice = flash[:notice]
render :bates
end
def bates
# redirect back to the form page if there's no posted data.
return redirect_to '' unless params[:file] && params[:file][0]
# catch any exception and tell the user which PDF caused the exception.
begin
# set the file_name variable
# this will be used in case an exception is caught
# to state which file caused the exception.
file_name = ''
# container for the complete pdf
# the complete pdf container will hold all
# the merged pdf data.
completed_pdf = CombinePDF.new
# get the output file name
# this will be used to name the download for the client's browser
output_name = params[:bates][:output].to_s + '.pdf'
# get some paramaters that will be used while combining pages
params[:bates] ||= {}
first_page_number = params[:bates][:first_page_number] || 1
first_index_number = params[:bates][:first_index_number] || 1
# we will add an option for the stamping to ignore the first pdf
# this is useful for the court cases that use bates numbering
# (the "cover PDF" will be the briefs of submissions that contain exhibits to be bates stamped)
ignore_first_file = params[:bates][:first_pdf_is_cover]
first_page = nil
# we will pick some data up while running combining the different pdf files.
# this will be used for the table of contents later on.
pdfs_pages_count = []
pdf_dates = []
pdf_titles = []
# we will be creating title pages before each PDF file.
# the title pages will be sized using the mediabox variable
# wich will be set with the dimentions of each pdf file's first page.
mediabox = nil
# register UNICODE fonts if necessary
# in this example I will register the Hebrew font David from an existing PDF file.
unless CombinePDF::Fonts.get_font :my_new_david
# if your running Rails, consider Rails.root instead of Root
fonts = CombinePDF.new(Root.join("assets", "fonts", "david+bold.pdf").to_s).fonts(true)
# I know the first font is David regular, after using the
# ruby console and looking at the fonts array for the file.
CombinePDF.register_font_from_pdf_object :my_new_david, fonts[0]
# the second font of the array was the latin font for a newline and space... useless
# the third was the david bold. I will now add that font.
CombinePDF.register_font_from_pdf_object :my_new_david_bold, fonts[2]
end
# itiriate through the different data sent from the client's browser's web form
params[:file].each do |k,v|
# set the file_name variable in case an exception will be raised
file_name = v[:name]
# parse the pdf data
# we will use the CombinePDF.parse method which allows us
# to parse data without saving the PDF to the file system.
# our javascript encoded the file data using base64, which we will need to decode.
# (this is specific to my form which uses the HTML5 File API in this specific manner)
PL.info "parsing file: #{file_name}"
pdf_file = CombinePDF.parse(
Base64.urlsafe_decode64(
v[:data].slice( "data:application/pdf;base64,".length,
v[:data].length )) )
# we will use the pages array a few times, so in order to avoid
# recomputing the array every time, we will save it to a local variable.
pdf_file_pages = pdf_file.pages
# we will add the page count to the page count array,
# used by the table of contents
pdfs_pages_count << pdf_file_pages.length
######
# create and add title page to arrays.
if ignore_first_file && first_page.nil?
# if the first PDF file is a "cover page" PDF,
# we will not add a title, nor add the file.
# instead we will save the data in a variable to add it after we're done
pdf_dates << ""
pdf_titles << ""
first_page = pdf_file
else
# set title page mediabox size
# the mediabox data (page size) is contained in the page's
# :CropBox or :MediaBox keys (pages are Hash objects).
mediabox ||= pdf_file_pages[0][:CropBox] || pdf_file_pages[0][:MediaBox]
# create a title page unless we're only merging or there is no indexing
if params[:bates][:should_index] && params[:bates][:numbering] != 3
# create an empty page object
title_page = CombinePDF.create_page mediabox
# write the content to the title page.
# we will be using the I18n.t shortcut to write some of the data.
# the rest of the data, like the title, we got from the form.
title_page.textbox( "#{params[:bates][:title_type]} #{pdfs_pages_count.length + first_index_number - (ignore_first_file ? 2 : 1)}" ,
max_font_size: 34,
font: :my_new_david,
y: (mediabox[3] - mediabox[1])/2 ) unless params[:bates][:title_type].to_s.empty?
title_page.textbox v[:title].to_s, max_font_size: 36, font: :my_new_david_bold
title_page.textbox v[:date].to_s, max_font_size: 24, font: :my_new_david, height: (mediabox[3] - mediabox[1])/2
# we will add the page object to the completed pdf object.
# notice that page objects are created as "floating" pages,
# not attached to any specific PDF file/object.
completed_pdf << title_page
# we will add some data that will be used to create the
# table of contents at a later stage.
page_count = pdfs_pages_count.pop + 1
pdfs_pages_count << page_count
pdf_dates << v[:date].to_s
pdf_titles << v[:title].to_s
end
# now we are ready to add the pdf file data to the completed pdf object.
# there is no need to add each page, we can add the pdf as a whole.
# (it's actually faster, as the PDF page catalog isn't recomputed for each page)
completed_pdf << pdf_file
end
end
##########
# create the index pdf...
# ...unless we're only merging or there is no indexing
if params[:bates][:should_index] && params[:bates][:numbering] != 3
# set the fonts and formatting for the table of contents.
#
# also, add an empty array for the table data.
#
# the table data array will contain arrays of String objects, each one
# corresponding to a row in the table.
table_options = { font: :my_new_david,
header_font: :my_new_david_bold,
max_font_size: 12,
column_widths: (params[:bates][:date_header].to_s.empty? ? [3, 40, 4] : [3, 10, 30, 4]),
table_data: [] }
# set the table header array.
# this is an array of strings. we will use the I18n .t shortcut
# to chose the localized strings
table_options[:headers] = [ params[:bates][:number_header] , # (I18n.t :bates_pdf_file),
(params[:bates][:date_header].to_s.empty? ? nil : params[:bates][:date_header]),
params[:bates][:title_header].to_s,
params[:bates][:page_header].to_s ]
table_options[:headers].compact!
# by default, there are 25 rows per page for table pdf created by CombinePDF
# we can override this in the formatting (but we didn't).
#
# the 25 rows include 1 header row per page - so there are only 24 effective rows.
#
# we will calculate how many pages the table of contents pdf will have once completes,
# so we can add the count to the page numbers in the index.
index_page_length = pdfs_pages_count.length / 24
index_page_length += 1 if pdfs_pages_count.length % 24 > 0
# set the page number for the first entry in the table of contents.
page_number = first_page_number + index_page_length
# set the index count to 0, we will use it to change the index for each entry.
# we need a different variable in case the first PDF file is a "cover page".
index_count = 0
# itirate over the data we collected before
# and add it to the table data.
pdfs_pages_count.each_index do |i|
# add the data unless it is set to be ignored
unless ignore_first_file
# add an array of strings to the :table_data array,
# representing a row in our table.
# remember there might not be a date column.
if params[:bates][:date_header].to_s.empty?
table_options[:table_data] << [ (first_index_number + index_count).to_s,
pdf_titles[i], page_number ]
else
table_options[:table_data] << [ (first_index_number + index_count).to_s,
pdf_dates[i],
pdf_titles[i], page_number ]
end
# if the data was added to the index table, bump the index count
index_count += 1
end
# make sure future data will not be ignored
ignore_first_file = false
# add the page count to the page number, so that the next
# index's page number is up to date.
page_number += pdfs_pages_count[i]
end
# if out current locale is hebrew, which is a right to left language,
# set the direction for the table to Right-To-left (:rtl).
#
# notice that RTL text should be automatically recognized, but that
# feature isn't available (and shouldn't be available) for tables.
if params[:bates][:dir] == 'rtl'
table_options[:direction] = :rtl
end
# if there is table data, we will create an index pdf.
unless table_options[:table_data].empty?
# create the index PDF from the table data and options we have.
index_pdf = CombinePDF.create_table table_options
# We will now add the words "Table of Contents" (or the I18n equivilant)
# to the first page of our new index_pdf PDF object.
#
# the table PDF object was created by CombinePDF using writable PDF pages,
# so we have properties like .mediabox and methods like .textbox
# at our disposal.
# get the first page of the index_pdf object, we will use this reference a lot.
title_page = index_pdf.pages[0]
# write the textbox, using the mediabox page data [x,y,width,height] to place
# the text we want to write.
#
# we will use the I18n.t shortcut to choose the text to write down.
title_page.textbox params[:bates][:index_title],
{ y: ((title_page.mediabox[3] - title_page.mediabox[1])*0.91),
height: ((title_page.mediabox[3] - title_page.mediabox[1])*0.03),
font: :my_new_david,
max_font_size: 24,
text_valign: :bottom }
# now we will add the index_pdf to the BEGINING of the completed pdf.
# for this we will use the >> operator instead of the << operator.
completed_pdf >> index_pdf
end
end
#####
# number pages
# unless no numbering
if params[:bates][:numbering] != 3
# list the numbering options
numbering_options = [[:top, :bottom], [:top_left, :bottom_left], [:top_right, :bottom_right]]
# set the first visible page number to the page where numbering starts
# this assumes that the bates numbering include the numbering of the "cover page",
# yet at the same time the numbering isn't visible on the "cover page"
first_page_number += pdfs_pages_count[0] if params[:bates][:first_pdf_is_cover]
# call the page numbering method and
# add the special properties we want for the textbox
completed_pdf.number_pages({ start_at: first_page_number,
font_name: :my_new_david,
font_size: 14,
font_color: [0,0,0.4],
box_color: [0.8, 0.8, 0.8],
border_width: 1,
border_color: [0.3,0.3,0.3],
box_radius: 8,
number_location: numbering_options[ params[:bates][:numbering] ],
opacity: 0.75
})
end
####
# finish up
# add first file if it was skipped
if !first_page.nil?
completed_pdf >> first_page
end
# send the completed PDF to the client.
# if the completed PDF is empty, raise an error.
if completed_pdf.pages.length > 0
#make sure the PDF version is high enough for the opacity we used in the page numbering.
completed_pdf.version = [completed_pdf.version, 1.6].max
# we will format the PDF to a pdf file WITHOUT saving it to the file system,
# using the .to_pdf method (instead of the .save method).
# we will send the raw PDF data stream.
# on Rails use the .send_data method, setting the type of the stream and the file name for the stream.
# send_data , type: 'application/pdf', filename: output_name
# but I'm running this on Plezi, which has a similar method...
send_data completed_pdf.to_pdf, type: 'application/pdf', filename: output_name
# finish execution
return true
else
# inform the client there was an unknown error.
redirect_to '', notice: (I18n.t :bates_unknown_zero_pages_error)
end
rescue Exception => e
PL.error e
PL.error "The file causing the exception is: #{file_name}"
# if an exception was raised, tell the user which PDF caused the exception
redirect_to '', notice: ( I18n.t(:bates_file_unsupported_error) + "\n#{file_name}")
return true
end
end
def code
redirect_to 'https://github.com/boazsegev/combine_pdf_demo/blob/master/app/controllers/pdf_controller.rb'
end
protected
def link_to text, path, options={}
extra = ""
options.each {|k,v| extra << " #{k}='#{v}'"}
"<a href='#{path}'#{extra}>#{text}</a>"
end
end