Skip to content

Commit

Permalink
Merge pull request #645 from solita/feature/AE-2362
Browse files Browse the repository at this point in the history
Add new internal endpoint for updating public csv data into S3
  • Loading branch information
etahto authored Jan 20, 2025
2 parents e8d0c8d + 5eb8877 commit c9a399f
Show file tree
Hide file tree
Showing 8 changed files with 307 additions and 54 deletions.
4 changes: 2 additions & 2 deletions etp-core/etp-backend/src/dev/clj/user.clj
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,6 @@
(defn generate-aineisto!
"Generates the aineisto with `aineisto-id` into aws-s3-client"
[aineisto-id]
(require 'solita.etp.service.aineisto)
((resolve 'solita.etp.service.aineisto/update-aineisto-in-s3!)
(require 'solita.etp.service.csv-to-s3)
((resolve 'solita.etp.service.csv-to-s3/update-aineisto-in-s3!)
(db 2) {:id -5 :rooli -1} (aws-s3-client) aineisto-id))
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
[solita.etp.schema.common :as common-schema]
[solita.etp.service.aineisto :as aineisto-service]
[solita.etp.service.rooli :as rooli-service]
[solita.etp.service.csv-to-s3 :as csv-to-s3-service]
[solita.etp.service.kayttaja :as kayttaja-service]))

(defn first-address [x-forwarded-for]
Expand Down Expand Up @@ -65,7 +66,7 @@
:handler (fn [{:keys [db whoami aws-s3-client]}]
(r/response
(concurrent/run-background
#(aineisto-service/update-aineistot-in-s3!
#(csv-to-s3-service/update-aineistot-in-s3!
db
whoami
aws-s3-client)
Expand Down
22 changes: 22 additions & 0 deletions etp-core/etp-backend/src/main/clj/solita/etp/api/public_csv.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
(ns solita.etp.api.public-csv
(:require [ring.util.response :as r]
[solita.etp.security :as security]
[solita.etp.service.concurrent :as concurrent]
[solita.etp.service.csv-to-s3 :as csv-to-s3]))


(def internal-routes
[["/public-csv"
["/update"
{:post {:summary "Päivitä julkiset CSV-tiedostot S3:ssa."
:middleware [[security/wrap-db-application-name]]
:responses {200 {:body nil}}
:handler (fn [{:keys [db whoami aws-s3-client]}]
(r/response
(concurrent/run-background
#(csv-to-s3/update-public-csv-in-s3!
db
whoami
aws-s3-client
{:where nil})
"Public csv update failed")))}}]]])
4 changes: 3 additions & 1 deletion etp-core/etp-backend/src/main/clj/solita/etp/handler.clj
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
[ring.middleware.cookies :as cookies]
[schema.coerce]
[schema.core]
[solita.etp.api.public-csv :as public-csv-api]
[schema.core :as s]
[solita.etp.api.aineisto :as aineisto-api]
[solita.etp.api.energiatodistus :as energiatodistus-api]
Expand Down Expand Up @@ -169,7 +170,8 @@
(concat (tag "Laskutus API" laskutus-api/routes)
(tag "Laatija Internal API" laatija-api/internal-routes)
(tag "Energiatodistus Internal API" energiatodistus-api/internal-routes)
(tag "Aineisto Internal API" aineisto-api/internal-routes))]]
(tag "Aineisto Internal API" aineisto-api/internal-routes)
(tag "Public csv Internal API" public-csv-api/internal-routes))]]
(when config/allow-palveluvayla-api
["/palveluvayla" ["/openapi.json" {:get {:no-doc true :openapi {:info {:title "Energiatodistuspalvelu API" :description "Hae energiatodistuksia pdf tai json muodoissa"}
:id "Palveluväylä"}
Expand Down
48 changes: 13 additions & 35 deletions etp-core/etp-backend/src/main/clj/solita/etp/service/aineisto.clj
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
(ns solita.etp.service.aineisto
(:require
[clojure.java.jdbc :as jdbc]
[clojure.network.ip :as ip]
[clojure.tools.logging :as log]
[solita.etp.service.file :as file]
[solita.etp.service.energiatodistus-csv :as energiatodistus-csv]
[solita.etp.db :as db]
[solita.etp.service.luokittelu :as luokittelu-service])
[clojure.java.jdbc :as jdbc]
[clojure.network.ip :as ip]
[clojure.tools.logging :as log]
[solita.etp.service.file :as file]
[solita.etp.service.energiatodistus-csv :as energiatodistus-csv]
[solita.etp.db :as db]
[solita.etp.service.luokittelu :as luokittelu-service])
(:import (java.time Instant)
(java.nio.charset StandardCharsets)
(java.nio ByteBuffer)))
Expand Down Expand Up @@ -89,31 +89,9 @@

(defn aineisto-reducible-query [db whoami aineisto-id]
(as-> aineisto-id val
(aineisto-key val)
(aineisto-sources val)
(not-nil-aineisto-source val)
(val db whoami)))

(defn update-aineisto-in-s3! [db whoami aws-s3-client aineisto-id]
(log/info (str "Starting updating of aineisto (id: " aineisto-id ")."))
(let [csv-reducible-query (aineisto-reducible-query db whoami aineisto-id)
key (str "/api/signed/aineistot/" aineisto-id "/energiatodistukset.csv")
;; This part is used to store rows until it reaches 5MB which
;; is the minimum requirement by `upload-part-fn`.
current-part (ByteBuffer/allocate (* 8 1024 1024))
upload-parts-fn (fn [upload-part-fn]
(csv-reducible-query (fn [^String row]
(let [row-bytes (.getBytes row StandardCharsets/UTF_8)]
(.put current-part row-bytes)
(when (< (* 5 1024 1024) (.position current-part))
(upload-part-fn (extract-byte-array-and-reset! current-part))))))
;;The last part needs to be uploaded separately (unless the size was a multiple of 5MB)
(when (not= 0 (.position current-part))
(upload-part-fn (extract-byte-array-and-reset! current-part))))]
(file/upsert-file-in-parts aws-s3-client key upload-parts-fn)
(log/info (str "Updating of aineisto (id: " aineisto-id ") finished."))))

(defn update-aineistot-in-s3! [db whoami aws-s3-client]
(update-aineisto-in-s3! db whoami aws-s3-client 1)
(update-aineisto-in-s3! db whoami aws-s3-client 2)
(update-aineisto-in-s3! db whoami aws-s3-client 3))
(aineisto-key val)
(aineisto-sources val)
(not-nil-aineisto-source val)
(val db whoami)))


58 changes: 58 additions & 0 deletions etp-core/etp-backend/src/main/clj/solita/etp/service/csv_to_s3.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
(ns solita.etp.service.csv-to-s3
(:require [clojure.tools.logging :as log]
[solita.etp.service.energiatodistus-csv :as energiatodistus-csv]
[solita.etp.service.file :as file]
[solita.etp.service.aineisto :as aineisto-service])
(:import [java.nio ByteBuffer]
[java.nio.charset StandardCharsets]))

(def ^:private buffer-size (* 8 1024 1024)) ; 8MB
(def ^:private upload-threshold (* 5 1024 1024)) ; 5MB

(defn- create-buffer []
(ByteBuffer/allocate buffer-size))

(def public-csv-key
"/api/public/energiatodistukset/csv/energiatodistukset.csv")

(defn aineisto-key [aineisto-id]
(str "/api/signed/aineistot/" aineisto-id "/energiatodistukset.csv"))

(defn- create-upload-parts-fn [csv-reducible-query]
(let [current-part (create-buffer)]
(fn [upload-part-fn]
(csv-reducible-query
(fn [^String row]
(let [row-bytes (.getBytes row StandardCharsets/UTF_8)]
(.put current-part row-bytes)
(when (> (.position current-part) upload-threshold)
(upload-part-fn (aineisto-service/extract-byte-array-and-reset! current-part))))))
;; Upload any remaining data
(when (not= 0 (.position current-part))
(upload-part-fn (aineisto-service/extract-byte-array-and-reset! current-part))))))

(defn- process-csv-to-s3! [aws-s3-client key csv-reducible-query log-start log-end]
(log/info log-start)
(let [upload-parts-fn (create-upload-parts-fn csv-reducible-query)]
(file/upsert-file-in-parts aws-s3-client key upload-parts-fn)
(log/info log-end)))

(defn update-aineisto-in-s3! [db whoami aws-s3-client aineisto-id]
(let [csv-query (aineisto-service/aineisto-reducible-query db whoami aineisto-id)
key (aineisto-key aineisto-id)
start-msg (str "Starting updating of aineisto (id: " aineisto-id ").")
end-msg (str "Updating of aineisto (id: " aineisto-id ") finished.")]
(process-csv-to-s3! aws-s3-client key csv-query start-msg end-msg)))

(defn update-public-csv-in-s3! [db whoami aws-s3-client query]
(let [csv-query (energiatodistus-csv/energiatodistukset-public-csv db whoami query)]
(process-csv-to-s3!
aws-s3-client
public-csv-key
csv-query
"Starting updating of public energiatodistus."
"Updating of public energiatodistus finished.")))

(defn update-aineistot-in-s3! [db whoami aws-s3-client]
(doseq [id [1 2 3]]
(update-aineisto-in-s3! db whoami aws-s3-client id)))
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
(ns solita.etp.service.aineisto-test
(:require [solita.etp.service.aineisto :as aineisto]
[solita.etp.service.csv-to-s3 :as csv-to-s3]
[solita.etp.service.file :as file]
[solita.etp.service.kayttaja :as kayttaja-service]
[solita.etp.test-data.generators :as generators]
Expand Down Expand Up @@ -99,15 +100,15 @@

(t/deftest update-aineistot-test
(t/testing "Aineistot don't exist before generating"
(t/is (false? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/1/energiatodistukset.csv")))
(t/is (false? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/2/energiatodistukset.csv")))
(t/is (false? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/3/energiatodistukset.csv"))))
(t/is (false? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 1))))
(t/is (false? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 2))))
(t/is (false? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 3)))))

(t/testing "Aineistot exist after generating"
(aineisto/update-aineistot-in-s3! ts/*db* {:id -5 :rooli 2} ts/*aws-s3-client*)
(t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/1/energiatodistukset.csv")))
(t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/2/energiatodistukset.csv")))
(t/is (true? (file/file-exists? ts/*aws-s3-client* "/api/signed/aineistot/3/energiatodistukset.csv"))))
(csv-to-s3/update-aineistot-in-s3! ts/*db* {:id -5 :rooli 2} ts/*aws-s3-client*)
(t/is (true? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 1))))
(t/is (true? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 2))))
(t/is (true? (file/file-exists? ts/*aws-s3-client* (csv-to-s3/aineisto-key 3)))))

(t/testing "New energiatodistus shows up correctly when updating aineistot"
(let [;; Add laatija
Expand Down Expand Up @@ -136,21 +137,21 @@

;; Update aineistot. Todistus-1 should be included after the update,
;; but todistus-2 should be not as it's not signed yet.
(aineisto/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)
(csv-to-s3/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)

;; Aineisto 1 - Test that rakennustunnus-1 exists, but that there is only one row of energiatodistukset.
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/1/energiatodistukset.csv")]
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 1))]
(t/is (true? (str/includes? first rakennustunnus-1)))
(t/is (nil? second)))

;; Aineisto 2 - Test that rakennustunnus-1 exists, but that there is only one row of energiatodistukset.
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/2/energiatodistukset.csv")]
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 2))]
(t/is (true? (str/includes? first rakennustunnus-1)))
(t/is (nil? second)))

;; Aineisto 3 - Test that one row exists and that the rakennustunnus can't be found as this set should be
;; anonymized.
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/3/energiatodistukset.csv")]
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 3))]
(t/is (false? (str/includes? first rakennustunnus-1)))
(t/is (false? (nil? first)))
(t/is (nil? second)))
Expand All @@ -159,23 +160,23 @@
(test-data.energiatodistus/sign! todistus-2-id laatija-id true)

;; Update aineistot. Now todistus-1 and todistus-2 should be in the csv.
(aineisto/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)
(csv-to-s3/update-aineistot-in-s3! ts/*db* whoami ts/*aws-s3-client*)

;; Aineisto 1 - Test that both rakennustunnus exist. It does not matter which one is which
;; as the order of them is not guaranteed.
(let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/1/energiatodistukset.csv")]
(let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 1))]
(t/is (true? (is-included-in-exactly-one? rakennustunnus-1 csv-et-lines)))
(t/is (true? (is-included-in-exactly-one? rakennustunnus-2 csv-et-lines))))

;; Aineisto 2 - Test that both rakennustunnus exist. It does not matter which one is which
;; as the order of them is not guaranteed.
(let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/2/energiatodistukset.csv")]
(let [csv-et-lines (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 2))]
(t/is (true? (is-included-in-exactly-one? rakennustunnus-1 csv-et-lines)))
(t/is (true? (is-included-in-exactly-one? rakennustunnus-2 csv-et-lines))))

;; Aineisto 3 - Test that two rows exists and that either of the rakennustunnukset can't be found
;; as this set is be anonymized.
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto "/api/signed/aineistot/3/energiatodistukset.csv")]
(let [[first second] (get-first-two-energiatodistus-lines-from-aineisto (csv-to-s3/aineisto-key 3))]
;; Rakennustunnus-1 can't be found
(t/is (false? (str/includes? first rakennustunnus-1)))
(t/is (false? (str/includes? second rakennustunnus-1)))
Expand Down
Loading

0 comments on commit c9a399f

Please sign in to comment.