From b80839007af658c6929e11ea7af6f951afb86a22 Mon Sep 17 00:00:00 2001 From: Brent Salisbury Date: Wed, 26 Jun 2024 01:33:04 -0400 Subject: [PATCH] POC: Deep Search PDF to MD file conversion Signed-off-by: Brent Salisbury --- .env.example | 3 +- .gitignore | 1 - mock/go-deepseed/deepseed-mock.go | 162 +++++++++++ mock/go-deepseed/go.mod | 34 +++ mock/go-deepseed/go.sum | 89 +++++++ src/app/api/conversion/route.ts | 125 +++++++++ src/app/api/pr/knowledge/route.ts | 2 +- src/app/api/pr/skill/route.ts | 2 +- src/app/api/upload/route.ts | 19 +- src/app/contribute/knowledgefiles/page.tsx | 19 ++ .../edit-submission/knowledge/[id]/page.tsx | 2 +- src/components/AppLayout.tsx | 3 +- .../Knowledge/FileSelectionModal.tsx | 100 +++++++ .../Contribute/Knowledge/UploadFile.tsx | 133 +++++---- src/components/Contribute/Knowledge/index.tsx | 252 ++++++++++++++++-- .../Contribute/KnowledgeFiles/index.tsx | 126 +++++++++ src/lib/api/deepsearch/index.ts | 147 ++++++++++ src/utils/fileManagerGithub.ts | 34 +++ src/utils/github.ts | 28 ++ 19 files changed, 1182 insertions(+), 99 deletions(-) create mode 100644 mock/go-deepseed/deepseed-mock.go create mode 100644 mock/go-deepseed/go.mod create mode 100644 mock/go-deepseed/go.sum create mode 100644 src/app/api/conversion/route.ts create mode 100644 src/app/contribute/knowledgefiles/page.tsx create mode 100644 src/components/Contribute/Knowledge/FileSelectionModal.tsx create mode 100644 src/components/Contribute/KnowledgeFiles/index.tsx create mode 100644 src/lib/api/deepsearch/index.ts create mode 100644 src/utils/fileManagerGithub.ts diff --git a/.env.example b/.env.example index c79f9f05..1f0da67c 100644 --- a/.env.example +++ b/.env.example @@ -9,6 +9,7 @@ IL_GRANITE_MODEL_NAME= IL_MERLINITE_API= IL_MERLINITE_MODEL_NAME= GITHUB_TOKEN= -TAXONOMY_DOCUMENTS_REPO=github.com// +NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO=github.com// NEXT_PUBLIC_TAXONOMY_REPO_OWNER= NEXT_PUBLIC_TAXONOMY_REPO= +DEEP_SEARCH_ENDPOINT=http://localhost:8080 diff 
--git a/.gitignore b/.gitignore
index d909b5d7..d00a474e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,7 +5,6 @@ npm-debug.log
 .env
 *.env
 coverage
-lib
 taxonomy
 config.yaml
 generated
diff --git a/mock/go-deepseed/deepseed-mock.go b/mock/go-deepseed/deepseed-mock.go
new file mode 100644
index 00000000..37e5b29d
--- /dev/null
+++ b/mock/go-deepseed/deepseed-mock.go
@@ -0,0 +1,193 @@
+// Command mock-deepseed is a stand-in for the Deep Search conversion API. It
+// serves canned responses so the UI's PDF-to-Markdown flow can be exercised
+// locally without real credentials.
+package main
+
+import (
+	"log"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/gin-gonic/gin"
+)
+
+// AuthRequest mirrors the username/API-key pair sent to the token endpoint.
+type AuthRequest struct {
+	Username string `json:"username"`
+	ApiKey   string `json:"apiKey"`
+}
+
+// AuthResponse is the token endpoint's JSON reply.
+type AuthResponse struct {
+	AccessToken string `json:"access_token"`
+}
+
+// TaskRequest is the JSON body read by the convert-upload endpoint.
+type TaskRequest struct {
+	FileURL []string `json:"file_url"`
+}
+
+// TaskResponse is returned by launchConvert for the newly queued task.
+type TaskResponse struct {
+	TaskID        string `json:"task_id"`
+	TaskStatus    string `json:"task_status"`
+	TransactionID string `json:"transaction_id"`
+}
+
+// Task is the status document returned by waitForTask.
+type Task struct {
+	TaskID     string `json:"task_id"`
+	TaskStatus string `json:"task_status"`
+	Result     struct {
+		TransactionID string `json:"transaction_id"`
+	} `json:"result"`
+}
+
+// DocumentHash is one entry of the "documents" list from getDocumentHashes.
+type DocumentHash struct {
+	DocumentHash string `json:"document_hash"`
+}
+
+// DocumentArtifacts is the "artifacts" payload from getDocumentArtifacts.
+type DocumentArtifacts struct {
+	DocumentPDF  string `json:"document_pdf"`
+	DocumentMD   string `json:"document_md"`
+	DocumentJSON string `json:"document_json"`
+	PageImages   []struct {
+		PageNo int    `json:"page_no"`
+		URL    string `json:"url"`
+	} `json:"page_images"`
+}
+
+// Fixed identifiers echoed back by the handlers below.
+var token = "mock-token"
+var taskID = "mock-task-id"
+var transactionID = "mock-transaction-id"
+var documentHash = "mock-document-hash"
+
+// taskDelay is how long waitForTask blocks before reporting SUCCESS.
+const taskDelay = 15 * time.Second
+
+// pdfDataURLPrefix marks a file_url entry that carries an inline base64 PDF.
+const pdfDataURLPrefix = "data:application/pdf;base64,"
+
+func main() {
+	router := gin.Default()
+
+	router.POST("/api/cps/user/v1/user/token", authenticate)
+	router.POST("/api/cps/public/v1/project/:projKey/data_indices/:indexKey/actions/ccs_convert_upload", launchConvert)
+	router.GET("/api/cps/public/v2/project/:projKey/celery_tasks/:taskId", waitForTask)
+	router.GET("/api/cps/public/v2/project/:projKey/data_indices/:indexKey/documents/transactions/:transactionId", getDocumentHashes)
+	router.GET("/api/cps/public/v2/project/:projKey/data_indices/:indexKey/documents/:documentHash/artifacts", getDocumentArtifacts)
+
+	// Run blocks while serving and only returns on failure, e.g. when the port
+	// is already taken; surface that instead of exiting silently.
+	if err := router.Run(":8080"); err != nil {
+		log.Fatalf("mock server stopped: %v", err)
+	}
+}
+
+// authenticate always answers with the fixed mock token. Credentials are not
+// verified; a well-formed Basic header is only used to log the username.
+func authenticate(c *gin.Context) {
+	// BasicAuth reports !ok for a missing header, invalid base64, or a value
+	// with no colon, so malformed input can no longer index past a slice.
+	if username, apiKey, ok := c.Request.BasicAuth(); ok {
+		authRequest := AuthRequest{Username: username, ApiKey: apiKey}
+		log.Printf("Token requested for user %q", authRequest.Username)
+	}
+
+	c.JSON(http.StatusOK, AuthResponse{AccessToken: token})
+}
+
+// launchConvert accepts a conversion request and replies with a PENDING task
+// carrying the fixed task and transaction IDs. The body is only inspected for
+// logging.
+func launchConvert(c *gin.Context) {
+	var taskRequest TaskRequest
+
+	// ShouldBindJSON leaves the response untouched on error, unlike BindJSON,
+	// which writes its own 400 before the JSON error below could be sent.
+	if err := c.ShouldBindJSON(&taskRequest); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+		return
+	}
+
+	logFileURL(taskRequest.FileURL)
+
+	taskResponse := TaskResponse{
+		TaskID:        taskID,
+		TaskStatus:    "PENDING",
+		TransactionID: transactionID,
+	}
+
+	// Log method and path only; printing the whole *http.Request would also
+	// print its Authorization header.
+	log.Printf("taskRequest -> %s %s", c.Request.Method, c.Request.URL.Path)
+	log.Printf("taskResponse -> %v", taskResponse)
+
+	c.JSON(http.StatusOK, taskResponse)
+}
+
+// logFileURL logs what the first file_url entry looks like, printing at most
+// the last 20 characters of an inline base64 PDF.
+func logFileURL(fileURLs []string) {
+	if len(fileURLs) == 0 {
+		log.Printf("No file URL received in request")
+		return
+	}
+
+	base64Data, isInlinePDF := strings.CutPrefix(fileURLs[0], pdfDataURLPrefix)
+	switch {
+	case !isInlinePDF:
+		log.Printf("FileURL does not start with expected base64 prefix")
+	case len(base64Data) > 20:
+		log.Printf("Received base64 string (last 20 chars): %s", base64Data[len(base64Data)-20:])
+	default:
+		log.Printf("Received base64 string: %s", base64Data)
+	}
+}
+
+// waitForTask simulates a slow conversion: it blocks for taskDelay and then
+// reports SUCCESS for whichever task ID appears in the URL.
+func waitForTask(c *gin.Context) {
+	select {
+	case <-time.After(taskDelay):
+	case <-c.Request.Context().Done():
+		// The client went away; there is no one left to answer.
+		return
+	}
+
+	task := Task{
+		TaskID:     c.Param("taskId"),
+		TaskStatus: "SUCCESS",
+	}
+	task.Result.TransactionID = transactionID
+
+	c.JSON(http.StatusOK, task)
+}
+
+// getDocumentHashes returns a single fixed document hash under "documents".
+func getDocumentHashes(c *gin.Context) {
+	documentHashes := []DocumentHash{{DocumentHash: documentHash}}
+	c.JSON(http.StatusOK, gin.H{"documents": documentHashes})
+}
+
+// getDocumentArtifacts returns canned artifacts, including mock Markdown,
+// under "artifacts".
+func getDocumentArtifacts(c *gin.Context) {
+	documentArtifacts := DocumentArtifacts{
+		DocumentPDF:  "mock-document-pdf",
+		DocumentMD:   "# This is a mock markdown content\n\nSample content for the markdown file.",
+		DocumentJSON: "mock-document-json",
+		PageImages: []struct {
+			PageNo int    `json:"page_no"`
+			URL    string `json:"url"`
+		}{
+			{PageNo: 1, URL: "mock-url-1"},
+			{PageNo: 2, URL: "mock-url-2"},
+		},
+	}
+	c.JSON(http.StatusOK, gin.H{"artifacts": documentArtifacts})
+}
diff --git a/mock/go-deepseed/go.mod b/mock/go-deepseed/go.mod
new file mode 100644
index 00000000..42ce00d7
--- /dev/null
+++ b/mock/go-deepseed/go.mod
@@ -0,0 +1,34 @@
+module mock-deepseed
+
+go 1.22.1
+
+require github.com/gin-gonic/gin v1.10.0
+
+require (
+	github.com/bytedance/sonic v1.11.6 // indirect
+	github.com/bytedance/sonic/loader v0.1.1 // indirect
+	github.com/cloudwego/base64x v0.1.4 // indirect
+	github.com/cloudwego/iasm v0.2.0 // indirect
+	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
+	github.com/gin-contrib/sse v0.1.0 // indirect
+	github.com/go-playground/locales v0.14.1 // indirect
+	github.com/go-playground/universal-translator v0.18.1 // indirect
+	github.com/go-playground/validator/v10 v10.20.0 // indirect
+	github.com/goccy/go-json v0.10.2 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
+	github.com/leodido/go-urn v1.4.0 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/pelletier/go-toml/v2 v2.2.2 //
indirect + github.com/twitchyliquid64/golang-asm v0.15.1 // indirect + github.com/ugorji/go/codec v1.2.12 // indirect + golang.org/x/arch v0.8.0 // indirect + golang.org/x/crypto v0.23.0 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/sys v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect + google.golang.org/protobuf v1.34.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/mock/go-deepseed/go.sum b/mock/go-deepseed/go.sum new file mode 100644 index 00000000..7f08abb2 --- /dev/null +++ b/mock/go-deepseed/go.sum @@ -0,0 +1,89 @@ +github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0= +github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= +github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= +github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= +github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= +github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= +github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= +github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= +github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= +github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= +github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= +github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= 
+github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= +github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8= +github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= +github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= +github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= +github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= +github.com/leodido/go-urn v1.4.0/go.mod 
h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= +github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4/go.mod 
h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= +github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= +github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= +golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod 
h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
diff --git a/src/app/api/conversion/route.ts b/src/app/api/conversion/route.ts
new file mode 100644
index 00000000..cd2dc189
--- /dev/null
+++ b/src/app/api/conversion/route.ts
@@ -0,0 +1,200 @@
+'use server';
+
+import { NextResponse, NextRequest } from 'next/server';
+import fetch from 'node-fetch';
+
+const BRANCH = 'main';
+const POLL_INTERVAL_MS = 10000;
+// Poll budget: 30 attempts with a 10s pause between them is roughly five
+// minutes of waiting before the route gives up with a 504.
+const MAX_POLL_ATTEMPTS = 30;
+
+/** Fields this route reads from a conversion task-status response. */
+interface ConversionTaskStatus {
+  task_status?: string;
+  result?: {
+    json_file_url?: string;
+    md_file_url?: string;
+    document_hash?: string;
+  } | null;
+}
+
+/** Builds the `{ error }` JSON response shared by every failure path. */
+function errorResponse(error: string, status: number) {
+  return NextResponse.json({ error }, { status });
+}
+
+/**
+ * Converts the first PDF listed in `documentNames` through the Deep Search API.
+ *
+ * Reads `{ repoUrl, documentNames }` from the JSON body, builds the raw GitHub
+ * URL of that PDF on the `main` branch, then authenticates, starts a
+ * conversion, and polls the task until it succeeds, fails, or the poll budget
+ * is spent.
+ *
+ * Responds with `{ json_file_url, md_file_url, document_hash }` on success and
+ * `{ error }` otherwise: 400 for an unusable request body, 500 for missing
+ * configuration, a failed task, or an unexpected error, 502 when the service
+ * omits an expected field, 504 when polling times out, and the upstream status
+ * when the service rejects a call.
+ */
+export async function POST(req: NextRequest) {
+  let body: unknown;
+  try {
+    body = await req.json();
+  } catch {
+    return errorResponse('Request body must be valid JSON', 400);
+  }
+  const { repoUrl, documentNames } = (body ?? {}) as { repoUrl?: unknown; documentNames?: unknown };
+
+  const USERNAME = process.env.DS_USERNAME;
+  const API_KEY = process.env.DS_API_KEY;
+  const HOST = process.env.DS_HOST;
+  const PROJ_KEY = process.env.DS_PROJ_KEY;
+
+  if (!USERNAME || !API_KEY || !HOST || !PROJ_KEY) {
+    console.error('Missing environment variables');
+    return errorResponse('Missing environment variables', 500);
+  }
+
+  if (typeof repoUrl !== 'string' || !Array.isArray(documentNames)) {
+    console.error('Invalid request body');
+    return errorResponse('repoUrl (string) and documentNames (array) are required', 400);
+  }
+
+  const pdfFileName = documentNames.find((name): name is string => typeof name === 'string' && name.endsWith('.pdf'));
+  if (!pdfFileName) {
+    console.error('No PDF file found for conversion');
+    return errorResponse('No PDF file found for conversion', 400);
+  }
+
+  // Accept the URL with or without a scheme and with an optional ".git" suffix.
+  const [repoOwner, repoName] = repoUrl
+    .replace(/^(https?:\/\/)?github\.com\//, '')
+    .replace(/\.git$/, '')
+    .split('/');
+  if (!repoOwner || !repoName) {
+    console.error('Invalid repository URL:', repoUrl);
+    return errorResponse('repoUrl must look like https://github.com/<owner>/<repo>', 400);
+  }
+
+  // Encode each path segment so names containing spaces or "#" still form a valid URL.
+  const pdfPath = pdfFileName.split('/').map(encodeURIComponent).join('/');
+  const PDF_URL = `https://raw.githubusercontent.com/${repoOwner}/${repoName}/${BRANCH}/${pdfPath}`;
+  console.log(`PDF URL for conversion: ${PDF_URL}`);
+
+  try {
+    console.log('Starting authentication...');
+    const authResponse = await fetch(`${HOST}/api/cps/user/v1/user/token`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: `Basic ${Buffer.from(`${USERNAME}:${API_KEY}`).toString('base64')}`
+      },
+      body: JSON.stringify({})
+    });
+
+    if (!authResponse.ok) {
+      const error = await authResponse.text();
+      console.error('Error during authentication:', error);
+      return errorResponse(error, authResponse.status);
+    }
+
+    const { access_token: token } = (await authResponse.json()) as { access_token?: string };
+    if (!token) {
+      console.error('Authentication response did not include an access token');
+      return errorResponse('Authentication response did not include an access token', 502);
+    }
+    console.log('Authentication successful. Token obtained.');
+
+    console.log('Starting PDF conversion...');
+    const convertResponse = await fetch(`${HOST}/api/cps/public/v2/project/${PROJ_KEY}/convert`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: token
+      },
+      body: JSON.stringify({
+        http_source: { url: PDF_URL, headers: {} }
+      })
+    });
+
+    if (!convertResponse.ok) {
+      const error = await convertResponse.text();
+      console.error('Error during PDF conversion:', error);
+      return errorResponse(error, convertResponse.status);
+    }
+
+    const { task_id: taskId } = (await convertResponse.json()) as { task_id?: string };
+    if (!taskId) {
+      console.error('Conversion response did not include a task ID');
+      return errorResponse('Conversion response did not include a task ID', 502);
+    }
+    console.log(`PDF conversion started. Task ID: ${taskId}`);
+
+    console.log('Checking conversion task status...');
+    let taskStatus: ConversionTaskStatus | undefined;
+    for (let attempt = 1; attempt <= MAX_POLL_ATTEMPTS; attempt++) {
+      const taskResponse = await fetch(`${HOST}/api/cps/public/v2/project/${PROJ_KEY}/convert_tasks/${taskId}?wait=10`, {
+        method: 'GET',
+        headers: {
+          Authorization: token
+        }
+      });
+
+      if (!taskResponse.ok) {
+        const error = await taskResponse.text();
+        console.error('Error during task status check:', error);
+        return errorResponse(error, taskResponse.status);
+      }
+
+      const taskText = await taskResponse.text();
+      let polled: ConversionTaskStatus | null;
+      try {
+        polled = JSON.parse(taskText) as ConversionTaskStatus | null;
+      } catch {
+        console.error('Error parsing task status response:', taskText);
+        return errorResponse('Failed to parse task status response', 500);
+      }
+
+      const status = polled?.task_status;
+      console.log(`Task status: ${status}`);
+
+      // FAILURE is final even without a result; requiring one there would keep
+      // polling a failed task. SUCCESS still waits for its result to appear.
+      if (polled && (status === 'FAILURE' || (status === 'SUCCESS' && polled.result))) {
+        taskStatus = polled;
+        break;
+      }
+      if (attempt < MAX_POLL_ATTEMPTS) {
+        await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
+      }
+    }
+
+    if (!taskStatus) {
+      console.error('Timed out waiting for the PDF conversion task.');
+      return errorResponse('Timed out waiting for the PDF conversion task', 504);
+    }
+
+    if (taskStatus.task_status === 'FAILURE' || !taskStatus.result) {
+      console.error('PDF Conversion Task failed.');
+      return errorResponse('PDF Conversion Task failed', 500);
+    }
+
+    const result = {
+      json_file_url: taskStatus.result.json_file_url,
+      md_file_url: taskStatus.result.md_file_url,
+      document_hash: taskStatus.result.document_hash
+    };
+
+    console.log('Task completed successfully.');
+    console.log(`JSON file URL: ${result.json_file_url}`);
+    console.log(`Markdown file URL: ${result.md_file_url}`);
+    console.log(`Document hash: ${result.document_hash}`);
+
+    return NextResponse.json(result);
+  } catch (error: unknown) {
+    console.error('Unexpected error:', error);
+    return errorResponse(error instanceof Error ? error.message : String(error), 500);
+  }
+}
diff --git a/src/app/api/pr/knowledge/route.ts b/src/app/api/pr/knowledge/route.ts
index b8bf2d33..d84f61a6 100644
--- a/src/app/api/pr/knowledge/route.ts
+++
b/src/app/api/pr/knowledge/route.ts @@ -12,7 +12,7 @@ const BASE_BRANCH = 'main'; export async function POST(req: NextRequest) { const token = await getToken({ req, secret: process.env.NEXTAUTH_SECRET! }); - console.log('GitHub Token:', token); + // console.log('GitHub Token:', token); if (!token || !token.accessToken) { console.error('Unauthorized: Missing or invalid access token'); diff --git a/src/app/api/pr/skill/route.ts b/src/app/api/pr/skill/route.ts index 9c29dda9..133240e1 100644 --- a/src/app/api/pr/skill/route.ts +++ b/src/app/api/pr/skill/route.ts @@ -12,7 +12,7 @@ const BASE_BRANCH = 'main'; export async function POST(req: NextRequest) { const token = await getToken({ req, secret: process.env.NEXTAUTH_SECRET! }); - console.log('GitHub Token:', token); + // console.log('GitHub Token:', token); if (!token || !token.accessToken) { console.error('Unauthorized: Missing or invalid access token'); diff --git a/src/app/api/upload/route.ts b/src/app/api/upload/route.ts index b1e2dc78..e9406199 100644 --- a/src/app/api/upload/route.ts +++ b/src/app/api/upload/route.ts @@ -4,12 +4,12 @@ import { getToken } from 'next-auth/jwt'; import { NextRequest } from 'next/server'; const GITHUB_API_URL = 'https://api.github.com'; -const TAXONOMY_DOCUMENTS_REPO = process.env.TAXONOMY_DOCUMENTS_REPO!; +const TAXONOMY_DOCUMENTS_REPO = process.env.NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO!; const BASE_BRANCH = 'main'; export async function POST(req: NextRequest) { const token = await getToken({ req, secret: process.env.NEXTAUTH_SECRET! 
}); - console.log('GitHub Token:', token); + // console.log('GitHub Token:', token); if (!token || !token.accessToken) { console.error('Unauthorized: Missing or invalid access token'); @@ -46,9 +46,6 @@ export async function POST(req: NextRequest) { if (!repoForked) { // Fork the repository if it is not already forked await forkRepo(headers, repoOwner, repoName, githubUsername); - // Add a delay to ensure the fork operation completes to avoid a race condition when retrieving the bas SHA - // This only occurs if this is the first time submitting and the fork isn't present. - // TODO change to a retry console.log('Pause 5s for the forking operation to complete'); await new Promise((resolve) => setTimeout(resolve, 5000)); console.log('Repository forked'); @@ -64,7 +61,8 @@ export async function POST(req: NextRequest) { const [name, extension] = file.fileName.split(/\.(?=[^.]+$)/); return { fileName: `${name}-${timestamp}.${extension}`, - fileContent: file.fileContent + fileContent: file.fileContent, + encoding: extension === 'pdf' ? 'base64' : 'utf-8' }; }); @@ -160,7 +158,7 @@ async function createFilesCommit( owner: string, repo: string, branchName: string, - files: { fileName: string; fileContent: string }[], + files: { fileName: string; fileContent: string; encoding: string }[], userEmail: string, baseSha: string ): Promise { @@ -173,7 +171,7 @@ async function createFilesCommit( headers, body: JSON.stringify({ content: file.fileContent, - encoding: 'utf-8' + encoding: file.encoding }) }).then((response) => response.json()) ) @@ -202,12 +200,9 @@ async function createFilesCommit( } const treeData = await createTreeResponse.json(); - console.log('Tree created:', treeData); + // console.log('Tree created:', treeData); // Create commit with DCO sign-off - // TODO: if the user's github does not have an associated github email, we need to specify one in the upload section - // or reuse the one from the form. 
If we use the email field from the form, it needs to be null checked when - // the user clicks the upload documents button. const createCommitResponse = await fetch(`${GITHUB_API_URL}/repos/${owner}/${repo}/git/commits`, { method: 'POST', headers, diff --git a/src/app/contribute/knowledgefiles/page.tsx b/src/app/contribute/knowledgefiles/page.tsx new file mode 100644 index 00000000..47502366 --- /dev/null +++ b/src/app/contribute/knowledgefiles/page.tsx @@ -0,0 +1,19 @@ +// src/app/contribute/knowledgefiles/page.tsx +'use client'; + +import * as React from 'react'; +import '@patternfly/react-core/dist/styles/base.css'; +import { AppLayout } from '@/components/AppLayout'; +import KnowledgeFiles from '@/components/Contribute/KnowledgeFiles'; + +const repoName = process.env.NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO!.split('/').pop()!; + +const KFiles: React.FunctionComponent = () => { + return ( + + + + ); +}; + +export default KFiles; diff --git a/src/app/edit-submission/knowledge/[id]/page.tsx b/src/app/edit-submission/knowledge/[id]/page.tsx index 309e919b..6da10ba7 100644 --- a/src/app/edit-submission/knowledge/[id]/page.tsx +++ b/src/app/edit-submission/knowledge/[id]/page.tsx @@ -446,7 +446,7 @@ Creator names: ${creators} className={useFileUpload ? 
'button-active' : 'button-secondary'} onClick={() => setUseFileUpload(true)} > - Automatically Upload Documents + Upload Documents diff --git a/src/components/AppLayout.tsx b/src/components/AppLayout.tsx index 7e63005d..50afcc46 100644 --- a/src/components/AppLayout.tsx +++ b/src/components/AppLayout.tsx @@ -59,7 +59,8 @@ const AppLayout: React.FunctionComponent = ({ children }) => { label: 'Contribute', children: [ { path: '/contribute/skill', label: 'Skill' }, - { path: '/contribute/knowledge', label: 'Knowledge' } + { path: '/contribute/knowledge', label: 'Knowledge' }, + { path: '/contribute/knowledgefiles', label: 'Knowledge Files' } ] }, { diff --git a/src/components/Contribute/Knowledge/FileSelectionModal.tsx b/src/components/Contribute/Knowledge/FileSelectionModal.tsx new file mode 100644 index 00000000..2dc9772b --- /dev/null +++ b/src/components/Contribute/Knowledge/FileSelectionModal.tsx @@ -0,0 +1,100 @@ +// src/components/Contribute/Knowledge/FileSelectionModal.tsx +import React, { useEffect, useState } from 'react'; +import { Modal, Button, DataList, DataListItem, DataListItemRow, DataListCell, DataListCheck, Spinner, Alert } from '@patternfly/react-core'; +import { fetchGitHubRepoFiles } from '@/utils/fileManagerGithub'; +import { useSession } from 'next-auth/react'; +import { getGitHubUsername } from '@/utils/github'; + +interface FileSelectionModalProps { + isOpen: boolean; + onClose: () => void; + onSelectFiles: (files: string[]) => void; + repoName: string; +} + +export const FileSelectionModal: React.FC = ({ isOpen, onClose, onSelectFiles, repoName }) => { + const { data: session } = useSession(); // Get the session data from NextAuth + const [files, setFiles] = useState([]); // State for storing the list of files from the repository + const [loading, setLoading] = useState(true); // State for managing the loading state + const [error, setError] = useState(null); + const [selectedFiles, setSelectedFiles] = useState([]); // State for storing 
selected files + const [githubUsername, setGithubUsername] = useState(null); + + useEffect(() => { + const loadFiles = async () => { + if (!session || !session.accessToken) { + setError('Unauthorized: Missing or invalid access token'); + setLoading(false); + return; + } + + try { + const username = await getGitHubUsername(session.accessToken as string); + setGithubUsername(username); + const repoFiles = await fetchGitHubRepoFiles(session.accessToken as string); + setFiles(repoFiles); + } catch (err) { + setError('Failed to load files'); + console.error(err); + } finally { + setLoading(false); + } + }; + + loadFiles(); + }, [session]); + + const handleSelectFile = (filePath: string, isSelected: boolean) => { + setSelectedFiles((prevSelectedFiles) => (isSelected ? [...prevSelectedFiles, filePath] : prevSelectedFiles.filter((file) => file !== filePath))); + }; + + // Function to confirm the selection of files and close the modal + const handleConfirmSelection = () => { + // Pass selected files to the parent component + onSelectFiles(selectedFiles); + onClose(); + }; + + return ( + + Confirm + , + + ]} + > + {loading && } + {error && ( + + {error} + + )} + {!loading && !error && ( + + {files.map((file, index) => ( + + + handleSelectFile(file.path, checked)} + /> + + {file.path} + + + + ))} + + )} + + ); +}; + +export default FileSelectionModal; diff --git a/src/components/Contribute/Knowledge/UploadFile.tsx b/src/components/Contribute/Knowledge/UploadFile.tsx index 75eae440..a678a8dc 100644 --- a/src/components/Contribute/Knowledge/UploadFile.tsx +++ b/src/components/Contribute/Knowledge/UploadFile.tsx @@ -1,5 +1,4 @@ // src/components/Contribute/Knowledge/UploadFile.tsx -'use client'; import React, { useState, useEffect } from 'react'; import { MultipleFileUploadStatusItem, @@ -13,21 +12,31 @@ import { ExclamationTriangleIcon } from '@patternfly/react-icons/dist/dynamic/ic import { FileRejection, DropEvent } from 'react-dropzone'; import { Button } from 
'@patternfly/react-core/dist/dynamic/components/Button'; import { HelperText, HelperTextItem } from '@patternfly/react-core/dist/dynamic/components/HelperText'; +import { Spinner } from '@patternfly/react-core/dist/dynamic/components/Spinner'; -interface readFile { +interface ReadFile { fileName: string; data?: string; loadResult?: 'danger' | 'success'; loadError?: DOMException; } -export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[]) => void }> = ({ onFilesChange }) => { - const [currentFiles, setCurrentFiles] = useState([]); - const [readFileData, setReadFileData] = useState([]); +interface UploadFileProps { + onFilesChange: (files: File[]) => void; + files: File[]; + isConverting: boolean; + conversionMessage: string; +} + +export const UploadFile: React.FunctionComponent = ({ onFilesChange, files, isConverting, conversionMessage }) => { + // State hooks for managing file upload state and modal state + const [currentFiles, setCurrentFiles] = useState(files || []); + const [readFileData, setReadFileData] = useState([]); const [showStatus, setShowStatus] = useState(false); const [statusIcon, setStatusIcon] = useState<'inProgress' | 'success' | 'danger'>('inProgress'); const [modalText, setModalText] = useState(''); + // Effect hook to show or hide the upload status based on current files useEffect(() => { if (currentFiles.length > 0) { setShowStatus(true); @@ -36,6 +45,7 @@ export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[] } }, [currentFiles]); + // Effect hook to update the status icon based on the read file results useEffect(() => { if (readFileData.length < currentFiles.length) { setStatusIcon('inProgress'); @@ -46,6 +56,18 @@ export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[] } }, [readFileData, currentFiles]); + // Effect hook to trigger the onFilesChange callback when current files are updated + useEffect(() => { + console.log('Current files updated:', currentFiles); + 
onFilesChange(currentFiles); + }, [currentFiles, onFilesChange]); + + // Effect hook to set current files from props + useEffect(() => { + setCurrentFiles(files); + }, [files]); + + // Function to remove files from the current file list const removeFiles = (namesOfFilesToRemove: string[]) => { const newCurrentFiles = currentFiles.filter((file) => !namesOfFilesToRemove.includes(file.name)); const newReadFiles = readFileData.filter((file) => !namesOfFilesToRemove.includes(file.fileName)); @@ -53,48 +75,7 @@ export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[] setReadFileData(newReadFiles); }; - const handleFileDrop = (_event: DropEvent, droppedFiles: File[]) => { - const currentFileNames = currentFiles.map((file) => file.name); - const reUploads = droppedFiles.filter((file) => currentFileNames.includes(file.name)); - - const newFiles = [ - ...currentFiles.filter((file) => !reUploads.includes(file)), - ...droppedFiles.filter((file) => !currentFileNames.includes(file.name)) - ]; - setCurrentFiles(newFiles); - onFilesChange(newFiles); - }; - - const handleReadSuccess = (data: string, file: File) => { - setReadFileData((prevReadFiles) => { - const existingFile = prevReadFiles.find((readFile) => readFile.fileName === file.name); - if (existingFile) { - return prevReadFiles; - } - return [...prevReadFiles, { data, fileName: file.name, loadResult: 'success' }]; - }); - }; - - const handleReadFail = (error: DOMException, file: File) => { - setReadFileData((prevReadFiles) => { - const existingFile = prevReadFiles.find((readFile) => readFile.fileName === file.name); - if (existingFile) { - return prevReadFiles; - } - return [...prevReadFiles, { loadError: error, fileName: file.name, loadResult: 'danger' }]; - }); - }; - - const handleDropRejected = (fileRejections: FileRejection[]) => { - console.warn('Files rejected:', fileRejections); - if (fileRejections.length === 1) { - setModalText(`${fileRejections[0].file.name} is not an accepted file 
type`); - } else { - const rejectedMessages = fileRejections.reduce((acc, fileRejection) => (acc += `${fileRejection.file.name}, `), ''); - setModalText(`${rejectedMessages} are not accepted file types`); - } - }; - + // Function to create helper text for file upload status const createHelperText = (file: File) => { const fileResult = readFileData.find((readFile) => readFile.fileName === file.name); if (fileResult?.loadError) { @@ -113,20 +94,46 @@ export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[] return ( <> { + const newFiles = droppedFiles.reduce( + (acc, file) => { + const index = acc.findIndex((f) => f.name === file.name); + if (index !== -1) { + acc[index] = file; // Overwrite existing file + } else { + acc.push(file); + } + return acc; + }, + [...currentFiles] + ); + + setCurrentFiles(newFiles); + console.log('Files after drop:', newFiles); + }} dropzoneProps={{ accept: { 'application/pdf': ['.pdf'], 'text/markdown': ['.md'] }, - onDropRejected: handleDropRejected + // Handle file rejection + onDropRejected: (fileRejections) => { + console.warn('Files rejected:', fileRejections); + if (fileRejections.length === 1) { + setModalText(`${fileRejections[0].file.name} is not an accepted file type`); + } else { + const rejectedMessages = fileRejections.reduce((acc, fileRejection) => (acc += `${fileRejection.file.name}, `), ''); + setModalText(`${rejectedMessages} are not accepted file types`); + } + } }} > } titleText="Drag and drop files here" titleTextSeparator="or" - infoText="Accepted file types: PDF, Markdown" + infoText="Accepted file types are PDF and Markdown. PDF files will be converted to Markdown via a backend service. All documents will be automatically stored in a fork in the user's GitHub account." 
/> {showStatus && ( removeFiles([file.name])} - onReadSuccess={handleReadSuccess} - onReadFail={handleReadFail} + onReadSuccess={(data, file) => { + setReadFileData((prevReadFiles) => { + const existingFile = prevReadFiles.find((readFile) => readFile.fileName === file.name); + if (existingFile) { + return prevReadFiles; + } + return [...prevReadFiles, { data, fileName: file.name, loadResult: 'success' }]; + }); + }} + onReadFail={(error, file) => { + setReadFileData((prevReadFiles) => { + const existingFile = prevReadFiles.find((readFile) => readFile.fileName === file.name); + if (existingFile) { + return prevReadFiles; + } + return [...prevReadFiles, { loadError: error, fileName: file.name, loadResult: 'danger' }]; + }); + }} progressHelperText={createHelperText(file)} /> ))} @@ -161,6 +184,12 @@ export const UploadFile: React.FunctionComponent<{ onFilesChange: (files: File[] + {isConverting && ( +
+ + {conversionMessage} +
+ )} ); }; diff --git a/src/components/Contribute/Knowledge/index.tsx b/src/components/Contribute/Knowledge/index.tsx index 64ee0da4..1dcf77d3 100644 --- a/src/components/Contribute/Knowledge/index.tsx +++ b/src/components/Contribute/Knowledge/index.tsx @@ -13,10 +13,11 @@ import { TextArea } from '@patternfly/react-core/dist/dynamic/components/TextAre import { PlusIcon, MinusCircleIcon, CodeIcon } from '@patternfly/react-icons/dist/dynamic/icons/'; import yaml from 'js-yaml'; import { validateFields, validateEmail, validateUniqueItems } from '../../../utils/validation'; -import { getGitHubUsername } from '../../../utils/github'; +import { getGitHubUsername, fetchKnowledgeFileContent } from '../../../utils/github'; import { useSession } from 'next-auth/react'; import YamlCodeModal from '../../YamlCodeModal'; import { UploadFile } from './UploadFile'; +import FileSelectionModal from './FileSelectionModal'; import { SchemaVersion } from '@/types'; export const KnowledgeForm: React.FunctionComponent = () => { @@ -68,9 +69,12 @@ export const KnowledgeForm: React.FunctionComponent = () => { const [useFileUpload, setUseFileUpload] = useState(false); const [uploadedFiles, setUploadedFiles] = useState([]); + const [selectedFiles, setSelectedFiles] = useState([]); const [isModalOpen, setIsModalOpen] = useState(false); const [yamlContent, setYamlContent] = useState(''); + const [isConverting, setIsConverting] = useState(false); + const [conversionMessage, setConversionMessage] = useState(''); const handleInputChange = (index: number, type: string, value: string) => { switch (type) { @@ -120,14 +124,17 @@ export const KnowledgeForm: React.FunctionComponent = () => { setCreators(''); setRevision(''); setUploadedFiles([]); + setSelectedFiles([]); }; const onCloseSuccessAlert = () => { setIsSuccessAlertVisible(false); + setIsConverting(false); }; const onCloseFailureAlert = () => { setIsFailureAlertVisible(false); + setIsConverting(false); }; const handleFilesChange = (files: 
File[]) => { @@ -135,6 +142,115 @@ export const KnowledgeForm: React.FunctionComponent = () => { setPatterns(files.map((file) => file.name).join(', ')); // Populate the patterns field }; + const handleSelectFiles = async (files: string[]) => { + console.log('Selected files:', files); // Log selected files + + try { + const downloadedFiles = await Promise.all( + files.map(async (filePath) => { + console.log('Fetching file content for:', filePath); // Log each file path before fetching + const fileBlob = await fetchKnowledgeFileContent(session!.accessToken as string, githubUsername!, filePath); + console.log('Fetched file blob:', fileBlob); // Log the fetched content + + const file = new File([fileBlob], filePath.split('/').pop()!, { type: fileBlob.type }); + console.log('Created file:', file); // Log the created file + return file; + }) + ); + + console.log('Downloaded files:', downloadedFiles); + + const pdfFiles = downloadedFiles.filter((file) => file.type === 'application/pdf' || file.name.endsWith('.pdf')); + const otherFiles = downloadedFiles.filter((file) => file.type !== 'application/pdf' && !file.name.endsWith('.pdf')); + + console.log('PDF files:', pdfFiles); + console.log('Other files:', otherFiles); + + // Handle PDF conversion + if (pdfFiles.length > 0) { + const pdfFilesBase64 = await Promise.all( + pdfFiles.map( + (file) => + new Promise<{ fileName: string; fileContent: string }>((resolve, reject) => { + const reader = new FileReader(); + reader.onload = (e) => { + console.log('File read as base64:', e.target!.result); // Log base64 content + resolve({ + fileName: file.name, + fileContent: (e.target!.result as string).split(',')[1] // Extract base64 content + }); + }; + reader.onerror = (error) => { + console.error('Error reading file as base64:', error); + reject(error); + }; + reader.readAsDataURL(file); + }) + ) + ); + + console.log('Base64 encoded PDF files:', pdfFilesBase64); + + const pdfUploadResponse = await fetch('/api/upload', { + method: 
'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ files: pdfFilesBase64 }) + }); + + const pdfUploadResult = await pdfUploadResponse.json(); + if (!pdfUploadResponse.ok) { + throw new Error(pdfUploadResult.error || 'Failed to upload PDF files'); + } + + console.log('PDF upload result:', pdfUploadResult); + + const conversionResponse = await fetch('/api/conversion', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ repoUrl: pdfUploadResult.repoUrl, documentNames: pdfUploadResult.documentNames }) + }); + + const conversionResult = await conversionResponse.json(); + if (!conversionResponse.ok) { + throw new Error(conversionResult.error || 'Failed to convert PDF'); + } + + console.log('PDF conversion result:', conversionResult); + + const mdFileUrl = conversionResult.md_file_url; + const mdFileResponse = await fetch(mdFileUrl); + const mdFileContent = await mdFileResponse.text(); + + console.log('Converted Markdown file content:', mdFileContent); + + otherFiles.push(new File([mdFileContent], pdfFiles[0].name.replace('.pdf', '.md'), { type: 'text/markdown' })); + } + + const updatedFiles = otherFiles.reduce( + (acc, file) => { + const index = acc.findIndex((f) => f.name === file.name); + if (index !== -1) { + acc[index] = file; // Overwrite existing file + } else { + acc.push(file); + } + return acc; + }, + [...uploadedFiles] + ); + + setUploadedFiles(updatedFiles); + console.log('Successfully read file count:', downloadedFiles.length); + console.log('Current files:', updatedFiles); + } catch (error) { + console.error('Error fetching file content:', error); // Log any errors + } + }; + const handleSubmit = async (event: React.FormEvent) => { event.preventDefault(); @@ -229,7 +345,12 @@ export const KnowledgeForm: React.FunctionComponent = () => { const handleDocumentUpload = async () => { if (uploadedFiles.length > 0) { - const fileContents: { fileName: string; fileContent: string 
}[] = []; + setIsConverting(true); + setConversionMessage('Files are being processed...'); + + const fileContents: { fileName: string; fileContent: string; fileType: string }[] = []; + const markdownFiles: { fileName: string; fileContent: string }[] = []; + const pdfFiles: { fileName: string; fileContent: string }[] = []; await Promise.all( uploadedFiles.map( @@ -238,46 +359,110 @@ export const KnowledgeForm: React.FunctionComponent = () => { const reader = new FileReader(); reader.onload = (e) => { const fileContent = e.target!.result as string; - fileContents.push({ fileName: file.name, fileContent }); + const fileType = file.type; + + if (fileType === 'application/pdf') { + // For PDF files, extract base64 content + pdfFiles.push({ + fileName: file.name, + fileContent: fileContent.split(',')[1] + }); + } else { + // For Markdown and other text files, use the full content + markdownFiles.push({ + fileName: file.name, + fileContent + }); + } resolve(); }; reader.onerror = reject; - reader.readAsText(file); + + if (file.type === 'application/pdf') { + reader.readAsDataURL(file); // Read PDF files as base64 + } else { + reader.readAsText(file); // Read Markdown and other files as text + } }) ) ); - if (fileContents.length === uploadedFiles.length) { - try { - const response = await fetch('/api/upload', { + try { + // Upload PDF files for conversion + if (pdfFiles.length > 0) { + const pdfUploadResponse = await fetch('/api/upload', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ files: fileContents }) + body: JSON.stringify({ files: pdfFiles }) }); - const result = await response.json(); - if (response.ok) { - setRepo(result.repoUrl); - setCommit(result.commitSha); - setPatterns(result.documentNames.join(', ')); // Populate the patterns field - console.log('Files uploaded:', result.documentNames); - setSuccessAlertTitle('Document uploaded successfully!'); - setSuccessAlertMessage('Documents have been uploaded to your repo 
to be referenced in the knowledge submission.'); - setSuccessAlertLink(result.prUrl); - setIsSuccessAlertVisible(true); - setUseFileUpload(false); // Switch back to manual mode to display the newly created values in the knowledge submission - } else { - throw new Error(result.error || 'Failed to upload document'); + const pdfUploadResult = await pdfUploadResponse.json(); + if (!pdfUploadResponse.ok) { + throw new Error(pdfUploadResult.error || 'Failed to upload PDF files'); + } + + const conversionResponse = await fetch('/api/conversion', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ repoUrl: pdfUploadResult.repoUrl, documentNames: pdfUploadResult.documentNames }) + }); + + const conversionResult = await conversionResponse.json(); + if (!conversionResponse.ok) { + throw new Error(conversionResult.error || 'Failed to convert PDF'); } - } catch (error: unknown) { - if (error instanceof Error) { - setFailureAlertTitle('Failed to upload document'); - setFailureAlertMessage(error.message); - setIsFailureAlertVisible(true); + + const mdFileUrl = conversionResult.md_file_url; + + // Download the converted Markdown file + const mdFileResponse = await fetch(mdFileUrl); + const mdFileContent = await mdFileResponse.text(); + + // Add converted Markdown to the list of Markdown files + markdownFiles.push({ + fileName: pdfFiles[0].fileName.replace('.pdf', '.md'), + fileContent: mdFileContent + }); + } + + // Upload all Markdown files in a single commit + if (markdownFiles.length > 0) { + const markdownUploadResponse = await fetch('/api/upload', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ files: markdownFiles }) + }); + + const markdownUploadResult = await markdownUploadResponse.json(); + if (!markdownUploadResponse.ok) { + throw new Error(markdownUploadResult.error || 'Failed to upload Markdown files'); } + + setRepo(markdownUploadResult.repoUrl); + 
setCommit(markdownUploadResult.commitSha); + setPatterns(markdownUploadResult.documentNames.join(', ')); // Populate the patterns field + console.log('Markdown files uploaded:', markdownUploadResult.documentNames); + + setSuccessAlertTitle('Document uploaded successfully!'); + setSuccessAlertMessage('Documents have been uploaded to your repo to be referenced in the knowledge submission.'); + setSuccessAlertLink(markdownUploadResult.prUrl); + setIsSuccessAlertVisible(true); + setUseFileUpload(false); // Switch back to manual mode to display the newly created values in the knowledge submission } + } catch (error: unknown) { + if (error instanceof Error) { + setFailureAlertTitle('Failed to upload document'); + setFailureAlertMessage(error.message); + setIsFailureAlertVisible(true); + } + } finally { + setIsConverting(false); } } }; @@ -413,6 +598,7 @@ Creator names: ${creators} return (
setIsModalOpen(!isModalOpen)} yamlContent={yamlContent} /> + setIsModalOpen(false)} onSelectFiles={handleSelectFiles} repoName={repo} />
+ {useFileUpload && ( + + )}
@@ -579,7 +773,7 @@ Creator names: ${creators} ) : ( <> - + diff --git a/src/components/Contribute/KnowledgeFiles/index.tsx b/src/components/Contribute/KnowledgeFiles/index.tsx new file mode 100644 index 00000000..1889102f --- /dev/null +++ b/src/components/Contribute/KnowledgeFiles/index.tsx @@ -0,0 +1,126 @@ +// src/components/Contribute/KnowledgeFiles.tsx +import React, { useEffect, useState } from 'react'; +import { + PageSection, + Title, + Card, + CardHeader, + CardTitle, + Spinner, + Alert, + AlertActionCloseButton, + DataList, + DataListItem, + DataListItemRow, + DataListCell, + Button, + Flex, + FlexItem, + Chip +} from '@patternfly/react-core'; +import { ExternalLinkAltIcon } from '@patternfly/react-icons'; +import { fetchGitHubRepoFiles } from '@/utils/fileManagerGithub'; +import { useSession } from 'next-auth/react'; +import { getGitHubUsername } from '@/utils/github'; + +interface KnowledgeFilesProps { + repoName: string; +} + +const BASE_BRANCH = 'main'; + +export const KnowledgeFiles: React.FC = ({ repoName }) => { + const { data: session } = useSession(); + const [files, setFiles] = useState([]); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + const [githubUsername, setGithubUsername] = useState(null); + + useEffect(() => { + const loadFiles = async () => { + if (!session || !session.accessToken) { + setError('Unauthorized: Missing or invalid access token'); + setLoading(false); + return; + } + + try { + const username = await getGitHubUsername(session.accessToken as string); + setGithubUsername(username); + const repoFiles = await fetchGitHubRepoFiles(session.accessToken as string); + setFiles(repoFiles); + } catch (err) { + setError('Failed to load files'); + console.error(err); + } finally { + setLoading(false); + } + }; + + loadFiles(); + }, [session]); + + const handleCloseError = () => { + setError(null); + }; + + const handleConvertToMarkdown = (filePath: string) => { + // Add your convert to 
markdown logic here + console.log(`Convert to Markdown: ${filePath}`); + }; + + return ( + + Knowledge Files + {loading && } + {error && ( + }> + {error} + + )} + {!loading && !error && githubUsername && ( + + {files.map((file, index) => ( + + + + + + + + + + + {file.path} + + + + + {file.path.endsWith('.pdf') ? 'PDF' : 'Markdown'} + + + {file.path.endsWith('.pdf') && ( + + + + )} + + + + + + + ))} + + )} + + ); +}; + +export default KnowledgeFiles; diff --git a/src/lib/api/deepsearch/index.ts b/src/lib/api/deepsearch/index.ts new file mode 100644 index 00000000..3812e84c --- /dev/null +++ b/src/lib/api/deepsearch/index.ts @@ -0,0 +1,147 @@ +// src/lib/api/index.ts +const taskPollWaitSeconds = 10; + +interface ApiResult { + status: number; + error?: string; + payload?: R; +} + +interface Task { + task_id: string; + task_status: 'SUCCESS' | 'FAILURE' | string; + result: { transaction_id: string }; +} + +interface DocumentArtifactsPageImage { + page_no: number; + url: string; +} + +interface DocumentArtifacts { + document_pdf: string; + document_md: string; + document_json: string; + page_images: DocumentArtifactsPageImage[]; +} + +export default class Client { + private host: string; + private token: string; + + constructor(host: string) { + this.host = host; + this.token = ''; + } + + async authenticate(userName: string, apiKey: string): Promise> { + const url = `${this.host}/api/cps/user/v1/user/token`; + + const response = await fetch(url, { + method: 'POST', + headers: { + Authorization: 'Basic ' + btoa(`${userName}:${apiKey}`), + 'Content-Type': 'application/json' + }, + body: JSON.stringify({}) + }); + + const result = await this.payloadOrError<{ access_token: string }>(response); + + const tokenResult = this.mapResult(result, (r) => r['access_token']); + + if (tokenResult.payload) { + this.token = tokenResult.payload ?? 
''; + } + + return tokenResult; + } + + async launchConvert(projKey: string, indexKey: string, sourceURL: string): Promise> { + return await this.post<{ file_url: string[] }, Task>({ + path: `api/cps/public/v1/project/${projKey}/data_indices/${indexKey}/actions/ccs_convert_upload`, + payload: { file_url: [sourceURL] } + }); + } + + async waitForTask(projKey: string, taskId: string): Promise> { + while (true) { + const response = await this.get({ + path: `api/cps/public/v2/project/${projKey}/celery_tasks/${taskId}?wait=${taskPollWaitSeconds}` + }); + + if (response.payload) { + console.debug('Task status: ', response.payload.task_status); + + if (['SUCCESS', 'FAILURE'].includes(response.payload.task_status)) { + return response; + } + } else { + console.debug('Failed to retrieve task status: ', response.status, response.error); + } + } + } + + async getDocumentHashes(projKey: string, indexKey: string, transactionId: string): Promise> { + const response = await this.get<{ documents: { document_hash: string }[] }>({ + path: `api/cps/public/v2/project/${projKey}/data_indices/${indexKey}/documents/transactions/${transactionId}` + }); + + return this.mapResult(response, (r) => r.documents.map((d: any) => d.document_hash)); + } + + async getDocumentArtifacts(projKey: string, indexKey: string, documentHash: string): Promise> { + const response = await this.get<{ artifacts: DocumentArtifacts }>({ + path: `api/cps/public/v2/project/${projKey}/data_indices/${indexKey}/documents/${documentHash}/artifacts` + }); + + return this.mapResult(response, (r) => r.artifacts); + } + + private async post({ path, payload }: { path: string; payload: P }): Promise> { + try { + const response = await fetch(`${this.host}/${path}`, { + method: 'POST', + body: JSON.stringify(payload), + headers: { + Authorization: this.token, + 'Content-Type': 'application/json' + } + }); + + return this.payloadOrError(response); + } catch (ex) { + console.error(ex); + throw ex; + } + } + + private async get({ 
path }: { path: string }): Promise> { + const response = await fetch(`${this.host}/${path}`, { + method: 'GET', + headers: { + Authorization: this.token + } + }); + + return this.payloadOrError(response); + } + + /** + * Convert an API response into a usable payload or an error message. + */ + private async payloadOrError(response: Response): Promise> { + if (response.ok) { + return { status: response.status, payload: (await response.json()) as R }; + } else { + return { status: response.status, error: response.statusText }; + } + } + + /** + * Map API results payload into something else. + */ + private mapResult(result: ApiResult, f: (input: I) => O): ApiResult { + return result.payload ? { ...result, payload: f(result.payload) } : ({ ...result } as ApiResult); + } +} diff --git a/src/utils/fileManagerGithub.ts b/src/utils/fileManagerGithub.ts new file mode 100644 index 00000000..edfc2dac --- /dev/null +++ b/src/utils/fileManagerGithub.ts @@ -0,0 +1,34 @@ +// src/utils/fileManagerGithub.ts +import axios from 'axios'; +import { getGitHubUsername } from '@/utils/github'; + +const GITHUB_API_URL = 'https://api.github.com'; +const TAXONOMY_DOCUMENTS_REPO = process.env.NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO!; +const BASE_BRANCH = 'main'; + +export const fetchGitHubRepoFiles = async (token: string) => { + try { + const githubUsername = await getGitHubUsername(token); + console.log('Fetching repository files for user:', githubUsername); + + const repoPath = TAXONOMY_DOCUMENTS_REPO.replace('github.com/', ''); + const [repoOwner, repoName] = repoPath.split('/'); + + const response = await axios.get(`${GITHUB_API_URL}/repos/${githubUsername}/${repoName}/git/trees/${BASE_BRANCH}?recursive=1`, { + headers: { + Authorization: `Bearer ${token}`, + Accept: 'application/vnd.github+json' + } + }); + + console.log('Fetched files:', response.data); + return response.data.tree.filter((item: any) => item.type === 'blob'); // Only return files (blobs) + } catch (error) { + if 
(axios.isAxiosError(error)) { + console.error('Error fetching repository files:', error.response ? error.response.data : error.message); + } else { + console.error('Error fetching repository files:', error); + } + throw error; + } +}; diff --git a/src/utils/github.ts b/src/utils/github.ts index c90ac52c..31b5f4c7 100644 --- a/src/utils/github.ts +++ b/src/utils/github.ts @@ -4,6 +4,7 @@ import { PullRequestUpdateData } from '@/types'; const UPSTREAM_REPO_OWNER = process.env.NEXT_PUBLIC_TAXONOMY_REPO_OWNER!; const UPSTREAM_REPO_NAME = process.env.NEXT_PUBLIC_TAXONOMY_REPO!; +const NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO_NAME = process.env.NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO_NAME!; export const fetchPullRequests = async (token: string) => { try { @@ -92,6 +93,33 @@ export const fetchFileContent = async (token: string, filePath: string, ref: str } }; +export const fetchKnowledgeFileContent = async (token: string, username: string, filePath: string): Promise => { + try { + console.log(`Fetching knowledge file content for path: ${filePath} from repo: ${NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO_NAME}`); + const response = await axios.get(`https://api.github.com/repos/${username}/${NEXT_PUBLIC_TAXONOMY_DOCUMENTS_REPO_NAME}/contents/${filePath}`, { + headers: { + Authorization: `Bearer ${token}`, + Accept: 'application/vnd.github.v3.raw' + }, + responseType: 'blob' + }); + + if (response.status === 404) { + throw new Error(`File not found: ${filePath}`); + } + + console.log('Fetched knowledge file content:', response.data); + return response.data; // return the Blob content + } catch (error) { + if (axios.isAxiosError(error)) { + console.error('Error fetching knowledge file content:', error.response ? 
error.response.data : error.message); + } else { + console.error('Error fetching knowledge file content:', error); + } + throw error; + } +}; + export const updatePullRequest = async (token: string, prNumber: number, data: PullRequestUpdateData) => { try { console.log(`Updating PR Number: ${prNumber} with data:`, data);