From f7e8742308da54a348b0c5228ad5740476e79e84 Mon Sep 17 00:00:00 2001 From: tykim96 Date: Mon, 30 Oct 2023 16:15:36 +0900 Subject: [PATCH 1/6] feat: Add semi-structed --- pkg/semistructed/json.go | 334 +++++++++++++++++++++++++++++++++++++++ pkg/semistructed/xml.go | 299 +++++++++++++++++++++++++++++++++++ 2 files changed, 633 insertions(+) create mode 100644 pkg/semistructed/json.go create mode 100644 pkg/semistructed/xml.go diff --git a/pkg/semistructed/json.go b/pkg/semistructed/json.go new file mode 100644 index 0000000..214ae67 --- /dev/null +++ b/pkg/semistructed/json.go @@ -0,0 +1,334 @@ +package semistructed + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/brianvoe/gofakeit/v6" + "github.com/cloud-barista/cm-data-mold/pkg/utils" +) + +// Structures to be used to generate json dummy data +type bookInfo struct { + BookID string `json:"book_id" fake:"{password:true,true,true,false,false,24}"` + Title string `json:"title" xml:"name" fake:"{booktitle}"` + Author string `json:"author" xml:"author" fake:"{bookauthor}"` + Genre string `json:"genre" xml:"genre" fake:"{bookgenre}"` +} + +// Structures to be used to generate json dummy data +type carInfo struct { + CarID string `json:"car_id" fake:"{password:true,true,true,false,false,24}"` + Type string `json:"type" xml:"type" fake:"{cartype}"` + Fuel string `json:"fuel" xml:"fuel" fake:"{carfueltype}"` + Transmission string `json:"transmission" xml:"transmission" fake:"{cartransmissiontype}"` + Brand string `json:"brand" xml:"brand" fake:"{carmaker}"` + Model string `json:"model" xml:"model" fake:"{carmodel}"` + Year int `json:"year" xml:"year" fake:"{year}"` +} + +// Structures to be used to generate json dummy data +type addressInfo struct { + AddrID string `json:"addr_id" fake:"{password:true,true,true,false,false,24}"` + CountryAbr string `json:"countryabr" xml:"countryabr" fake:"{countryabr}"` + Street string `json:"street" xml:"street" fake:"{street}"` + City string 
`json:"city" xml:"city" fake:"{city}"` + State string `json:"state" xml:"state" fake:"{state}"` + Zip string `json:"zip" xml:"zip" fake:"{zip}"` + Country string `json:"country" xml:"country" fake:"{country}"` + Latitude int `json:"latitude" xml:"latitude" fake:"{number:-90,90}"` + Longitude int `json:"longitude" xml:"longitude" fake:"{number:-180,180}"` +} + +// Structures to be used to generate json dummy data +type movieInfo struct { + MovID string `json:"mov_id" fake:"{password:true,true,true,false,false,24}"` + Name string `json:"name" xml:"name" fake:"{moviename}"` + Genre string `json:"genre" xml:"genre" fake:"{moviegenre}"` +} + +// Structures to be used to generate json dummy data +type creditCardInfo struct { + CardID string `json:"card_id" fake:"{password:true,true,true,false,false,24}"` + Type string `json:"type" xml:"type" fake:"{creditcardtype}"` + Number string `json:"number" xml:"number" fake:"{creditcardnumber}"` + Exp string `json:"exp" xml:"exp" fake:"{creditcardexp}"` + Cvv string `json:"cvv" xml:"cvv" fake:"{creditcardcvv}"` +} + +// Structures to be used to generate json dummy data +type jobInfo struct { + JobID string `json:"job_id" fake:"{password:true,true,true,false,false,24}"` + Company string `json:"company" xml:"company" fake:"{company}"` + Title string `json:"title" xml:"title" fake:"{jobtitle}"` + Descriptor string `json:"descriptor" xml:"descriptor" fake:"{jobdescriptor}"` + Level string `json:"level" xml:"level" fake:"{joblevel}"` +} + +// Structures to be used to generate json dummy data +type personInfo struct { + PersonID string `json:"person_id" fake:"{password:true,true,true,false,false,24}"` + Name string `json:"name" xml:"name" fake:"{name}"` + FirstName string `json:"first_name" xml:"first_name" fake:"{firstname}"` + LastName string `json:"last_name" xml:"last_name" fake:"{lastname}"` + Gender string `json:"gender" xml:"gender" fake:"{gender}"` + SSN string `json:"ssn" xml:"ssn" fake:"{ssn}"` + Hobby string `json:"hobby" 
xml:"hobby" fake:"{hobby}"` + Job *jobInfo `json:"job" xml:"job"` + Address *addressInfo `json:"address" xml:"address"` + Contact *gofakeit.ContactInfo `json:"contact" xml:"contact"` + CreditCard *creditCardInfo `json:"credit_card" xml:"credit_card"` +} + +// json generation function using gofakeit +// +// CapacitySize is in GB and generates json files +// within the entered dummyDir path. +func GenerateRandomJSON(dummyDir string, capacitySize int) error { + dummyDir = filepath.Join(dummyDir, "json") + if err := utils.IsDir(dummyDir); err != nil { + return err + } + + countNum := make(chan int, capacitySize*1000) + resultChan := make(chan error, capacitySize*1000) + + var wg sync.WaitGroup + for i := 0; i < capacitySize; i++ { + wg.Add(1) + go func() { + defer wg.Done() + randomJsonWorker(countNum, dummyDir, resultChan) + }() + } + + for i := 0; i < capacitySize*1000; i++ { + countNum <- i + } + close(countNum) + + go func() { + wg.Wait() + close(resultChan) + }() + + for ret := range resultChan { + if ret != nil { + return ret + } + } + + return nil +} + +// json worker +func randomJsonWorker(countNum chan int, dirPath string, resultChan chan<- error) { + for cnt := range countNum { + gofakeit.Seed(0) + dataGenerators := []func(int, string, int) error{ + generateJSONBook, + generateJSONCar, + generateJSONAddress, + generateJSONCreditCard, + generateJSONJob, + generateJSONMovie, + generateJSONPerson, + } + + for _, generator := range dataGenerators { + resultChan <- generator(cnt, dirPath, 475) + } + } +} + +// generate book.json +func generateJSONBook(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("book_%d.json", cnt))) + if err != nil { + return err + } + defer file.Close() + + Books := []*bookInfo{} + + for i := 0; i < count; i++ { + b := &bookInfo{} + if err := gofakeit.Struct(b); err != nil { + return err + } + Books = append(Books, b) + } + + data, err := json.MarshalIndent(Books, "", " ") + if err != 
nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate car.json +func generateJSONCar(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("car_%d.json", cnt))) + if err != nil { + return err + } + defer file.Close() + + Cars := []*carInfo{} + + for i := 0; i < count; i++ { + c := &carInfo{} + if err := gofakeit.Struct(c); err != nil { + return err + } + Cars = append(Cars, c) + } + + data, err := json.MarshalIndent(Cars, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate address.json +func generateJSONAddress(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("address_%d.json", cnt))) + if err != nil { + return err + } + defer file.Close() + + Addresses := []*addressInfo{} + + for i := 0; i < count; i++ { + a := &addressInfo{} + if err := gofakeit.Struct(a); err != nil { + return err + } + Addresses = append(Addresses, a) + } + + data, err := json.MarshalIndent(Addresses, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate creditcard.json +func generateJSONCreditCard(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("creditcard_%d.json", cnt))) + if err != nil { + return err + } + defer file.Close() + + CreditCards := []*creditCardInfo{} + + for i := 0; i < count; i++ { + c := &creditCardInfo{} + if err := gofakeit.Struct(c); err != nil { + return err + } + CreditCards = append(CreditCards, c) + } + + data, err := json.MarshalIndent(CreditCards, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate job.json +func generateJSONJob(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("job_%d.json", cnt))) + if err != nil { + return err + } + defer file.Close() + + Jobs := 
[]*jobInfo{} + + for i := 0; i < count; i++ { + j := &jobInfo{} + if err := gofakeit.Struct(j); err != nil { + return err + } + Jobs = append(Jobs, j) + } + + data, err := json.MarshalIndent(Jobs, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate movie.json +func generateJSONMovie(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("movie_%d.json", cnt))) + if err != nil { + return err + } + defer file.Close() + + Movies := []*movieInfo{} + + for i := 0; i < count; i++ { + m := &movieInfo{} + if err := gofakeit.Struct(m); err != nil { + return err + } + Movies = append(Movies, m) + } + + data, err := json.MarshalIndent(Movies, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate person.json +func generateJSONPerson(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("person_%d.json", cnt))) + if err != nil { + return err + } + defer file.Close() + + Persons := []*personInfo{} + + for i := 0; i < count; i++ { + p := &personInfo{} + if err := gofakeit.Struct(p); err != nil { + return err + } + Persons = append(Persons, p) + } + + data, err := json.MarshalIndent(Persons, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} diff --git a/pkg/semistructed/xml.go b/pkg/semistructed/xml.go new file mode 100644 index 0000000..aeccef6 --- /dev/null +++ b/pkg/semistructed/xml.go @@ -0,0 +1,299 @@ +package semistructed + +import ( + "encoding/xml" + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/brianvoe/gofakeit/v6" + "github.com/cloud-barista/cm-data-mold/pkg/utils" +) + +// xml generation function using gofakeit +// +// CapacitySize is in GB and generates xml files +// within the entered dummyDir path. 
+func GenerateRandomXML(dummyDir string, capacitySize int) error { + dummyDir = filepath.Join(dummyDir, "xml") + if err := utils.IsDir(dummyDir); err != nil { + return err + } + + size := capacitySize * 10 + countNum := make(chan int, size) + resultChan := make(chan error, size) + + var wg sync.WaitGroup + for i := 0; i < capacitySize; i++ { + wg.Add(1) + go func() { + defer wg.Done() + randomXMLWorker(countNum, dummyDir, resultChan) + }() + } + + for i := 0; i < size; i++ { + countNum <- i + } + close(countNum) + + go func() { + wg.Wait() + close(resultChan) + }() + + for ret := range resultChan { + if ret != nil { + return ret + } + } + + return nil +} + +// xml worker +func randomXMLWorker(countNum chan int, dirPath string, resultChan chan<- error) { + for cnt := range countNum { + gofakeit.Seed(0) + + dataGenerators := []func(int, string, int) error{ + generateXMLBook, + generateXMLCar, + generateXMLAddress, + generateXMLCreditCard, + generateXMLJob, + generateXMLMovie, + generateXMLPerson, + } + + for _, generator := range dataGenerators { + resultChan <- generator(cnt, dirPath, 49350) + } + } +} + +// generate book.xml +func generateXMLBook(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("book_%d.xml", cnt))) + if err != nil { + return err + } + defer file.Close() + + type Book struct { + XMLName xml.Name `xml:"catalog"` + Books []*gofakeit.BookInfo `xml:"book"` + } + + book := Book{} + + for i := 0; i < count; i++ { + b := gofakeit.Book() + if err := gofakeit.Struct(b); err != nil { + return err + } + book.Books = append(book.Books, b) + } + + data, err := xml.MarshalIndent(book, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate car.xml +func generateXMLCar(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("car_%d.xml", cnt))) + if err != nil { + return err + } + defer file.Close() + + type Car 
struct { + XMLName xml.Name `xml:"catalog"` + Cars []*gofakeit.CarInfo `xml:"car"` + } + + car := Car{} + + for i := 0; i < count; i++ { + c := gofakeit.Car() + if err := gofakeit.Struct(c); err != nil { + return err + } + car.Cars = append(car.Cars, c) + } + + data, err := xml.MarshalIndent(car, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate address.xml +func generateXMLAddress(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("address_%d.xml", cnt))) + if err != nil { + return err + } + defer file.Close() + + type Address struct { + XMLName xml.Name `xml:"catalog"` + Addresses []*gofakeit.AddressInfo `xml:"address"` + } + + address := Address{} + + for i := 0; i < count; i++ { + a := gofakeit.Address() + if err := gofakeit.Struct(a); err != nil { + return err + } + address.Addresses = append(address.Addresses, a) + } + + data, err := xml.MarshalIndent(address, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate creditcard.xml +func generateXMLCreditCard(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("creditcard_%d.xml", cnt))) + if err != nil { + return err + } + defer file.Close() + + type CreditCard struct { + XMLName xml.Name `xml:"catalog"` + CreditCards []*gofakeit.CreditCardInfo `xml:"creditcard"` + } + + creditCard := CreditCard{} + + for i := 0; i < count; i++ { + c := gofakeit.CreditCard() + if err := gofakeit.Struct(c); err != nil { + return err + } + creditCard.CreditCards = append(creditCard.CreditCards, c) + } + + data, err := xml.MarshalIndent(creditCard, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate job.xml +func generateXMLJob(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("job_%d.xml", cnt))) + if err != nil { + 
return err + } + defer file.Close() + + type Job struct { + XMLName xml.Name `xml:"catalog"` + Jobs []*gofakeit.JobInfo `xml:"job"` + } + + job := Job{} + + for i := 0; i < count; i++ { + j := gofakeit.Job() + if err := gofakeit.Struct(j); err != nil { + return err + } + job.Jobs = append(job.Jobs, j) + } + + data, err := xml.MarshalIndent(job, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate movie.xml +func generateXMLMovie(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("movie_%d.xml", cnt))) + if err != nil { + return err + } + defer file.Close() + + type Movie struct { + XMLName xml.Name `xml:"catalog"` + Movies []*gofakeit.MovieInfo `xml:"movie"` + } + + movie := Movie{} + + for i := 0; i < count; i++ { + m := gofakeit.Movie() + if err := gofakeit.Struct(m); err != nil { + return err + } + movie.Movies = append(movie.Movies, m) + } + + data, err := xml.MarshalIndent(movie, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} + +// generate person.xml +func generateXMLPerson(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("person_%d.xml", cnt))) + if err != nil { + return err + } + defer file.Close() + + type Person struct { + XMLName xml.Name `xml:"catalog"` + Persons []*gofakeit.PersonInfo `xml:"person"` + } + + person := Person{} + + for i := 0; i < count; i++ { + p := gofakeit.Person() + if err := gofakeit.Struct(p); err != nil { + return err + } + person.Persons = append(person.Persons, p) + } + + data, err := xml.MarshalIndent(person, "", " ") + if err != nil { + return err + } + + _, err = file.Write(data) + return err +} From aaf1662abc4c5236de1c0d15923d8e3089557bc2 Mon Sep 17 00:00:00 2001 From: tykim96 Date: Mon, 30 Oct 2023 16:17:20 +0900 Subject: [PATCH 2/6] feat: Add structed --- pkg/structed/csv.go | 361 
++++++++++++++++++++++++++++++++++++++++++++ pkg/structed/sql.go | 205 +++++++++++++++++++++++++ 2 files changed, 566 insertions(+) create mode 100644 pkg/structed/csv.go create mode 100644 pkg/structed/sql.go diff --git a/pkg/structed/csv.go b/pkg/structed/csv.go new file mode 100644 index 0000000..bc54262 --- /dev/null +++ b/pkg/structed/csv.go @@ -0,0 +1,361 @@ +package structed + +import ( + "encoding/csv" + "fmt" + "os" + "path/filepath" + "strconv" + "sync" + + "github.com/brianvoe/gofakeit/v6" + "github.com/cloud-barista/cm-data-mold/pkg/utils" +) + +// CSV generation function using gofakeit +// +// CapacitySize is in GB and generates csv files +// within the entered dummyDir path. +func GenerateRandomCSV(dummyDir string, capacitySize int) error { + dummyDir = filepath.Join(dummyDir, "csv") + if err := utils.IsDir(dummyDir); err != nil { + return err + } + + countNum := make(chan int, capacitySize*10) + resultChan := make(chan error, capacitySize*10) + + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + randomCSVWorker(countNum, dummyDir, resultChan) + }() + } + + for i := 0; i < capacitySize*10; i++ { + countNum <- i + } + close(countNum) + + go func() { + wg.Wait() + close(resultChan) + }() + + for ret := range resultChan { + if ret != nil { + return ret + } + } + + return nil +} + +// csv worker +func randomCSVWorker(countNum chan int, dirPath string, resultChan chan<- error) { + for cnt := range countNum { + gofakeit.Seed(0) + dataGenerators := []func(int, string, int) error{ + generateCSVBook, + generateCSVCar, + generateCSVAddress, + generateCSVCreditCard, + generateCSVJob, + generateCSVMovie, + generateCSVPerson, + } + + for _, generator := range dataGenerators { + resultChan <- generator(cnt, dirPath, 121000) + } + } +} + +// generate book.csv +func generateCSVBook(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("book_%d.csv", cnt))) + if err != nil 
{ + return err + } + defer file.Close() + + type Book struct { + Books []*gofakeit.BookInfo `csv:"book"` + } + + book := Book{} + + for i := 0; i < count; i++ { + b := gofakeit.Book() + if err := gofakeit.Struct(b); err != nil { + return err + } + book.Books = append(book.Books, b) + } + + csvWriter := csv.NewWriter(file) + + err = csvWriter.Write([]string{"Title", "Author", "Genre"}) + if err != nil { + return err + } + + for _, b := range book.Books { + record := []string{b.Title, b.Author, b.Genre} + err := csvWriter.Write(record) + if err != nil { + return err + } + } + + csvWriter.Flush() + return csvWriter.Error() +} + +// generate car.csv +func generateCSVCar(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("car_%d.csv", cnt))) + if err != nil { + return err + } + defer file.Close() + + type Car struct { + Cars []*gofakeit.CarInfo `csv:"car"` + } + + car := Car{} + + for i := 0; i < count; i++ { + c := gofakeit.Car() + if err := gofakeit.Struct(c); err != nil { + return err + } + car.Cars = append(car.Cars, c) + } + + csvWriter := csv.NewWriter(file) + + err = csvWriter.Write([]string{"Type", "Fuel", "Transmission", "Brand", "Model", "Year"}) + if err != nil { + return err + } + + for _, c := range car.Cars { + record := []string{c.Type, c.Fuel, c.Transmission, c.Brand, c.Model, fmt.Sprint(c.Year)} + err := csvWriter.Write(record) + if err != nil { + return err + } + } + + csvWriter.Flush() + return csvWriter.Error() +} + +// generate address.csv +func generateCSVAddress(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("address_%d.csv", cnt))) + if err != nil { + return err + } + defer file.Close() + + type Address struct { + Addresses []*gofakeit.AddressInfo `csv:"address"` + } + + address := Address{} + + for i := 0; i < count; i++ { + a := gofakeit.Address() + if err := gofakeit.Struct(a); err != nil { + return err + } + address.Addresses = 
append(address.Addresses, a) + } + + csvWriter := csv.NewWriter(file) + + err = csvWriter.Write([]string{"Street", "City", "State", "Zip", "Country", "Latitude", "Longitude"}) + if err != nil { + return err + } + + for _, a := range address.Addresses { + record := []string{a.Street, a.City, a.State, a.Zip, a.Country, strconv.FormatFloat(a.Latitude, 'f', -1, 64), strconv.FormatFloat(a.Longitude, 'f', -1, 64)} + err := csvWriter.Write(record) + if err != nil { + return err + } + } + + csvWriter.Flush() + return csvWriter.Error() +} + +// generate creditcard.csv +func generateCSVCreditCard(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("creditcard_%d.csv", cnt))) + if err != nil { + return err + } + defer file.Close() + + type CreditCard struct { + CreditCards []*gofakeit.CreditCardInfo `csv:"creditcard"` + } + + creditCard := CreditCard{} + + for i := 0; i < count; i++ { + c := gofakeit.CreditCard() + if err := gofakeit.Struct(c); err != nil { + return err + } + creditCard.CreditCards = append(creditCard.CreditCards, c) + } + + csvWriter := csv.NewWriter(file) + + err = csvWriter.Write([]string{"Type", "Number", "Exp", "Cvv"}) + if err != nil { + return err + } + + for _, c := range creditCard.CreditCards { + record := []string{c.Type, c.Number, c.Exp, c.Cvv} + err := csvWriter.Write(record) + if err != nil { + return err + } + } + + csvWriter.Flush() + return csvWriter.Error() +} + +// generate job.csv +func generateCSVJob(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("job_%d.csv", cnt))) + if err != nil { + return err + } + defer file.Close() + + type Job struct { + Jobs []*gofakeit.JobInfo `csv:"job"` + } + + job := Job{} + + for i := 0; i < count; i++ { + j := gofakeit.Job() + if err := gofakeit.Struct(j); err != nil { + return err + } + job.Jobs = append(job.Jobs, j) + } + + csvWriter := csv.NewWriter(file) + + err = 
csvWriter.Write([]string{"Company", "Title", "Descriptor", "Level"}) + if err != nil { + return err + } + + for _, j := range job.Jobs { + record := []string{j.Company, j.Title, j.Descriptor, j.Level} + err := csvWriter.Write(record) + if err != nil { + return err + } + } + + csvWriter.Flush() + return csvWriter.Error() +} + +// generate movie.csv +func generateCSVMovie(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("movie_%d.csv", cnt))) + if err != nil { + return err + } + defer file.Close() + + type Movie struct { + Movies []*gofakeit.MovieInfo `csv:"movie"` + } + + movie := Movie{} + + for i := 0; i < count; i++ { + m := gofakeit.Movie() + if err := gofakeit.Struct(m); err != nil { + return err + } + movie.Movies = append(movie.Movies, m) + } + + csvWriter := csv.NewWriter(file) + + err = csvWriter.Write([]string{"Name", "Genre"}) + if err != nil { + return err + } + + for _, m := range movie.Movies { + record := []string{m.Name, m.Genre} + err := csvWriter.Write(record) + if err != nil { + return err + } + } + + csvWriter.Flush() + return csvWriter.Error() +} + +// generate person.csv +func generateCSVPerson(cnt int, dirPath string, count int) error { + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("person_%d.csv", cnt))) + if err != nil { + return err + } + defer file.Close() + + type Person struct { + Persons []*gofakeit.PersonInfo `csv:"person"` + } + + person := Person{} + + for i := 0; i < count; i++ { + p := gofakeit.Person() + if err := gofakeit.Struct(p); err != nil { + return err + } + person.Persons = append(person.Persons, p) + } + + csvWriter := csv.NewWriter(file) + + err = csvWriter.Write([]string{"FirstName", "LastName", "Gender", "SSN", "Image", "Hobby"}) + if err != nil { + return err + } + + for _, p := range person.Persons { + record := []string{p.FirstName, p.LastName, p.Gender, p.SSN, p.Image, p.Hobby} + err := csvWriter.Write(record) + if err != nil { + return err + } + 
} + + csvWriter.Flush() + return csvWriter.Error() +} diff --git a/pkg/structed/sql.go b/pkg/structed/sql.go new file mode 100644 index 0000000..06b36b0 --- /dev/null +++ b/pkg/structed/sql.go @@ -0,0 +1,205 @@ +package structed + +import ( + "bytes" + "fmt" + "html/template" + "io" + "os" + "path/filepath" + "sync" + "time" + + "github.com/brianvoe/gofakeit/v6" + "github.com/cloud-barista/cm-data-mold/pkg/utils" +) + +type sqlData struct { + DBName string + Books []books + Members []members + BorrowedBooks []borrowedBooks +} + +type books struct { + BookID uint8 `fake: "{uint8}"` + Title string `fake: "{booktitle}"` + Author string `fake: "{bookauthor}"` + PublicationYear uint8 `fake: "{year}"` + Publisher string `fake: "{bookGenre}"` + Quantity uint8 `fake: "{number:1,100}"` +} + +type members struct { + MemberID uint8 `fake: "{uint8}"` + Name string `fake: "{name}"` + Address string `fake: "{country}"` + PhoneNo string `fake: "{phone}"` + Email string `fake: "{email}"` + JoinedDate time.Time `fake: "{date}"` + ExpiryDate time.Time `fake: "{date}"` + IsActive bool +} + +type borrowedBooks struct { + BorrowID uint8 `fake: "{uint8}"` + MemberID uint8 `fake: "{uint8}"` + BookID uint8 `fake: "{uint8}"` + BorrowedDate time.Time `fake: "{date}"` + DueDate time.Time `fake: "{date}"` + ReturnedDate *time.Time `fake: "{date}"` + FinePaid uint8 `fake: "{number:0,100}"` +} + +const createSql string = ` +CREATE DATABASE IF NOT EXISTS {{ .DBName }}; + +USE {{ .DBName }}; + +DROP TABLE IF EXISTS Books; + +CREATE TABLE Books ( + BookID INT AUTO_INCREMENT, + Title VARCHAR(255), + Author VARCHAR(255), + PublicationYear INT, + Publisher VARCHAR(255), + Quantity INT, + PRIMARY KEY (BookID) +); +{{range .Books}} +INSERT INTO Books (Title, Author, PublicationYear, Publisher, Quantity) VALUES ("{{.Title}}", "{{.Author}}", {{.PublicationYear}}, "{{.Publisher}}", {{.Quantity}}); +{{end}} + +DROP TABLE IF EXISTS Members; + +CREATE TABLE Members ( + MemberID INT AUTO_INCREMENT, + Name 
VARCHAR(255), + Address VARCHAR(255), + PhoneNo VARCHAR(20), + Email VARCHAR(50), + JoinedDate DATE, + ExpiryDate DATE, + IsActive BOOLEAN DEFAULT 1, + PRIMARY KEY (MemberID) +); +{{range .Members}} +INSERT INTO Members (Name, Address, PhoneNo ,Email ,JoinedDate ,ExpiryDate ,IsActive ) VALUES ("{{.Name}}", "{{.Address}}", "{{.PhoneNo}}" ,"{{.Email}}" ,"{{formatTime .JoinedDate}}" ,"{{formatTime .ExpiryDate}}" , {{if .IsActive }}1 {{else}}0 {{end}}); +{{end}} + +DROP TABLE IF EXISTS BorrowedBooks; + +CREATE TABLE BorrowedBooks ( + BorrowID INT AUTO_INCREMENT, + MemberID INT, + BookID INT, + BorrowedDate DATE, + DueDate DATE, + ReturnedDate DATE NULL DEFAULT NULL, + FinePaid DECIMAL(5,2) DEFAULT 0.00, + PRIMARY KEY (BorrowID) +); +{{range .BorrowedBooks}} +INSERT INTO BorrowedBooks (MemberID, BookID, BorrowedDate, DueDate, ReturnedDate, FinePaid) VALUES ({{.MemberID}}, {{.BookID}}, "{{formatTime .BorrowedDate}}", "{{formatTime .DueDate}}", "{{formatTime .ReturnedDate}}", {{.FinePaid}}); +{{end}} +` + +// SQL generation function using gofakeit +// +// CapacitySize is in GB and generates sql files +// within the entered dummyDir path. 
+func GenerateRandomSQL(dummyDir string, capacitySize int) error { + dummyDir = filepath.Join(dummyDir, "sql") + if err := utils.IsDir(dummyDir); err != nil { + return err + } + + size := capacitySize * 1000 + + countNum := make(chan int, size) + resultChan := make(chan error, size) + + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + randomSQLWorker(countNum, dummyDir, resultChan) + }() + } + + for i := 0; i < size; i++ { + countNum <- i + } + close(countNum) + + go func() { + wg.Wait() + close(resultChan) + }() + + for ret := range resultChan { + if ret != nil { + return ret + } + } + + return nil +} + +// sql worker +func randomSQLWorker(countNum chan int, dirPath string, resultChan chan<- error) { + funcMap := template.FuncMap{ + "formatTime": func(t time.Time) string { + return t.Format("2006-01-02") + }, + } + + tmpl, err := template.New("mysqlData").Funcs(funcMap).Parse(createSql) + if err != nil { + resultChan <- err + } + + for num := range countNum { + + data := sqlData{} + data.DBName = fmt.Sprintf("LibraryManagement_%d", num) + + for i := 0; i < 2350; i++ { + book := books{} + gofakeit.Struct(&book) + data.Books = append(data.Books, book) + + members := members{} + gofakeit.Struct(&members) + data.Members = append(data.Members, members) + + borrowedBooks := borrowedBooks{} + gofakeit.Struct(&borrowedBooks) + data.BorrowedBooks = append(data.BorrowedBooks, borrowedBooks) + } + + var buffer bytes.Buffer + if err := tmpl.Execute(&buffer, data); err != nil { + resultChan <- err + continue + } + + file, err := os.Create(filepath.Join(dirPath, fmt.Sprintf("LibraryManagement_%d.sql", num))) + if err != nil { + resultChan <- err + continue + } + defer file.Close() + + if _, err := io.Copy(file, &buffer); err != nil { + resultChan <- err + continue + } + + file.Close() + + resultChan <- nil + } +} From 888e80883d1e1ddfbb525fa8320032cee63a4fbe Mon Sep 17 00:00:00 2001 From: tykim96 Date: Mon, 30 Oct 2023 16:17:37 
+0900 Subject: [PATCH 3/6] feat: Add unstructed --- pkg/unstructed/gif.go | 138 ++++++++++++++++++++++++++++++++++++++++++ pkg/unstructed/img.go | 72 ++++++++++++++++++++++ pkg/unstructed/txt.go | 72 ++++++++++++++++++++++ pkg/unstructed/zip.go | 116 +++++++++++++++++++++++++++++++++++ 4 files changed, 398 insertions(+) create mode 100644 pkg/unstructed/gif.go create mode 100644 pkg/unstructed/img.go create mode 100644 pkg/unstructed/txt.go create mode 100644 pkg/unstructed/zip.go diff --git a/pkg/unstructed/gif.go b/pkg/unstructed/gif.go new file mode 100644 index 0000000..21a49ce --- /dev/null +++ b/pkg/unstructed/gif.go @@ -0,0 +1,138 @@ +package unstructed + +import ( + "fmt" + "image" + "image/color/palette" + "image/draw" + "image/gif" + "image/png" + "math/rand" + "os" + "path/filepath" + "sync" + "time" + + "github.com/cloud-barista/cm-data-mold/pkg/utils" +) + +// GIF generation function using gofakeit +// +// CapacitySize is in GB and generates gif files +// within the entered dummyDir path. 
+func GenerateRandomGIF(dummyDir string, capacitySize int) error { + dummyDir = filepath.Join(dummyDir, "gif") + if err := utils.IsDir(dummyDir); err != nil { + return err + } + + tempPath := filepath.Join(dummyDir, "tmpImg") + if err := os.MkdirAll(tempPath, 0755); err != nil { + return err + } + defer os.RemoveAll(tempPath) + + if err := GenerateRandomPNGImage(tempPath, 1); err != nil { + return err + } + var files []string + + size := capacitySize * 34 * 10 + + err := filepath.Walk(tempPath, func(path string, _ os.FileInfo, err error) error { + if err != nil { + return err + } + + files = append(files, path) + + return nil + }) + + if err != nil { + return err + } + + var imgList []image.Image + for _, imgName := range files { + fileExt := filepath.Ext(imgName) + if fileExt == ".png" { + imgFile, err := os.Open(imgName) + if err != nil { + return err + } + defer imgFile.Close() + + img, err := png.Decode(imgFile) + if err != nil { + return err + } + imgList = append(imgList, img) + } + + } + + countNum := make(chan int, size) + resultChan := make(chan error, size) + + var wg sync.WaitGroup + for i := 0; i < 20; i++ { + wg.Add(1) + go func() { + defer wg.Done() + randomGIFWorker(imgList, countNum, dummyDir, resultChan) + }() + } + + for i := 0; i < size; i++ { + countNum <- i + } + close(countNum) + + go func() { + wg.Wait() + close(resultChan) + }() + + for err := range resultChan { + if err != nil { + return err + } + } + + return nil +} + +// gif worker +func randomGIFWorker(imgList []image.Image, countNum chan int, tmpDir string, resultChan chan<- error) { + for cnt := range countNum { + randGen := rand.New(rand.NewSource(time.Now().UnixNano())) + + randGen.Shuffle(len(imgList), func(i, j int) { + imgList[i], imgList[j] = imgList[j], imgList[i] + }) + + delay := 10 + gifImage := &gif.GIF{} + + for i, img := range imgList { + if i == 10 { + break + } + bounds := img.Bounds() + palettedImage := image.NewPaletted(bounds, palette.Plan9) + 
draw.FloydSteinberg.Draw(palettedImage, bounds, img, image.Point{}) + + gifImage.Image = append(gifImage.Image, palettedImage) + gifImage.Delay = append(gifImage.Delay, delay) + } + + gifFile, err := os.Create(fmt.Sprintf("%s/randomGIF_%d.gif", tmpDir, cnt)) + if err != nil { + resultChan <- err + } + defer gifFile.Close() + + resultChan <- gif.EncodeAll(gifFile, gifImage) + } +} diff --git a/pkg/unstructed/img.go b/pkg/unstructed/img.go new file mode 100644 index 0000000..17c7186 --- /dev/null +++ b/pkg/unstructed/img.go @@ -0,0 +1,72 @@ +package unstructed + +import ( + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/brianvoe/gofakeit/v6" + "github.com/cloud-barista/cm-data-mold/pkg/utils" +) + +type ImageType string + +// PNG generation function using gofakeit +// +// CapacitySize is in GB and generates png files +// within the entered dummyDir path. +func GenerateRandomPNGImage(dummyDir string, capacitySize int) error { + dummyDir = filepath.Join(dummyDir, "png") + if err := utils.IsDir(dummyDir); err != nil { + return err + } + + size := capacitySize * 10 * 145 + + countNum := make(chan int, size) + resultChan := make(chan error, size) + + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + randomImageWorker(countNum, dummyDir, resultChan) + }() + } + + for i := 0; i < size; i++ { + countNum <- i + } + close(countNum) + + go func() { + wg.Wait() + close(resultChan) + }() + + for ret := range resultChan { + if ret != nil { + return ret + } + } + + return nil +} + +// png worker +func randomImageWorker(countNum chan int, dirPath string, resultChan chan<- error) { + for num := range countNum { + file, err := os.Create(fmt.Sprintf("%s/randomImage_%d.png", dirPath, num)) + if err != nil { + resultChan <- err + } + defer file.Close() + + if _, err := file.Write(gofakeit.ImagePng(500, 500)); err != nil { + resultChan <- err + } + file.Close() + } +} diff --git a/pkg/unstructed/txt.go b/pkg/unstructed/txt.go new file 
mode 100644 index 0000000..ad11bd8 --- /dev/null +++ b/pkg/unstructed/txt.go @@ -0,0 +1,72 @@ +package unstructed + +import ( + "fmt" + "os" + "path/filepath" + "sync" + + "github.com/brianvoe/gofakeit/v6" + "github.com/cloud-barista/cm-data-mold/pkg/utils" +) + +// TXT generation function using gofakeit +// +// CapacitySize is in GB and generates txt files +// within the entered dummyDir path. +func GenerateRandomTXT(dummyDir string, capacitySize int) error { + dummyDir = filepath.Join(dummyDir, "txt") + if err := utils.IsDir(dummyDir); err != nil { + return err + } + + countNum := make(chan int, capacitySize*10) + resultChan := make(chan error, capacitySize*10) + + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + randomTxtWorker(countNum, dummyDir, resultChan) + }() + } + + for i := 0; i < capacitySize*10; i++ { + countNum <- i + } + close(countNum) + + go func() { + wg.Wait() + close(resultChan) + }() + + for ret := range resultChan { + if ret != nil { + return ret + } + } + + return nil +} + +// txt worker +func randomTxtWorker(countNum chan int, dirPath string, resultChan chan<- error) { + for num := range countNum { + file, err := os.Create(fmt.Sprintf("%s/randomTxt_%d.txt", dirPath, num)) + if err != nil { + resultChan <- err + } + + for i := 0; i < 1000; i++ { + if _, err := file.WriteString(fmt.Sprintf("%s\n", gofakeit.HipsterParagraph(10, 10, 120, " "))); err != nil { + resultChan <- err + } + } + + if err := file.Close(); err != nil { + resultChan <- err + } + } +} diff --git a/pkg/unstructed/zip.go b/pkg/unstructed/zip.go new file mode 100644 index 0000000..bca8381 --- /dev/null +++ b/pkg/unstructed/zip.go @@ -0,0 +1,116 @@ +package unstructed + +import ( + "archive/zip" + "fmt" + "io" + "os" + "path/filepath" + "sync" + + "github.com/cloud-barista/cm-data-mold/pkg/utils" +) + +// ZIP generation function using gofakeit +// +// CapacitySize is in GB and generates zip files +// within the entered dummyDir 
path. +func GenerateRandomZIP(dummyDir string, capacitySize int) error { + dummyDir = filepath.Join(dummyDir, "zip") + if err := utils.IsDir(dummyDir); err != nil { + return err + } + + tempPath := filepath.Join(dummyDir, "tmpTxt") + if err := os.MkdirAll(tempPath, 0755); err != nil { + return err + } + defer os.RemoveAll(tempPath) + + if err := GenerateRandomTXT(tempPath, 1); err != nil { + return err + } + + countNum := make(chan int, capacitySize) + resultChan := make(chan error, capacitySize) + + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + randomZIPWorker(countNum, dummyDir, tempPath, resultChan) + }() + } + + for i := 0; i < capacitySize; i++ { + countNum <- i + } + close(countNum) + + go func() { + wg.Wait() + close(resultChan) + }() + + for ret := range resultChan { + if ret != nil { + return ret + } + } + + return nil +} + +// txt worker +func randomZIPWorker(countNum chan int, dummyDir, tempPath string, resultChan chan<- error) { + for num := range countNum { + w, err := os.Create(filepath.Join(dummyDir, fmt.Sprintf("datamold-dummy-data_%d.zip", num))) + if err != nil { + resultChan <- err + } + defer w.Close() + + zipWriter := zip.NewWriter(w) + defer zipWriter.Close() + + if err := gzip(tempPath, zipWriter); err != nil { + resultChan <- err + } + + resultChan <- nil + } +} + +func gzip(srcDir string, zipWriter *zip.Writer) error { + return filepath.Walk(srcDir, func(fp string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if !info.IsDir() { + fileToZip, err := os.Open(fp) + if err != nil { + return err + } + defer fileToZip.Close() + + infoHeader, err := zip.FileInfoHeader(info) + if err != nil { + return err + } + + infoHeader.Name = filepath.Join(filepath.Base(srcDir), filepath.Base(fp)) + + writer, err := zipWriter.CreateHeader(infoHeader) + if err != nil { + return err + } + + _, err = io.Copy(writer, fileToZip) + + return err + } + return nil + }) +} From 
// IsDir makes sure path refers to a directory.
//
// If path does not exist it is created, including missing parents, with
// mode 0755. If path exists and is a directory, nil is returned. If path
// exists but is not a directory, a *os.PathError wrapping os.ErrExist is
// returned. Any other os.Stat failure is returned unchanged.
func IsDir(path string) error {
	fInfo, err := os.Stat(path)
	switch {
	case err == nil:
		if !fInfo.IsDir() {
			// Something that is not a directory already occupies the path.
			return &os.PathError{Op: "mkdir", Path: path, Err: os.ErrExist}
		}
		return nil
	case os.IsNotExist(err):
		return os.MkdirAll(path, 0755)
	default:
		return err
	}
}
1.21.3 require github.com/brianvoe/gofakeit/v6 v6.24.0