diff --git a/crawler.go b/crawler.go
index 7d58936..058d7bd 100644
--- a/crawler.go
+++ b/crawler.go
@@ -1,6 +1,171 @@
 package main
 
-func Crawl(month int, year int, outputPath string) []string {
-	var paths []string
-	return paths
-}
+import (
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/dadosjusbr/coletores/status"
+)
+
+// Index of each spreadsheet in the slice returned by Crawl.
+const (
+	viURLType   int = 0
+	remuURLType int = 1
+)
+
+const (
+	// Report URLs, complete up to the "anomes=" value (year and month as YYYYMM).
+	remuReportURL = "https://servicos-portal.mpro.mp.br/plcVis/frameset?__report=..%2FROOT%2Frel%2Fcontracheque%2Fmembros%2FremuneracaoMembrosAtivos.rptdesign&anomes="
+	viReportURL   = "https://servicos-portal.mpro.mp.br/plcVis/frameset?__report=..%2FROOT%2Frel%2Fcontracheque%2Fmembros%2FverbasIndenizatoriasMembrosAtivos.rptdesign&anomes="
+
+	// sessionIDMarker is the text that immediately precedes the session id in
+	// the body of a report page; sessionIDLen is how many characters after it
+	// are taken as the id.
+	sessionIDMarker = "Constants.viewingSessionId = \""
+	sessionIDLen    = 19
+
+	// downloadParams, appended after the session id, requests the report as
+	// an XLS attachment.
+	downloadParams = "&__format=xls&__asattachment=true&__overwrite=false"
+)
+
+// requestURLs holds the download URL of each spreadsheet.
+type requestURLs struct {
+	remunerationURL string
+	benefitsURL     string
+}
+
+// getRequestURLs returns the download URL of each spreadsheet for the given
+// year and month. It requests one session id per report, so it performs one
+// HTTP request per spreadsheet.
+func getRequestURLs(year, month int) (requestURLs, error) {
+	period := fmt.Sprintf("%d%02d", year, month)
+
+	remuURL, err := downloadURL(remuReportURL + period + "&nome=&cargo=&lotacao=")
+	if err != nil {
+		return requestURLs{}, err
+	}
+	benefitsURL, err := downloadURL(viReportURL + period)
+	if err != nil {
+		return requestURLs{}, err
+	}
+	return requestURLs{remunerationURL: remuURL, benefitsURL: benefitsURL}, nil
+}
+
+// downloadURL fetches a session id for reportURL and returns the URL that
+// downloads that report as an XLS attachment.
+func downloadURL(reportURL string) (string, error) {
+	sessionID, err := getSessionID(reportURL)
+	if err != nil {
+		return "", err
+	}
+	return reportURL + "&__sessionId=" + sessionID + downloadParams, nil
+}
+
+// getSessionID requests url and returns the session id found in the page body.
+// It returns a status.ConnectionError when the request fails, the server does
+// not answer 200 OK, or the page does not contain a session id.
+func getSessionID(url string) (string, error) {
+	resp, err := http.Get(url)
+	if err != nil {
+		return "", status.NewError(status.ConnectionError, fmt.Errorf("Was not possible to get a session id to the url: %s. %q", url, err))
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return "", status.NewError(status.ConnectionError, fmt.Errorf("Was not possible to get a session id to the url: %s. Unexpected HTTP status: %s", url, resp.Status))
+	}
+
+	page, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return "", status.NewError(status.ConnectionError, fmt.Errorf("Was not possible to get a session id to the url: %s. %q", url, err))
+	}
+
+	id, ok := extractSessionID(string(page))
+	if !ok {
+		return "", status.NewError(status.ConnectionError, fmt.Errorf("Was not possible to get a session id to the url: %s. Session id not found in the page", url))
+	}
+	return id, nil
+}
+
+// extractSessionID returns the sessionIDLen characters that follow the first
+// sessionIDMarker in page. It reports false, instead of panicking, when the
+// marker is missing or fewer than sessionIDLen characters follow it.
+func extractSessionID(page string) (string, bool) {
+	start := strings.Index(page, sessionIDMarker)
+	if start < 0 {
+		return "", false
+	}
+	rest := page[start+len(sessionIDMarker):]
+	if len(rest) < sessionIDLen {
+		return "", false
+	}
+	return rest[:sessionIDLen], true
+}
+
+// download saves the content served at url to filePath, creating outputPath
+// (and any missing parent directory) first.
+func download(url string, filePath string, outputPath string) error {
+	resp, err := http.Get(url)
+	if err != nil {
+		return status.NewError(status.ConnectionError, fmt.Errorf("Problem doing GET on the URL(%s) to download the file(%s). Error: %q", url, filePath, err))
+	}
+	defer resp.Body.Close()
+
+	// Without this check an HTTP error page would be saved as if it were the spreadsheet.
+	if resp.StatusCode != http.StatusOK {
+		return status.NewError(status.ConnectionError, fmt.Errorf("Problem doing GET on the URL(%s) to download the file(%s). Unexpected HTTP status: %s", url, filePath, resp.Status))
+	}
+
+	// MkdirAll is a no-op when the directory already exists.
+	if err := os.MkdirAll(outputPath, 0755); err != nil {
+		return status.NewError(status.SystemError, fmt.Errorf("Error creating outputfolder (%s). Error: %q", outputPath, err))
+	}
+
+	file, err := os.Create(filePath)
+	if err != nil {
+		return status.NewError(status.SystemError, fmt.Errorf("Error creating downloaded (%s) file(%s). Error: %q", url, filePath, err))
+	}
+
+	_, err = io.Copy(file, resp.Body)
+	// Close explicitly: a failed Close can mean buffered data never reached disk.
+	if closeErr := file.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		return status.NewError(status.SystemError, fmt.Errorf("Was not possible to save the downloaded file: %s. The following mistake was taken: %q", filePath, err))
+	}
+	return nil
+}
+
+// Crawl downloads the benefits and remuneration spreadsheets of the given
+// month and year into outputPath and returns the paths of the saved files, in
+// that order. It stops at the first failure and returns its error.
+func Crawl(month int, year int, outputPath string) ([]string, error) {
+	request, err := getRequestURLs(year, month)
+	if err != nil {
+		return nil, err
+	}
+
+	// Keyed by the *URLType constants so the download order is explicit.
+	targets := [...]struct{ url, suffix string }{
+		viURLType:   {request.benefitsURL, "vi"},
+		remuURLType: {request.remunerationURL, "remu"},
+	}
+
+	paths := make([]string, 0, len(targets))
+	for _, target := range targets {
+		fileName := fmt.Sprintf("%d_%02d_%s.xls", year, month, target.suffix)
+		filePath := filepath.Join(outputPath, fileName)
+		if err := download(target.url, filePath, outputPath); err != nil {
+			return nil, err
+		}
+		paths = append(paths, filePath)
+	}
+	return paths, nil
+}
diff --git a/main.go b/main.go
index d5e8087..5c4f70a 100644
--- a/main.go
+++ b/main.go
@@ -14,7 +14,7 @@ import (
 type Environment struct {
 	Month        int    `envconfig:"MONTH" required:"true"`
 	Year         int    `envconfig:"YEAR" required:"true"`
-	OutputFolder string `envconfig:"OUTPUT_FOLDER" default:"/output"`
+	OutputFolder string `envconfig:"OUTPUT_FOLDER" default:"./output"`
 	GitCommit    string `envconfig:"GIT_COMMIT" required:"true"`
 }
 
@@ -46,7 +46,11 @@ func main() {
 	}
 
 	// Main execution
-	fileNames := Crawl(month, year, outputPath)
+	fileNames, err := Crawl(month, year, outputPath)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
 	employees := Parse(month, year, fileNames)
 
 	cr := coletores.ExecutionResult{
@@ -67,7 +71,6 @@ func main() {
 	result, err := json.MarshalIndent(cr, "", " ")
 	if err != nil {
 		status.ExitFromError(status.NewError(status.SystemError, fmt.Errorf("JSON marshiling error: %q", err)))
-		os.Exit(1)
 	}
 	fmt.Println(string(result))
 }