-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #31 from SKHUMING/develop
Crawling 구현
- Loading branch information
Showing
14 changed files
with
398 additions
and
289 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
24 changes: 24 additions & 0 deletions
24
src/main/java/com/itcontest/skhuming/notice/api/CrawlingController.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
package com.itcontest.skhuming.notice.api; | ||
|
||
import com.itcontest.skhuming.notice.application.CrawlingService; | ||
import org.springframework.http.HttpStatus; | ||
import org.springframework.http.ResponseEntity; | ||
import org.springframework.web.bind.annotation.PostMapping; | ||
import org.springframework.web.bind.annotation.RestController; | ||
|
||
import java.io.IOException; | ||
|
||
@RestController | ||
public class CrawlingController { | ||
private final CrawlingService crawlingService; | ||
|
||
public CrawlingController(CrawlingService crawlingService) { | ||
this.crawlingService = crawlingService; | ||
} | ||
|
||
@PostMapping("/api/admin/get-crawling") | ||
public ResponseEntity<String> crawlingSuccess() throws IOException { | ||
crawlingService.getNoticeData(); | ||
return new ResponseEntity<>("SUCCESS", HttpStatus.OK); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
102 changes: 102 additions & 0 deletions
102
src/main/java/com/itcontest/skhuming/notice/application/CrawlingService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
package com.itcontest.skhuming.notice.application; | ||
|
||
import com.itcontest.skhuming.notice.domain.Notice; | ||
import com.itcontest.skhuming.notice.domain.repository.NoticeRepository; | ||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
import org.jsoup.select.Elements; | ||
import org.springframework.stereotype.Service; | ||
import org.springframework.transaction.annotation.Transactional; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
@Service | ||
@Transactional(readOnly = true) | ||
public class CrawlingService { | ||
private final NoticeRepository noticeRepository; | ||
private final String BASE_URL = "https://www.skhu.ac.kr"; | ||
private final String URL = BASE_URL + "/skhu/4198/subview.do"; | ||
|
||
public CrawlingService(NoticeRepository noticeRepository) { | ||
this.noticeRepository = noticeRepository; | ||
} | ||
|
||
@Transactional | ||
public void getNoticeData() throws IOException { | ||
List<Notice> notices = new ArrayList<>(); | ||
|
||
Document document = Jsoup.connect(URL).get(); | ||
|
||
for (int page = 1; page <= 5; page++) { | ||
Elements noticeElement = document.select("table.board-table.horizon1 tbody tr"); | ||
|
||
for (Element content : noticeElement) { | ||
// 번호 | ||
String number = content.select("td.td-num").text(); | ||
|
||
// 일반 공지 (중요 공지) 제외 | ||
if (number.equals("일반공지")) { | ||
continue; | ||
} | ||
|
||
// 상태 | ||
String status = content.select("td.td-state").text(); | ||
boolean statusBoolean = "진행중".equals(status); // true가 진행중 | ||
|
||
// 제목 | ||
String title = content.select("td.td-subject strong").text(); | ||
|
||
// 작성일 | ||
String writeDate = content.select("td.td-date").text(); | ||
|
||
// 작성자 | ||
String author = content.select("td.td-write").text(); | ||
|
||
// 공지사항 url | ||
Element linkElement = content.select("a").first(); | ||
String relativeUrl = linkElement.attr("href"); | ||
|
||
// BASE_URL(기본 https) + relativeUrl | ||
String fullUrl = BASE_URL + relativeUrl; | ||
|
||
// 상세 공지 내용 | ||
StringBuilder contents = new StringBuilder(); | ||
Document detailDocument = Jsoup.connect(fullUrl).get(); | ||
Elements contentElements = detailDocument.select("._fnctWrap .view-con"); | ||
|
||
// <p>태그로 구분하여 줄바꿈 | ||
for (Element element : contentElements.select("p")) { | ||
contents.append(element.text()).append("\n"); | ||
} | ||
|
||
for (Element element : contentElements.select("h3")) { | ||
contents.append(element.text()).append("\n"); | ||
} | ||
|
||
Notice notice = new Notice( | ||
title, | ||
contents.toString(), | ||
writeDate, | ||
fullUrl, | ||
statusBoolean, | ||
author); | ||
|
||
if (!contents.toString().trim().isEmpty() && !noticeRepository.existsByTitleAndContents(title, contents.toString())) { | ||
notices.add(notice); | ||
} | ||
} | ||
// 다음 페이지로 이동하기 위해 URL 변경 | ||
if (page < 5) { | ||
String nextPageUrl = BASE_URL + "/skhu/4198/subview.do?page=" + (page + 1); | ||
document = Jsoup.connect(nextPageUrl).get(); | ||
} | ||
} | ||
|
||
if (!notices.isEmpty()) { | ||
noticeRepository.saveAll(notices); | ||
} | ||
} | ||
} |
Oops, something went wrong.