Skip to content

Commit

Permalink
Merge pull request #45 from kakao-tech-campus-2nd-step3/feat/#9-summa…
Browse files Browse the repository at this point in the history
…rize-youtube

[Feat] 크롤링해 비디오와 장소 정보를 저장하는 기능을 추가했어요
  • Loading branch information
sanghee0820 authored Oct 9, 2024
2 parents 38e5617 + f685579 commit ab82199
Show file tree
Hide file tree
Showing 24 changed files with 562 additions and 256 deletions.
25 changes: 25 additions & 0 deletions src/main/java/team7/inplace/crawling/application/AddressUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package team7.inplace.crawling.application;

import static lombok.AccessLevel.PRIVATE;

import com.fasterxml.jackson.databind.JsonNode;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.NoArgsConstructor;

/**
 * Static helper that pulls an address-like substring out of a YouTube video snippet.
 *
 * <p>The class is not instantiable; Lombok generates a private no-args constructor.
 */
@NoArgsConstructor(access = PRIVATE)
public class AddressUtil {
    private static final String ADDRESS_REGEX = "[가-힣0-9]+(?:도|시|구|군|읍|면|동|리|로|길)[^#,\\n()]+(?:동|읍|면|리|로|길|호|층|번지)[^#,\\n()]+";

    /**
     * Compiled form of {@link #ADDRESS_REGEX}. Compiling is comparatively expensive and
     * {@link Pattern} instances are immutable and safe to share, so it is done once here
     * instead of on every call.
     */
    private static final Pattern ADDRESS_PATTERN = Pattern.compile(ADDRESS_REGEX);

    /**
     * Searches the snippet's {@code description} field for the first substring that matches
     * {@link #ADDRESS_REGEX}.
     *
     * @param snippet the video snippet node whose {@code description} field is scanned
     * @return the first matching substring, or {@code null} when the description contains no match
     */
    public static String extractAddress(JsonNode snippet) {
        String videoDescription = snippet.path("description").asText();

        Matcher matcher = ADDRESS_PATTERN.matcher(videoDescription);
        if (matcher.find()) {
            return matcher.group();
        }
        // Callers test for null to detect "no address found", so null is kept as the sentinel.
        return null;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package team7.inplace.crawling.application;

import lombok.RequiredArgsConstructor;
import team7.inplace.global.annotation.Facade;
import team7.inplace.video.application.VideoFacade;

/**
 * Facade that connects the crawling service to the video facade: everything that was
 * crawled is converted to commands and handed over for creation.
 */
@Facade
@RequiredArgsConstructor
public class CrawlingFacade {
    private final YoutubeCrawlingService youtubeCrawlingService;
    private final VideoFacade videoFacade;

    /**
     * Crawls all videos and, for each crawling result, passes its video commands together
     * with its place commands to {@code VideoFacade#createVideos}.
     */
    public void updateVideos() {
        for (var info : youtubeCrawlingService.crawlAllVideos()) {
            // Arguments are evaluated left to right: video commands first, then place commands.
            videoFacade.createVideos(info.toVideoCommands(), info.toPlacesCommands());
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package team7.inplace.crawling.application;

import java.util.List;
import java.util.Objects;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import team7.inplace.crawling.application.dto.CrawlingInfo;
import team7.inplace.crawling.client.KakaoMapClient;
import team7.inplace.crawling.client.YoutubeClient;
import team7.inplace.crawling.persistence.YoutubeChannelRepository;
Expand All @@ -22,17 +24,30 @@ public class YoutubeCrawlingService {
3. 마지막 비디오 UUID를 업데이트 한다.
4. 카카오 API를 호출해 장소 정보를 가져온다
*/
public void crawlAllVideos() {
public List<CrawlingInfo> crawlAllVideos() {
var youtubeChannels = youtubeChannelRepository.findAll();
for (var channel : youtubeChannels) {
var rawVideoInfos = youtubeClient.getVideos(channel.getPlayListUUID(), channel.getLastVideoUUID());
channel.updateLastVideoUUID(rawVideoInfos.get(0).videoId());

var videos = rawVideoInfos.stream()
.map(rawVideoInfo -> kakaoMapClient.search(rawVideoInfo, channel.getChannelType().getCode()))
.filter(Objects::nonNull)
.toList();
}

var crawlInfos = youtubeChannels.stream()
.map(channel -> {
var videoSnippets = youtubeClient.getVideos(channel.getPlayListUUID(), channel.getLastVideoUUID());

var videoAddresses = videoSnippets.stream()
.map(AddressUtil::extractAddress)
.toList();

var placeNodes = videoAddresses.stream()
.map(address -> {
if (Objects.isNull(address)) {
return null;
}
return kakaoMapClient.search(address, channel.getChannelType().getCode());
})
.toList();

return new CrawlingInfo(channel.getInfluencerId(), videoSnippets, placeNodes);
}).toList();

return crawlInfos;
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package team7.inplace.crawling.application.dto;

import com.fasterxml.jackson.databind.JsonNode;
import java.util.List;
import team7.inplace.crawling.client.dto.PlaceNode;
import team7.inplace.place.application.command.PlacesCommand;
import team7.inplace.video.application.command.VideoCommand;

/**
 * Result of crawling one channel: the influencer it belongs to, the video snippets that were
 * fetched, and the place lookup result for each snippet.
 *
 * <p>{@code placeNodes} is produced by mapping the snippets one-to-one, and it holds
 * {@code null} at every position where no address could be extracted from the video
 * description, so both lists have the same length and order.
 *
 * @param influencerId  id of the influencer the videos belong to
 * @param videoSnippets raw snippet nodes of the crawled videos
 * @param placeNodes    place lookup results, index-aligned with {@code videoSnippets};
 *                      entries may be {@code null}
 */
public record CrawlingInfo(
        Long influencerId,
        List<JsonNode> videoSnippets,
        List<PlaceNode> placeNodes
) {
    /**
     * Converts every video snippet into a video creation command for this influencer.
     *
     * @return one command per snippet, in snippet order
     */
    public List<VideoCommand.Create> toVideoCommands() {
        return videoSnippets.stream()
                .map(snippet -> VideoCommand.Create.from(snippet, influencerId))
                .toList();
    }

    /**
     * Converts every place lookup result into a place creation command.
     *
     * <p>A {@code null} place node (no place found for that video) yields a {@code null}
     * command instead of a {@link NullPointerException}. Nulls are kept rather than filtered
     * out so the result stays index-aligned with {@link #toVideoCommands()}.
     * NOTE(review): confirm the consumer of this list tolerates {@code null} entries.
     *
     * @return one entry per place node, in order; {@code null} where the place node was {@code null}
     */
    public List<PlacesCommand.Create> toPlacesCommands() {
        return placeNodes.stream()
                .map(placeNode -> {
                    if (placeNode == null) {
                        return null;
                    }
                    return PlacesCommand.Create.from(placeNode.locationNode(), placeNode.placeNode());
                })
                .toList();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Component;
import org.springframework.web.client.RestTemplate;
import team7.inplace.crawling.client.dto.RawPlace;
import team7.inplace.crawling.client.dto.RawVideoInfo;
import team7.inplace.crawling.client.dto.PlaceNode;
import team7.inplace.global.kakao.config.KakaoApiProperties;

@Slf4j
Expand All @@ -24,8 +23,7 @@ public class KakaoMapClient {
private final KakaoApiProperties kakaoApiProperties;
private final RestTemplate restTemplate;

public RawPlace.Info search(RawVideoInfo videoInfo, String category) {
var address = videoInfo.address();
public PlaceNode search(String address, String category) {
var locationInfo = getLocateInfo(address, category);
var placeId = locationInfo.has("documents") ?
locationInfo.get("documents").get(0).get("id").asText() : null;
Expand All @@ -34,7 +32,7 @@ public RawPlace.Info search(RawVideoInfo videoInfo, String category) {
}

var placeInfo = getPlaceInfo(placeId);
return RawPlace.Info.from(locationInfo, placeInfo);
return PlaceNode.of(locationInfo, placeInfo);
}

private JsonNode getLocateInfo(String address, String category) {
Expand Down
35 changes: 6 additions & 29 deletions src/main/java/team7/inplace/crawling/client/YoutubeClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,26 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.web.client.RestTemplate;
import team7.inplace.crawling.client.dto.RawVideoInfo;

@Slf4j
@Component
public class YoutubeClient {
private static final String PLAY_LIST_ITEMS_BASE_URL = "https://www.googleapis.com/youtube/v3/playlistItems";
private static final String PLAY_LIST_PARAMS = "?part=snippet&playlistId=%s&key=%s&maxResults=50";
private static final String ADDRESS_REGEX = "[가-힣0-9]+(?:도|시|구|군|읍|면|동|리|로|길)[^#,\\n()]+(?:동|읍|면|리|로|길|호|층|번지)[^#,\\n()]+";
private final RestTemplate restTemplate;
private final String apiKey;

/**
 * Creates the client with the configured YouTube API key and the shared {@link RestTemplate}.
 *
 * @param apiKey       value of the {@code youtube.api.key} property
 * @param restTemplate HTTP client used for the API calls
 */
public YoutubeClient(@Value("${youtube.api.key}") String apiKey, RestTemplate restTemplate) {
    // The API key is a credential, so it is deliberately not written to the log.
    this.restTemplate = restTemplate;
    this.apiKey = apiKey;
}

public List<RawVideoInfo> getVideos(String playListId, String finalVideoUUID) {
List<RawVideoInfo> videoInfos = new ArrayList<>();
public List<JsonNode> getVideos(String playListId, String finalVideoUUID) {
List<JsonNode> snippets = new ArrayList<>();
String nextPageToken = null;
while (true) {
String url = PLAY_LIST_ITEMS_BASE_URL + String.format(PLAY_LIST_PARAMS, playListId, apiKey);
Expand All @@ -41,15 +36,14 @@ public List<RawVideoInfo> getVideos(String playListId, String finalVideoUUID) {
response = restTemplate.getForObject(url, JsonNode.class);
} catch (Exception e) {
log.error("Youtube API 호출이 실패했습니다. Youtuber Id {}", playListId);
log.info(e.getMessage());
break;
}
if (Objects.isNull(response)) {
log.error("Youtube API Response가 NULL입니다 {}.", playListId);
break;
}

var containsLastVideo = extractRawVideoInfo(videoInfos, response.path("items"), finalVideoUUID);
var containsLastVideo = extractSnippets(snippets, response.path("items"), finalVideoUUID);
if (containsLastVideo) {
break;
}
Expand All @@ -58,39 +52,22 @@ public List<RawVideoInfo> getVideos(String playListId, String finalVideoUUID) {
break;
}
}
return videoInfos;
return snippets;
}

/**
 * Tells whether there is no further page to fetch.
 *
 * @param nextPageToken token of the following page, possibly {@code null}
 * @return {@code true} when the token is {@code null} or empty
 */
private boolean isLastPage(String nextPageToken) {
    if (nextPageToken == null) {
        return true;
    }
    return nextPageToken.isEmpty();
}

private boolean extractRawVideoInfo(List<RawVideoInfo> videoInfos, JsonNode items, String finalVideoUUID) {
private boolean extractSnippets(List<JsonNode> snippets, JsonNode items, String finalVideoUUID) {
for (JsonNode item : items) {
var snippet = item.path("snippet");
var videoId = snippet.path("resourceId").path("videoId").asText();
var videoTitle = snippet.path("title").asText();
var videoDescription = snippet.path("description").asText();
if (videoId.equals(finalVideoUUID)) {
return true;
}

var address = extractAddress(videoDescription);
if (Objects.nonNull(address)) {
videoInfos.add(new RawVideoInfo(videoId, videoTitle, address));
continue;
}
log.info("주소를 찾을 수 없습니다. {}", videoDescription);
snippets.add(snippet);
}
return false;
}

/**
 * Compiled form of {@code ADDRESS_REGEX}; built once because {@link Pattern} instances are
 * immutable and recompiling on every call is wasted work.
 */
private static final Pattern ADDRESS_PATTERN = Pattern.compile(ADDRESS_REGEX);

/**
 * Finds the first substring of the description that matches {@code ADDRESS_REGEX}.
 *
 * @param description video description text to scan
 * @return the first matching substring, or {@code null} when nothing matches
 */
private String extractAddress(String description) {
    Matcher matcher = ADDRESS_PATTERN.matcher(description);
    if (matcher.find()) {
        return matcher.group();
    }
    return null;
}
}
12 changes: 12 additions & 0 deletions src/main/java/team7/inplace/crawling/client/dto/PlaceNode.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package team7.inplace.crawling.client.dto;

import com.fasterxml.jackson.databind.JsonNode;

/**
 * Pairs the two JSON results of a place lookup: the location search result and the place
 * detail result.
 *
 * @param locationNode JSON returned by the location search
 * @param placeNode    JSON returned by the place detail lookup
 */
public record PlaceNode(JsonNode locationNode, JsonNode placeNode) {

    /**
     * Static factory equivalent to the canonical constructor.
     *
     * @param locationNode JSON returned by the location search
     * @param placeNode    JSON returned by the place detail lookup
     * @return a new {@code PlaceNode} holding both nodes
     */
    public static PlaceNode of(JsonNode locationNode, JsonNode placeNode) {
        return new PlaceNode(locationNode, placeNode);
    }
}
Loading

0 comments on commit ab82199

Please sign in to comment.