Skip to content

Commit e96dcfc

Browse files
authored
Merge pull request #1 from CarToi/feat/peristalsis
[Feat/peristalsis] 데이터 수집 및 전처리 기초 세팅
2 parents 7852a3a + 06d61c0 commit e96dcfc

File tree

20 files changed

+524
-0
lines changed

20 files changed

+524
-0
lines changed

build.gradle

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,24 @@ repositories {
1919

2020
dependencies {
    implementation 'org.springframework.boot:spring-boot-starter-web'
    implementation 'org.springframework.boot:spring-boot-starter-webflux'

    // Lombok is an annotation processor: it is only needed while compiling,
    // so it is kept off the runtime classpath (compileOnly, not implementation).
    compileOnly 'org.projectlombok:lombok'
    annotationProcessor 'org.projectlombok:lombok'
    testCompileOnly 'org.projectlombok:lombok'
    testAnnotationProcessor 'org.projectlombok:lombok'

    testImplementation 'org.springframework.boot:spring-boot-starter-test'
    testImplementation 'io.projectreactor:reactor-test'
    testRuntimeOnly 'org.junit.platform:junit-platform-launcher'

    // selenium
    // https://mvnrepository.com/artifact/org.seleniumhq.selenium/selenium-java
    implementation("org.seleniumhq.selenium:selenium-java:4.33.0") // Selenium dependency
    // // https://mvnrepository.com/artifact/io.github.bonigarcia/webdrivermanager
    // implementation("io.github.bonigarcia:webdrivermanager:6.1.0")
    // https://mvnrepository.com/artifact/edu.stanford.nlp/stanford-corenlp
    implementation("edu.stanford.nlp:stanford-corenlp:4.5.9") // natural-language processing via sentence analysis
}
2541

2642
tasks.named('test') {
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package org.jun.saemangeum.global.persistence.domain;
2+
3+
/**
 * Classification attached to a piece of content.
 *
 * <p>Referenced by both {@code Content} and {@code RefinedDataDTO}.
 */
public enum Category {
    FESTIVAL,
    EVENT,
    TOUR
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package org.jun.saemangeum.global.persistence.domain;
2+
3+
// H2의 엔티티로 쓰이게 될 클래스
4+
public class Content {
5+
private Long id;
6+
private String title;
7+
private Category category;
8+
private String image;
9+
private String introduction;
10+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package org.jun.saemangeum.process.application.service.api;
2+
3+
import org.jun.saemangeum.process.application.service.base.OpenApiCollector;
4+
import org.jun.saemangeum.process.domain.dto.RefinedDataDTO;
5+
import org.jun.saemangeum.process.infrastructure.api.RestTemplateClient;
6+
import org.jun.saemangeum.process.presentation.dto.Festival;
7+
import org.jun.saemangeum.process.presentation.dto.FestivalResponse;
8+
import org.springframework.stereotype.Component;
9+
10+
import java.util.List;
11+
12+
/**
13+
* 공공데이터 API 축제 CSV 받아오기
14+
*/
15+
@Component
16+
public class FestivalCollector extends OpenApiCollector {
17+
18+
private static final String LAST_PATH = "/15006172/v1/uddi:ede8925d-bfbd-49fc-9f3c-abf1ead5b402";
19+
20+
public FestivalCollector(RestTemplateClient restTemplateClient) {
21+
super(restTemplateClient);
22+
}
23+
24+
@Override
25+
public List<RefinedDataDTO> collectData() {
26+
FestivalResponse response = restTemplateClient.get(
27+
LAST_PATH,
28+
FestivalResponse.class,
29+
q -> q.queryParam("page", 1).queryParam("perPage", 100)
30+
);
31+
32+
return response.data().stream().map(Festival::convertToDTO).toList();
33+
}
34+
35+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package org.jun.saemangeum.process.application.service.base;
2+
3+
import org.jun.saemangeum.global.persistence.domain.Content;
4+
import org.jun.saemangeum.process.domain.dto.RefinedDataDTO;
5+
import org.jun.saemangeum.process.infrastructure.api.RestTemplateClient;
6+
7+
import java.util.List;
8+
9+
public abstract class OpenApiCollector implements Refiner {
10+
// protected final WebClient webClient;
11+
protected final RestTemplateClient restTemplateClient;
12+
13+
// public OpenApiCollector(WebClient webClient) {
14+
// this.webClient = webClient;
15+
// }
16+
17+
public OpenApiCollector(RestTemplateClient restTemplateClient) {
18+
this.restTemplateClient = restTemplateClient;
19+
}
20+
21+
@Override
22+
public List<Content> refine() {
23+
// 여기에 추상 메소드들 기반으로 로직 처리처리할 예정
24+
// 생성한 것들을 바탕으로 프로세스까지 처리하는 걸 여기에 책임을 부여하자(팩토리 메서드 취지)
25+
return List.of();
26+
}
27+
28+
// 각 OpenAPI 호출과 관련된 추상메소드들
29+
public abstract List<RefinedDataDTO> collectData();
30+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package org.jun.saemangeum.process.application.service.base;
2+
3+
import org.jun.saemangeum.global.persistence.domain.Content;
4+
5+
import java.util.List;
6+
7+
public interface Refiner {
8+
List<Content> refine();
9+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package org.jun.saemangeum.process.application.service.base;
2+
3+
import org.jun.saemangeum.global.persistence.domain.Content;
4+
import org.openqa.selenium.WebDriver;
5+
6+
import java.util.List;
7+
8+
public abstract class SeleniumCollector implements Refiner {
9+
protected final WebDriver webDriver;
10+
11+
public SeleniumCollector(WebDriver webDriver) {
12+
this.webDriver = webDriver;
13+
}
14+
15+
@Override
16+
public List<Content> refine() {
17+
// 여기에 추상 메소드들 기반으로 로직 처리처리할 예정
18+
// 생성한 것들을 바탕으로 프로세스까지 처리하는 걸 여기에 책임을 부여하자(팩토리 메서드 취지)
19+
return List.of();
20+
}
21+
22+
// 각 페이지 크롤링과 관련된 추상메소드들
23+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package org.jun.saemangeum.process.domain.dto;
2+
3+
import org.jun.saemangeum.global.persistence.domain.Category;
4+
5+
public record RefinedDataDTO(
6+
String title,
7+
String position,
8+
Category category,
9+
String image,
10+
String introduction
11+
) {
12+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package org.jun.saemangeum.process.infrastructure.api;
2+
3+
import org.springframework.beans.factory.annotation.Value;
4+
import org.springframework.stereotype.Component;
5+
import org.springframework.web.client.RestTemplate;
6+
import org.springframework.web.util.UriComponentsBuilder;
7+
8+
import java.net.URI;
9+
import java.util.function.Consumer;
10+
11+
@Component
12+
public class RestTemplateClient {
13+
14+
@Value("${dev.baseUrl}")
15+
private String baseUrl;
16+
17+
private final RestTemplate restTemplate;
18+
19+
public RestTemplateClient(RestTemplate restTemplate) {
20+
this.restTemplate = restTemplate;
21+
}
22+
23+
public <T> T get(String path, Class<T> responseType, Consumer<UriComponentsBuilder> extraQuery) {
24+
UriComponentsBuilder builder = UriComponentsBuilder.fromUriString(baseUrl).path(path);
25+
26+
if (extraQuery != null) extraQuery.accept(builder);
27+
28+
URI uri = builder.build(true).toUri();
29+
return restTemplate.getForObject(uri, responseType);
30+
}
31+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package org.jun.saemangeum.process.infrastructure.config;
2+
3+
import org.springframework.beans.factory.annotation.Value;
4+
import org.springframework.context.annotation.Bean;
5+
import org.springframework.context.annotation.Configuration;
6+
import org.springframework.http.HttpRequest;
7+
import org.springframework.http.client.SimpleClientHttpRequestFactory;
8+
import org.springframework.http.client.support.HttpRequestWrapper;
9+
import org.springframework.web.client.RestTemplate;
10+
import org.springframework.web.util.UriComponentsBuilder;
11+
12+
import java.net.URI;
13+
import java.util.List;
14+
15+
@Configuration
16+
public class RestTemplateConfig {
17+
18+
@Value("${dev.openApiKey}")
19+
private String apiKey;
20+
21+
@Bean
22+
public RestTemplate restTemplate() {
23+
SimpleClientHttpRequestFactory requestFactory = new SimpleClientHttpRequestFactory();
24+
requestFactory.setConnectTimeout(5000);
25+
requestFactory.setReadTimeout(5000);
26+
27+
RestTemplate restTemplate = new RestTemplate(requestFactory);
28+
restTemplate.setInterceptors(List.of((request, body, execution) -> {
29+
URI uri = request.getURI();
30+
31+
URI updateUri = UriComponentsBuilder.fromUri(uri)
32+
.queryParam("serviceKey", apiKey)
33+
.build(true) // 인코딩 유지 설정을 통한 중복 인코딩 방지
34+
.toUri();
35+
36+
HttpRequest newRequest = new HttpRequestWrapper(request) {
37+
@Override
38+
public URI getURI() {
39+
return updateUri;
40+
}
41+
};
42+
43+
return execution.execute(newRequest, body);
44+
}));
45+
46+
return restTemplate;
47+
}
48+
49+
}

0 commit comments

Comments
 (0)