Skip to content

Commit

Permalink
hotfix: okky xss path selector
Browse files Browse the repository at this point in the history
  • Loading branch information
in-seo committed Jan 12, 2025
1 parent 76ca224 commit ee42eb2
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 15 deletions.
7 changes: 3 additions & 4 deletions SouP/src/main/java/Matching/SouP/common/SlackNotifier.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
public class SlackNotifier {
private static final OkHttpClient client = new OkHttpClient();

public void sendMessageToSlack() {
public void sendMessageToSlack(String errorMessage) {
String webHookURL = PropertyUtil.getProperty("webhook.url");
String message = "OKKY 파싱 에러";

RequestBody body = RequestBody.create(
MediaType.parse("application/json; charset=utf-8"),
"{\"text\":\"" + message + "\"}"
"{\"text\":\"" + message + " " + errorMessage + "\"}"
);

Request request = new Request.Builder()
Expand All @@ -27,9 +27,8 @@ public void sendMessageToSlack() {
if (!response.isSuccessful()) {
throw new RuntimeException("Unexpected code " + response);
}
log.warn("Message sent successfully: " + response.body().string());
} catch (Exception e) {
e.printStackTrace();
log.error(e.toString());
}
}
}
21 changes: 10 additions & 11 deletions SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.WebDriver;
import org.springframework.stereotype.Service;
Expand Down Expand Up @@ -35,11 +36,11 @@ public void getOkkyPostData() {
driver.get(urlOkky + "?page=" + Page);
String html = driver.getPageSource();
Document doc = Jsoup.parse(html);
for (int i = 23; i > 4; i--) { //오래된 글부터 크롤링 그럼 반드시 최신글은 DB에서 가장 밑에꺼임.
if(i==10) // 공지, 광고 제거
continue;
Elements element = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li:nth-child(" + i + ")");
Elements elements = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li[class^=\"py\"]\n");
for (int i = elements.size() - 1; i >= 0; i--) {
Element element = elements.get(i);
Elements title = element.select("div > div.my-2 > a");
// 여기서 각 element에 대한 처리를 진행
String postName = title.text();
String num;
try {
Expand Down Expand Up @@ -99,35 +100,33 @@ private int startPage(WebDriver driver, int start) throws StringIndexOutOfBounds
* 디비에 저장된 가장 최근 글이 1페이지에 있는지 여부를 판단. 만약 글 리젠이 많아서 2페이지 중반부터 크롤링해야 되면? 3페이지 첫 글이 start보다 작아야 됨.
* !!다음 페이지의 맨 첫 번째 글이, 가장 최근에 디비에 저장된 글의 번호보다 크면 다음 페이지로 넘어가야 됨
*/
int cnt = 4;
int cnt = 1;
while(true){
if (page > 5 || cnt > 6) {
SlackNotifier slackNotifier = new SlackNotifier();
slackNotifier.sendMessageToSlack();
slackNotifier.sendMessageToSlack("시작 페이지를 찾지 못했습니다.");
throw new IllegalStateException("오키 파싱 에러");
}
driver.get(urlOkky + "?page=" + page);
String html = driver.getPageSource();
Document doc = Jsoup.parse(html);
int num = Integer.MAX_VALUE;
try {
String href = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li:nth-child(" + cnt + ") > div > div.my-2 > a")
.attr("href");

Elements elements = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li[class^=\"py\"]\n");
String href = elements.get(0).select("div > div.my-2 > div > a").attr("href"); // 각 페이지 첫 글의 번호를 통해 페이지를 선택하자.
String sNum = href.substring(10, href.lastIndexOf('?'));
num = Integer.parseInt(sNum);
}catch (StringIndexOutOfBoundsException | NullPointerException e){
cnt++;
log.info("StringIndexOutOfBoundsException");
SlackNotifier slackNotifier = new SlackNotifier();
slackNotifier.sendMessageToSlack();
slackNotifier.sendMessageToSlack(e.getMessage());
continue;
}
if(num<start){
log.info("{}페이지부터 시작",page-1);
return page-1;
}
cnt=1;
page++;

}
Expand Down

0 comments on commit ee42eb2

Please sign in to comment.