From ee42eb227a23269b8a6070380fde2d6dff72e91b Mon Sep 17 00:00:00 2001 From: in-seo Date: Sun, 12 Jan 2025 13:07:03 +0900 Subject: [PATCH] hotfix: okky xss path selector --- .../Matching/SouP/common/SlackNotifier.java | 7 +++---- .../SouP/crawler/okky/OkkyService.java | 21 +++++++++---------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/SouP/src/main/java/Matching/SouP/common/SlackNotifier.java b/SouP/src/main/java/Matching/SouP/common/SlackNotifier.java index 414b0bb..ca5e081 100644 --- a/SouP/src/main/java/Matching/SouP/common/SlackNotifier.java +++ b/SouP/src/main/java/Matching/SouP/common/SlackNotifier.java @@ -9,13 +9,13 @@ public class SlackNotifier { private static final OkHttpClient client = new OkHttpClient(); - public void sendMessageToSlack() { + public void sendMessageToSlack(String errorMessage) { String webHookURL = PropertyUtil.getProperty("webhook.url"); String message = "OKKY 파싱 에러"; RequestBody body = RequestBody.create( MediaType.parse("application/json; charset=utf-8"), - "{\"text\":\"" + message + "\"}" + "{\"text\":\"" + message + " " + errorMessage + "\"}" ); Request request = new Request.Builder() @@ -27,9 +27,8 @@ public void sendMessageToSlack() { if (!response.isSuccessful()) { throw new RuntimeException("Unexpected code " + response); } - log.warn("Message sent successfully: " + response.body().string()); } catch (Exception e) { - e.printStackTrace(); + log.error(e.toString()); } } } diff --git a/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java b/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java index 8bfdeec..2c9c054 100644 --- a/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java +++ b/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java @@ -8,6 +8,7 @@ import lombok.extern.slf4j.Slf4j; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.openqa.selenium.WebDriver; import org.springframework.stereotype.Service; @@ -35,11 +36,11 @@ public void getOkkyPostData() { driver.get(urlOkky + "?page=" + Page); String html = driver.getPageSource(); Document doc = Jsoup.parse(html); - for (int i = 23; i > 4; i--) { //오래된 글부터 크롤링 그럼 반드시 최신글은 DB에서 가장 밑에꺼임. - if(i==10) // 공지, 광고 제거 - continue; - Elements element = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li:nth-child(" + i + ")"); + Elements elements = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li[class^=\"py\"]\n"); + for (int i = elements.size() - 1; i >= 0; i--) { + Element element = elements.get(i); Elements title = element.select("div > div.my-2 > a"); + // 여기서 각 element에 대한 처리를 진행 String postName = title.text(); String num; try { @@ -99,11 +100,11 @@ private int startPage(WebDriver driver, int start) throws StringIndexOutOfBounds * 디비에서 저장된 가장 최근 글이 1페이지에 있나 여부 판단. 만약 글 리젠이 많아서 2페이지 중반부터 크롤링 해야되면? 3페이지 첫글이 start보다 작아야 됌. * !!다음 페이지의 맨 첫 번째 글이, 가장 최근에 디비에 저장된 글의 번호보다 크면 다음 페이지로 넘어가야됌 */ - int cnt = 4; + int cnt = 1; while(true){ if (page > 5 || cnt > 6) { SlackNotifier slackNotifier = new SlackNotifier(); - slackNotifier.sendMessageToSlack(); + slackNotifier.sendMessageToSlack("시작 페이지를 찾지 못했습니다."); throw new IllegalStateException("오키 파싱 에러"); } driver.get(urlOkky + "?page=" + page); @@ -111,23 +112,21 @@ private int startPage(WebDriver driver, int start) throws StringIndexOutOfBounds Document doc = Jsoup.parse(html); int num = Integer.MAX_VALUE; try { - String href = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li:nth-child(" + cnt + ") > div > div.my-2 > a") - .attr("href"); - + Elements elements = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li[class^=\"py\"]\n"); + String href = elements.get(0).select("div > div.my-2 > div > a").attr("href"); // 각 페이지 첫 글의 번호를 통해 페이지를 선택하자. String sNum = href.substring(10, href.lastIndexOf('?')); num = Integer.parseInt(sNum); }catch (StringIndexOutOfBoundsException | NullPointerException e){ cnt++; log.info("StringIndexOutOfBoundsException"); SlackNotifier slackNotifier = new SlackNotifier(); - slackNotifier.sendMessageToSlack(); + slackNotifier.sendMessageToSlack(e.getMessage()); continue; } if(num