From 8b3bc529929cbd23f87c5de335911a594688215b Mon Sep 17 00:00:00 2001 From: in-seo Date: Sun, 12 Jan 2025 12:24:27 +0900 Subject: [PATCH] hotfix: okky xss path selector --- .../java/Matching/SouP/crawler/okky/OkkyService.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java b/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java index 8bfdeec..a8d8b26 100644 --- a/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java +++ b/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java @@ -8,6 +8,7 @@ import lombok.extern.slf4j.Slf4j; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.openqa.selenium.WebDriver; import org.springframework.stereotype.Service; @@ -35,11 +36,11 @@ public void getOkkyPostData() { driver.get(urlOkky + "?page=" + Page); String html = driver.getPageSource(); Document doc = Jsoup.parse(html); - for (int i = 23; i > 4; i--) { //오래된 글부터 크롤링 그럼 반드시 최신글은 DB에서 가장 밑에꺼임. - if(i==10) // 공지, 광고 제거 - continue; - Elements element = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li:nth-child(" + i + ")"); + Elements elements = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li[class*='py']"); + for (int i = elements.size() - 1; i >= 0; i--) { + Element element = elements.get(i); Elements title = element.select("div > div.my-2 > a"); + // 여기서 각 element에 대한 처리를 진행 String postName = title.text(); String num; try {