From ea3cf3b6bb168683ee9131caae5abfdb2518d2a3 Mon Sep 17 00:00:00 2001 From: in-seo Date: Wed, 24 Jul 2024 19:50:56 +0900 Subject: [PATCH] hotfix: okky crawler --- .../java/Matching/SouP/crawler/okky/OkkyService.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java b/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java index c1c17b5..90c709a 100644 --- a/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java +++ b/SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java @@ -35,7 +35,7 @@ public void getOkkyPostData() { String html = driver.getPageSource(); Document doc = Jsoup.parse(html); for (int i = 20; i > 0; i--) { //오래된 글부터 크롤링 그럼 반드시 최신글은 DB에서 가장 밑에꺼임. - Elements element = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(6) > div > ul > li.py-4:nth-child(" + i + ")"); + Elements element = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li.py-4:nth-child(" + i + ")"); Elements title = element.select("div > div.my-2 > a"); String postName = title.text(); String num; @@ -51,7 +51,7 @@ public void getOkkyPostData() { String link = "https://okky.kr/articles/"+num; Document realPost = click(driver, link); - String content = realPost.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(3) > div:nth-child(2) > div:nth-child(3) > div > div > div").text(); + String content = realPost.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(2) > div:nth-child(2) > div:nth-child(3) > div > div > div").text(); System.out.println(content); StringBuilder stack = CrawlerService.parseStack(postName,content); String talk = ""; @@ -99,12 +99,15 @@ private int startPage(WebDriver driver, int start) throws StringIndexOutOfBounds */ int cnt = 1; while(true){ + if (page > 5) { + throw new IllegalStateException("오키 파싱 에러"); + } driver.get(urlOkky + "?page=" + page); String html = driver.getPageSource(); Document doc = Jsoup.parse(html); int num = Integer.MAX_VALUE; try { - String href = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(6) > div > ul > li:nth-child(" + cnt + ") > div > div.my-2 > a") + String href = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li:nth-child(" + cnt + ") > div > div.my-2 > a") .attr("href"); String sNum = href.substring(10, href.lastIndexOf('?')); num = Integer.parseInt(sNum); @@ -119,6 +122,7 @@ private int startPage(WebDriver driver, int start) throws StringIndexOutOfBounds } cnt=1; page++; + } }