Skip to content

Commit

Permalink
Update OkkyService.java
Browse files Browse the repository at this point in the history
  • Loading branch information
in-seo authored Aug 9, 2024
1 parent a8c4724 commit 7a537c9
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions SouP/src/main/java/Matching/SouP/crawler/okky/OkkyService.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ public void getOkkyPostData() {
driver.get(urlOkky + "?page=" + Page);
String html = driver.getPageSource();
Document doc = Jsoup.parse(html);
for (int i = 20; i > 0; i--) { //오래된 글부터 크롤링 그럼 반드시 최신글은 DB에서 가장 밑에꺼임.
Elements element = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li.py-3\\.5.sm\\:py-4:nth-child(" + i + ")");
for (int i = 22; i > 0; i--) { //오래된 글부터 크롤링 그럼 반드시 최신글은 DB에서 가장 밑에꺼임.
if(i==1 || i==6)
continue;
Elements element = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li:nth-child(" + i + ")");
Elements title = element.select("div > div.my-2 > a");
String postName = title.text();
String num;
Expand Down Expand Up @@ -97,7 +99,7 @@ private int startPage(WebDriver driver, int start) throws StringIndexOutOfBounds
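* Decide whether the most recent post saved in the DB is still on page 1. If so many new posts have appeared that crawling has to begin from the middle of page 2, then the first post on page 3 must have a number smaller than start.
* !! If the very first post on the next page has a number greater than that of the post most recently saved in the DB, move on to the next page.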
*/
int cnt = 1;
int cnt = 2;
while(true){
if (page > 5) {
throw new IllegalStateException("오키 파싱 에러");
Expand All @@ -107,7 +109,7 @@ private int startPage(WebDriver driver, int start) throws StringIndexOutOfBounds
Document doc = Jsoup.parse(html);
int num = Integer.MAX_VALUE;
try {
String href = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li.py-3\\.5.sm\\:py-4:nth-child(" + cnt + ") > div > div.my-2 > a")
String href = doc.select("#__next > main > div > div:nth-child(2) > div > div:nth-child(5) > div > ul > li:nth-child(" + cnt + ") > div > div.my-2 > a")
.attr("href");
log.info(href);
String sNum = href.substring(10, href.lastIndexOf('?'));
Expand Down

0 comments on commit 7a537c9

Please sign in to comment.