diff --git a/src/main/java/ru/mcs/udk/UdkSiteParser.java b/src/main/java/ru/mcs/udk/UdkSiteParser.java index 22d8aed..73d2aff 100644 --- a/src/main/java/ru/mcs/udk/UdkSiteParser.java +++ b/src/main/java/ru/mcs/udk/UdkSiteParser.java @@ -1,39 +1,59 @@ package ru.mcs.udk; -import com.fasterxml.jackson.databind.ObjectMapper; import org.jsoup.Jsoup; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import java.io.FileNotFoundException; import java.io.FileOutputStream; +import java.io.IOException; import java.io.PrintStream; import java.nio.charset.StandardCharsets; public class UdkSiteParser { - public static void main(String[] args) throws InterruptedException, FileNotFoundException { + + /** Start page handed to getHierarchy by main; getUrl also prefixes it to hrefs that begin with a dot. */ public static final String HTTPS_WWW_TEACODE_COM_ONLINE_UDC = "https://www.teacode.com/online/udc"; + + /** Redirects System.out to the file out.txt (UTF-8, autoflush) and starts the traversal at level 0 with stopLevel 1000. */ public static void main(String[] args) throws IOException { System.setOut(new PrintStream(new FileOutputStream("out.txt"), true, StandardCharsets.UTF_8)); - ObjectMapper objectMapper = new ObjectMapper(); + getHierarchy(HTTPS_WWW_TEACODE_COM_ONLINE_UDC, 0, 1000); + + } + + /** Fetches url with Jsoup (5000 ms timeout, redirects followed) and prints one line per table row matching tr[bgcolor=#eaeaea]: level tab characters, the text of cell 0, a tab, and the text of the left-aligned cell 1. It then recurses into the href of the link in cell 0 when that href is non-empty and level <= stopLevel. Any exception is caught and only its message is printed. NOTE(review): the check uses level rather than nextLevel, so pages one level past stopLevel are still fetched, and nothing visible here prevents revisiting a page that links back to an ancestor; confirm both are intended. */ public static void getHierarchy(String url, int level, int stopLevel) { try { - var document = Jsoup.connect("https://www.teacode.com/online/udc/") - .timeout(10000) + var document = Jsoup.connect(url) + .timeout(5000) .followRedirects(true) .userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0") .execute().parse(); - Elements one = document.select("table tr[bgcolor=#eaeaea] td:eq(0) a"); - Elements two = document.select("table tr[bgcolor=#eaeaea] td:eq(1)[align=left]"); + Elements rows = document.select("table tr[bgcolor=#eaeaea]"); - if (one.size() == two.size()) { - for (int i = 0; i < one.size(); i++) { - Element udkNumber = one.get(i); - Element udkText = two.get(i); - System.out.printf("%s\t%s%n", udkNumber.text(), udkText.text()); + // here we create a new variable that will be passed to the method (the depth used for the recursive calls) + int nextLevel = level + 1; + for (Element row : rows) { 
+ Elements udkNumber = row.select("td:eq(0)"); + Elements udkTitle = row.select("td:eq(1)[align=left]"); + System.out.printf("%s%s\t%s%n", "\t".repeat(Math.max(0, level)), udkNumber.text(), udkTitle.text()); + + String pageUrl = udkNumber.select("a").attr("href"); + if (!pageUrl.isEmpty() && level <= stopLevel) { + String subUrl = getUrl(url, pageUrl); + + getHierarchy(subUrl, nextLevel, stopLevel); } } } catch (Exception ex) { System.out.println(ex.getMessage()); } } + + /** Builds the address of a linked page. An href starting with a dot is appended, minus its first character, to the base URL constant; any other href is appended to url after a trailing /name.html segment of url has been replaced by a single slash. NOTE(review): replaceFirst takes a regex in which the dot matches any character, so it simply drops the first character here; also, when url ends neither in .html nor in a slash, the else branch joins the two parts without a separator. Confirm against the hrefs the site actually serves. */ private static String getUrl(String url, String subUrl) { + if (subUrl.startsWith(".")) { + return String.format("%s%s", HTTPS_WWW_TEACODE_COM_ONLINE_UDC, subUrl.replaceFirst(".", "")); + } else { + return String.format("%s%s", url.replaceAll("/[^/]+\\.html$", "/"), subUrl); + } + } }