From 6e387141ca824fce0804abe99bc307d2fcef78f0 Mon Sep 17 00:00:00 2001 From: Yogesh Mudaliar Date: Thu, 22 Aug 2024 15:58:52 +1000 Subject: [PATCH 01/15] initial commit --- .idea/.gitignore | 3 + .idea/compiler.xml | 20 ++ .idea/encodings.xml | 7 + .idea/jarRepositories.xml | 45 ++++ .idea/misc.xml | 15 ++ .idea/vcs.xml | 6 + pom.xml | 2 +- .../MulechainwebcrawlerOperations.java | 237 +++++++++++++++++- .../META-INF/mule-artifact/mule-artifact.json | 25 ++ ...lechain-crawler-extension-descriptions.xml | 54 ++++ .../MulechainwebcrawlerConfiguration.class | Bin 798 -> 798 bytes .../MulechainwebcrawlerExtension.class | Bin 763 -> 763 bytes .../MulechainwebcrawlerOperations.class | Bin 1121 -> 10293 bytes .../internal/helpers/crawlingHelper.class | Bin 1090 -> 1090 bytes .../docs/mulechain-crawler-documentation.adoc | 7 +- .../docs/mulechain-crawler-documentation.html | 33 ++- ...eb-crawler-0.0.0-SNAPSHOT-mule-plugin.jar} | Bin 1355106 -> 1358294 bytes target/temporal-extension-model.json | 144 ++++++++++- ...ulechainwebcrawlerOperationsTestCase.class | Bin 1887 -> 1879 bytes 19 files changed, 572 insertions(+), 26 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/compiler.xml create mode 100644 .idea/encodings.xml create mode 100644 .idea/jarRepositories.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/vcs.xml create mode 100644 target/classes/META-INF/mule-artifact/mule-artifact.json create mode 100644 target/classes/META-INF/mulechain-crawler-extension-descriptions.xml rename target/{mulechain-web-crawler-0.0.0-mule-plugin.jar => mulechain-web-crawler-0.0.0-SNAPSHOT-mule-plugin.jar} (99%) diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 0000000..6c3323e --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..8a81040 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml new file mode 100644 index 0000000..3d41add --- /dev/null +++ b/.idea/jarRepositories.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..bff2946 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,15 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 4343728..dfe1425 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 com.mule.mulechain mulechain-web-crawler - 0.0.0 + 0.0.0-SNAPSHOT mule-extension Mulechain-web-crawler Extension diff --git a/src/main/java/com/mule/mulechain/crawler/internal/MulechainwebcrawlerOperations.java b/src/main/java/com/mule/mulechain/crawler/internal/MulechainwebcrawlerOperations.java index 330f4cd..df0c1c2 100644 --- a/src/main/java/com/mule/mulechain/crawler/internal/MulechainwebcrawlerOperations.java +++ b/src/main/java/com/mule/mulechain/crawler/internal/MulechainwebcrawlerOperations.java @@ -1,29 +1,244 @@ package com.mule.mulechain.crawler.internal; -import static org.mule.runtime.extension.api.annotation.param.MediaType.ANY; - +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; import org.mule.runtime.extension.api.annotation.Alias; -import org.mule.runtime.extension.api.annotation.param.MediaType; import org.mule.runtime.extension.api.annotation.param.Config; -import java.io.IOException; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; +import org.mule.runtime.extension.api.annotation.param.MediaType; +import org.mule.runtime.extension.api.annotation.param.Optional; +import org.mule.runtime.extension.api.annotation.param.display.DisplayName; +import org.mule.runtime.extension.api.annotation.param.display.Example; +import org.mule.runtime.extension.api.annotation.param.display.Placement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.*; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; +import java.text.SimpleDateFormat; +import java.util.Base64; +import java.util.Date; +import java.util.HashSet; +import java.util.Set; + +import static org.mule.runtime.extension.api.annotation.param.MediaType.ANY; /** * This class is a container for operations, every public method in this class will be taken as an extension operation. */ public class MulechainwebcrawlerOperations { + private static final Logger LOGGER = LoggerFactory.getLogger(MulechainwebcrawlerOperations.class); + /** * Example of an operation that uses the configuration and a connection instance to perform some action. - * @throws IOException + * + * @throws IOException + */ + + /* JSoup limitiations / web crawl challenges + - some sites prevent robots - use of User-Agent may be required but not always guaranteed to work + - JavaScript generated content is not read by jsoup + - some sites require cookies or sessions to be present */ - @MediaType(value = ANY, strict = false) - @Alias("Crawl-website") - public String crawlWebsite(String url, @Config MulechainwebcrawlerConfiguration configuration) throws IOException{ + @Alias("Crawl-website") + public String crawlWebsite(@Config MulechainwebcrawlerConfiguration configuration, + @DisplayName("Website URL") @Placement(order = 1) @Example("https://mac-project.ai/docs") String url, + @DisplayName("Maximum Depth") @Placement(order = 2) @Example("2") int maxDepth, + @DisplayName("Download Images") @Placement(order = 3) @Example("Yes") boolean downloadImages, + @DisplayName("Save Website Text to File") @Placement(order = 4) @Example("Yes") boolean savePageContents, + @Optional @DisplayName("Download Location") @Placement(order = 5) @Example("/users/mulesoft/downloads") String downloadPath) throws IOException { + LOGGER.info("Website crawl action"); + Set visitedLinks = new HashSet<>(); + String pageContents = startCrawling(url, 0, maxDepth, visitedLinks, downloadImages, downloadPath); - return ""; + if (savePageContents) { + LOGGER.info("Writing crawled contents to file"); + // Combine directory and filename into a single File object + File file = new File(downloadPath, "crawl-results.txt"); + + // Ensure the directory exists + file.getParentFile().mkdirs(); + + // Use try-with-resources to ensure the BufferedWriter is closed automatically + try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) { + writer.write(pageContents); + LOGGER.info("File saved successfully to " + file.getAbsolutePath()); + } catch (IOException e) { + LOGGER.info("An error occurred while writing to the file: " + e.getMessage()); + throw e; + } + } + return pageContents; } + + private String startCrawling(String url, int depth, int maxDepth, Set visitedLinks, boolean downloadImages, String downloadPath) { + if (depth > maxDepth || visitedLinks.contains(url)) { + return ""; + } + + StringBuilder collectedText = new StringBuilder(); + + try { + visitedLinks.add(url); + + LOGGER.info("Fetching content for : " + url); + Document document = Jsoup.connect(url) + //.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") + //.referrer("http://www.google.com") // to prevent "HTTP error fetching URL. Status=403" error + .get(); + + + // Extract the text content of the document and add it to the collected text + String textContent = document.text(); + collectedText.append(textContent).append("\n"); + + + if (downloadImages) { + LOGGER.info("Downloading images for : " + url); + // Save all images found on the page + Elements images = document.select("img[src]"); + LOGGER.info("Number of img[src] elements found : " + images.size()); + for (Element img : images) { + String imgUrl = img.absUrl("src"); + saveImage(imgUrl, downloadPath); + } + } + + // If not at max depth, find and crawl the links on the page + if (depth < maxDepth) { + Elements links = document.select("a[href]"); + for (Element link : links) { + String nextUrl = link.absUrl("href"); + collectedText.append(startCrawling(nextUrl, depth + 1, maxDepth, visitedLinks, downloadImages, downloadPath)); + } + } + + } catch (Exception e) { + LOGGER.error(e.toString()); + return e.toString(); + } + return collectedText.toString(); + } + + private void saveImage(String imageUrl, String saveDirectory) throws IOException { + LOGGER.info("Found image : " + imageUrl); + try { + // Check if the URL is a Data URL + if (imageUrl.startsWith("data:image/")) { + // Extract base64 data from the Data URL + String base64Data = imageUrl.substring(imageUrl.indexOf(",") + 1); + + if (base64Data.isEmpty()) { + LOGGER.info("Base64 data is empty for URL: " + imageUrl); + return; + } + + // Decode the base64 data + byte[] imageBytes; + + try { + imageBytes = Base64.getDecoder().decode(base64Data); + } catch (IllegalArgumentException e) { + LOGGER.info("Error decoding base64 data: " + e.getMessage()); + return; + } + + if (imageBytes.length == 0) { + LOGGER.info("Decoded image bytes are empty for URL: " + imageUrl); + return; + } + + // Determine the file extension from the Data URL + String fileType = imageUrl.substring(5, imageUrl.indexOf(";")); + String fileExtension = fileType.split("/")[1]; + + // Generate a unique filename using the current timestamp + String timestamp = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date()); + String fileName = "image_" + timestamp + "." + fileExtension; + File file = new File(saveDirectory, fileName); + + // Ensure the directory exists + file.getParentFile().mkdirs(); + + // Write the decoded bytes to the file + try (FileOutputStream out = new FileOutputStream(file)) { + out.write(imageBytes); + LOGGER.info("DataImage saved: " + file.getAbsolutePath()); + } + } else { + // Handle standard image URLs + URL url = new URL(imageUrl); + + // Extract the 'url' parameter from the query string + String decodedUrl = extractAndDecodeUrl(imageUrl); + // Extract the filename from the decoded URL + String fileName = extractFileNameFromUrl(decodedUrl); + + //String fileName = decodedUrl.substring(imageUrl.lastIndexOf("/") + 1); + File file = new File(saveDirectory, fileName); + + // Ensure the directory exists + file.getParentFile().mkdirs(); + + // Download and save the image + try (InputStream in = url.openStream(); + FileOutputStream out = new FileOutputStream(file)) { + + byte[] buffer = new byte[1024]; + int bytesRead; + while ((bytesRead = in.read(buffer)) != -1) { + out.write(buffer, 0, bytesRead); + } + } + LOGGER.info("Image saved: " + file.getAbsolutePath()); + + } + } catch (IOException e) { + LOGGER.error("Error saving image: " + imageUrl); + throw e; + } + } + + /* + "https://wp.salesforce.com/en-ap/wp-content/uploads/sites/14/2024/02/php-marquee-starter-lg-bg.jpg?w=1024", + "https://example.com/image?url=%2F_next%2Fstatic%2Fmedia%2Fcard-1.8b03e519.png&w=3840&q=75" + */ + private String extractAndDecodeUrl(String fullUrl) throws UnsupportedEncodingException, MalformedURLException { + + URL url = new URL(fullUrl); + String query = url.getQuery(); // Extract the query string from the URL + + if (query != null) { + // Extract and decode the 'url' parameter from the query string + String[] params = query.split("&"); + for (String param : params) { + String[] pair = param.split("="); + if (pair.length == 2 && "url".equals(pair[0])) { + return URLDecoder.decode(pair[1], StandardCharsets.UTF_8.name()); + } + } + // If 'url' parameter not found, return the URL without changes + return fullUrl; + } else { + // If there's no query string, return the URL as is + return fullUrl; + } + } + + + private String extractFileNameFromUrl(String url) { + // Extract the filename from the URL path + String fileName = url.substring(url.lastIndexOf("/") + 1, url.indexOf('?') > 0 ? url.indexOf('?') : url.length()); + + // if no extension for image found, then use .jpg as default + return fileName.contains(".") ? fileName : fileName + ".jpg"; + } } + diff --git a/target/classes/META-INF/mule-artifact/mule-artifact.json b/target/classes/META-INF/mule-artifact/mule-artifact.json new file mode 100644 index 0000000..d5db9ed --- /dev/null +++ b/target/classes/META-INF/mule-artifact/mule-artifact.json @@ -0,0 +1,25 @@ +{ + "extensionModelLoaderDescriptor": { + "id": "java", + "attributes": { + "type": "com.mule.mulechain.crawler.internal.MulechainwebcrawlerExtension", + "version": "0.0.0-SNAPSHOT" + } + }, + "name": "Mulechain Crawler", + "requiredProduct": "MULE", + "classLoaderModelLoaderDescriptor": { + "id": "mule", + "attributes": { + "privilegedExportedPackages": [], + "privilegedArtifactIds": [], + "exportedPackages": [], + "exportedResources": [] + } + }, + "bundleDescriptorLoader": { + "id": "mule", + "attributes": {} + }, + "minMuleVersion": "4.1.1" +} \ No newline at end of file diff --git a/target/classes/META-INF/mulechain-crawler-extension-descriptions.xml b/target/classes/META-INF/mulechain-crawler-extension-descriptions.xml new file mode 100644 index 0000000..0273182 --- /dev/null +++ b/target/classes/META-INF/mulechain-crawler-extension-descriptions.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/target/classes/com/mule/mulechain/crawler/internal/MulechainwebcrawlerConfiguration.class b/target/classes/com/mule/mulechain/crawler/internal/MulechainwebcrawlerConfiguration.class index 93ff77369fb462e4402723f495d850c012cdb15c..09adbd7486d71e552d7a34910807ae7e7517fa1f 100644 GIT binary patch delta 214 zcmbQoHjho})W2Q(7#J8#7^JxvSQx}P8JHO)*cl|*8Kfp=O_fc<3j@;zAOl&mAQsI`3_@U4!VK&{nh&U1 N1c*hW85olp!~h7&7y1AI delta 223 zcmZupy$-=p7(Mq^Z`0dbT0cciR!avGFCbPAV5&w$)0o8S6*PC1K7fhDBqC-HB|e=D zhVT4*Ip-9j5Z>#3djJ@rXMrVBEnT!t=2vGf6K7E@g!8mWO;ZFY}hk{CRIS0du*Imvgv z%gOiFZ#{f{A5UOJ+rgk=)3j+RtnoJ49w!Q;*PkiOaKBpLE+3^`O!ULx}8ThJ04iev@dlxuQ0X`WBf#gp5S(t_m0 T13o1oyWL02YP0|I-MPu+;d9rYbMJS* z?>px^&;L31h9iG@<~bso%dh$9B0B1&6#b7*AC=L^^cRi#eB_~{K1wm^%o@9NF7c6< z-E#5BrBq{0W8Fui+3TZG?2}8G#)gl^&{3VsapzI8bF_>qWVupfzmJaNF&bCtJl03! zxw?#x;bV0U$o)8-$N6|XPblN#`2-&ad7{pfbe^p96nQ#P=aYOqm8S{E=`yM*<652} zmzf&R(mCX#llf$kd$!JVbpDFQbA2?AE1(yjqVqhRPxbM9K27Ja-0S4BKrRb)KHbM> z@R>ed#EUhq*SWz*^<1rUqs~iY%UL=vm3x!M%XD7uqvhPJaf^>uh^dzI3Y}N#yh`J< zebg#eY30>Ax5^!wukq2j+$^H4mCHHuxX#Dta+_>eFQZDGH|Ttxtemg&1v+1-(=~G9 zMlYM(?&AnYeQa@u%yr78OD-|FY|?qN#&I7fIH@uGR@%Jstg{+dGx?iS>CR9l-Z6Jm zs5#ZyX{GD1x*(Q}Wlv`+nKu0#CU<=*YB7y&jwP)XxkS5_USqb$F$3a=89&EN$K<&% z>CSe=GEDW&kyIj-$i*#niFBED3;7x>7*GCwP0N>ZME79iz~aWw3&^i zk{L*1sL6F!dnT5}fmcoYvRvy^$&OfO&gNX#yvf{RhT>+jGt`<*$C90OP3zYWpPW8? z3dY%*hAe5OW9`*gDzrG)(P5>n=sL9mvEc?9+rQkjvT<9)>K2`_=I)4z?MC%4FTpeB z%Ok}iGA;T4Q588dV{Wll z;Ta~(TFES?4QJmfQ%H?1F>y~6@v#}JqwSM&*;qW(YGuWX-Gj)h2Zzn}ZYOjm52jo& zT;$WmP_0=rvbn|VRsn~w*0@{ai!@FHiWKnp0U=Djeyc;MiWV!|m5Q!1(`Leg`!E$) zol9n83F{n4kMLcTOs2AS;4#&yv{2ETws(cBZO}dgABN2CSjg<(9qJaIp%yC|GuJ>D zNa)#O#&hs-NlmB*0h2+9MY1ITRAQR_1voE?$4tqB@_NaenaB}44JHkiykDt7D$M@^ z2iYpvIfC;Vzc6Vumg$b0+d~cZeFcmRK4pc(4X$0?EQlab^F?gxa9tHQBUS>jC$>$c zqgGms$xMsBG|R?qW};i=-MFr7wmTCJg%W0DW_LQZ$%7~n zbFH9E2?mfuX}2kp%5{g5s2!P5W8CpyX{>-P(H!dPrQE`@NFcd8p=VEaL zULBekjWZf&HO`@bVDh%cI+JELmqx;0@Hvb+BC35}Lwf~`RlqS#9pSKHV%JFl_bHn% z-kz1%bZuD7G^J=`6EyEMLOMT@RfRl{)rUTUD%=nwc&XY=fSmgG@o`W&jmIMQ25W=ucGr7bp|wqOKRm1z4$_ zGt*Wk7tdy9Ww(Jrf1Cnb4j%3!-YH_*2+&0LfWTIqBu9ZW?cxiq@H;MOiVRoX!@6OPJuS;{hD9C{;Pi;l$LoqQ7$ z8T@s*AG5^DM!J+yZDR&Iz%U%t_!|b_%(rO#O@nXc+kh|z-_F;f)c62K4&Do)itkK; z4ygpd-H=H~&NuiDz7vUUH|W7s2X@*kL4d0OrV!`%nZJl zuQ2#Nz8`W*r5*vl0q{7>;0Ne;2K_a?fxtF5bfvA1^U>GJ-Lc+apf&j0;=fT#)cHyS zNCAwF0yVBA^hlq4jZCHrfCJ!on3Yuwa=;@HS zPK4cyLCD}I`6-kpb-t9KP-{#Y4dA1-B$ZB>z|+d@_^+iU8eO_Hk;r6PTU#Z{#XyQm z<)Z92_-XzDKtvhnLU5mjmkzaM1Spmq$xCkl6E#VRwbMZqvD35wme0$jo;L$k$cLy)=yOGA1b2lAA+oA5?rqDndGGq~s>s~xYeO=fc4-KjJh zmBu8h9=y_DuaPEdn>f0~j00H`D2l6_i#AkN2+^h9m+ecEZ#2w zA9nGnm{zEUg%GSmd?q$WWAZ|UuLXEttSoSHHb?_9W?-)Hjx4htfllcG%uY1NsiKnH_d@Vu2E{hHwL}UnZR}uo9$66`M81hZVYp z=|$ZvVo)~P9}KDn7!Azh6i5XOgTe#r!@6L%KoBf!2Om@DGY%dSq%fU36T28{!?*ZA zFn(Io@MdVhAG&-PvE#Vfam1kb20yLK%&bsCxuwM)>av0vB`Df6cEhBqF4{cx0+U%6 z6F(0!y<*FTE39%u58Gs3!c1mG0POxFTh_AB-#iOblMXxlHucUdep4K$7+#bAy z$6(Tt-VCOX*r=8YN=QwbQaDVBB*mw0xT7JrXWE9v>Ww0WjD_9t4uEdK(RDCvVY?d} zkP3eCSC=&2+The#Rcp+BHiq zoG-6_!!{Q-^&2-?#Hg?afs*R2!|whH6|vui^G7iFF!UMOE@kk0w#kV-H{uHFmd34Q zr$pARZECkYp&F*U?0Ls=o+d z17IR)|1r`q#@hg{TksUZ9sMYIYW5ND#lS^daV=Go6KNa9hP{Ed)5UmVx`Zyp_kIw& z9tX40m@0XaT>g>+0|cRpP6AxGdlc=y~#k_8^S`?<)V;JXH(LF}UaH zSj8~lJ49;(WrygPfGPo_yf-B3%UzUU68#nKJq+T`dmA=o-2f z?w8SZbUoo45=LL68z85QZp6r=Mmw>u6teys-9%qU2uz~CqiXcu8_SJ_#(EXA)6JVt#6K|hhZ4|4lH zX*bbgjqW;1lj&{<rUp@nF8>H(VXA~o6* z&>-I)_;oy=$kS+EA$!vZCZg3;L9K|`bMVwgC(;Hw6FJ*J7a~U^SdC)TiE9^faT9WD zGkh1PmncE6Qx(zk$2_bR0))M4!L(dt)gC-$_EbB^n$E{-wDWlS$z+f@*`P=T)zO-%iyq@-k?L+ z=fxRg!2K}h5G?%&QLk`85Tc%50e?Y=`hJ9{R~R4&QLnH<5TYJABnUB{r_zt<2vESm z1zEcZDCt(X&`NrRtB>v~AOm0lkYFRjs~Wwgz~Oa`enRwTn*K3WX(Ug5bSmj8*9Q@( zEdObhli8Bhuiz({eG`6?-ICdVg`bOlh95px*a=elOUN?@(;pNC?`cDVx7HudQ{67k z)YkeJ4{O3+{O!Kd zP0#8;6fIDer}Nw98Ls=OBH#^_m5c@11wDQ52fWfgpd~m)G*j|R3{&K-rjJf5KJs4 zs0#Z*;4iQhfZ{*F4XP4XMbs2bmi;ud3h5K~OpW+I;7SeI9psUs;&l_AD zT@kDwah>hjTDV_{`JJv?TxgD5TU|R`c&2gG>N*0rTtio>`wn_Y-FLeVt9yxRjt-#> zB9!TI0F)^JpV@Q+TBRESwmU(2D|EOMTHHqMbUT`xJJ8`CH|67tsS$=_f z`3ODFZ{q$Iy~w|&m-zSeG9RVGutYDkJ`7EJq19p6!}N;lF#QA?yy|+DUUR)iue;u- zpDM_H7l3>|Am?uY_-;rX1lay9dAJho+0Rh`d^8)fybVoDXa|oj?AuGf0aRa$*6Uf& z{Zawz&j8`?V5C8(2T(So_4*iAQ0*P$8;7>}7&w4$GvMkA9VI^l*w^QzAGehLlIup|7R zx?R{0JwO|gf~b#M!sv^bX#XenoQ;!V3|+ zs}QUg;5iDpJK!eieU`%F3-M#>iRwb85Zrc>(^5Eo{-Z{JWJLF=eOz%<$*cWI zTvVg!7{~|i1f?e9JW*a;g){oqYbdm5lVK3aT$P7-t%p+M$YqC$+;8VTP&yMx5_C@= z5d!@7)%ZC&N!z5;fA@bLm?#;J|GfUxe(HXTT=M?P3YzUt1e0;#pD^d5KLaocQN;Ks KacA%&k3 zFo@O!w7J^8fWF>2Ze*b$lZSznvgM^{E8n)&wcK2;ZMXb0-)s9})7y(qgRr?Gkli>x zYMewtC+wOi2-wL?(DAl*{*=cr-4UrY$^uaDT1iFPf%?#Gb(LGVfD795NkEd(mT0o_ zn7IIB{u$|qM9vap85ls0nC}fR_puV_XWxKDuCp41O*Y1}LyTeQ{7`JU&T`<;1>zsB=#U(YW9R`m{mG_*wbL#qTy%CheR`6j>jN6DKVPgP; z5_ub06eQ+s%ws`f(T2uSGqtu!g{pJm=o6=Qq<0UiZpjzuZjCCQ-rD_rPN2`LAL(kt zJ3rGkuk1GTx>q_sb!#MKW`i-&lkWIKY#Urqw^oB&t2cPJrmm2qPf6cE1i~E0sMu*{ z!5k|*K-{vhkYsFY4JB;Wb_hTU?d%jfkfw+s?{gD1h{7YR;`}`#I}11LLydN#3_gbB z6;rAuRtO2`L>HGvyqlFENFswCQWd`}XA$&rALXtO{j3reNg&*4^2h$%utI<_pm;;{ e2{D1c3qlv2ur&dp4fXT#e{Hgiw!jdhh2d}R!A?E^ delta 569 zcmZWl$u0y@6g{tcsP?+hZJy_$HQhxbBqA)tg2b@05=nKdP#vo1E@I~!I`s#>fQWHn zVd*FQf-ex~RSOcEJLH~w&bjwB@tR1zeLlYcn8l0^O<P68$@g2{t=x7pX4oMu(T-*AY zNC>nTXhmC45^D*G4s_}04ED6{w1IB)2*iBHKe7qSRNo8JsSVpd^!B%{vUP0xcDW)k zf-xPV0*zkzz${g~vlDY&MFZo=1XtSpND^60>6j$RFJ{GaU0yd1OPk#ZjEl;}u diff --git a/target/docs/mulechain-crawler-documentation.adoc b/target/docs/mulechain-crawler-documentation.adoc index 9c0a430..9712c50 100644 --- a/target/docs/mulechain-crawler-documentation.adoc +++ b/target/docs/mulechain-crawler-documentation.adoc @@ -53,8 +53,11 @@ Example of an operation that uses the configuration and a connection instance to |====================== | Name | Type | Description | Default Value | Required | Configuration | String | The name of the configuration to use. | | *x*{nbsp} -| Url a| String | | | *x*{nbsp} -| Output Folder a| String | | | *x*{nbsp} +| Website URL a| String | | | *x*{nbsp} +| Maximum Depth a| Number | | | *x*{nbsp} +| Download Images a| Boolean | | +++false+++ | {nbsp} +| Save Website Text to File a| Boolean | | +++false+++ | {nbsp} +| Download Location a| String | | | {nbsp} | Output Mime Type a| String | +++The mime type of the payload that this operation outputs.+++ | | {nbsp} | Target Variable a| String | +++The name of a variable on which the operation's output will be placed+++ | | {nbsp} | Target Value a| String | +++An expression that will be evaluated against the operation's output and the outcome of that expression will be stored in the target variable+++ | +++#[payload]+++ | {nbsp} diff --git a/target/docs/mulechain-crawler-documentation.html b/target/docs/mulechain-crawler-documentation.html index c318eff..a179448 100644 --- a/target/docs/mulechain-crawler-documentation.html +++ b/target/docs/mulechain-crawler-documentation.html @@ -2210,7 +2210,7 @@

Parameters

x 

-

Url

+

Website URL

String

@@ -2219,15 +2219,42 @@

Parameters

x 

-

Output Folder

+

Maximum Depth

-

String

+

Number

x 

+

Download Images

+
+

Boolean

+
+ +

false

+

 

+ + +

Save Website Text to File

+
+

Boolean

+
+ +

false

+

 

+ + +

Download Location

+
+

String

+
+ + +

 

+ +

Output Mime Type

String

diff --git a/target/mulechain-web-crawler-0.0.0-mule-plugin.jar b/target/mulechain-web-crawler-0.0.0-SNAPSHOT-mule-plugin.jar similarity index 99% rename from target/mulechain-web-crawler-0.0.0-mule-plugin.jar rename to target/mulechain-web-crawler-0.0.0-SNAPSHOT-mule-plugin.jar index 547a91e7f55f75bd64b0285aedac07e601b5fdb1..d0d691dcac0fc143070fc6eb54fbd669a05fe1b6 100644 GIT binary patch delta 8433 zcmZvB1yEeu()Hl(gA5upxVyVM1b2r3K?awDgy0Y$KyZS)6Wk*}fZ)O1gS&>0-2Cs| zysCeyR_%56^zO5(tGiECueCYN=`&4CbtO1>R1gRm1RBl5*H6HthW}^fVZiylg8uqZ zrMeV@pF{pO|Lb26e`m*MYmOneiFg$u*kfc6$d40*xVoKw_ZO+ETCtu=qmh z)jT`8C|_6av2jWkd;ok@G&!%Mcs+rW?ta;fa^6S^y3&YcS76oO`h4Nt!vR;T2U#h7ygd~^voY(!or`3hk-vBWDcSJm z_##6x3%lwrd9xfB@j*A<4S|>#P`YV@2vV zbCgN7@-0ETf4ChULx|9i0H#DKIvOpirtP>@ZSrd5XsJQ9G&7@Jl%F>zEV&(rah^m$ zX>(EtW9tpr+W|UQ4e0!HTsS*-JLcTyL(p(5RbZxEXMLal=q8 zXGW>U@G3@VZ5OB)1$3zD3r4?PbG{Q~Zwh%SNFNSg0xzI$c$YPJ1TCAPm^kc}YSLZqYNV@FMy!RA{h`C@=`Fx_yJcUt~kd<31~wWmZO?N;KtV zI#cqrEt|8CH`$1_KJ0i@>Bpyz#bs3wQ0UNfw<^4=H{Eh&J~4*k4Hy4}piWPKhAB}; zja`JKe%&KdJvtK`iJLyo>%=mVwNXB%9rt*&HlX|Z?BDTc-Xp3jAtC>ru>Yp=@Kcc~ zAj3_f!+=2QNt6g&fD#-BcG%GazS0Pf-tk7|PKadr=Q6?*_KX;wbd_m^y2^JCM>h3s z#wF^CX2@rvOB*VeNz9hbXLgx&Huki8VF;1XXw|o6?X;d*2h{iiwCh;`5*Uti9^HPL zGRHfZ+WWW5sD_#|iK!BnjB9xBRzu9n6Ph{62ZFY~Nz7S{0nssoTSoG}^q#MtgIcT;+VMkFzSa>Fq?TYU;1uWONW$@owt+g*5D3s+53fsyXdq zLXCv7f0&a6Ml5{Da-FFB}9vAfFihigMmS&?kC_L)M>m1<;cG5JdnbwFP$U z&NHT>mUCD~zn|tXlySfoj?c7E#nV8w9wU4=W~j zio`m1o2d}W_|ac}*OyKtROJC6HorDcwk~~LAH0>AVg$v@ag9Hoo`F?JOUkG-T+Mur8h;FuLXYyPbRs)wNP|qzAaAm6`3PMq&2=wU zQBtKy8~T6&PUDgUp0PmKWld>64XN5YW4%(BU%~!y7a zndd$6DiBw;bFNXW9XoI6AnBovJMJ9wk^abmd4M80>MM^3I~sYnQO#scI~8(>om}rFt+kDJi0~SV>o(eNpk|2^M2wu87EF&$$$}EK+4JNg9vT;i1r_MroL#+o~ zBU}WJ!;1=~JA<6+xw3=#icW*&pN6*v*}M-qM@tHH=G~i96fIY6%U4-vSr@XGj#WCP<_uLgBKoS@$S(YOg+{p(gja1YsZF5ri%(7s`3lH*4b593w zCw7Pu_lWvv+c{p_fypLYa9@#A3ds$YD5jM2T;AD{#*a74ZjXtkcUv-gGgB10i!ZH1 z$&c1iglj|Mw=DfhHxvZbrhTGDv^Ib0WyPPJa982SO;~g1hX6mli-c|Lg$=jg#>Z{1 z%Nz;_exJHgBL03h=3f)v744cPtMAl=K3?Wjui|V#R5VQO;sn>E%(Y_# zL$qw0Ly&K8fyPjE!Y8dggGbbOdsEb4goU{2)-LS5gFyADPvJ;8w`iZ7lSMMGNu_&6 z`-dB1imcBpRC9oVPbNdoPk(=c>QoF8EA7fN*{ts=$?i~5-R(1ItETG0hIP_>0O@K; zJj>zR7F8eU05x6wu;8mP{=;?n=R9wg2Mu+yJq^|!=l~_gMabYMp)DuO#sWd1%5%vI z)+k!b*`%~VhvJ-pjVd)6mZAif+JZ)^RJ*3}mqVA$g%JR?&Al=gS<)-p@vMU=>-T$x zRHf4N3DJgoIJ=yd1L`zRm!B%3^eICIy9E%eMuHUI3Qt>C>!xJh`dV#HI0bZ`mJx^V zpXycu^bZI(4RRDcKMA_?ZilgC3|HFFJJShCIn6?uXIpA~`rb%AFQyPgSU)eIR7&5a z_v1-HWf=xMpnCR9t*Vt~SPSC$NGTb*yw1^ji+ffx#nf~*#2hsXuU`%6%0|gpIo!^d zpg!LkcRhK5VtoF(r4lC5Lt?|GAFOggv(CUZm&q=~!k@s${=#~wa+=XU^wqSY4Tlr> zvXV1wa)uc1N*ag4kQ4X$UV#xW0al~F{)xpE_9+J-)o0`9MF}lJFEoOd^goaOFjr?| zrBps!S%N)H?Df*W^NYQB0xY@Mv)2QBW*(XPQx4W`n%MkKbj&lDTESB-R=g=SBd^cR z8fem#tyOIAB$EjSukHPG`{xi(xb!g77Hi)M)Cf(o2uoG!aN9(&z8$TfirLOpn~fPo zhgU`f>g4LILE0ASX9E0ekzbVuLI!=kJ~X% zknuHz6YpQ4_QQ$dtF9sbsB*@UzRok!{X86zEPfo}q_ta;Okl}5k;pbw=bDkWqvONZ z$&_(0@>=S{Ye?Euma5Z7s@Yi^X0VEk+2=tWz}=8JK?(jvdVeiz!Dl0BpU@;Y2_b!} zQgl{RHXl4YsS$Xzx$0o$9!Ey{NY?oU<<;*j*NXT9Hl(<^SfnhGG!wm3p_WoD=#qvf z!v%tMA0)G*sM0FZM#{t`sW`Gs3}N9j!V3>=zkJfmbFa)*h9JI`vTqVd8uSQHEFnu% z2Q1rH#2g!It^LP}Vi3UJ2s{<~Un1nv8*s&XZD{Rpt4Q|3#iTMJFttZ|d6=V0Ax5!f9%21-_ z`2K({%M$63Gc{@gV~OvgJfTRc6M})-45-ce?_O5SEH=6_Oibp*4v)4^#IPxNG0x*> z6f-Ald9t#;U~->5G$L4F;RAA^2C=*cZfJX|FF<^A0LrU<2ukslUpoL|`I#HFx7Jz*- zPx4Y+kxD+_XyAw0ac`yxdVWDC**c}fF~rZ}IAprLCA1=|hrE{9ev@A`qsf!&M^!t_ zp_SBx@#S#B+uO|YQWW+J`6s;EQgU&0U+GkKwR?4}HE0gc%{xD8vy#=OYj3mPM5s(S z;(X0z-b7c#i>qSjemh6}Nnxhw#1jZ*TS0@N3+-tc8iG=aWB4Xsav>CE1Pk(J;bX{V z!m_H{T(Qf#8U-cK2{1WXyPY`NczPP@Y1)JT_vtR300Cq+wFD9fUE8cXwBGx>-0=$^hnG-Zi@NsrWpaS9krB&%M? zGnpSZ-cR+8k9Nk|PbXT9Di&+)nla)I6jJ)u6f3D7xDLI}uQN`mQzi-zjIEZGvpDwf z7r}$vd-%jV)zu}iT5He0@4;k8N8ReXb0&TwJGxuy&h7Cdp->)Cz9Fm0?pY>yjJ4C;X2N)P zjIWU^s}~SH+5_>YSjI}WC2mjZvOM-``LTjT;}~aBCQe6x7h0=4st5&ao0v#Iu+lk4 zko$l@y>&VCz;|Aha-w8Q@9DT10mf_WiH}(1RO2f3<{zHb^cLr)eSDLH8A%vb)5d`e zxhE3C?AP8_LP2f(TrdU_HBc#q_P)j_Z(8q~p0>+>`jjnPFzWw7*(-y3HPZM3#&*>f|-DhN47!`|+|$Tg+AezJv?arICd zdsDWW8o*DTvM>K(E~U)Rj*L;LlKGJE?aY_!vbo(pwyMoYma+FnB~RbL^-euwA!w)z zgol57A;Ee&-+9~&F!S^>RG56w&ndko5ac*t!FE>gqr?Ah&2q*fVC$o79=r5vQ@8M=y%M z;pw4_7eoz1Xz|HtN}BlQRPjv6Y}H+ZZd~ z8*v($`?93O7|iQ$04?HiqkupS#kIA z?A|s9iL0u*+*S zVT$9)dZl`Y6c#AE7RPB6$8Pq*i(JQlSr(Yb%!9HGPw;y)Hh+Xb{35)*eTPhj+P0); zO3xp2?*kYOS*+?YtOe1>2o^u6&3JLOtm6~DnSP94V4YLTzVIs%t{8r=&&!b;>xndY z^_E~-&d+mnzD~vKqt6ed!G}%}ZJbNQb|21D?G|UTSiP?AlQ!Of- zSp<+(Rlq$vZU%^~u`6}x!YQHOIDJ#ocku+tL>*onwt_GX)l$blpIaaM6W+7-q++#a z=YQ+&VZwCJWXP``#a}avsVXR634|D{{7_FLHdZnX%jCQzmL=zD#k?D&{0WR+oO`_P zwz`$OMcjh6_Mi~@9mrM$;Bo0DRa3m-;09V1@-ui^!_rPBcXTe=Rq|iiH-xe`<*#uI zr>s^}2+R7T_J9)FDBSKan=UM9TFWsr~y@s#!& z;PhUK?egPnT6(GPgbC8h70BaG zD8j!ADx73fo!a;H!?kwts!fD%OaSQ5b822~nTc-N6Vx?WP^^a3hZgnIe>z7eW4rm8 zlT_TYItU3?3{GLYxa{IFUkhj;UD#$g>>Jodrxue)K;mY9zp}*5u|j^JK}5@Ve%Upy zBp2o(PfwiP98VCjKy-j=)~C@%<9)4HRd}O%S%jl3U0xbBhh1yC_ELmQi3Hfy-f)D= z3MY*<2pJ=z*^0hHT~3@14{eC+kpOZ%(Im%iVAM z2a?$mgeQ?u4mL!jfz$93;s!yHu=aBJVu#$JiT;k4cK8<(x)l5|A8f7h@8cv6zwP;T zzoQsn+_&K&~boP4$0#l!(7=K__zUwKw*wcU9caeL92TEs8q39%2v zcjY;%G!N>q$(v%83|1elE|gSlOpmOZtS*;Jaj&@UA4q-6;TazBHy}XRE>;xmS;1=U z5!SZ-D(1-K)D7XLeXXBXwyFsSlUnh9u4UAYDw%AP9~(- zYl|>f6HQpFKe#hZvrM+dFkm+{R)t~Wy9CsV+qU0)yP7z?_?03u*_xvhGJ3RNuEdVYkdc-rojYw<2!5Nd#il7Z#`@HC||%O)_7mVq!IXotn|;SaI<-H zw_9=H;b->{1@U%9ZnK47@Uyd=v5Y>%8T%$fNh`=4;HhV+fiCzi8q);xu59uAU0i6R zxJ>qlzSy$MqSOnRSGjqP-2Ed52Y}cL{!IW?+>ZoP?rIlc<4W)p?gfY)KVUUMlD+hO zE}aoV2%}tJ8d}Ugnq99>s&UOPpWbN__424n5k56XEcuJeu<|f7O%4f_87$bYKAkPt zsQ{UgnT_lE8Au1Wz04RvJ^$+<+xlBiEsV+!?sj-n?r?nWXtC?Wm}hj`WKwxs*RY*L z%NgF${%s2YYxTG8s|^29eCW~y2N1%vZ=k$xB%zyiyR zq7TL*JehMS0iTx8V#wB(j>~MYaZ|qba2O2FQQmX!=bJZ_b6@>-83I#oudRLGEO+0j_{IS$-_MSA z5k%#Y%pyhY0zm1P66J7JNISGYU~W;ky+2=|cI^4ao49wViM@FHzzVt{-MP2|`Sa0W z!a6LcIQxr zsmWKicGe-6ODVX4{D{*u|A-+X&ZHnQsV0Y|VbZ!3+2?Cgjpj_vuUxirv?J99>xK=y zqS$IhwVRJ}t_b*CL1@3o48Y^l?PE3KtchYYhF?D;ajZ1mP+! zzJNrccoUvc7Elk~lfj}}J?s@A*%5n@u0Y6{Y&Nxy$|BQK@E8Wf?1-MIZ3rOm2~8Hx z$ULe%=A-YL*WgqC!jXlRmY)ZbGC!_&z_7rUo!VR#wU%j}Hf%a`?n|)Qcdr8cj9x#L z89t%BJ{&S3UI$$$mws0uDM7;6NP2^&sM6z;=RYcHDc*rtERq2m+OE|vW%TpIq_^9w z;=mojA(FLup0bA;g;LORodF$bR)N#_E`StB{=VSwR|E;*z~&*QTD7?MJg6rJxoD+Z zj6v!i62!2^$+oI>Ephq_UtNh53Hk5A;J;(^-y_3%U)(Ji_W%8J-yvi#Ba?5Bk?@2R#%k@th+c@ueB*cz5>?hxnF<_}aGb z;;WZI6|0nTK`%*tjJBJDt>ZG9c_QbV@m2umAV2gQjKXBU-nR_NSdhAmB1#AD_7vXf(6llsZFs_qU&2ruW2FlsUWeM<2-Wod&Pc7=d4bSn>wjyZiY1@m0uV7I!L~4^FX^s?B%87$7ceAUyi@{;Uki})rJYkF7{m5w+m|`W61z#C6ZM2CugESXo zp6J+lgpQqvfdn$@23L|IDC0${1xh7U$Z3mOI@)ehbGDVseI?a;eR@rr#cvJxRSkmT zAw!oLC>(M^zV6!P5k7+(=cdn05eXTx3umZU$DA!<62)EA6uz6y^v=})sw`Gr&%U~x zhexC>OCOuBPE5YrD7An6Fj+1J^LvQ>cZ&U+ka5vqeZ8yYBc(VnNQh1tMDZjCtH@|e zgc=fs8xp}h0rm-SPk?^{!V?gmfb<09C!jn5^$BQCKz{|R2>4Rp zkk9|C-~T(;b?>>)&hI???Kw}L4+jQ|Ukr%#G_imL0015UfL$_SmQ2hJ{3UPkUQ1i) zYgu4$El)&#$y5OhrfZ^$aeYT>0EWQzHzgRiuE|#nNlH9CJV4Hi3A2n#kBeAL0AK(Y z0FXn`o@$9WaLk z^XZRNPI&)p&6bZE+xO%LNk!jI_OqSl$ck(qRiE?Vg_4L<@-Ce=UY@T|v>wY8lyw8nYb=_ue5M&Y_9V82sCs2fwkm9xUTXn(qzWYJupQ0jar8Ejor)%LyFo+K%}dkv&cH|4wO^E zfyE9!ql9`A3|3!%%Z!_r5T9r7$>lJ`D(?0*P3lcfKdZzb>u0U^hq3|sRJYuGG`~u^ zoX)+4-1P-d`?wbQYVd&*MCAxt+COUGx)!n!P}w!2ba{2219G#B9rA-|Ee?uB?@hoB zS1*AZBKvf=mHoAj1v8RPBu8ar*&|j$DKvTDk+v3U_jhOo-|urdx+tC$lg7J=+K9?^ z;`%{mMTl?I7@q1W4U03r3b_lxs+2DtPT3?bJtCK<#n!f!(&G$}6xNJa@7eQSuokIe zb4SEmLd#M?0Y$^Zz%Ad`v?OZBiH!qnd?R4lq~gTkoUc*DDNiu;^Hp5wyG&0zs=}<` zho|w;L>mQdDn=yHFC=&T6}$~fBgRd_C9t@s2`BAU!Pp`miBxg8*7e;VgRA5H`wd<+ z*iY95ce>+ohYW1*9MfnWR?a4c$>HRDpTxYN zUPF$mGd@J|<$73n?+W9%*R3QE&rA203NO-hW}jSYovcfCG!r@o&A5qYp5y3g;^O`3 zsn@}A<-=I$;NWcjMuLd}0H~tPs|( z;c3s*zrgKoAyy8vy)}Gb$v*6va|3=*s#c@6Tr%QK>HTFBX~KJ-6-h6kN~L*&TczTu z)muKKX^%;x#!Xjt`z%N2pSUEv7TEkg9=Hghd3PHo`s1SZY;2a|nXcYh(HLpFvv8RZ z%>#~GZ=Kr83Z|&mq?J0i3`cYtz0V)bua2S8_*(eM<90}{O!udO{xuyY+H?Y}YLH&E z*|PCdzu?HgrO+`?I(NWVWzovv-RAE_4phhT!yE6}Cb!QYtk-(ThAn$`a@@hrY5w*G z{v^ws+Fa=I>{jX;b=#Y6oY3*nT;qA&F5`zLlEvD>s z27Rbx4l~C2>g4CNDkX^ok8T9cSh-Ir@z`gm&69iZW?ILKm7UFhe7uA1C-5a}MJ}d1 zNc4fjW%Ck7O5`i|KwJjIQ_&kskwZepGEJem?}vbO^Dw+a0&bQAvfPWrdI>V%O41fd?DpPnvU{e60PEf z;IP5R@+m2){eY0x51L>OQE_{3g1AQFdoTe`si^htC5J{yO zeD%@9{+B4j0ZxrpJSFEue`_S`ARoTn(Kwft?sF{IM`v1YmQ|36WNv3+USafp`lIx=p=`L~ z;#d6+z4?i{qv8xA`?ffxhU9s#&y@Vg4g0vEeD+Qai?AK&)Z}_P0ps5H{NnM-DL;L< z_#F|!x59N#MOc?>i6ME?5$$-FhUF=@^U4hbUyHq_cY0^fAQv|vuzhMfFAF;wGNLkP zH76Mqjbp!&@`;|?s9LXqHQI%zwl{=hagd>a#RcKHIyxF;Ys217#oqy~`zBZdCcUJS z+*BqMsgQ^aSjlI zxg(-fw3|`jKSKQYP_LKd$z`~^kSmEYY%bQZI#@dC0nOliUzf=x;L79w^=kn7{Hl`v zP8O0nW8+mo06>Kh0MG>h0Ct}4Lhk-BhpP{yTnmUM70yBh_i(WDbMf@h^YILDv3Kwh zw1e6B`c|5_o6;+T!f?pNil);i6|LSIDJ$jQb)Dx`=EZ)cs2~q=dv7C3q=}sMyN|W4 zdif~t3^y#z7gBk|kd$|>&y@EWrlj+zQQ*O+pqAs&gP9iJ-QkP#F=jX>DzcJa$L4K! ziMiHNy-vddJTaUWtKQttT4QW?bHWnmAozPrF8{itNE{^H@8EIx3SsBFzG|@9V8IP5 z=5Cn>sjovHJX^*iYBC<-^mYLf3Co_Y7~)g$@ic;=@SgCzEq$3}PRzM0i|xToDpGIp90JaMQ@_K3J)Yhk_Es2`_e7O+CG6;Row z30W+;U3^C$6PFuHHAwkbU55`CWknAc^nRM}s4w$`^0q~lO|umZoo%Tq!&t0}#K^JG zjiNW_Z2r-3CkEZwN?1j?wK~{uU;WeM-SqInwOC=wN{RTD9NOvm(x77c5N*j^2V9>a zVa>Zc3v3jThlo|_cwS;iAtU4~TBS%egW zyl-PV&ufI}-L>U&0;Hzv1LBcx?-&G1p9HCvtQ`5|85hP`^S-F?M#Y|nvhO|5J}3qj=G3VP z0_SNg9G53kJfa!+=7^BrtNmw6r?I!CL1rNRZiZPzADB*TUvO@!-k*YGVKc!!WAAV$ zamU~uW(oJBZW5n6Zt>L(|(AUZYUjs&G{<%ssXkEu$J2TM+XzR6HM- z;8%7keP10sOZZX|De7c%aJHs!X=H4!sDWJ$9@u*(P*%!CzV4=V?rj=nW1yu?;uy`3 z{E5KJ9%`0NJr^ig)7 zo^P&mxZ6)<9jRUm<`bn9H+IiY-nSiwoUI~n%pA&JbFZ zIkV35?{tYH(z`~ z-nzrS{Ji~+FO!3)*=v!2Wrj0Gne9)B_y+L`)C!PLp-TaLmCv^{s#zjlaxK8ICyg0ARb*yKhev@wuuYI}TH|XkyCrMSTg|Aw_Pm=3<(*Mq<-=_kP~#iqyfHRo6MjSMtsSkyvGBhBa<44C>GaQJSiI z5`rP9#X`huZ%h9*>O}sZ&P!R*LNb`{-6};zli~7q;;o0h1zk4zyF&!$N>-0_^wW!}9+zC+nDnpEcXL zs-;7#cE}34?ZrmFA!38UHW`G`9U>KHa_0I3G8Ew90PV@Owz5InDi>+qE|*>yOCx6* z!uUf%Z`Ti=r`~g#Bvv8}cT)rU55=HtI4NwC8JqJ?U&_O049W`j1>D?W6QNN7OzgT6 zybY)a_Qg8t!^#yX@pr4W2ZJBs_PVhlNp>44pD6TI)Sp_kD?iaF(|s;LCf4J%KqM`s zsv*v!6<{exRVq?ew|xI%Wtb!`xGeIer={YA2V%`Liywal;ggdM7to_-WWa2X;3cub zHG4xuk^{KyJ=CZ1a0<1(@C@)0)#vp+Z@eYB3{xabwoxKYa0L5=Y8K!IAKmkK=noU- zzg!Dt1GM9Hc6RV74SDf9@%EX1B$-fbyTH7ApTS`dZ%ne;Il-)Vm6Sp1v2FN=rNR>+ zqLV|ngl^4H7Pu54p5EGDBV1Q% zI*cIfujDmTVr1~qK1a;L`3fvbR9ohHqw&^nY=_x3kVP^UsBXO8u=g{u80!vu`+AqW za&?&ZT@21+%*2JjPMs-n1tF(15a%QZw-dbQQ#FqWf7;ec=u_lPxx(*K<#N&yru6gu z$-$2YWY0pi-toyXMU_3*YDmN%o;;Vy?kkgC{?WUwpmtv>HdC5N$LWix{kBKFy*}Ht zsQnMwpmH)9?MVVzmY~eTwNel3I_Rn(;Q%mWY}f82=EBL~LA>tHk|9Jb=bmS946gCd(-X@f8b(EX{bLCz@xNsVoFCa3xQV z|7}RF8%ZtiLw)N-C z<4K5}3P=43O%mU=ux#I*4$*4QF(H6X4Z7rX#4ylN1qgf-p#fW$9&tT*GyCLavXmM{ znMw)aW6?7E;88u#3pjss6rm$GF;J<6I}MeklB?#?LQNn*eJaiH9l9Bq2AQgDw_86v z6Za`};Q=otwHQ_9?FI3Wo0c1KAfi1k3nA2V<)32H$ir^hDc2kS!^YU(L!=wX5u?=m zGg(c@1!+rQC_yQJG1gB&)E0r85?DpE<6>yFn z6RGaNcktGdvzyeNpx6{!UKwbv!N5{RwEEHeO>cA894)ZNV=Lpp>h45hi^cAeHbI%j z%?fx8F~Pbo!M&J4!F3hJw3KxgbH&d#3{ElLoLjU!;pEFKs0UVnXq#LnIe!YrZSZ%J zZCS;@v}XqOSQ_tN<Xpo>miUt`P(^D zTqB8GY64NP{P)QHdl>*w6i$Hsp8=eSce>rGtg z{h>#9Rn_#T(X#n$I8hF~+qEw{A=fOIIfT0OJ`zAtY?>Ip^N{zUMjTN8npv<;$NBp8(v%s)kOa zRa{arprHeU8ZP5XJ2J?&V+g|iSN<`Y0~1Clook;Mm(3Z;-lQ(!w`1dHRGYVQFg6b z-6%V*WfW>+Tp1CMm6#C9bOTA+b0=*dP{6$T=?qb2P-GN+nfGAtD3K7Rv+dwgMHl7z z;Rkf1hbjfVh|rBE9;WDt<)HMI!bxez38{?6p_U;^jpYNaP2VLFr87af1*HYw9@-f8 zz!_RlKf_H{aA7&-P4RGgp^1eGMHeu7FJL5&_vdYbA?sD84P GkopTC0DcMp delta 823 zcmah{O-~b16g_V~c+;6uphCZBtCWhR(2gKtm5Lt~Y?bQ5#I;QwYDwE6?Fbkb{R@(p zxNz^*=%xY-1YijdIVAB; zD##p)_$jT%hw}AS-C1RjQ?rj5l!E=#(jmj+g^Hp5CvPGe#u&yOtK(#v&l^^zYPUA) zwd`KAvHvUL)7SQ@*0xi(TOE!Gk$TDFKVFpj9@}gK@Z`iv!7$&`lA|?uQS<`VA zQyk~SD>+O%E68a6~zDu~xC*TodMmudZgLK&@bO*Bxeqxq!rq4sJ5unU{D@f=vZWD<_m`wXa zJw$qlA0o+ch%`eF7e8ZpW6qyDz_kP1JXRa;&yK@`2~rmz{vg7{9zh7xlo?&33=D|> z+K7V6DE>#3b1bTKs;K+_ib~M>fEaPdAvEq?2vZSpe>D2sy`C Date: Wed, 28 Aug 2024 09:35:26 +1000 Subject: [PATCH 02/15] Connector renamed to MAC WebCrawler --- .DS_Store | Bin 0 -> 6148 bytes .idea/compiler.xml | 2 +- pom.xml | 4 +- .../MulechainwebcrawlerConfiguration.java | 17 +- .../MulechainwebcrawlerExtension.java | 4 +- .../MulechainwebcrawlerOperations.java | 130 +++++----- .../internal/helpers/crawlingHelper.java | 77 +++++- ...mac-webcrawler-extension-descriptions.xml} | 18 +- .../META-INF/mule-artifact/mule-artifact.json | 2 +- .../MulechainwebcrawlerConfiguration.class | Bin 798 -> 1282 bytes .../MulechainwebcrawlerExtension.class | Bin 763 -> 754 bytes .../MulechainwebcrawlerOperations.class | Bin 10293 -> 9965 bytes .../internal/helpers/crawlingHelper.class | Bin 1090 -> 2409 bytes ...adoc => mac-webcrawler-documentation.adoc} | 35 ++- ...html => mac-webcrawler-documentation.html} | 108 ++++++++- ...eb-crawler-0.0.0-SNAPSHOT-mule-plugin.jar} | Bin 1358294 -> 1358918 bytes target/temporal-extension-model.json | 227 +++++++++++++++++- 17 files changed, 526 insertions(+), 98 deletions(-) create mode 100644 .DS_Store rename target/classes/META-INF/{mulechain-crawler-extension-descriptions.xml => mac-webcrawler-extension-descriptions.xml} (76%) rename target/docs/{mulechain-crawler-documentation.adoc => mac-webcrawler-documentation.adoc} (73%) rename target/docs/{mulechain-crawler-documentation.html => mac-webcrawler-documentation.html} (93%) rename target/{mulechain-web-crawler-0.0.0-SNAPSHOT-mule-plugin.jar => mac-web-crawler-0.0.0-SNAPSHOT-mule-plugin.jar} (99%) diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..9a874b5768f336915163bb88cd434575b859f936 GIT binary patch literal 6148 zcmeH~Jr2S!425ml0g0s}V-^m;4I%_5-~tF3k&vj^b9A16778<}(6eNJu~Vz<8=6`~ zboab&MFtUB!i}=AFfm2m$tVxGT*u4pe81nUlA49C} z?O@64YO)2RT{MRe%{!}2F))pG(Sih~)xkgosK7*lF7m<7{{#Hn{6A@7N(HFEpDCdI z{ -