ibm-ecosystem-engineering · seansund · Nov 14, 2023 · Nov 14, 2023
diff --git a/nest-cli.json b/nest-cli.json
@@ -9,6 +9,11 @@
         "include": "../config/*.*",
         "outDir": "dist/config",
         "watchAssets": true
+      },
+      {
+        "include": "**/*.json",
+        "outDir": "dist/src",
+        "watchAssets": true
       }
     ]
   }

diff --git a/src/services/negative-news/negative-news.impl.ts b/src/services/negative-news/negative-news.impl.ts
@@ -45,332 +45,6 @@ const topicRiskScoreConfig = {
     "illegal activities": 4
 };
 
-/*
-    params_classify = GenerateParams(decoding_method="greedy")
-    #params = GenerateParams(
-        #decoding_method="sample",
-        #max_new_tokens=10,
-        #min_new_tokens=1,
-        #stream=False,
-        #temperature=0.7,
-        #top_k=50,
-        #top_p=1,
-    #)
-
-    #genai_model = Model(model="google/flan-ul2", params=params, credentials=creds)
-    langchain_model_classify = LangChainInterface(model="google/flan-ul2", params=params_classify, credentials=creds)
-
-    params_summary = GenerateParams(decoding_method="greedy", repetition_penalty=2, min_new_tokens=80, max_new_tokens=200)
-    langchain_model_summary = LangChainInterface(model="google/flan-ul2", params=params_summary, credentials=creds)
-
- */
-
-/*
-                data = search_func(query, num_results,api_key)
-        valid_url_details, bad_url_details = validate_urls(data)
-        report_bad_urls(bad_url_details)
-        scraped_news = scrape_func(valid_url_details, char_size)
-        neg_news, pos_news =  check_neg_news(scraped_news,langchain_model_classify)
-        report_pos_news(pos_news,langchain_model_summary)
-        tp,fp = apply_filters(neg_news,langchain_model_classify,subject_name)
-        report_fp(fp,langchain_model_summary)
-        report_tp(tp,langchain_model_summary)
-        final_conclusion(tp,fp, pos_news, subject_name, num_results)
-        st.success("Done!")
-
- */
-
-/*
-def search_func(query,num_results,api_key):
-    client = ScrapeitCloudClient(api_key)
-
-    try:
-        params = {
-            "q": query,
-            "gl": "us",
-            "hl": "en",
-            #"domain": "google.co.uk",
-            "num": num_results,
-            "tbm": "nws",
-            #"tbs": "qdr:y"
-        }
-
-        response = client.scrape(params)
-
-        data = response.json()
-        data = data['newsResults']
-        write_list("data.json", data)
-        r_data = read_list("data.json")
-        #r_data = read_list("data_UT.json")
-        return r_data
-
-    except Exception as e:
-        print(f"Error occurred: {e}")
-
-def validate_urls(data):
-    valid_url_details = []
-    bad_url_details = []
-
-    for x in range(len(data)):
-        title = data[x]['title']
-        URL = data[x]['link']
-        snippet = data[x]['snippet']
-        publish_date = data[x]['date']
-        n=0
-
-        try:
-            response  = requests.get(URL,timeout = (10, 10))
-            n=1
-        except requests.exceptions.Timeout:
-            n=2
-        except requests.exceptions.RequestException as e:
-            #print("An error occurred:", e)
-            n=3
-
-        if n == 1:
-            valid_news_ll = [title, URL, snippet, publish_date]
-            valid_url_details.append(valid_news_ll)
-        elif n == 2:
-            invalid_news_ll = [title, URL, snippet, publish_date,'TimeOut']
-            bad_url_details.append(invalid_news_ll)
-        elif n == 3:
-            invalid_news_ll = [title, URL, snippet, publish_date,'OtherError']
-            bad_url_details.append(invalid_news_ll)
-        else:
-            pass
-
-    return valid_url_details, bad_url_details
-
-def report_bad_urls(bad_url_details):
-    write_list("bad_url.json", bad_url_details)
-
-def scrape_func(valid_url_details, char_size):
-    scraped_news = []
-    r_bad_url = read_list("bad_url.json")
-    for x in range(len(valid_url_details)):
-        title = valid_url_details[x] [0]
-        URL = valid_url_details[x][1]
-        snippet = valid_url_details[x][2]
-        publish_date = valid_url_details[x][3]
-        url=[URL]
-        loader = UnstructuredURLLoader(urls=url)
-        sdata=loader.load()
-        sdata = sdata[0].page_content
-        if sdata == "Please enable JS and disable any ad blocker":
-            bad_url_ll=[title,URL,snippet, publish_date,"Blocking WebSites"]
-            r_bad_url.append(bad_url_ll)
-        else:
-            scraped_news_ll=[title,URL,snippet,publish_date,sdata[0:char_size]]
-            scraped_news.append(scraped_news_ll)
-
-    write_list("scraped_news.json", scraped_news)
-    write_list("bad_url.json", r_bad_url)
-    return scraped_news
-
-def check_neg_news(scraped_news,langchain_model):
-    neg_news = []
-    pos_news = []
-    r_topic_config = read_list("topic_risk_score_config.json")
-    topic_ll = list(r_topic_config.keys())
-    topic_prompt = ", ".join(topic_ll)
-    #print(topic_prompt)
-
-    for x in range(len(scraped_news)):
-        context = scraped_news[x][4]
-        langchain_model = langchain_model
-        neg_news_instr = f"From the context provided identify if there is any negetive news or news related to {topic_prompt} etc present or not. Provide a truthful answer in yes or no"
-        seed_pattern = PromptPattern.from_str(neg_news_instr+" : {{context}}")
-        template = seed_pattern.langchain.as_template()
-        #pattern = PromptPattern.langchain.from_template(template)
-        #print("")
-        #print("")
-        #print("")
-        response = langchain_model(template.format(context=context))
-        if response == 'yes':
-            news_topic = []
-            for i in range(len(topic_ll)):
-                indv_topic_prompt = topic_ll[i]
-                #topic_instr1 = f"From the context provided about news item can you suggest which of the following topics is this news related to ? {topic_prompt}"
-                topic_instr1 = f"From the context provided about news item can you suggest this news related to {indv_topic_prompt} or not. Provide a truthful answer in yes or no"
-                seed_pattern = PromptPattern.from_str(topic_instr1+" : {{context}}")
-                template = seed_pattern.langchain.as_template()
-                response = langchain_model(template.format(context=context))
-                if response == 'yes':
-                    response = indv_topic_prompt
-                    #print(response)
-                    news_topic.append(response)
-            scraped_news[x].append(news_topic)
-            neg_news.append(scraped_news[x])
-        elif response == 'no':
-            pos_news.append(scraped_news[x])
-    return neg_news, pos_news
-
-def report_pos_news(pos_news,langchain_model):
-    pos_news_results = []
-    langchain_model = langchain_model
-    seed_pattern = PromptPattern.from_str("Summarize the text in 2 or 3 sentences : {{text}}")
-    template = seed_pattern.langchain.as_template()
-    #pattern = PromptPattern.langchain.from_template(template)
-    for x in range(len(pos_news)) :
-        text = pos_news[x][4]
-        response = langchain_model(template.format(text=text))
-        summary = response.rstrip(".")
-        pos_news_results_ll = [pos_news[x][1],pos_news[x][3],summary]
-        pos_news_results.append(pos_news_results_ll)
-
-    write_list("pos_news_results.json", pos_news_results)
-
-def apply_filters(neg_news,langchain_model, subject_name):
-    tp = []
-    fp = []
-    r_filter = read_list("filter.json")
-    langchain_model = langchain_model
-
-    for x in range(len(neg_news)):
-        if len(r_filter) == 0:
-            subject_name = subject_name
-            instr1 = f"From the news text provided identify if the person {subject_name} is mentioned anywhere in the text. Provide a truthful answer in yes or no. If not sure then say not sure"
-            text = neg_news[x][4]
-            seed_pattern = PromptPattern.from_str(instr1+" : {{text}}")
-            template = seed_pattern.langchain.as_template()
-            response1 = langchain_model(template.format(text=text))
-            response2 = 'yes'
-            response3 = 'yes'
-            response4 = 'yes'
-
-            if (response1 == "yes"):
-                neg_news[x].extend([response1,response2,response3,response4])
-                tp.append(neg_news[x])
-            else:
-                neg_news[x].extend([response1,response2,response3,response4])
-                fp.append(neg_news[x])
-        else:
-            location = r_filter[0]
-            subject_name = subject_name
-
-            dob = r_filter[1]
-            dob_date = datetime.strptime(dob, '%b %Y')
-            #print(dob_date)
-
-            today = date.today()
-            age = today - dob_date.date()
-            age_yrs = round((age.days+age.seconds/86400)/365.2425)
-            #print(age_yrs)
-
-            instr1 = f"From the news text provided identify if the person {subject_name} is mentioned anywhere in the text. Provide a truthful answer in yes or no. If not sure then say not sure"
-            instr2 = f"From the news text provided identify if there is any mention of  {location} anywhere in the text. Provide a truthful answer in yes or no. If not sure then say not sure"
-            instr3 = f"From the news text provided identify if there is any mention of {dob_date} anywhere in the text. Provide a truthful answer in yes or no. If not sure then say not sure"
-            instr4 = f"From the news text provided identify if the age of {subject_name} is nearly around {age_yrs} years or so. Provide a truthful answer in yes or no. If not sure then say not sure"
-
-            text = neg_news[x][4]
-
-            seed_pattern = PromptPattern.from_str(instr1+" : {{text}}")
-            template = seed_pattern.langchain.as_template()
-            response1 = langchain_model(template.format(text=text))
-
-            seed_pattern = PromptPattern.from_str(instr2+" : {{text}}")
-            template = seed_pattern.langchain.as_template()
-            response2 = langchain_model(template.format(text=text))
-
-            seed_pattern = PromptPattern.from_str(instr3+" : {{text}}")
-            template = seed_pattern.langchain.as_template()
-            response3 = langchain_model(template.format(text=text))
-
-            seed_pattern = PromptPattern.from_str(instr4+" : {{text}}")
-            template = seed_pattern.langchain.as_template()
-            response4 = langchain_model(template.format(text=text))
-
-            if (response1 == "yes") and (response2 == "yes") and ((response3 == "yes") or (response4 == "yes")):
-                vmatch = 1
-                neg_news[x].extend([response1,response2,response3,response4])
-                tp.append(neg_news[x])
-            else:
-                vmmatch = 0
-                neg_news[x].extend([response1,response2,response3,response4])
-                fp.append(neg_news[x])
-    return tp, fp
-
-def report_fp(fp,langchain_model):
-    fp_results=[]
-    langchain_model = langchain_model
-    seed_pattern = PromptPattern.from_str("Summarize the text in 2 or 3 sentences : {{text}}")
-    template = seed_pattern.langchain.as_template()
-    #pattern = PromptPattern.langchain.from_template(template)
-    for x in range(len(fp)) :
-        text = fp[x][4]
-        response = langchain_model(template.format(text=text))
-        summary = response.rstrip(".")
-        fp_results_ll = [fp[x][1],fp[x][3],summary,fp[x][5],fp[x][6],fp[x][7],fp[x][8],fp[x][9]]
-        fp_results.append(fp_results_ll)
-
-    write_list("fp_results.json", fp_results)
-
-def report_tp(tp,langchain_model):
-    tp_results=[]
-    langchain_model = langchain_model
-    seed_pattern = PromptPattern.from_str("Summarize the text in 2 or 3 sentences : {{text}}")
-    template = seed_pattern.langchain.as_template()
-    #pattern = PromptPattern.langchain.from_template(template)
-    for x in range(len(tp)) :
-        text = tp[x][4]
-        response = langchain_model(template.format(text=text))
-        summary = response.rstrip(".")
-        tp_results_ll = [tp[x][1],tp[x][3],summary,tp[x][5],tp[x][6],tp[x][7],tp[x][8],tp[x][9]]
-        tp_results.append(tp_results_ll)
-
-    write_list("tp_results.json", tp_results)
-
-def  final_conclusion(tp,fp, pos_news,subject_name, num_results):
-    neg_news_conclusion = []
-    cpos = len(pos_news)
-    ctp = len(tp)
-    cfp = len(fp)
-    bad_url_details = read_list("bad_url.json")
-    cbadurl = len(bad_url_details)
-
-    conclusion_text_general = "Total News Screened: "+str(num_results)+"    Neg-News-"+str(ctp)+"  Un-related News-"+str(cfp)+"  Non-Neg News-"+str(cpos)+"  Bad-Url-"+str(cbadurl)+" "
-    neg_news_conclusion.append(conclusion_text_general)
-
-    tp_topic_unique = []
-    for x in range(len(tp)) :
-        tp_topic_unique.extend(tp[x][5])
-
-    fp_topic_unique = []
-    for x in range(len(fp)) :
-        fp_topic_unique.extend(fp[x][5])
-
-    l1 = list(set(tp_topic_unique))
-    l2 = list(set(fp_topic_unique))
-    l1str = ", ".join(l1)
-    l2str = ", ".join(l2)
-
-    if len(l1) > 0:
-        conclusion_text_topic_tp = "Screening process has found "+ str(ctp) + " Negative news. Topics identified are - "+l1str +". "
-    else:
-        conclusion_text_topic_tp = ""
-
-    if len(l2) > 0:
-        conclusion_text_topic_fp = "Screening process has found "+ str(cfp) + " unrelated -ve news. Topics identified are - "+l2str +"."
-    else:
-        conclusion_text_topic_fp = ""
-
-    conclusion_text_topic = conclusion_text_topic_tp + conclusion_text_topic_fp
-    neg_news_conclusion.append(conclusion_text_topic_tp)
-    neg_news_conclusion.append(conclusion_text_topic_fp)
-
-    if len(tp) > 0:
-        conclusion_text = "The screening process has found that there are Negative News present about "+subject_name +". Initiate L2 level Screening."
-        neg_news_conclusion.append(conclusion_text)
-    elif len(fp) > 0:
-        conclusion_text = "Even if the screening process has found that there are Negative News present but those seems not related to "+subject_name +". Further Manual Screening is recommended."
-        neg_news_conclusion.append(conclusion_text)
-    else:
-        conclusion_text = "There are No Negative News found about "+subject_name +"."
-        neg_news_conclusion.append(conclusion_text)
-    write_list("neg_news_conclusion.json", neg_news_conclusion)
-
- */
-
 interface ScrapeitResponse {
     searchInformation: {
         totalResults: string;