Skip to content
This repository has been archived by the owner on May 10, 2024. It is now read-only.

Commit

Permalink
Fix #1918: Send in page innerText to ads classifier (#1930)
Browse files Browse the repository at this point in the history
Instead of sending plain HTML, we now send in the document's `innerText`, which doesn't include any HTML tags, images, etc.
This allows the ads classifier to properly classify pages to their correct type
  • Loading branch information
kylehickinson authored Nov 13, 2019
1 parent 8346b2c commit a6da496
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 14 deletions.
Binary file modified BraveRewards/BraveRewards.framework/BraveRewards
Binary file not shown.
5 changes: 3 additions & 2 deletions BraveRewards/BraveRewards.framework/Headers/BATBraveAds.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,9 @@ NS_SWIFT_NAME(BraveAds)

#pragma mark - Reporting

/// Report that a page has loaded in the current browser tab, and the HTML is available for analysis
- (void)reportLoadedPageWithURL:(NSURL *)url html:(NSString *)html;
/// Report that a page has loaded in the current browser tab, along with the
/// page's inner text, which is used for classification
- (void)reportLoadedPageWithURL:(NSURL *)url innerText:(NSString *)text;

/// Report that media has started on a tab with a given id
- (void)reportMediaStartedWithTabId:(NSInteger)tabId NS_SWIFT_NAME(reportMediaStarted(tabId:));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ NS_SWIFT_NAME(BraveLedger)
- (void)listOneTimeTips:(void (NS_NOESCAPE ^)(NSArray<BATPublisherInfo *> *))completion;

- (void)tipPublisherDirectly:(BATPublisherInfo *)publisher
amount:(int)amount
amount:(double)amount
currency:(NSString *)currency
completion:(void (^)(BATResult result))completion;

Expand Down
4 changes: 2 additions & 2 deletions BraveRewards/BraveRewards.framework/Headers/BATBraveRewards.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,14 @@ NS_SWIFT_NAME(BraveRewards)
isPrivate:(BOOL)isPrivate;
/// Report that a page has loaded in the current browser tab, and the HTML is available for analysis
///
/// @note Send false for `shouldClassifyForAds` if the load happened due to tabs restoring
/// @note Send nil for `adsInnerText` if the load happened due to tabs restoring
/// after app launch or if response header for the page load contains
/// "cache-control: no-store"
- (void)reportLoadedPageWithURL:(NSURL *)url
faviconURL:(nullable NSURL *)faviconURL
tabId:(UInt32)tabId
html:(NSString *)html
shouldClassifyForAds:(BOOL)shouldClassify NS_SWIFT_NAME(reportLoadedPage(url:faviconUrl:tabId:html:shouldClassifyForAds:));
adsInnerText:(nullable NSString *)adsInnerText NS_SWIFT_NAME(reportLoadedPage(url:faviconUrl:tabId:html:adsInnerText:));
/// Report any XHR load happening in the page
- (void)reportXHRLoad:(NSURL *)url
tabId:(UInt32)tabId
Expand Down
2 changes: 1 addition & 1 deletion BraveRewardsUI/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ The latest BraveRewards.framework was built on:

```
brave-browser/c9404a71bb301d1303df1fcd2c24f7f614174fe9
brave-core/888b808f99981e008bbac6c58123440eba64f230
brave-core/be26e03107bebecb17e120f81998915d28129de2
```
2 changes: 1 addition & 1 deletion BraveRewardsUI/Tipping/TippingViewController.swift
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ class TippingViewController: UIViewController, UIViewControllerTransitioningDele
// TODO: Handle started tip process
}
} else {
self.state.ledger.tipPublisherDirectly(self.publisherInfo, amount: Int32(amount), currency: "BAT") { _ in
self.state.ledger.tipPublisherDirectly(self.publisherInfo, amount: Double(amount), currency: "BAT") { _ in
// TODO: Handle started tip process
}
}
Expand Down
14 changes: 14 additions & 0 deletions BraveShared/Extensions/StringExtensions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,18 @@ extension String {
}
return nil
}

/// The words contained in this string, in the order they appear.
///
/// Words are found by enumerating the whole string with
/// `enumerateSubstrings(in:options:)` using the `.byWords` option; each
/// non-nil substring reported by that enumeration becomes one element.
///
/// - Returns: An array of words, empty when the enumeration reports none.
public var words: [String] {
    var collected: [String] = []
    let entireRange = startIndex..<endIndex
    enumerateSubstrings(in: entireRange, options: .byWords) { substring, _, _, _ in
        // The enumeration hands back an optional substring; skip nil entries.
        guard let substring = substring else { return }
        collected.append(substring)
    }
    return collected
}
}
15 changes: 15 additions & 0 deletions BraveSharedTests/StringExtensionTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,19 @@ class StringExtensionTests: XCTestCase {
let schemelessURL = "brave.com"
XCTAssertNotNil(schemelessURL.firstURL)
}

/// Verifies that `String.words` splits text into words, dropping line breaks
/// and surrounding punctuation while keeping in-word apostrophes.
func testWords() {
    // Text spread over several lines, ending with a carriage return.
    let multilineInput = """
    Multiple words
    On multiple lines.
    That will get stripped!\r
    """
    let expectedMultilineWords = ["Multiple", "words", "On", "multiple", "lines", "That", "will", "get", "stripped"]
    XCTAssertEqual(multilineInput.words, expectedMultilineWords)

    // Quotes, an em dash and an ellipsis surround the words; the apostrophes
    // inside "It's" and "isn't" are expected to stay part of their words.
    let punctuatedInput = "\"It's a wonderful life—isn't it…\""
    let expectedPunctuatedWords = ["It's", "a", "wonderful", "life", "isn't", "it"]
    XCTAssertEqual(punctuatedInput.words, expectedPunctuatedWords)
}
}
30 changes: 23 additions & 7 deletions Client/Frontend/Browser/BrowserViewController/BVC+Rewards.swift
Original file line number Diff line number Diff line change
Expand Up @@ -115,18 +115,34 @@ extension Tab {
/// Reports the page currently loaded in this tab's web view to `rewards`.
///
/// Nothing is reported when the tab has no web view or URL, when the URL is
/// local, or while private browsing is active.
///
/// - Parameter rewards: The rewards object that receives the page-load report.
// NOTE(review): this listing comes from a rendered diff and appears to contain
// both the pre-change and post-change statements interleaved — confirm against
// the actual file before relying on the exact statement order.
func reportPageLoad(to rewards: BraveRewards) {
guard let webView = webView, let url = webView.url else { return }
if url.isLocal || PrivateBrowsingManager.shared.isPrivateBrowsing { return }

// Results of the JavaScript evaluations below; each stays nil if its
// evaluation does not produce a string.
var htmlBlob: String?
var classifierText: String?

let getHtmlToStringJSCall = "document.documentElement.outerHTML.toString()"
// Copy to var, as `shouldClassifyLoadsForAds` can be reset before JS completes
let shouldClassify = shouldClassifyLoadsForAds
webView.evaluateJavaScript(getHtmlToStringJSCall, completionHandler: { html, _ in
guard let htmlString = html as? String else { return }
// The dispatch group tracks every JavaScript evaluation so that the report is
// sent once, after each of them has called back.
let group = DispatchGroup()
group.enter()
webView.evaluateJavaScript("document.documentElement.outerHTML.toString()", completionHandler: { html, _ in
htmlBlob = html as? String
group.leave()
})

// The page's inner text is only fetched when this load should be classified
// for ads; otherwise `classifierText` remains nil.
if shouldClassifyLoadsForAds {
group.enter()
webView.evaluateJavaScript("document.body.innerText", completionHandler: { text, _ in
// Get the list of words in the page and join them together with a space
// to send to the classifier
classifierText = (text as? String)?.words.joined(separator: " ")
group.leave()
})
}

group.notify(queue: .main) {
// An empty or missing favicon URL string yields nil here, which is logged
// but does not prevent the report from being sent.
let faviconURL = URL(string: self.displayFavicon?.url ?? "")
if faviconURL == nil {
log.warning("No favicon found in \(self) to report to rewards panel")
}
rewards.reportLoadedPage(url: url, faviconUrl: faviconURL, tabId: self.rewardsId, html: htmlString, shouldClassifyForAds: shouldClassify)
})
// A missing HTML result is reported as an empty string; `adsInnerText` is
// passed as nil when no classifier text was produced.
rewards.reportLoadedPage(url: url, faviconUrl: faviconURL, tabId: self.rewardsId, html: htmlBlob ?? "", adsInnerText: classifierText)
}
}

func reportPageNaviagtion(to rewards: BraveRewards) {
Expand Down

0 comments on commit a6da496

Please sign in to comment.