Add matcher for matching an imported feed against existing feeds.

This commit is contained in:
akwizgran
2022-12-09 17:57:53 +00:00
parent b920382e44
commit 33d01aac8c
8 changed files with 299 additions and 16 deletions

View File

@@ -12,14 +12,14 @@ interface FeedFactory {
* Create a new feed based on the feed url
* and the metadata of an existing {@link SyndFeed}.
*/
Feed createFeed(String url, SyndFeed feed);
Feed createFeed(String url, SyndFeed sf);
/**
* Creates a new updated feed, based on the given existing feed,
* new metadata from the given {@link SyndFeed}
* and the time of the last feed entry.
*/
Feed createFeed(Feed feed, SyndFeed f, long lastEntryTime);
Feed updateFeed(Feed feed, SyndFeed sf, long lastEntryTime);
/**
* De-serializes a {@link BdfDictionary} into a {@link Feed}.

View File

@@ -13,7 +13,6 @@ import org.briarproject.bramble.api.identity.Author;
import org.briarproject.bramble.api.identity.AuthorFactory;
import org.briarproject.bramble.api.identity.LocalAuthor;
import org.briarproject.bramble.api.system.Clock;
import org.briarproject.bramble.util.StringUtils;
import org.briarproject.briar.api.blog.Blog;
import org.briarproject.briar.api.blog.BlogFactory;
import org.briarproject.briar.api.feed.Feed;
@@ -22,6 +21,7 @@ import org.briarproject.briar.api.feed.RssProperties;
import javax.inject.Inject;
import static org.briarproject.bramble.api.identity.AuthorConstants.MAX_AUTHOR_NAME_LENGTH;
import static org.briarproject.bramble.util.StringUtils.truncateUtf8;
import static org.briarproject.briar.api.feed.FeedConstants.KEY_FEED_ADDED;
import static org.briarproject.briar.api.feed.FeedConstants.KEY_FEED_AUTHOR;
import static org.briarproject.briar.api.feed.FeedConstants.KEY_FEED_DESC;
@@ -51,25 +51,29 @@ class FeedFactoryImpl implements FeedFactory {
}
@Override
public Feed createFeed(String url, SyndFeed syndFeed) {
String title = syndFeed.getTitle();
public Feed createFeed(String url, SyndFeed sf) {
String title = sf.getTitle();
if (title == null) title = "RSS";
else title = StringUtils.truncateUtf8(title, MAX_AUTHOR_NAME_LENGTH);
else title = truncateUtf8(title, MAX_AUTHOR_NAME_LENGTH);
LocalAuthor localAuthor = authorFactory.createLocalAuthor(title);
Blog blog = blogFactory.createFeedBlog(localAuthor);
long added = clock.currentTimeMillis();
RssProperties properties =
new RssProperties(url, null, null, null, null, null);
RssProperties properties = new RssProperties(url, sf.getTitle(),
sf.getDescription(), sf.getAuthor(), sf.getLink(), sf.getUri());
return new Feed(blog, localAuthor, properties, added, 0, 0);
}
@Override
public Feed createFeed(Feed feed, SyndFeed f, long lastEntryTime) {
public Feed updateFeed(Feed feed, SyndFeed sf, long lastEntryTime) {
long updated = clock.currentTimeMillis();
return new Feed(feed.getBlog(), feed.getLocalAuthor(),
feed.getProperties(), feed.getAdded(), updated, lastEntryTime);
String url = feed.getProperties().getUrl();
// Update the RSS properties
RssProperties properties = new RssProperties(url, sf.getTitle(),
sf.getDescription(), sf.getAuthor(), sf.getLink(), sf.getUri());
return new Feed(feed.getBlog(), feed.getLocalAuthor(), properties,
feed.getAdded(), updated, lastEntryTime);
}
@Override
@@ -115,12 +119,12 @@ class FeedFactoryImpl implements FeedFactory {
RssProperties properties = feed.getProperties();
if (properties.getUrl() != null)
d.put(KEY_FEED_URL, properties.getUrl());
if (properties.getTitle() != null)
d.put(KEY_FEED_RSS_TITLE, properties.getTitle());
if (properties.getDescription() != null)
d.put(KEY_FEED_DESC, properties.getDescription());
if (properties.getAuthor() != null)
d.put(KEY_FEED_RSS_AUTHOR, properties.getAuthor());
if (properties.getTitle() != null)
d.put(KEY_FEED_RSS_TITLE, properties.getTitle());
if (properties.getLink() != null)
d.put(KEY_FEED_RSS_LINK, properties.getLink());
if (properties.getUri() != null)

View File

@@ -179,7 +179,7 @@ class FeedManagerImpl implements FeedManager, EventListener, OpenDatabaseHook,
// post entries
long lastEntryTime = postFeedEntries(feed, sf.getEntries());
Feed updatedFeed = feedFactory.createFeed(feed, sf, lastEntryTime);
Feed updatedFeed = feedFactory.updateFeed(feed, sf, lastEntryTime);
// store feed metadata again to also store last entry time
txn = db.startTransaction(false);
@@ -303,7 +303,7 @@ class FeedManagerImpl implements FeedManager, EventListener, OpenDatabaseHook,
SyndFeed sf = fetchSyndFeed(url);
// sort and add new entries
long lastEntryTime = postFeedEntries(feed, sf.getEntries());
newFeeds.add(feedFactory.createFeed(feed, sf, lastEntryTime));
newFeeds.add(feedFactory.updateFeed(feed, sf, lastEntryTime));
} catch (IOException | DbException e) {
logException(LOG, WARNING, e);
newFeeds.add(feed);
@@ -328,7 +328,7 @@ class FeedManagerImpl implements FeedManager, EventListener, OpenDatabaseHook,
// clean title
String title = sf.getTitle();
if (title != null) title = cleanAll(title);
sf.setTitle(isNullOrEmpty(title) ? null : title);
sf.setTitle(isNullOrEmpty(title) ? "RSS" : title);
// clean description
String description = sf.getDescription();
@@ -340,6 +340,10 @@ class FeedManagerImpl implements FeedManager, EventListener, OpenDatabaseHook,
if (author != null) author = cleanAll(author);
sf.setAuthor(isNullOrEmpty(author) ? null : author);
// set other relevant fields to null if empty
if ("".equals(sf.getLink())) sf.setLink(null);
if ("".equals(sf.getUri())) sf.setUri(null);
return sf;
}

View File

@@ -0,0 +1,22 @@
package org.briarproject.briar.feed;
import org.briarproject.briar.api.feed.Feed;
import org.briarproject.briar.api.feed.RssProperties;
import org.briarproject.nullsafety.NotNullByDefault;
import java.util.List;
import javax.annotation.Nullable;
import javax.annotation.concurrent.ThreadSafe;
@ThreadSafe
@NotNullByDefault
public interface FeedMatcher {
/**
* Returns the best match for the given candidate from the given list of
* feeds, or null if there are no matches.
*/
@Nullable
Feed findMatchingFeed(RssProperties candidate, List<Feed> feeds);
}

View File

@@ -0,0 +1,64 @@
package org.briarproject.briar.feed;
import org.briarproject.briar.api.feed.Feed;
import org.briarproject.briar.api.feed.RssProperties;
import org.briarproject.nullsafety.NotNullByDefault;
import java.util.List;
import javax.annotation.Nullable;
@NotNullByDefault
class FeedMatcherImpl implements FeedMatcher {
private static final int MIN_MATCHING_FIELDS = 2;
@Nullable
@Override
public Feed findMatchingFeed(RssProperties candidate, List<Feed> feeds) {
// First pass: if the candidate was imported from a URL and we have
// a feed that was imported from the same URL then it's a match
String url = candidate.getUrl();
if (url != null) {
for (Feed f : feeds) {
if (url.equals(f.getProperties().getUrl())) return f;
}
}
// Second pass: if the candidate matches at least MIN_MATCHING_FIELDS
// out of the title, description, author, link and URI, then return the
// feed with the highest number of matching fields
int bestScore = 0;
Feed bestFeed = null;
String title = candidate.getTitle();
String description = candidate.getDescription();
String author = candidate.getAuthor();
String link = candidate.getLink();
String uri = candidate.getUri();
for (Feed f : feeds) {
int score = 0;
RssProperties p = f.getProperties();
if (title != null && title.equals(p.getTitle())) {
score++;
}
if (description != null && description.equals(p.getDescription())) {
score++;
}
if (author != null && author.equals(p.getAuthor())) {
score++;
}
if (link != null && link.equals(p.getLink())) {
score++;
}
if (uri != null && uri.equals(p.getUri())) {
score++;
}
if (score > bestScore) {
bestScore = score;
bestFeed = f;
}
}
if (bestScore >= MIN_MATCHING_FIELDS) return bestFeed;
// No match
return null;
}
}

View File

@@ -38,4 +38,9 @@ public class FeedModule {
FeedFactory provideFeedFactory(FeedFactoryImpl feedFactory) {
return feedFactory;
}
@Provides
FeedMatcher provideFeedMatcher(FeedMatcherImpl feedMatcher) {
return feedMatcher;
}
}

View File

@@ -0,0 +1,183 @@
package org.briarproject.briar.feed;
import org.briarproject.bramble.api.identity.LocalAuthor;
import org.briarproject.bramble.api.sync.ClientId;
import org.briarproject.bramble.test.BrambleTestCase;
import org.briarproject.briar.api.blog.Blog;
import org.briarproject.briar.api.feed.Feed;
import org.briarproject.briar.api.feed.RssProperties;
import org.junit.Test;
import java.util.Random;
import static java.util.Arrays.asList;
import static org.briarproject.bramble.test.TestUtils.getClientId;
import static org.briarproject.bramble.test.TestUtils.getGroup;
import static org.briarproject.bramble.test.TestUtils.getLocalAuthor;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
public class FeedMatcherImplTest extends BrambleTestCase {
private static final String URL = "url";
private static final String TITLE = "title";
private static final String DESCRIPTION = "description";
private static final String AUTHOR = "author";
private static final String LINK = "link";
private static final String URI = "uri";
private final Random random = new Random();
private final ClientId clientId = getClientId();
private final LocalAuthor localAuthor = getLocalAuthor();
private final FeedMatcher matcher = new FeedMatcherImpl();
@Test
public void testFeedWithMatchingUrlIsChosen() {
RssProperties candidate = new RssProperties(URL,
TITLE, DESCRIPTION, AUTHOR, LINK, URI);
// The first feed has a different/null URL but matching RSS fields
Feed feed1 = createFeed(new RssProperties(nope(),
TITLE, DESCRIPTION, AUTHOR, LINK, URI));
// The second feed has a matching URL but different/null RSS fields
Feed feed2 = createFeed(new RssProperties(URL,
nope(), nope(), nope(), nope(), nope()));
Feed match = matcher.findMatchingFeed(candidate, asList(feed1, feed2));
// The matcher should choose the feed with the matching URL
assertNotNull(match);
assertSame(feed2, match);
}
@Test
public void testNullUrlIsNotMatched() {
// The candidate has a null URL
RssProperties candidate = new RssProperties(null,
TITLE, DESCRIPTION, AUTHOR, LINK, URI);
// The first feed has a non-null URL and matching RSS fields
Feed feed1 = createFeed(new RssProperties(URL,
TITLE, DESCRIPTION, AUTHOR, LINK, URI));
// The second feed has a null URL and different/null RSS fields
Feed feed2 = createFeed(new RssProperties(null,
nope(), nope(), nope(), nope(), nope()));
Feed match = matcher.findMatchingFeed(candidate, asList(feed1, feed2));
// The matcher should choose the feed with the matching RSS fields
assertNotNull(match);
assertSame(feed1, match);
}
@Test
public void testDoesNotMatchOneRssField() {
testDoesNotMatchRssFields(TITLE, nope(), nope(), nope(), nope());
testDoesNotMatchRssFields(nope(), DESCRIPTION, nope(), nope(), nope());
testDoesNotMatchRssFields(nope(), nope(), AUTHOR, nope(), nope());
testDoesNotMatchRssFields(nope(), nope(), nope(), LINK, nope());
testDoesNotMatchRssFields(nope(), nope(), nope(), nope(), URL);
}
private void testDoesNotMatchRssFields(String title, String description,
String author, String link, String uri) {
RssProperties candidate = new RssProperties(null,
TITLE, DESCRIPTION, AUTHOR, LINK, URL);
// The first feed has no matching RSS fields
Feed feed1 = createFeed(new RssProperties(null,
nope(), nope(), nope(), nope(), nope()));
// The second feed has the given RSS fields
Feed feed2 = createFeed(new RssProperties(null,
title, description, author, link, uri));
// The third feed has no matching RSS fields
Feed feed3 = createFeed(new RssProperties(null,
nope(), nope(), nope(), nope(), nope()));
Feed match = matcher.findMatchingFeed(candidate,
asList(feed1, feed2, feed3));
// The matcher should not choose any of the feeds
assertNull(match);
}
@Test
public void testMatchesTwoRssFields() {
testMatchesRssFields(TITLE, DESCRIPTION, nope(), nope(), nope());
testMatchesRssFields(nope(), DESCRIPTION, AUTHOR, nope(), nope());
testMatchesRssFields(nope(), nope(), AUTHOR, LINK, nope());
testMatchesRssFields(nope(), nope(), nope(), LINK, URI);
}
private void testMatchesRssFields(String title, String description,
String author, String link, String uri) {
RssProperties candidate = new RssProperties(null,
TITLE, DESCRIPTION, AUTHOR, LINK, URI);
// The first feed has no matching RSS fields
Feed feed1 = createFeed(new RssProperties(null,
nope(), nope(), nope(), nope(), nope()));
// The second feed has the given RSS fields
Feed feed2 = createFeed(new RssProperties(null,
title, description, author, link, uri));
// The third feed has one matching RSS field
Feed feed3 = createFeed(new RssProperties(null,
TITLE, nope(), nope(), nope(), nope()));
FeedMatcher matcher = new FeedMatcherImpl();
Feed match = matcher.findMatchingFeed(candidate,
asList(feed1, feed2, feed3));
// The matcher should choose the second feed
assertSame(feed2, match);
}
@Test
public void testFeedWithMostMatchingRssFieldsIsChosen() {
RssProperties candidate = new RssProperties(null,
TITLE, DESCRIPTION, AUTHOR, LINK, URI);
// The first feed has no matching RSS fields
Feed feed1 = createFeed(new RssProperties(null,
nope(), nope(), nope(), nope(), nope()));
// The second feed has three matching RSS fields
Feed feed2 = createFeed(new RssProperties(null,
TITLE, DESCRIPTION, AUTHOR, nope(), nope()));
// The third feed has two matching RSS fields
Feed feed3 = createFeed(new RssProperties(null,
TITLE, DESCRIPTION, nope(), nope(), nope()));
Feed match = matcher.findMatchingFeed(candidate,
asList(feed1, feed2, feed3));
// The matcher should choose the second feed
assertSame(feed2, match);
}
@Test
public void testNullRssFieldsAreNotMatched() {
// The candidate has a null URL and null RSS fields
RssProperties candidate = new RssProperties(null,
null, null, null, null, null);
// The first feed has a null URL and non-null RSS fields
Feed feed1 = createFeed(new RssProperties(null,
TITLE, DESCRIPTION, AUTHOR, LINK, URI));
// The second feed has a non-null URL and null RSS fields
Feed feed2 = createFeed(new RssProperties(URL,
null, null, null, null, null));
Feed match = matcher.findMatchingFeed(candidate, asList(feed1, feed2));
// The matcher should not choose either of the feeds
assertNull(match);
}
/**
* Returns an RSS field that doesn't match the default, either because it's
* null or because it's a different non-null value.
*/
private String nope() {
return random.nextBoolean() ? null : "x";
}
private Feed createFeed(RssProperties properties) {
Blog blog = new Blog(getGroup(clientId, 123), localAuthor, true);
return new Feed(blog, localAuthor, properties, 0, 0, 0);
}
}