diff --git a/src/error.rs b/src/error.rs index 5fb2b0d..2d7d747 100644 --- a/src/error.rs +++ b/src/error.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::{error, fmt, result}; +use std::{error, fmt, result, str::Utf8Error, string::FromUtf16Error}; use guid::Guid; use tree::Kind; @@ -28,7 +28,14 @@ impl Error { } } -impl error::Error for Error {} +impl error::Error for Error { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + match self.kind() { + ErrorKind::MalformedString(err) => Some(err.as_ref()), + _ => None + } + } +} impl From for Error { fn from(kind: ErrorKind) -> Error { @@ -36,27 +43,21 @@ impl From for Error { } } -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self) +impl From for Error { + fn from(error: FromUtf16Error) -> Error { + Error(ErrorKind::MalformedString(error.into())) } } -#[derive(Debug)] -pub enum ErrorKind { - MismatchedItemKind(Kind, Kind), - DuplicateItem(Guid), - InvalidParent(Guid, Guid), - MissingParent(Guid, Guid), - Storage(&'static str, u32), - MergeConflict, - UnmergedLocalItems, - UnmergedRemoteItems, +impl From for Error { + fn from(error: Utf8Error) -> Error { + Error(ErrorKind::MalformedString(error.into())) + } } -impl fmt::Display for ErrorKind { +impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { + match self.kind() { ErrorKind::MismatchedItemKind(local_kind, remote_kind) => { write!(f, "Can't merge local kind {} and remote kind {}", local_kind, remote_kind) @@ -72,9 +73,6 @@ impl fmt::Display for ErrorKind { write!(f, "Can't insert item {} into nonexistent parent {}", child_guid, parent_guid) }, - ErrorKind::Storage(message, result) => { - write!(f, "Storage error: {} ({})", message, result) - }, ErrorKind::MergeConflict => { write!(f, "Local tree changed during merge") }, @@ -83,7 +81,28 @@ impl fmt::Display for ErrorKind { }, ErrorKind::UnmergedRemoteItems => { write!(f, "Merged tree doesn't mention all items from remote tree") - } + }, + ErrorKind::InvalidGuid(invalid_guid) => { + write!(f, "Merged tree contains invalid GUID {}", invalid_guid) + }, + ErrorKind::InvalidByte(b) => { + write!(f, "Invalid byte {} in UTF-16 encoding", b) + }, + ErrorKind::MalformedString(err) => err.fmt(f), } } } + +#[derive(Debug)] +pub enum ErrorKind { + MismatchedItemKind(Kind, Kind), + DuplicateItem(Guid), + InvalidParent(Guid, Guid), + MissingParent(Guid, Guid), + MergeConflict, + UnmergedLocalItems, + UnmergedRemoteItems, + InvalidGuid(Guid), + InvalidByte(u16), + MalformedString(Box), +} diff --git a/src/guid.rs b/src/guid.rs index 6b92d55..0080dc6 100644 --- a/src/guid.rs +++ b/src/guid.rs @@ -14,6 +14,8 @@ use std::{fmt, str, ops}; +use error::{Result, ErrorKind}; + #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct Guid(Repr); @@ -30,12 +32,15 @@ enum Repr { /// The Places root GUID, used to root all items in a bookmark tree. pub const ROOT_GUID: Guid = Guid(Repr::Fast(*b"root________")); +/// The "Other Bookmarks" GUID, used to hold items without a parent. +pub const UNFILED_GUID: Guid = Guid(Repr::Fast(*b"unfiled_____")); + /// The syncable Places roots. All synced items should descend from these /// roots. pub const USER_CONTENT_ROOTS: [Guid; 4] = [ Guid(Repr::Fast(*b"toolbar_____")), Guid(Repr::Fast(*b"menu________")), - Guid(Repr::Fast(*b"unfiled_____")), + UNFILED_GUID, Guid(Repr::Fast(*b"mobile______")), ]; @@ -51,19 +56,7 @@ const VALID_GUID_BYTES: [u8; 255] = 0, 0, 0, 0, 0, 0, 0]; impl Guid { - pub fn new(s: &str) -> Guid { - let repr = if Guid::is_valid(s.as_bytes()) { - assert!(s.is_char_boundary(12)); - let mut bytes = [0u8; 12]; - bytes.copy_from_slice(s.as_bytes()); - Repr::Fast(bytes) - } else { - Repr::Slow(s.into()) - }; - Guid(repr) - } - - pub fn from_utf8(b: &[u8]) -> Option { + pub fn from_utf8(b: &[u8]) -> Result { let repr = if Guid::is_valid(b) { let mut bytes = [0u8; 12]; bytes.copy_from_slice(b); @@ -71,29 +64,29 @@ impl Guid { } else { match str::from_utf8(b) { Ok(s) => Repr::Slow(s.into()), - Err(_) => return None + Err(err) => return Err(err.into()), } }; - Some(Guid(repr)) + Ok(Guid(repr)) } - pub fn from_utf16(b: &[u16]) -> Option { + pub fn from_utf16(b: &[u16]) -> Result { let repr = if Guid::is_valid(b) { let mut bytes = [0u8; 12]; for (index, byte) in b.iter().enumerate() { match byte.into_byte() { Some(byte) => bytes[index] = byte, - None => return None + None => return Err(ErrorKind::InvalidByte(*byte).into()), }; } Repr::Fast(bytes) } else { match String::from_utf16(b) { Ok(s) => Repr::Slow(s), - Err(_) => return None + Err(err) => return Err(err.into()), } }; - Some(Guid(repr)) + Ok(Guid(repr)) } #[inline] @@ -117,6 +110,13 @@ impl Guid { } } + pub fn valid(&self) -> bool { + match self.0 { + Repr::Fast(_) => true, + Repr::Slow(_) => false, + } + } + /// Equivalent to `PlacesUtils.isValidGuid`. #[inline] fn is_valid(bytes: &[T]) -> bool { @@ -129,7 +129,15 @@ impl Guid { impl<'a> From<&'a str> for Guid { #[inline] fn from(s: &'a str) -> Guid { - Guid::new(s) + let repr = if Guid::is_valid(s.as_bytes()) { + assert!(s.is_char_boundary(12)); + let mut bytes = [0u8; 12]; + bytes.copy_from_slice(s.as_bytes()); + Repr::Fast(bytes) + } else { + Repr::Slow(s.into()) + }; + Guid(repr) } } diff --git a/src/lib.rs b/src/lib.rs index f50f572..d38e733 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,5 +27,5 @@ mod tests; pub use error::{Error, ErrorKind, Result}; pub use guid::{Guid, ROOT_GUID, USER_CONTENT_ROOTS}; pub use merge::{Deletion, Merger, StructureCounts}; -pub use store::{Store, merge}; -pub use tree::{Content, Item, Kind, MergeState, MergedNode, Tree}; +pub use store::Store; +pub use tree::{Child, Content, Item, Kind, MergeState, MergedNode, ParentGuidFrom, Tree}; diff --git a/src/merge.rs b/src/merge.rs index b613fb7..18edecb 100644 --- a/src/merge.rs +++ b/src/merge.rs @@ -16,7 +16,7 @@ use std::{collections::{HashMap, HashSet, VecDeque}, mem}; use error::{ErrorKind, Result}; -use guid::{Guid, ROOT_GUID, USER_CONTENT_ROOTS}; +use guid::Guid; use tree::{Content, MergeState, MergedNode, Node, Tree}; /// Structure change types, used to indicate if a node on one side is moved @@ -64,6 +64,36 @@ enum ConflictResolution { Unchanged, } +/// A merge driver provides methods to customize merging behavior. +pub trait Driver { + /// Generates a new GUID for the given invalid GUID. This is used to fix up + /// items with GUIDs that Places can't store (bug 1380606, bug 1313026). + /// + /// Implementations of `Driver` can either use the `rand` and `base64` + /// crates to generate a new, random GUID (9 bytes, Base64url-encoded + /// without padding), or use an existing method like Desktop's + /// `nsINavHistoryService::MakeGuid`. Dogear doesn't generate new GUIDs + /// automatically to avoid depending on those crates. + /// + /// An implementation can also return: + /// + /// - `Ok(invalid_guid.clone())` to pass through all invalid GUIDs, as the + /// tests do. + /// - An error to forbid them entirely, as `DefaultDriver` does. + fn generate_new_guid(&self, invalid_guid: &Guid) -> Result; +} + +/// A default implementation of the merge driver. +pub struct DefaultDriver; + +impl Driver for DefaultDriver { + /// The default implementation returns an error and fails the merge if any + /// items have invalid GUIDs. + fn generate_new_guid(&self, invalid_guid: &Guid) -> Result { + Err(ErrorKind::InvalidGuid(invalid_guid.clone()).into()) + } +} + /// A two-way merger that produces a complete merged tree from a complete local /// tree and a complete remote tree with changes since the last sync. /// @@ -86,8 +116,8 @@ enum ConflictResolution { /// nested hierarchies, or make conflicting changes on multiple devices /// simultaneously. A simpler two-way tree merge strikes a good balance between /// correctness and complexity. -#[derive(Debug)] -pub struct Merger<'t> { +pub struct Merger<'t, D = DefaultDriver> { + driver: D, local_tree: &'t Tree, new_local_contents: Option<&'t HashMap>, remote_tree: &'t Tree, @@ -99,9 +129,10 @@ pub struct Merger<'t> { structure_counts: StructureCounts, } -impl<'t> Merger<'t> { +impl<'t> Merger<'t, DefaultDriver> { pub fn new(local_tree: &'t Tree, remote_tree: &'t Tree) -> Merger<'t> { - Merger { local_tree, + Merger { driver: DefaultDriver, + local_tree, new_local_contents: None, remote_tree, new_remote_contents: None, @@ -118,7 +149,20 @@ impl<'t> Merger<'t> { new_remote_contents: &'t HashMap) -> Merger<'t> { - Merger { local_tree, + Merger::with_driver(DefaultDriver, local_tree, new_local_contents, remote_tree, + new_remote_contents) + } +} + +impl <'t, D: Driver> Merger<'t, D> { + pub fn with_driver(driver: D, local_tree: &'t Tree, + new_local_contents: &'t HashMap, + remote_tree: &'t Tree, + new_remote_contents: &'t HashMap) + -> Merger<'t, D> + { + Merger { driver, + local_tree, new_local_contents: Some(new_local_contents), remote_tree, new_remote_contents: Some(new_remote_contents), @@ -212,8 +256,18 @@ impl<'t> Merger<'t> { self.merged_guids.insert(local_node.guid.clone()); - let mut merged_node = MergedNode::new(local_node.guid.clone(), - MergeState::Local(local_node)); + let merged_guid = if local_node.guid.valid() { + local_node.guid.clone() + } else { + let new_guid = self.driver.generate_new_guid(&local_node.guid)?; + if new_guid != local_node.guid { + self.merged_guids.insert(new_guid.clone()); + } + new_guid + }; + + let mut merged_node = MergedNode::new(merged_guid, + MergeState::Local { local_node, remote_node: None }); if local_node.is_folder() { // The local folder doesn't exist remotely, but its children might, so // we still need to recursively walk and merge them. This method will @@ -226,6 +280,7 @@ impl<'t> Merger<'t> { local_child_node)?; } } + Ok(merged_node) } @@ -234,8 +289,20 @@ impl<'t> Merger<'t> { self.merged_guids.insert(remote_node.guid.clone()); - let mut merged_node = MergedNode::new(remote_node.guid.clone(), - MergeState::Remote(remote_node)); + let merged_guid = if remote_node.guid.valid() { + remote_node.guid.clone() + } else { + let new_guid = self.driver.generate_new_guid(&remote_node.guid)?; + if new_guid != remote_node.guid { + self.merged_guids.insert(new_guid.clone()); + // Upload tombstones for changed remote GUIDs. + self.delete_remotely.insert(remote_node.guid.clone()); + } + new_guid + }; + + let mut merged_node = MergedNode::new(merged_guid, + MergeState::Remote { local_node: None, remote_node }); if remote_node.is_folder() { // As above, a remote folder's children might still exist locally, so we // need to merge them and update the merge state from remote to new if @@ -247,6 +314,7 @@ impl<'t> Merger<'t> { remote_child_node)?; } } + Ok(merged_node) } @@ -261,6 +329,8 @@ impl<'t> Merger<'t> { remote_node); if !local_node.has_compatible_kind(&remote_node) { + // TODO(lina): Remove and replace items with mismatched kinds in + // `check_for_{}_structure_change_of_{}_node`. error!("Merging local {} and remote {} with different kinds", local_node, remote_node); return Err(ErrorKind::MismatchedItemKind(local_node.kind, remote_node.kind).into()); @@ -277,10 +347,10 @@ impl<'t> Merger<'t> { let mut merged_node = MergedNode::new(remote_node.guid.clone(), match item { ConflictResolution::Local => { - MergeState::Local(local_node) + MergeState::Local { local_node, remote_node: Some(remote_node) } }, ConflictResolution::Remote => { - MergeState::Remote(remote_node) + MergeState::Remote { local_node: Some(local_node), remote_node } }, ConflictResolution::Unchanged => { MergeState::Unchanged { local_node, remote_node } @@ -376,7 +446,7 @@ impl<'t> Merger<'t> { // The remote child isn't locally deleted. Does it exist in the local tree? if let Some(local_child_node) = self.local_tree.node_for_guid(&remote_child_node.guid) { - // Otherwise, the remote child exists in the local tree. Did it move? + // The remote child exists in the local tree. Did it move? let local_parent_node = local_child_node.parent() .expect("Can't merge existing remote child without local parent"); @@ -393,9 +463,16 @@ impl<'t> Merger<'t> { remote_parent_node, local_parent_node); - let merged_child_node = self.two_way_merge(local_child_node, remote_child_node)?; + let mut merged_child_node = self.two_way_merge(local_child_node, + remote_child_node)?; + if remote_child_node.diverged() || merged_node.remote_guid_changed() { + // If the remote structure diverged, or the merged parent + // GUID changed, flag the remote child for reupload so that + // its `parentid` is correct. + merged_child_node.merge_state = + merged_child_node.merge_state.with_new_structure(); + } merged_node.merged_children.push(merged_child_node); - return Ok(()); } @@ -430,8 +507,12 @@ impl<'t> Merger<'t> { local_parent_node, remote_parent_node); - let merged_child_node = self.two_way_merge(local_child_node, - remote_child_node)?; + let mut merged_child_node = self.two_way_merge(local_child_node, + remote_child_node)?; + if remote_child_node.diverged() || merged_node.remote_guid_changed() { + merged_child_node.merge_state = + merged_child_node.merge_state.with_new_structure(); + } merged_node.merged_children.push(merged_child_node); }, } @@ -445,7 +526,7 @@ impl<'t> Merger<'t> { trace!("Remote child {} doesn't exist locally; looking for local content match", remote_child_node); - let merged_child_node = if let Some(local_child_node_by_content) = + let mut merged_child_node = if let Some(local_child_node_by_content) = self.find_local_node_matching_remote_node(merged_node, local_parent_node, remote_parent_node, @@ -455,6 +536,9 @@ impl<'t> Merger<'t> { } else { self.merge_remote_node(remote_child_node) }?; + if remote_child_node.diverged() || merged_node.remote_guid_changed() { + merged_child_node.merge_state = merged_child_node.merge_state.with_new_structure(); + } merged_node.merged_children.push(merged_child_node); Ok(()) } @@ -567,9 +651,15 @@ impl<'t> Merger<'t> { // For position changes in the same folder, we only need to // merge and flag the parent for reupload. - let merged_child_node = self.two_way_merge(local_child_node, - remote_child_node)?; + let mut merged_child_node = self.two_way_merge(local_child_node, + remote_child_node)?; merged_node.merge_state = merged_node.merge_state.with_new_structure(); + if remote_child_node.diverged() || merged_node.remote_guid_changed() { + // ...But repositioning an item in a diverged folder, or in a folder + // with an invalid GUID, should reupload the item. + merged_child_node.merge_state = + merged_child_node.merge_state.with_new_structure(); + } merged_node.merged_children.push(merged_child_node); } }, @@ -603,7 +693,7 @@ impl<'t> Merger<'t> { trace!("Local child {} doesn't exist remotely; looking for remote content match", local_child_node); - if let Some(remote_child_node_by_content) = + let merged_child_node = if let Some(remote_child_node_by_content) = self.find_remote_node_matching_local_node(merged_node, local_parent_node, remote_parent_node, @@ -611,17 +701,22 @@ impl<'t> Merger<'t> { { // The local child has a remote content match, so take the remote GUID // and merge. - let merged_child_node = self.two_way_merge(local_child_node, - remote_child_node_by_content)?; - merged_node.merged_children.push(merged_child_node); - return Ok(()); - } - - // The local child doesn't exist remotely, so flag the merged parent and - // new child for upload, and walk its descendants. - let mut merged_child_node = self.merge_local_node(local_child_node)?; - merged_node.merge_state = merged_node.merge_state.with_new_structure(); - merged_child_node.merge_state = merged_child_node.merge_state.with_new_structure(); + let mut merged_child_node = self.two_way_merge(local_child_node, + remote_child_node_by_content)?; + if remote_child_node_by_content.diverged() || merged_node.remote_guid_changed() { + merged_node.merge_state = merged_node.merge_state.with_new_structure(); + merged_child_node.merge_state = + merged_child_node.merge_state.with_new_structure(); + } + merged_child_node + } else { + // The local child doesn't exist remotely, so flag the merged parent and + // new child for upload, and walk its descendants. + let mut merged_child_node = self.merge_local_node(local_child_node)?; + merged_node.merge_state = merged_node.merge_state.with_new_structure(); + merged_child_node.merge_state = merged_child_node.merge_state.with_new_structure(); + merged_child_node + }; merged_node.merged_children.push(merged_child_node); Ok(()) } @@ -633,29 +728,33 @@ impl<'t> Merger<'t> { remote_node: Node<'t>) -> (ConflictResolution, ConflictResolution) { - if remote_node.guid == ROOT_GUID { + if remote_node.is_root() { // Don't reorder local roots. return (ConflictResolution::Local, ConflictResolution::Local); } match (local_node.needs_merge, remote_node.needs_merge) { - (true, true) => { - // The item changed locally and remotely. - if local_node.age < remote_node.age { - // The local change is newer, so merge local children first, - // followed by remaining unmerged remote children. - (ConflictResolution::Local, ConflictResolution::Local) - } else { - // The remote change is newer, so walk and merge remote - // children first, then remaining local children. - if USER_CONTENT_ROOTS.contains(&remote_node.guid) { - // Don't update root titles or other properties, but - // use their newer remote structure. - (ConflictResolution::Local, ConflictResolution::Remote) + (true, true) => match (local_node.diverged(), remote_node.diverged()) { + (true, false) => (ConflictResolution::Remote, ConflictResolution::Remote), + (false, true) => (ConflictResolution::Local, ConflictResolution::Local), + _ => { + // The item changed locally and remotely. + if local_node.age < remote_node.age { + // The local change is newer, so merge local children first, + // followed by remaining unmerged remote children. + (ConflictResolution::Local, ConflictResolution::Local) } else { - (ConflictResolution::Remote, ConflictResolution::Remote) + // The remote change is newer, so walk and merge remote + // children first, then remaining local children. + if remote_node.is_user_content_root() { + // Don't update root titles or other properties, but + // use their newer remote structure. + (ConflictResolution::Local, ConflictResolution::Remote) + } else { + (ConflictResolution::Remote, ConflictResolution::Remote) + } } - } + }, }, (true, false) => { @@ -669,7 +768,7 @@ impl<'t> Merger<'t> { // The item changed remotely, but not locally. Take the // remote state, then merge remote children first, followed // by local children. - if USER_CONTENT_ROOTS.contains(&remote_node.guid) { + if remote_node.is_user_content_root() { (ConflictResolution::Local, ConflictResolution::Remote) } else { (ConflictResolution::Remote, ConflictResolution::Remote) @@ -691,38 +790,33 @@ impl<'t> Merger<'t> { remote_child_node: Node<'t>) -> ConflictResolution { - if USER_CONTENT_ROOTS.contains(&remote_child_node.guid) { + if remote_child_node.is_user_content_root() { // Always use the local parent and position for roots. return ConflictResolution::Local; } match (local_parent_node.needs_merge, remote_parent_node.needs_merge) { - (true, true) => { - // If both parents changed, compare timestamps to decide where - // to keep the local child. - let latest_local_age = local_child_node.age.min(local_parent_node.age); - let latest_remote_age = remote_child_node.age.min(remote_parent_node.age); - - if latest_local_age < latest_remote_age { - ConflictResolution::Local - } else { - ConflictResolution::Remote - } - }, - - (true, false) => { - // If only the local parent changed, keep the local child in its - // new parent. - ConflictResolution::Local + (true, true) => match (local_parent_node.diverged(), remote_parent_node.diverged()) { + (true, false) => ConflictResolution::Remote, + (false, true) => ConflictResolution::Local, + _ => { + // If both parents changed, compare timestamps to decide where + // to keep the local child. + let latest_local_age = local_child_node.age.min(local_parent_node.age); + let latest_remote_age = remote_child_node.age.min(remote_parent_node.age); + + if latest_local_age < latest_remote_age { + ConflictResolution::Local + } else { + ConflictResolution::Remote + } + }, }, - (false, true) => { - if USER_CONTENT_ROOTS.contains(&remote_child_node.guid) { - ConflictResolution::Local - } else { - ConflictResolution::Remote - } - }, + // If only the local or remote parent changed, keep the child in its + // new parent. + (true, false) => ConflictResolution::Local, + (false, true) => ConflictResolution::Remote, (false, false) => ConflictResolution::Unchanged } diff --git a/src/store.rs b/src/store.rs index 3ed468e..f20b0fe 100644 --- a/src/store.rs +++ b/src/store.rs @@ -1,28 +1,28 @@ use std::collections::HashMap; -use error::{ErrorKind, Result}; +use error::{Error, ErrorKind}; use guid::Guid; use merge::{Merger, Deletion}; use tree::{Content, MergedNode, Tree}; -pub trait Store { +pub trait Store> { /// Builds a fully rooted, consistent tree from the items and tombstones in /// the local store. - fn fetch_local_tree(&self, local_time_millis: i64) -> Result; + fn fetch_local_tree(&self, local_time_millis: i64) -> Result; /// Fetches content info for all new local items that haven't been uploaded /// or merged yet. We'll try to dedupe them to remotely changed items with /// similar contents and different GUIDs. - fn fetch_new_local_contents(&self) -> Result>; + fn fetch_new_local_contents(&self) -> Result, E>; /// Builds a fully rooted, consistent tree from the items and tombstones in /// the mirror. - fn fetch_remote_tree(&self, remote_time_millis: i64) -> Result; + fn fetch_remote_tree(&self, remote_time_millis: i64) -> Result; /// Fetches content info for all items in the mirror that changed since the /// last sync and don't exist locally. We'll try to match new local items to /// these. - fn fetch_new_remote_contents(&self) -> Result>; + fn fetch_new_remote_contents(&self) -> Result, E>; /// Applies the merged tree and stages items to upload. We keep this /// generic: on Desktop, we'll insert the merged tree into a temp @@ -31,29 +31,28 @@ pub trait Store { /// this flow might be simpler. fn apply>(&mut self, merged_root: &MergedNode, - deletions: D) -> Result<()>; -} + deletions: D) -> Result<(), E>; -pub fn merge(store: &mut S, local_time_millis: i64, - remote_time_millis: i64) -> Result<()> { - let local_tree = store.fetch_local_tree(local_time_millis)?; - let new_local_contents = store.fetch_new_local_contents()?; + fn merge(&mut self, local_time_millis: i64, remote_time_millis: i64) -> Result<(), E> { + let local_tree = self.fetch_local_tree(local_time_millis)?; + let new_local_contents = self.fetch_new_local_contents()?; - let remote_tree = store.fetch_remote_tree(remote_time_millis)?; - let new_remote_contents = store.fetch_new_remote_contents()?; + let remote_tree = self.fetch_remote_tree(remote_time_millis)?; + let new_remote_contents = self.fetch_new_remote_contents()?; - let mut merger = Merger::with_contents(&local_tree, &new_local_contents, - &remote_tree, &new_remote_contents); - let merged_root = merger.merge()?; + let mut merger = Merger::with_contents(&local_tree, &new_local_contents, + &remote_tree, &new_remote_contents); + let merged_root = merger.merge()?; - if !merger.subsumes(&local_tree) { - return Err(ErrorKind::UnmergedLocalItems.into()); - } - if !merger.subsumes(&remote_tree) { - return Err(ErrorKind::UnmergedRemoteItems.into()); - } + if !merger.subsumes(&local_tree) { + Err(E::from(ErrorKind::UnmergedLocalItems.into()))?; + } + if !merger.subsumes(&remote_tree) { + Err(E::from(ErrorKind::UnmergedRemoteItems.into()))?; + } - store.apply(&merged_root, merger.deletions())?; + self.apply(&merged_root, merger.deletions())?; - Ok(()) + Ok(()) + } } diff --git a/src/tests.rs b/src/tests.rs index f53a213..99503e4 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -17,9 +17,9 @@ extern crate env_logger; use std::{collections::HashMap, sync::Once}; use error::{ErrorKind, Result}; -use guid::{Guid, ROOT_GUID}; -use merge::{Merger, StructureCounts}; -use tree::{Content, Item, Kind, Tree}; +use guid::{Guid, ROOT_GUID, UNFILED_GUID}; +use merge::{Driver, Merger, StructureCounts}; +use tree::{Content, Item, Kind, ParentGuidFrom, Tree}; #[derive(Debug)] struct Node { @@ -35,7 +35,10 @@ impl Node { fn into_tree(self) -> Result { fn inflate(tree: &mut Tree, parent_guid: &Guid, node: Node) -> Result<()> { let guid = node.item.guid.clone(); - tree.insert(&parent_guid, node.item)?; + tree.insert(ParentGuidFrom::default() + .children(&parent_guid) + .item(&parent_guid), + node.item.into())?; for child in node.children { inflate(tree, &guid, *child)?; } @@ -43,7 +46,7 @@ impl Node { } let guid = self.item.guid.clone(); - let mut tree = Tree::new(self.item); + let mut tree = Tree::with_reparenting(self.item, &UNFILED_GUID); for child in self.children { inflate(&mut tree, &guid, *child)?; } @@ -1808,6 +1811,14 @@ fn mismatched_incompatible_bookmark_kinds() { fn invalid_guids() { before_each(); + struct AllowInvalidGuids; + + impl Driver for AllowInvalidGuids { + fn generate_new_guid(&self, invalid_guid: &Guid) -> Result { + Ok(invalid_guid.clone()) + } + } + let local_tree = nodes!({ ("toolbar_____", Folder[needs_merge = true, age = 5], { ("bookmarkAAAA", Bookmark[needs_merge = true, age = 5]), @@ -1818,6 +1829,7 @@ fn invalid_guids() { ("loooooongGUID", Bookmark[needs_merge = true]) }) }).into_tree().unwrap(); + let new_local_contents: HashMap = HashMap::new(); let remote_tree = nodes!({ ("toolbar_____", Folder[needs_merge = true, age = 5], { @@ -1831,8 +1843,13 @@ fn invalid_guids() { ("bookmarkBBBB", Bookmark[needs_merge = true]) }) }).into_tree().unwrap(); + let new_remote_contents: HashMap = HashMap::new(); - let mut merger = Merger::new(&local_tree, &remote_tree); + let mut merger = Merger::with_driver(AllowInvalidGuids, + &local_tree, + &new_local_contents, + &remote_tree, + &new_remote_contents); let merged_root = merger.merge().unwrap(); assert!(merger.subsumes(&local_tree)); assert!(merger.subsumes(&remote_tree)); @@ -1858,3 +1875,123 @@ fn invalid_guids() { assert_eq!(merger.telemetry(), &expected_telem); } + +#[test] +fn multiple_parents() { + before_each(); + + let local_tree = Tree::default(); + + let remote_tree = nodes!({ + ("toolbar_____", Folder[age = 5], { + ("bookmarkAAAA", Bookmark), + ("bookmarkBBBB", Bookmark), + ("folderCCCCCC", Folder, { + ("bookmarkDDDD", Bookmark), + ("bookmarkEEEE", Bookmark), + ("bookmarkFFFF", Bookmark) + }) + }), + ("menu________", Folder, { + ("bookmarkGGGG", Bookmark), + ("bookmarkAAAA", Bookmark), + ("folderCCCCCC", Folder, { + ("bookmarkHHHH", Bookmark), + ("bookmarkDDDD", Bookmark) + }) + }) + }).into_tree().unwrap(); + + let mut merger = Merger::new(&local_tree, &remote_tree); + let merged_root = merger.merge().unwrap(); + assert!(merger.subsumes(&local_tree)); + assert!(merger.subsumes(&remote_tree)); + + let expected_tree = nodes!(ROOT_GUID, Folder, { + ("toolbar_____", Folder[age = 5, needs_merge = true], { + ("bookmarkBBBB", Bookmark) + }), + ("menu________", Folder[needs_merge = true], { + ("bookmarkGGGG", Bookmark), + ("bookmarkAAAA", Bookmark[needs_merge = true]), + ("folderCCCCCC", Folder[needs_merge = true], { + ("bookmarkDDDD", Bookmark[needs_merge = true]), + ("bookmarkEEEE", Bookmark), + ("bookmarkFFFF", Bookmark), + ("bookmarkHHHH", Bookmark) + }) + }) + }).into_tree().unwrap(); + let expected_telem = StructureCounts::default(); + + let merged_tree = merged_root.into_tree().unwrap(); + assert_eq!(merged_tree, expected_tree); + + assert_eq!(merger.deletions().count(), 0); + + assert_eq!(merger.telemetry(), &expected_telem); +} + +#[test] +fn reparent_orphans() { + before_each(); + + let local_tree = nodes!({ + ("toolbar_____", Folder, { + ("bookmarkAAAA", Bookmark), + ("bookmarkBBBB", Bookmark) + }), + ("unfiled_____", Folder, { + ("bookmarkCCCC", Bookmark) + }) + }).into_tree().unwrap(); + + let mut remote_tree = nodes!({ + ("toolbar_____", Folder[needs_merge = true], { + ("bookmarkBBBB", Bookmark), + ("bookmarkAAAA", Bookmark) + }), + ("unfiled_____", Folder[needs_merge = true], { + ("bookmarkDDDD", Bookmark[needs_merge = true]), + ("bookmarkCCCC", Bookmark) + }) + }).into_tree().unwrap(); + remote_tree.insert(ParentGuidFrom::default().item(&"toolbar_____".into()), Item { + guid: "bookmarkEEEE".into(), + kind: Kind::Bookmark, + age: 0, + needs_merge: true, + }.into()).expect("Should insert orphan E"); + remote_tree.insert(ParentGuidFrom::default().item(&"nonexistent".into()), Item { + guid: "bookmarkFFFF".into(), + kind: Kind::Bookmark, + age: 0, + needs_merge: true, + }.into()).expect("Should insert orphan F"); + + let mut merger = Merger::new(&local_tree, &remote_tree); + let merged_root = merger.merge().unwrap(); + assert!(merger.subsumes(&local_tree)); + assert!(merger.subsumes(&remote_tree)); + + let expected_tree = nodes!({ + ("toolbar_____", Folder[needs_merge = true], { + ("bookmarkBBBB", Bookmark), + ("bookmarkAAAA", Bookmark), + ("bookmarkEEEE", Bookmark[needs_merge = true]) + }), + ("unfiled_____", Folder[needs_merge = true], { + ("bookmarkDDDD", Bookmark), + ("bookmarkCCCC", Bookmark), + ("bookmarkFFFF", Bookmark[needs_merge = true]) + }) + }).into_tree().unwrap(); + let expected_telem = StructureCounts::default(); + + let merged_tree = merged_root.into_tree().unwrap(); + assert_eq!(merged_tree, expected_tree); + + assert_eq!(merger.deletions().count(), 0); + + assert_eq!(merger.telemetry(), &expected_telem); +} diff --git a/src/tree.rs b/src/tree.rs index aad0369..028ace7 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -12,53 +12,216 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::{collections::{HashMap, HashSet}, +use std::{cmp::Ordering, + collections::{HashMap, HashSet}, fmt, - ops::Deref}; + ops::Deref, + ptr, + slice}; use error::{ErrorKind, Result}; -use guid::{Guid, ROOT_GUID, USER_CONTENT_ROOTS}; +use guid::{Guid, ROOT_GUID, UNFILED_GUID, USER_CONTENT_ROOTS}; + +/// The type for entry indices in the tree. +type Index = usize; + +/// Describes where a new item's parent GUID comes from. A valid tree +/// should have matching GUIDs from both an item's parent's `children` and +/// an item's `parentid`. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct ParentGuidFrom(Option, Option); + +impl Default for ParentGuidFrom { + fn default() -> Self { + ParentGuidFrom(None, None) + } +} + +impl ParentGuidFrom { + /// Notes that the parent `guid` comes from an item's parent's `children`. + pub fn children(self, guid: &Guid) -> ParentGuidFrom { + ParentGuidFrom(Some(guid.clone()), self.1) + } + + /// Notes that the parent `guid` comes from an item's `parentid`. + pub fn item(self, guid: &Guid) -> ParentGuidFrom { + ParentGuidFrom(self.0, Some(guid.clone())) + } +} + +#[derive(Debug, Eq, PartialEq)] +pub enum Child { + Missing(Guid), + Exists(Item), +} + +impl Child { + fn guid(&self) -> &Guid { + match self { + Child::Missing(guid) => guid, + Child::Exists(item) => &item.guid, + } + } +} + +impl From for Child { + fn from(item: Item) -> Child { + Child::Exists(item) + } +} /// A complete, rooted bookmark tree with tombstones. /// -/// The tree stores bookmark nodes in a vector, and uses indices in the vector +/// The tree stores bookmark items in a vector, and uses indices in the vector /// to identify parents and children. This makes traversal and lookup very -/// efficient. Retrieving a node's parent takes one indexing operation, -/// retrieving children takes one indexing operation per child, and retrieving a -/// node by random GUID takes one hash map lookup and one indexing operation. +/// efficient for well-formed trees. Retrieving a node's parent takes one +/// indexing operation, retrieving children takes one indexing operation per +/// child, and retrieving a node by random GUID takes one hash map lookup and +/// one indexing operation. +/// +/// We need to do more work to fix up trees with structure inconsistencies. +/// However, the cost should amortize well across merges. +/// +/// # Tree structure +/// +/// In a well-formed tree: +/// +/// - Each item exists in exactly one folder. Two different folder's +/// `children` should never reference the same item. +/// - Each folder contains existing children. A folder's `children` should +/// never reference tombstones, or items that don't exist in the tree at all. +/// - Each item has a `parentid` that agrees with its parent's `children`. In +/// other words, if item B's `parentid` is A, then A's `children` should +/// contain B. +/// +/// Because of Reasons, things are (a lot) messier in practice. +/// +/// # Structure inconsistencies +/// +/// Sync stores structure in two places: a `parentid` property on each item, +/// which points to its parent's GUID, and a list of ordered `children` on the +/// item's parent. They're duplicated because, historically, Sync clients didn't +/// stage incoming records. Instead, they applied records one at a time, +/// directly to the live local tree. This meant that, if a client saw a child +/// before its parent, it would first use the `parentid` to decide where to keep +/// the child, then fix up parents and positions using the parent's `children`. +/// +/// This is also why moving an item into a different folder uploads records for +/// the item, old folder, and new folder. The item has a new `parentid`, and the +/// folders have new `children`. Similarly, deleting an item uploads a tombstone +/// for the item, and a record for the item's old parent. +/// +/// Unfortunately, bugs (bug 1258127) and missing features (bug 1253051) in +/// older clients sometimes caused them to upload invalid or incomplete changes. +/// For example, a client might have: +/// +/// - Uploaded a moved child, but not its parents. This means the child now +/// appears in multiple parents. In the most extreme case, an item might be +/// referenced in two different sets of `children`, _and_ have a third, +/// completely unrelated `parentid`. +/// - Deleted a child, and tracked the deletion, but didn't flag the parent for +/// reupload. The parent folder now has a tombstone child. +/// - Tracked and uploaded items that shouldn't exist on the server at all, +/// like the left pane or reading list roots (bug 1309255). +/// - Missed new folders created during a sync, creating holes in the tree. +/// +/// Newer clients shouldn't do this, but we might still have inconsistent +/// records on the server that will confuse older clients. Additionally, Firefox +/// for iOS includes a much stricter bookmarks engine that refuses to sync if +/// it detects inconsistencies. +/// +/// # Divergences +/// +/// To work around this, our tree lets the structure _diverge_. This allows: +/// +/// - Items with multiple parents. +/// - Items with missing `parentid`s. +/// - Folders with `children` whose `parentid`s don't match the folder. +/// - Items whose `parentid`s don't mention the item in their `children`. +/// - Items with `parentid`s that point to nonexistent or deleted folders. +/// - Folders with nonexistent `children`. +/// - Non-syncable items, like custom roots. +/// - Any combination of these. +/// +/// Each item in the tree has an `EntryParentFrom` tag that indicates where +/// its structure comes from. Structure from `children` is validated and +/// resolved at `insert` time, so trying to add an item to a parent that +/// doesn't exist or isn't a folder returns a `MissingParent` or +/// `InvalidParent` error. +/// +/// Structure from `parentid`, if it diverges from `children`, is resolved at +/// merge time, when the merger walks the complete tree. You can think of this +/// distinction as similar to early vs. late binding. The `parentid`, if +/// different from the parent's `children`, might not exist in the tree at +/// `insert` time, either because the parent hasn't been added yet, or because +/// it doesn't exist in the tree at all. +/// +/// # Resolving divergences +/// +/// Walking the tree, using `Tree::node_for_guid`, `Node::parent`, and +/// `Node::children`, resolves divergences using these rules: +/// +/// 1. Items that appear in multiple `children`, and items with mismatched +/// `parentid`s, use the chronologically newer parent, based on the parent's +/// last modified time. We always prefer structure from `children` over +/// `parentid,` because `children` also gives us the item's position. +/// 2. Items that aren't mentioned in any parent's `children`, but have a +/// `parentid` that references an existing folder in the tree, are reparented +/// to the end of that folder, after the folder's `children`. +/// 3. Items that reference a nonexistent or non-folder `parentid`, or don't +/// have a `parentid` at all, are reparented to the default folder, after any +/// `children` and items from rule 2. +/// 4. If the default folder isn't set, or doesn't exist, items from rule 3 are +/// reparented to the root instead. +/// +/// The result is a well-formed tree structure that can be merged. The merger +/// detects if the structure diverged, and flags affected items for reupload. #[derive(Debug)] pub struct Tree { entries: Vec, - index_by_guid: HashMap, + entry_index_by_guid: HashMap, deleted_guids: HashSet, + orphan_indices_by_parent_guid: HashMap>, + reparent_orphans_to: Option, } impl Default for Tree { fn default() -> Self { - Tree::new(Item::new(ROOT_GUID.clone(), Kind::Folder)) + Tree::with_reparenting(Item::new(ROOT_GUID.clone(), Kind::Folder), &UNFILED_GUID) } } impl Tree { /// Constructs a new rooted tree. pub fn new(root: Item) -> Tree { - let mut index_by_guid = HashMap::new(); - index_by_guid.insert(root.guid.clone(), 0); + let mut entry_index_by_guid = HashMap::new(); + entry_index_by_guid.insert(root.guid.clone(), 0); - let entries = vec![Entry { parent_index: None, - item: root, - level: 0, - is_syncable: false, - child_indices: Vec::new(), }]; + Tree { + entries: vec![Entry::root(root)], + entry_index_by_guid, + deleted_guids: HashSet::new(), + orphan_indices_by_parent_guid: HashMap::new(), + reparent_orphans_to: None, + } + } - Tree { entries, - index_by_guid, - deleted_guids: HashSet::new(), } + pub fn with_reparenting(root: Item, reparent_orphans_to: &Guid) -> Tree { + let mut entry_index_by_guid = HashMap::new(); + entry_index_by_guid.insert(root.guid.clone(), 0); + + Tree { + entries: vec![Entry::root(root)], + entry_index_by_guid, + deleted_guids: HashSet::new(), + orphan_indices_by_parent_guid: HashMap::new(), + reparent_orphans_to: Some(reparent_orphans_to.clone()), + } } #[inline] pub fn root(&self) -> Node { - self.node(0) + Node(self, &self.entries[0]) } pub fn deletions<'t>(&'t self) -> impl Iterator + 't { @@ -74,61 +237,279 @@ impl Tree { } pub fn guids<'t>(&'t self) -> impl Iterator + 't { - self.index_by_guid - .keys() + assert_eq!(self.entries.len(), self.entry_index_by_guid.len()); + self.entries.iter() + .map(|entry| &entry.item.guid) .chain(self.deleted_guids.iter()) } + /// Returns the node for a given `guid`, or `None` if a node with the `guid` + /// doesn't exist in the tree, or was deleted. pub fn node_for_guid(&self, guid: &Guid) -> Option { - self.index_by_guid - .get(guid) - .map(|index| self.node(*index)) + assert_eq!(self.entries.len(), self.entry_index_by_guid.len()); + self.entry_index_by_guid.get(guid).map(|&index| { + Node(self, &self.entries[index]) + }) } - pub fn insert(&mut self, parent_guid: &Guid, item: Item) -> Result<()> { - if self.index_by_guid.contains_key(&item.guid) { - return Err(ErrorKind::DuplicateItem(item.guid.clone()).into()); - } - let child_index = self.entries.len(); - let (parent_index, level, is_syncable) = match self.index_by_guid.get(&parent_guid) { - Some(parent_index) => { - let parent = &mut self.entries[*parent_index]; - if !parent.item.is_folder() { - return Err(ErrorKind::InvalidParent(item.guid.clone(), - parent.item.guid.clone()).into()); + /// Adds a child to the tree, allowing for orphans and multiple parents. + pub fn insert(&mut self, parent_guid: ParentGuidFrom, child: Child) -> Result<()> { + assert_eq!(self.entries.len(), self.entry_index_by_guid.len()); + match self.entry_index(&child)? { + // An entry for the item already exists in the tree, so we need to + // mark the item, its existing parents, and new parents as + // divergent. + EntryIndex::Existing(entry_index) => { + let (_, new_parents) = self.structure_for_insert(parent_guid, &child)?; + + // Add the item to the new parents. We do this before diverging, + // since we don't want to re-add the item to its existing + // parents. + for parent_from in new_parents.iter() { + match parent_from { + EntryParentFrom::Children(parent_index) => { + let parent_entry = &mut self.entries[*parent_index]; + parent_entry.child_indices.push(entry_index); + }, + EntryParentFrom::Item(parent_guid) => { + let child_indices = self.orphan_indices_by_parent_guid + .entry(parent_guid.clone()) + .or_default(); + child_indices.push(entry_index); + }, + } + } + + // Diverge the item's existing parents to add the new parents. + // It's safe to use `expect` here, since `entry_index` returns + // errors for duplicate roots, and `structure_for_insert` never + // returns roots. + let divergent_parents = self.entries[entry_index].parents + .clone() + .diverge(new_parents) + .expect("Can't diverge tree root"); + + // Mark all parents as divergent. + for parent_from in divergent_parents.iter() { + let parent_index = match parent_from { + EntryParentFrom::Children(parent_index) => *parent_index, + EntryParentFrom::Item(parent_guid) => { + match self.entry_index_by_guid.get(parent_guid) { + Some(&parent_index) => parent_index, + None => continue, + } + }, + }; + let parent_entry = &mut self.entries[parent_index]; + parent_entry.divergence = Divergence::Diverged; + } + + // Update the existing item with new data and divergent parents. + let mut entry = &mut self.entries[entry_index]; + if let Child::Exists(item) = child { + // Don't replace existing items with placeholders for + // missing children. + entry.item = item; + } + entry.divergence = Divergence::Diverged; + entry.parents = divergent_parents; + }, + + // The item doesn't exist in the tree yet, so just add it. This is + // the happy path for valid trees: a `New` entry index for a child + // that `Exists` with `One` parent from `Children`. + EntryIndex::New(entry_index) => { + let (divergence, parents) = self.structure_for_insert(parent_guid, &child)?; + match child { + Child::Exists(item) => { + // The child exists, so add it to its parents. + for parent_from in parents.iter() { + match parent_from { + EntryParentFrom::Children(parent_index) => { + let parent_entry = &mut self.entries[*parent_index]; + if let Divergence::Diverged = divergence { + // If the new item diverged, mark its parents as divergent, + // too. + parent_entry.divergence = Divergence::Diverged; + } + parent_entry.child_indices.push(entry_index); + }, + EntryParentFrom::Item(parent_guid) => { + // If the new item has a divergent `parentid`, and + // we have an entry for the `parentid`, mark the + // parent as divergent now. + let parent_index = self.entry_index_by_guid.get(parent_guid); + if let Some(&parent_index) = parent_index { + let parent_entry = &mut self.entries[parent_index]; + parent_entry.divergence = Divergence::Diverged; + } + // ...And add the item to the list of orphans for + // its `parentid`. + let child_indices = self.orphan_indices_by_parent_guid + .entry(parent_guid.clone()) + .or_default(); + child_indices.push(entry_index); + }, + } + } + + self.entry_index_by_guid.insert(item.guid.clone(), entry_index); + self.entries.insert(entry_index, Entry { + item, + divergence, + parents, + child_indices: Vec::new(), + }); + }, + + // The parent's `children` is referencing an item that + // doesn't exist in the tree, so flag the parent as + // divergent. + Child::Missing(_) => { + for parent_from in parents.iter() { + let parent_index = match parent_from { + EntryParentFrom::Children(parent_index) => *parent_index, + EntryParentFrom::Item(parent_guid) => { + match self.entry_index_by_guid.get(parent_guid) { + Some(&parent_index) => parent_index, + None => continue, + } + }, + }; + let parent_entry = &mut self.entries[parent_index]; + parent_entry.divergence = Divergence::Diverged; + } + }, } - parent.child_indices.push(child_index); - - // Syncable items descend from the four user content roots. Any - // other roots and their descendants, like the left pane root, - // left pane queries, and custom roots, are non-syncable. - // - // Newer Desktops should never reupload non-syncable items - // (bug 1274496), and should have removed them in Places - // migrations (bug 1310295). However, these items may be - // orphaned in the unfiled root, in which case they're seen as - // syncable locally. If the remote tree has the missing parents - // and roots, we'll determine that the items are non-syncable - // when merging, remove them locally, and mark them for deletion - // remotely. - let is_syncable = USER_CONTENT_ROOTS.contains(&item.guid) || parent.is_syncable; - - (*parent_index, parent.level + 1, is_syncable) }, - None => return Err(ErrorKind::MissingParent(item.guid.clone(), - parent_guid.clone()).into()), }; - self.index_by_guid.insert(item.guid.clone(), child_index); - self.entries.push(Entry { parent_index: Some(parent_index), - item, - level, - is_syncable, - child_indices: Vec::new(), }); Ok(()) } - fn node(&self, index: usize) -> Node { - Node(self, &self.entries[index]) + /// Returns the index of an entry for an item in the tree, or the index at + /// which the item should be inserted if it doesn't exist in the tree. + fn entry_index(&self, child: &Child) -> Result { + match self.entry_index_by_guid.get(child.guid()) { + Some(&entry_index) => { + let entry = &self.entries[entry_index]; + if let EntryParents::Root = &entry.parents { + // Don't allow duplicate roots. `Tree::insert` panics if + // roots diverge. + return Err(ErrorKind::DuplicateItem(child.guid().clone()).into()); + } + Ok(EntryIndex::Existing(entry_index)) + }, + None => Ok(EntryIndex::New(self.entries.len())), + } + } + + /// Determines the structure for a new or duplicate item. + fn structure_for_insert(&self, parent_guid: ParentGuidFrom, child: &Child) + -> Result<(Divergence, EntryParents)> { + Ok(match parent_guid { + // The item is mentioned in at least one folder's `children`, + // and has a `parentid`. + ParentGuidFrom(Some(from_children), Some(from_item)) => { + // For parents from `children`, we expect the parent folder to + // exist in the tree before adding its children. Unlike + // `parentid`s, `children` can form a complete tree with item + // parents and positions. + let parent_index = match self.entry_index_by_guid.get(&from_children) { + Some(parent_index) => *parent_index, + None => return Err(ErrorKind::MissingParent(child.guid().clone(), + from_children.clone()).into()), + }; + let parent_entry = &self.entries[parent_index]; + if !parent_entry.item.is_folder() { + return Err(ErrorKind::InvalidParent(child.guid().clone(), + parent_entry.item.guid.clone()).into()); + } + if from_item == from_children { + let divergence = if self.orphan_indices_by_parent_guid.contains_key(child.guid()) { + // We're adding a folder with orphaned children, where + // one or more items reference this folder as their + // `parentid`, but aren't mentioned in this folder's + // `children` (rule 2). + Divergence::Diverged + } else { + // The item's `parentid` matches its parent's + // `children`, and has no orphaned children. + // Great! This is the happy path for valid trees. + Divergence::Consistent + }; + (divergence, EntryParents::One(EntryParentFrom::Children(parent_index))) + } else { + // Otherwise, the item has a parent-child disagreement. + // Store both parents and mark it as diverged. + (Divergence::Diverged, EntryParents::Many(vec![ + EntryParentFrom::Children(parent_index), + EntryParentFrom::Item(from_item), + ])) + } + }, + + // The item is mentioned in a folder's `children`, but doesn't have + // a `parentid`. Mark it as diverged for the merger to reupload. + ParentGuidFrom(Some(from_children), None) => { + let parent_index = match self.entry_index_by_guid.get(&from_children) { + Some(parent_index) => *parent_index, + None => return Err(ErrorKind::MissingParent(child.guid().clone(), + from_children.clone()).into()), + }; + let parent_entry = &self.entries[parent_index]; + if !parent_entry.item.is_folder() { + return Err(ErrorKind::InvalidParent(child.guid().clone(), + parent_entry.item.guid.clone()).into()); + } + (Divergence::Diverged, + EntryParents::One(EntryParentFrom::Children(parent_index))) + }, + + // The item isn't mentioned in a folder's `children`, but has a + // `parentid` (rule 2). + ParentGuidFrom(None, Some(from_item)) => { + (Divergence::Diverged, EntryParents::One(EntryParentFrom::Item(from_item))) + }, + + // The item isn't mentioned in a folder's `children`, and doesn't + // have a `parentid`. Fall back to the default folder for orphans + // (rule 3), or the root (rule 4). + ParentGuidFrom(None, None) => { + let parent_from = match &self.reparent_orphans_to { + Some(parent_guid) => EntryParentFrom::Item(parent_guid.clone()), + None => EntryParentFrom::Children(0), + }; + (Divergence::Diverged, EntryParents::One(parent_from)) + }, + }) + } + + /// Returns the index of the default parent entry for reparented orphans. + /// This is either the default folder (rule 3), or the root, if the + /// default folder isn't set, doesn't exist, or isn't a folder (rule 4). + fn reparent_orphans_to_default_index(&self) -> Index { + self.reparent_orphans_to + .as_ref() + .and_then(|guid| self.entry_index_by_guid.get(guid)) + .cloned() + .filter(|&parent_index| { + let parent_entry = &self.entries[parent_index]; + parent_entry.item.is_folder() + }) + .unwrap_or(0) + } + + /// Returns the index of the entry for the `parent_guid`, to use for + /// reparenting orphans (rule 2), or the index of the default parent entry + /// if the `parent_guid` doesn't exist or isn't a folder. + fn reparent_orphans_to_index(&self, parent_guid: &Guid) -> Index { + self.entry_index_by_guid.get(parent_guid) + .cloned() + .filter(|&parent_index| { + let parent_entry = &self.entries[parent_index]; + parent_entry.item.is_folder() + }) + .unwrap_or_else(|| self.reparent_orphans_to_default_index()) } } @@ -154,57 +535,24 @@ impl fmt::Display for Tree { #[cfg(test)] impl PartialEq for Tree { fn eq(&self, other: &Tree) -> bool { - if self.index_by_guid.len() != other.index_by_guid.len() { - return false; - } - for (guid, index) in &self.index_by_guid { - if let Some(other_index) = other.index_by_guid.get(guid) { - let entry = &self.entries[*index]; - let other_entry = &other.entries[*other_index]; - if entry.item != other_entry.item { - return false; - } - match (entry.parent_index, other_entry.parent_index) { - (Some(parent_index), Some(other_parent_index)) => { - let parent_guid = &self.entries[parent_index].item.guid; - let other_parent_guid = &other.entries[other_parent_index].item.guid; - if parent_guid != other_parent_guid { - return false; - } - }, - (None, None) => {}, - _ => return false, - }; - let child_guids = entry.child_indices - .iter() - .map(|index| &self.entries[*index].item.guid); - let other_child_guids = - other_entry.child_indices - .iter() - .map(|other_index| &other.entries[*other_index].item.guid); - if child_guids.ne(other_child_guids) { - return false; - } - } else { - return false; - } - } - for other_guid in other.index_by_guid.keys() { - if !self.index_by_guid.contains_key(other_guid) { - return false; - } - } - true + &self.root() == &other.root() } } -/// An entry wraps a tree item with references to its parent and children, which -/// index into the tree's `entries` vector. This indirection exists because -/// Rust is more strict about ownership of parents and children. +/// The index of an existing entry in the tree, or the index at which a new +/// entry should be inserted. +enum EntryIndex { + New(Index), + Existing(Index), +} + +/// An entry wraps a tree item with references to its parents and children, +/// which index into the tree's `entries` vector. This indirection exists +/// because Rust is more strict about ownership of parents and children. /// /// For example, we can't have entries own their children without sacrificing -/// fast random lookup, because we'd need to store references to the entries -/// in the lookup map, but a struct can't hold references into itself. +/// fast random lookup: we'd need to store references to the entries in the +/// lookup map, but a struct can't hold references into itself. /// /// Similarly, we can't have entries hold `Weak` pointers to `Rc` entries for /// the parent and children, because we need to update the parent when we insert @@ -215,13 +563,108 @@ impl PartialEq for Tree { /// `HashMap`, but that's inefficient: we'd need to store N /// copies of the GUID for parent and child lookups, and retrieving children /// would take one hash map lookup *per child*. +/// +/// Note that we always compare references to entries, instead of deriving +/// `PartialEq`, because two entries with the same fields but in different +/// trees should never compare equal. #[derive(Debug)] struct Entry { - parent_index: Option, item: Item, - level: i64, - is_syncable: bool, - child_indices: Vec, + divergence: Divergence, + parents: EntryParents, + child_indices: Vec, +} + +impl Entry { + fn root(root: Item) -> Entry { + Entry { + item: root, + divergence: Divergence::Consistent, + parents: EntryParents::Root, + child_indices: Vec::new(), + } + } +} + +/// Stores potential parents for an entry in the tree. +#[derive(Clone, Debug)] +enum EntryParents { + /// The entry is a top-level root, from which all other entries descend. + /// A tree can only have one root. + Root, + + /// The entry has exactly one parent. This is the case for records with + /// `parentid`s that match their parents' `children`, and orphans with + /// a `parentid` that aren't mentioned in any parents' `children`. + One(EntryParentFrom), + + /// The entry has multiple parents. This is the case where an item appears + /// in multiple parents, or where the item's `parentid` doesn't match its + /// parent's `children`. + Many(Vec), +} + +impl EntryParents { + /// Returns an iterator over the parents. + fn iter<'p>(&'p self) -> impl Iterator + 'p { + match self { + EntryParents::Root => &[], + EntryParents::One(parent_from) => slice::from_ref(parent_from), + EntryParents::Many(parents_from) => parents_from, + }.iter() + } + + /// Merges a set of existing parents with a set of new parents. + fn diverge(self, parents: EntryParents) -> Option { + match (self, parents) { + (EntryParents::Root, EntryParents::Root) => Some(EntryParents::Root), + (EntryParents::One(old_parent), EntryParents::One(new_parent)) => { + Some(EntryParents::Many(vec![old_parent, new_parent])) + }, + (EntryParents::One(old_parent), EntryParents::Many(mut new_parents)) => { + new_parents.push(old_parent); + Some(EntryParents::Many(new_parents)) + }, + (EntryParents::Many(mut old_parents), EntryParents::One(new_parent)) => { + old_parents.push(new_parent); + Some(EntryParents::Many(old_parents)) + }, + (EntryParents::Many(old_parents), EntryParents::Many(mut new_parents)) => { + new_parents.extend(old_parents); + Some(EntryParents::Many(new_parents)) + }, + _ => None, + } + } +} + + +/// Describes where an entry's parent comes from. +/// +/// `EntryParentFrom` notes inconsistencies like orphans, multiple parents, +/// and parent-child disagreements, which the `Merger` uses to resolve invalid +/// structure and produce a consistent tree. +#[derive(Clone, Debug)] +enum EntryParentFrom { + /// The entry's parent references the entry in its `children`, meaning we + /// know the index of the parent. + Children(Index), + + /// The entry's parent comes from its `parentid`. We can only store the + /// GUID and not the index because the tree might not have an entry for the + /// `parentid` yet. + Item(Guid), +} + +#[derive(Debug)] +enum Divergence { + /// The node's structure is already correct, and doesn't need to be + /// reuploaded. + Consistent, + + /// The node exists in multiple parents, or is a reparented orphan. + /// The merger should reupload the node. + Diverged, } /// A convenience wrapper around `Entry` that dereferences to the containing @@ -230,28 +673,142 @@ struct Entry { pub struct Node<'t>(&'t Tree, &'t Entry); impl<'t> Node<'t> { + /// Returns an iterator for all resolved children of this node, including + /// reparented orphans. pub fn children<'n>(&'n self) -> impl Iterator> + 'n { - self.1 - .child_indices + let orphans = self.tree().orphan_indices_by_parent_guid.get(&self.entry().item.guid) .iter() - .map(move |index| self.0.node(*index)) + .flat_map(|orphan_indices| orphan_indices.iter()) + .collect::>(); + + let default_orphans = if self.is_default_parent_for_orphans() { + self.tree().orphan_indices_by_parent_guid.iter() + .filter(|(guid, _)| !self.tree().entry_index_by_guid.contains_key(guid)) + .flat_map(|(_, child_indices)| child_indices) + .collect() + } else { + Vec::new() + }; + + self.entry().child_indices.iter() + .chain(orphans.into_iter()) + .chain(default_orphans.into_iter()) + .filter_map(move |&child_index| { + // Filter out children that resolve to other parents. + let child_node = Node(self.tree(), &self.tree().entries[child_index]); + child_node.parent() + .filter(|parent_node| ptr::eq(parent_node.entry(), self.entry())) + .map(|_| child_node) + }) } - pub fn parent(&self) -> Option> { - self.1 - .parent_index - .as_ref() - .map(|index| self.0.node(*index)) + /// Returns the resolved parent of this node. + pub fn parent(&self) -> Option { + match &self.entry().parents { + EntryParents::Root => None, + EntryParents::One(parent_from) => { + let parent_index = match parent_from { + EntryParentFrom::Children(parent_index) => *parent_index, + EntryParentFrom::Item(guid) => self.tree().reparent_orphans_to_index(guid), + }; + let parent_entry = &self.tree().entries[parent_index]; + Some(Node(self.tree(), parent_entry)) + }, + EntryParents::Many(parents_from) => { + parents_from.iter() + .min_by(|a, b| { + // For multiple parents, pick the newest (minimum age), + // preferring parents from `children` over `parentid` + // (rule 1). + let (parent_index, other_parent_index) = match (a, b) { + (EntryParentFrom::Children(_), EntryParentFrom::Item(_)) => { + return Ordering::Less; + }, + (EntryParentFrom::Item(_), EntryParentFrom::Children(_)) => { + return Ordering::Greater; + }, + (EntryParentFrom::Children(parent_index), + EntryParentFrom::Children(other_parent_index)) => { + + (*parent_index, *other_parent_index) + }, + (EntryParentFrom::Item(guid), EntryParentFrom::Item(other_guid)) => { + (self.tree().reparent_orphans_to_index(guid), + self.tree().reparent_orphans_to_index(other_guid)) + }, + }; + let entry = &self.tree().entries[parent_index]; + let other_entry = &self.tree().entries[other_parent_index]; + entry.item.age.cmp(&other_entry.item.age) + }).map(|parent_from| { + let parent_index = match parent_from { + EntryParentFrom::Children(parent_index) => *parent_index, + EntryParentFrom::Item(guid) => { + self.tree().reparent_orphans_to_index(guid) + }, + }; + let parent_entry = &self.tree().entries[parent_index]; + Node(self.tree(), parent_entry) + }) + }, + } } - #[inline] + /// Returns the resolved level of this node. pub fn level(&self) -> i64 { - self.1.level + if self.is_root() { + return 0; + } + self.parent() + .map(|parent| parent.level() + 1) + .unwrap_or(-1) } - #[inline] + /// Indicates if this node is for a syncable item. + /// + /// Syncable items descend from the four user content roots. Any + /// other roots and their descendants, like the left pane root, + /// left pane queries, and custom roots, are non-syncable. + /// + /// Newer Desktops should never reupload non-syncable items + /// (bug 1274496), and should have removed them in Places + /// migrations (bug 1310295). However, these items may be + /// reparented locally to the unfiled root, in which case they're seen as + /// syncable. If the remote tree has the missing parents + /// and roots, we'll determine that the items are non-syncable + /// when merging, remove them locally, and mark them for deletion + /// remotely. pub fn is_syncable(&self) -> bool { - self.1.is_syncable + if self.is_root() { + return false; + } + if self.is_user_content_root() { + return true; + } + self.parent() + .map(|parent| parent.is_syncable()) + .unwrap_or(false) + } + + pub fn diverged(&self) -> bool { + match &self.entry().divergence { + Divergence::Diverged => true, + Divergence::Consistent => { + if self.is_default_parent_for_orphans() { + // If this node is the default folder for reparented orphans, + // check if we have any remaining orphans that reference + // nonexistent or non-folder parents (rule 3). + let needs_reparenting = |guid| { + self.tree().entry_index_by_guid.get(guid) + .map_or(true, |&index| !self.tree().entries[index].item.is_folder()) + }; + if self.tree().orphan_indices_by_parent_guid.keys().any(needs_reparenting) { + return true; + } + } + false + }, + } } pub fn to_ascii_string(&self) -> String { @@ -261,34 +818,73 @@ impl<'t> Node<'t> { fn to_ascii_fragment(&self, prefix: &str) -> String { match self.1.item.kind { Kind::Folder => { - match self.1.child_indices.len() { - 0 => format!("{}📂 {}", prefix, &self.1.item), - _ => { - let children_prefix = format!("{}| ", prefix); - let children = self.children() - .map(|n| n.to_ascii_fragment(&children_prefix)) - .collect::>() - .join("\n"); - format!("{}📂 {}\n{}", prefix, &self.1.item, children) - }, + let children_prefix = format!("{}| ", prefix); + let children = self.children() + .map(|n| n.to_ascii_fragment(&children_prefix)) + .collect::>(); + if children.is_empty() { + format!("{}📂 {}", prefix, &self.entry().item) + } else { + format!("{}📂 {}\n{}", prefix, &self.entry().item, children.join("\n")) } }, - _ => format!("{}🔖 {}", prefix, &self.1.item), + _ => format!("{}🔖 {}", prefix, &self.entry().item), } } + + /// Indicates if this node is the root node. + pub fn is_root(&self) -> bool { + ptr::eq(self.entry(), &self.tree().entries[0]) + } + + /// Indicates if this node is a user content root. + pub fn is_user_content_root(&self) -> bool { + USER_CONTENT_ROOTS.contains(&self.entry().item.guid) + } + + /// Indicates if this node is the default parent node for reparented + /// orphans. + pub fn is_default_parent_for_orphans(&self) -> bool { + ptr::eq(self.entry(), &self.tree().entries[self.tree().reparent_orphans_to_default_index()]) + } + + #[inline] + fn tree(&self) -> &'t Tree { &self.0 } + + #[inline] + fn entry(&self) -> &'t Entry { &self.1 } } impl<'t> Deref for Node<'t> { type Target = Item; fn deref(&self) -> &Item { - &self.1.item + &self.entry().item } } impl<'t> fmt::Display for Node<'t> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.1.item.fmt(f) + self.entry().item.fmt(f) + } +} + +#[cfg(test)] +impl<'t> PartialEq for Node<'t> { + fn eq(&self, other: &Node) -> bool { + match (self.parent(), other.parent()) { + (Some(parent), Some(other_parent)) => { + if parent.entry().item != other_parent.entry().item { + return false; + } + }, + (Some(_), None) | (None, Some(_)) => return false, + (None, None) => {} + } + if self.entry().item != other.entry().item { + return false; + } + self.children().eq(other.children()) } } @@ -368,33 +964,24 @@ impl<'t> MergedNode<'t> { merged_children: Vec::new(), } } - /// Indicates whether to prefer the remote side when applying the merged tree. - pub fn use_remote(&self) -> bool { + pub fn remote_guid_changed(&self) -> bool { match self.merge_state { - MergeState::Remote { .. } | MergeState::RemoteWithNewStructure { .. } => true, - _ => false, - } - } - - /// Indicates whether the merged item should be (re)uploaded to the server. - pub fn needs_upload(&self) -> bool { - match self.merge_state { - MergeState::Local(_) - | MergeState::LocalWithNewStructure(_) - | MergeState::RemoteWithNewStructure(_) => true, + MergeState::Remote { remote_node, .. } + | MergeState::RemoteWithNewStructure { remote_node, .. } + | MergeState::Unchanged { remote_node, .. } => remote_node.guid != self.guid, _ => false, } } #[cfg(test)] pub fn into_tree(self) -> Result { - fn to_item<'t>(merged_node: &MergedNode<'t>) -> Item { + fn to_item(merged_node: &MergedNode) -> Item { let node = merged_node.node(); let mut item = Item::new(merged_node.guid.clone(), node.kind); item.age = node.age; item.needs_merge = match merged_node.merge_state { - MergeState::Local(_) - | MergeState::Remote(_) + MergeState::Local { .. } + | MergeState::Remote { .. } | MergeState::Unchanged { .. } => false, _ => true, }; @@ -407,7 +994,10 @@ impl<'t> MergedNode<'t> { -> Result<()> { let guid = merged_node.guid.clone(); - tree.insert(&parent_guid, to_item(&merged_node))?; + tree.insert(ParentGuidFrom::default() + .children(parent_guid) + .item(parent_guid), + to_item(&merged_node).into())?; for merged_child_node in merged_node.merged_children { inflate(tree, &guid, merged_child_node)?; } @@ -428,17 +1018,17 @@ impl<'t> MergedNode<'t> { fn to_ascii_fragment(&self, prefix: &str) -> String { match self.node().kind { - Kind::Folder => match self.merged_children.len() { - 0 => format!("{}📂 {}", prefix, &self), - _ => { - let children_prefix = format!("{}| ", prefix); - let children = self.merged_children - .iter() - .map(|n| n.to_ascii_fragment(&children_prefix)) - .collect::>() - .join("\n"); - format!("{}📂 {}\n{}", prefix, &self, children) - }, + Kind::Folder => { + let children_prefix = format!("{}| ", prefix); + let children = self.merged_children + .iter() + .map(|n| n.to_ascii_fragment(&children_prefix)) + .collect::>(); + if children.is_empty() { + format!("{}📂 {}", prefix, &self) + } else { + format!("{}📂 {}\n{}", prefix, &self, children.join("\n")) + } }, _ => format!("{}🔖 {}", prefix, &self), } @@ -446,10 +1036,10 @@ impl<'t> MergedNode<'t> { fn node(&self) -> Node { match self.merge_state { - MergeState::Local(node) => node, - MergeState::Remote(node) => node, - MergeState::LocalWithNewStructure(node) => node, - MergeState::RemoteWithNewStructure(node) => node, + MergeState::Local { local_node, .. } => local_node, + MergeState::Remote { remote_node, .. } => remote_node, + MergeState::LocalWithNewStructure { local_node, .. } => local_node, + MergeState::RemoteWithNewStructure { remote_node, .. } => remote_node, MergeState::Unchanged { local_node, .. } => local_node, } } @@ -469,23 +1059,23 @@ pub enum MergeState<'t> { /// structure state. This could mean that the item doesn't exist on the /// server yet, or that it has newer local changes that we should /// upload. - Local(Node<'t>), + Local { local_node: Node<'t>, remote_node: Option> }, /// A remote merge state means we should update the local value and /// structure state. The item might not exist locally yet, or might have /// newer remote changes that we should apply. - Remote(Node<'t>), + Remote { local_node: Option>, remote_node: Node<'t> }, /// A local merge state with new structure means we should prefer the local /// value state, and upload the new structure state to the server. We use /// new structure states to resolve conflicts caused by moving local items /// out of a remotely deleted folder, or remote items out of a locally /// deleted folder. - LocalWithNewStructure(Node<'t>), + LocalWithNewStructure { local_node: Node<'t>, remote_node: Option> }, /// A remote merge state with new structure means we should prefer the /// remote value and reupload the new structure. - RemoteWithNewStructure(Node<'t>), + RemoteWithNewStructure { local_node: Option>, remote_node: Node<'t> }, /// An unchanged merge state means we don't need to do anything to the item. Unchanged { local_node: Node<'t>, remote_node: Node<'t> }, @@ -494,16 +1084,16 @@ pub enum MergeState<'t> { impl<'t> MergeState<'t> { pub fn with_new_structure(&self) -> MergeState<'t> { match *self { - MergeState::Local(node) => { - MergeState::LocalWithNewStructure(node) + MergeState::Local { local_node, remote_node } => { + MergeState::LocalWithNewStructure { local_node, remote_node } }, - MergeState::Remote(node) => { - MergeState::RemoteWithNewStructure(node) + MergeState::Remote { local_node, remote_node } => { + MergeState::RemoteWithNewStructure { local_node, remote_node } }, - MergeState::Unchanged { local_node, .. } => { + MergeState::Unchanged { local_node, remote_node } => { // Once the structure changes, it doesn't matter which side we // pick; we'll need to reupload the item to the server, anyway. - MergeState::LocalWithNewStructure(local_node) + MergeState::LocalWithNewStructure { local_node, remote_node: Some(remote_node) } }, state => state, } @@ -513,10 +1103,10 @@ impl<'t> MergeState<'t> { impl<'t> fmt::Display for MergeState<'t> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(match self { - MergeState::Local(_) => "(Local, Local)", - MergeState::Remote(_) => "(Remote, Remote)", - MergeState::LocalWithNewStructure(_) => "(Local, New)", - MergeState::RemoteWithNewStructure(_) => "(Remote, New)", + MergeState::Local { .. } => "(Local, Local)", + MergeState::Remote { .. } => "(Remote, Remote)", + MergeState::LocalWithNewStructure { .. } => "(Local, New)", + MergeState::RemoteWithNewStructure { .. } => "(Remote, New)", MergeState::Unchanged { .. } => "(Unchanged, Unchanged)" }) }