Skip to content

Commit

Permalink
import19: add option latest-rev-only
Browse files Browse the repository at this point in the history
  • Loading branch information
UlrichB22 committed Oct 27, 2024
1 parent 0ec608c commit fa1ffae
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 9 deletions.
14 changes: 12 additions & 2 deletions docs/admin/upgrade.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,10 @@ and index as described in the install section above using commands::
moin create-instance
moin index-create

The import19 cli subcommand will read your 1.9 data_dir (pages, attachments and users),
convert the data, write it to your moin2 storage and build the index::
The import19 CLI subcommand needs your 1.9 data directory with pages, attachments and users.
Usually you set up moin2 on a new, up-to-date operating system and copy the old data directory to
a temporary location. The utility will read the moin 1.9 data, convert it, write it to
your moin2 storage and build the index::

moin import19 --data_dir /<path to moin1.9>/wiki/data

Expand All @@ -110,6 +112,10 @@ convert the last revision of all pages with moin wiki markup to markdown::

-m markdown

With the ``--latest-rev-only`` option, you can omit the history of the pages and only import the
latest revision of each item into the new wiki. This is particularly useful for testing the
migration to moin2.

The import19 process will create a wiki directory structure different from moin 1.9.
There will be three namespaces under /wiki/data: "default", "userprofiles", and "users".
Each namespace will have "data" and "meta" subdirectories. Additional custom namespaces can
Expand All @@ -128,6 +134,10 @@ part of the import will be time-consuming. You can use the following options to
Choose the values according to your available hardware resources. The defaults are 1 process and 256 MB of memory.
See the `Whoosh Tips for speeding up batch indexing docs <https://whoosh.readthedocs.io/en/latest/batch.html>`_ for details.

Use the following command to get an overview of all available options::

moin import19 --help

Testing
-------
Review the logs for error messages. Start the moin server and try the "Index" and "History"
Expand Down
47 changes: 40 additions & 7 deletions src/moin/cli/migration/moin19/import19.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,14 @@ def migr_statistics(unknown_macros):
default=NAMESPACE_DEFAULT,
help="target namespace, e.g. used for members of a wikifarm.",
)
@click.option(
"--latest-rev-only",
"-r",
is_flag=True,
required=False,
default=False,
help="Import only the latest revision of each item.",
)
@click.option("--procs", "-p", required=False, type=int, default=1, help="Number of processors the writer will use.")
@click.option(
"--limitmb",
Expand All @@ -152,7 +160,7 @@ def migr_statistics(unknown_macros):
default=256,
help="Maximum memory (in megabytes) each index-writer will use for the indexing pool.",
)
def ImportMoin19(data_dir=None, markup_out=None, namespace=None, procs=None, limitmb=None):
def ImportMoin19(data_dir=None, markup_out=None, namespace=None, procs=None, limitmb=None, latest_rev_only=False):
"""Import content and user data from a moin wiki with version 1.9"""

target_namespace = namespace
Expand All @@ -176,7 +184,11 @@ def ImportMoin19(data_dir=None, markup_out=None, namespace=None, procs=None, lim

logging.info("PHASE2: Converting Pages and Attachments ...")
for rev in PageBackend(
data_dir, deleted_mode=DELETED_MODE_KILL, default_markup="wiki", target_namespace=target_namespace
data_dir,
deleted_mode=DELETED_MODE_KILL,
default_markup="wiki",
target_namespace=target_namespace,
latest_rev_only=latest_rev_only,
):
for user_name in user_names:
if rev.meta[NAME][0] == user_name or rev.meta[NAME][0].startswith(user_name + "/"):
Expand Down Expand Up @@ -305,6 +317,7 @@ def __init__(
default_markup="wiki",
target_namespace="",
item_category_regex=r"(?P<all>Category(?P<key>(?!Template)\S+))",
latest_rev_only=False,
):
"""
:param path: storage path (data_dir)
Expand All @@ -319,13 +332,15 @@ def __init__(
:param default_markup: used if a page has no #format line, moin 1.9's default
'wiki' and we also use this default here.
:param target_namespace : target namespace
:param latest_rev_only: import only the latest revision of each item
"""
self._path = path
assert deleted_mode in (DELETED_MODE_KILL, DELETED_MODE_KEEP)
self.deleted_mode = deleted_mode
self.format_default = default_markup
self.target_namespace = target_namespace
self.item_category_regex = re.compile(item_category_regex, re.UNICODE)
self.latest_rev_only = latest_rev_only

def __iter__(self):
pages_dir = os.path.join(self._path, "pages")
Expand All @@ -334,7 +349,13 @@ def __iter__(self):
for f in pages:
itemname = unquoteWikiname(f)
try:
item = PageItem(self, os.path.join(pages_dir, f), itemname, self.target_namespace)
item = PageItem(
self,
os.path.join(pages_dir, f),
itemname,
self.target_namespace,
latest_rev_only=self.latest_rev_only,
)
except KillRequested:
pass # a message was already output
except (OSError, AttributeError):
Expand All @@ -355,11 +376,12 @@ class PageItem:
moin 1.9 page
"""

def __init__(self, backend, path, itemname, target_namespace):
def __init__(self, backend, path, itemname, target_namespace, latest_rev_only=False):
self.backend = backend
self.name = itemname
self.path = path
self.target_namespace = target_namespace
self.latest_rev_only = latest_rev_only
try:

logging.debug(f"Processing item {itemname}")
Expand Down Expand Up @@ -388,10 +410,18 @@ def iter_revisions(self):
except OSError:
fnames = []
parent_id = None
if self.latest_rev_only and f"{self.current:08d}" in fnames:
fnames = [f"{self.current:08d}"] # process only the current revision
for fname in fnames:
try:
revno = int(fname)
page_rev = PageRevision(self, revno, os.path.join(revisionspath, fname), self.target_namespace)
page_rev = PageRevision(
self,
revno,
os.path.join(revisionspath, fname),
self.target_namespace,
latest_rev_only=self.latest_rev_only,
)
if parent_id:
page_rev.meta[PARENTID] = parent_id
parent_id = page_rev.meta[REVID]
Expand Down Expand Up @@ -428,7 +458,7 @@ class PageRevision:
moin 1.9 page revision
"""

def __init__(self, item, revno, path, target_namespace):
def __init__(self, item, revno, path, target_namespace, latest_rev_only=False):
item_name = item.name
itemid = item.itemid
editlog = item.editlog
Expand Down Expand Up @@ -493,7 +523,10 @@ def __init__(self, item, revno, path, target_namespace):
meta[SIZE] = size
meta[ITEMID] = itemid
meta[REVID] = make_uuid()
meta[REV_NUMBER] = revno
if latest_rev_only:
meta[REV_NUMBER] = 1
else:
meta[REV_NUMBER] = revno
meta[NAMESPACE] = target_namespace
meta[ITEMTYPE] = ITEMTYPE_DEFAULT
if LANGUAGE not in meta:
Expand Down

0 comments on commit fa1ffae

Please sign in to comment.