Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] File: Allow selecting files with arbitrary extensions #6894

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 71 additions & 25 deletions Orange/widgets/data/owfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
QStyle, QComboBox, QMessageBox, QGridLayout, QLabel, \
QLineEdit, QSizePolicy as Policy, QCompleter
from AnyQt.QtCore import Qt, QTimer, QSize, QUrl
from AnyQt.QtGui import QBrush

from orangewidget.utils.filedialogs import format_filter
from orangewidget.workflow.drophandler import SingleUrlDropHandler
Expand Down Expand Up @@ -38,7 +37,7 @@
# module's namespace so that old saved settings still work
from Orange.widgets.utils.filedialogs import RecentPath

DEFAULT_READER_TEXT = "Automatically detect type"
DEFAULT_READER_TEXT = "Determine type from the file extension"

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -147,8 +146,11 @@ class Warning(widget.OWWidget.Warning):
class Error(widget.OWWidget.Error):
file_not_found = Msg("File not found.")
missing_reader = Msg("Missing reader.")
select_file_type = Msg("Select file type.")
sheet_error = Msg("Error listing available sheets.")
unknown = Msg("Read error:\n{}")
unknown_select = Msg(
"Read error, possibly due to incorrect choice of file type:\n{}")

UserAdviceMessages = [
widget.Message(
Expand Down Expand Up @@ -264,7 +266,7 @@ def package(w):
self.reader_combo = QComboBox(self)
self.reader_combo.setSizePolicy(Policy.Expanding, Policy.Fixed)
self.reader_combo.setMinimumSize(QSize(100, 1))
self.reader_combo.activated[int].connect(self.select_reader)
self.reader_combo.activated[int].connect(self.on_reader_change)

box.layout().addWidget(self.reader_combo)
layout.addWidget(box, 0, 1)
Expand Down Expand Up @@ -324,25 +326,27 @@ def select_file(self, n):
self.set_file_list()

def select_sheet(self):
    """
    Store the sheet currently shown in `self.sheet_combo` and reload.

    The combo's current text is written to the `sheet` attribute of the
    first entry in `self.recent_paths`, after which `load_data()` is called.
    """
    # pylint: disable=unsubscriptable-object
    chosen_sheet = self.sheet_combo.currentText()
    self.recent_paths[0].sheet = chosen_sheet
    self.load_data()

def on_reader_change(self, n):
    """
    Handle a reader being picked at index `n` of the reader combo.

    Args:
        n (int): index of the activated combo item.
    """
    # First record the chosen format ...
    self.select_reader(n)
    # ... then load the data with it.
    self.load_data()

def select_reader(self, n):
    """
    Record the file format chosen at index `n` of the reader combo.

    Only the `file_format` of the first entry in `self.recent_paths` is
    updated; no data is loaded here. Callers that want a reload call
    `load_data()` themselves (as `on_reader_change` does), which also
    keeps this method safe to call while a load is already in progress.

    Does nothing when the source is not a local file or when there are
    no recent paths.

    Args:
        n (int): index in the reader combo. 0 is the default entry and
            clears the stored format; 1..len(self.available_readers)
            select `self.available_readers[n - 1]`; any larger index
            stores the combo item's text as the format.
    """
    if self.source != self.LOCAL_FILE:
        return  # ignore for URL's

    if not self.recent_paths:
        return

    path = self.recent_paths[0]  # pylint: disable=unsubscriptable-object
    if n == 0:  # default
        path.file_format = None
    elif n <= len(self.available_readers):
        reader = self.available_readers[n - 1]
        path.file_format = reader.qualified_name()
    else:  # the rest include just qualified names
        path.file_format = self.reader_combo.itemText(n)

def _url_set(self):
index = self.url_combo.currentIndex()
Expand Down Expand Up @@ -373,11 +377,14 @@ def browse_file(self, in_demos=False):
else:
start_file = self.last_path() or os.path.expanduser("~/")

filename, reader, _ = open_filename_dialog(start_file, None, self.available_readers)
filename, reader, _ = open_filename_dialog(
start_file, None, self.available_readers,
add_all="*")
if not filename:
return
self.add_path(filename)
if reader is not None:
# pylint: disable=unsubscriptable-object
self.recent_paths[0].file_format = reader.qualified_name()

self.source = self.LOCAL_FILE
Expand Down Expand Up @@ -415,20 +422,20 @@ def _try_load(self):
if not url:
return self.Information.no_file_selected

def mark_problematic_reader():
self.reader_combo.setItemData(self.reader_combo.currentIndex(),
QBrush(Qt.red), Qt.ForegroundRole)

try:
self.reader = self._get_reader() # also sets current reader index
assert self.reader is not None
except MissingReaderException:
mark_problematic_reader()
return self.Error.missing_reader
if self.reader_combo.currentIndex() > 0:
return self.Error.missing_reader
else:
return self.Error.select_file_type
except Exception as ex:
mark_problematic_reader()
log.exception(ex)
return lambda x=ex: self.Error.unknown(str(x))
if self.reader_combo.currentIndex() > 0:
return lambda x=ex: self.Error.unknown(str(x))
else:
return lambda x=ex: self.Error.unknown_select(str(x))

try:
self._update_sheet_combo()
Expand All @@ -439,7 +446,6 @@ def mark_problematic_reader():
try:
data = self.reader.read()
except Exception as ex:
mark_problematic_reader()
log.exception(ex)
return lambda x=ex: self.Error.unknown(str(x))
if warnings:
Expand All @@ -455,9 +461,26 @@ def mark_problematic_reader():
return None

def _get_reader(self) -> FileFormat:
"""
Get the reader for the current file.

For local files, this also observes the stored settings and the reader
combo, as follows:

1. If the file format is known (from stored settings), use it and set
the reader combo to the corresponding index (as in settings)
2. Otherwise, detect it from the extension and set the combo to
Auto detect, overriding any previous user-set choice
3. Otherwise, use the current combo state.

Returns:
FileFormat: reader instance
"""
if self.source == self.LOCAL_FILE:
path = self.last_path()
self.reader_combo.setEnabled(True)

# pylint: disable=unsubscriptable-object
if self.recent_paths and self.recent_paths[0].file_format:
qname = self.recent_paths[0].file_format
qname_index = {r.qualified_name(): i for i, r in enumerate(self.available_readers)}
Expand All @@ -473,9 +496,21 @@ def _get_reader(self) -> FileFormat:
except Exception as ex:
raise MissingReaderException(f'Can not find reader "{qname}"') from ex
reader = reader_class(path)

else:
self.reader_combo.setCurrentIndex(0)
reader = FileFormat.get_reader(path)
old_idx = self.reader_combo.currentIndex()
try:
self.reader_combo.setCurrentIndex(0)
reader = FileFormat.get_reader(path)
except MissingReaderException:
if old_idx == 0:
raise
# Set the path for the current file format,
# and repeat the call to return the corresponding reader
self.select_reader(old_idx)
return self._get_reader()

# pylint: disable=unsubscriptable-object
if self.recent_paths and self.recent_paths[0].sheet:
reader.select_sheet(self.recent_paths[0].sheet)
return reader
Expand Down Expand Up @@ -504,12 +539,21 @@ def _select_active_sheet(self):
self.sheet_combo.setCurrentIndex(0)

def _initialize_reader_combo(self):
    """
    Bring the reader combo back to its initial items and disable it.

    An empty combo is filled with the default entry followed by one
    filter string per available reader. A combo that already has items
    is not cleared and refilled: only the entries beyond the initial
    ones are dropped, so the current index is kept whenever it still
    points at an initial item.
    """
    # Reset to initial state without losing the current index or
    # emitting any signals.
    combo = self.reader_combo
    if not combo.count():
        filters = [format_filter(f) for f in self.available_readers]
        combo.addItems([DEFAULT_READER_TEXT] + filters)
        combo.setCurrentIndex(0)
    else:
        # additional readers may be added in self._get_reader()
        n = len(self.available_readers) + 1
        # An index past the initial items is about to be removed,
        # so fall back to the default entry first.
        if combo.currentIndex() >= n:
            combo.setCurrentIndex(0)
        while combo.count() > n:
            combo.removeItem(combo.count() - 1)
    combo.setDisabled(True)

@staticmethod
def _describe(table):
Expand Down Expand Up @@ -556,10 +600,12 @@ def _describe(table):
return text

def storeSpecificSettings(self):
    """
    Save the current variables on the current context.

    A full slice of `self.variables` is stored as the
    `modified_variables` attribute of `self.current_context`.
    """
    # pylint: disable=unsubscriptable-object
    snapshot = self.variables[:]
    self.current_context.modified_variables = snapshot

def retrieveSpecificSettings(self):
    """
    Restore variables from the current context, if it has any.

    When `self.current_context` has a `modified_variables` attribute, its
    contents replace the contents of `self.variables` in place; otherwise
    nothing is changed.
    """
    context = self.current_context
    if not hasattr(context, "modified_variables"):
        return
    # pylint: disable=unsubscriptable-object
    self.variables[:] = context.modified_variables

def reset_domain_edit(self):
Expand Down
27 changes: 27 additions & 0 deletions Orange/widgets/data/tests/actually-a-tab-file.xlsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
age prescription astigmatic tear_rate lenses
discrete discrete discrete discrete discrete
class
young myope no reduced none
young myope no normal soft
young myope yes reduced none
young myope yes normal hard
young hypermetrope no reduced none
young hypermetrope no normal soft
young hypermetrope yes reduced none
young hypermetrope yes normal hard
pre-presbyopic myope no reduced none
pre-presbyopic myope no normal soft
pre-presbyopic myope yes reduced none
pre-presbyopic myope yes normal hard
pre-presbyopic hypermetrope no reduced none
pre-presbyopic hypermetrope no normal soft
pre-presbyopic hypermetrope yes reduced none
pre-presbyopic hypermetrope yes normal none
presbyopic myope no reduced none
presbyopic myope no normal none
presbyopic myope yes reduced none
presbyopic myope yes normal hard
presbyopic hypermetrope no reduced none
presbyopic hypermetrope no normal soft
presbyopic hypermetrope yes reduced none
presbyopic hypermetrope yes normal none
Binary file added Orange/widgets/data/tests/an_excel_file-too.foo
Binary file not shown.
Binary file added Orange/widgets/data/tests/an_excel_file.foo
Binary file not shown.
Binary file added Orange/widgets/data/tests/an_excel_file.xlsx
Binary file not shown.
Loading
Loading