Skip to content

Commit

Permalink
[delimitedtext] QRegExp -> QRegularExpression
Browse files Browse the repository at this point in the history
  • Loading branch information
nirvn authored and nyalldawson committed Apr 1, 2021
1 parent f8a35e3 commit be1ba89
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 22 deletions.
22 changes: 14 additions & 8 deletions src/providers/delimitedtext/qgsdelimitedtextfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -717,8 +717,10 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseRegexp( QString &buffer,
// and extract capture groups
if ( mAnchoredRegexp )
{
if ( mDelimRegexp.indexIn( buffer ) < 0 ) return RecordInvalid;
QStringList groups = mDelimRegexp.capturedTexts();
const QRegularExpressionMatch match = mDelimRegexp.match( buffer );
if ( !match.hasMatch() )
return RecordInvalid;
const QStringList groups = match.capturedTexts();
for ( int i = 1; i < groups.size(); i++ )
{
appendField( fields, groups[i] );
Expand All @@ -730,15 +732,19 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseRegexp( QString &buffer,
int size = buffer.size();
while ( true )
{
if ( pos >= size ) break;
int matchPos = mDelimRegexp.indexIn( buffer, pos );
if ( pos >= size )
break;
QRegularExpressionMatch match = mDelimRegexp.match( buffer, pos );

int matchPos = match.capturedStart();
// If match won't advance cursor, then need to force it along one place
// to avoid infinite loop.
int matchLen = mDelimRegexp.matchedLength();
int matchLen = match.capturedLength();
if ( matchPos == pos && matchLen == 0 )
{
matchPos = mDelimRegexp.indexIn( buffer, pos + 1 );
matchLen = mDelimRegexp.matchedLength();
match = mDelimRegexp.match( buffer, pos + 1 );
matchPos = match.capturedStart();
matchLen = match.capturedLength();
}
// If no match, then field is to end of record
if ( matchPos < 0 )
Expand All @@ -751,7 +757,7 @@ QgsDelimitedTextFile::Status QgsDelimitedTextFile::parseRegexp( QString &buffer,
appendField( fields, buffer.mid( pos, matchPos - pos ) );
if ( mDelimRegexp.captureCount() > 0 )
{
QStringList groups = mDelimRegexp.capturedTexts();
QStringList groups = match.capturedTexts();
for ( int i = 1; i < groups.size(); i++ )
{
appendField( fields, groups[i] );
Expand Down
2 changes: 1 addition & 1 deletion src/providers/delimitedtext/qgsdelimitedtextfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ class QgsDelimitedTextFile : public QObject
int mMaxNameLength = 200;

// Parameters used by parsers
QRegExp mDelimRegexp;
QRegularExpression mDelimRegexp;
bool mAnchoredRegexp = false;
QString mDelimChars;
QString mQuoteChar;
Expand Down
20 changes: 11 additions & 9 deletions src/providers/delimitedtext/qgsdelimitedtextprovider.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,9 @@ QStringList QgsDelimitedTextProvider::readCsvtFieldTypes( const QString &filenam
// This is a slightly generous regular expression in that it allows spaces and unquoted field types
// not allowed in OGR CSVT files. Also doesn't care if int and string fields have a precision.

strTypeList = strTypeList.toLower();
QRegExp reTypeList( "^(?:\\s*(\\\"?)(?:integer|real|double|long|longlong|int8|string|date|datetime|time)(?:\\(\\d+(?:\\.\\d+)?\\))?\\1\\s*(?:,|$))+" );
if ( ! reTypeList.exactMatch( strTypeList ) )
const QRegularExpression reTypeList( QRegularExpression::anchoredPattern( QStringLiteral( "^(?:\\s*(\\\"?)(?:integer|real|double|long|longlong|int8|string|date|datetime|time)(?:\\(\\d+(?:\\.\\d+)?\\))?\\1\\s*(?:,|$))+" ) ) );
const QRegularExpressionMatch match = reTypeList.match( strTypeList );
if ( !match.hasMatch() )
{
// Looks like this was supposed to be a CSVT file, so report bad formatted string
if ( message ) { *message = tr( "File type string in %1 is not correctly formatted" ).arg( csvtInfo.fileName() ); }
Expand All @@ -236,13 +236,15 @@ QStringList QgsDelimitedTextProvider::readCsvtFieldTypes( const QString &filenam
QgsDebugMsgLevel( QStringLiteral( "Field type string: %1" ).arg( strTypeList ), 2 );

int pos = 0;
QRegExp reType( "(integer|real|double|string|date|datetime|time)" );

while ( ( pos = reType.indexIn( strTypeList, pos ) ) != -1 )
const QRegularExpression reType( QStringLiteral( "(integer|real|double|string|date|datetime|time)" ) );
QRegularExpressionMatch typeMatch = reType.match( strTypeList, pos );
while ( typeMatch.hasMatch() )
{
QgsDebugMsgLevel( QStringLiteral( "Found type: %1" ).arg( reType.cap( 1 ) ), 2 );
types << reType.cap( 1 );
pos += reType.matchedLength();
QgsDebugMsgLevel( QStringLiteral( "Found type: %1" ).arg( typeMatch.captured( 1 ) ), 2 );
types << typeMatch.captured( 1 );
pos = typeMatch.capturedEnd();

typeMatch = reType.match( strTypeList, pos );
}

if ( message )
Expand Down
8 changes: 4 additions & 4 deletions tests/src/python/test_qgsdelimitedtextprovider_wanted.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,11 +893,11 @@ def test_018_regular_expression_2():
'GEXP': 'GEXP',
'description': 'RE',
'RE_1': 'RE',
'GEXP_1': 'NULL',
'data': 'data2',
'RE_2': 'RE',
'GEXP_1': 'data2',
'data': 'RE',
'RE_2': 'info2',
'GEXP_2': 'NULL',
'info': 'info2',
'info': 'NULL',
'#fid': 3,
'#geometry': 'None',
},
Expand Down

0 comments on commit be1ba89

Please sign in to comment.