Skip to content

Commit

Permalink
Move snowball_create.sql creation into perl file
Browse files Browse the repository at this point in the history
BABELFISH-CONFLICT: see Postgres community repo for original commit

This is in preparation for building postgres with meson / ninja.

We already have duplicated code for this between the make and msvc
builds. Adding a third copy seems like a bad plan, thus move the generation
into a perl script.

As we don't want to rely on perl being available for builds from tarballs,
generate the file during distprep.

Author: Peter Eisentraut <[email protected]>
Author: Andres Freund <[email protected]>
Discussion: https://postgr.es/m/[email protected]
(cherry picked from commit b3a0d8324cf1f02c04a7099a436cfd68cfbf4566)
  • Loading branch information
anarazel authored and 2jungkook committed Nov 30, 2023
1 parent a11a8e1 commit 389028d
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 113 deletions.
109 changes: 29 additions & 80 deletions src/backend/snowball/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -72,40 +72,23 @@ OBJS += \
stem_UTF_8_turkish.o \
stem_UTF_8_yiddish.o

# first column is language name and also name of dictionary for not-all-ASCII
# words, second is name of dictionary for all-ASCII words
# Note order dependency: use of some other language as ASCII dictionary
# must come after creation of that language
LANGUAGES= \
arabic arabic \
armenian armenian \
basque basque \
catalan catalan \
danish danish \
dutch dutch \
english english \
finnish finnish \
french french \
german german \
greek greek \
hindi english \
hungarian hungarian \
indonesian indonesian \
irish irish \
italian italian \
lithuanian lithuanian \
nepali nepali \
norwegian norwegian \
portuguese portuguese \
romanian romanian \
russian english \
serbian serbian \
spanish spanish \
swedish swedish \
tamil tamil \
turkish turkish \
yiddish yiddish

stop_files = \
danish.stop \
dutch.stop \
english.stop \
finnish.stop \
french.stop \
german.stop \
hungarian.stop \
italian.stop \
nepali.stop \
norwegian.stop \
portuguese.stop \
russian.stop \
spanish.stop \
swedish.stop \
tsql_contains.stop \
turkish.stop

SQLSCRIPT= snowball_create.sql
DICTDIR=tsearch_data
Expand All @@ -119,58 +102,24 @@ all: all-shared-lib $(SQLSCRIPT)

include $(top_srcdir)/src/Makefile.shlib

$(SQLSCRIPT): Makefile snowball_func.sql.in snowball.sql.in
echo '-- Language-specific snowball dictionaries' > $@
cat $(srcdir)/snowball_func.sql.in >> $@
@set -e; \
set $(LANGUAGES) ; \
while [ "$$#" -gt 0 ] ; \
do \
lang=$$1; shift; \
nonascdictname=$$lang; \
ascdictname=$$1; shift; \
if [ -s $(srcdir)/stopwords/$${lang}.stop ] ; then \
stop=", StopWords=$${lang}" ; \
else \
stop=""; \
fi; \
cat $(srcdir)/snowball.sql.in | \
sed -e "s#_LANGNAME_#$$lang#g" | \
sed -e "s#_DICTNAME_#$${lang}_stem#g" | \
sed -e "s#_CFGNAME_#$$lang#g" | \
sed -e "s#_ASCDICTNAME_#$${ascdictname}_stem#g" | \
sed -e "s#_NONASCDICTNAME_#$${nonascdictname}_stem#g" | \
sed -e "s#_STOPWORDS_#$$stop#g" ; \
done >> $@
$(SQLSCRIPT): snowball_create.pl snowball_func.sql.in snowball.sql.in
$(PERL) $< --input ${srcdir} --outdir .

distprep: $(SQLSCRIPT)

install: all installdirs install-lib
$(INSTALL_DATA) $(SQLSCRIPT) '$(DESTDIR)$(datadir)'
@set -e; \
set $(LANGUAGES) ; \
while [ "$$#" -gt 0 ] ; \
do \
lang=$$1; shift; shift; \
$(INSTALL_DATA) $(srcdir)/stopwords/tsql_contains.stop '$(DESTDIR)$(datadir)/$(DICTDIR)' ; \
if [ -s $(srcdir)/stopwords/$${lang}.stop ] ; then \
$(INSTALL_DATA) $(srcdir)/stopwords/$${lang}.stop '$(DESTDIR)$(datadir)/$(DICTDIR)' ; \
fi \
done
$(INSTALL_DATA) $(addprefix $(srcdir)/stopwords/,$(stop_files)) '$(DESTDIR)$(datadir)/$(DICTDIR)'

installdirs: installdirs-lib
$(MKDIR_P) '$(DESTDIR)$(datadir)' '$(DESTDIR)$(datadir)/$(DICTDIR)'

uninstall: uninstall-lib
rm -f '$(DESTDIR)$(datadir)/$(SQLSCRIPT)'
@set -e; \
set $(LANGUAGES) ; \
while [ "$$#" -gt 0 ] ; \
do \
lang=$$1; shift; shift; \
rm -f '$(DESTDIR)$(datadir)/$(DICTDIR)/'tsql_contains.stop ; \
if [ -s $(srcdir)/stopwords/$${lang}.stop ] ; then \
rm -f '$(DESTDIR)$(datadir)/$(DICTDIR)/'$${lang}.stop ; \
fi \
done

clean distclean maintainer-clean: clean-lib
rm -f $(OBJS) $(SQLSCRIPT)
rm -f $(addprefix '$(DESTDIR)$(datadir)/$(DICTDIR)/',$(stop_files))

clean distclean: clean-lib
rm -f $(OBJS)

maintainer-clean: distclean
rm -f $(SQLSCRIPT)
148 changes: 148 additions & 0 deletions src/backend/snowball/snowball_create.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
#!/usr/bin/perl

use strict;
use warnings;
use Getopt::Long;

# Command-line settings, filled in by GetOptions.  The output directory
# defaults to the current directory, matching the usage text; an empty
# default would make the output path start at the filesystem root.
my $outdir_path = '.';
my $input_path = '';
my $depfile;

# Languages for which a snowball dictionary and text search
# configuration are generated, in creation order.
our @languages = qw(
	arabic
	armenian
	basque
	catalan
	danish
	dutch
	english
	finnish
	french
	german
	greek
	hindi
	hungarian
	indonesian
	irish
	italian
	lithuanian
	nepali
	norwegian
	portuguese
	romanian
	russian
	serbian
	spanish
	swedish
	tamil
	turkish
	yiddish
);

# Names of alternative dictionaries for all-ASCII words.  If not
# listed, the language itself is used.  Note order dependency: Use of
# some other language as ASCII dictionary must come after creation of
# that language, so the "backup" language must be listed earlier in
# @languages.

our %ascii_languages = (
	'hindi' => 'english',
	'russian' => 'english',
);

# Parse the command line.  --outdir and --input take an optional string
# value (':s'); --depfile is a plain flag.  If GetOptions reports an
# error, usage() aborts the script.
GetOptions(
	'depfile' => \$depfile,
	'outdir:s' => \$outdir_path,
	'input:s' => \$input_path) || usage();

# Make sure input_path and outdir_path each end in a slash if needed.
# The loop variable aliases the real variables, so each path is updated
# in place; sharing one statement keeps the two checks from drifting
# apart (the slash for input_path used to be appended to outdir_path).
foreach my $dir_path ($input_path, $outdir_path)
{
	if ($dir_path ne '' && substr($dir_path, -1) ne '/')
	{
		$dir_path .= '/';
	}
}

GenerateTsearchFiles();

# Abort with a summary of the command-line options.  Called when
# GetOptions rejects the command line; never returns.
sub usage
{
	die <<EOM;
Usage: snowball_create.pl --input/-i <path> --outdir/-o <path>
--depfile Write dependency file
--outdir Output directory (default '.')
--input Input directory
snowball_create.pl creates snowball_create.sql from snowball_func.sql.in and snowball.sql.in
EOM
}

# Write snowball_create.sql into the output directory.
#
# The file starts with a header comment and the contents of
# snowball_func.sql.in, followed by one copy of the snowball.sql.in
# template per entry in @languages with the _LANGNAME_, _DICTNAME_,
# _CFGNAME_, _ASCDICTNAME_, _NONASCDICTNAME_ and _STOPWORDS_
# placeholders filled in.  When --depfile was given, snowball_create.dep
# is written as well, listing every input file the output depends on.
#
# Takes no arguments and returns nothing; dies if a file cannot be
# opened or if closing an output file reports an error.
sub GenerateTsearchFiles
{
	my $outdir_file = "$outdir_path/snowball_create.sql";
	my $depfile_name = "$outdir_path/snowball_create.dep";

	my $F;
	my $D;
	my $tmpl = read_file("$input_path/snowball.sql.in");

	if ($depfile)
	{
		open($D, '>', $depfile_name)
		  || die "Could not write $depfile_name: $!\n";

		print $D "$outdir_file: $input_path/snowball.sql.in\n";
		print $D "$outdir_file: $input_path/snowball_func.sql.in\n";
	}

	open($F, '>', $outdir_file)
	  || die "Could not write $outdir_file: $!\n";

	print $F "-- Language-specific snowball dictionaries\n";

	print $F read_file("$input_path/snowball_func.sql.in");

	foreach my $lang (@languages)
	{
		# All-ASCII words use the language's own dictionary unless an
		# alternative is listed in %ascii_languages.
		my $asclang = $ascii_languages{$lang} || $lang;
		my $txt = $tmpl;
		my $stop = '';
		my $stopword_path = "$input_path/stopwords/$lang.stop";

		# Only reference a stopword list that exists and is non-empty.
		if (-s $stopword_path)
		{
			$stop = ", StopWords=$lang";

			print $D "$outdir_file: $stopword_path\n" if $depfile;
		}

		$txt =~ s#_LANGNAME_#${lang}#gs;
		$txt =~ s#_DICTNAME_#${lang}_stem#gs;
		$txt =~ s#_CFGNAME_#${lang}#gs;
		$txt =~ s#_ASCDICTNAME_#${asclang}_stem#gs;
		$txt =~ s#_NONASCDICTNAME_#${lang}_stem#gs;
		$txt =~ s#_STOPWORDS_#$stop#gs;
		print $F $txt;
	}

	# Buffered write errors (e.g. a full disk) only surface at close, so
	# check it rather than silently leaving a truncated output file.
	close($F) || die "Could not close $outdir_file: $!\n";
	if ($depfile)
	{
		close($D) || die "Could not close $depfile_name: $!\n";
	}
	return;
}


# Slurp a file and hand its entire contents back as one string.
# Dies if the file cannot be opened.
sub read_file
{
	my ($path) = @_;

	open(my $in, '<', $path) || die "Could not open file $path\n";

	# With the input record separator undefined for the duration of the
	# block, a single read returns the whole file.
	my $contents = do { local $/ = undef; <$in> };
	close($in);

	return $contents;
}
36 changes: 3 additions & 33 deletions src/tools/msvc/Install.pm
Original file line number Diff line number Diff line change
Expand Up @@ -389,39 +389,9 @@ sub GenerateTsearchFiles
my $target = shift;

print "Generating tsearch script...";
my $F;
my $tmpl = read_file('src/backend/snowball/snowball.sql.in');
my $mf = read_file('src/backend/snowball/Makefile');
$mf =~ s{\\\r?\n}{}g;
$mf =~ /^LANGUAGES\s*=\s*(.*)$/m
|| die "Could not find LANGUAGES line in snowball Makefile\n";
my @pieces = split /\s+/, $1;
open($F, '>', "$target/share/snowball_create.sql")
|| die "Could not write snowball_create.sql";
print $F read_file('src/backend/snowball/snowball_func.sql.in');

while ($#pieces > 0)
{
my $lang = shift @pieces || last;
my $asclang = shift @pieces || last;
my $txt = $tmpl;
my $stop = '';

if (-s "src/backend/snowball/stopwords/$lang.stop")
{
$stop = ", StopWords=$lang";
}

$txt =~ s#_LANGNAME_#${lang}#gs;
$txt =~ s#_DICTNAME_#${lang}_stem#gs;
$txt =~ s#_CFGNAME_#${lang}#gs;
$txt =~ s#_ASCDICTNAME_#${asclang}_stem#gs;
$txt =~ s#_NONASCDICTNAME_#${lang}_stem#gs;
$txt =~ s#_STOPWORDS_#$stop#gs;
print $F $txt;
print ".";
}
close($F);
system('perl', 'src/backend/snowball/snowball_create.pl',
'--input', 'src/backend/snowball/',
'--outdir', "$target/share/");
print "\n";
return;
}
Expand Down

0 comments on commit 389028d

Please sign in to comment.