Add new command for cleaning word tokens

For the moment, it only pulls out outdated house numbers.
This commit is contained in:
Sarah Hoffmann
2022-01-20 20:05:15 +01:00
parent 86588419fb
commit 344a2bfc1a
4 changed files with 53 additions and 0 deletions

View File

@@ -39,6 +39,8 @@ class UpdateRefresh:
group = parser.add_argument_group('Data arguments')
group.add_argument('--postcodes', action='store_true',
help='Update postcode centroid table')
group.add_argument('--word-tokens', action='store_true',
help='Clean up search terms')
group.add_argument('--word-counts', action='store_true',
help='Compute frequency of full-word search terms')
group.add_argument('--address-levels', action='store_true',
@@ -76,6 +78,10 @@ class UpdateRefresh:
LOG.error("The place table doesn't exist. "
"Postcode updates on a frozen database is not possible.")
if args.word_tokens:
tokenizer = self._get_tokenizer(args.config)
tokenizer.update_word_tokens()
if args.word_counts:
LOG.warning('Recompute word statistics')
self._get_tokenizer(args.config).update_statistics()