[PATCH v2 4/5] logging: add log cleanup for obsolete domains
Martin Kletzander
mkletzan at redhat.com
Wed Jan 25 13:49:00 UTC 2023
On Fri, Jan 20, 2023 at 04:50:51PM +0600, Oleg Vasilev wrote:
>
>
>On 09.01.2023 18:31, Martin Kletzander wrote:
>> On Thu, Dec 15, 2022 at 01:25:49AM +0600, Oleg Vasilev wrote:
>>> Before, logs from deleted machines have been piling up, since there were
>>> no garbage collection mechanism. Now, virtlogd can be configured to
>>> periodically scan the log folder for orphan logs with no recent
>>> modifications
>>> and delete it.
>>>
>>> A single chain of recent and rotated logs is deleted in a single
>>> transaction.
>>>
>>
>> This seems very complicated for what this is trying to do. Couple of
>> notes below.
>
>That's true, but I don't see any other simpler approach, which achieves
>the same result. We don't want the chains to be deleted in parts, so the
>only way I see is with two phases:
> - First, we aggregate information about the state of the filesystem
> - Then we do the deletion
>
>Do you have any suggestions this can be simplified.
>
What would be the issue with deleting the chains in parts?
>>
>>> Signed-off-by: Oleg Vasilev <oleg.vasilev at virtuozzo.com>
>>> ---
>>> po/POTFILES | 1 +
>>> src/logging/log_cleaner.c | 276 ++++++++++++++++++++++++++++++++++++++
>>> src/logging/log_cleaner.h | 29 ++++
>>> src/logging/log_handler.h | 2 +
>>> src/logging/meson.build | 1 +
>>> 5 files changed, 309 insertions(+)
>>> create mode 100644 src/logging/log_cleaner.c
>>> create mode 100644 src/logging/log_cleaner.h
>>>
>>> diff --git a/po/POTFILES b/po/POTFILES
>>> index 169e2a41dc..2fb6d18e27 100644
>>> --- a/po/POTFILES
>>> +++ b/po/POTFILES
>>> @@ -123,6 +123,7 @@ src/locking/lock_driver_lockd.c
>>> src/locking/lock_driver_sanlock.c
>>> src/locking/lock_manager.c
>>> src/locking/sanlock_helper.c
>>> +src/logging/log_cleaner.c
>>> src/logging/log_daemon.c
>>> src/logging/log_daemon_dispatch.c
>>> src/logging/log_handler.c
>>> diff --git a/src/logging/log_cleaner.c b/src/logging/log_cleaner.c
>>> new file mode 100644
>>> index 0000000000..03de9c17ee
>>> --- /dev/null
>>> +++ b/src/logging/log_cleaner.c
>>> @@ -0,0 +1,276 @@
>>> +/*
>>> + * log_cleaner.c: cleans obsolete log files
>>> + *
>>> + * Copyright (C) 2022 Virtuozzo International GmbH
>>> + *
>>> + * This library is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU Lesser General Public
>>> + * License as published by the Free Software Foundation; either
>>> + * version 2.1 of the License, or (at your option) any later version.
>>> + *
>>> + * This library is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>> + * Lesser General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU Lesser General Public
>>> + * License along with this library; If not, see
>>> + * <http://www.gnu.org/licenses/>.
>>> + */
>>> +
>>> +#include <config.h>
>>> +
>>> +#include "log_cleaner.h"
>>> +#include "log_handler.h"
>>> +
>>> +#include "virerror.h"
>>> +#include "virobject.h"
>>> +#include "virfile.h"
>>> +#include "viralloc.h"
>>> +#include "virlog.h"
>>> +#include "virrotatingfile.h"
>>> +#include "virstring.h"
>>> +
>>> +#define VIR_FROM_THIS VIR_FROM_LOGGING
>>> +
>>> +/* Cleanup log root (/var/log/libvirt) and all subfolders (e.g.
>>> /var/log/libvirt/qemu) */
>>> +#define CLEANER_LOG_DEPTH 1
>>> +#define CLEANER_LOG_TIMEOUT_MS (24 * 3600 * 1000) /* One day */
>>> +#define MAX_TIME ((time_t) G_MAXINT64)
>>
>> Unused
>>
>>> +
>>> +static GRegex *log_regex;
>>> +
>>> +typedef struct _virLogCleanerChain virLogCleanerChain;
>>> +struct _virLogCleanerChain {
>>> + int rotated_max_index;
>>> + time_t last_modified;
>>> +};
>>> +
>>> +typedef struct _virLogCleanerData virLogCleanerData;
>>> +struct _virLogCleanerData {
>>> + virLogHandler *handler;
>>> + time_t oldest_to_keep;
>>> + GHashTable *chains;
>>> +};
>>> +
>>> +static const char*
>>> +virLogCleanerParseFilename(const char* path,
>>> + int* rotated_index)
>>> +{
>>> + g_autoptr(GMatchInfo) matchInfo = NULL;
>>> + g_autofree const char* rotated_index_str = NULL;
>>> + g_autofree const char *clear_path = NULL;
>>> + const char* chain_prefix = NULL;
>>> +
>>> + clear_path = realpath(path, NULL);
>>> + if (!clear_path) {
>>> + virReportSystemError(errno,
>>> + _("Failed to resolve path '%s'"),
>>> + path);
>>> + return NULL;
>>> + }
>>> +
>>> + if (!g_regex_match(log_regex, path, 0, &matchInfo)) {
>>> + return NULL;
>>> + }
>>> +
>>> + chain_prefix = g_match_info_fetch(matchInfo, 1);
>>> + if (!rotated_index)
>>> + return chain_prefix;
>>> +
>>> + *rotated_index = 0;
>>> + rotated_index_str = g_match_info_fetch(matchInfo, 2);
>>
>> You could just use 3 instead of 2 here and ...
>>
>>> +
>>> + if (!rotated_index_str)
>>> + return chain_prefix;
>>> +
>>> + if (virStrToLong_i(rotated_index_str+1, NULL, 10, rotated_index) <
>>> 0) {
>>
>> ... avoid this +1 here.
>>
>>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>>> + _("Failed to parse rotated index from '%s'"),
>>> + rotated_index_str);
>>> + return NULL;
>>> + }
>>> + return chain_prefix;
>>> +}
>>> +
>>> +static void
>>> +virLogCleanerProcessFile(virLogCleanerData* data,
>>> + const char* path,
>>> + struct stat* sb)
>>
>> Inconsistency in the pointer declaration (asterisk position).
>>
>>> +{
>>> + int rotated_index = 0;
>>> + g_autofree const char *chain_prefix = NULL;
>>> + virLogCleanerChain *chain;
>>> +
>>> + if (!S_ISREG(sb->st_mode)) {
>>> + return;
>>> + }
>>> +
>>> + chain_prefix = virLogCleanerParseFilename(path, &rotated_index);
>>> +
>>> + if (!chain_prefix) {
>>> + return;
>>> + }
>>
>> Here (and in bunch more places) the braces should be avoided.
>>
>>> +
>>> + if (rotated_index > data->handler->config->max_backups) {
>>> + if (unlink(path) < 0) {
>>> + virReportSystemError(errno, _("Unable to delete %s"), path);
>>> + }
>>> + return;
>>> + }
>>> +
>>> + chain = g_hash_table_lookup(data->chains, chain_prefix);
>>> +
>>> + if (!chain) {
>>> + chain = g_new0(virLogCleanerChain, 1);
>>> + g_hash_table_insert(data->chains, (void *) chain_prefix, chain);
>>> + chain_prefix = NULL; /* Moved to hash table */
>>
>> You can use g_steal_pointer() to collapse these two lines.
>>
>>> + }
>>> +
>>> + chain->last_modified = MAX(chain->last_modified, sb->st_mtime);
>>
>> I think you meant MIN here.
>
>No, MAX should be used since we are looking for the most recent file. If
>the chain was written to recent enough, we would skip it during the
>deletion phase.
>
last_modified is set to G_MAXINT64, MAX with anything else int64 will
always return G_MAXINT64. If you wanted to do MIN() here, then
Also I recently learned that MAX is from glibc, so setting it
conditionally might be a better choice.
>>
>>> + chain->rotated_max_index = MAX(chain->rotated_max_index,
>>> + rotated_index);
>>> +}
>>> +
>>> +static GHashTable*
>>> +virLogCleanerCreateTable(virLogHandler *handler)
>>> +{
>>> + /* (const char*) chain_prefix -> (virLogCleanerChain*) chain */
>>> + GHashTable *chains = g_hash_table_new_full(g_str_hash, g_str_equal,
>>> + g_free, g_free);
>>> + size_t i;
>>> + virLogHandlerLogFile *file;
>>> + const char *chain_prefix;
>>> + virLogCleanerChain* chain;
>>> + VIR_LOCK_GUARD lock = virObjectLockGuard(handler);
>>> +
>>
>> You are locking the handler in an event loop thread, which feels a bit
>> messy to me.
>
>But this is only for this function, which doesn't have any IO or
>anything heavy.
>
That's why I did not say it's unacceptable, it just *feels* dirty. If
this was done in a worker job, that would be a different thing. But I
do not oppose to this completely.
>The rest is clear to me, I will fix in a v3.
>
>Oleg
>
>>
>>> + for (i = 0; i < handler->nfiles; i++) {
>>> + file = handler->files[i];
>>> + chain_prefix =
>>> virLogCleanerParseFilename(virRotatingFileWriterGetPath(file->file),
>>> + NULL);
>>> + if (!chain_prefix) {
>>> + continue;
>>> + }
>>> +
>>> + chain = g_new0(virLogCleanerChain, 1);
>>> + chain->last_modified = (time_t) G_MAXINT64;
>>> + g_hash_table_insert(chains, (void *) chain_prefix, chain);
>>> + }
>>> +
>>> + return chains;
>>> +}
>>> +
>>> +static void
>>> +virLogCleanerProcessFolder(virLogCleanerData *data,
>>> + const char *path,
>>> + int depth_left)
>>> +{
>>> + DIR *dir;
>>> + struct dirent *entry;
>>> + struct stat sb;
>>> +
>>> + if (virDirOpenIfExists(&dir, path) < 0)
>>> + return;
>>> +
>>> + while (virDirRead(dir, &entry, path) > 0) {
>>> + g_autofree char *newpath = NULL;
>>> + if (STREQ(entry->d_name, "."))
>>> + continue;
>>> + if (STREQ(entry->d_name, ".."))
>>> + continue;
>>> +
>>
>> These two entries are skipped in virDirRead() already.
>>
>>> + newpath = g_strdup_printf("%s/%s", path, entry->d_name);
>>> +
>>> + if (stat(newpath, &sb) < 0) {
>>> + virReportSystemError(errno, _("Unable to stat %s"),
>>> newpath);
>>> + continue;
>>> + }
>>> +
>>> + if (S_ISDIR(sb.st_mode)) {
>>> + if (depth_left > 0)
>>> + virLogCleanerProcessFolder(data, newpath, depth_left
>>> - 1);
>>> + continue;
>>> + }
>>> +
>>> + virLogCleanerProcessFile(data, newpath, &sb);
>>> + }
>>> +
>>> + virDirClose(dir);
>>> +}
>>> +
>>> +static void
>>> +virLogCleanerChainCB(gpointer key,
>>> + gpointer value,
>>> + gpointer user_data)
>>> +{
>>> + const char* chain_prefix = key;
>>> + virLogCleanerChain* chain = value;
>>> + virLogCleanerData* data = user_data;
>>> + size_t i;
>>> +
>>> + if (chain->last_modified > data->oldest_to_keep) {
>>> + return;
>>> + }
>>> +
>>> + for (i = 0; i <= chain->rotated_max_index; i++) {
>>> + g_autofree char* path = NULL;
>>> + if (i == 0) {
>>> + path = g_strdup_printf("%s.log", chain_prefix);
>>> + } else {
>>> + path = g_strdup_printf("%s.log.%ld", chain_prefix, i);
>>> + }
>>
>> Here you're not even considering .log.0
>>
>>> + if (unlink(path) < 0) {
>>
>> You should also skip ENOENT be here.
>>
>>> + virReportSystemError(errno, _("Unable to delete %s"), path);
>>
>> Since you are running this in the event loop thread I'd prefer VIR_WARN
>> for these kind of things.
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20230125/fb290760/attachment-0001.sig>
More information about the libvir-list
mailing list