[PATCH v2 4/5] logging: add log cleanup for obsolete domains

Martin Kletzander mkletzan at redhat.com
Wed Jan 25 13:49:00 UTC 2023


On Fri, Jan 20, 2023 at 04:50:51PM +0600, Oleg Vasilev wrote:
>
>
>On 09.01.2023 18:31, Martin Kletzander wrote:
>> On Thu, Dec 15, 2022 at 01:25:49AM +0600, Oleg Vasilev wrote:
>>> Before, logs from deleted machines have been piling up, since there were
>>> no garbage collection mechanism. Now, virtlogd can be configured to
>>> periodically scan the log folder for orphan logs with no recent
>>> modifications
>>> and delete it.
>>>
>>> A single chain of recent and rotated logs is deleted in a single
>>> transaction.
>>>
>>
>> This seems very complicated for what this is trying to do.  Couple of
>> notes below.
>
>That's true, but I don't see any other simpler approach, which achieves
>the same result. We don't want the chains to be deleted in parts, so the
>only way I see is with two phases:
>  - First, we aggregate information about the state of the filesystem
>  - Then we do the deletion
>
>Do you have any suggestions this can be simplified.
>

What would be the issue with deleting the chains in parts?

>>
>>> Signed-off-by: Oleg Vasilev <oleg.vasilev at virtuozzo.com>
>>> ---
>>> po/POTFILES               |   1 +
>>> src/logging/log_cleaner.c | 276 ++++++++++++++++++++++++++++++++++++++
>>> src/logging/log_cleaner.h |  29 ++++
>>> src/logging/log_handler.h |   2 +
>>> src/logging/meson.build   |   1 +
>>> 5 files changed, 309 insertions(+)
>>> create mode 100644 src/logging/log_cleaner.c
>>> create mode 100644 src/logging/log_cleaner.h
>>>
>>> diff --git a/po/POTFILES b/po/POTFILES
>>> index 169e2a41dc..2fb6d18e27 100644
>>> --- a/po/POTFILES
>>> +++ b/po/POTFILES
>>> @@ -123,6 +123,7 @@ src/locking/lock_driver_lockd.c
>>> src/locking/lock_driver_sanlock.c
>>> src/locking/lock_manager.c
>>> src/locking/sanlock_helper.c
>>> +src/logging/log_cleaner.c
>>> src/logging/log_daemon.c
>>> src/logging/log_daemon_dispatch.c
>>> src/logging/log_handler.c
>>> diff --git a/src/logging/log_cleaner.c b/src/logging/log_cleaner.c
>>> new file mode 100644
>>> index 0000000000..03de9c17ee
>>> --- /dev/null
>>> +++ b/src/logging/log_cleaner.c
>>> @@ -0,0 +1,276 @@
>>> +/*
>>> + * log_cleaner.c: cleans obsolete log files
>>> + *
>>> + * Copyright (C) 2022 Virtuozzo International GmbH
>>> + *
>>> + * This library is free software; you can redistribute it and/or
>>> + * modify it under the terms of the GNU Lesser General Public
>>> + * License as published by the Free Software Foundation; either
>>> + * version 2.1 of the License, or (at your option) any later version.
>>> + *
>>> + * This library is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> + * Lesser General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU Lesser General Public
>>> + * License along with this library;  If not, see
>>> + * <http://www.gnu.org/licenses/>.
>>> + */
>>> +
>>> +#include <config.h>
>>> +
>>> +#include "log_cleaner.h"
>>> +#include "log_handler.h"
>>> +
>>> +#include "virerror.h"
>>> +#include "virobject.h"
>>> +#include "virfile.h"
>>> +#include "viralloc.h"
>>> +#include "virlog.h"
>>> +#include "virrotatingfile.h"
>>> +#include "virstring.h"
>>> +
>>> +#define VIR_FROM_THIS VIR_FROM_LOGGING
>>> +
>>> +/* Cleanup log root (/var/log/libvirt) and all subfolders (e.g.
>>> /var/log/libvirt/qemu) */
>>> +#define CLEANER_LOG_DEPTH 1
>>> +#define CLEANER_LOG_TIMEOUT_MS (24 * 3600 * 1000) /* One day */
>>> +#define MAX_TIME ((time_t) G_MAXINT64)
>>
>> Unused
>>
>>> +
>>> +static GRegex *log_regex;
>>> +
>>> +typedef struct _virLogCleanerChain virLogCleanerChain;
>>> +struct _virLogCleanerChain {
>>> +    int rotated_max_index;
>>> +    time_t last_modified;
>>> +};
>>> +
>>> +typedef struct _virLogCleanerData virLogCleanerData;
>>> +struct _virLogCleanerData {
>>> +    virLogHandler *handler;
>>> +    time_t oldest_to_keep;
>>> +    GHashTable *chains;
>>> +};
>>> +
>>> +static const char*
>>> +virLogCleanerParseFilename(const char* path,
>>> +                           int* rotated_index)
>>> +{
>>> +    g_autoptr(GMatchInfo) matchInfo = NULL;
>>> +    g_autofree const char* rotated_index_str = NULL;
>>> +    g_autofree const char *clear_path = NULL;
>>> +    const char* chain_prefix = NULL;
>>> +
>>> +    clear_path = realpath(path, NULL);
>>> +    if (!clear_path) {
>>> +        virReportSystemError(errno,
>>> +                             _("Failed to resolve path '%s'"),
>>> +                             path);
>>> +        return NULL;
>>> +    }
>>> +
>>> +    if (!g_regex_match(log_regex, path, 0, &matchInfo)) {
>>> +        return NULL;
>>> +    }
>>> +
>>> +    chain_prefix = g_match_info_fetch(matchInfo, 1);
>>> +    if (!rotated_index)
>>> +        return chain_prefix;
>>> +
>>> +    *rotated_index = 0;
>>> +    rotated_index_str = g_match_info_fetch(matchInfo, 2);
>>
>> You could just use 3 instead of 2 here and ...
>>
>>> +
>>> +    if (!rotated_index_str)
>>> +        return chain_prefix;
>>> +
>>> +    if (virStrToLong_i(rotated_index_str+1, NULL, 10, rotated_index) <
>>> 0) {
>>
>> ... avoid this +1 here.
>>
>>> +        virReportError(VIR_ERR_INTERNAL_ERROR,
>>> +                       _("Failed to parse rotated index from '%s'"),
>>> +                       rotated_index_str);
>>> +        return NULL;
>>> +    }
>>> +    return chain_prefix;
>>> +}
>>> +
>>> +static void
>>> +virLogCleanerProcessFile(virLogCleanerData* data,
>>> +                         const char* path,
>>> +                         struct stat* sb)
>>
>> Inconsistency in the pointer declaration (asterisk position).
>>
>>> +{
>>> +    int rotated_index = 0;
>>> +    g_autofree const char *chain_prefix = NULL;
>>> +    virLogCleanerChain *chain;
>>> +
>>> +    if (!S_ISREG(sb->st_mode)) {
>>> +        return;
>>> +    }
>>> +
>>> +    chain_prefix = virLogCleanerParseFilename(path, &rotated_index);
>>> +
>>> +    if (!chain_prefix) {
>>> +        return;
>>> +    }
>>
>> Here (and in bunch more places) the braces should be avoided.
>>
>>> +
>>> +    if (rotated_index > data->handler->config->max_backups) {
>>> +        if (unlink(path) < 0) {
>>> +            virReportSystemError(errno, _("Unable to delete %s"), path);
>>> +        }
>>> +        return;
>>> +    }
>>> +
>>> +    chain = g_hash_table_lookup(data->chains, chain_prefix);
>>> +
>>> +    if (!chain) {
>>> +        chain = g_new0(virLogCleanerChain, 1);
>>> +        g_hash_table_insert(data->chains, (void *) chain_prefix, chain);
>>> +        chain_prefix = NULL; /* Moved to hash table */
>>
>> You can use g_steal_pointer() to collapse these two lines.
>>
>>> +    }
>>> +
>>> +    chain->last_modified = MAX(chain->last_modified, sb->st_mtime);
>>
>> I think you meant MIN here.
>
>No, MAX should be used since we are looking for the most recent file. If
>the chain was written to recent enough, we would skip it during the
>deletion phase.
>

last_modified is set to G_MAXINT64, MAX with anything else int64 will
always return G_MAXINT64.  If you wanted to do MIN() here, then

Also I recently learned that MAX is from glibc, so setting it
conditionally might be a better choice.

>>
>>> +    chain->rotated_max_index = MAX(chain->rotated_max_index,
>>> +                                   rotated_index);
>>> +}
>>> +
>>> +static GHashTable*
>>> +virLogCleanerCreateTable(virLogHandler *handler)
>>> +{
>>> +    /* (const char*) chain_prefix -> (virLogCleanerChain*) chain */
>>> +    GHashTable *chains = g_hash_table_new_full(g_str_hash, g_str_equal,
>>> +                                               g_free, g_free);
>>> +    size_t i;
>>> +    virLogHandlerLogFile *file;
>>> +    const char *chain_prefix;
>>> +    virLogCleanerChain* chain;
>>> +    VIR_LOCK_GUARD lock = virObjectLockGuard(handler);
>>> +
>>
>> You are locking the handler in an event loop thread, which feels a bit
>> messy to me.
>
>But this is only for this function, which doesn't have any IO or
>anything heavy.
>

That's why I did not say it's unacceptable, it just *feels* dirty.  If
this was done in a worker job, that would be a different thing.  But I
do not oppose to this completely.

>The rest is clear to me, I will fix in a v3.
>
>Oleg
>
>>
>>> +    for (i = 0; i < handler->nfiles; i++) {
>>> +        file = handler->files[i];
>>> +        chain_prefix =
>>> virLogCleanerParseFilename(virRotatingFileWriterGetPath(file->file),
>>> +                                                  NULL);
>>> +        if (!chain_prefix) {
>>> +            continue;
>>> +        }
>>> +
>>> +        chain = g_new0(virLogCleanerChain, 1);
>>> +        chain->last_modified = (time_t) G_MAXINT64;
>>> +        g_hash_table_insert(chains, (void *) chain_prefix, chain);
>>> +    }
>>> +
>>> +    return chains;
>>> +}
>>> +
>>> +static void
>>> +virLogCleanerProcessFolder(virLogCleanerData *data,
>>> +                           const char *path,
>>> +                           int depth_left)
>>> +{
>>> +    DIR *dir;
>>> +    struct dirent *entry;
>>> +    struct stat sb;
>>> +
>>> +    if (virDirOpenIfExists(&dir, path) < 0)
>>> +        return;
>>> +
>>> +    while (virDirRead(dir, &entry, path) > 0) {
>>> +        g_autofree char *newpath = NULL;
>>> +        if (STREQ(entry->d_name, "."))
>>> +            continue;
>>> +        if (STREQ(entry->d_name, ".."))
>>> +            continue;
>>> +
>>
>> These two entries are skipped in virDirRead() already.
>>
>>> +        newpath = g_strdup_printf("%s/%s", path, entry->d_name);
>>> +
>>> +        if (stat(newpath, &sb) < 0) {
>>> +            virReportSystemError(errno, _("Unable to stat %s"),
>>> newpath);
>>> +            continue;
>>> +        }
>>> +
>>> +        if (S_ISDIR(sb.st_mode)) {
>>> +            if (depth_left > 0)
>>> +                virLogCleanerProcessFolder(data, newpath, depth_left
>>> - 1);
>>> +            continue;
>>> +        }
>>> +
>>> +        virLogCleanerProcessFile(data, newpath, &sb);
>>> +    }
>>> +
>>> +    virDirClose(dir);
>>> +}
>>> +
>>> +static void
>>> +virLogCleanerChainCB(gpointer key,
>>> +                     gpointer value,
>>> +                     gpointer user_data)
>>> +{
>>> +    const char* chain_prefix = key;
>>> +    virLogCleanerChain* chain = value;
>>> +    virLogCleanerData* data = user_data;
>>> +    size_t i;
>>> +
>>> +    if (chain->last_modified > data->oldest_to_keep) {
>>> +        return;
>>> +    }
>>> +
>>> +    for (i = 0; i <= chain->rotated_max_index; i++) {
>>> +        g_autofree char* path = NULL;
>>> +        if (i == 0) {
>>> +            path = g_strdup_printf("%s.log", chain_prefix);
>>> +        } else {
>>> +            path = g_strdup_printf("%s.log.%ld", chain_prefix, i);
>>> +        }
>>
>> Here you're not even considering .log.0
>>
>>> +        if (unlink(path) < 0) {
>>
>> You should also skip ENOENT be here.
>>
>>> +            virReportSystemError(errno, _("Unable to delete %s"), path);
>>
>> Since you are running this in the event loop thread I'd prefer VIR_WARN
>> for these kind of things.
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20230125/fb290760/attachment-0001.sig>


More information about the libvir-list mailing list