[PATCH v2 4/5] logging: add log cleanup for obsolete domains

Oleg Vasilev oleg.vasilev at virtuozzo.com
Mon Jan 30 15:01:17 UTC 2023



On 25.01.2023 19:49, Martin Kletzander wrote:
> On Fri, Jan 20, 2023 at 04:50:51PM +0600, Oleg Vasilev wrote:
>>
>>
>> On 09.01.2023 18:31, Martin Kletzander wrote:
>>> On Thu, Dec 15, 2022 at 01:25:49AM +0600, Oleg Vasilev wrote:
>>>> Before, logs from deleted machines have been piling up, since there 
>>>> were
>>>> no garbage collection mechanism. Now, virtlogd can be configured to
>>>> periodically scan the log folder for orphan logs with no recent
>>>> modifications
>>>> and delete it.
>>>>
>>>> A single chain of recent and rotated logs is deleted in a single
>>>> transaction.
>>>>
>>>
>>> This seems very complicated for what this is trying to do.  Couple of
>>> notes below.
>>
>> That's true, but I don't see any other simpler approach, which achieves
>> the same result. We don't want the chains to be deleted in parts, so the
>> only way I see is with two phases:
>>  - First, we aggregate information about the state of the filesystem
>>  - Then we do the deletion
>>
>> Do you have any suggestions this can be simplified.
>>
> 
> What would be the issue with deleting the chains in parts?

For an alive log chain, we want to keep the rotated files as history.
That was actually the issue raised by you: :)

   "One more thing is that if larger logs are rotated, then this would
   remove the old ones even if they are kept as number of backups.  That
   seems like something that should be avoided (probably not even
   configurable)."

> 
>>>
>>>> Signed-off-by: Oleg Vasilev <oleg.vasilev at virtuozzo.com>
>>>> ---
>>>> po/POTFILES               |   1 +
>>>> src/logging/log_cleaner.c | 276 ++++++++++++++++++++++++++++++++++++++
>>>> src/logging/log_cleaner.h |  29 ++++
>>>> src/logging/log_handler.h |   2 +
>>>> src/logging/meson.build   |   1 +
>>>> 5 files changed, 309 insertions(+)
>>>> create mode 100644 src/logging/log_cleaner.c
>>>> create mode 100644 src/logging/log_cleaner.h
>>>>
>>>> diff --git a/po/POTFILES b/po/POTFILES
>>>> index 169e2a41dc..2fb6d18e27 100644
>>>> --- a/po/POTFILES
>>>> +++ b/po/POTFILES
>>>> @@ -123,6 +123,7 @@ src/locking/lock_driver_lockd.c
>>>> src/locking/lock_driver_sanlock.c
>>>> src/locking/lock_manager.c
>>>> src/locking/sanlock_helper.c
>>>> +src/logging/log_cleaner.c
>>>> src/logging/log_daemon.c
>>>> src/logging/log_daemon_dispatch.c
>>>> src/logging/log_handler.c
>>>> diff --git a/src/logging/log_cleaner.c b/src/logging/log_cleaner.c
>>>> new file mode 100644
>>>> index 0000000000..03de9c17ee
>>>> --- /dev/null
>>>> +++ b/src/logging/log_cleaner.c
>>>> @@ -0,0 +1,276 @@
>>>> +/*
>>>> + * log_cleaner.c: cleans obsolete log files
>>>> + *
>>>> + * Copyright (C) 2022 Virtuozzo International GmbH
>>>> + *
>>>> + * This library is free software; you can redistribute it and/or
>>>> + * modify it under the terms of the GNU Lesser General Public
>>>> + * License as published by the Free Software Foundation; either
>>>> + * version 2.1 of the License, or (at your option) any later version.
>>>> + *
>>>> + * This library is distributed in the hope that it will be useful,
>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>> + * Lesser General Public License for more details.
>>>> + *
>>>> + * You should have received a copy of the GNU Lesser General Public
>>>> + * License along with this library;  If not, see
>>>> + * <http://www.gnu.org/licenses/>.
>>>> + */
>>>> +
>>>> +#include <config.h>
>>>> +
>>>> +#include "log_cleaner.h"
>>>> +#include "log_handler.h"
>>>> +
>>>> +#include "virerror.h"
>>>> +#include "virobject.h"
>>>> +#include "virfile.h"
>>>> +#include "viralloc.h"
>>>> +#include "virlog.h"
>>>> +#include "virrotatingfile.h"
>>>> +#include "virstring.h"
>>>> +
>>>> +#define VIR_FROM_THIS VIR_FROM_LOGGING
>>>> +
>>>> +/* Cleanup log root (/var/log/libvirt) and all subfolders (e.g.
>>>> /var/log/libvirt/qemu) */
>>>> +#define CLEANER_LOG_DEPTH 1
>>>> +#define CLEANER_LOG_TIMEOUT_MS (24 * 3600 * 1000) /* One day */
>>>> +#define MAX_TIME ((time_t) G_MAXINT64)
>>>
>>> Unused
>>>
>>>> +
>>>> +static GRegex *log_regex;
>>>> +
>>>> +typedef struct _virLogCleanerChain virLogCleanerChain;
>>>> +struct _virLogCleanerChain {
>>>> +    int rotated_max_index;
>>>> +    time_t last_modified;
>>>> +};
>>>> +
>>>> +typedef struct _virLogCleanerData virLogCleanerData;
>>>> +struct _virLogCleanerData {
>>>> +    virLogHandler *handler;
>>>> +    time_t oldest_to_keep;
>>>> +    GHashTable *chains;
>>>> +};
>>>> +
>>>> +static const char*
>>>> +virLogCleanerParseFilename(const char* path,
>>>> +                           int* rotated_index)
>>>> +{
>>>> +    g_autoptr(GMatchInfo) matchInfo = NULL;
>>>> +    g_autofree const char* rotated_index_str = NULL;
>>>> +    g_autofree const char *clear_path = NULL;
>>>> +    const char* chain_prefix = NULL;
>>>> +
>>>> +    clear_path = realpath(path, NULL);
>>>> +    if (!clear_path) {
>>>> +        virReportSystemError(errno,
>>>> +                             _("Failed to resolve path '%s'"),
>>>> +                             path);
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    if (!g_regex_match(log_regex, path, 0, &matchInfo)) {
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    chain_prefix = g_match_info_fetch(matchInfo, 1);
>>>> +    if (!rotated_index)
>>>> +        return chain_prefix;
>>>> +
>>>> +    *rotated_index = 0;
>>>> +    rotated_index_str = g_match_info_fetch(matchInfo, 2);
>>>
>>> You could just use 3 instead of 2 here and ...
>>>
>>>> +
>>>> +    if (!rotated_index_str)
>>>> +        return chain_prefix;
>>>> +
>>>> +    if (virStrToLong_i(rotated_index_str+1, NULL, 10, rotated_index) <
>>>> 0) {
>>>
>>> ... avoid this +1 here.
>>>
>>>> +        virReportError(VIR_ERR_INTERNAL_ERROR,
>>>> +                       _("Failed to parse rotated index from '%s'"),
>>>> +                       rotated_index_str);
>>>> +        return NULL;
>>>> +    }
>>>> +    return chain_prefix;
>>>> +}
>>>> +
>>>> +static void
>>>> +virLogCleanerProcessFile(virLogCleanerData* data,
>>>> +                         const char* path,
>>>> +                         struct stat* sb)
>>>
>>> Inconsistency in the pointer declaration (asterisk position).

By the way, should we have it in linters somewhere?

>>>
>>>> +{
>>>> +    int rotated_index = 0;
>>>> +    g_autofree const char *chain_prefix = NULL;
>>>> +    virLogCleanerChain *chain;
>>>> +
>>>> +    if (!S_ISREG(sb->st_mode)) {
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    chain_prefix = virLogCleanerParseFilename(path, &rotated_index);
>>>> +
>>>> +    if (!chain_prefix) {
>>>> +        return;
>>>> +    }
>>>
>>> Here (and in bunch more places) the braces should be avoided.
>>>
>>>> +
>>>> +    if (rotated_index > data->handler->config->max_backups) {
>>>> +        if (unlink(path) < 0) {
>>>> +            virReportSystemError(errno, _("Unable to delete %s"), 
>>>> path);
>>>> +        }
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    chain = g_hash_table_lookup(data->chains, chain_prefix);
>>>> +
>>>> +    if (!chain) {
>>>> +        chain = g_new0(virLogCleanerChain, 1);
>>>> +        g_hash_table_insert(data->chains, (void *) chain_prefix, 
>>>> chain);
>>>> +        chain_prefix = NULL; /* Moved to hash table */
>>>
>>> You can use g_steal_pointer() to collapse these two line >>>
>>>> +    }
>>>> +
>>>> +    chain->last_modified = MAX(chain->last_modified, sb->st_mtime);
>>>
>>> I think you meant MIN here.
>>
>> No, MAX should be used since we are looking for the most recent file. If
>> the chain was written to recent enough, we would skip it during the
>> deletion phase.
>>
> 
> last_modified is set to G_MAXINT64, MAX with anything else int64 will
> always return G_MAXINT64.  If you wanted to do MIN() here, then

That's the desired thing. For opened log files, we want to set 
"the-most-recent-time" aka G_MAXINT64 to prevent its deletion. I've 
added a comment explaining this.

There should be a v3 nearby.

Oleg

> 
> Also I recently learned that MAX is from glibc, so setting it
> conditionally might be a better choice.
> 
>>>
>>>> +    chain->rotated_max_index = MAX(chain->rotated_max_index,
>>>> +                                   rotated_index);
>>>> +}
>>>> +
>>>> +static GHashTable*
>>>> +virLogCleanerCreateTable(virLogHandler *handler)
>>>> +{
>>>> +    /* (const char*) chain_prefix -> (virLogCleanerChain*) chain */
>>>> +    GHashTable *chains = g_hash_table_new_full(g_str_hash, 
>>>> g_str_equal,
>>>> +                                               g_free, g_free);
>>>> +    size_t i;
>>>> +    virLogHandlerLogFile *file;
>>>> +    const char *chain_prefix;
>>>> +    virLogCleanerChain* chain;
>>>> +    VIR_LOCK_GUARD lock = virObjectLockGuard(handler);
>>>> +
>>>
>>> You are locking the handler in an event loop thread, which feels a bit
>>> messy to me.
>>
>> But this is only for this function, which doesn't have any IO or
>> anything heavy.
>>
> 
> That's why I did not say it's unacceptable, it just *feels* dirty.  If
> this was done in a worker job, that would be a different thing.  But I
> do not oppose to this completely.
> 
>> The rest is clear to me, I will fix in a v3.
>>
>> Oleg
>>
>>>
>>>> +    for (i = 0; i < handler->nfiles; i++) {
>>>> +        file = handler->files[i];
>>>> +        chain_prefix =
>>>> virLogCleanerParseFilename(virRotatingFileWriterGetPath(file->file),
>>>> +                                                  NULL);
>>>> +        if (!chain_prefix) {
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        chain = g_new0(virLogCleanerChain, 1);
>>>> +        chain->last_modified = (time_t) G_MAXINT64;
>>>> +        g_hash_table_insert(chains, (void *) chain_prefix, chain);
>>>> +    }
>>>> +
>>>> +    return chains;
>>>> +}
>>>> +
>>>> +static void
>>>> +virLogCleanerProcessFolder(virLogCleanerData *data,
>>>> +                           const char *path,
>>>> +                           int depth_left)
>>>> +{
>>>> +    DIR *dir;
>>>> +    struct dirent *entry;
>>>> +    struct stat sb;
>>>> +
>>>> +    if (virDirOpenIfExists(&dir, path) < 0)
>>>> +        return;
>>>> +
>>>> +    while (virDirRead(dir, &entry, path) > 0) {
>>>> +        g_autofree char *newpath = NULL;
>>>> +        if (STREQ(entry->d_name, "."))
>>>> +            continue;
>>>> +        if (STREQ(entry->d_name, ".."))
>>>> +            continue;
>>>> +
>>>
>>> These two entries are skipped in virDirRead() already.
>>>
>>>> +        newpath = g_strdup_printf("%s/%s", path, entry->d_name);
>>>> +
>>>> +        if (stat(newpath, &sb) < 0) {
>>>> +            virReportSystemError(errno, _("Unable to stat %s"),
>>>> newpath);
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        if (S_ISDIR(sb.st_mode)) {
>>>> +            if (depth_left > 0)
>>>> +                virLogCleanerProcessFolder(data, newpath, depth_left
>>>> - 1);
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        virLogCleanerProcessFile(data, newpath, &sb);
>>>> +    }
>>>> +
>>>> +    virDirClose(dir);
>>>> +}
>>>> +
>>>> +static void
>>>> +virLogCleanerChainCB(gpointer key,
>>>> +                     gpointer value,
>>>> +                     gpointer user_data)
>>>> +{
>>>> +    const char* chain_prefix = key;
>>>> +    virLogCleanerChain* chain = value;
>>>> +    virLogCleanerData* data = user_data;
>>>> +    size_t i;
>>>> +
>>>> +    if (chain->last_modified > data->oldest_to_keep) {
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    for (i = 0; i <= chain->rotated_max_index; i++) {
>>>> +        g_autofree char* path = NULL;
>>>> +        if (i == 0) {
>>>> +            path = g_strdup_printf("%s.log", chain_prefix);
>>>> +        } else {
>>>> +            path = g_strdup_printf("%s.log.%ld", chain_prefix, i);
>>>> +        }
>>>
>>> Here you're not even considering .log.0
>>>
>>>> +        if (unlink(path) < 0) {
>>>
>>> You should also skip ENOENT be here.
>>>
>>>> +            virReportSystemError(errno, _("Unable to delete %s"), 
>>>> path);
>>>
>>> Since you are running this in the event loop thread I'd prefer VIR_WARN
>>> for these kind of things.
>>



More information about the libvir-list mailing list