rpms/beagle/F-8 beagle-0.2.18-doc-extractor.patch, NONE, 1.1 beagle.spec, 1.107, 1.108
Adel Gadllah (drago01)
fedora-extras-commits at redhat.com
Mon Feb 11 08:46:33 UTC 2008
Author: drago01
Update of /cvs/pkgs/rpms/beagle/F-8
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv13761
Modified Files:
beagle.spec
Added Files:
beagle-0.2.18-doc-extractor.patch
Log Message:
Replace doc-extractor fix with upstream version
beagle-0.2.18-doc-extractor.patch:
--- NEW FILE beagle-0.2.18-doc-extractor.patch ---
diff -upNr beagle-0.2.18.orign/Filters/FilterDOC.cs beagle-0.2.18/Filters/FilterDOC.cs
--- beagle-0.2.18.orign/Filters/FilterDOC.cs 2007-02-07 20:04:18.000000000 +0100
+++ beagle-0.2.18/Filters/FilterDOC.cs 2008-02-11 09:39:29.000000000 +0100
@@ -100,6 +100,7 @@ namespace Beagle.Filters {
pc.Arguments = new string [] { exe, FileInfo.FullName };
pc.RedirectStandardOutput = true;
pc.RedirectStandardError = true;
+ pc.UseLangC = true;
// Let beagle-doc-extractor run for 90 CPU seconds, max.
pc.CpuLimit = 90;
diff -upNr beagle-0.2.18.orign/Filters/FilterDOC.cs.orig beagle-0.2.18/Filters/FilterDOC.cs.orig
--- beagle-0.2.18.orign/Filters/FilterDOC.cs.orig 1970-01-01 01:00:00.000000000 +0100
+++ beagle-0.2.18/Filters/FilterDOC.cs.orig 2007-02-07 20:04:18.000000000 +0100
@@ -0,0 +1,163 @@
+//
+// FilterDOC.cs: MS Word filter. Uses an external extractor utility to
+// actually get the text.
+//
+// Copyright (C) 2004-2007 Novell, Inc.
+//
+
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.IO;
+using System.Runtime.InteropServices;
+
+using Beagle.Util;
+using Beagle.Daemon;
+
+using Gsf;
+
+namespace Beagle.Filters {
+
+ public class FilterDOC : FilterOle {
+
+ //////////////////////////////////////////////////////////
+
+ public FilterDOC ()
+ {
+ SnippetMode = true;
+ }
+
+ protected override void RegisterSupportedTypes ()
+ {
+ AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/msword"));
+ AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/vnd.ms-word"));
+ AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-msword"));
+ }
+
+ override protected void OpenStorage (FileInfo info)
+ {
+ FileName = info.FullName;
+ }
+
+ override protected void ExtractMetaData (Gsf.Input sumStream, Gsf.Input docSumStream)
+ {
+ int count = 0;
+ DocProp prop = null;
+
+ if (sumMeta != null) {
+ prop = sumMeta.Lookup ("gsf:word-count");
+ if (prop != null)
+ count = (int) prop.Val;
+ if (count > 0)
+ AddProperty (Beagle.Property.NewUnsearched ("fixme:word-count", count));
+
+ count = 0;
+ prop = sumMeta.Lookup ("gsf:page-count");
+ if (prop != null)
+ count = (int) prop.Val;
+ if (count > 0)
+ AddProperty (Beagle.Property.NewUnsearched ("fixme:page-count", count));
+ }
+ }
+
+ private bool pull_started = false;
+ private SafeProcess pc;
+ private StreamReader pout;
+
+ private bool RunExtractor ()
+ {
+ string extractor_path, exe;
+
+ // Hack, along with magic in beagled-index-helper.in
+ // and tools/wrapper.in to make this work in the
+ // uninstalled case.
+ extractor_path = Environment.GetEnvironmentVariable ("BEAGLE_TOOL_PATH");
+
+ if (extractor_path != null)
+ exe = Path.Combine (extractor_path, "beagle-doc-extractor");
+ else
+ exe = "beagle-doc-extractor";
+
+ pc = new SafeProcess ();
+ pc.Arguments = new string [] { exe, FileInfo.FullName };
+ pc.RedirectStandardOutput = true;
+ pc.RedirectStandardError = true;
+
+ // Let beagle-doc-extractor run for 90 CPU seconds, max.
+ pc.CpuLimit = 90;
+
+ // Some documents make wv1 go crazy with memory. Limit
+ // it to 100 megs of address space, too.
+ pc.MemLimit = 100*1024*1024;
+
+ try {
+ pc.Start ();
+ } catch (SafeProcessException e) {
+ Log.Warn (e);
+ Error ();
+ return false;
+ }
+
+ pout = new StreamReader (pc.StandardOutput);
+ pull_started = true;
+
+ return true;
+ }
+
+ override protected void DoPull ()
+ {
+ // RunExtractor() calls Error() if it fails
+ if (! pull_started && ! RunExtractor ())
+ return;
+
+ string line = pout.ReadLine ();
+ if (line == null) {
+ Finished ();
+ return;
+ }
+
+ if (line.StartsWith ("**BREAK**"))
+ AppendStructuralBreak ();
+ else if (line.StartsWith ("**HOT**")) {
+ string l = line.Substring (7);
+ AppendText (l, l);
+ } else
+ AppendText (line);
+ }
+
+ override protected void DoClose ()
+ {
+ if (! pull_started)
+ return;
+
+ pout.Close ();
+
+ pout = new StreamReader (pc.StandardError);
+
+ string line;
+ while ((line = pout.ReadLine ()) != null)
+ Log.Warn ("doc extractor [{0}]: {1}", Uri, line);
+
+ pout.Close ();
+ pc.Close ();
+ }
+ }
+}
diff -upNr beagle-0.2.18.orign/Util/SafeProcess.cs beagle-0.2.18/Util/SafeProcess.cs
--- beagle-0.2.18.orign/Util/SafeProcess.cs 2007-02-07 20:04:12.000000000 +0100
+++ beagle-0.2.18/Util/SafeProcess.cs 2008-02-11 09:39:29.000000000 +0100
@@ -26,6 +26,7 @@
using System;
using System.IO;
+using System.Collections;
using System.Runtime.InteropServices;
using Mono.Unix;
using GLib;
@@ -39,6 +40,7 @@ namespace Beagle.Util {
private UnixStream stdin_stream, stdout_stream, stderr_stream;
private int pid;
private int cpu_limit, mem_limit;
+ private bool use_lang_c = false;
public string[] Arguments {
get { return args; }
@@ -86,8 +88,14 @@ namespace Beagle.Util {
set { mem_limit = value; }
}
+ public bool UseLangC {
+ get { return use_lang_c; }
+ set { use_lang_c = value; }
+ }
+
[DllImport ("libbeagleglue")]
static extern void spawn_async_with_pipes_and_limits (string[] argv,
+ string[] env,
int cpu_limit,
int mem_limit,
out int pid,
@@ -110,6 +118,23 @@ namespace Beagle.Util {
args = tmp_args;
}
+ // If LANG=C is requested, copy the parent's environment, dropping any
+ // inherited LANG, and append LANG=C. Keys are boxed as object, so they
+ // are compared with String.Equals; != would compare references and
+ // never filter. The env array must be null-terminated.
+ string[] env = null;
+ if (use_lang_c) {
+ IDictionary env_dict = Environment.GetEnvironmentVariables ();
+ env = new string [env_dict.Count + 2];
+ int count = 0;
+ foreach (DictionaryEntry entry in env_dict)
+ if (! "LANG".Equals (entry.Key))
+ env [count ++] = String.Concat (entry.Key, "=", entry.Value);
+
+ env [count ++] = "LANG=C";
+ env [count] = null;
+ }
+
IntPtr in_ptr = IntPtr.Zero, out_ptr = IntPtr.Zero, err_ptr = IntPtr.Zero;
try {
@@ -123,6 +148,7 @@ namespace Beagle.Util {
err_ptr = Marshal.AllocHGlobal (IntPtr.Size);
spawn_async_with_pipes_and_limits (args,
+ env,
cpu_limit,
mem_limit,
out pid,
@@ -179,4 +205,4 @@ namespace Beagle.Util {
internal SafeProcessException (GException gexception) : base (gexception.Message) { }
}
-}
\ No newline at end of file
+}
diff -upNr beagle-0.2.18.orign/glue/spawn-glue.c beagle-0.2.18/glue/spawn-glue.c
--- beagle-0.2.18.orign/glue/spawn-glue.c 2007-05-08 19:57:43.000000000 +0200
+++ beagle-0.2.18/glue/spawn-glue.c 2008-02-11 09:39:29.000000000 +0100
@@ -54,6 +54,7 @@ static void limit_setup_func (gpointer u
void
spawn_async_with_pipes_and_limits (char **argv,
+ char **envp,
int cpu_limit,
int mem_limit,
GPid *child_pid,
@@ -77,7 +78,7 @@ spawn_async_with_pipes_and_limits (char
g_spawn_async_with_pipes (NULL,
argv,
- NULL,
+ envp,
flag,
limit_setup_func,
&info,
Index: beagle.spec
===================================================================
RCS file: /cvs/pkgs/rpms/beagle/F-8/beagle.spec,v
retrieving revision 1.107
retrieving revision 1.108
diff -u -r1.107 -r1.108
--- beagle.spec 14 Jan 2008 21:37:45 -0000 1.107
+++ beagle.spec 11 Feb 2008 08:45:19 -0000 1.108
@@ -1,11 +1,11 @@
Name: beagle
Version: 0.2.18
-Release: 4%{?dist}
+Release: 5%{?dist}
Summary: The Beagle Search Infrastructure
Group: User Interface/Desktops
# see COPYING for details
License: ASL 2.0 and MIT and BSD and CC-BY and LGPLv2+ and (AFL or LPGLv2+)
-URL: http://beagle-project.org/
+URL: http://beagle-project.org/
Source0: http://download.gnome.org/sources/beagle/0.2/%{name}-%{version}.tar.bz2
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
@@ -46,7 +46,7 @@
Patch5: beagle-0.2.15.1-runuser.patch
Patch6: beagle-0.2.15.1-libdir.patch
#http://bugzilla.gnome.org/show_bug.cgi?id=509487
-Patch7: beagle-0.2.18-increase-doc-memorylimit.patch
+Patch7: beagle-0.2.18-doc-extractor.patch
# Mono only available on these:
ExclusiveArch: %ix86 x86_64 ppc ia64 armv4l sparc alpha
# no mono on s390 for now: s390 s390x
@@ -113,7 +113,7 @@
%patch4 -p1 -b .beagleidx
%patch5 -p1 -b .runuser
%patch6 -p1 -b .libdir
-%patch7 -p1 -b .docmemory
+%patch7 -p1 -b .doc
%build
@@ -337,6 +337,9 @@
%{_libdir}/python*/site-packages/beagle.so
%changelog
+* Mon Feb 11 2008 Adel Gadllah <adel.gadllah at gmail.com> - 0.2.18-5
+- Replace doc-extractor fix with upstream version (GNOME #509487)
+
* Mon Jan 14 2008 Adel Gadllah <adel.gadllah at gmail.com> - 0.2.18-4
- Increase memorylimit for doc extractor (GNOME #509487)
More information about the fedora-extras-commits
mailing list