rpms/beagle/F-8 beagle-0.2.18-doc-extractor.patch, NONE, 1.1 beagle.spec, 1.107, 1.108

Adel Gadllah (drago01) fedora-extras-commits at redhat.com
Mon Feb 11 08:46:33 UTC 2008


Author: drago01

Update of /cvs/pkgs/rpms/beagle/F-8
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv13761

Modified Files:
	beagle.spec 
Added Files:
	beagle-0.2.18-doc-extractor.patch 
Log Message:
Replace doc-extractor fix with upstream version

beagle-0.2.18-doc-extractor.patch:

--- NEW FILE beagle-0.2.18-doc-extractor.patch ---
diff -upNr beagle-0.2.18.orign/Filters/FilterDOC.cs beagle-0.2.18/Filters/FilterDOC.cs
--- beagle-0.2.18.orign/Filters/FilterDOC.cs	2007-02-07 20:04:18.000000000 +0100
+++ beagle-0.2.18/Filters/FilterDOC.cs	2008-02-11 09:39:29.000000000 +0100
@@ -100,6 +100,7 @@ namespace Beagle.Filters {
 			pc.Arguments = new string [] { exe, FileInfo.FullName };
 			pc.RedirectStandardOutput = true;
 			pc.RedirectStandardError = true;
+			pc.UseLangC = true;
 
 			// Let beagle-doc-extractor run for 90 CPU seconds, max.
 			pc.CpuLimit = 90;
diff -upNr beagle-0.2.18.orign/Filters/FilterDOC.cs.orig beagle-0.2.18/Filters/FilterDOC.cs.orig
--- beagle-0.2.18.orign/Filters/FilterDOC.cs.orig	1970-01-01 01:00:00.000000000 +0100
+++ beagle-0.2.18/Filters/FilterDOC.cs.orig	2007-02-07 20:04:18.000000000 +0100
@@ -0,0 +1,163 @@
+//
+// FilterDOC.cs: MS Word filter.  Uses an external extractor utility to
+//               actually get the text.
+//
+// Copyright (C) 2004-2007 Novell, Inc.
+// 
+
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+using System.IO;
+using System.Runtime.InteropServices;
+
+using Beagle.Util;
+using Beagle.Daemon;
+
+using Gsf;
+
+namespace Beagle.Filters {
+    
+	public class FilterDOC : FilterOle {
+
+		//////////////////////////////////////////////////////////
+
+		public FilterDOC () 
+		{
+			SnippetMode = true;
+		}
+
+		protected override void RegisterSupportedTypes ()
+		{
+			AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/msword"));
+			AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/vnd.ms-word"));
+			AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-msword"));
+		}
+		
+		override protected void OpenStorage (FileInfo info)
+		{
+			FileName = info.FullName;
+		}
+
+		override protected void ExtractMetaData (Gsf.Input sumStream, Gsf.Input docSumStream)
+		{
+			int count = 0;
+			DocProp prop = null;
+
+			if (sumMeta != null) {
+				prop = sumMeta.Lookup ("gsf:word-count");
+				if (prop != null)
+					count = (int) prop.Val;
+				if (count > 0)
+					AddProperty (Beagle.Property.NewUnsearched ("fixme:word-count", count));
+
+				count = 0;
+				prop = sumMeta.Lookup ("gsf:page-count");		
+				if (prop != null)
+					count = (int) prop.Val;
+				if (count > 0)
+					AddProperty (Beagle.Property.NewUnsearched ("fixme:page-count", count));
+			}
+		}
+
+		private bool pull_started = false;
+		private SafeProcess pc;
+		private StreamReader pout;
+
+		private bool RunExtractor ()
+		{
+			string extractor_path, exe;
+
+			// Hack, along with magic in beagled-index-helper.in
+			// and tools/wrapper.in to make this work in the
+			// uninstalled case.
+			extractor_path = Environment.GetEnvironmentVariable ("BEAGLE_TOOL_PATH");
+
+			if (extractor_path != null)
+				exe = Path.Combine (extractor_path, "beagle-doc-extractor");
+			else
+				exe = "beagle-doc-extractor";
+
+			pc = new SafeProcess ();
+			pc.Arguments = new string [] { exe, FileInfo.FullName };
+			pc.RedirectStandardOutput = true;
+			pc.RedirectStandardError = true;
+
+			// Let beagle-doc-extractor run for 90 CPU seconds, max.
+			pc.CpuLimit = 90;
+
+			// Some documents make wv1 go crazy with memory.  Limit
+			// it to 100 megs of address space, too.
+			pc.MemLimit = 100*1024*1024;
+
+			try {
+				pc.Start ();
+			} catch (SafeProcessException e) {
+				Log.Warn (e);
+				Error ();
+				return false;
+			}
+
+			pout = new StreamReader (pc.StandardOutput);
+			pull_started = true;
+
+			return true;
+		}
+
+		override protected void DoPull ()
+		{
+			// RunExtractor() calls Error() if it fails
+			if (! pull_started && ! RunExtractor ())
+				return;
+
+			string line = pout.ReadLine ();
+			if (line == null) {
+				Finished ();
+				return;
+			}
+
+			if (line.StartsWith ("**BREAK**"))
+				AppendStructuralBreak ();
+			else if (line.StartsWith ("**HOT**")) {
+				string l = line.Substring (7);
+				AppendText (l, l);
+			} else
+				AppendText (line);
+		}
+
+		override protected void DoClose ()
+		{
+			if (! pull_started)
+				return;
+
+			pout.Close ();
+
+			pout = new StreamReader (pc.StandardError);
+			
+			string line;
+			while ((line = pout.ReadLine ()) != null)
+				Log.Warn ("doc extractor [{0}]: {1}", Uri, line);
+
+			pout.Close ();
+			pc.Close ();
+		}
+	}
+}
diff -upNr beagle-0.2.18.orign/Util/SafeProcess.cs beagle-0.2.18/Util/SafeProcess.cs
--- beagle-0.2.18.orign/Util/SafeProcess.cs	2007-02-07 20:04:12.000000000 +0100
+++ beagle-0.2.18/Util/SafeProcess.cs	2008-02-11 09:39:29.000000000 +0100
@@ -26,6 +26,7 @@
 
 using System;
 using System.IO;
+using System.Collections;
 using System.Runtime.InteropServices;
 using Mono.Unix;
 using GLib;
@@ -39,6 +40,7 @@ namespace Beagle.Util {
 		private UnixStream stdin_stream, stdout_stream, stderr_stream;
 		private int pid;
 		private int cpu_limit, mem_limit;
+		private bool use_lang_c = false;
 
 		public string[] Arguments {
 			get { return args; }
@@ -86,8 +88,14 @@ namespace Beagle.Util {
 			set { mem_limit = value; }
 		}
 
+		public bool UseLangC {
+			get { return use_lang_c; }
+			set { use_lang_c = value; }
+		}
+
 		[DllImport ("libbeagleglue")]
 		static extern void spawn_async_with_pipes_and_limits (string[] argv,
+								      string[] env,
 								      int cpu_limit,
 								      int mem_limit,
 								      out int pid,
@@ -110,6 +118,23 @@ namespace Beagle.Util {
 				args = tmp_args;
 			}
 
+			// If LANG=C is requested, copy the parent's environment
+			// (minus any existing LANG) and append LANG=C to it.
+			// Make sure to null-terminate the env array.
+			string[] env = null;
+			if (use_lang_c) {
+				IDictionary env_dict = Environment.GetEnvironmentVariables ();
+				env = new string [env_dict.Count + 2];
+				int count = 0;
+				foreach (DictionaryEntry entry in env_dict)
+					// Key is an object: cast so != compares string values, not references.
+					if ((string) entry.Key != "LANG")
+						env [count ++] = String.Concat (entry.Key, "=", entry.Value);
+
+				env [count ++] = "LANG=C";
+				env [count] = null;
+			}
+
 			IntPtr in_ptr = IntPtr.Zero, out_ptr = IntPtr.Zero, err_ptr = IntPtr.Zero;
 
 			try {
@@ -123,6 +148,7 @@ namespace Beagle.Util {
 					err_ptr = Marshal.AllocHGlobal (IntPtr.Size);
 
 				spawn_async_with_pipes_and_limits (args,
+								   env,
 								   cpu_limit,
 								   mem_limit,
 								   out pid,
@@ -179,4 +205,4 @@ namespace Beagle.Util {
 		internal SafeProcessException (GException gexception) : base (gexception.Message) { }
 	}
 			
-}
\ No newline at end of file
+}
diff -upNr beagle-0.2.18.orign/glue/spawn-glue.c beagle-0.2.18/glue/spawn-glue.c
--- beagle-0.2.18.orign/glue/spawn-glue.c	2007-05-08 19:57:43.000000000 +0200
+++ beagle-0.2.18/glue/spawn-glue.c	2008-02-11 09:39:29.000000000 +0100
@@ -54,6 +54,7 @@ static void limit_setup_func (gpointer u
 
 void
 spawn_async_with_pipes_and_limits (char   **argv,
+				   char   **envp,
 				   int      cpu_limit,
 				   int      mem_limit,
 				   GPid    *child_pid,
@@ -77,7 +78,7 @@ spawn_async_with_pipes_and_limits (char 
 
 	g_spawn_async_with_pipes (NULL,
 				  argv,
-				  NULL,
+				  envp,
 				  flag,
 				  limit_setup_func,
 				  &info,


Index: beagle.spec
===================================================================
RCS file: /cvs/pkgs/rpms/beagle/F-8/beagle.spec,v
retrieving revision 1.107
retrieving revision 1.108
diff -u -r1.107 -r1.108
--- beagle.spec	14 Jan 2008 21:37:45 -0000	1.107
+++ beagle.spec	11 Feb 2008 08:45:19 -0000	1.108
@@ -1,11 +1,11 @@
 Name:           beagle
 Version:        0.2.18
-Release:        4%{?dist}
+Release:        5%{?dist}
 Summary:        The Beagle Search Infrastructure
 Group:          User Interface/Desktops
 # see COPYING for details
 License:        ASL 2.0 and MIT and BSD and CC-BY and LGPLv2+ and (AFL or LPGLv2+)
-URL:            http://beagle-project.org/
+URL:		 http://beagle-project.org/
 Source0:        http://download.gnome.org/sources/beagle/0.2/%{name}-%{version}.tar.bz2
 BuildRoot:      %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
 
@@ -46,7 +46,7 @@
 Patch5: beagle-0.2.15.1-runuser.patch
 Patch6: beagle-0.2.15.1-libdir.patch
 #http://bugzilla.gnome.org/show_bug.cgi?id=509487
-Patch7: beagle-0.2.18-increase-doc-memorylimit.patch
+Patch7: beagle-0.2.18-doc-extractor.patch
 # Mono only available on these:
 ExclusiveArch: %ix86 x86_64 ppc ia64 armv4l sparc alpha
 # no mono on s390 for now: s390 s390x 
@@ -113,7 +113,7 @@
 %patch4 -p1 -b .beagleidx
 %patch5 -p1 -b .runuser
 %patch6 -p1 -b .libdir
-%patch7 -p1 -b .docmemory
+%patch7 -p1 -b .doc
 
 
 %build
@@ -337,6 +337,9 @@
 %{_libdir}/python*/site-packages/beagle.so
 
 %changelog
+* Mon Feb 11 2008 Adel Gadllah <adel.gadllah at gmail.com> - 0.2.18-5
+- Replace doc-extractor fix with upstream version (GNOME #509487)
+
 * Mon Jan 14 2008 Adel Gadllah <adel.gadllah at gmail.com> - 0.2.18-4
 - Increase memorylimit for doc extractor (GNOME #509487)
 




More information about the fedora-extras-commits mailing list