[augeas-devel] [PATCH] Generic square lens

Francis Giraldeau francis.giraldeau at gmail.com
Tue Aug 7 10:59:42 UTC 2012


Le 2012-08-07 10:06, Raphaël Pinson a écrit :
> This looks great!
> 
> 
> Here are other tests to add for square quotes:

Thanks for this feedback! Makes sense, I can add them to the patch.

> 
>      (* Test quotes *)   
>      let label_quotes =
>             let k = label "quote" in
>              let q = del /["']/ "\"" in
>             let s = store /.*/ in
>              [ k . square q s q ]
> 
>      test label_quotes get "'value'" = { "quote" = "value" }
>      test label_quotes get "\"value\"" = { "quote" = "value" }
> 
>      let square_quotes =
>              let k = key /[A-Za-z0-9]+/ . del /[ \t]*=[ \t]*/ " = " in
>              let q = del /["']/ "\"" in
>             let s = store /[A-Za-z0-9]+/ in
>              [ k . square q s q ]
> 
>      test square_quotes get "key = \"value\"" = { "key" = "value" }
>      test square_quotes get "key = 'value'" = { "key" = "value" }
>      (* quote mismatch *)
>      test square_quotes get "key = \"value'" = *
> 
> 
> I'm surprised that these don't cause ambiguities:
> 
> 
>      (* quotes in value *)
>      test square_quotes get "key = \"va'lue\"" = { "key" = "va'lue" }
>      test square_quotes get "key = 'va\"lue'" = { "key" = "va\"lue" }

There is no ambiguity here because the quoted string must start and end
with a quote. There is no overlap between the concatenated lenses. This
would not be the case if there were a variable number of quotes,
like this:

(* Counter-example: a delimiter that may repeat (or be empty) overlaps
   with the key regexp, which makes the concatenation ambiguous. *)
module Bug =

(* Both the delimiter regexp /[x]*/ and the key regexp /[a-z]*/ can match
   the letter x, so the boundary between delimiter and key is not unique. *)
let ambig =
        let k = key /[a-z]*/ in
        let q = del /[x]*/ "x" in
        [ square q k q ]

(* Running this produces the ambiguous-concatenation exception shown below. *)
test ambig get "xxax" = ?

exception: ambiguous concatenation
      First regexp: /[x]*/
      Second regexp: /[a-z]*/
      'x' can be split into
      '|=|x'

     and
      'x|=|'


> 
> 
> especially these, which pass (should they?):
> 
>      test square_quotes get "key = \"va\"lue\"" = { "key" = "va\"lue" }
>      test square_quotes get "key = 'va'lue'" = { "key" = "va'lue" }

No, they should be refused, not because of Augeas, but because the XML
standard says:

  Literal data is any quoted string not containing the quotation mark
  used as a delimiter for that string. [1]

So, how do we handle it and avoid ambiguities? Say we stay close to the XML
grammar. There are two regexps for AttValue, combined in a union: one for
single quotes and one for double quotes.

(* First attempt, following the XML grammar: one alternative per delimiter.
   A single-quoted value may not contain a single quote, and a
   double-quoted value may not contain a double quote. *)
let del1 = dels "'"
let del2 = dels "\""
let att1 = store /[^']*/
let att2 = store /[^\"]*/
(* Union of the single-quoted and the double-quoted form. Both value
   regexps accept strings that contain no quote at all. *)
let att_value =
[ square del1 att1 del1 ] | [ square del2 att2 del2 ]

A problem occurs in the put direction, because both att1 and att2
match strings that contain neither single nor double quotes.
That means there is an ambiguity: should we use single or double
quotes by default when creating new attributes that do not contain any
quotes? One solution is to explicitly exclude strings without quotes
from these sets and give them their own branch of the union. See the
attached lens.

>    
> I'm wondering now if it actually solves the problem with xml.aug's
> quotes, because xml.aug has constructs like:
> 
>     [ label "#literal" . sep_spc . sto_quote ]*
> 
> and this generates a concatenation ambiguity:
> 
>      let iter_quotes = label_quotes*
> 

I think we can find a solution to this one. Isn't each literal
separated by a space?

Cheers,

Francis Giraldeau

[1] http://www.w3.org/TR/REC-xml/
-------------- next part --------------
(* Module: Quotes
   Experimental lens for quoted attribute values, built on the square
   lens. A value is accepted only when it is enclosed in a matching pair
   of quotes and does not contain the quote character used as its
   delimiter. The value is used as the key of the resulting tree node. *)
module Quotes =

(* Delete the literal string s; s is also the default text written back
   when the node is created. *)
let dels (s:string) = del s s

(* del1: a single quote. del2: a double quote. *)
let del1 = dels "'"
let del2 = dels "\""
(* del3: either quote character; a double quote is written by default. *)
let del3 = del /['\"]/ "\""

(* The three key regexps are mutually exclusive, which keeps the union
   below unambiguous:
     att1 - no single quote, at least one double quote
     att2 - no double quote, at least one single quote
     att3 - no quote character at all
   All three also exclude the slash (written \/ inside the regexp
   literal). NOTE(review): presumably because the value becomes a tree
   label, where a slash acts as path separator - confirm. *)
let att1 = key /[^'\/]*[\"][^'\/]*/
let att2 = key /[^\"\/]*['][^\"\/]*/
let att3 = key /[^'\"\/]*/

(* One subtree per case:
     1. single-quoted value that contains a double quote
     2. double-quoted value that contains a single quote
     3. quote-free value inside either kind of quote
   In case 3 the tests at the end expect a mismatched pair of delimiters
   to be rejected. *)
let att_value =
[ square del1 att1 del1 ] |
[ square del2 att2 del2 ] |
[ square del3 att3 del3 ]

(* Rejected variant, kept for reference: placing the union inside a
   single subtree raises a put ambiguity on the empty string, because
   the atype is not complete when the union is typechecked. Each
   alternative must therefore be a complete subtree.
let att_value =
[ square del1 att1 del1 |
  square del2 att2 del2 |
  square del3 att3 del3 ]
*)

(* Well-formed values: the delimiter does not occur inside the value. *)
test att_value get "\"value\"" = { "value" }
test att_value get "'value'" = { "value" }
test att_value get "'va\"lue'" = { "va\"lue" }
test att_value get "\"va'lue\"" = { "va'lue" }

(* Illegal as per the XML standard: the value contains its own delimiter,
   so the get is expected to fail. *)
test att_value get "\"va\"lue\"" = *
test att_value get "'va'lue'" = *

(* Malformed values: opening and closing quote differ, so the get is
   expected to fail. *)
test att_value get "\"value'" = *
test att_value get "'value\"" = *


-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 4489 bytes
Desc: Signature cryptographique S/MIME
URL: <http://listman.redhat.com/archives/augeas-devel/attachments/20120807/258425b7/attachment.p7s>


More information about the augeas-devel mailing list