-
Notifications
You must be signed in to change notification settings - Fork 45
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Stylesheets: implement CSS selectors specificity #170
Conversation
There was already some code to process selectors by ascending order of specificity, but LVCssSelector._specificity was always 0. We now add to it some weight depending of the kind of each LVCssSelectorRule it may include.
crengine/src/lvstsheet.cpp
Outdated
// and don't have a selector here. | ||
switch (_type) | ||
{ | ||
case cssrt_id: // E#id |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing an indentation level here /whistles
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know, and I would have put the {
on the switch
line.
But as it is, it's just similar to the other code just above or below in that file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would say don't copy the wrong indentation on the other code. But where?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good here:
crengine/crengine/src/lvstsheet.cpp
Lines 994 to 1016 in 012a175
switch (n1) { | |
case 0: | |
buf<<(lUInt32) css_val_px; | |
buf<<(lUInt32) 1; | |
break; | |
case 1: | |
buf<<(lUInt32) css_val_px; | |
buf<<(lUInt32) 3; | |
break; | |
case 2: | |
buf<<(lUInt32) css_val_px; | |
buf<<(lUInt32) 5; | |
break; | |
case 3: | |
buf<<(lUInt32) css_val_px; | |
buf<<(lUInt32) 3; | |
break; | |
case 4: | |
buf<<(lUInt32) css_val_inherited; | |
buf<<(lUInt32) 0; | |
break; | |
default:break; | |
} |
crengine/crengine/src/lvstsheet.cpp
Lines 1091 to 1096 in 012a175
switch (i) | |
{ | |
case 1: len[1] = len[0]; /* fall through */ | |
case 2: len[2] = len[0]; /* fall through */ | |
case 3: len[3] = len[1]; | |
} |
crengine/crengine/src/lvstsheet.cpp
Lines 1131 to 1136 in 012a175
switch (i) | |
{ | |
case 1: len[1] = len[0]; /* fall through */ | |
case 2: len[2] = len[0]; /* fall through */ | |
case 3: len[3] = len[1]; | |
} |
crengine/crengine/src/lvstsheet.cpp
Lines 1174 to 1212 in 012a175
switch (sum) { | |
case 1: | |
{ | |
buf<<(lUInt32) (prop_code | parse_important(decl)); | |
buf<<(lUInt32) n1; | |
buf<<(lUInt32) n1; | |
buf<<(lUInt32) n1; | |
buf<<(lUInt32) n1; | |
} | |
break; | |
case 2: | |
{ | |
buf<<(lUInt32) (prop_code | parse_important(decl)); | |
buf<<(lUInt32) n1; | |
buf<<(lUInt32) n2; | |
buf<<(lUInt32) n1; | |
buf<<(lUInt32) n2; | |
} | |
break; | |
case 3: | |
{ | |
buf<<(lUInt32) (prop_code | parse_important(decl)); | |
buf<<(lUInt32) n1; | |
buf<<(lUInt32) n2; | |
buf<<(lUInt32) n3; | |
buf<<(lUInt32) n2; | |
} | |
break; | |
case 4: | |
{ | |
buf<<(lUInt32) (prop_code | parse_important(decl)); | |
buf<<(lUInt32) n1; | |
buf<<(lUInt32) n2; | |
buf<<(lUInt32) n3; | |
buf<<(lUInt32) n4; | |
} | |
break; | |
default:break; | |
} |
I could go on, but I can't find what you're talking about. :-P
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
crengine/crengine/src/lvstsheet.cpp
Lines 1720 to 1723 in 012a175
switch (prop_code) | |
{ | |
case cssd_display: | |
style->Apply( (css_display_t) *p++, &style->display, imp_bit_display, is_important ); |
crengine/crengine/src/lvstsheet.cpp
Lines 1953 to 1957 in 012a175
switch (_type) | |
{ | |
case cssrt_parent: // E > F | |
// | |
{ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Weird. Anyway, both forms are present in the code and we both agree it should be indented so there's nothing to discuss really, is there. :-P
(No need to change the old stuff of course.)
Great work! :-) |
Should I include that too in the not-yet-merged base PR, or wait a day? |
Whichever works for you. Unless you want me to do the bumping, then it'll be tomorrow or the day after. :-P |
OK, done.
|
I think this will need a small fix, outside of LVCssSelectorRule. I forgot to add 1 for the main element (if any): |
Some specificity bug and issue noticed while testing with https://www.brunildo.org/test/IEASpec.html. --- a/crengine/src/lvstsheet.cpp
+++ b/crengine/src/lvstsheet.cpp
@@ -2321,7 +2321,9 @@ lUInt32 LVCssSelectorRule::getWeight() {
case cssrt_predecessor: // E + F
case cssrt_predsibling: // E ~ F
// But not when they don't have an element (_id=0)
- return _id != 0 ? 1 : 0;
+ // return _id != 0 ? 1 : 0;
+ // But we already added it in LVCssSelector::parse()
+ return 0;
break;
case cssrt_universal: // *
return 0; And we have a little issue where we lose the order of CSS selectors. --- a/crengine/src/lvstsheet.cpp
+++ b/crengine/src/lvstsheet.cpp
@@ -3214,19 +3217,39 @@ void LVStyleSheet::apply( const ldomNode * node, css_style_rec_t * style )
LVCssSelector * selector_0 = _selectors[0];
LVCssSelector * selector_id = id>0 && id<_selectors.length() ? _selectors[id] : NULL;
for (;;)
{
if (selector_0!=NULL)
{
- if (selector_id==NULL || selector_0->getSpecificity() < selector_id->getSpecificity() )
+ // Note that by splitting elements in selector_0 and selector_id (for performance reasons,
+ // segmenting the selectors by their start element ID - A, SPAN, P...), we lose a bit
+ // of their ordering in the stylesheet, and we may apply selectors with the same
+ // specificity in a different order than the one in the stylesheet.
+ // e.g with https://www.brunildo.org/test/IEASpec.html
+ // <style type="text/css">
+ // p.c12 { color: red; } /* 0.0.1.1 specificity, goes into selector_id */
+ // div .c14 { color: red; } /* 0.0.1.1 specificity, goes into selector_0 */
+ // div .c12 { color: green; } /* 0.0.1.1 specificity, goes into selector_0 */
+ // p.c14 { color: green; } /* 0.0.1.1 specificity, goes into selector_id */
+ // </style>
+ // <div><p class="c12">el.class (red), el .class (green) (wrong in IE/Win, IE/Mac and Op7-)</p></div>
+ // <div><p class="c14">el .class (red), el.class (green)</p></div>
+ // Both text should be green (because green are specified in late selectors in the CSS, but
+ // depending on whether we use '<' or '<=' in the next line, one of them will be red...
+ // Let's use '<=' so "p.c12" gets applied after "div .c12" (they have the same specificity
+ // per specs, but let's have those with an element last have a little more) - so, we'll
+ // be in the situation of "wrong in IE/Win, IE/Mac and Op7").
+ if (selector_id==NULL || selector_0->getSpecificity() <= selector_id->getSpecificity() )
{
// step by sel_0
selector_0->apply( node, style );
selector_0 = selector_0->getNext();
}
else
{
// step by sel_id
selector_id->apply( node, style );
selector_id = selector_id->getNext();
} We are currently storing the specificity in a lUInt32, the 3 numbers being stored each in a byte. Anything wrong with that, that you could think of ? |
Essentially make the specifity part a bit-field in however space is needed for that? Sounds fine, AFAICT :). |
Yep. // We are storing specificity in a lUint32.
// We also want to store in its lower bits some counter to ensure
// selectors with the same specificity keep the order we've seen
// when parsing them.
// So, apply the real CSS specificity in higher bits, allowing
// for the bellow number of such rules in a single selector
// (we're not checking for overflow thus...)
#define CSS_SPECIFICITY_ID 1<<28 // allow for 8 #id (b in comment below)
#define CSS_SPECIFICITY_ATTR_CLASS 1<<23 // allow for 32 .class and [attr...] (c)
#define CSS_SPECIFICITY_ELEMENT 1<<18 // allow for 32 element names div > p span (d)
// This allows for counting 1<<18 (262144) selectors and storing
// its position in the 18 lower bits (we're not checking for overflow either). We'll be doing stuff like |
Hmm, I'll admit that I was thinking in terms of an actual bitmask (so, one value per flag, and then OR'ing stuff around), but if I get the full picture (which I very well might not be :D), that doesn't quite work here... If, on the other hand, you want to chuck 3 different (and possibly arbitrary) values in a single int, you can let the compiler do the bit twiddling for you with an union: typedef union
{
uint32_t _x;
struct
{
uint8_t id;
uint8_t class;
uint16_t element;
} specificity;
} CSSSpec; Then you can just set/get Unless there's a C++ shenanigan that craps on my parade ;p. EDIT: Except, if I followed, you want 4 elements, in which case that leaves only 8 bits for a counter if everything's made an uint8_t... Perhaps a mix'n match of both approaches, with a bitfield somewhere for some of that stuff in order to have an extra 8 bits available? |
Nope, I don't really need that. For a single selector, i'll be adding multiple weight-parts as I'm parsing the CSS.
According to the specificity rules, that makes a specificity of (1, 2, 3) (a=1, b=2, c=3). Currently, we are storing these 3 numbers in a lUint32, each in a 8bits slot, so allowing in a selector 256 ids + 256 classnames + 256 elements, which is helluvah more than what we might see in classic CSS, even complex ones: I now want to store in that lUint32 the counted number of the selector, its sequence number in the whole set of selectors I'm parsing, incrementing that number when I have added a selector - which can be more than 256 (what's available in that lUint32). And I seem to have my bit maths wrong :|
(earlier this afternoon, 1<<28... was feeling right :) Anyway, :bit fields would might be help with readability (and my maths :), allowing to set individual bit segments, still being able to use the lUint32 as a single int for comparisons. - _specificity += +CSS_SPECIFICITY_ID
+ _specificity.id += 1
- _specificity += +CSS_SPECIFICITY_CLASS_ATTR
+ _specificity.cls_attr += 1 but not strictly necessary, if you just confirm me which of the bin((1<<28) or bin((1<<29) is right :) |
OK, rerechecked again and #define CSS_SPECIFICITY_ID 1<<29 // allow for 8 #id (b in comment below)
#define CSS_SPECIFICITY_ATTR_CLASS 1<<24 // allow for 32 .class and [attr...] (c)
#define CSS_SPECIFICITY_ELEMENT 1<<19 // allow for 32 element names div > p span (d) |
32 - 3 = 29 - 5 = 24 - 5 = 19 2^3 = 8 Napkin maths checks out ;). |
Also, my bad, I said bitfield (twice) earlier when I meant bitmask :D. Although, yeah, GCC bitfields might do the trick in an union, too. Never tried it for anything fancier than faking an uint24_t ;). |
Random PoC: #include <stdio.h>
#include <stdint.h>
typedef union
{
uint32_t specificity;
struct
{
uint8_t id:3;
uint8_t class:5;
uint8_t element:5;
uint32_t count:19;
} spec;
} CSSSpec;
int main(void)
{
CSSSpec test = { 0 };
printf("sizeof(test): %zu\n", sizeof(test));
test.spec.id = 1U;
test.spec.class = 2U;
test.spec.element = 3U;
test.spec.count = 1U;
printf("id: %hhu // class: %hhu // element: %hhu // count: %u // full: %#.8x\n", test.spec.id, test.spec.class, test.spec.element, test.spec.count, test.specificity);
CSSSpec test2 = test;
printf("id: %hhu // class: %hhu // element: %hhu // count: %u // full: %#.8x\n", test2.spec.id, test2.spec.class, test2.spec.element, test2.spec.count, test2.specificity);
CSSSpec test3 = { .specificity = 0x2311 };
printf("id: %hhu // class: %hhu // element: %hhu // count: %u // full: %#.8x\n", test3.spec.id, test3.spec.class, test3.spec.element, test3.spec.count, test3.specificity);
}
Seems to work out okay ;). |
With constants, Clang shouts at you if there's an overflow, but since everything's unsigned, it wraps around:
Runtime behavior should be identical (i.e, wraparound). |
I'd be expecting
(But don't bother, except for fun/science - I'm done with it, having it working by using the #define, and I don't want to change too much of crengine lvstsheet to introduce this type/struct - although it indeed looks nicer :) |
Endianess ;). #include <stdio.h>
#include <stdint.h>
typedef union
{
uint32_t specificity;
struct
{
uint8_t id:3;
uint8_t class:5;
uint8_t element:5;
uint32_t count:19;
} spec;
} CSSSpec;
typedef union
{
uint32_t specificity;
struct
{
uint32_t count:19;
uint8_t element:5;
uint8_t class:5;
uint8_t id:3;
} spec;
} CSSSpec2;
int main(void)
{
CSSSpec test = { 0 };
printf("sizeof(test): %zu\n", sizeof(test));
test.spec.id = 1U;
test.spec.class = 2U;
test.spec.element = 3U;
test.spec.count = 1U;
printf("id: %hhu // class: %hhu // element: %hhu // count: %u // full: %#.8x\n", test.spec.id, test.spec.class, test.spec.element, test.spec.count, test.specificity);
CSSSpec test2 = test;
printf("id: %hhu // class: %hhu // element: %hhu // count: %u // full: %#.8x\n", test2.spec.id, test2.spec.class, test2.spec.element, test2.spec.count, test2.specificity);
CSSSpec test3 = { .specificity = 0x2311 };
printf("id: %hhu // class: %hhu // element: %hhu // count: %u // full: %#.8x\n", test3.spec.id, test3.spec.class, test3.spec.element, test3.spec.count, test3.specificity);
CSSSpec2 test4 = { .spec.id = test.spec.id, .spec.class = test.spec.class, .spec.element = test.spec.element, .spec.count = test.spec.count };
printf("id: %hhu // class: %hhu // element: %hhu // count: %u // full: %#.8x\n", test4.spec.id, test4.spec.class, test4.spec.element, test4.spec.count, test4.specificity);
} sizeof(test): 4
|
Ok :) so may be an argument to not go with that, as I would worry about the order in the bits stuff themselves - I'd fear getting on some archs:
|
Don't worry. Unless you'll be feeding full int constants, that's abstracted for you ;). (i.e., everything will be BE or everything will be LE, but everything will be in the "right" place for the current endianness, you don't actually care about the internal layout unless you fiddle with the full int directly from another endianness). (i.e., a left shift always shifts left, a right shift always shifts right, that's the magic of bit twiddling). |
Generally, you don't actually care about byte-order. That's only a potential issue when you have to deal with a stream of data that may be encoded in another endianness (that generally encompasses dealing with networking stuff at the driver level, or when dealing with file formats, and that's pretty much it?). |
Ok, I get the |
Nope, nothing's crossing endianness here, whichever version you choose is up to you. You don't care about the internal layout ;). (The first one just felt more "natural" to me, since, again, don't care about the internal layout ;p). (FWIW, I'm pretty sure no-one's ever actually run KOReader on a BE machine anyway ^^. The only valid and current example I can think of is IBM's Power9 (i.e., PPC 64), although they technically can run either in BE or LE, they just happen to default to BE for reasons (legacy?)). |
But I do care about the resulting lUInt32 :) 0x21180001 is right, 0x00002311 is wrong :) |
Which then begs the question: why, exactly, do you care about the full int? ;). (I'd expect for stuff to basically check counter if id && class && elem match or something. I can't quite see why you'd want to rely on the full int, except to serialize/deserialize it somewhere in which case endianness won't matter, as everything's done on the same machine). |
Also, more bonus points for GCC bitfields: ARM has a fun set of dedicated bitfield instructions. I'd imagine the compiler has an easier time being convinced to use those when actual bitfields are used instead of manual bit twiddling ;). |
Existing code does :) the whole point is to apply rules by increased specificity order (so, it does that at parsing time, so it does not to do that at each apply time): crengine/crengine/src/lvstsheet.cpp Lines 3343 to 3373 in f204541
Or you'd have to add a lt/gt wrapping methods to your struct so it computes some comparion weight internally - but then, the resulting lUInt32 is just, with no cost induced. |
I was afraid it was going to be something like that ;p. That's admittedly already broken on BE, though, isn't it? |
Not as long as we're using a simple lUInt32, to which we just add numbers (I add the number that 1<<29 gives, I'm not shifting anything) - and I just compare these numbers. |
Hmm, if we're reading an uint, and not raw memory (i.e., a pointer to the first bytes of that uint), endianess doesn't actually matter? I think? My brain hurts >_<". There's also the fact that C++ forbids type-punning, so I'm not sure my C union shenanigans are actually doable in C++... |
No, that works in C++, yay. (Mostly, class is a reserved keyword, duh. And C++ doesn't do designated aggregate list init o_O). #include <iostream>
#include <cstdint>
typedef union
{
uint32_t specificity;
struct
{
uint8_t id:3;
uint8_t cl:5;
uint8_t element:5;
uint32_t count:19;
} spec;
} CSSSpec;
typedef union
{
uint32_t specificity;
struct
{
uint32_t count:19;
uint8_t element:5;
uint8_t cl:5;
uint8_t id:3;
} spec;
} CSSSpec2;
int main(void)
{
CSSSpec test = { 0 };
printf("sizeof(test): %zu\n", sizeof(test));
test.spec.id = 1U;
test.spec.cl = 2U;
test.spec.element = 3U;
test.spec.count = 1U;
printf("id: %hhu // class: %hhu // element: %hhu // count: %u // full: %#.8x\n", test.spec.id, test.spec.cl, test.spec.element, test.spec.count, test.specificity);
CSSSpec test2 = test;
printf("id: %hhu // class: %hhu // element: %hhu // count: %u // full: %#.8x\n", test2.spec.id, test2.spec.cl, test2.spec.element, test2.spec.count, test2.specificity);
CSSSpec test3 = { .specificity = 0x2311 };
printf("id: %hhu // class: %hhu // element: %hhu // count: %u // full: %#.8x\n", test3.spec.id, test3.spec.cl, test3.spec.element, test3.spec.count, test3.specificity);
CSSSpec2 test4 = { 0 };
test4.spec.id = test.spec.id;
test4.spec.cl = test.spec.cl;
test4.spec.element = test.spec.element;
test4.spec.count = test.spec.count;
printf("id: %hhu // class: %hhu // element: %hhu // count: %u // full: %#.8x\n", test4.spec.id, test4.spec.cl, test4.spec.element, test4.spec.count, test4.specificity);
// Mind twist!
char *pointer = (char *) &test.specificity;
printf("full (in memory, byte by byte): 0x");
for (int32_t i = 0; i < 4; i++)
{
printf("%02x", (uint32_t) pointer[i]);
}
printf("\n");
} With the LE mind-twist:
|
But we use/access never a subset of that lUInt32. We always use it as full lUInt32 (a 4-bytes single integer). |
Then I'm pretty sure endianness is either irrelevant or already broken ;). My vote would be irrelevant :). |
There was already some code to process selectors by ascending order of specificity, but LVCssSelector._specificity was always 0.
We now add to it some weight depending of the kind of each LVCssSelectorRule it may include.
See #167 (comment)
I haven't tested that much nor looked much at the selector order navigation, I just added these simple rules and the 2 test cases in koreader/koreader#2841 seem ok:
Before => after
=>
=>