Строка 1: |
Строка 1: |
| local sort, concat, clone = table.sort, table.concat, mw.clone | | local sort, concat, clone = table.sort, table.concat, mw.clone |
− | local match = mw.ustring.match | + | local match, gmatch, gsub = mw.ustring.match, mw.ustring.gmatch, mw.ustring.gsub |
| local lpeg = lpeg | | local lpeg = lpeg |
| local P, S, V, C, Cg, Cb, Ct, Cf, Cmt = lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Ct, lpeg.Cf, lpeg.Cmt | | local P, S, V, C, Cg, Cb, Ct, Cf, Cmt = lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Ct, lpeg.Cf, lpeg.Cmt |
Строка 79: |
Строка 79: |
| end | | end |
| | | |
− | -- If there is only one chilf node, and it can be safely assimilated into the parent node: | + | -- Allowed CSS attributes: |
local css_whitelist = to_set { 'float', 'text-align', 'vertical-align' }

-- Sanitise CSS here:
-- `style` matches a quoted style="..." / style='...' attribute and produces
-- { name = 'style', value = <sanitised css> }, or no capture at all when no
-- whitelisted declaration survives.
-- `spaces`, `equals`, `any` and `backslash` are patterns defined elsewhere in
-- this file (presumably whitespace, '=', any single character and '\' -- confirm).
-- NOTE(review): the grammar uses `attr = style + <generic attribute>`, so a style
-- attribute this pattern rejects (empty or unquoted value) appears to fall
-- through to the generic rule unsanitised -- confirm whether that is intended.
local single, double = P"'", P'"'
local style = (P'style' * spaces * equals * spaces * (
		-- The escaped quote must be tried before `any - quote`: the latter also
		-- matches the backslash, which made the escape alternative unreachable.
		-- A substitution capture yields the value with escapes already resolved.
		single * lpeg.Cs ((backslash * single / "'" + (any - single)) ^ 1) * single
		+ double * lpeg.Cs ((backslash * double / '"' + (any - double)) ^ 1) * double
	)) / function (css)
	local names, sanitised = {}, {}
	-- Property names may contain hyphens ('text-align'), which %w+ alone never matches.
	for attr, value in gmatch (css, '([%w%-]+)%s*:%s*([^;]+)') do
		-- Drop trailing whitespace left in front of the ';' separator.
		value = match (value, '^(.-)%s*$')
		if css_whitelist [attr] and value ~= '' then
			if sanitised [attr] == nil then
				names [#names + 1] = attr
			end
			-- A later declaration of the same property overrides an earlier one.
			sanitised [attr] = value
		end
	end
	if #names == 0 then
		-- Return no value rather than nil: a nil capture would leave a hole in
		-- the attribute table, and ipairs would then skip the attributes after it.
		return
	end
	-- Sort so that the output does not depend on the iteration order of pairs.
	sort (names)
	local serialised = {}
	for i, attr in ipairs (names) do
		serialised [i] = attr .. ': ' .. sanitised [attr]
	end
	return { name = 'style', value = concat (serialised, '; ') }
end
| + | |
| + | -- If there is only one child node, and it can be safely assimilated into the parent node: |
| local function assimilate (node) | | local function assimilate (node) |
| if #node == 0 or #node > 1 then | | if #node == 0 or #node > 1 then |
Строка 85: |
Строка 113: |
| end | | end |
| local child = clone (node [1]) | | local child = clone (node [1]) |
− | if type (child) == 'string' or table_set [node.__name] and not table_set [child.__name] or table_set [child.__name] then | + | if type (child) == 'string' or table_set [child.__name] then |
| return node | | return node |
| end | | end |
Строка 100: |
Строка 128: |
| return node | | return node |
| end | | end |
− | | + | |
-- Serialise a parsed tag back:
-- `tag` is either a plain string (a text node, returned unchanged) or a table
-- with `__name` (the tag name), string keys for attributes and an array part
-- holding the child nodes. Returns the markup as a string.
-- NOTE(review): a closing tag is always emitted, including for tags that were
-- parsed from the self-closing form -- confirm this is acceptable.
local function serialise (tag)
	if type (tag) == 'string' then
		return tag
	end
	-- Collect the attribute names; they are assumed to be strings, as produced by
	-- the grammar. Sorting makes the output independent of pairs' ordering.
	local names = {}
	for attr in pairs (tag) do
		if type (attr) ~= 'number' and attr ~= '__name' then
			names [#names + 1] = attr
		end
	end
	sort (names)
	local serialised = { '<' .. tag.__name }
	for _, attr in ipairs (names) do
		-- Escape every double quote, not only the first one.
		serialised [#serialised + 1] = ' ' .. attr .. '="' .. gsub (tag [attr], '"', '\\"') .. '"'
	end
	serialised [#serialised + 1] = '>'
	for _, node in ipairs (tag) do
		serialised [#serialised + 1] = serialise (node)
	end
	serialised [#serialised + 1] = '</' .. tag.__name .. '>'
	-- Join without a separator so that no whitespace is added around children.
	return concat (serialised)
end
| + | |
-- Build a pattern for an attribute value delimited by `quote` (a one-character
-- string). The value is stored in the named group 'value', with each
-- backslash-escaped quote replaced by the bare quote character.
-- `any` and `backslash` are patterns defined elsewhere in this file.
local function quoted (quote)
	-- The escape alternative has to come first: `any - quote` also matches the
	-- backslash, so in the opposite order the escape was never recognised.
	-- The substitution capture keeps the group a single string once escapes occur.
	-- NOTE(review): a value whose last character is a backslash now reads the
	-- closing quote as escaped -- confirm such values need not be supported.
	return P (quote) * Cg (lpeg.Cs ((backslash * quote / quote + (any - quote)) ^ 0), 'value') * P (quote)
end
− |
| + | |
| local grammar = P { V'fragment' * -1, | | local grammar = P { V'fragment' * -1, |
| fragment = (V'tag' + C (V'char' ^ 1)) ^ 0, | | fragment = (V'tag' + C (V'char' ^ 1)) ^ 0, |
| tag = Ct (V'open' * V'fragment' * V'close' + V'void') / function (tbl) | | tag = Ct (V'open' * V'fragment' * V'close' + V'void') / function (tbl) |
− | local tag = { __name = tbl.__name } | + | local tag = { __name = tbl.__name } --setmetatable ({ __name = tbl.__name }, { __tostring = serialise }) |
| for _, attr in ipairs (tbl.__attrs) do | | for _, attr in ipairs (tbl.__attrs) do |
| tag [attr.name] = attr.value | | tag [attr.name] = attr.value |
| end | | end |
| for _, node in ipairs (tbl) do | | for _, node in ipairs (tbl) do |
− | tag [#tag], tag [#tag + 1] = merge_nodes (tag [#tag], node) | + | if #tag == 0 then |
| + | tag [1] = node |
| + | else |
| + | tag [#tag], tag [#tag + 1] = merge_nodes (tag [#tag], node) |
| + | end |
| end | | end |
− | if type (tag [1]) == 'string' and match (tag [1], '^%s*$') and not empty [tag.__name] then | + | if --[[#tag == 0 or ]]type (tag [1]) == 'string' and match (tag [1], '^%s*$') and not empty [tag.__name] then |
| -- An empty tag of this type can be safely dropped: | | -- An empty tag of this type can be safely dropped: |
| return nil | | return nil |
Строка 127: |
Строка 178: |
| void = lt * spaces * Cg (possible, '__name') * V'attributes' * spaces * slash * gt, | | void = lt * spaces * Cg (possible, '__name') * V'attributes' * spaces * slash * gt, |
| attributes = Cg (Ct ((spaces * V'attr') ^ 0), '__attrs'), | | attributes = Cg (Ct ((spaces * V'attr') ^ 0), '__attrs'), |
− | attr = Ct (Cg ((alnum + colon + hyphen) ^ 1, 'name') * spaces | + | attr = style + Ct (Cg ((alnum + colon + hyphen) ^ 1, 'name') * spaces |
| * equals * spaces * (V'quoted' + V'unquoted')), | | * equals * spaces * (V'quoted' + V'unquoted')), |
| quoted = quoted'"' + quoted"'", | | quoted = quoted'"' + quoted"'", |