Изменения

вертикальная ассимиляция
Строка 1: Строка 1: −
local sort, concat = table.sort, table.concat
+
local sort, concat, clone = table.sort, table.concat, mw.clone
 +
local match = mw.ustring.match
 
local lpeg = lpeg
 
local lpeg = lpeg
 
local P, S, V, C, Cg, Cb, Ct, Cf, Cmt = lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Ct, lpeg.Cf, lpeg.Cmt
 
local P, S, V, C, Cg, Cb, Ct, Cf, Cmt = lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Ct, lpeg.Cf, lpeg.Cmt
Строка 7: Строка 8:  
local colon, hyphen, equals = P':', P':', P'='
 
local colon, hyphen, equals = P':', P':', P'='
   −
local possible = (function (list)
+
-- Service functions:
 +
local function ordered_choice (list)
 
local choice = never
 
local choice = never
 
sort (list, function (a, b)
 
sort (list, function (a, b)
Строка 16: Строка 18:  
end
 
end
 
return choice
 
return choice
end) {
+
end
 +
 
 +
local function to_set (list)
 +
local set = {}
 +
for _, tag in ipairs (list) do
 +
set [tag] = true
 +
end
 +
return set
 +
end
 +
 
 +
-- Expected HTML tags:
 +
local possible = ordered_choice {
 
'a', 'span', 'b', 'i', 'strong', 'em',
 
'a', 'span', 'b', 'i', 'strong', 'em',
 
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
 
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
Строка 23: Строка 36:  
}
 
}
   −
local table_tags = (function (list)
+
-- Only these tags can be legitimately empty:
local set = {}
+
local empty = to_set { 'th', 'td', 'br', 'hr' }
for _, tag in ipairs (list) do
  −
set [tag] = true
  −
end
  −
return set
  −
end) { 'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td' }
     −
local function quoted (quote)
+
-- Table tags are unmergeable:
return P (quote) * Cg ((any - quote + backslash * quote / quote) ^ 0, 'value') * P (quote)
+
local table_set = to_set { 'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td' }
end
      
-- Return true, if node1 and node2 are tags of the same type with exactly the same attributes:
 
-- Return true, if node1 and node2 are tags of the same type with exactly the same attributes:
Строка 40: Строка 47:  
return false
 
return false
 
end
 
end
if table_tags [node1.__name] or table_tags [node2.__name] then
+
if table_set [node1.__name] or table_set [node2.__name] then
 +
-- Table elements should not be merged:
 
return false
 
return false
 
end
 
end
Строка 69: Строка 77:  
-- Unmergeable:
 
-- Unmergeable:
 
return node1, node2
 
return node1, node2
 +
end
 +
 +
-- If there is only one child node and it can be safely assimilated, merge it into the parent node:
 +
local function assimilate (node)
 +
if #node == 0 or #node > 1 then
 +
return node
 +
end
 +
local child = clone (node [1])
 +
if type (child) == 'string' or table_set [node.__name] and not table_set [child.__name] or table_set [child.__name] then
 +
return node
 +
end
 +
--if node.__name == child.__name then
 +
for key, value in pairs (child) do
 +
if key == 'style' and node [key] then
 +
node [key] = node [key] .. '; ' .. value
 +
else
 +
node [key] = value
 +
end
 +
end
 +
node [1] = child [1]
 +
--end
 +
return node
 
end
 
end
 
 
 +
local function quoted (quote)
 +
return P (quote) * Cg ((any - quote + backslash * quote / quote) ^ 0, 'value') * P (quote)
 +
end
 
 
 
local grammar = P { V'fragment' * -1,
 
local grammar = P { V'fragment' * -1,
Строка 82: Строка 115:  
tag [#tag], tag [#tag + 1] = merge_nodes (tag [#tag], node)
 
tag [#tag], tag [#tag + 1] = merge_nodes (tag [#tag], node)
 
end
 
end
return tag
+
if type (tag [1]) == 'string' and match (tag [1], '^%s*$') and not empty [tag.__name] then
 +
-- An empty tag of this type can be safely dropped:
 +
return nil
 +
end
 +
return assimilate (tag) -- try to assimilate the only child.
 
end,
 
end,
 
open = lt * spaces * Cg (possible, '__name') * V'attributes' * spaces * gt,
 
open = lt * spaces * Cg (possible, '__name') * V'attributes' * spaces * gt,
Строка 116: Строка 153:  
test = function (frame)
 
test = function (frame)
 
return mw.dumpObject {grammar:match (test)}
 
return mw.dumpObject {grammar:match (test)}
end, same = same_tag_and_attributes
+
end
 
}
 
}