Изменения

обновление из песочницы, оригинал https://en.wikipedia.org/w/index.php?title=Module:Citation/CS1/COinS от 20 ноября 2023
Строка 1: Строка 1: −
  −
local coins = {};
  −
  −
   
--[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
 
--[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
 
]]
 
]]
local is_set, in_array, remove_wiki_link; -- functions in Module:Citation/CS1/Utilities
+
 
 +
local has_accept_as_written, is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities
    
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
 
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
  −
  −
--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------
  −
  −
Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata.
  −
This function strips common patterns of apostrophe markup.  We presume that editors who have taken the time to
  −
markup a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind.
  −
  −
]]
  −
  −
local function strip_apostrophe_markup (argument)
  −
if not is_set (argument) then return argument; end
  −
  −
if argument:find ( "''", 1, true ) == nil then -- Is there at least one double apostrophe?  If not, exit.
  −
return argument;
  −
end
  −
  −
while true do
  −
if argument:find ( "'''''", 1, true ) then -- bold italic (5)
  −
argument=argument:gsub("%'%'%'%'%'", ""); -- remove all instances of it
  −
elseif argument:find ( "''''", 1, true ) then -- italic start and end without content (4)
  −
argument=argument:gsub("%'%'%'%'", "");
  −
elseif argument:find ( "'''", 1, true ) then -- bold (3)
  −
argument=argument:gsub("%'%'%'", "");
  −
elseif argument:find ( "''", 1, true ) then -- italic (2)
  −
argument=argument:gsub("%'%'", "");
  −
else
  −
break;
  −
end
  −
end
  −
return argument; -- done
  −
end
        Строка 52: Строка 17:     
local function make_coins_title (title, script)
 
local function make_coins_title (title, script)
 +
title = has_accept_as_written (title);
 
if is_set (title) then
 
if is_set (title) then
 
title = strip_apostrophe_markup (title); -- strip any apostrophe markup
 
title = strip_apostrophe_markup (title); -- strip any apostrophe markup
 
else
 
else
title=''; -- if not set, make sure title is an empty string
+
title = ''; -- if not set, make sure title is an empty string
 
end
 
end
 
if is_set (script) then
 
if is_set (script) then
Строка 61: Строка 27:  
script = strip_apostrophe_markup (script); -- strip any apostrophe markup
 
script = strip_apostrophe_markup (script); -- strip any apostrophe markup
 
else
 
else
script=''; -- if not set, make sure script is an empty string
+
script = ''; -- if not set, make sure script is an empty string
 
end
 
end
 
if is_set (title) and is_set (script) then
 
if is_set (title) and is_set (script) then
Строка 72: Строка 38:  
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------
 
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------
   −
Returns a string where all of lua's magic characters have been escaped.  This is important because functions like
+
Returns a string where all of Lua's magic characters have been escaped.  This is important because functions like
 
string.gsub() treat their pattern and replace strings as patterns, not literal strings.
 
string.gsub() treat their pattern and replace strings as patterns, not literal strings.
 
]]
 
]]
Строка 78: Строка 44:  
local function escape_lua_magic_chars (argument)
 
local function escape_lua_magic_chars (argument)
 
argument = argument:gsub("%%", "%%%%"); -- replace % with %%
 
argument = argument:gsub("%%", "%%%%"); -- replace % with %%
argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other lua magic pattern characters
+
argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other Lua magic pattern characters
 
return argument;
 
return argument;
 
end
 
end
Строка 94: Строка 60:  
 
 
while true do
 
while true do
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url "
+
pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the URL and following space(s): "[url "
if nil == pattern then break; end -- no more urls
+
if nil == pattern then break; end -- no more URLs
pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters
+
pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape Lua's magic pattern characters
 
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
 
pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
 
end
 
end
 
pages = pages:gsub("[%[%]]", ""); -- remove the brackets
 
pages = pages:gsub("[%[%]]", ""); -- remove the brackets
pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
+
pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?
+
pages = pages:gsub("&%w+;", "-" ); -- and replace HTML entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?
 
return pages;
 
return pages;
 
end
 
end
Строка 114: Строка 80:  
MathML with SVG or PNG fallback
 
MathML with SVG or PNG fallback
   −
All three are heavy with html and css which doesn't belong in the metadata.
+
All three are heavy with HTML and CSS which doesn't belong in the metadata.
    
Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings
 
Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings
Строка 121: Строка 87:  
This function gets the rendered form of an equation according to the editor's preference before the page is saved.  It
 
This function gets the rendered form of an equation according to the editor's preference before the page is saved.  It
 
then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so
 
then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so
that the page is saved without extraneous html/css markup and with a reasonably readable text form of the equation.
+
that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation.
   −
When a replacement is made, this function returns true and the value with replacement; otherwise false and the intital
+
When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial
value.  To replace multipe equations it is necesary to call this function from within a loop.
+
value.  To replace multiple equations it is necessary to call this function from within a loop.
    
]=]
 
]=]
Строка 154: Строка 120:  
--[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------
 
--[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------
   −
Cleanup parameter values for the metadata by removing or replacing invisible characters and certain html entities.
+
Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities.
    
2015-12-10: there is a bug in mw.text.unstripNoWiki ().  It replaces math stripmarkers with the appropriate content
 
2015-12-10: there is a bug in mw.text.unstripNoWiki ().  It replaces math stripmarkers with the appropriate content
Строка 171: Строка 137:  
end
 
end
   −
value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message
+
value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message
 
 
 
value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content
 
value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content
Строка 177: Строка 143:  
value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space
 
value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space
 
value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space
 
value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space
if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero width joiner characters from indic script
+
if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero-width joiner characters from indic script
value = value:gsub ('&zwj;', ''); -- remove &zwj; entities
+
value = value:gsub ('&zwj;', ''); -- remove &zwj; entities
 
value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen
 
value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen
 
end
 
end
value = value:gsub ('[\009\010\013]', ' '); -- replace horizontal tab, line feed, carriage return with plain space
+
value = value:gsub ('[\009\010\013 ]+', ' '); -- replace each run of horizontal tab, line feed, carriage return, and space characters with a single plain space
 
return value;
 
return value;
 
end
 
end
Строка 214: Строка 180:  
});
 
});
 
 
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'journal', 'news', 'magazine'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or  
+
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn', 'journal', 'news', 'magazine'}) or
 +
(in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or
 
('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then
 
('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then
 
OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier
 
OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx'}) then -- set genre according to the type of citation template we are rendering
+
if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn'}) then -- set genre according to the type of citation template we are rendering
OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx
+
OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx, cite medrxiv, cite ssrn
 
elseif 'conference' == class then
 
elseif 'conference' == class then
 
OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)
 
OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)
Строка 231: Строка 198:  
-- these used only for periodicals
 
-- these used only for periodicals
 
OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall
 
OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall
 +
OCinSoutput["rft.quarter"] = data.Quarter; -- single digits 1->first quarter, etc.
 
OCinSoutput["rft.chron"] = data.Chron; -- free-form date components
 
OCinSoutput["rft.chron"] = data.Chron; -- free-form date components
 
OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books
 
OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books
 
OCinSoutput["rft.issue"] = data.Issue;
 
OCinSoutput["rft.issue"] = data.Issue;
 +
OCinSoutput['rft.artnum'] = data.ArticleNumber; -- {{cite journal}} only
 
OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata
 
OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata
   Строка 254: Строка 223:  
end
 
end
 
end
 
end
else --{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
+
else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
 
OCinSoutput["rft.genre"] = "unknown";
 
OCinSoutput["rft.genre"] = "unknown";
 
end
 
end
Строка 270: Строка 239:  
OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation
 
OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation
 
end
 
end
 +
-- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx"
 
-- and now common parameters (as much as possible)
 
-- and now common parameters (as much as possible)
 
OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation
 
OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation
+
 
 
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
 
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
-- if k == 'ISBN' then v = clean_isbn( v ) end
   
if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end
 
if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end
 
local id = cfg.id_handlers[k].COinS;
 
local id = cfg.id_handlers[k].COinS;
 
if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry
 
if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry
 
OCinSoutput["rft_id"] = table.concat{ id, "/", v };
 
OCinSoutput["rft_id"] = table.concat{ id, "/", v };
elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords
+
elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc. that have defined COinS keywords
 
OCinSoutput[ id ] = v;
 
OCinSoutput[ id ] = v;
elseif id then -- when cfg.id_handlers[k].COinS is not nil
+
elseif 'url' == id then -- for urls that are assembled in ~/Identifiers; |asin= and |ol=
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v }; -- others; provide a url
+
OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", cfg.id_handlers[k].label});
 +
elseif id then -- when cfg.id_handlers[k].COinS is not nil so urls created here
 +
OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v, cfg.id_handlers[k].suffix or '', "#id-name=", cfg.id_handlers[k].label }; -- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers)
 
end
 
end
 
end
 
end
   −
--[[
  −
for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
  −
local id, value = cfg.id_handlers[k].COinS;
  −
if k == 'ISBN' then value = clean_isbn( v ); else value = v; end
  −
if string.sub( id or "", 1, 4 ) == 'info' then
  −
OCinSoutput["rft_id"] = table.concat{ id, "/", v };
  −
else
  −
OCinSoutput[ id ] = value;
  −
end
  −
end
  −
]]
   
local last, first;
 
local last, first;
 
for k, v in ipairs( data.Authors ) do
 
for k, v in ipairs( data.Authors ) do
last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki strip markers, non-printing or invisible characers
+
last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki stripmarkers, non-printing or invisible characters
 
if k == 1 then -- for the first author name only
 
if k == 1 then -- for the first author name only
if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name
+
if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name
 
OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation
 
OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation
 
OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation
 
OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation
Строка 313: Строка 273:  
OCinSoutput["rft.au"] = last; -- book, journal, dissertation
 
OCinSoutput["rft.au"] = last; -- book, journal, dissertation
 
end
 
end
 +
-- TODO: At present we do not report "et al.". Add anything special if this condition applies?
 
end
 
end
 
end
 
end
Строка 318: Строка 279:  
OCinSoutput.rft_id = data.URL;
 
OCinSoutput.rft_id = data.URL;
 
OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };
 
OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };
 +
 +
-- TODO: Add optional extra info:
 +
-- rfr_dat=#REVISION<version> (referrer private data)
 +
-- ctx_id=<data.RawPage>#<ref> (identifier for the context object)
 +
-- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)
 +
-- ctx_enc=info:ofi/enc:UTF-8 (character encoding)
 +
 
OCinSoutput = setmetatable( OCinSoutput, nil );
 
OCinSoutput = setmetatable( OCinSoutput, nil );
+
 
 
-- sort with version string always first, and combine.
 
-- sort with version string always first, and combine.
--table.sort( OCinSoutput );
+
-- table.sort( OCinSoutput );
table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"
+
table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"
 
return table.concat(OCinSoutput, "&");
 
return table.concat(OCinSoutput, "&");
 
end
 
end
Строка 336: Строка 304:  
cfg = cfg_table_ptr;
 
cfg = cfg_table_ptr;
   −
is_set = utilities_page_ptr.is_set; -- import functions from select Module:Citation/CS1/Utilities module
+
has_accept_as_written = utilities_page_ptr.has_accept_as_written; -- import functions from selected Module:Citation/CS1/Utilities module
 +
is_set = utilities_page_ptr.is_set;
 
in_array = utilities_page_ptr.in_array;
 
in_array = utilities_page_ptr.in_array;
 
remove_wiki_link = utilities_page_ptr.remove_wiki_link;
 
remove_wiki_link = utilities_page_ptr.remove_wiki_link;
 +
strip_apostrophe_markup = utilities_page_ptr.strip_apostrophe_markup;
 
end
 
end
       +
--[[--------------------------< E X P O R T E D  F U N C T I O N S >------------------------------------------
 +
]]
    
return {
 
return {
Анонимный участник