Изменения

Модуль:Citation/CS1/COinS (посмотреть исходный код)

Версия от 17:22, 11 декабря 2023

240 байт убрано, 1 год назад

обновление из песочницы, оригинал https://en.wikipedia.org/w/index.php?title=Module:Citation/CS1/COinS от 20 ноября 2023

Строка 1: Строка 1: −

−

~~local coins = {};~~

−

--[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------

]]

−

local is_set, in_array, remove_wiki_link; -- functions in Module:Citation/CS1/Utilities

+

local has_accept_as_written, is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities

local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration

−

~~--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------~~

−

~~Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata.~~

−

~~This function strips common patterns of apostrophe markup. We presume that editors who have taken the time to~~

−

~~markup a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind.~~

−

]]

−

~~local function strip_apostrophe_markup (argument)~~

−

~~if not is_set (argument) then return argument; end~~

−

~~if argument:find ( "''", 1, true ) == nil then -- Is there at least one double apostrophe? If not, exit.~~

−

~~return argument;~~

−

~~end~~

−

~~while true do~~

−

~~if argument:find ( "'''''", 1, true ) then -- bold italic (5)~~

−

~~argument=argument:gsub("%'%'%'%'%'", ""); -- remove all instances of it~~

−

~~elseif argument:find ( "''''", 1, true ) then -- italic start and end without content (4)~~

−

~~argument=argument:gsub("%'%'%'%'", "");~~

−

~~elseif argument:find ( "'''", 1, true ) then -- bold (3)~~

−

~~argument=argument:gsub("%'%'%'", "");~~

−

~~elseif argument:find ( "''", 1, true ) then -- italic (2)~~

−

~~argument=argument:gsub("%'%'", "");~~

−

~~else~~

−

~~break;~~

−

~~end~~

−

~~end~~

−

~~return argument; -- done~~

−

~~end~~

Строка 52: Строка 17:

local function make_coins_title (title, script)

+

title = has_accept_as_written (title);

if is_set (title) then

title = strip_apostrophe_markup (title); -- strip any apostrophe markup

else

−

title=''; -- if not set, make sure title is an empty string

+

title = ''; -- if not set, make sure title is an empty string

end

if is_set (script) then

Строка 61: Строка 27:

script = strip_apostrophe_markup (script); -- strip any apostrophe markup

else

−

script=''; -- if not set, make sure script is an empty string

+

script = ''; -- if not set, make sure script is an empty string

end

if is_set (title) and is_set (script) then

Строка 72: Строка 38:

--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

−

Returns a string where all of ~~lua~~'s magic characters have been escaped. This is important because functions like

+

Returns a string where all of Lua's magic characters have been escaped. This is important because functions like

string.gsub() treat their pattern and replace strings as patterns, not literal strings.

]]

Строка 78: Строка 44:

local function escape_lua_magic_chars (argument)

argument = argument:gsub("%%", "%%%%"); -- replace % with %%

−

argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other ~~lua~~ magic pattern characters

+

argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other Lua magic pattern characters

return argument;

end

Строка 94: Строка 60:

while true do

−

pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the ~~url~~ and following space(s): "[url "

+

pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the URL and following space(s): "[url "

−

if nil == pattern then break; end -- no more ~~urls~~

+

if nil == pattern then break; end -- no more URLs

−

pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape ~~lua~~'s magic pattern characters

+

pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape Lua's magic pattern characters

pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible

end

pages = pages:gsub("[%[%]]", ""); -- remove the brackets

−

pages = pages:gsub("–", "-" ); -- replace endashes with hyphens

+

pages = pages:gsub("–", "-" ); -- replace endashes with hyphens

−

pages = pages:gsub("&%w+;", "-" ); -- and replace ~~html~~ entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?

+

pages = pages:gsub("&%w+;", "-" ); -- and replace HTML entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?

return pages;

end

Строка 114: Строка 80:

MathML with SVG or PNG fallback

−

All three are heavy with ~~html~~ and ~~css~~ which doesn't belong in the metadata.

+

All three are heavy with HTML and CSS which doesn't belong in the metadata.

Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings

Строка 121: Строка 87:

This function gets the rendered form of an equation according to the editor's preference before the page is saved. It

then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so

−

that the page is saved without extraneous ~~html~~/~~css~~ markup and with a reasonably readable text form of the equation.

+

that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation.

−

When a replacement is made, this function returns true and the value with replacement; otherwise false and the ~~intital~~

+

When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial

−

value. To replace multipe equations it is ~~necesary~~ to call this function from within a loop.

+

value. To replace multiple equations it is necessary to call this function from within a loop.

]=]

Строка 154: Строка 120:

--[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------

−

Cleanup parameter values for the metadata by removing or replacing invisible characters and certain ~~html~~ entities.

+

Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities.

2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content

Строка 171: Строка 137:

end

−

value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message

+

value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message

value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content

Строка 177: Строка 143:

value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space

value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space

−

if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero width joiner characters from indic script

+

if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero-width joiner characters from indic script

−

value = value:gsub ('&zwj;', ''); -- remove &zwj; entities

+

value = value:gsub ('&zwj;', ''); -- remove &zwj; entities

value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen

end

−

value = value:gsub ('[\009\010\013]', ' '); -- replace horizontal tab, line feed, carriage return with plain space

+

value = value:gsub ('[\009\010\013 ]+', ' '); -- replace horizontal tab, line feed, carriage return with plain space

return value;

end

Строка 214: Строка 180:

});

−

if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'journal', 'news', 'magazine'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or

+

if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn', 'journal', 'news', 'magazine'}) or

+

(in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or

('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then

OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier

−

if in_array (class, {'arxiv', 'biorxiv', 'citeseerx'}) then -- set genre according to the type of citation template we are rendering

+

if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn'}) then -- set genre according to the type of citation template we are rendering

−

OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx

+

OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx, cite medrxiv, cite ssrn

elseif 'conference' == class then

OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)

Строка 231: Строка 198:

-- these used only for periodicals

OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall

+

OCinSoutput["rft.quarter"] = data.Quarter; -- single digits 1->first quarter, etc.

OCinSoutput["rft.chron"] = data.Chron; -- free-form date components

OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books

OCinSoutput["rft.issue"] = data.Issue;

+

OCinSoutput['rft.artnum'] = data.ArticleNumber; -- {{cite journal}} only

OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata

Строка 254: Строка 223:

end

−

else --{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}

+

else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}

OCinSoutput["rft.genre"] = "unknown";

end

Строка 270: Строка 239:

OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation

end

+

-- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx"

-- and now common parameters (as much as possible)

OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation

−

+

for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?

−

~~-- if k == 'ISBN' then v = clean_isbn( v ) end~~

if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end

local id = cfg.id_handlers[k].COinS;

if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry

OCinSoutput["rft_id"] = table.concat{ id, "/", v };

−

elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords

+

elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc. that have defined COinS keywords

OCinSoutput[ id ] = v;

−

elseif id then -- when cfg.id_handlers[k].COinS is not nil

+

elseif 'url' == id then -- for urls that are assembled in ~/Identifiers; |asin= and |ol=

−

OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v }; -- others; provide a ~~url~~

+

OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", cfg.id_handlers[k].label});

+

elseif id then -- when cfg.id_handlers[k].COinS is not nil so urls created here

+

OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v, cfg.id_handlers[k].suffix or '', "#id-name=", cfg.id_handlers[k].label }; -- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers)

end

−

~~--[[~~

−

~~for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?~~

−

~~local id, value = cfg.id_handlers[k].COinS;~~

−

~~if k == 'ISBN' then value = clean_isbn( v ); else value = v; end~~

−

~~if string.sub( id or "", 1, 4 ) == 'info' then~~

−

~~OCinSoutput["rft_id"] = table.concat{ id, "/", v };~~

−

~~else~~

−

~~OCinSoutput[ id ] = value;~~

−

~~end~~

−

~~end~~

−

]]

local last, first;

for k, v in ipairs( data.Authors ) do

−

last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki ~~strip markers~~, non-printing or invisible ~~characers~~

+

last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki stripmarkers, non-printing or invisible characters

if k == 1 then -- for the first author name only

−

if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name

+

if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name

OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation

OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation

Строка 313: Строка 273:

OCinSoutput["rft.au"] = last; -- book, journal, dissertation

end

+

-- TODO: At present we do not report "et al.". Add anything special if this condition applies?

end

Строка 318: Строка 279:

OCinSoutput.rft_id = data.URL;

OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };

+

-- TODO: Add optional extra info:

+

-- rfr_dat=#REVISION<version> (referrer private data)

+

-- ctx_id=<data.RawPage>#<ref> (identifier for the context object)

+

-- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)

+

-- ctx_enc=info:ofi/enc:UTF-8 (character encoding)

+

OCinSoutput = setmetatable( OCinSoutput, nil );

−

+

-- sort with version string always first, and combine.

−

--table.sort( OCinSoutput );

+

-- table.sort( OCinSoutput );

−

table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"

+

table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"

return table.concat(OCinSoutput, "&");

end

Строка 336: Строка 304:

cfg = cfg_table_ptr;

−

~~is_set~~ = utilities_page_ptr.~~is_set~~; -- import functions from ~~select~~ Module:Citation/CS1/Utilities module

+

has_accept_as_written = utilities_page_ptr.has_accept_as_written; -- import functions from selected Module:Citation/CS1/Utilities module

+

is_set = utilities_page_ptr.is_set;

in_array = utilities_page_ptr.in_array;

remove_wiki_link = utilities_page_ptr.remove_wiki_link;

+

strip_apostrophe_markup = utilities_page_ptr.strip_apostrophe_markup;

end

+

--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------

+

]]

return {

Анонимный участник

w>Iniquity