link.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
# -*- coding: utf-8 -*-
import logging
logger = logging.getLogger(__name__)

from sefaria.model import *
from sefaria.system.exceptions import DuplicateRecordError, InputError
from sefaria.utils.talmud import section_to_daf
import sefaria.tracker as tracker

# Varnish cache invalidation is optional: if the setting isn't present,
# fall back to a no-op configuration rather than failing at import time.
try:
    from sefaria.settings import USE_VARNISH
except ImportError:
    USE_VARNISH = False
if USE_VARNISH:
    from sefaria.system.sf_varnish import invalidate_ref

#TODO: should all the functions here be decoupled from the need to enter a userid?
def add_commentary_links(oref, user, **kwargs):
    """
    Automatically add links for each comment in the commentary text denoted by 'tref'.
    E.g., for the ref 'Sforno on Kohelet 3:2', automatically set links for
    Kohelet 3:2 <-> Sforno on Kohelet 3:2:1, Kohelet 3:2 <-> Sforno on Kohelet 3:2:2, etc.
    for each segment of text (comment) that is in 'Sforno on Kohelet 3:2'.

    :param oref: Ref of the commentary text to link; must satisfy oref.is_commentary().
    :param user: user id recorded by the tracker for every link created.
    :param kwargs: passed through to tracker.add on each link save.
    """
    try:
        text = TextFamily(oref, commentary=0, context=0, pad=False).contents()
    except AssertionError:
        # Structure (non-content) nodes carry no text to link; skip quietly.
        logger.warning(u"Structure node passed to add_commentary_links: {}".format(oref.normal()))
        return

    assert oref.is_commentary()
    tref = oref.normal()
    # The base text's ref is everything after " on ",
    # e.g. "Sforno on Kohelet 3:2" -> "Kohelet 3:2".
    base_tref = tref[tref.find(" on ") + 4:]

    if len(text["sections"]) == len(text["sectionNames"]):
        # this is a single comment, trim the last section number (comment) from ref
        base_tref = base_tref[0:base_tref.rfind(":")]
        link = {
            "refs": [base_tref, tref],
            "type": "commentary",
            "anchorText": "",
            "auto": True,
            "generated_by": "add_commentary_links"
        }
        try:
            tracker.add(user, Link, link, **kwargs)
        except DuplicateRecordError as e:
            # Link already exists -- auto-linking is idempotent.
            pass

    elif len(text["sections"]) == (len(text["sectionNames"]) - 1):
        # This means that the text (and it's corresponding ref) being posted has the amount of sections like the parent text
        # (the text being commented on) so this is single group of comments on the lowest unit of the parent text.
        # and we simply iterate and create a link for each existing one to point to the same unit of parent text
        # Use the longer of the two language versions so no comment is missed.
        length = max(len(text["text"]), len(text["he"]))
        for i in range(length):
            link = {
                "refs": [base_tref, tref + ":" + str(i + 1)],
                "type": "commentary",
                "anchorText": "",
                "auto": True,
                "generated_by": "add_commentary_links"
            }
            try:
                tracker.add(user, Link, link, **kwargs)
            except DuplicateRecordError as e:
                pass

    elif len(text["sections"]) > 0:
        # any other case where the posted ref sections do not match the length of the parent texts sections
        # this is a larger group of comments meaning it needs to be further broken down
        # in order to be able to match the commentary to the basic parent text units,
        # recur on each section
        length = max(len(text["text"]), len(text["he"]))
        for r in oref.subrefs(length):
            add_commentary_links(r, user, **kwargs)

    else:
        # This is a special case of the above, where the sections length is 0 and that means this is
        # a whole text that has been posted. For this we need a better way than get_text() to get the correct length of
        # highest order section counts.
        # We use the counts document for that.
        #text_counts = counts.count_texts(tref)
        #length = len(text_counts["counts"])
        sn = StateNode(tref)
        if not sn.versionState.is_new_state:
            sn.versionState.refresh()  # Needed when saving multiple nodes in a complex text. This may be moderately inefficient.
            sn = StateNode(tref)
        length = sn.ja('all').length()
        for r in oref.subrefs(length):
            add_commentary_links(r, user, **kwargs)

    if USE_VARNISH:
        # Refresh both the commentary ref and the base ref it points to.
        invalidate_ref(oref)
        invalidate_ref(Ref(base_tref))
  86. def rebuild_commentary_links(tref, user, **kwargs):
  87. """
  88. Deletes any commentary links for which there is no content (in any ref),
  89. then adds all commentary links again.
  90. """
  91. try:
  92. oref = Ref(tref)
  93. except InputError:
  94. # Allow commentators alone, rebuild for each text we have
  95. i = library.get_index(tref)
  96. for c in library.get_commentary_version_titles(i.title):
  97. rebuild_commentary_links(c, user, **kwargs)
  98. return
  99. links = LinkSet(oref)
  100. for link in links:
  101. try:
  102. oref1, oref2 = Ref(link.refs[0]), Ref(link.refs[1])
  103. except InputError:
  104. link.delete()
  105. if USE_VARNISH:
  106. invalidate_ref(oref1)
  107. invalidate_ref(oref2)
  108. continue
  109. t1, t2 = TextFamily(oref1, commentary=0, context=0), TextFamily(oref2, commentary=0, context=0)
  110. if not (t1.text + t1.he) or not (t2.text + t2.he):
  111. # Delete any link that doesn't have some textual content on one side or the other
  112. link.delete()
  113. if USE_VARNISH:
  114. invalidate_ref(oref1)
  115. invalidate_ref(oref2)
  116. add_commentary_links(oref, user, **kwargs)
  117. # todo: Currently supports only
def add_links_from_text(ref, lang, text, text_id, user, **kwargs):
    """
    Scan a text for explicit references to other texts and automatically add new links between
    ref and the mentioned text.

    text["text"] may be a list of segments, an individual segment, or None.
    The set of no longer supported links (`existingLinks` - `found`) is deleted.
    If Varnish is used, all linked refs, old and new, are refreshed
    Returns `links` - the list of links added.

    :param ref: normal-form ref string of the citing text.
    :param lang: language code used by the citation scanner.
    :param text: list of segments, a single segment string, or None.
    :param text_id: oid of the source text version, stored on each link.
    :param user: user id recorded by the tracker for adds and deletes.
    """
    if not text:
        return []
    elif isinstance(text, list):
        # Recur on each segment, accumulating links added at every depth.
        oref = Ref(ref)
        subrefs = oref.subrefs(len(text))
        links = []
        for i in range(len(text)):
            single = add_links_from_text(subrefs[i].normal(), lang, text[i], text_id, user, **kwargs)
            links += single
        return links
    elif isinstance(text, basestring):
        """
        Keeps three lists:
        * existingLinks - The links that existed before the text was rescanned
        * found - The links found in this scan of the text
        * links - The new links added in this scan of the text
        The set of no longer supported links (`existingLinks` - `found`) is deleted.
        The set of all links (`existingLinks` + `Links`) is refreshed in Varnish.
        """
        existingLinks = LinkSet({
            "refs": ref,
            "auto": True,
            "generated_by": "add_links_from_text",
            "source_text_oid": text_id
        }).array()  # Added the array here to force population, so that new links don't end up in this set

        found = []  # The normal refs of the links found in this text
        links = []  # New link objects created by this processes

        refs = library.get_refs_in_string(text, lang)
        for oref in refs:
            link = {
                # Note -- ref of the citing text is in the first position
                "refs": [ref, oref.normal()],
                "type": "",
                "auto": True,
                "generated_by": "add_links_from_text",
                "source_text_oid": text_id
            }
            found += [oref.normal()]  # Keep this here, since tracker.add will throw an error if the link exists
            try:
                tracker.add(user, Link, link, **kwargs)
                links += [link]
                if USE_VARNISH:
                    invalidate_ref(oref)
            except InputError as e:
                # Invalid or duplicate link -- skip; `found` already records it
                # so the existing copy isn't deleted below.
                pass

        # Remove existing links that are no longer supported by the text
        for exLink in existingLinks:
            for r in exLink.refs:
                if r == ref:  # current base ref
                    continue
                if USE_VARNISH:
                    # Refresh every previously linked ref, kept or deleted.
                    invalidate_ref(Ref(r))
                if r not in found:
                    tracker.delete(user, Link, exLink._id)
                break  # each link has exactly one non-base ref; stop after it
        return links
  183. def delete_links_from_text(title, user):
  184. """
  185. Deletes all of the citation generated links from text 'title'
  186. """
  187. regex = Ref(title).regex()
  188. links = LinkSet({"refs.0": {"$regex": regex}, "generated_by": "add_links_from_text"})
  189. for link in links:
  190. tracker.delete(user, Link, link._id)
  191. def rebuild_links_from_text(title, user):
  192. """
  193. Deletes all of the citation generated links from text 'title'
  194. then rebuilds them.
  195. """
  196. delete_links_from_text(title, user)
  197. title = Ref(title).normal()
  198. versions = VersionSet({"title": title})
  199. for version in versions:
  200. add_links_from_text(title, version.language, version.chapter, version._id, user)