Browse Source

add all_children() method to DictionaryNode. fix small bug with dict autocomplete

Noah Santacruz 1 month ago
parent
commit
df53bfb4bc
4 changed files with 42 additions and 9 deletions
  1. 1 1
      reader/views.py
  2. 12 2
      sefaria/model/schema.py
  3. 15 3
      sefaria/model/text.py
  4. 14 3
      sefaria/search.py

+ 1 - 1
reader/views.py

@@ -2178,7 +2178,7 @@ def get_name_completions(name, limit, ref_only):
         # A dictionary beginning, but not a valid entry
         lexicon_ac = library.lexicon_auto_completer(e.lexicon_name)
         t = [e.base_title + u", " + t[1] for t in lexicon_ac.items(e.word)[:limit or None]]
-        d["completions"] = list(OrderedDict.fromkeys(t))  # filter out dupes
+        completions = list(OrderedDict.fromkeys(t))  # filter out dupes
     except InputError:
         completions = completer.complete(name, limit)
         object_data = completer.get_data(name)

+ 12 - 2
sefaria/model/schema.py

@@ -14,7 +14,7 @@ except ImportError:
 import regex
 from . import abstract as abst
 from sefaria.system.database import db
-
+from sefaria.model.lexicon import LexiconEntrySet
 from sefaria.system.exceptions import InputError, IndexSchemaError
 from sefaria.utils.hebrew import decode_hebrew_numeral, encode_hebrew_numeral, encode_hebrew_daf, hebrew_term
 
@@ -1414,7 +1414,7 @@ class DictionaryEntryNode(TitledTreeNode):
     is_virtual = True
     supported_languages = ["en"]
 
-    def __init__(self, parent, title=None, tref=None, word=None):
+    def __init__(self, parent, title=None, tref=None, word=None, lexicon_entry=None):
         """
         A schema node created on the fly, in memory, to correspond to a dictionary entry.
         Created by a DictionaryNode object.
@@ -1423,6 +1423,7 @@ class DictionaryEntryNode(TitledTreeNode):
         :param title:
         :param tref:
         :param word:
+         :param lexicon_entry: LexiconEntry. If you pass this param and don't pass title, tref, or word, this will bootstrap the DictionaryEntryNode and avoid an extra mongo call
         """
         if title and tref:
             self.title = title
@@ -1431,6 +1432,10 @@ class DictionaryEntryNode(TitledTreeNode):
             self.word = self._match.group(1) or ""
         elif word:
             self.word = word
+        elif lexicon_entry:
+            self.lexicon_entry = lexicon_entry
+            self.has_word_match = bool(self.lexicon_entry)
+            self.word = self.lexicon_entry.headword
 
         super(DictionaryEntryNode, self).__init__({
             "titles": [{
@@ -1571,6 +1576,11 @@ class DictionaryNode(VirtualNode):
         except DictionaryEntryNotFound:
             return None
 
+    def all_children(self):
+        """
+        Generate a DictionaryEntryNode child for every entry in this dictionary.
+
+        Queries LexiconEntrySet for all entries whose parent_lexicon is this
+        node's lexiconName and yields one entry_class node per entry, passing
+        the loaded lexicon_entry so the child avoids a second mongo lookup.
+        NOTE(review): lazily evaluated generator — each call re-runs the
+        LexiconEntrySet query; presumably large for full lexicons, so callers
+        should iterate once rather than materialize repeatedly.
+        """
+        lexicon_entry_set = LexiconEntrySet({"parent_lexicon": self.lexiconName})
+        for lexicon_entry in lexicon_entry_set:
+            yield self.entry_class(self, lexicon_entry=lexicon_entry)
+
     def serialize(self, **kwargs):
         """
         :return string: serialization of the subtree rooted in this node

+ 15 - 3
sefaria/model/text.py

@@ -22,7 +22,7 @@ except ImportError:
     import re
 
 from . import abstract as abst
-from schema import deserialize_tree, SchemaNode, JaggedArrayNode, TitledTreeNode, AddressTalmud, Term, TermSet, TitleGroup, AddressType, DictionaryEntryNotFound
+from schema import deserialize_tree, SchemaNode, VirtualNode, DictionaryNode, JaggedArrayNode, TitledTreeNode, AddressTalmud, Term, TermSet, TitleGroup, AddressType, DictionaryEntryNotFound
 from sefaria.system.database import db
 
 import sefaria.system.cache as scache
@@ -1087,7 +1087,11 @@ class Version(abst.AbstractMongoRecord, AbstractTextRecord, AbstractSchemaConten
             addressTypes = schema[u"addressTypes"] if u"addressTypes" in schema else None
         if type(item) is dict:
             for n in schema[u"nodes"]:
-                if n.get(u"default", False):
+                try:
+                    is_virtual_node = VirtualNode in globals()[n.get(u"nodeType", u"")].__bases__
+                except KeyError:
+                    is_virtual_node = False
+                if n.get(u"default", False) or is_virtual_node:
                     node_title_en = node_title_he = u""
                 elif n.get(u"sharedTitle", False):
                     titles = terms_dict[n[u"sharedTitle"]][u"titles"] if terms_dict is not None else Term().load({"name": n[u"sharedTitle"]}).titles
@@ -1097,7 +1101,15 @@ class Version(abst.AbstractMongoRecord, AbstractTextRecord, AbstractSchemaConten
                     node_title_en = u", " + get_primary_title(u"en", n[u"titles"])
                     node_title_he = u", " + get_primary_title(u"he", n[u"titles"])
 
-                self.walk_thru_contents(action, item[n[u"key"]], tref + node_title_en, heTref + node_title_he, n, addressTypes)
+                if is_virtual_node:
+                    curr_ref = Ref(tref)
+                    vnode = next(x for x in curr_ref.index_node.all_children() if x.nodeType == n.get(u"nodeType", u"") and x.firstWord == n[u"firstWord"])
+                    for vchild in vnode.all_children():
+                        vstring = u" ".join(vchild.get_text())
+                        vref = vchild.ref()
+                        self.walk_thru_contents(action, vstring, vref.normal(), vref.he_normal(), n, [])
+                else:
+                    self.walk_thru_contents(action, item[n[u"key"]], tref + node_title_en, heTref + node_title_he, n, addressTypes)
         elif type(item) is list:
             for ii, i in enumerate(item):
                 try:

+ 14 - 3
sefaria/search.py

@@ -519,10 +519,21 @@ class TextIndexer(object):
                 vcount += 1
             bulk(es_client, cls._bulk_actions, stats_only=True, raise_on_error=False)
 
-
     @classmethod
-    def index_version(cls, version):
-        version.walk_thru_contents(cls._cache_action, heTref=cls.curr_index.get_title('he'), schema=cls.curr_index.schema, terms_dict=cls.terms_dict)
+    def index_version(cls, version, tries=0):
+        """
+        Walk every segment of `version` and cache an index action for each.
+
+        :param version: Version whose contents are walked via walk_thru_contents
+        :param tries: internal retry counter for AutoReconnect recovery;
+            external callers should omit it (defaults to 0)
+        """
+        try:
+            version.walk_thru_contents(cls._cache_action, heTref=cls.curr_index.get_title('he'), schema=cls.curr_index.schema, terms_dict=cls.terms_dict)
+        except pymongo.errors.AutoReconnect as e:
+            # Adding this because there is a mongo call for dictionary words in walk_thru_contents()
+            # NOTE(review): retries via recursion, not a loop — bounded at 200
+            # frames so it stays under the default recursion limit, but a
+            # while-loop would be the conventional shape.
+            if tries < 200:
+                pytime.sleep(5)
+                print u"Retrying {}. Try {}".format(version.title, tries)
+                cls.index_version(version, tries+1)
+            else:
+                print u"Tried {} times to get {}. I have failed you...".format(tries, version.title)
+                # NOTE(review): in Python 2, `raise e` re-raises with a fresh
+                # traceback; a bare `raise` would preserve the original one.
+                raise e
+        except StopIteration:
+            # walk_thru_contents locates the dictionary node with next(); when
+            # no node matches, the whole version is skipped with only this log
+            # line — confirm that silent skip is intended.
+            print u"Could not find dictionary node in {}".format(version.title)
 
     @classmethod
     def index_ref(cls, index_name, oref, version_title, lang, merged):