# Source code for sci_watch.parser.query_parser

import re

from lark import Transformer

from sci_watch.parser.tree_node import Node


class CreateCustomTree(Transformer):
    """
    Custom tree transformer class based on Lark Transformers.

    Each public method below is called on the corresponding clause of the
    parsed query and returns a custom ``Node`` (or forwards a child as is).
    Names starting with an underscore are private builders shared by clauses
    that produce the same node layout.
    """

    @staticmethod
    def _binary_clause(node_type: str, label: str, nodes) -> Node:
        """
        Build the node of a two-operand clause.

        Parameters
        ----------
        node_type: str
            Value stored in the ``type`` field of the node
        label: str
            Value stored in the ``value`` field of the node
        nodes:
            Exactly three items: left operand, operator, right operand.
            The operator item is ignored.

        Returns
        -------
        Node
            Node spanning from the start of the left operand to the end of the
            right operand, with both operands as children
        """
        left, _, right = nodes
        return Node(
            type=node_type,
            value=label,
            start_pos=left.start_pos,
            end_pos=right.end_pos,
            children=[left, right],
        )

    @staticmethod
    def _scope_clause(node_type: str, label: str, nodes) -> Node:
        """
        Build the node of a clause that wraps a single sub-query.

        Parameters
        ----------
        node_type: str
            Value stored in the ``type`` field of the node
        label: str
            Value stored in the ``value`` field of the node
        nodes:
            Items of the clause; the first and last ones give the positions
            and the last one becomes the only child

        Returns
        -------
        Node
            Node with ``search_scope`` set to ``None`` and a single child
        """
        first, last = nodes[0], nodes[-1]
        return Node(
            type=node_type,
            value=label,
            start_pos=first.start_pos,
            end_pos=last.end_pos,
            search_scope=None,
            children=[last],
        )

    def scoped_query(self, nodes) -> Node:
        """Forward the first child unchanged."""
        return nodes[0]

    def and_clause(self, nodes) -> Node:
        """Build an ``and_clause`` node (value ``"AND"``) from ``[left, operator, right]``."""
        return self._binary_clause("and_clause", "AND", nodes)

    def or_clause(self, nodes) -> Node:
        """Build an ``or_clause`` node (value ``"OR"``) from ``[left, operator, right]``."""
        return self._binary_clause("or_clause", "OR", nodes)

    def word_with_wildcard(self, values) -> Node:
        """
        Turn a word containing ``?`` / ``*`` wildcards into a regex node.

        ``?`` becomes ``(?:.{0,1})`` and ``*`` becomes ``(?:.*)``; the result is
        wrapped between two ``\\b`` word boundaries.

        Parameters
        ----------
        values:
            Items of the clause; only the first one is used (its ``value``,
            ``start_pos`` and ``end_pos`` attributes are read)

        Returns
        -------
        Node
            Node of type ``word_with_wildcard`` whose value is the regex
        """
        source = values[0]
        # "?" must be rewritten before "*": the replacement text for "*"
        # contains a "?" that must not be expanded in turn.
        # NOTE(review): the rest of the word is inserted in the regex as is
        # (no re.escape) - confirm the grammar only allows regex-safe words.
        regex_body = source.value.replace("?", "(?:.{0,1})").replace("*", "(?:.*)")
        return Node(
            type="word_with_wildcard",
            value=rf"\b{regex_body}\b",
            start_pos=source.start_pos,
            end_pos=source.end_pos,
            search_scope=None,
        )

    def expression(self, values) -> Node:
        """
        Build the regex node of an expression.

        Parameters
        ----------
        values:
            Items of the clause. The first and last ones only provide the
            positions (presumably the delimiters of the expression - confirm
            against the grammar); the ones in between are joined with a single
            space.

        Returns
        -------
        Node
            Node of type ``expression`` whose value is the joined text wrapped
            between two ``\\b`` word boundaries
        """
        # NOTE(review): unlike `proximity`, the words are not passed through
        # re.escape here - confirm whether that difference is intended.
        phrase = " ".join(values[1:-1])
        return Node(
            type="expression",
            value=rf"\b{phrase}\b",
            start_pos=values[0].start_pos,
            end_pos=values[-1].end_pos,
            search_scope=None,
        )

    def distance(self, values) -> int:
        """
        Forward the first child unchanged.

        NOTE(review): the value is returned without conversion although the
        method is annotated as returning ``int``; `proximity` calls ``int()``
        on its last item and reads its ``end_pos``, so this is presumably a
        Lark token - confirm.
        """
        return values[0]

    def proximity(self, values) -> Node:
        """
        Build the regex node of a proximity search.

        The generated pattern matches the escaped words in order, each pair
        being separated by whitespace and at most ``distance`` other
        whitespace-delimited chunks.

        Parameters
        ----------
        values:
            Items of the clause. The last one is the maximum distance (it is
            converted with ``int()``), ``values[1:-3]`` are the words, and the
            first and last items provide the positions. The three trailing
            items are presumably closing delimiter, operator and distance -
            confirm against the grammar.

        Returns
        -------
        Node
            Node of type ``proximity`` whose value is the regex
        """
        max_gap = int(values[-1])
        # Separator allowed between two consecutive words of the query.
        separator = rf"\s+(?:\S+\s+){{0,{max_gap}}}"
        escaped_words = map(re.escape, values[1:-3])
        pattern = r"\b" + separator.join(escaped_words) + r"\b"
        return Node(
            type="proximity",
            value=pattern,
            start_pos=values[0].start_pos,
            end_pos=values[-1].end_pos,
            search_scope=None,
        )

    def not_clause(self, values) -> Node:
        """Build a ``not_clause`` node (value ``"NOT"``) from ``[left, operator, right]``."""
        return self._binary_clause("not_clause", "NOT", values)

    def parenthesis_clause(self, nodes) -> Node:
        """
        Build the node of a parenthesised sub-query.

        Parameters
        ----------
        nodes:
            Exactly three items: opening parenthesis, inner node, closing
            parenthesis

        Returns
        -------
        Node
            Node of type ``parenthesis_clause`` spanning both parentheses, with
            the inner node as only child
        """
        opening, inner, closing = nodes
        return Node(
            type="parenthesis_clause",
            value="()",
            start_pos=opening.start_pos,
            end_pos=closing.end_pos,
            search_scope=None,
            children=[inner],
        )

    def in_title_clause(self, nodes) -> Node:
        """Build an ``in_title_clause`` node (value ``"Title"``) wrapping the last item."""
        return self._scope_clause("in_title_clause", "Title", nodes)

    def in_content_clause(self, nodes) -> Node:
        """Build an ``in_content_clause`` node (value ``"Content"``) wrapping the last item."""
        return self._scope_clause("in_content_clause", "Content", nodes)

    def begin_clause(self, nodes) -> Node:
        """
        Build a ``begin`` node (value ``"Begin"``) wrapping the last item.

        Note that the node type is ``"begin"``, not ``"begin_clause"``.
        """
        return self._scope_clause("begin", "Begin", nodes)

    def query(self, nodes) -> Node:
        """Forward the first child unchanged."""
        return nodes[0]

    def token(self, nodes) -> Node:
        """Forward the first child unchanged."""
        return nodes[0]

    def scoped_token(self, nodes) -> Node:
        """Forward the first child unchanged."""
        return nodes[0]
# Node types that open a search scope, mapped to the scope they set.
# "begin" is the type emitted by ``CreateCustomTree.begin_clause``;
# "begin_clause" is kept so trees using that spelling behave as before.
_SCOPE_BY_NODE_TYPE = {
    "in_title_clause": "title",
    "in_content_clause": "content",
    "begin_clause": "begin",
    "begin": "begin",
}


def _update_search_scope(tree: "Node", search_scope: str) -> None:
    """
    Given a node and a search scope (e.g. "content"), propagate it to all the
    subtree, in place

    Parameters
    ----------
    tree : Node
        Root node of the tree where to propagate the scope
    search_scope: str
        The search scope
    """
    tree.search_scope = search_scope
    # Leaves may carry ``children=None``; treat that as "no children".
    for child in tree.children or ():
        _update_search_scope(tree=child, search_scope=search_scope)


def update_search_scope(tree: "Node") -> None:
    """
    Given the root node of a tree, traverse it and update the search scope for
    each node, in place

    The traversal goes down until it meets a scope-defining node (title,
    content or begin clause); that node and its whole subtree then receive the
    matching scope. Nodes outside of any such clause are left untouched.

    Parameters
    ----------
    tree: Node
        Root node of the tree to traverse
    """
    scope = _SCOPE_BY_NODE_TYPE.get(tree.type)
    if scope is not None:
        # The first scope met on the way down wins for the whole subtree.
        _update_search_scope(tree, search_scope=scope)
        return
    for child in tree.children or ():
        update_search_scope(tree=child)