Commit c1b57ae4 authored by Jaromír Plhák's avatar Jaromír Plhák
Browse files

updates to allow gold standard annotation

parent 44303cc8
Loading
Loading
Loading
Loading
+57 −282
Original line number Diff line number Diff line
@@ -13,8 +13,8 @@ from zipfile import ZipFile

app = Flask(__name__)

DIR = "/var/www/webApp/webApp/"
# DIR = ""
# DIR = "/var/www/webApp/webApp/"
DIR = ""
ANNOTATION_DIR = path.join(DIR, "annotations")
JSON_DIR = path.join(DIR, "json")
DATA_DIR = path.join(DIR, "data")
@@ -24,6 +24,7 @@ ANNOTATION_CSV = "annotation_only.csv"
ANNOTATION_XLSX = "textual_annotations.xlsx"

SURROUNDING_LIMIT = 10
SURROUNDING_LINES = 3
NO_TAG = "NO TAG"
EXTENDED_TAG = "ext_"

@@ -50,11 +51,10 @@ class UtilsJson:
    BACKUP_DELTA = 3600

    def __init__(self, my_dir, json_dir, annotation_dir, download_dir):
        SOURCE_ANNOTATIONS = "messenger_annotation.json"
        SOURCE_ANNOTATIONS = "gold_standard_annotation.json"
        ASSIGNMENTS = "assignments.json"
        TAG_SETS = "tagsets.json"
        TAG_TABLE = "tags.json"
        USERS_JSON = "users.json"

        self.json_dir = json_dir
        self.annot_dir = annotation_dir
@@ -64,7 +64,6 @@ class UtilsJson:
        self.assignments = UtilsJson.load_json(json_dir, ASSIGNMENTS)
        self.tag_sets = UtilsJson.load_json(json_dir, TAG_SETS)
        self.tag_table = UtilsJson.load_json(json_dir, TAG_TABLE)
        self.users = UtilsJson.load_json(json_dir, USERS_JSON)

        self.backup_dir = path.join(my_dir, UtilsJson.BACKUP_DIR)
        self.backup_path = path.join(self.backup_dir, UtilsJson.BACKUP_FILE)
@@ -106,6 +105,11 @@ class UtilsJson:
        with open(file_path, encoding="utf-8") as md_file:
            return markdown("\n\n".join(md_file.read().split("\n")))

    @staticmethod
    def load_annotators_data(tagset: int):
        """Load the gold-standard annotators' data stored for one tagset.

        The file ``gold_standard_annotators_data_tagset_<tagset>.json`` is
        read from the module-level ``JSON_DIR`` through ``load_json``.
        """
        file_name = f"gold_standard_annotators_data_tagset_{tagset}.json"
        return UtilsJson.load_json(JSON_DIR, file_name)

    def create_backup(self, take_all: bool = False):
        """Create backup to json file."""
        def get_path(index: str):
@@ -282,7 +286,9 @@ class Utils:
    def get_first_unfinished(json_files: UtilsJson, data_json: Dict[Any, Any],
                             assignment: str) -> str:
        """Get index of first unfinished annotation. """
        annot = json_files.source_annot[assignment]

        assignment_id = json_files.assignments[SUPERUSER_NAME][assignment][0]
        annot = json_files.source_annot[assignment_id]
        for i in range(len(annot)):
            if not str(i) in data_json["conversations"] or \
                    not data_json["conversations"][str(i)][2]:
@@ -297,16 +303,13 @@ class Utils:

    @staticmethod
    def get_tags(json_files: UtilsJson, tagset: str,
                 tag_json: Dict[Any, Any]) -> List[Tuple[str, str, List[str]]]:
        """Return list tags with their id and subtags."""
                 tag_json: Dict[Any, Any]) -> List[Tuple[str, str]]:
        """Return list tags with their id."""
        tag_table = json_files.tag_table
        tag_ids_lst = [lst for lst in tag_json[tagset]["tagset"]]
        tag_ids: List[Tuple[str, str, List[str]]] = []
        for dct in tag_ids_lst:
            for key, value in dct.items():
                tag_ids.append((key, tag_table[key], []))
                for my_id in value:
                    tag_ids[-1][2].append(tag_table[my_id])
        tag_ids: List[Tuple[str, str]] = []
        for tag in tag_ids_lst:
            tag_ids.append((tag, tag_table[tag]))
        return tag_ids

    @staticmethod
@@ -386,7 +389,7 @@ class Utils:
        tags = {}  # return translated tag ids
        for key, value in data_json["conversations"][str(index)][0].items():
            if not isinstance(value, list):
                tags[key] = id_table.get(value, "")
                tags[key] = value
        return tags

    @staticmethod
@@ -394,187 +397,13 @@ class Utils:
                             index: str, data: Any) -> None:
        """Save data to json file with annotation."""
        data_json = json_files.load_data_json(user, assignment)

        for line, tag in data.to_dict().items():
            data_json["conversations"][index][0][line] = Utils.get_tag_id(
                json_files, tag.strip())
            data_json["conversations"][index][0][line] = tag.strip()
        data_json["conversations"][str(index)][2].append(
            round(datetime.now().timestamp(), 1))
        json_files.save_json(data_json, user, assignment)

    @staticmethod
    def prepare_admin_data(json_files: UtilsJson, active: str,
                           finished: str) -> Any:
        """Collect one row per (user, assignment) pair for the admin overview.

        Users are taken from ``json_files.users["users"]``; users that have
        no entry in ``json_files.assignments`` are skipped.  Statistics are
        only computed for assignments whose status differs from *active*;
        the remaining rows keep the ``"NaN"`` placeholders.
        """
        rows = []
        user_entries = json_files.users["users"]
        all_assignments = json_files.assignments

        for user_entry in user_entries:
            user = user_entry[0]
            if user not in all_assignments:
                continue
            for name in all_assignments[user]:
                status, start, finish = Utils.get_status(
                    json_files, user, name, active, finished)

                if status == active:
                    lines, time, fully = "NaN", "NaN", "NaN"
                    color = "danger"
                else:
                    _, stats = UtilsStats.count_stats(json_files, user, name)
                    lines, time, fully = stats[6], stats[7], stats[8]
                    color = "success" if status == finished else "warning"

                details = all_assignments[user][name]
                rows.append((user_entry[1], user_entry[0], name,
                             details[2], details[0], details[1], status,
                             color, start, finish, lines, time, fully))

        return rows

    @staticmethod
    def prepare_xlsx_generation_data():
        """Collect the choices offered by the XLSX generation form.

        Returns:
            A ``(tagset_list, burst_list, users_list)`` tuple:

            * ``tagset_list`` -- ``(tagset_id, preview)`` pairs; the preview
              joins the names of the tags at positions 1-3 of the tagset.
            * ``burst_list`` -- ``(burst_id, preview)`` pairs; the preview
              shows the first three fields of the first two entries of the
              burst (only one if the burst has a single entry, ``""`` if the
              burst is empty).
            * ``users_list`` -- one tuple per assignment whose status is not
              ``ACTIVE_STATUS``.
        """
        tagset_list = []
        for tagset_id in JSON_FILES.tag_sets:
            data = Utils.get_tags(JSON_FILES, tagset_id, JSON_FILES.tag_sets)
            # Position 0 is deliberately left out of the preview, exactly as
            # the original range(1, min(4, len(data))) did.
            preview = "".join(tag[1] + ", " for tag in data[1:4]) + "..."
            tagset_list.append((tagset_id, preview))

        burst_list = []
        for burst_id, value in JSON_FILES.source_annot.items():
            preview = ""
            if value:
                # Slicing instead of indexing value[1] keeps a burst with a
                # single entry from raising IndexError.
                shown = ["{}-{}-{}".format(entry[0], entry[1], entry[2])
                         for entry in value[:2]]
                preview = ", ".join(shown) + ", ..."
            burst_list.append((burst_id, preview))

        users_list = []
        for user_id, user_assignments in JSON_FILES.assignments.items():
            for assignment_id, assignment in user_assignments.items():
                status = Utils.get_status(JSON_FILES, user_id, assignment_id,
                                          ACTIVE_STATUS, FINISHED_STATUS)
                if status[0] == ACTIVE_STATUS:
                    continue
                users_list.append((user_id, Utils.get_nickname(user_id),
                                   assignment_id, assignment[0],
                                   assignment[1], status))

        return tagset_list, burst_list, users_list

    @staticmethod
    def get_nickname(user_id):
        """Return the nickname stored next to *user_id*, or "" if unknown."""
        matches = (entry[1] for entry in JSON_FILES.users["users"]
                   if entry[0] == user_id)
        return next(matches, "")

    @staticmethod
    def generate_xslx_files(form_data):
        """Write the XLSX workbook and the CSV file with annotations.

        *form_data* is read for the key ``"burst"``, the multi-value field
        ``"users"`` (each entry is split on ``"_"``; the first two parts are
        passed to ``JSON_FILES.load_data_json``) and the optional flag
        ``"question_check"``.

        Every line of every conversation file of the burst becomes one
        worksheet row and one CSV line.  The files are stored in
        ``DOWNLOAD_DIR`` as ``ANNOTATION_XLSX`` and ``ANNOTATION_CSV``.

        Returns the result of ``Utils.get_prompt`` when the form data cannot
        be read or the files cannot be written; otherwise falls off the end
        and returns ``None``.
        """
        def get_letter(index):
            # Map a 0-based column index to a single letter ('A' for 0).
            # NOTE(review): indices above 25 yield characters past 'Z'; with
            # 8 fixed columns and 2 per user that happens from the 10th user.
            return chr(ord('A') + index)

        def generate_header(wlist, header, users):
            # Row 1: fixed column titles first ...
            for i in range(len(header)):
                wlist["{}1".format(get_letter(i))] = header[i]
            # ... then the first element of each user entry, in every second
            # column after the fixed ones.
            for i in range(len(users)):
                wlist["{}1".format(get_letter(len(header) + i * 2))] = users[i][0]

        def generate_body(wlist, record, ln_counter, cln_number, ln, c_id):
            # Fill the fixed columns A-H of one row.
            # NOTE(review): the row number `i` is not a parameter; it is read
            # from the enclosing function's row counter.
            wlist["A{}".format(i)] = int(record[0])
            wlist["B{}".format(i)] = int(record[1])
            wlist["C{}".format(i)] = int(record[2])
            wlist["D{}".format(i)] = c_id
            wlist["E{}".format(i)] = record[3]
            wlist["F{}".format(i)] = ln_counter
            wlist["G{}".format(i)] = cln_number
            wlist["H{}".format(i)] = ln

        def is_extended_question_mark(cline, data, conv):
            # True when the extended tag stored for this line equals "14".
            # NOTE(review): "14" is presumably the id of the question-mark
            # tag -- confirm against the tag table.
            if str("{}{}".format(EXTENDED_TAG, cline)) in data[str(conv)][0]:
                return data[str(conv)][0]["{}{}".format(
                    EXTENDED_TAG, cline)] == "14"
            return False

        def generate_data(wlist, csv_in, my_users, users_js, conv, cline, my_i,
                          question_check):
            # Write every user's tag for one line into the worksheet and
            # return csv_in extended by one ';'-terminated field per user
            # plus a trailing newline.
            output = csv_in
            for j in range(len(my_users)):
                data = users_js[j]["conversations"]
                if str(conv) in data and data[str(conv)][0]:
                    if str(cline) in data[str(conv)][0]:
                        value = data[str(conv)][0][str(cline)]
                        # The worksheet gets the tag name, the CSV the raw
                        # stored value.
                        wlist["{}{}".format(get_letter(8 + j * 2), my_i)] = \
                            Utils.get_tag_name(JSON_FILES, value)
                        # An empty value, or (only when question_check is
                        # off) an extended tag of "14", is exported as 0.
                        if not value or (not question_check and
                                         is_extended_question_mark(cline, data,
                                                                   conv)):
                            output += "0;"
                        else:
                            output += value + ";"
                    else:
                        output += "0;"
                    # The extended tag, if any, goes into the column right
                    # after the user's main tag column.
                    if str("{}{}".format(EXTENDED_TAG, cline)) \
                            in data[str(conv)][0]:
                        wlist["{}{}".format(get_letter(9 + j * 2), my_i)] = \
                            Utils.get_tag_name(
                                JSON_FILES,
                                data[str(conv)][0]["{}{}".format(
                                    EXTENDED_TAG, cline)])
                else:
                    # No data stored by this user for the conversation.
                    output += "0;"
            return output + "\n"

        # Any failure while reading the form yields the bad-request prompt.
        try:
            burst = form_data["burst"]
            users_request = form_data.getlist("users")
            question_check = True if "question_check" in form_data else False
        except Exception:
            return Utils.get_prompt(BAD_GENERATION_REQUEST)

        users = []
        for entry in users_request:
            users.append(entry.split("_"))

        # One loaded annotation JSON per requested entry, in the same order
        # as `users`.
        users_json = []
        for i in range(len(users)):
            users_json.append(JSON_FILES.load_data_json(users[i][0],
                                                        users[i][1]))

        wb = Workbook()
        ws = wb.active
        csv_out = ""
        generate_header(ws, ["user", "thread", "conversation", "conv_id",
                             "is_group", "line_number_in_file",
                             "line_number_in_conversation", "line"], users)

        i = 2  # worksheet row; row 1 holds the header
        line_counter = 1  # running line number over the whole burst
        for conv_id, entry in enumerate(JSON_FILES.source_annot[burst]):
            my_path = path.join(DATA_DIR, entry[0], entry[1], entry[2] + ".csv")
            with open(my_path, encoding="UTF-8") as conversation_file:
                line = conversation_file.readline()
                cline_number = 1
                # NOTE(review): the first line is stored unstripped while the
                # following ones are stripped, and the loop stops at the
                # first line that is empty after stripping, not only at EOF.
                while line:
                    generate_body(ws, entry, line_counter, cline_number, line,
                                  conv_id)
                    csv_out = generate_data(ws, csv_out, users, users_json,
                                            conv_id, cline_number, i,
                                            question_check)

                    line = conversation_file.readline().strip()
                    i += 1
                    line_counter += 1
                    cline_number += 1

            i += 2  # skip two lines

        try:
            wb.save(path.join(DOWNLOAD_DIR, ANNOTATION_XLSX))
            # NOTE(review): no encoding is given here, so the CSV is written
            # in the platform default encoding.
            with open(path.join(DOWNLOAD_DIR, ANNOTATION_CSV),
                      "w") as csv_file:
                # Drop the ';' left at the end of every line.
                csv_file.write(csv_out.replace(";\n", "\n"))
        except Exception:
            return Utils.get_prompt(PERMISSION_ERROR)


""" Main endpoints """
JSON_FILES = UtilsJson(DIR, JSON_DIR, ANNOTATION_DIR, DOWNLOAD_DIR)
@@ -591,29 +420,26 @@ def return_next_conversation(user: str, assignment: str, index: str,

@app.route("/<user>", methods=["GET", "POST"])
def set_assignment(user: str) -> Any:
    users_json = JSON_FILES.users
    assignments_json = JSON_FILES.assignments
    user_list = [user for user, _ in users_json["users"]]
    if user not in user_list:
        return Utils.get_prompt(MISSING_USER_DB_PROMPT)
    if user != SUPERUSER_NAME:
        return f"<h1>{MISSING_USER_DB_PROMPT}</h1>"

    assignments_json = JSON_FILES.assignments
    assign_strings = [Utils.get_status(JSON_FILES, user, key, ACTIVE_STATUS,
                                       FINISHED_STATUS)
                      for key in assignments_json[user].keys()]
    assigned_keys = []
    lines = []
    for key, value in assignments_json[user].items():
        assigned_keys.append((key, value[0], value[1]))
        lines.append(len(JSON_FILES.source_annot[value[0]]))

    return render_template("user.html", user=user, assignments=assigned_keys,
                           statuses=assign_strings, all_statuses=STATUSES)
                           statuses=assign_strings, all_statuses=STATUSES,
                           lines=lines)


@app.route("/<user>/<assignment>", methods=["POST"])
def implicit_entry(user: str, assignment: str) -> Any:
    if assignment == "stats":
        return redirect(url_for("return_stat_data", user=request.form["user"],
                                assignment=request.form["assignment"]))

    # load json if exist, create it otherwise
    file_path = JSON_FILES.get_output_json_path(user, assignment)
    if not path.isfile(file_path) or not access(file_path, R_OK):
@@ -683,94 +509,43 @@ def home(user: str, assignment: str, index: str) -> Any:

    tagsets_json = JSON_FILES.tag_sets
    burst, tagset, _ = JSON_FILES.assignments[user][assignment]
    user_id, thread, conv = Utils.get_ids(annot_json, burst, ind)

    previous_strings, result_string, following_strings = Utils.get_strings(
        DATA_DIR, annot_json, burst, ind, SURROUNDING_LIMIT)
    tags = Utils.get_tags(JSON_FILES, tagset, tagsets_json)
    user_id, thread, conv, line, _ = annot_json[burst][ind]

    result_string = Utils.get_conv_string(DATA_DIR, annot_json, burst, ind)
    line = int(line)
    lines = [i for i in range(
        max(0, line - SURROUNDING_LINES),
        min(line + SURROUNDING_LINES + 1, len(result_string) + 1))]
    annotators_data = JSON_FILES.load_annotators_data(
        tagset)[user_id][thread][conv]

    annotators_line_tags = []
    for i in range(len(result_string)):
        key = str(i + 1)
        result_tags = []
        for j in [0, 1]:
            tag = ""
            if key in annotators_data and j < len(annotators_data[key]):
                tag, q_mark, t_plus, _ = annotators_data[key][j]
                tag += "?" if q_mark else ""
                tag += "+" if t_plus else ""
            result_tags.append(tag)
        annotators_line_tags.append(result_tags)

    loaded_tags = Utils.open_record(JSON_FILES, user, assignment, str(ind))
    manual = JSON_FILES.load_manual(JSON_DIR, tagsets_json[tagset]["manual"])

    return render_template(
        "home.html", result=result_string, previous=previous_strings,
        following=following_strings, source_ids=(user_id, thread, conv),
        tags=Utils.get_tags(JSON_FILES, tagset, tagsets_json),
        loaded_tags=loaded_tags, user=user, assignment=assignment, index=index,
        annots=len(annot_json[burst]), extended_tag=EXTENDED_TAG,
        manual=manual, hidden=is_hidden
        "home.html", result=result_string,
        show_prev_foll=(1 in lines, len(result_string) in lines),
        source_ids=(user_id, thread, conv), line=line, lines=lines,
        annotators_tags=annotators_line_tags, tags=tags,
        loaded_tags=loaded_tags, user=user, assignment=assignment,
        index=index, annots=len(annot_json[burst]),
        extended_tag=EXTENDED_TAG, manual=manual, hidden=is_hidden
    )


@app.route(f"/{SUPERUSER_NAME}/stats/<user>/<assignment>",
           methods=["GET", "POST"])
def return_stat_data(user: str, assignment: str) -> Any:
    """Render the statistics page for one assignment of one user."""
    stats_rows, stats_footer = UtilsStats.count_stats(
        JSON_FILES, user, assignment)
    return render_template("stats.html", user=user, assignment=assignment,
                           data=stats_rows, footer=stats_footer)


@app.route(f"/{SUPERUSER_NAME}", methods=["GET", "POST"])
def admin_start_page() -> Any:
    """Send the superuser root URL on to the admin landing page."""
    landing_url = url_for("admin_land_page")
    return redirect(landing_url)


@app.route(f"/{SUPERUSER_NAME}/admin", methods=["GET", "POST"])
def admin_land_page():
    """Render the admin landing page."""
    template_name = "admin_land_page.html"
    return render_template(template_name, superuser=SUPERUSER_NAME)


@app.route(f"/{SUPERUSER_NAME}/admin/stats", methods=["GET", "POST"])
def admin_stats():
    """Render the admin table built by ``Utils.prepare_admin_data``."""
    admin_rows = Utils.prepare_admin_data(
        JSON_FILES, ACTIVE_STATUS, FINISHED_STATUS)
    return render_template("admin_data.html", superuser=SUPERUSER_NAME,
                           data=admin_rows)


def generate_xslx_files():
    """Do nothing; placeholder that always returns ``None``."""


@app.route(f"/{SUPERUSER_NAME}/admin/xlsxGeneration",
           methods=["GET", "POST"])
def xlsx_generation():
    """Render the form for choosing what to export to XLSX/CSV."""
    form_choices = Utils.prepare_xlsx_generation_data()
    tagset_list, burst_list, users_list = form_choices
    return render_template(
        "admin_xlsx_generation.html", superuser=SUPERUSER_NAME,
        tagset_list=tagset_list, burst_list=burst_list,
        users_list=users_list)


@app.route("/{}/admin/xlsxGeneration/Generate".format(SUPERUSER_NAME),
           methods=["GET", "POST"])
def xlsx_generation_generate():
    """Generate the XLSX/CSV exports and show the download page.

    ``Utils.generate_xslx_files`` returns an error prompt when the request
    cannot be read or the files cannot be written, and ``None`` on success.
    The prompt is passed on to the client instead of being discarded, so a
    failed generation no longer ends on the download page.
    """
    error_prompt = Utils.generate_xslx_files(request.form)
    if error_prompt is not None:
        return error_prompt
    return render_template("admin_xlsx_generation_download.html",
                           superuser=SUPERUSER_NAME)


@app.route(f"/{SUPERUSER_NAME}/admin/GetBackup", methods=["GET", "POST"])
def get_backup() -> Any:
    """Create a full backup and send the resulting zip as a download."""
    JSON_FILES.create_backup(True)
    backup_index = JSON_FILES.get_last_backup_info()[0]
    archive_name = f"backup_{backup_index}.zip"
    # NOTE(review): newer Flask releases renamed the `filename` argument of
    # send_from_directory to `path` -- confirm against the deployed version.
    return send_from_directory(
        directory=DOWNLOAD_DIR, filename=archive_name, as_attachment=True)


@app.route(f"/{SUPERUSER_NAME}/admin/xlsxGeneration/DownloadCsv",
           methods=["GET", "POST"])
def get_annotation_csv() -> Any:
    """Send the generated annotation CSV file as a download."""
    # NOTE(review): newer Flask releases renamed the `filename` argument of
    # send_from_directory to `path` -- confirm against the deployed version.
    download_options = {"directory": DOWNLOAD_DIR,
                        "filename": ANNOTATION_CSV,
                        "as_attachment": True}
    return send_from_directory(**download_options)


@app.route(f"/{SUPERUSER_NAME}/admin/xlsxGeneration/DownloadXlsx",
           methods=["GET", "POST"])
def get_annotation_xlsx() -> Any:
    """Send the generated annotation XLSX workbook as a download."""
    # NOTE(review): newer Flask releases renamed the `filename` argument of
    # send_from_directory to `path` -- confirm against the deployed version.
    download_options = {"directory": DOWNLOAD_DIR,
                        "filename": ANNOTATION_XLSX,
                        "as_attachment": True}
    return send_from_directory(**download_options)


if __name__ == "__main__":
    # Development entry point only: debug=True turns on Flask's reloader and
    # interactive debugger, which must not be exposed on a public host.
    app.run(debug=True)
+44 −167

File changed.

Preview size limit exceeded, changes collapsed.

+1 −0

File added.

Preview size limit exceeded, changes collapsed.

+1 −0

File added.

Preview size limit exceeded, changes collapsed.

+1 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading