diff --git a/Excel/raw_OBS-GYN_2016-2019.xlsx b/Excel/raw_OBS-GYN_2016-2019.xlsx
new file mode 100644
index 0000000..40ee1a8
Binary files /dev/null and b/Excel/raw_OBS-GYN_2016-2019.xlsx differ
diff --git a/Excel/raw_kirurgji_2016-2019.xlsx b/Excel/raw_kirurgji_2016-2019.xlsx
new file mode 100644
index 0000000..ffb6115
Binary files /dev/null and b/Excel/raw_kirurgji_2016-2019.xlsx differ
diff --git a/Excel/raw_urgjenca_2016-2019.xlsx b/Excel/raw_urgjenca_2016-2019.xlsx
new file mode 100644
index 0000000..9fed97a
Binary files /dev/null and b/Excel/raw_urgjenca_2016-2019.xlsx differ
diff --git a/Word/OBS-GYN_2016.docx b/Word/OBS-GYN_2016.docx
new file mode 100644
index 0000000..16a1189
Binary files /dev/null and b/Word/OBS-GYN_2016.docx differ
diff --git a/Word/OBS-GYN_2017.docx b/Word/OBS-GYN_2017.docx
new file mode 100644
index 0000000..bc063a2
Binary files /dev/null and b/Word/OBS-GYN_2017.docx differ
diff --git a/Word/OBS-GYN_2018.docx b/Word/OBS-GYN_2018.docx
new file mode 100644
index 0000000..381b26e
Binary files /dev/null and b/Word/OBS-GYN_2018.docx differ
diff --git a/Word/OBS-GYN_2019.docx b/Word/OBS-GYN_2019.docx
new file mode 100644
index 0000000..d81bd51
Binary files /dev/null and b/Word/OBS-GYN_2019.docx differ
diff --git a/Word/kirurgji_2016.docx b/Word/kirurgji_2016.docx
new file mode 100644
index 0000000..b1b80dd
Binary files /dev/null and b/Word/kirurgji_2016.docx differ
diff --git a/Word/kirurgji_2017.docx b/Word/kirurgji_2017.docx
new file mode 100644
index 0000000..38835a9
Binary files /dev/null and b/Word/kirurgji_2017.docx differ
diff --git a/Word/kirurgji_2018.docx b/Word/kirurgji_2018.docx
new file mode 100644
index 0000000..0879e80
Binary files /dev/null and b/Word/kirurgji_2018.docx differ
diff --git a/Word/kirurgji_2019.docx b/Word/kirurgji_2019.docx
new file mode 100644
index 0000000..97829bf
Binary files /dev/null and b/Word/kirurgji_2019.docx differ
diff --git a/Word/urgjenca_2016.docx b/Word/urgjenca_2016.docx
new file mode 100644
index 0000000..ae4a5fe
Binary files /dev/null and b/Word/urgjenca_2016.docx differ
diff --git a/Word/urgjenca_2017.docx b/Word/urgjenca_2017.docx
new file mode 100644
index 0000000..dd16774
Binary files /dev/null and b/Word/urgjenca_2017.docx differ
diff --git a/Word/urgjenca_2018.docx b/Word/urgjenca_2018.docx
new file mode 100644
index 0000000..d140ad9
Binary files /dev/null and b/Word/urgjenca_2018.docx differ
diff --git a/Word/urgjenca_2019.docx b/Word/urgjenca_2019.docx
new file mode 100644
index 0000000..30ff87a
Binary files /dev/null and b/Word/urgjenca_2019.docx differ
diff --git a/convert.py b/convert.py
new file mode 100644
index 0000000..abb0d5c
--- /dev/null
+++ b/convert.py
@@ -0,0 +1,72 @@
+"""Merge the tables of a clinic's Word reports into one raw Excel workbook."""
+import pandas as pd
+from os import listdir
+from os.path import isfile, join
+from docx.api import Document
+from enum import Enum
+
+
+class Klinika(Enum):
+    """Clinic identifiers as they appear in the report file names."""
+
+    KIRURGJI = 'kirurgji'
+    OBS = 'OBS-GYN'
+    URGJENCA = 'urgjenca'
+
+
+def convert(klinika: str) -> None:
+    """Collect every table row of one clinic's .docx files into a workbook.
+
+    Every file in ./Word/ whose name contains ``klinika`` but not "lock" is
+    read.  The first row of each table is treated as its header; all other
+    rows become data rows.  The result is written to
+    ./Excel/raw_{klinika}_2016-2019.xlsx.
+
+    Args:
+        klinika: Clinic name fragment to look for in the file names, e.g.
+            ``Klinika.OBS.value``.
+
+    Raises:
+        ValueError: If the number of distinct header texts differs from the
+            number of cells per data row (raised by pandas).
+    """
+    print(klinika)
+    docx_path = './Word/'
+    xlsx_path = './Excel/'
+    # Sorted so the row order of the workbook does not depend on the
+    # arbitrary order in which the OS lists the directory.
+    list_paths = sorted(
+        join(docx_path, f)
+        for f in listdir(docx_path)
+        if isfile(join(docx_path, f)) and klinika in f and "lock" not in f
+    )
+
+    columns = []
+    rows = []
+    for path in list_paths:
+        document = Document(path)
+        for table in document.tables:
+            table_rows = list(table.rows)
+            if not table_rows:
+                continue
+            for cell in table_rows[0].cells:
+                header = cell.text.strip().replace('\n', ' ')
+                # Keep each header text once, in first-seen order.
+                if header not in columns:
+                    columns.append(header)
+            # Skip the header row instead of deleting it from the document.
+            for row in table_rows[1:]:
+                rows.append([cell.text for cell in row.cells])
+        print(f'{path} Done')
+
+    print(f'{len(columns)} Columns found')
+    print('writing to excel...')
+    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
+    # copied the whole frame on every call.
+    df = pd.DataFrame(rows, columns=columns)
+    excel_path = join(xlsx_path, f"raw_{klinika}_2016-2019.xlsx")
+    df.to_excel(excel_path, index=False)
+
+
+if __name__ == "__main__":
+    convert(Klinika.OBS.value)
diff --git a/llogarit_moshen.py b/llogarit_moshen.py
new file mode 100644
index 0000000..2c1f3cc
--- /dev/null
+++ b/llogarit_moshen.py
@@ -0,0 +1,155 @@
+"""Compute the age (MOSHA) for every row of a raw clinic workbook."""
+import pandas as pd
+from datetime import datetime
+import calendar
+from os.path import join
+import time
+from enum import Enum
+
+
+class Klinika(Enum):
+    """Clinic identifiers as they appear in the workbook file names."""
+
+    KIRURGJI = 'kirurgji'
+    OBS = 'OBS-GYN'
+    URGJENCA = 'urgjenca'
+
+
+klinika = Klinika.KIRURGJI.value
+
+xlsx_path = './Excel/'
+# The f prefix is required: without it the literal "{klinika}" was looked up.
+excel_path = join(xlsx_path, f"raw_{klinika}_2016-2019.xlsx")
+
+df = pd.read_excel(excel_path, sheet_name='Sheet1')
+df = df.reset_index()
+# A scalar fills the column for any row count (was hard-coded to 2384 rows).
+df['MOSHA'] = None
+df['ERROR'] = None
+# Positions of the two result columns, looked up once for df.iat below.
+MOSHA_COL = df.columns.get_loc('MOSHA')
+ERROR_COL = df.columns.get_loc('ERROR')
+data_shtri_korigj = []
+datelindja_korigj = []
+
+
+def llogarit_moshen(datelindja: datetime, data_shtri: datetime) -> int:
+    """Return the whole years elapsed from ``datelindja`` to ``data_shtri``.
+
+    One year is subtracted when the month/day of ``data_shtri`` comes before
+    the month/day of ``datelindja``.
+    """
+    before_birthday = (data_shtri.month, data_shtri.day) < (
+        datelindja.month, datelindja.day)
+    return data_shtri.year - datelindja.year - before_birthday
+
+
+def konverto_data(datat: str, i: int):
+    """Interpret one cleaned cell text as a date or as a declared age.
+
+    Writes to the MOSHA / ERROR cells of row ``i`` of the module-level ``df``
+    when the text is a placeholder or states an age directly.
+
+    Args:
+        datat: Cell text as prepared by ``pastro_string``.
+        i: Positional row index into ``df``.
+
+    Returns:
+        ``""`` for the known placeholder values (row is flagged "ERROR");
+        ``"DEKLARUAR"`` when the text contains "vj" or "muaj" and the age was
+        stored in MOSHA directly (the "muaj" number is divided by 12);
+        a ``datetime`` for a parsed date; ``None`` when parsing fails.
+    """
+    datat_pattern = "%d/%m/%y"
+    try:
+        if "2572/81" in datat or datat in ("?", "nan", "Pensionist"):
+            df.iat[i, MOSHA_COL] = "ERROR"
+            df.iat[i, ERROR_COL] = datat
+            return ""
+        if "vj" in datat:
+            datat = datat.replace("vjec", "").replace("vj", "")
+            df.iat[i, MOSHA_COL] = int(datat)
+            return "DEKLARUAR"
+        if "muaj" in datat:
+            df.iat[i, MOSHA_COL] = int(datat.replace("muaj", "")) / 12
+            return "DEKLARUAR"
+        datat = datat.split('/')
+        if len(datat) == 1:
+            # Only one field: use 1 for both day and month.
+            datat.insert(0, '1')
+            datat.insert(1, '1')
+        if datat[0] == "0":
+            datat[0] = "1"
+        if len(datat) == 2:
+            # Two fields: use 1 for the day.
+            datat.insert(0, '1')
+        if len(datat) > 2:
+            if datat[2] in ['1', '2', '3', '4', '5', '6', '7', '8', '9']:
+                datat[2] = f"0{datat[2]}"
+            year = int(datat[2])
+            if year > 99:
+                datat_pattern = "%d/%m/%Y"
+            # Forced assumption: no birth date is 1919 or earlier, based on
+            # the admission dates, which are 2019 at the latest.
+            elif year > 19:
+                datat[2] = f"19{datat[2]}"
+                datat_pattern = "%d/%m/%Y"
+            else:
+                datat_pattern = "%d/%m/%y"
+        datat = "/".join(datat)
+        return datetime.strptime(datat, datat_pattern)
+    except (TypeError, ValueError) as e:
+        # ValueError also covers int() on non-numeric text, which used to
+        # escape and abort the whole run.
+        print(f"{i}: {e} -- {datat}")
+        return None
+
+
+def pastro_string(datat: str, i: int):
+    """Stringify and normalise a raw cell, then pass it to ``konverto_data``.
+
+    Surrounding whitespace, spaces and apostrophes are removed and "." is
+    replaced by "/" so that dotted dates share one separator.
+
+    Returns:
+        Whatever ``konverto_data`` returns for the cleaned text.
+    """
+    datat = str(datat).strip().replace(" ", "").replace(".", "/")
+    datat = datat.replace("'", "")
+    return konverto_data(datat, i)
+
+
+for i, (_, row) in enumerate(df.iterrows()):
+    raw_shtrimi = row['DATA E SHTRIMIT']
+    raw_datelindja = row['DATELINDJA']
+    emer = row['EMER']
+    data_shtri = pastro_string(raw_shtrimi, i)
+    datelindja = pastro_string(raw_datelindja, i)
+    if data_shtri is None or datelindja is None:
+        # Flag the row and carry on.  Stopping here (as before) left the two
+        # result lists shorter than df, so the df.insert calls below raised.
+        print(f"{i}: {data_shtri} --{emer}-- {datelindja}")
+        df.iat[i, ERROR_COL] = f"{raw_shtrimi} -- {raw_datelindja}"
+        if datelindja != "DEKLARUAR":
+            df.iat[i, MOSHA_COL] = "ERROR"
+    elif isinstance(data_shtri, datetime) and isinstance(datelindja, datetime):
+        mosha = llogarit_moshen(datelindja, data_shtri)
+        if mosha == 0:
+            df.iat[i, MOSHA_COL] = "ERROR"
+            df.iat[i, ERROR_COL] = mosha
+        else:
+            df.iat[i, MOSHA_COL] = mosha
+    data_shtri_korigj.append(data_shtri)
+    datelindja_korigj.append(datelindja)
+
+df.insert(16, "DT_SHTRIMI_KORIGJ", data_shtri_korigj)
+df.insert(17, "DATELINDJA_KORIGJ", datelindja_korigj)
+# Drop the helper column that reset_index() added at position 0.
+df.drop(columns=df.columns[0], inplace=True)
+
+current_GMT = time.gmtime()
+ts = calendar.timegm(current_GMT)
+
+# f-string, otherwise every run wrote to the literal "{klinika}_{ts}.xlsx".
+excel_path = join(xlsx_path, f"{klinika}_{ts}.xlsx")
+df.to_excel(excel_path, index=False)