Shume shume keq data input. Done!

This commit is contained in:
2022-09-12 16:37:18 +02:00
parent b7c71eee3d
commit 991170c510
8 changed files with 33 additions and 13 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -3,6 +3,7 @@ from os import listdir
from os.path import isfile, join from os.path import isfile, join
from docx.api import Document from docx.api import Document
from enum import Enum from enum import Enum
from datetime import datetime
class Klinika(Enum): class Klinika(Enum):
@@ -12,7 +13,7 @@ class Klinika(Enum):
def convert(klinika: str): def convert(klinika: str):
print(klinika) print(f"Filloi {klinika} me {datetime.now()}")
columns = [] columns = []
filtered_columns = [] filtered_columns = []
docx_path = './Word/' docx_path = './Word/'
@@ -26,6 +27,14 @@ def convert(klinika: str):
for path in list_paths: for path in list_paths:
document = Document(path) document = Document(path)
for table in document.tables: for table in document.tables:
if "urgjenca_2016.docx" in path:
grid = table._tbl.find("w:tblGrid", table._tbl.nsmap)
for cell in table.column_cells(2):
cell._tc.getparent().remove(cell._tc)
col_elem = grid[2]
grid.remove(col_elem)
print(f"U FSHI PAVIONI")
for cell in table.rows[0].cells: for cell in table.rows[0].cells:
columns.append(cell.text.strip().replace('\n', ' ')) columns.append(cell.text.strip().replace('\n', ' '))
RowA = table.rows[0] RowA = table.rows[0]
@@ -34,16 +43,17 @@ def convert(klinika: str):
for row in table.rows: for row in table.rows:
text = [cell.text for cell in row.cells] text = [cell.text for cell in row.cells]
df = df.append([text], ignore_index=True) df = df.append([text], ignore_index=True)
print(f'{path} Done') print(f"Mbaroi {path} me {datetime.now()}")
for word in columns: for word in columns:
if word not in filtered_columns: if word not in filtered_columns:
filtered_columns.append(word) filtered_columns.append(word)
print(f'{len(filtered_columns)} Columns found') print(f'{len(filtered_columns)} Columns found')
print('writing to excel...') print('writing to excel...')
print(f"Perfundoi {klinika} me {datetime.now()}")
df.columns = filtered_columns df.columns = filtered_columns
excel_path = join(xlsx_path, f"raw_{klinika}_2016-2019.xlsx") excel_path = join(xlsx_path, f"raw_{klinika}_2016-2019.xlsx")
df.to_excel(excel_path, index=False) df.to_excel(excel_path, index=False)
convert(Klinika.OBS.value) convert(Klinika.URGJENCA.value)

View File

@@ -37,7 +37,7 @@ def konverto_data(datat: str, i: int):
# i +=1 # i +=1
datat_pattern = "%d/%m/%y" datat_pattern = "%d/%m/%y"
try: try:
if "2572/81" in datat or datat == "?" or datat == "nan" or datat == "Pensionist" or datat == "15/13/92" or datat == "29/2/94": if "2572/81" in datat or datat == "?" or datat == "nan" or datat == "Pensionist" or datat == "15/13/92" or datat == "29/2/94" or datat == "20/0/81" or "Colicaabdpd" in datat or "2379/11" in datat or "2710/44" in datat or "31/4/56" in datat or "11/1/199" in datat or "27/7/889" in datat or "31/9/84" in datat:
df.iat[i, df.columns.get_loc('MOSHA')] = "ERROR" df.iat[i, df.columns.get_loc('MOSHA')] = "ERROR"
df.iat[i, df.columns.get_loc('ERROR')] = datat df.iat[i, df.columns.get_loc('ERROR')] = datat
return "" return ""
@@ -53,7 +53,6 @@ def konverto_data(datat: str, i: int):
return "DEKLARUAR" return "DEKLARUAR"
if datat.endswith('/'): if datat.endswith('/'):
datat = datat[:-1] datat = datat[:-1]
print(datat)
datat = datat.split('/') datat = datat.split('/')
if len(datat) == 1: if len(datat) == 1:
datat.insert(0, '1') datat.insert(0, '1')
@@ -65,14 +64,17 @@ def konverto_data(datat: str, i: int):
if (len(datat)) > 2: if (len(datat)) > 2:
if datat[2] in ['1', '2', '3', '4', '5', '6', '7', '8', '9']: if datat[2] in ['1', '2', '3', '4', '5', '6', '7', '8', '9']:
datat[2] = f"0{datat[2]}" datat[2] = f"0{datat[2]}"
if int(datat[2]) > 99: try:
datat_pattern = "%d/%m/%Y" if int(datat[2]) > 99:
# Detyrohem te bej nje supozim qe nuk ka datelindje 1919 e poshte, bazuar mbi datat e shtrimit qe jane max 2019 datat_pattern = "%d/%m/%Y"
elif int(datat[2]) > 19 and int(datat[2]) < 100: # Detyrohem te bej nje supozim qe nuk ka datelindje 1919 e poshte, bazuar mbi datat e shtrimit qe jane max 2019
datat[2] = f"19{datat[2]}" elif int(datat[2]) > 19 and int(datat[2]) < 100:
datat_pattern = "%d/%m/%Y" datat[2] = f"19{datat[2]}"
else: datat_pattern = "%d/%m/%Y"
datat_pattern = "%d/%m/%y" else:
datat_pattern = "%d/%m/%y"
except:
print(i, datat)
datat = "/".join(datat) datat = "/".join(datat)
try: try:
datat = datetime.strptime(datat, datat_pattern) datat = datetime.strptime(datat, datat_pattern)
@@ -92,6 +94,7 @@ def pastro_string(datat: str, i: int):
datat = datat.replace(" ", "") datat = datat.replace(" ", "")
datat = datat.replace(".", "/") datat = datat.replace(".", "/")
datat = datat.replace("'", "") datat = datat.replace("'", "")
datat = konverto_data(datat, i) datat = konverto_data(datat, i)
return datat return datat
@@ -120,11 +123,18 @@ for index, row in df.iterrows():
datelindja_korigj.append(datelindja) datelindja_korigj.append(datelindja)
i += 1 i += 1
df.insert(len(df.columns), "DT_SHTRIMI_KORIGJ", data_shtri_korigj) df.insert(len(df.columns), "DT_SHTRIMI_KORIGJ", data_shtri_korigj)
df.insert(len(df.columns), "DATELINDJA_KORIGJ", datelindja_korigj) df.insert(len(df.columns), "DATELINDJA_KORIGJ", datelindja_korigj)
df.drop(columns=df.columns[0], axis=1, inplace=True)
df.drop(columns=df.columns[0], axis=1, inplace=True) df.drop(columns=df.columns[0], axis=1, inplace=True)
cols = df.columns.tolist()
index = cols.index('MOSHA')
del cols[index]
cols.insert(3,'MOSHA')
df = df[cols]
current_GMT = time.gmtime() current_GMT = time.gmtime()
ts = calendar.timegm(current_GMT) ts = calendar.timegm(current_GMT)