Shume shume keq data input. Done!
This commit is contained in:
Binary file not shown.
BIN
Excel/OBS-GYN_1662993385.xlsx
Normal file
BIN
Excel/OBS-GYN_1662993385.xlsx
Normal file
Binary file not shown.
Binary file not shown.
BIN
Excel/kirurgji_1662993337.xlsx
Normal file
BIN
Excel/kirurgji_1662993337.xlsx
Normal file
Binary file not shown.
Binary file not shown.
BIN
Excel/urgjenca_1662993051.xlsx
Normal file
BIN
Excel/urgjenca_1662993051.xlsx
Normal file
Binary file not shown.
16
convert.py
16
convert.py
@@ -3,6 +3,7 @@ from os import listdir
|
|||||||
from os.path import isfile, join
|
from os.path import isfile, join
|
||||||
from docx.api import Document
|
from docx.api import Document
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
class Klinika(Enum):
|
class Klinika(Enum):
|
||||||
@@ -12,7 +13,7 @@ class Klinika(Enum):
|
|||||||
|
|
||||||
|
|
||||||
def convert(klinika: str):
|
def convert(klinika: str):
|
||||||
print(klinika)
|
print(f"Filloi {klinika} me {datetime.now()}")
|
||||||
columns = []
|
columns = []
|
||||||
filtered_columns = []
|
filtered_columns = []
|
||||||
docx_path = './Word/'
|
docx_path = './Word/'
|
||||||
@@ -26,6 +27,14 @@ def convert(klinika: str):
|
|||||||
for path in list_paths:
|
for path in list_paths:
|
||||||
document = Document(path)
|
document = Document(path)
|
||||||
for table in document.tables:
|
for table in document.tables:
|
||||||
|
if "urgjenca_2016.docx" in path:
|
||||||
|
grid = table._tbl.find("w:tblGrid", table._tbl.nsmap)
|
||||||
|
for cell in table.column_cells(2):
|
||||||
|
cell._tc.getparent().remove(cell._tc)
|
||||||
|
col_elem = grid[2]
|
||||||
|
grid.remove(col_elem)
|
||||||
|
print(f"U FSHI PAVIONI")
|
||||||
|
|
||||||
for cell in table.rows[0].cells:
|
for cell in table.rows[0].cells:
|
||||||
columns.append(cell.text.strip().replace('\n', ' '))
|
columns.append(cell.text.strip().replace('\n', ' '))
|
||||||
RowA = table.rows[0]
|
RowA = table.rows[0]
|
||||||
@@ -34,16 +43,17 @@ def convert(klinika: str):
|
|||||||
for row in table.rows:
|
for row in table.rows:
|
||||||
text = [cell.text for cell in row.cells]
|
text = [cell.text for cell in row.cells]
|
||||||
df = df.append([text], ignore_index=True)
|
df = df.append([text], ignore_index=True)
|
||||||
print(f'{path} Done')
|
print(f"Mbaroi {path} me {datetime.now()}")
|
||||||
|
|
||||||
for word in columns:
|
for word in columns:
|
||||||
if word not in filtered_columns:
|
if word not in filtered_columns:
|
||||||
filtered_columns.append(word)
|
filtered_columns.append(word)
|
||||||
print(f'{len(filtered_columns)} Columns found')
|
print(f'{len(filtered_columns)} Columns found')
|
||||||
print('writing to excel...')
|
print('writing to excel...')
|
||||||
|
print(f"Perfundoi {klinika} me {datetime.now()}")
|
||||||
df.columns = filtered_columns
|
df.columns = filtered_columns
|
||||||
excel_path = join(xlsx_path, f"raw_{klinika}_2016-2019.xlsx")
|
excel_path = join(xlsx_path, f"raw_{klinika}_2016-2019.xlsx")
|
||||||
df.to_excel(excel_path, index=False)
|
df.to_excel(excel_path, index=False)
|
||||||
|
|
||||||
|
|
||||||
convert(Klinika.OBS.value)
|
convert(Klinika.URGJENCA.value)
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ def konverto_data(datat: str, i: int):
|
|||||||
# i +=1
|
# i +=1
|
||||||
datat_pattern = "%d/%m/%y"
|
datat_pattern = "%d/%m/%y"
|
||||||
try:
|
try:
|
||||||
if "2572/81" in datat or datat == "?" or datat == "nan" or datat == "Pensionist" or datat == "15/13/92" or datat == "29/2/94":
|
if "2572/81" in datat or datat == "?" or datat == "nan" or datat == "Pensionist" or datat == "15/13/92" or datat == "29/2/94" or datat == "20/0/81" or "Colicaabdpd" in datat or "2379/11" in datat or "2710/44" in datat or "31/4/56" in datat or "11/1/199" in datat or "27/7/889" in datat or "31/9/84" in datat:
|
||||||
df.iat[i, df.columns.get_loc('MOSHA')] = "ERROR"
|
df.iat[i, df.columns.get_loc('MOSHA')] = "ERROR"
|
||||||
df.iat[i, df.columns.get_loc('ERROR')] = datat
|
df.iat[i, df.columns.get_loc('ERROR')] = datat
|
||||||
return ""
|
return ""
|
||||||
@@ -53,7 +53,6 @@ def konverto_data(datat: str, i: int):
|
|||||||
return "DEKLARUAR"
|
return "DEKLARUAR"
|
||||||
if datat.endswith('/'):
|
if datat.endswith('/'):
|
||||||
datat = datat[:-1]
|
datat = datat[:-1]
|
||||||
print(datat)
|
|
||||||
datat = datat.split('/')
|
datat = datat.split('/')
|
||||||
if len(datat) == 1:
|
if len(datat) == 1:
|
||||||
datat.insert(0, '1')
|
datat.insert(0, '1')
|
||||||
@@ -65,14 +64,17 @@ def konverto_data(datat: str, i: int):
|
|||||||
if (len(datat)) > 2:
|
if (len(datat)) > 2:
|
||||||
if datat[2] in ['1', '2', '3', '4', '5', '6', '7', '8', '9']:
|
if datat[2] in ['1', '2', '3', '4', '5', '6', '7', '8', '9']:
|
||||||
datat[2] = f"0{datat[2]}"
|
datat[2] = f"0{datat[2]}"
|
||||||
if int(datat[2]) > 99:
|
try:
|
||||||
datat_pattern = "%d/%m/%Y"
|
if int(datat[2]) > 99:
|
||||||
# Detyrohem te bej nje supozim qe nuk ka datelindje 1919 e poshte, bazuar mbi datat e shtrimit qe jane max 2019
|
datat_pattern = "%d/%m/%Y"
|
||||||
elif int(datat[2]) > 19 and int(datat[2]) < 100:
|
# Detyrohem te bej nje supozim qe nuk ka datelindje 1919 e poshte, bazuar mbi datat e shtrimit qe jane max 2019
|
||||||
datat[2] = f"19{datat[2]}"
|
elif int(datat[2]) > 19 and int(datat[2]) < 100:
|
||||||
datat_pattern = "%d/%m/%Y"
|
datat[2] = f"19{datat[2]}"
|
||||||
else:
|
datat_pattern = "%d/%m/%Y"
|
||||||
datat_pattern = "%d/%m/%y"
|
else:
|
||||||
|
datat_pattern = "%d/%m/%y"
|
||||||
|
except:
|
||||||
|
print(i, datat)
|
||||||
datat = "/".join(datat)
|
datat = "/".join(datat)
|
||||||
try:
|
try:
|
||||||
datat = datetime.strptime(datat, datat_pattern)
|
datat = datetime.strptime(datat, datat_pattern)
|
||||||
@@ -92,6 +94,7 @@ def pastro_string(datat: str, i: int):
|
|||||||
datat = datat.replace(" ", "")
|
datat = datat.replace(" ", "")
|
||||||
datat = datat.replace(".", "/")
|
datat = datat.replace(".", "/")
|
||||||
datat = datat.replace("'", "")
|
datat = datat.replace("'", "")
|
||||||
|
|
||||||
datat = konverto_data(datat, i)
|
datat = konverto_data(datat, i)
|
||||||
return datat
|
return datat
|
||||||
|
|
||||||
@@ -120,11 +123,18 @@ for index, row in df.iterrows():
|
|||||||
datelindja_korigj.append(datelindja)
|
datelindja_korigj.append(datelindja)
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
|
|
||||||
df.insert(len(df.columns), "DT_SHTRIMI_KORIGJ", data_shtri_korigj)
|
df.insert(len(df.columns), "DT_SHTRIMI_KORIGJ", data_shtri_korigj)
|
||||||
df.insert(len(df.columns), "DATELINDJA_KORIGJ", datelindja_korigj)
|
df.insert(len(df.columns), "DATELINDJA_KORIGJ", datelindja_korigj)
|
||||||
|
|
||||||
|
df.drop(columns=df.columns[0], axis=1, inplace=True)
|
||||||
df.drop(columns=df.columns[0], axis=1, inplace=True)
|
df.drop(columns=df.columns[0], axis=1, inplace=True)
|
||||||
|
|
||||||
|
cols = df.columns.tolist()
|
||||||
|
index = cols.index('MOSHA')
|
||||||
|
del cols[index]
|
||||||
|
cols.insert(3,'MOSHA')
|
||||||
|
df = df[cols]
|
||||||
current_GMT = time.gmtime()
|
current_GMT = time.gmtime()
|
||||||
ts = calendar.timegm(current_GMT)
|
ts = calendar.timegm(current_GMT)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user