import re from collections.abc import Iterable, Iterator from pathlib import Path from textwrap import dedent # for doctests def prepare_doctest(x: str) -> list[str]: return dedent(x).strip().splitlines(True) def fix_f2c_input(f2c_input_path: str) -> None: """ OpenBLAS has been manually modified to remove useless arguments generated by f2c. But the mismatches between the f2c ABI and the human-curated sensible ABI in OpenBLAS cause us great pain. This stuff applies to actual source files, but scipy also has multiple templating engines for Fortran, so these changes have to be applied immediately prior to f2c'ing a .f file to ensure that they also work correctly on templated files. Fortran seems to be mostly case insensitive. The templated files in particular can include weird mixtures of lower and upper case. Mostly the issues are related to 'character' types. Most LAPACK functions that take string arguments use them as enums and only care about the first character of the string. f2c generates a 'length' argument to indicate how long the string is, but OpenBLAS leaves these length arguments out because the strings are assumed to have length 1. So the goal is to cause f2c to generate no length argument. We can achieve this by replacing the string with the ascii code of the first character e.g.,: f('UPPER') --> f(85) Coming from C this surprises me a bit. I would expect `f(85)` to cause a segfault or something when f tries to find its string at memory address 85. f("UPPER") gets f2c'd to: f("UPPER", 5) But f2c compiles f(85) to the C code: static integer c__85 = 85; f(&c__85); This is perfect. Not sure why it does this, but it's very convenient for us. chla_transtype is a special case. The OpenBLAS version of chla_transtype takes a return argument, whereas f2c thinks it should return the value. """ f2c_input = Path(f2c_input_path) with open(f2c_input) as f: lines = f.readlines() new_lines = [] lines = char1_args_to_int(lines) for line in lines: line = fix_string_args(line) if f2c_input_path.endswith("_flapack-f2pywrappers.f"): line = line.replace("character cmach", "integer cmach") line = line.replace("character norm", "integer norm") if "id_dist" in str(f2c_input): line = line.replace("character*1 jobz", "integer jobz") if "jobz =" in line: line = re.sub("'(.)'", lambda r: str(ord(r.group(1))), line) if f2c_input.name in [ "_lapack_subroutine_wrappers.f", "_blas_subroutine_wrappers.f", ]: line = line.replace("character", "integer") line = line.replace("ret = chla_transtype(", "call chla_transtype(ret, 1,") # f2c has no support for variable sized arrays, so we replace them with # dummy fixed sized arrays and then put the formulas back in in # fix_f2c_output. Luckily, variable sized arrays are scarce in the scipy # code base. if "PROPACK" in str(f2c_input): line = line.replace("ylocal(n)", "ylocal(123001)") line = line.replace("character*1", "integer") if f2c_input.name == "mvndst.f": line = re.sub(r"(infin|stdev|nlower|nupper)\(d\)", r"\1(123001)", line) line = line.replace("rho(d*(d-1)/2)", "rho(123002)") new_lines.append(line) with open(f2c_input_path, "w") as f: f.writelines(new_lines) def fix_string_args(line: str) -> str: """ Replace all single character strings in (the first line of) "call" statements with their ascci codes. """ if ( not re.search("call", line, re.I) and "SIGNST" not in line and "TRANST" not in line ): return line else: return re.sub("'[A-Za-z0-9]'", lambda y: str(ord(y.group(0)[1])), line) def char1_to_int(x: str) -> str: """ Replace multicharacter strings with the ascii code of their first character. >>> char1_to_int("CALL sTRSV( 'UPPER', 'NOTRANS', 'NONUNIT', J, H, LDH, Y, 1 )") 'CALL sTRSV( 85, 78, 78, J, H, LDH, Y, 1 )' """ return re.sub("'(.)[A-Za-z -]*'", lambda r: str(ord(r.group(1))), x) def char1_args_to_int(lines: list[str]) -> list[str]: """ Replace strings with the ascii code of their first character if they are arguments to one of a long list of hard coded LAPACK functions (see fncstems). This handles multiline function calls. >>> print(char1_args_to_int(["CALL sTRSV( 'UPPER', 'NOTRANS', 'NONUNIT', J, H, LDH, Y, 1 )"])) ['CALL sTRSV( 85, 78, 78, J, H, LDH, Y, 1 )'] >>> print("".join(char1_args_to_int(prepare_doctest(''' ... call cvout (logfil, nconv, workl(ihbds), ndigit, ... & '_neupd: Last row of the eigenvector matrix for T') ... call ctrmm('Right' , 'Upper' , 'No transpose', ... & 'Non-unit', n , nconv , ... & one , workl(invsub), ldq , ... & z , ldz) ... ''')))) call cvout (logfil, nconv, workl(ihbds), ndigit, & '_neupd: Last row of the eigenvector matrix for T') call ctrmm(82 , 85 , 78, & 78, n , nconv , & one , workl(invsub), ldq , & z , ldz) """ fncstems = [ "gemm", "ggbak", "gghrd", "lacpy", "lamch", "lanhs", "lanst", "larf", "lascl", "laset", "lasr", "ormqr", "orm2r", "steqr", "stevr", "trevc", "trmm", "trsen", "trsv", "unm2r", "unmqr", ] fncnames = [] for c in "cdsz": for stem in fncstems: fncnames.append(c + stem) fncnames += ["lsame"] funcs_pattern = "|".join(fncnames) new_lines = [] replace = False for line in lines: if re.search(funcs_pattern, line, re.IGNORECASE): replace = True if replace: line = char1_to_int(line) if not re.search(r",\s*$", line): replace = False new_lines.append(line) return new_lines def fix_f2c_output(f2c_output_path: str) -> str | None: """ This function is called on the name of each C output file. It fixes up the C output in various ways to compensate for the lack of f2c support for Fortran 90 and Fortran 95. """ f2c_output = Path(f2c_output_path) with open(f2c_output) as f: lines = f.readlines() if "id_dist" in f2c_output_path: # Fix implicit casts in id_dist. lines = fix_inconsistent_decls(lines) if "odepack" in f2c_output_path or f2c_output.name == "mvndst.c": # Mark all but one declaration of each struct as extern. if f2c_output.name == "blkdta000.c": # extern marking in blkdata000.c doesn't work properly so we let it # define the one copy of the structs. It doesn't talk about lsa001 # at all though, so we need to add a definition of it. lines.append( """ struct { doublereal rownd2, pdest, pdlast, ratio, cm1[12], cm2[5], pdnorm; integer iownd2[3], icount, irflag, jtyp, mused, mxordn, mxords; } lsa001_; """ ) else: add_externs_to_structs(lines) if f2c_output.name == "_lapack_subroutine_wrappers.c": lines = [ line.replace("integer chla_transtype__", "void chla_transtype__") for line in lines ] # Substitute back the dummy fixed array sizes. We also have to remove the # "static" storage specifier since variable sized arrays can't have static # storage. if f2c_output.name == "mvndst.c": lines = fix_inconsistent_decls(lines) def fix_line(line: str) -> str: if "12300" in line: return ( line.replace("static", "") .replace("123001", "(*d__)") .replace("123002", "(*d__)*((*d__)-1)/2") ) return line lines = list(map(fix_line, lines)) if "PROPACK" in str(f2c_output): def fix_line(line: str) -> str: if f2c_output.name != "cgemm_ovwr.c": line = line.replace("struct", "extern struct") if "12300" in line: return line.replace("static", "").replace("123001", "(*n)") return line lines = list(map(fix_line, lines)) if f2c_output.name.endswith("lansvd.c"): lines.append( """ #include int second_(real *t) { *t = clock()/1000; return 0; } """ ) # In numpy 1.24 f2py changed its treatment of character argument. In # particular it does not generate a ftnlen parameter for each # character parameter but f2c still generates it. The following code # removes unneeded ftnlen parameters from the f2ced signature. The # problematic subroutines and parameters are the ones with a type character # in scipy/sparse/linalg/_eigen/arpack/arpack.pyf.src if "eupd.c" in str(f2c_output): # put signature on a single line to make replacement more # straightforward regrouped_lines = regroup_lines(lines) lines = [ re.sub(r",?\s*ftnlen\s*(howmny_len|bmat_len)", "", line) for line in regrouped_lines ] # Fix signature of c_abs to match the OpenBLAS one if "REVCOM.c" in str(f2c_output): lines = [line.replace("double c_abs(", "float c_abs(") for line in lines] with open(f2c_output, "w") as f: f.writelines(lines) return None def add_externs_to_structs(lines: list[str]) -> None: """ The fortran "common" keyword is supposed to share variables between a bunch of files. f2c doesn't handle this correctly (it isn't possible for it to handle it correctly because it only looks one file at a time). We mark all the structs as externs and then (separately) add one non extern version to each file. >>> lines = prepare_doctest(''' ... struct { doublereal rls[218]; ... integer ils[39]; ... } ls0001_; ... struct { doublereal rlsa[22]; ... integer ilsa[9]; ... } lsa001_; ... struct { integer ieh[2]; ... } eh0001_; ... ''') >>> add_externs_to_structs(lines) >>> print("".join(lines)) extern struct { doublereal rls[218]; integer ils[39]; } ls0001_; extern struct { doublereal rlsa[22]; integer ilsa[9]; } lsa001_; extern struct { integer ieh[2]; } eh0001_; """ for idx, line in enumerate(lines): if line.startswith("struct"): lines[idx] = "extern " + lines[idx] def regroup_lines(lines: Iterable[str]) -> Iterator[str]: """ Make sure that functions and declarations have their argument list only on one line. >>> print("".join(regroup_lines(prepare_doctest(''' ... /* Subroutine */ int clanhfwrp_(real *ret, char *norm, char *transr, char * ... uplo, integer *n, complex *a, real *work, ftnlen norm_len, ftnlen ... transr_len, ftnlen uplo_len) ... { ... static doublereal psum[52]; ... extern /* Subroutine */ int dqelg_(integer *, doublereal *, doublereal *, ... doublereal *, doublereal *, integer *); ... ''')))) /* Subroutine */ int clanhfwrp_(real *ret, char *norm, char *transr, char * uplo, integer *n, complex *a, real *work, ftnlen norm_len, ftnlen transr_len, ftnlen uplo_len){ static doublereal psum[52]; extern /* Subroutine */ int dqelg_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); """ line_iter = iter(lines) for line in line_iter: if "/* Subroutine */" not in line: yield line continue is_definition = line.startswith("/* Subroutine */") stop = ")" if is_definition else ";" if stop in line: yield line continue sub_lines = [line.rstrip()] for line in line_iter: sub_lines.append(line.strip()) if stop in line: break joined_line = " ".join(sub_lines) if is_definition: yield joined_line else: yield from (x + ";" for x in joined_line.split(";")[:-1]) def fix_inconsistent_decls(lines: list[str]) -> list[str]: """ Fortran functions in id_dist use implicit casting of function args which f2c doesn't support. The fortran equivalent of the following code: double f(double x){ return x + 5; } double g(int x){ return f(x); } gets f2c'd to: double f(double x){ return x + 5; } double g(int x){ double f(int); return f(x); } which fails to compile because the declaration of f type clashes with the definition. Gather up all the definitions in each file and then gathers the declarations and fixes them if necessary so that the declaration matches the definition. >>> print("".join(fix_inconsistent_decls(prepare_doctest(''' ... /* Subroutine */ double f(double x){ ... return x + 5; ... } ... /* Subroutine */ double g(int x){ ... extern /* Subroutine */ double f(int); ... return f(x); ... } ... ''')))) /* Subroutine */ double f(double x){ return x + 5; } /* Subroutine */ double g(int x){ extern /* Subroutine */ double f(double); return f(x); } """ func_types = {} lines = list(regroup_lines(lines)) for line in lines: if not line.startswith("/* Subroutine */"): continue [func_name, types] = get_subroutine_decl(line) func_types[func_name] = types for idx, line in enumerate(lines): if "extern /* Subroutine */" not in line: continue decls = line.split(")")[:-1] for decl in decls: [func_name, types] = get_subroutine_decl(decl) if func_name not in func_types or types == func_types[func_name]: continue types = func_types[func_name] l = list(line.partition(func_name + "(")) l[2:] = list(l[2].partition(")")) l[2] = ", ".join(types) line = "".join(l) lines[idx] = line return lines def get_subroutine_decl(sub: str) -> tuple[str, list[str]]: """ >>> get_subroutine_decl( ... "extern /* Subroutine */ int dqelg_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *);" ... ) ('dqelg_', ['integer *', 'doublereal *', 'doublereal *', 'doublereal *', 'doublereal *', 'integer *']) """ func_name = sub.partition("(")[0].rpartition(" ")[2] args_str = sub.partition("(")[2].partition(")")[0] args = args_str.split(",") types = [] for arg in args: arg = arg.strip() if "*" in arg: type = "".join(arg.partition("*")[:-1]) else: type = arg.partition(" ")[0] types.append(type.strip()) return (func_name, types) def scipy_fix_cfile(path: str) -> None: """ Replace void return types with int return types in various generated .c and .h files. We can't achieve this with a simple patch because these files are not in the sdist, they are generated as part of the build. """ source_path = Path(path) text = source_path.read_text() text = text.replace("extern void F_WRAPPEDFUNC", "extern int F_WRAPPEDFUNC") text = text.replace("extern void F_FUNC", "extern int F_FUNC") text = text.replace("void (*f2py_func)", "int (*f2py_func)") text = text.replace("static void cb_", "static int cb_") text = text.replace("typedef void(*cb_", "typedef int(*cb_") text = text.replace("void(*)", "int(*)") text = text.replace("static void f2py_setup_", "static int f2py_setup_") if path.endswith("_flapackmodule.c"): text = text.replace(",size_t", "") text = re.sub(r",slen\([a-z]*\)\)", ")", text) if path.endswith("stats/statlib/spearman.c"): # in scipy/stats/statlib/swilk.f ALNORM is called with a double, and in # scipy/stats/statlib/spearman.f with a real this generates # inconsistent signature. Let's use double in both, I don't think this # code path will work (but at least it will compile) since it needs # "ALNORM = algorithm AS66", which I don't think we have with the f2c # route text = text.replace("extern real alnorm_", "extern doublereal alnorm_") source_path.write_text(text) for lib in ["lapack", "blas"]: if path.endswith(f"cython_{lib}.c"): header_path = Path(path).with_name(f"_{lib}_subroutines.h") header_text = header_path.read_text() header_text = header_text.replace("void F_FUNC", "int F_FUNC") header_path.write_text(header_text) def scipy_fixes(args: list[str]) -> None: for arg in args: if arg.endswith(".c"): scipy_fix_cfile(arg)